From c62d81bcfe82526cc3da10cf4fc63faad368bc60 Mon Sep 17 00:00:00 2001
From: Alessandro Rubini <rubini@unipv.it>
Date: Sun, 20 Sep 2009 23:28:04 +0200
Subject: mtd: use bbm.h in nand.h

This consolidates common code in nand.h and bbm.h. The
comments and data structures were the same, this keeps
the comment from nand.h as it fits 80 columns, while the one
in bbm.h did not.

Signed-off-by: Alessandro Rubini <rubini@unipv.it>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 include/linux/mtd/bbm.h  | 35 +++++++++++-----------
 include/linux/mtd/nand.h | 76 +-----------------------------------------------
 2 files changed, 19 insertions(+), 92 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mtd/bbm.h b/include/linux/mtd/bbm.h
index fff8c53e5434..9c3757c5759d 100644
--- a/include/linux/mtd/bbm.h
+++ b/include/linux/mtd/bbm.h
@@ -19,22 +19,21 @@
 
 /**
  * struct nand_bbt_descr - bad block table descriptor
- * @options:		options for this descriptor
- * @pages:		the page(s) where we find the bbt, used with
- * 			option BBT_ABSPAGE when bbt is searched,
- * 			then we store the found bbts pages here.
- *			Its an array and supports up to 8 chips now
- * @offs:		offset of the pattern in the oob area of the page
- * @veroffs:		offset of the bbt version counter in the oob area of the page
- * @version:		version read from the bbt page during scan
- * @len:		length of the pattern, if 0 no pattern check is performed
- * @maxblocks:		maximum number of blocks to search for a bbt. This
- *			number of blocks is reserved at the end of the device
- *			where the tables are written.
- * @reserved_block_code: if non-0, this pattern denotes a reserved
- *			(rather than bad) block in the stored bbt
- * @pattern:		pattern to identify bad block table or factory marked
- *			good / bad blocks, can be NULL, if len = 0
+ * @options:	options for this descriptor
+ * @pages:	the page(s) where we find the bbt, used with option BBT_ABSPAGE
+ *		when bbt is searched, then we store the found bbts pages here.
+ *		Its an array and supports up to 8 chips now
+ * @offs:	offset of the pattern in the oob area of the page
+ * @veroffs:	offset of the bbt version counter in the oob are of the page
+ * @version:	version read from the bbt page during scan
+ * @len:	length of the pattern, if 0 no pattern check is performed
+ * @maxblocks:	maximum number of blocks to search for a bbt. This number of
+ *		blocks is reserved at the end of the device where the tables are
+ *		written.
+ * @reserved_block_code: if non-0, this pattern denotes a reserved (rather than
+ *              bad) block in the stored bbt
+ * @pattern:	pattern to identify bad block table or factory marked good /
+ *		bad blocks, can be NULL, if len = 0
  *
  * Descriptor for the bad block table marker and the descriptor for the
  * pattern which identifies good and bad blocks. The assumption is made
@@ -90,7 +89,9 @@ struct nand_bbt_descr {
 /*
  * Constants for oob configuration
  */
-#define ONENAND_BADBLOCK_POS	0
+#define NAND_SMALL_BADBLOCK_POS		5
+#define NAND_LARGE_BADBLOCK_POS		0
+#define ONENAND_BADBLOCK_POS		0
 
 /*
  * Bad block scanning errors
diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index 7a232a9bdd62..d87ada538d17 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -21,6 +21,7 @@
 #include <linux/wait.h>
 #include <linux/spinlock.h>
 #include <linux/mtd/mtd.h>
+#include <linux/mtd/bbm.h>
 
 struct mtd_info;
 /* Scan and identify a NAND device */
@@ -470,75 +471,6 @@ struct nand_manufacturers {
 extern struct nand_flash_dev nand_flash_ids[];
 extern struct nand_manufacturers nand_manuf_ids[];
 
-/**
- * struct nand_bbt_descr - bad block table descriptor
- * @options:	options for this descriptor
- * @pages:	the page(s) where we find the bbt, used with option BBT_ABSPAGE
- *		when bbt is searched, then we store the found bbts pages here.
- *		Its an array and supports up to 8 chips now
- * @offs:	offset of the pattern in the oob area of the page
- * @veroffs:	offset of the bbt version counter in the oob are of the page
- * @version:	version read from the bbt page during scan
- * @len:	length of the pattern, if 0 no pattern check is performed
- * @maxblocks:	maximum number of blocks to search for a bbt. This number of
- *		blocks is reserved at the end of the device where the tables are
- *		written.
- * @reserved_block_code: if non-0, this pattern denotes a reserved (rather than
- *              bad) block in the stored bbt
- * @pattern:	pattern to identify bad block table or factory marked good /
- *		bad blocks, can be NULL, if len = 0
- *
- * Descriptor for the bad block table marker and the descriptor for the
- * pattern which identifies good and bad blocks. The assumption is made
- * that the pattern and the version count are always located in the oob area
- * of the first block.
- */
-struct nand_bbt_descr {
-	int	options;
-	int	pages[NAND_MAX_CHIPS];
-	int	offs;
-	int	veroffs;
-	uint8_t	version[NAND_MAX_CHIPS];
-	int	len;
-	int	maxblocks;
-	int	reserved_block_code;
-	uint8_t	*pattern;
-};
-
-/* Options for the bad block table descriptors */
-
-/* The number of bits used per block in the bbt on the device */
-#define NAND_BBT_NRBITS_MSK	0x0000000F
-#define NAND_BBT_1BIT		0x00000001
-#define NAND_BBT_2BIT		0x00000002
-#define NAND_BBT_4BIT		0x00000004
-#define NAND_BBT_8BIT		0x00000008
-/* The bad block table is in the last good block of the device */
-#define	NAND_BBT_LASTBLOCK	0x00000010
-/* The bbt is at the given page, else we must scan for the bbt */
-#define NAND_BBT_ABSPAGE	0x00000020
-/* The bbt is at the given page, else we must scan for the bbt */
-#define NAND_BBT_SEARCH		0x00000040
-/* bbt is stored per chip on multichip devices */
-#define NAND_BBT_PERCHIP	0x00000080
-/* bbt has a version counter at offset veroffs */
-#define NAND_BBT_VERSION	0x00000100
-/* Create a bbt if none axists */
-#define NAND_BBT_CREATE		0x00000200
-/* Search good / bad pattern through all pages of a block */
-#define NAND_BBT_SCANALLPAGES	0x00000400
-/* Scan block empty during good / bad block scan */
-#define NAND_BBT_SCANEMPTY	0x00000800
-/* Write bbt if neccecary */
-#define NAND_BBT_WRITE		0x00001000
-/* Read and write back block contents when writing bbt */
-#define NAND_BBT_SAVECONTENT	0x00002000
-/* Search good / bad pattern on the first and the second page */
-#define NAND_BBT_SCAN2NDPAGE	0x00004000
-
-/* The maximum number of blocks to scan for a bbt */
-#define NAND_BBT_SCAN_MAXBLOCKS	4
-
 extern int nand_scan_bbt(struct mtd_info *mtd, struct nand_bbt_descr *bd);
 extern int nand_update_bbt(struct mtd_info *mtd, loff_t offs);
 extern int nand_default_bbt(struct mtd_info *mtd);
@@ -548,12 +480,6 @@ extern int nand_erase_nand(struct mtd_info *mtd, struct erase_info *instr,
 extern int nand_do_read(struct mtd_info *mtd, loff_t from, size_t len,
 			size_t * retlen, uint8_t * buf);
 
-/*
-* Constants for oob configuration
-*/
-#define NAND_SMALL_BADBLOCK_POS		5
-#define NAND_LARGE_BADBLOCK_POS		0
-
 /**
  * struct platform_nand_chip - chip level device structure
  * @nr_chips:		max. number of chips to scan for
-- 
cgit v1.2.3


From 30631cb82d5c6c662d5ec682beaa834c1f9f0987 Mon Sep 17 00:00:00 2001
From: Alessandro Rubini <rubini@unipv.it>
Date: Sun, 20 Sep 2009 23:28:14 +0200
Subject: mtd: unify status enum from three headers

nand.h, onenand.h and flashchip.h defined enumeration types
for chip status using the same symbolic names. This prevented
a board file to include more than one of them. In particular,
no nand and onenand platform devices could live in the same file.
This patch augments flashchip.h with a few status values in order
to cover all cases, so nand.h and onenand.h can use flstate_t
without declaring their own status enum.

Signed-off-by: Alessandro Rubini <rubini@unipv.it>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 include/linux/mtd/flashchip.h |  7 +++++++
 include/linux/mtd/nand.h      | 17 ++---------------
 include/linux/mtd/onenand.h   | 19 ++-----------------
 3 files changed, 11 insertions(+), 32 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mtd/flashchip.h b/include/linux/mtd/flashchip.h
index d4f38c5fd44e..f350a4879f75 100644
--- a/include/linux/mtd/flashchip.h
+++ b/include/linux/mtd/flashchip.h
@@ -38,6 +38,13 @@ typedef enum {
 	FL_XIP_WHILE_ERASING,
 	FL_XIP_WHILE_WRITING,
 	FL_SHUTDOWN,
+	/* These 2 come from nand_state_t, which has been unified here */
+	FL_READING,
+	FL_CACHEDPRG,
+	/* These 2 come from onenand_state_t, which has been unified here */
+	FL_RESETING,
+	FL_OTPING,
+
 	FL_UNKNOWN
 } flstate_t;
 
diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index d87ada538d17..2476078a032f 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -21,6 +21,7 @@
 #include <linux/wait.h>
 #include <linux/spinlock.h>
 #include <linux/mtd/mtd.h>
+#include <linux/mtd/flashchip.h>
 #include <linux/mtd/bbm.h>
 
 struct mtd_info;
@@ -203,20 +204,6 @@ typedef enum {
 #define NAND_CI_CHIPNR_MSK	0x03
 #define NAND_CI_CELLTYPE_MSK	0x0C
 
-/*
- * nand_state_t - chip states
- * Enumeration for NAND flash chip state
- */
-typedef enum {
-	FL_READY,
-	FL_READING,
-	FL_WRITING,
-	FL_ERASING,
-	FL_SYNCING,
-	FL_CACHEDPRG,
-	FL_PM_SUSPENDED,
-} nand_state_t;
-
 /* Keep gcc happy */
 struct nand_chip;
 
@@ -403,7 +390,7 @@ struct nand_chip {
 	uint8_t		cellinfo;
 	int		badblockpos;
 
-	nand_state_t	state;
+	flstate_t	state;
 
 	uint8_t		*oob_poi;
 	struct nand_hw_control  *controller;
diff --git a/include/linux/mtd/onenand.h b/include/linux/mtd/onenand.h
index 4e49f3350678..f57e29e17bb0 100644
--- a/include/linux/mtd/onenand.h
+++ b/include/linux/mtd/onenand.h
@@ -14,6 +14,7 @@
 
 #include <linux/spinlock.h>
 #include <linux/completion.h>
+#include <linux/mtd/flashchip.h>
 #include <linux/mtd/onenand_regs.h>
 #include <linux/mtd/bbm.h>
 
@@ -25,22 +26,6 @@ extern int onenand_scan(struct mtd_info *mtd, int max_chips);
 /* Free resources held by the OneNAND device */
 extern void onenand_release(struct mtd_info *mtd);
 
-/*
- * onenand_state_t - chip states
- * Enumeration for OneNAND flash chip state
- */
-typedef enum {
-	FL_READY,
-	FL_READING,
-	FL_WRITING,
-	FL_ERASING,
-	FL_SYNCING,
-	FL_LOCKING,
-	FL_RESETING,
-	FL_OTPING,
-	FL_PM_SUSPENDED,
-} onenand_state_t;
-
 /**
  * struct onenand_bufferram - OneNAND BufferRAM Data
  * @blockpage:		block & page address in BufferRAM
@@ -137,7 +122,7 @@ struct onenand_chip {
 
 	spinlock_t		chip_lock;
 	wait_queue_head_t	wq;
-	onenand_state_t		state;
+	flstate_t		state;
 	unsigned char		*page_buf;
 	unsigned char		*oob_buf;
 
-- 
cgit v1.2.3


From dc7a08166f3a5f23e79e839a8a88849bd3397c32 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Tue, 27 Oct 2009 14:41:35 -0400
Subject: nfs: new subdir Documentation/filesystems/nfs

We're adding enough nfs documentation that it may as well have its own
subdirectory.

Acked-by: Randy Dunlap <rdunlap@xenotime.net>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 Documentation/filesystems/00-INDEX             |  10 +-
 Documentation/filesystems/Exporting            | 147 --------------
 Documentation/filesystems/nfs-rdma.txt         | 271 -------------------------
 Documentation/filesystems/nfs.txt              |  98 ---------
 Documentation/filesystems/nfs/00-INDEX         |  12 ++
 Documentation/filesystems/nfs/Exporting        | 147 ++++++++++++++
 Documentation/filesystems/nfs/nfs-rdma.txt     | 271 +++++++++++++++++++++++++
 Documentation/filesystems/nfs/nfs.txt          |  98 +++++++++
 Documentation/filesystems/nfs/nfs41-server.txt | 222 ++++++++++++++++++++
 Documentation/filesystems/nfs/nfsroot.txt      | 270 ++++++++++++++++++++++++
 Documentation/filesystems/nfs41-server.txt     | 222 --------------------
 Documentation/filesystems/nfsroot.txt          | 270 ------------------------
 Documentation/filesystems/porting              |   2 +-
 Documentation/kernel-parameters.txt            |   6 +-
 fs/cifs/export.c                               |   2 +-
 fs/exportfs/expfs.c                            |   2 +-
 fs/isofs/export.c                              |   2 +-
 fs/nfs/Kconfig                                 |   2 +-
 include/linux/exportfs.h                       |   2 +-
 net/ipv4/Kconfig                               |   6 +-
 net/ipv4/ipconfig.c                            |   2 +-
 21 files changed, 1035 insertions(+), 1029 deletions(-)
 delete mode 100644 Documentation/filesystems/Exporting
 delete mode 100644 Documentation/filesystems/nfs-rdma.txt
 delete mode 100644 Documentation/filesystems/nfs.txt
 create mode 100644 Documentation/filesystems/nfs/00-INDEX
 create mode 100644 Documentation/filesystems/nfs/Exporting
 create mode 100644 Documentation/filesystems/nfs/nfs-rdma.txt
 create mode 100644 Documentation/filesystems/nfs/nfs.txt
 create mode 100644 Documentation/filesystems/nfs/nfs41-server.txt
 create mode 100644 Documentation/filesystems/nfs/nfsroot.txt
 delete mode 100644 Documentation/filesystems/nfs41-server.txt
 delete mode 100644 Documentation/filesystems/nfsroot.txt

(limited to 'include/linux')

diff --git a/Documentation/filesystems/00-INDEX b/Documentation/filesystems/00-INDEX
index f15621ee5599..482151c883a5 100644
--- a/Documentation/filesystems/00-INDEX
+++ b/Documentation/filesystems/00-INDEX
@@ -1,7 +1,5 @@
 00-INDEX
 	- this file (info on some of the filesystems supported by linux).
-Exporting
-	- explanation of how to make filesystems exportable.
 Locking
 	- info on locking rules as they pertain to Linux VFS.
 9p.txt
@@ -66,12 +64,8 @@ mandatory-locking.txt
 	- info on the Linux implementation of Sys V mandatory file locking.
 ncpfs.txt
 	- info on Novell Netware(tm) filesystem using NCP protocol.
-nfs41-server.txt
-	- info on the Linux server implementation of NFSv4 minor version 1.
-nfs-rdma.txt
-	- how to install and setup the Linux NFS/RDMA client and server software.
-nfsroot.txt
-	- short guide on setting up a diskless box with NFS root filesystem.
+nfs/
+	- nfs-related documentation.
 nilfs2.txt
 	- info and mount options for the NILFS2 filesystem.
 ntfs.txt
diff --git a/Documentation/filesystems/Exporting b/Documentation/filesystems/Exporting
deleted file mode 100644
index 87019d2b5981..000000000000
--- a/Documentation/filesystems/Exporting
+++ /dev/null
@@ -1,147 +0,0 @@
-
-Making Filesystems Exportable
-=============================
-
-Overview
---------
-
-All filesystem operations require a dentry (or two) as a starting
-point.  Local applications have a reference-counted hold on suitable
-dentries via open file descriptors or cwd/root.  However remote
-applications that access a filesystem via a remote filesystem protocol
-such as NFS may not be able to hold such a reference, and so need a
-different way to refer to a particular dentry.  As the alternative
-form of reference needs to be stable across renames, truncates, and
-server-reboot (among other things, though these tend to be the most
-problematic), there is no simple answer like 'filename'.
-
-The mechanism discussed here allows each filesystem implementation to
-specify how to generate an opaque (outside of the filesystem) byte
-string for any dentry, and how to find an appropriate dentry for any
-given opaque byte string.
-This byte string will be called a "filehandle fragment" as it
-corresponds to part of an NFS filehandle.
-
-A filesystem which supports the mapping between filehandle fragments
-and dentries will be termed "exportable".
-
-
-
-Dcache Issues
--------------
-
-The dcache normally contains a proper prefix of any given filesystem
-tree.  This means that if any filesystem object is in the dcache, then
-all of the ancestors of that filesystem object are also in the dcache.
-As normal access is by filename this prefix is created naturally and
-maintained easily (by each object maintaining a reference count on
-its parent).
-
-However when objects are included into the dcache by interpreting a
-filehandle fragment, there is no automatic creation of a path prefix
-for the object.  This leads to two related but distinct features of
-the dcache that are not needed for normal filesystem access.
-
-1/ The dcache must sometimes contain objects that are not part of the
-   proper prefix. i.e that are not connected to the root.
-2/ The dcache must be prepared for a newly found (via ->lookup) directory
-   to already have a (non-connected) dentry, and must be able to move
-   that dentry into place (based on the parent and name in the
-   ->lookup).   This is particularly needed for directories as
-   it is a dcache invariant that directories only have one dentry.
-
-To implement these features, the dcache has:
-
-a/ A dentry flag DCACHE_DISCONNECTED which is set on
-   any dentry that might not be part of the proper prefix.
-   This is set when anonymous dentries are created, and cleared when a
-   dentry is noticed to be a child of a dentry which is in the proper
-   prefix. 
-
-b/ A per-superblock list "s_anon" of dentries which are the roots of
-   subtrees that are not in the proper prefix.  These dentries, as
-   well as the proper prefix, need to be released at unmount time.  As
-   these dentries will not be hashed, they are linked together on the
-   d_hash list_head.
-
-c/ Helper routines to allocate anonymous dentries, and to help attach
-   loose directory dentries at lookup time. They are:
-    d_alloc_anon(inode) will return a dentry for the given inode.
-      If the inode already has a dentry, one of those is returned.
-      If it doesn't, a new anonymous (IS_ROOT and
-        DCACHE_DISCONNECTED) dentry is allocated and attached.
-      In the case of a directory, care is taken that only one dentry
-      can ever be attached.
-    d_splice_alias(inode, dentry) will make sure that there is a
-      dentry with the same name and parent as the given dentry, and
-      which refers to the given inode.
-      If the inode is a directory and already has a dentry, then that
-      dentry is d_moved over the given dentry.
-      If the passed dentry gets attached, care is taken that this is
-      mutually exclusive to a d_alloc_anon operation.
-      If the passed dentry is used, NULL is returned, else the used
-      dentry is returned.  This corresponds to the calling pattern of
-      ->lookup.
-  
- 
-Filesystem Issues
------------------
-
-For a filesystem to be exportable it must:
- 
-   1/ provide the filehandle fragment routines described below.
-   2/ make sure that d_splice_alias is used rather than d_add
-      when ->lookup finds an inode for a given parent and name.
-      Typically the ->lookup routine will end with a:
-
-		return d_splice_alias(inode, dentry);
-	}
-
-
-
-  A file system implementation declares that instances of the filesystem
-are exportable by setting the s_export_op field in the struct
-super_block.  This field must point to a "struct export_operations"
-struct which has the following members:
-
- encode_fh  (optional)
-    Takes a dentry and creates a filehandle fragment which can later be used
-    to find or create a dentry for the same object.  The default
-    implementation creates a filehandle fragment that encodes a 32bit inode
-    and generation number for the inode encoded, and if necessary the
-    same information for the parent.
-
-  fh_to_dentry (mandatory)
-    Given a filehandle fragment, this should find the implied object and
-    create a dentry for it (possibly with d_alloc_anon).
-
-  fh_to_parent (optional but strongly recommended)
-    Given a filehandle fragment, this should find the parent of the
-    implied object and create a dentry for it (possibly with d_alloc_anon).
-    May fail if the filehandle fragment is too small.
-
-  get_parent (optional but strongly recommended)
-    When given a dentry for a directory, this should return  a dentry for
-    the parent.  Quite possibly the parent dentry will have been allocated
-    by d_alloc_anon.  The default get_parent function just returns an error
-    so any filehandle lookup that requires finding a parent will fail.
-    ->lookup("..") is *not* used as a default as it can leave ".." entries
-    in the dcache which are too messy to work with.
-
-  get_name (optional)
-    When given a parent dentry and a child dentry, this should find a name
-    in the directory identified by the parent dentry, which leads to the
-    object identified by the child dentry.  If no get_name function is
-    supplied, a default implementation is provided which uses vfs_readdir
-    to find potential names, and matches inode numbers to find the correct
-    match.
-
-
-A filehandle fragment consists of an array of 1 or more 4byte words,
-together with a one byte "type".
-The decode_fh routine should not depend on the stated size that is
-passed to it.  This size may be larger than the original filehandle
-generated by encode_fh, in which case it will have been padded with
-nuls.  Rather, the encode_fh routine should choose a "type" which
-indicates the decode_fh how much of the filehandle is valid, and how
-it should be interpreted.
diff --git a/Documentation/filesystems/nfs-rdma.txt b/Documentation/filesystems/nfs-rdma.txt
deleted file mode 100644
index e386f7e4bcee..000000000000
--- a/Documentation/filesystems/nfs-rdma.txt
+++ /dev/null
@@ -1,271 +0,0 @@
-################################################################################
-#									       #
-#				NFS/RDMA README				       #
-#									       #
-################################################################################
-
- Author: NetApp and Open Grid Computing
- Date: May 29, 2008
-
-Table of Contents
-~~~~~~~~~~~~~~~~~
- - Overview
- - Getting Help
- - Installation
- - Check RDMA and NFS Setup
- - NFS/RDMA Setup
-
-Overview
-~~~~~~~~
-
-  This document describes how to install and setup the Linux NFS/RDMA client
-  and server software.
-
-  The NFS/RDMA client was first included in Linux 2.6.24. The NFS/RDMA server
-  was first included in the following release, Linux 2.6.25.
-
-  In our testing, we have obtained excellent performance results (full 10Gbit
-  wire bandwidth at minimal client CPU) under many workloads. The code passes
-  the full Connectathon test suite and operates over both Infiniband and iWARP
-  RDMA adapters.
-
-Getting Help
-~~~~~~~~~~~~
-
-  If you get stuck, you can ask questions on the
-
-                nfs-rdma-devel@lists.sourceforge.net
-
-  mailing list.
-
-Installation
-~~~~~~~~~~~~
-
-  These instructions are a step by step guide to building a machine for
-  use with NFS/RDMA.
-
-  - Install an RDMA device
-
-    Any device supported by the drivers in drivers/infiniband/hw is acceptable.
-
-    Testing has been performed using several Mellanox-based IB cards, the
-    Ammasso AMS1100 iWARP adapter, and the Chelsio cxgb3 iWARP adapter.
-
-  - Install a Linux distribution and tools
-
-    The first kernel release to contain both the NFS/RDMA client and server was
-    Linux 2.6.25  Therefore, a distribution compatible with this and subsequent
-    Linux kernel release should be installed.
-
-    The procedures described in this document have been tested with
-    distributions from Red Hat's Fedora Project (http://fedora.redhat.com/).
-
-  - Install nfs-utils-1.1.2 or greater on the client
-
-    An NFS/RDMA mount point can be obtained by using the mount.nfs command in
-    nfs-utils-1.1.2 or greater (nfs-utils-1.1.1 was the first nfs-utils
-    version with support for NFS/RDMA mounts, but for various reasons we
-    recommend using nfs-utils-1.1.2 or greater). To see which version of
-    mount.nfs you are using, type:
-
-    $ /sbin/mount.nfs -V
-
-    If the version is less than 1.1.2 or the command does not exist,
-    you should install the latest version of nfs-utils.
-
-    Download the latest package from:
-
-    http://www.kernel.org/pub/linux/utils/nfs
-
-    Uncompress the package and follow the installation instructions.
-
-    If you will not need the idmapper and gssd executables (you do not need
-    these to create an NFS/RDMA enabled mount command), the installation
-    process can be simplified by disabling these features when running
-    configure:
-
-    $ ./configure --disable-gss --disable-nfsv4
-
-    To build nfs-utils you will need the tcp_wrappers package installed. For
-    more information on this see the package's README and INSTALL files.
-
-    After building the nfs-utils package, there will be a mount.nfs binary in
-    the utils/mount directory. This binary can be used to initiate NFS v2, v3,
-    or v4 mounts. To initiate a v4 mount, the binary must be called
-    mount.nfs4.  The standard technique is to create a symlink called
-    mount.nfs4 to mount.nfs.
-
-    This mount.nfs binary should be installed at /sbin/mount.nfs as follows:
-
-    $ sudo cp utils/mount/mount.nfs /sbin/mount.nfs
-
-    In this location, mount.nfs will be invoked automatically for NFS mounts
-    by the system mount command.
-
-    NOTE: mount.nfs and therefore nfs-utils-1.1.2 or greater is only needed
-    on the NFS client machine. You do not need this specific version of
-    nfs-utils on the server. Furthermore, only the mount.nfs command from
-    nfs-utils-1.1.2 is needed on the client.
-
-  - Install a Linux kernel with NFS/RDMA
-
-    The NFS/RDMA client and server are both included in the mainline Linux
-    kernel version 2.6.25 and later. This and other versions of the 2.6 Linux
-    kernel can be found at:
-
-    ftp://ftp.kernel.org/pub/linux/kernel/v2.6/
-
-    Download the sources and place them in an appropriate location.
-
-  - Configure the RDMA stack
-
-    Make sure your kernel configuration has RDMA support enabled. Under
-    Device Drivers -> InfiniBand support, update the kernel configuration
-    to enable InfiniBand support [NOTE: the option name is misleading. Enabling
-    InfiniBand support is required for all RDMA devices (IB, iWARP, etc.)].
-
-    Enable the appropriate IB HCA support (mlx4, mthca, ehca, ipath, etc.) or
-    iWARP adapter support (amso, cxgb3, etc.).
-
-    If you are using InfiniBand, be sure to enable IP-over-InfiniBand support.
-
-  - Configure the NFS client and server
-
-    Your kernel configuration must also have NFS file system support and/or
-    NFS server support enabled. These and other NFS related configuration
-    options can be found under File Systems -> Network File Systems.
-
-  - Build, install, reboot
-
-    The NFS/RDMA code will be enabled automatically if NFS and RDMA
-    are turned on. The NFS/RDMA client and server are configured via the hidden
-    SUNRPC_XPRT_RDMA config option that depends on SUNRPC and INFINIBAND. The
-    value of SUNRPC_XPRT_RDMA will be:
-
-     - N if either SUNRPC or INFINIBAND are N, in this case the NFS/RDMA client
-       and server will not be built
-     - M if both SUNRPC and INFINIBAND are on (M or Y) and at least one is M,
-       in this case the NFS/RDMA client and server will be built as modules
-     - Y if both SUNRPC and INFINIBAND are Y, in this case the NFS/RDMA client
-       and server will be built into the kernel
-
-    Therefore, if you have followed the steps above and turned no NFS and RDMA,
-    the NFS/RDMA client and server will be built.
-
-    Build a new kernel, install it, boot it.
-
-Check RDMA and NFS Setup
-~~~~~~~~~~~~~~~~~~~~~~~~
-
-    Before configuring the NFS/RDMA software, it is a good idea to test
-    your new kernel to ensure that the kernel is working correctly.
-    In particular, it is a good idea to verify that the RDMA stack
-    is functioning as expected and standard NFS over TCP/IP and/or UDP/IP
-    is working properly.
-
-  - Check RDMA Setup
-
-    If you built the RDMA components as modules, load them at
-    this time. For example, if you are using a Mellanox Tavor/Sinai/Arbel
-    card:
-
-    $ modprobe ib_mthca
-    $ modprobe ib_ipoib
-
-    If you are using InfiniBand, make sure there is a Subnet Manager (SM)
-    running on the network. If your IB switch has an embedded SM, you can
-    use it. Otherwise, you will need to run an SM, such as OpenSM, on one
-    of your end nodes.
-
-    If an SM is running on your network, you should see the following:
-
-    $ cat /sys/class/infiniband/driverX/ports/1/state
-    4: ACTIVE
-
-    where driverX is mthca0, ipath5, ehca3, etc.
-
-    To further test the InfiniBand software stack, use IPoIB (this
-    assumes you have two IB hosts named host1 and host2):
-
-    host1$ ifconfig ib0 a.b.c.x
-    host2$ ifconfig ib0 a.b.c.y
-    host1$ ping a.b.c.y
-    host2$ ping a.b.c.x
-
-    For other device types, follow the appropriate procedures.
-
-  - Check NFS Setup
-
-    For the NFS components enabled above (client and/or server),
-    test their functionality over standard Ethernet using TCP/IP or UDP/IP.
-
-NFS/RDMA Setup
-~~~~~~~~~~~~~~
-
-  We recommend that you use two machines, one to act as the client and
-  one to act as the server.
-
-  One time configuration:
-
-  - On the server system, configure the /etc/exports file and
-    start the NFS/RDMA server.
-
-    Exports entries with the following formats have been tested:
-
-    /vol0   192.168.0.47(fsid=0,rw,async,insecure,no_root_squash)
-    /vol0   192.168.0.0/255.255.255.0(fsid=0,rw,async,insecure,no_root_squash)
-
-    The IP address(es) is(are) the client's IPoIB address for an InfiniBand
-    HCA or the cleint's iWARP address(es) for an RNIC.
-
-    NOTE: The "insecure" option must be used because the NFS/RDMA client does
-    not use a reserved port.
-
- Each time a machine boots:
-
-  - Load and configure the RDMA drivers
-
-    For InfiniBand using a Mellanox adapter:
-
-    $ modprobe ib_mthca
-    $ modprobe ib_ipoib
-    $ ifconfig ib0 a.b.c.d
-
-    NOTE: use unique addresses for the client and server
-
-  - Start the NFS server
-
-    If the NFS/RDMA server was built as a module (CONFIG_SUNRPC_XPRT_RDMA=m in
-    kernel config), load the RDMA transport module:
-
-    $ modprobe svcrdma
-
-    Regardless of how the server was built (module or built-in), start the
-    server:
-
-    $ /etc/init.d/nfs start
-
-    or
-
-    $ service nfs start
-
-    Instruct the server to listen on the RDMA transport:
-
-    $ echo rdma 20049 > /proc/fs/nfsd/portlist
-
-  - On the client system
-
-    If the NFS/RDMA client was built as a module (CONFIG_SUNRPC_XPRT_RDMA=m in
-    kernel config), load the RDMA client module:
-
-    $ modprobe xprtrdma.ko
-
-    Regardless of how the client was built (module or built-in), use this
-    command to mount the NFS/RDMA server:
-
-    $ mount -o rdma,port=20049 <IPoIB-server-name-or-address>:/<export> /mnt
-
-    To verify that the mount is using RDMA, run "cat /proc/mounts" and check
-    the "proto" field for the given mount.
-
-  Congratulations! You're using NFS/RDMA!
diff --git a/Documentation/filesystems/nfs.txt b/Documentation/filesystems/nfs.txt
deleted file mode 100644
index f50f26ce6cd0..000000000000
--- a/Documentation/filesystems/nfs.txt
+++ /dev/null
@@ -1,98 +0,0 @@
-
-The NFS client
-==============
-
-The NFS version 2 protocol was first documented in RFC1094 (March 1989).
-Since then two more major releases of NFS have been published, with NFSv3
-being documented in RFC1813 (June 1995), and NFSv4 in RFC3530 (April
-2003).
-
-The Linux NFS client currently supports all the above published versions,
-and work is in progress on adding support for minor version 1 of the NFSv4
-protocol.
-
-The purpose of this document is to provide information on some of the
-upcall interfaces that are used in order to provide the NFS client with
-some of the information that it requires in order to fully comply with
-the NFS spec.
-
-The DNS resolver
-================
-
-NFSv4 allows for one server to refer the NFS client to data that has been
-migrated onto another server by means of the special "fs_locations"
-attribute. See
-	http://tools.ietf.org/html/rfc3530#section-6
-and
-	http://tools.ietf.org/html/draft-ietf-nfsv4-referrals-00
-
-The fs_locations information can take the form of either an ip address and
-a path, or a DNS hostname and a path. The latter requires the NFS client to
-do a DNS lookup in order to mount the new volume, and hence the need for an
-upcall to allow userland to provide this service.
-
-Assuming that the user has the 'rpc_pipefs' filesystem mounted in the usual
-/var/lib/nfs/rpc_pipefs, the upcall consists of the following steps:
-
-   (1) The process checks the dns_resolve cache to see if it contains a
-       valid entry. If so, it returns that entry and exits.
-
-   (2) If no valid entry exists, the helper script '/sbin/nfs_cache_getent'
-       (may be changed using the 'nfs.cache_getent' kernel boot parameter)
-       is run, with two arguments:
-		- the cache name, "dns_resolve"
-		- the hostname to resolve
-
-   (3) After looking up the corresponding ip address, the helper script
-       writes the result into the rpc_pipefs pseudo-file
-       '/var/lib/nfs/rpc_pipefs/cache/dns_resolve/channel'
-       in the following (text) format:
-
-		"<ip address> <hostname> <ttl>\n"
-
-       Where <ip address> is in the usual IPv4 (123.456.78.90) or IPv6
-       (ffee:ddcc:bbaa:9988:7766:5544:3322:1100, ffee::1100, ...) format.
-       <hostname> is identical to the second argument of the helper
-       script, and <ttl> is the 'time to live' of this cache entry (in
-       units of seconds).
-
-       Note: If <ip address> is invalid, say the string "0", then a negative
-       entry is created, which will cause the kernel to treat the hostname
-       as having no valid DNS translation.
-
-
-
-
-A basic sample /sbin/nfs_cache_getent
-=====================================
-
-#!/bin/bash
-#
-ttl=600
-#
-cut=/usr/bin/cut
-getent=/usr/bin/getent
-rpc_pipefs=/var/lib/nfs/rpc_pipefs
-#
-die()
-{
-	echo "Usage: $0 cache_name entry_name"
-	exit 1
-}
-
-[ $# -lt 2 ] && die
-cachename="$1"
-cache_path=${rpc_pipefs}/cache/${cachename}/channel
-
-case "${cachename}" in
-	dns_resolve)
-		name="$2"
-		result="$(${getent} hosts ${name} | ${cut} -f1 -d\ )"
-		[ -z "${result}" ] && result="0"
-		;;
-	*)
-		die
-		;;
-esac
-echo "${result} ${name} ${ttl}" >${cache_path}
-
diff --git a/Documentation/filesystems/nfs/00-INDEX b/Documentation/filesystems/nfs/00-INDEX
new file mode 100644
index 000000000000..6ff3d212027b
--- /dev/null
+++ b/Documentation/filesystems/nfs/00-INDEX
@@ -0,0 +1,12 @@
+00-INDEX
+	- this file (nfs-related documentation).
+Exporting
+	- explanation of how to make filesystems exportable.
+nfs.txt
+	- nfs client, and DNS resolution for fs_locations.
+nfs41-server.txt
+	- info on the Linux server implementation of NFSv4 minor version 1.
+nfs-rdma.txt
+	- how to install and setup the Linux NFS/RDMA client and server software
+nfsroot.txt
+	- short guide on setting up a diskless box with NFS root filesystem.
diff --git a/Documentation/filesystems/nfs/Exporting b/Documentation/filesystems/nfs/Exporting
new file mode 100644
index 000000000000..87019d2b5981
--- /dev/null
+++ b/Documentation/filesystems/nfs/Exporting
@@ -0,0 +1,147 @@
+
+Making Filesystems Exportable
+=============================
+
+Overview
+--------
+
+All filesystem operations require a dentry (or two) as a starting
+point.  Local applications have a reference-counted hold on suitable
+dentries via open file descriptors or cwd/root.  However remote
+applications that access a filesystem via a remote filesystem protocol
+such as NFS may not be able to hold such a reference, and so need a
+different way to refer to a particular dentry.  As the alternative
+form of reference needs to be stable across renames, truncates, and
+server-reboot (among other things, though these tend to be the most
+problematic), there is no simple answer like 'filename'.
+
+The mechanism discussed here allows each filesystem implementation to
+specify how to generate an opaque (outside of the filesystem) byte
+string for any dentry, and how to find an appropriate dentry for any
+given opaque byte string.
+This byte string will be called a "filehandle fragment" as it
+corresponds to part of an NFS filehandle.
+
+A filesystem which supports the mapping between filehandle fragments
+and dentries will be termed "exportable".
+
+
+
+Dcache Issues
+-------------
+
+The dcache normally contains a proper prefix of any given filesystem
+tree.  This means that if any filesystem object is in the dcache, then
+all of the ancestors of that filesystem object are also in the dcache.
+As normal access is by filename this prefix is created naturally and
+maintained easily (by each object maintaining a reference count on
+its parent).
+
+However when objects are included into the dcache by interpreting a
+filehandle fragment, there is no automatic creation of a path prefix
+for the object.  This leads to two related but distinct features of
+the dcache that are not needed for normal filesystem access.
+
+1/ The dcache must sometimes contain objects that are not part of the
+   proper prefix. i.e that are not connected to the root.
+2/ The dcache must be prepared for a newly found (via ->lookup) directory
+   to already have a (non-connected) dentry, and must be able to move
+   that dentry into place (based on the parent and name in the
+   ->lookup).   This is particularly needed for directories as
+   it is a dcache invariant that directories only have one dentry.
+
+To implement these features, the dcache has:
+
+a/ A dentry flag DCACHE_DISCONNECTED which is set on
+   any dentry that might not be part of the proper prefix.
+   This is set when anonymous dentries are created, and cleared when a
+   dentry is noticed to be a child of a dentry which is in the proper
+   prefix. 
+
+b/ A per-superblock list "s_anon" of dentries which are the roots of
+   subtrees that are not in the proper prefix.  These dentries, as
+   well as the proper prefix, need to be released at unmount time.  As
+   these dentries will not be hashed, they are linked together on the
+   d_hash list_head.
+
+c/ Helper routines to allocate anonymous dentries, and to help attach
+   loose directory dentries at lookup time. They are:
+    d_alloc_anon(inode) will return a dentry for the given inode.
+      If the inode already has a dentry, one of those is returned.
+      If it doesn't, a new anonymous (IS_ROOT and
+        DCACHE_DISCONNECTED) dentry is allocated and attached.
+      In the case of a directory, care is taken that only one dentry
+      can ever be attached.
+    d_splice_alias(inode, dentry) will make sure that there is a
+      dentry with the same name and parent as the given dentry, and
+      which refers to the given inode.
+      If the inode is a directory and already has a dentry, then that
+      dentry is d_moved over the given dentry.
+      If the passed dentry gets attached, care is taken that this is
+      mutually exclusive to a d_alloc_anon operation.
+      If the passed dentry is used, NULL is returned, else the used
+      dentry is returned.  This corresponds to the calling pattern of
+      ->lookup.
+  
+ 
+Filesystem Issues
+-----------------
+
+For a filesystem to be exportable it must:
+ 
+   1/ provide the filehandle fragment routines described below.
+   2/ make sure that d_splice_alias is used rather than d_add
+      when ->lookup finds an inode for a given parent and name.
+      Typically the ->lookup routine will end with a:
+
+		return d_splice_alias(inode, dentry);
+	}
+
+
+
+  A file system implementation declares that instances of the filesystem
+are exportable by setting the s_export_op field in the struct
+super_block.  This field must point to a "struct export_operations"
+struct which has the following members:
+
+ encode_fh  (optional)
+    Takes a dentry and creates a filehandle fragment which can later be used
+    to find or create a dentry for the same object.  The default
+    implementation creates a filehandle fragment that encodes a 32bit inode
+    and generation number for the inode encoded, and if necessary the
+    same information for the parent.
+
+  fh_to_dentry (mandatory)
+    Given a filehandle fragment, this should find the implied object and
+    create a dentry for it (possibly with d_alloc_anon).
+
+  fh_to_parent (optional but strongly recommended)
+    Given a filehandle fragment, this should find the parent of the
+    implied object and create a dentry for it (possibly with d_alloc_anon).
+    May fail if the filehandle fragment is too small.
+
+  get_parent (optional but strongly recommended)
+    When given a dentry for a directory, this should return  a dentry for
+    the parent.  Quite possibly the parent dentry will have been allocated
+    by d_alloc_anon.  The default get_parent function just returns an error
+    so any filehandle lookup that requires finding a parent will fail.
+    ->lookup("..") is *not* used as a default as it can leave ".." entries
+    in the dcache which are too messy to work with.
+
+  get_name (optional)
+    When given a parent dentry and a child dentry, this should find a name
+    in the directory identified by the parent dentry, which leads to the
+    object identified by the child dentry.  If no get_name function is
+    supplied, a default implementation is provided which uses vfs_readdir
+    to find potential names, and matches inode numbers to find the correct
+    match.
+
+
+A filehandle fragment consists of an array of 1 or more 4byte words,
+together with a one byte "type".
+The decode_fh routine should not depend on the stated size that is
+passed to it.  This size may be larger than the original filehandle
+generated by encode_fh, in which case it will have been padded with
+nuls.  Rather, the encode_fh routine should choose a "type" which
+indicates the decode_fh how much of the filehandle is valid, and how
+it should be interpreted.
diff --git a/Documentation/filesystems/nfs/nfs-rdma.txt b/Documentation/filesystems/nfs/nfs-rdma.txt
new file mode 100644
index 000000000000..e386f7e4bcee
--- /dev/null
+++ b/Documentation/filesystems/nfs/nfs-rdma.txt
@@ -0,0 +1,271 @@
+################################################################################
+#									       #
+#				NFS/RDMA README				       #
+#									       #
+################################################################################
+
+ Author: NetApp and Open Grid Computing
+ Date: May 29, 2008
+
+Table of Contents
+~~~~~~~~~~~~~~~~~
+ - Overview
+ - Getting Help
+ - Installation
+ - Check RDMA and NFS Setup
+ - NFS/RDMA Setup
+
+Overview
+~~~~~~~~
+
+  This document describes how to install and setup the Linux NFS/RDMA client
+  and server software.
+
+  The NFS/RDMA client was first included in Linux 2.6.24. The NFS/RDMA server
+  was first included in the following release, Linux 2.6.25.
+
+  In our testing, we have obtained excellent performance results (full 10Gbit
+  wire bandwidth at minimal client CPU) under many workloads. The code passes
+  the full Connectathon test suite and operates over both Infiniband and iWARP
+  RDMA adapters.
+
+Getting Help
+~~~~~~~~~~~~
+
+  If you get stuck, you can ask questions on the
+
+                nfs-rdma-devel@lists.sourceforge.net
+
+  mailing list.
+
+Installation
+~~~~~~~~~~~~
+
+  These instructions are a step by step guide to building a machine for
+  use with NFS/RDMA.
+
+  - Install an RDMA device
+
+    Any device supported by the drivers in drivers/infiniband/hw is acceptable.
+
+    Testing has been performed using several Mellanox-based IB cards, the
+    Ammasso AMS1100 iWARP adapter, and the Chelsio cxgb3 iWARP adapter.
+
+  - Install a Linux distribution and tools
+
+    The first kernel release to contain both the NFS/RDMA client and server was
+    Linux 2.6.25  Therefore, a distribution compatible with this and subsequent
+    Linux kernel release should be installed.
+
+    The procedures described in this document have been tested with
+    distributions from Red Hat's Fedora Project (http://fedora.redhat.com/).
+
+  - Install nfs-utils-1.1.2 or greater on the client
+
+    An NFS/RDMA mount point can be obtained by using the mount.nfs command in
+    nfs-utils-1.1.2 or greater (nfs-utils-1.1.1 was the first nfs-utils
+    version with support for NFS/RDMA mounts, but for various reasons we
+    recommend using nfs-utils-1.1.2 or greater). To see which version of
+    mount.nfs you are using, type:
+
+    $ /sbin/mount.nfs -V
+
+    If the version is less than 1.1.2 or the command does not exist,
+    you should install the latest version of nfs-utils.
+
+    Download the latest package from:
+
+    http://www.kernel.org/pub/linux/utils/nfs
+
+    Uncompress the package and follow the installation instructions.
+
+    If you will not need the idmapper and gssd executables (you do not need
+    these to create an NFS/RDMA enabled mount command), the installation
+    process can be simplified by disabling these features when running
+    configure:
+
+    $ ./configure --disable-gss --disable-nfsv4
+
+    To build nfs-utils you will need the tcp_wrappers package installed. For
+    more information on this see the package's README and INSTALL files.
+
+    After building the nfs-utils package, there will be a mount.nfs binary in
+    the utils/mount directory. This binary can be used to initiate NFS v2, v3,
+    or v4 mounts. To initiate a v4 mount, the binary must be called
+    mount.nfs4.  The standard technique is to create a symlink called
+    mount.nfs4 to mount.nfs.
+
+    This mount.nfs binary should be installed at /sbin/mount.nfs as follows:
+
+    $ sudo cp utils/mount/mount.nfs /sbin/mount.nfs
+
+    In this location, mount.nfs will be invoked automatically for NFS mounts
+    by the system mount command.
+
+    NOTE: mount.nfs and therefore nfs-utils-1.1.2 or greater is only needed
+    on the NFS client machine. You do not need this specific version of
+    nfs-utils on the server. Furthermore, only the mount.nfs command from
+    nfs-utils-1.1.2 is needed on the client.
+
+  - Install a Linux kernel with NFS/RDMA
+
+    The NFS/RDMA client and server are both included in the mainline Linux
+    kernel version 2.6.25 and later. This and other versions of the 2.6 Linux
+    kernel can be found at:
+
+    ftp://ftp.kernel.org/pub/linux/kernel/v2.6/
+
+    Download the sources and place them in an appropriate location.
+
+  - Configure the RDMA stack
+
+    Make sure your kernel configuration has RDMA support enabled. Under
+    Device Drivers -> InfiniBand support, update the kernel configuration
+    to enable InfiniBand support [NOTE: the option name is misleading. Enabling
+    InfiniBand support is required for all RDMA devices (IB, iWARP, etc.)].
+
+    Enable the appropriate IB HCA support (mlx4, mthca, ehca, ipath, etc.) or
+    iWARP adapter support (amso, cxgb3, etc.).
+
+    If you are using InfiniBand, be sure to enable IP-over-InfiniBand support.
+
+  - Configure the NFS client and server
+
+    Your kernel configuration must also have NFS file system support and/or
+    NFS server support enabled. These and other NFS related configuration
+    options can be found under File Systems -> Network File Systems.
+
+  - Build, install, reboot
+
+    The NFS/RDMA code will be enabled automatically if NFS and RDMA
+    are turned on. The NFS/RDMA client and server are configured via the hidden
+    SUNRPC_XPRT_RDMA config option that depends on SUNRPC and INFINIBAND. The
+    value of SUNRPC_XPRT_RDMA will be:
+
+     - N if either SUNRPC or INFINIBAND are N, in this case the NFS/RDMA client
+       and server will not be built
+     - M if both SUNRPC and INFINIBAND are on (M or Y) and at least one is M,
+       in this case the NFS/RDMA client and server will be built as modules
+     - Y if both SUNRPC and INFINIBAND are Y, in this case the NFS/RDMA client
+       and server will be built into the kernel
+
+    Therefore, if you have followed the steps above and turned no NFS and RDMA,
+    the NFS/RDMA client and server will be built.
+
+    Build a new kernel, install it, boot it.
+
+Check RDMA and NFS Setup
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+    Before configuring the NFS/RDMA software, it is a good idea to test
+    your new kernel to ensure that the kernel is working correctly.
+    In particular, it is a good idea to verify that the RDMA stack
+    is functioning as expected and standard NFS over TCP/IP and/or UDP/IP
+    is working properly.
+
+  - Check RDMA Setup
+
+    If you built the RDMA components as modules, load them at
+    this time. For example, if you are using a Mellanox Tavor/Sinai/Arbel
+    card:
+
+    $ modprobe ib_mthca
+    $ modprobe ib_ipoib
+
+    If you are using InfiniBand, make sure there is a Subnet Manager (SM)
+    running on the network. If your IB switch has an embedded SM, you can
+    use it. Otherwise, you will need to run an SM, such as OpenSM, on one
+    of your end nodes.
+
+    If an SM is running on your network, you should see the following:
+
+    $ cat /sys/class/infiniband/driverX/ports/1/state
+    4: ACTIVE
+
+    where driverX is mthca0, ipath5, ehca3, etc.
+
+    To further test the InfiniBand software stack, use IPoIB (this
+    assumes you have two IB hosts named host1 and host2):
+
+    host1$ ifconfig ib0 a.b.c.x
+    host2$ ifconfig ib0 a.b.c.y
+    host1$ ping a.b.c.y
+    host2$ ping a.b.c.x
+
+    For other device types, follow the appropriate procedures.
+
+  - Check NFS Setup
+
+    For the NFS components enabled above (client and/or server),
+    test their functionality over standard Ethernet using TCP/IP or UDP/IP.
+
+NFS/RDMA Setup
+~~~~~~~~~~~~~~
+
+  We recommend that you use two machines, one to act as the client and
+  one to act as the server.
+
+  One time configuration:
+
+  - On the server system, configure the /etc/exports file and
+    start the NFS/RDMA server.
+
+    Exports entries with the following formats have been tested:
+
+    /vol0   192.168.0.47(fsid=0,rw,async,insecure,no_root_squash)
+    /vol0   192.168.0.0/255.255.255.0(fsid=0,rw,async,insecure,no_root_squash)
+
+    The IP address(es) is(are) the client's IPoIB address for an InfiniBand
+    HCA or the cleint's iWARP address(es) for an RNIC.
+
+    NOTE: The "insecure" option must be used because the NFS/RDMA client does
+    not use a reserved port.
+
+ Each time a machine boots:
+
+  - Load and configure the RDMA drivers
+
+    For InfiniBand using a Mellanox adapter:
+
+    $ modprobe ib_mthca
+    $ modprobe ib_ipoib
+    $ ifconfig ib0 a.b.c.d
+
+    NOTE: use unique addresses for the client and server
+
+  - Start the NFS server
+
+    If the NFS/RDMA server was built as a module (CONFIG_SUNRPC_XPRT_RDMA=m in
+    kernel config), load the RDMA transport module:
+
+    $ modprobe svcrdma
+
+    Regardless of how the server was built (module or built-in), start the
+    server:
+
+    $ /etc/init.d/nfs start
+
+    or
+
+    $ service nfs start
+
+    Instruct the server to listen on the RDMA transport:
+
+    $ echo rdma 20049 > /proc/fs/nfsd/portlist
+
+  - On the client system
+
+    If the NFS/RDMA client was built as a module (CONFIG_SUNRPC_XPRT_RDMA=m in
+    kernel config), load the RDMA client module:
+
+    $ modprobe xprtrdma.ko
+
+    Regardless of how the client was built (module or built-in), use this
+    command to mount the NFS/RDMA server:
+
+    $ mount -o rdma,port=20049 <IPoIB-server-name-or-address>:/<export> /mnt
+
+    To verify that the mount is using RDMA, run "cat /proc/mounts" and check
+    the "proto" field for the given mount.
+
+  Congratulations! You're using NFS/RDMA!
diff --git a/Documentation/filesystems/nfs/nfs.txt b/Documentation/filesystems/nfs/nfs.txt
new file mode 100644
index 000000000000..f50f26ce6cd0
--- /dev/null
+++ b/Documentation/filesystems/nfs/nfs.txt
@@ -0,0 +1,98 @@
+
+The NFS client
+==============
+
+The NFS version 2 protocol was first documented in RFC1094 (March 1989).
+Since then two more major releases of NFS have been published, with NFSv3
+being documented in RFC1813 (June 1995), and NFSv4 in RFC3530 (April
+2003).
+
+The Linux NFS client currently supports all the above published versions,
+and work is in progress on adding support for minor version 1 of the NFSv4
+protocol.
+
+The purpose of this document is to provide information on some of the
+upcall interfaces that are used in order to provide the NFS client with
+some of the information that it requires in order to fully comply with
+the NFS spec.
+
+The DNS resolver
+================
+
+NFSv4 allows for one server to refer the NFS client to data that has been
+migrated onto another server by means of the special "fs_locations"
+attribute. See
+	http://tools.ietf.org/html/rfc3530#section-6
+and
+	http://tools.ietf.org/html/draft-ietf-nfsv4-referrals-00
+
+The fs_locations information can take the form of either an ip address and
+a path, or a DNS hostname and a path. The latter requires the NFS client to
+do a DNS lookup in order to mount the new volume, and hence the need for an
+upcall to allow userland to provide this service.
+
+Assuming that the user has the 'rpc_pipefs' filesystem mounted in the usual
+/var/lib/nfs/rpc_pipefs, the upcall consists of the following steps:
+
+   (1) The process checks the dns_resolve cache to see if it contains a
+       valid entry. If so, it returns that entry and exits.
+
+   (2) If no valid entry exists, the helper script '/sbin/nfs_cache_getent'
+       (may be changed using the 'nfs.cache_getent' kernel boot parameter)
+       is run, with two arguments:
+		- the cache name, "dns_resolve"
+		- the hostname to resolve
+
+   (3) After looking up the corresponding ip address, the helper script
+       writes the result into the rpc_pipefs pseudo-file
+       '/var/lib/nfs/rpc_pipefs/cache/dns_resolve/channel'
+       in the following (text) format:
+
+		"<ip address> <hostname> <ttl>\n"
+
+       Where <ip address> is in the usual IPv4 (123.456.78.90) or IPv6
+       (ffee:ddcc:bbaa:9988:7766:5544:3322:1100, ffee::1100, ...) format.
+       <hostname> is identical to the second argument of the helper
+       script, and <ttl> is the 'time to live' of this cache entry (in
+       units of seconds).
+
+       Note: If <ip address> is invalid, say the string "0", then a negative
+       entry is created, which will cause the kernel to treat the hostname
+       as having no valid DNS translation.
+
+
+
+
+A basic sample /sbin/nfs_cache_getent
+=====================================
+
+#!/bin/bash
+#
+ttl=600
+#
+cut=/usr/bin/cut
+getent=/usr/bin/getent
+rpc_pipefs=/var/lib/nfs/rpc_pipefs
+#
+die()
+{
+	echo "Usage: $0 cache_name entry_name"
+	exit 1
+}
+
+[ $# -lt 2 ] && die
+cachename="$1"
+cache_path=${rpc_pipefs}/cache/${cachename}/channel
+
+case "${cachename}" in
+	dns_resolve)
+		name="$2"
+		result="$(${getent} hosts ${name} | ${cut} -f1 -d\ )"
+		[ -z "${result}" ] && result="0"
+		;;
+	*)
+		die
+		;;
+esac
+echo "${result} ${name} ${ttl}" >${cache_path}
+
diff --git a/Documentation/filesystems/nfs/nfs41-server.txt b/Documentation/filesystems/nfs/nfs41-server.txt
new file mode 100644
index 000000000000..1bd0d0c05171
--- /dev/null
+++ b/Documentation/filesystems/nfs/nfs41-server.txt
@@ -0,0 +1,222 @@
+NFSv4.1 Server Implementation
+
+Server support for minorversion 1 can be controlled using the
+/proc/fs/nfsd/versions control file.  The string output returned
+by reading this file will contain either "+4.1" or "-4.1"
+correspondingly.
+
+Currently, server support for minorversion 1 is disabled by default.
+It can be enabled at run time by writing the string "+4.1" to
+the /proc/fs/nfsd/versions control file.  Note that to write this
+control file, the nfsd service must be taken down.  Use your user-mode
+nfs-utils to set this up; see rpc.nfsd(8)
+
+(Warning: older servers will interpret "+4.1" and "-4.1" as "+4" and
+"-4", respectively.  Therefore, code meant to work on both new and old
+kernels must turn 4.1 on or off *before* turning support for version 4
+on or off; rpc.nfsd does this correctly.)
+
+The NFSv4 minorversion 1 (NFSv4.1) implementation in nfsd is based
+on the latest NFSv4.1 Internet Draft:
+http://tools.ietf.org/html/draft-ietf-nfsv4-minorversion1-29
+
+From the many new features in NFSv4.1 the current implementation
+focuses on the mandatory-to-implement NFSv4.1 Sessions, providing
+"exactly once" semantics and better control and throttling of the
+resources allocated for each client.
+
+Other NFSv4.1 features, Parallel NFS operations in particular,
+are still under development out of tree.
+See http://wiki.linux-nfs.org/wiki/index.php/PNFS_prototype_design
+for more information.
+
+The current implementation is intended for developers only: while it
+does support ordinary file operations on clients we have tested against
+(including the linux client), it is incomplete in ways which may limit
+features unexpectedly, cause known bugs in rare cases, or cause
+interoperability problems with future clients.  Known issues:
+
+	- gss support is questionable: currently mounts with kerberos
+	  from a linux client are possible, but we aren't really
+	  conformant with the spec (for example, we don't use kerberos
+	  on the backchannel correctly).
+	- no trunking support: no clients currently take advantage of
+	  trunking, but this is a mandatory feature, and its use is
+	  recommended to clients in a number of places.  (E.g. to ensure
+	  timely renewal in case an existing connection's retry timeouts
+	  have gotten too long; see section 8.3 of the draft.)
+	  Therefore, lack of this feature may cause future clients to
+	  fail.
+	- Incomplete backchannel support: incomplete backchannel gss
+	  support and no support for BACKCHANNEL_CTL mean that
+	  callbacks (hence delegations and layouts) may not be
+	  available and clients confused by the incomplete
+	  implementation may fail.
+	- Server reboot recovery is unsupported; if the server reboots,
+	  clients may fail.
+	- We do not support SSV, which provides security for shared
+	  client-server state (thus preventing unauthorized tampering
+	  with locks and opens, for example).  It is mandatory for
+	  servers to support this, though no clients use it yet.
+	- Mandatory operations which we do not support, such as
+	  DESTROY_CLIENTID, FREE_STATEID, SECINFO_NO_NAME, and
+	  TEST_STATEID, are not currently used by clients, but will be
+	  (and the spec recommends their uses in common cases), and
+	  clients should not be expected to know how to recover from the
+	  case where they are not supported.  This will eventually cause
+	  interoperability failures.
+
+In addition, some limitations are inherited from the current NFSv4
+implementation:
+
+	- Incomplete delegation enforcement: if a file is renamed or
+	  unlinked, a client holding a delegation may continue to
+	  indefinitely allow opens of the file under the old name.
+
+The table below, taken from the NFSv4.1 document, lists
+the operations that are mandatory to implement (REQ), optional
+(OPT), and NFSv4.0 operations that are required not to implement (MNI)
+in minor version 1.  The first column indicates the operations that
+are not supported yet by the linux server implementation.
+
+The OPTIONAL features identified and their abbreviations are as follows:
+	pNFS	Parallel NFS
+	FDELG	File Delegations
+	DDELG	Directory Delegations
+
+The following abbreviations indicate the linux server implementation status.
+	I	Implemented NFSv4.1 operations.
+	NS	Not Supported.
+	NS*	unimplemented optional feature.
+	P	pNFS features implemented out of tree.
+	PNS	pNFS features that are not supported yet (out of tree).
+
+Operations
+
+   +----------------------+------------+--------------+----------------+
+   | Operation            | REQ, REC,  | Feature      | Definition     |
+   |                      | OPT, or    | (REQ, REC,   |                |
+   |                      | MNI        | or OPT)      |                |
+   +----------------------+------------+--------------+----------------+
+   | ACCESS               | REQ        |              | Section 18.1   |
+NS | BACKCHANNEL_CTL      | REQ        |              | Section 18.33  |
+NS | BIND_CONN_TO_SESSION | REQ        |              | Section 18.34  |
+   | CLOSE                | REQ        |              | Section 18.2   |
+   | COMMIT               | REQ        |              | Section 18.3   |
+   | CREATE               | REQ        |              | Section 18.4   |
+I  | CREATE_SESSION       | REQ        |              | Section 18.36  |
+NS*| DELEGPURGE           | OPT        | FDELG (REQ)  | Section 18.5   |
+   | DELEGRETURN          | OPT        | FDELG,       | Section 18.6   |
+   |                      |            | DDELG, pNFS  |                |
+   |                      |            | (REQ)        |                |
+NS | DESTROY_CLIENTID     | REQ        |              | Section 18.50  |
+I  | DESTROY_SESSION      | REQ        |              | Section 18.37  |
+I  | EXCHANGE_ID          | REQ        |              | Section 18.35  |
+NS | FREE_STATEID         | REQ        |              | Section 18.38  |
+   | GETATTR              | REQ        |              | Section 18.7   |
+P  | GETDEVICEINFO        | OPT        | pNFS (REQ)   | Section 18.40  |
+P  | GETDEVICELIST        | OPT        | pNFS (OPT)   | Section 18.41  |
+   | GETFH                | REQ        |              | Section 18.8   |
+NS*| GET_DIR_DELEGATION   | OPT        | DDELG (REQ)  | Section 18.39  |
+P  | LAYOUTCOMMIT         | OPT        | pNFS (REQ)   | Section 18.42  |
+P  | LAYOUTGET            | OPT        | pNFS (REQ)   | Section 18.43  |
+P  | LAYOUTRETURN         | OPT        | pNFS (REQ)   | Section 18.44  |
+   | LINK                 | OPT        |              | Section 18.9   |
+   | LOCK                 | REQ        |              | Section 18.10  |
+   | LOCKT                | REQ        |              | Section 18.11  |
+   | LOCKU                | REQ        |              | Section 18.12  |
+   | LOOKUP               | REQ        |              | Section 18.13  |
+   | LOOKUPP              | REQ        |              | Section 18.14  |
+   | NVERIFY              | REQ        |              | Section 18.15  |
+   | OPEN                 | REQ        |              | Section 18.16  |
+NS*| OPENATTR             | OPT        |              | Section 18.17  |
+   | OPEN_CONFIRM         | MNI        |              | N/A            |
+   | OPEN_DOWNGRADE       | REQ        |              | Section 18.18  |
+   | PUTFH                | REQ        |              | Section 18.19  |
+   | PUTPUBFH             | REQ        |              | Section 18.20  |
+   | PUTROOTFH            | REQ        |              | Section 18.21  |
+   | READ                 | REQ        |              | Section 18.22  |
+   | READDIR              | REQ        |              | Section 18.23  |
+   | READLINK             | OPT        |              | Section 18.24  |
+NS | RECLAIM_COMPLETE     | REQ        |              | Section 18.51  |
+   | RELEASE_LOCKOWNER    | MNI        |              | N/A            |
+   | REMOVE               | REQ        |              | Section 18.25  |
+   | RENAME               | REQ        |              | Section 18.26  |
+   | RENEW                | MNI        |              | N/A            |
+   | RESTOREFH            | REQ        |              | Section 18.27  |
+   | SAVEFH               | REQ        |              | Section 18.28  |
+   | SECINFO              | REQ        |              | Section 18.29  |
+NS | SECINFO_NO_NAME      | REC        | pNFS files   | Section 18.45, |
+   |                      |            | layout (REQ) | Section 13.12  |
+I  | SEQUENCE             | REQ        |              | Section 18.46  |
+   | SETATTR              | REQ        |              | Section 18.30  |
+   | SETCLIENTID          | MNI        |              | N/A            |
+   | SETCLIENTID_CONFIRM  | MNI        |              | N/A            |
+NS | SET_SSV              | REQ        |              | Section 18.47  |
+NS | TEST_STATEID         | REQ        |              | Section 18.48  |
+   | VERIFY               | REQ        |              | Section 18.31  |
+NS*| WANT_DELEGATION      | OPT        | FDELG (OPT)  | Section 18.49  |
+   | WRITE                | REQ        |              | Section 18.32  |
+
+Callback Operations
+
+   +-------------------------+-----------+-------------+---------------+
+   | Operation               | REQ, REC, | Feature     | Definition    |
+   |                         | OPT, or   | (REQ, REC,  |               |
+   |                         | MNI       | or OPT)     |               |
+   +-------------------------+-----------+-------------+---------------+
+   | CB_GETATTR              | OPT       | FDELG (REQ) | Section 20.1  |
+P  | CB_LAYOUTRECALL         | OPT       | pNFS (REQ)  | Section 20.3  |
+NS*| CB_NOTIFY               | OPT       | DDELG (REQ) | Section 20.4  |
+P  | CB_NOTIFY_DEVICEID      | OPT       | pNFS (OPT)  | Section 20.12 |
+NS*| CB_NOTIFY_LOCK          | OPT       |             | Section 20.11 |
+NS*| CB_PUSH_DELEG           | OPT       | FDELG (OPT) | Section 20.5  |
+   | CB_RECALL               | OPT       | FDELG,      | Section 20.2  |
+   |                         |           | DDELG, pNFS |               |
+   |                         |           | (REQ)       |               |
+NS*| CB_RECALL_ANY           | OPT       | FDELG,      | Section 20.6  |
+   |                         |           | DDELG, pNFS |               |
+   |                         |           | (REQ)       |               |
+NS | CB_RECALL_SLOT          | REQ       |             | Section 20.8  |
+NS*| CB_RECALLABLE_OBJ_AVAIL | OPT       | DDELG, pNFS | Section 20.7  |
+   |                         |           | (REQ)       |               |
+I  | CB_SEQUENCE             | OPT       | FDELG,      | Section 20.9  |
+   |                         |           | DDELG, pNFS |               |
+   |                         |           | (REQ)       |               |
+NS*| CB_WANTS_CANCELLED      | OPT       | FDELG,      | Section 20.10 |
+   |                         |           | DDELG, pNFS |               |
+   |                         |           | (REQ)       |               |
+   +-------------------------+-----------+-------------+---------------+
+
+Implementation notes:
+
+DELEGPURGE:
+* mandatory only for servers that support CLAIM_DELEGATE_PREV and/or
+  CLAIM_DELEG_PREV_FH (which allows clients to keep delegations that
+  persist across client reboots).  Thus we need not implement this for
+  now.
+
+EXCHANGE_ID:
+* only SP4_NONE state protection supported
+* implementation ids are ignored
+
+CREATE_SESSION:
+* backchannel attributes are ignored
+* backchannel security parameters are ignored
+
+SEQUENCE:
+* no support for dynamic slot table renegotiation (optional)
+
+nfsv4.1 COMPOUND rules:
+The following cases aren't supported yet:
+* Enforcing of NFS4ERR_NOT_ONLY_OP for: BIND_CONN_TO_SESSION, CREATE_SESSION,
+  DESTROY_CLIENTID, DESTROY_SESSION, EXCHANGE_ID.
+* DESTROY_SESSION MUST be the final operation in the COMPOUND request.
+
+Nonstandard compound limitations:
+* No support for a sessions fore channel RPC compound that requires both a
+  ca_maxrequestsize request and a ca_maxresponsesize reply, so we may
+  fail to live up to the promise we made in CREATE_SESSION fore channel
+  negotiation.
+* No more than one IO operation (read, write, readdir) allowed per
+  compound.
diff --git a/Documentation/filesystems/nfs/nfsroot.txt b/Documentation/filesystems/nfs/nfsroot.txt
new file mode 100644
index 000000000000..3ba0b945aaf8
--- /dev/null
+++ b/Documentation/filesystems/nfs/nfsroot.txt
@@ -0,0 +1,270 @@
+Mounting the root filesystem via NFS (nfsroot)
+===============================================
+
+Written 1996 by Gero Kuhlmann <gero@gkminix.han.de>
+Updated 1997 by Martin Mares <mj@atrey.karlin.mff.cuni.cz>
+Updated 2006 by Nico Schottelius <nico-kernel-nfsroot@schottelius.org>
+Updated 2006 by Horms <horms@verge.net.au>
+
+
+
+In order to use a diskless system, such as an X-terminal or printer server
+for example, it is necessary for the root filesystem to be present on a
+non-disk device. This may be an initramfs (see Documentation/filesystems/
+ramfs-rootfs-initramfs.txt), a ramdisk (see Documentation/initrd.txt) or a
+filesystem mounted via NFS. The following text describes on how to use NFS
+for the root filesystem. For the rest of this text 'client' means the
+diskless system, and 'server' means the NFS server.
+
+
+
+
+1.) Enabling nfsroot capabilities
+    -----------------------------
+
+In order to use nfsroot, NFS client support needs to be selected as
+built-in during configuration. Once this has been selected, the nfsroot
+option will become available, which should also be selected.
+
+In the networking options, kernel level autoconfiguration can be selected,
+along with the types of autoconfiguration to support. Selecting all of
+DHCP, BOOTP and RARP is safe.
+
+
+
+
+2.) Kernel command line
+    -------------------
+
+When the kernel has been loaded by a boot loader (see below) it needs to be
+told what root fs device to use. And in the case of nfsroot, where to find
+both the server and the name of the directory on the server to mount as root.
+This can be established using the following kernel command line parameters:
+
+
+root=/dev/nfs
+
+  This is necessary to enable the pseudo-NFS-device. Note that it's not a
+  real device but just a synonym to tell the kernel to use NFS instead of
+  a real device.
+
+
+nfsroot=[<server-ip>:]<root-dir>[,<nfs-options>]
+
+  If the `nfsroot' parameter is NOT given on the command line,
+  the default "/tftpboot/%s" will be used.
+
+  <server-ip>	Specifies the IP address of the NFS server.
+		The default address is determined by the `ip' parameter
+		(see below). This parameter allows the use of different
+		servers for IP autoconfiguration and NFS.
+
+  <root-dir>	Name of the directory on the server to mount as root.
+		If there is a "%s" token in the string, it will be
+		replaced by the ASCII-representation of the client's
+		IP address.
+
+  <nfs-options>	Standard NFS options. All options are separated by commas.
+		The following defaults are used:
+			port		= as given by server portmap daemon
+			rsize		= 4096
+			wsize		= 4096
+			timeo		= 7
+			retrans		= 3
+			acregmin	= 3
+			acregmax	= 60
+			acdirmin	= 30
+			acdirmax	= 60
+			flags		= hard, nointr, noposix, cto, ac
+
+
+ip=<client-ip>:<server-ip>:<gw-ip>:<netmask>:<hostname>:<device>:<autoconf>
+
+  This parameter tells the kernel how to configure IP addresses of devices
+  and also how to set up the IP routing table. It was originally called
+  `nfsaddrs', but now the boot-time IP configuration works independently of
+  NFS, so it was renamed to `ip' and the old name remained as an alias for
+  compatibility reasons.
+
+  If this parameter is missing from the kernel command line, all fields are
+  assumed to be empty, and the defaults mentioned below apply. In general
+  this means that the kernel tries to configure everything using
+  autoconfiguration.
+
+  The <autoconf> parameter can appear alone as the value to the `ip'
+  parameter (without all the ':' characters before).  If the value is
+  "ip=off" or "ip=none", no autoconfiguration will take place, otherwise
+  autoconfiguration will take place.  The most common way to use this
+  is "ip=dhcp".
+
+  <client-ip>	IP address of the client.
+
+  		Default:  Determined using autoconfiguration.
+
+  <server-ip>	IP address of the NFS server. If RARP is used to determine
+		the client address and this parameter is NOT empty only
+		replies from the specified server are accepted.
+
+		Only required for NFS root. That is autoconfiguration
+		will not be triggered if it is missing and NFS root is not
+		in operation.
+
+		Default: Determined using autoconfiguration.
+		         The address of the autoconfiguration server is used.
+
+  <gw-ip>	IP address of a gateway if the server is on a different subnet.
+
+		Default: Determined using autoconfiguration.
+
+  <netmask>	Netmask for local network interface. If unspecified
+		the netmask is derived from the client IP address assuming
+		classful addressing.
+
+		Default:  Determined using autoconfiguration.
+
+  <hostname>	Name of the client. May be supplied by autoconfiguration,
+  		but its absence will not trigger autoconfiguration.
+
+  		Default: Client IP address is used in ASCII notation.
+
+  <device>	Name of network device to use.
+
+		Default: If the host only has one device, it is used.
+			 Otherwise the device is determined using
+			 autoconfiguration. This is done by sending
+			 autoconfiguration requests out of all devices,
+			 and using the device that received the first reply.
+
+  <autoconf>	Method to use for autoconfiguration. In the case of options
+                which specify multiple autoconfiguration protocols,
+		requests are sent using all protocols, and the first one
+		to reply is used.
+
+		Only autoconfiguration protocols that have been compiled
+		into the kernel will be used, regardless of the value of
+		this option.
+
+                  off or none: don't use autoconfiguration
+				(do static IP assignment instead)
+		  on or any:   use any protocol available in the kernel
+			       (default)
+		  dhcp:        use DHCP
+		  bootp:       use BOOTP
+		  rarp:        use RARP
+		  both:        use both BOOTP and RARP but not DHCP
+		               (old option kept for backwards compatibility)
+
+                Default: any
+
+
+
+
+3.) Boot Loader
+    ----------
+
+To get the kernel into memory different approaches can be used.
+They depend on various facilities being available:
+
+
+3.1)  Booting from a floppy using syslinux
+
+	When building kernels, an easy way to create a boot floppy that uses
+	syslinux is to use the zdisk or bzdisk make targets which use zimage
+      	and bzimage images respectively. Both targets accept the
+     	FDARGS parameter which can be used to set the kernel command line.
+
+	e.g.
+	   make bzdisk FDARGS="root=/dev/nfs"
+
+   	Note that the user running this command will need to have
+     	access to the floppy drive device, /dev/fd0
+
+     	For more information on syslinux, including how to create bootdisks
+     	for prebuilt kernels, see http://syslinux.zytor.com/
+
+	N.B: Previously it was possible to write a kernel directly to
+	     a floppy using dd, configure the boot device using rdev, and
+	     boot using the resulting floppy. Linux no longer supports this
+	     method of booting.
+
+3.2) Booting from a cdrom using isolinux
+
+     	When building kernels, an easy way to create a bootable cdrom that
+     	uses isolinux is to use the isoimage target which uses a bzimage
+     	image. Like zdisk and bzdisk, this target accepts the FDARGS
+     	parameter which can be used to set the kernel command line.
+
+	e.g.
+	  make isoimage FDARGS="root=/dev/nfs"
+
+     	The resulting iso image will be arch/<ARCH>/boot/image.iso
+     	This can be written to a cdrom using a variety of tools including
+     	cdrecord.
+
+	e.g.
+	  cdrecord dev=ATAPI:1,0,0 arch/i386/boot/image.iso
+
+     	For more information on isolinux, including how to create bootdisks
+     	for prebuilt kernels, see http://syslinux.zytor.com/
+
+3.2) Using LILO
+	When using LILO all the necessary command line parameters may be
+	specified using the 'append=' directive in the LILO configuration
+	file.
+
+	However, to use the 'root=' directive you also need to create
+	a dummy root device, which may be removed after LILO is run.
+
+	mknod /dev/boot255 c 0 255
+
+	For information on configuring LILO, please refer to its documentation.
+
+3.3) Using GRUB
+	When using GRUB, kernel parameter are simply appended after the kernel
+	specification: kernel <kernel> <parameters>
+
+3.4) Using loadlin
+	loadlin may be used to boot Linux from a DOS command prompt without
+	requiring a local hard disk to mount as root. This has not been
+	thoroughly tested by the authors of this document, but in general
+	it should be possible configure the kernel command line similarly
+	to the configuration of LILO.
+
+	Please refer to the loadlin documentation for further information.
+
+3.5) Using a boot ROM
+	This is probably the most elegant way of booting a diskless client.
+	With a boot ROM the kernel is loaded using the TFTP protocol. The
+	authors of this document are not aware of any no commercial boot
+	ROMs that support booting Linux over the network. However, there
+	are two free implementations of a boot ROM, netboot-nfs and
+	etherboot, both of which are available on sunsite.unc.edu, and both
+	of which contain everything you need to boot a diskless Linux client.
+
+3.6) Using pxelinux
+	Pxelinux may be used to boot linux using the PXE boot loader
+	which is present on many modern network cards.
+
+	When using pxelinux, the kernel image is specified using
+	"kernel <relative-path-below /tftpboot>". The nfsroot parameters
+	are passed to the kernel by adding them to the "append" line.
+	It is common to use serial console in conjunction with pxeliunx,
+	see Documentation/serial-console.txt for more information.
+
+	For more information on isolinux, including how to create bootdisks
+	for prebuilt kernels, see http://syslinux.zytor.com/
+
+
+
+
+4.) Credits
+    -------
+
+  The nfsroot code in the kernel and the RARP support have been written
+  by Gero Kuhlmann <gero@gkminix.han.de>.
+
+  The rest of the IP layer autoconfiguration code has been written
+  by Martin Mares <mj@atrey.karlin.mff.cuni.cz>.
+
+  In order to write the initial version of nfsroot I would like to thank
+  Jens-Uwe Mager <jum@anubis.han.de> for his help.
diff --git a/Documentation/filesystems/nfs41-server.txt b/Documentation/filesystems/nfs41-server.txt
deleted file mode 100644
index 1bd0d0c05171..000000000000
--- a/Documentation/filesystems/nfs41-server.txt
+++ /dev/null
@@ -1,222 +0,0 @@
-NFSv4.1 Server Implementation
-
-Server support for minorversion 1 can be controlled using the
-/proc/fs/nfsd/versions control file.  The string output returned
-by reading this file will contain either "+4.1" or "-4.1"
-correspondingly.
-
-Currently, server support for minorversion 1 is disabled by default.
-It can be enabled at run time by writing the string "+4.1" to
-the /proc/fs/nfsd/versions control file.  Note that to write this
-control file, the nfsd service must be taken down.  Use your user-mode
-nfs-utils to set this up; see rpc.nfsd(8)
-
-(Warning: older servers will interpret "+4.1" and "-4.1" as "+4" and
-"-4", respectively.  Therefore, code meant to work on both new and old
-kernels must turn 4.1 on or off *before* turning support for version 4
-on or off; rpc.nfsd does this correctly.)
-
-The NFSv4 minorversion 1 (NFSv4.1) implementation in nfsd is based
-on the latest NFSv4.1 Internet Draft:
-http://tools.ietf.org/html/draft-ietf-nfsv4-minorversion1-29
-
-From the many new features in NFSv4.1 the current implementation
-focuses on the mandatory-to-implement NFSv4.1 Sessions, providing
-"exactly once" semantics and better control and throttling of the
-resources allocated for each client.
-
-Other NFSv4.1 features, Parallel NFS operations in particular,
-are still under development out of tree.
-See http://wiki.linux-nfs.org/wiki/index.php/PNFS_prototype_design
-for more information.
-
-The current implementation is intended for developers only: while it
-does support ordinary file operations on clients we have tested against
-(including the linux client), it is incomplete in ways which may limit
-features unexpectedly, cause known bugs in rare cases, or cause
-interoperability problems with future clients.  Known issues:
-
-	- gss support is questionable: currently mounts with kerberos
-	  from a linux client are possible, but we aren't really
-	  conformant with the spec (for example, we don't use kerberos
-	  on the backchannel correctly).
-	- no trunking support: no clients currently take advantage of
-	  trunking, but this is a mandatory feature, and its use is
-	  recommended to clients in a number of places.  (E.g. to ensure
-	  timely renewal in case an existing connection's retry timeouts
-	  have gotten too long; see section 8.3 of the draft.)
-	  Therefore, lack of this feature may cause future clients to
-	  fail.
-	- Incomplete backchannel support: incomplete backchannel gss
-	  support and no support for BACKCHANNEL_CTL mean that
-	  callbacks (hence delegations and layouts) may not be
-	  available and clients confused by the incomplete
-	  implementation may fail.
-	- Server reboot recovery is unsupported; if the server reboots,
-	  clients may fail.
-	- We do not support SSV, which provides security for shared
-	  client-server state (thus preventing unauthorized tampering
-	  with locks and opens, for example).  It is mandatory for
-	  servers to support this, though no clients use it yet.
-	- Mandatory operations which we do not support, such as
-	  DESTROY_CLIENTID, FREE_STATEID, SECINFO_NO_NAME, and
-	  TEST_STATEID, are not currently used by clients, but will be
-	  (and the spec recommends their uses in common cases), and
-	  clients should not be expected to know how to recover from the
-	  case where they are not supported.  This will eventually cause
-	  interoperability failures.
-
-In addition, some limitations are inherited from the current NFSv4
-implementation:
-
-	- Incomplete delegation enforcement: if a file is renamed or
-	  unlinked, a client holding a delegation may continue to
-	  indefinitely allow opens of the file under the old name.
-
-The table below, taken from the NFSv4.1 document, lists
-the operations that are mandatory to implement (REQ), optional
-(OPT), and NFSv4.0 operations that are required not to implement (MNI)
-in minor version 1.  The first column indicates the operations that
-are not supported yet by the linux server implementation.
-
-The OPTIONAL features identified and their abbreviations are as follows:
-	pNFS	Parallel NFS
-	FDELG	File Delegations
-	DDELG	Directory Delegations
-
-The following abbreviations indicate the linux server implementation status.
-	I	Implemented NFSv4.1 operations.
-	NS	Not Supported.
-	NS*	unimplemented optional feature.
-	P	pNFS features implemented out of tree.
-	PNS	pNFS features that are not supported yet (out of tree).
-
-Operations
-
-   +----------------------+------------+--------------+----------------+
-   | Operation            | REQ, REC,  | Feature      | Definition     |
-   |                      | OPT, or    | (REQ, REC,   |                |
-   |                      | MNI        | or OPT)      |                |
-   +----------------------+------------+--------------+----------------+
-   | ACCESS               | REQ        |              | Section 18.1   |
-NS | BACKCHANNEL_CTL      | REQ        |              | Section 18.33  |
-NS | BIND_CONN_TO_SESSION | REQ        |              | Section 18.34  |
-   | CLOSE                | REQ        |              | Section 18.2   |
-   | COMMIT               | REQ        |              | Section 18.3   |
-   | CREATE               | REQ        |              | Section 18.4   |
-I  | CREATE_SESSION       | REQ        |              | Section 18.36  |
-NS*| DELEGPURGE           | OPT        | FDELG (REQ)  | Section 18.5   |
-   | DELEGRETURN          | OPT        | FDELG,       | Section 18.6   |
-   |                      |            | DDELG, pNFS  |                |
-   |                      |            | (REQ)        |                |
-NS | DESTROY_CLIENTID     | REQ        |              | Section 18.50  |
-I  | DESTROY_SESSION      | REQ        |              | Section 18.37  |
-I  | EXCHANGE_ID          | REQ        |              | Section 18.35  |
-NS | FREE_STATEID         | REQ        |              | Section 18.38  |
-   | GETATTR              | REQ        |              | Section 18.7   |
-P  | GETDEVICEINFO        | OPT        | pNFS (REQ)   | Section 18.40  |
-P  | GETDEVICELIST        | OPT        | pNFS (OPT)   | Section 18.41  |
-   | GETFH                | REQ        |              | Section 18.8   |
-NS*| GET_DIR_DELEGATION   | OPT        | DDELG (REQ)  | Section 18.39  |
-P  | LAYOUTCOMMIT         | OPT        | pNFS (REQ)   | Section 18.42  |
-P  | LAYOUTGET            | OPT        | pNFS (REQ)   | Section 18.43  |
-P  | LAYOUTRETURN         | OPT        | pNFS (REQ)   | Section 18.44  |
-   | LINK                 | OPT        |              | Section 18.9   |
-   | LOCK                 | REQ        |              | Section 18.10  |
-   | LOCKT                | REQ        |              | Section 18.11  |
-   | LOCKU                | REQ        |              | Section 18.12  |
-   | LOOKUP               | REQ        |              | Section 18.13  |
-   | LOOKUPP              | REQ        |              | Section 18.14  |
-   | NVERIFY              | REQ        |              | Section 18.15  |
-   | OPEN                 | REQ        |              | Section 18.16  |
-NS*| OPENATTR             | OPT        |              | Section 18.17  |
-   | OPEN_CONFIRM         | MNI        |              | N/A            |
-   | OPEN_DOWNGRADE       | REQ        |              | Section 18.18  |
-   | PUTFH                | REQ        |              | Section 18.19  |
-   | PUTPUBFH             | REQ        |              | Section 18.20  |
-   | PUTROOTFH            | REQ        |              | Section 18.21  |
-   | READ                 | REQ        |              | Section 18.22  |
-   | READDIR              | REQ        |              | Section 18.23  |
-   | READLINK             | OPT        |              | Section 18.24  |
-NS | RECLAIM_COMPLETE     | REQ        |              | Section 18.51  |
-   | RELEASE_LOCKOWNER    | MNI        |              | N/A            |
-   | REMOVE               | REQ        |              | Section 18.25  |
-   | RENAME               | REQ        |              | Section 18.26  |
-   | RENEW                | MNI        |              | N/A            |
-   | RESTOREFH            | REQ        |              | Section 18.27  |
-   | SAVEFH               | REQ        |              | Section 18.28  |
-   | SECINFO              | REQ        |              | Section 18.29  |
-NS | SECINFO_NO_NAME      | REC        | pNFS files   | Section 18.45, |
-   |                      |            | layout (REQ) | Section 13.12  |
-I  | SEQUENCE             | REQ        |              | Section 18.46  |
-   | SETATTR              | REQ        |              | Section 18.30  |
-   | SETCLIENTID          | MNI        |              | N/A            |
-   | SETCLIENTID_CONFIRM  | MNI        |              | N/A            |
-NS | SET_SSV              | REQ        |              | Section 18.47  |
-NS | TEST_STATEID         | REQ        |              | Section 18.48  |
-   | VERIFY               | REQ        |              | Section 18.31  |
-NS*| WANT_DELEGATION      | OPT        | FDELG (OPT)  | Section 18.49  |
-   | WRITE                | REQ        |              | Section 18.32  |
-
-Callback Operations
-
-   +-------------------------+-----------+-------------+---------------+
-   | Operation               | REQ, REC, | Feature     | Definition    |
-   |                         | OPT, or   | (REQ, REC,  |               |
-   |                         | MNI       | or OPT)     |               |
-   +-------------------------+-----------+-------------+---------------+
-   | CB_GETATTR              | OPT       | FDELG (REQ) | Section 20.1  |
-P  | CB_LAYOUTRECALL         | OPT       | pNFS (REQ)  | Section 20.3  |
-NS*| CB_NOTIFY               | OPT       | DDELG (REQ) | Section 20.4  |
-P  | CB_NOTIFY_DEVICEID      | OPT       | pNFS (OPT)  | Section 20.12 |
-NS*| CB_NOTIFY_LOCK          | OPT       |             | Section 20.11 |
-NS*| CB_PUSH_DELEG           | OPT       | FDELG (OPT) | Section 20.5  |
-   | CB_RECALL               | OPT       | FDELG,      | Section 20.2  |
-   |                         |           | DDELG, pNFS |               |
-   |                         |           | (REQ)       |               |
-NS*| CB_RECALL_ANY           | OPT       | FDELG,      | Section 20.6  |
-   |                         |           | DDELG, pNFS |               |
-   |                         |           | (REQ)       |               |
-NS | CB_RECALL_SLOT          | REQ       |             | Section 20.8  |
-NS*| CB_RECALLABLE_OBJ_AVAIL | OPT       | DDELG, pNFS | Section 20.7  |
-   |                         |           | (REQ)       |               |
-I  | CB_SEQUENCE             | OPT       | FDELG,      | Section 20.9  |
-   |                         |           | DDELG, pNFS |               |
-   |                         |           | (REQ)       |               |
-NS*| CB_WANTS_CANCELLED      | OPT       | FDELG,      | Section 20.10 |
-   |                         |           | DDELG, pNFS |               |
-   |                         |           | (REQ)       |               |
-   +-------------------------+-----------+-------------+---------------+
-
-Implementation notes:
-
-DELEGPURGE:
-* mandatory only for servers that support CLAIM_DELEGATE_PREV and/or
-  CLAIM_DELEG_PREV_FH (which allows clients to keep delegations that
-  persist across client reboots).  Thus we need not implement this for
-  now.
-
-EXCHANGE_ID:
-* only SP4_NONE state protection supported
-* implementation ids are ignored
-
-CREATE_SESSION:
-* backchannel attributes are ignored
-* backchannel security parameters are ignored
-
-SEQUENCE:
-* no support for dynamic slot table renegotiation (optional)
-
-nfsv4.1 COMPOUND rules:
-The following cases aren't supported yet:
-* Enforcing of NFS4ERR_NOT_ONLY_OP for: BIND_CONN_TO_SESSION, CREATE_SESSION,
-  DESTROY_CLIENTID, DESTROY_SESSION, EXCHANGE_ID.
-* DESTROY_SESSION MUST be the final operation in the COMPOUND request.
-
-Nonstandard compound limitations:
-* No support for a sessions fore channel RPC compound that requires both a
-  ca_maxrequestsize request and a ca_maxresponsesize reply, so we may
-  fail to live up to the promise we made in CREATE_SESSION fore channel
-  negotiation.
-* No more than one IO operation (read, write, readdir) allowed per
-  compound.
diff --git a/Documentation/filesystems/nfsroot.txt b/Documentation/filesystems/nfsroot.txt
deleted file mode 100644
index 3ba0b945aaf8..000000000000
--- a/Documentation/filesystems/nfsroot.txt
+++ /dev/null
@@ -1,270 +0,0 @@
-Mounting the root filesystem via NFS (nfsroot)
-===============================================
-
-Written 1996 by Gero Kuhlmann <gero@gkminix.han.de>
-Updated 1997 by Martin Mares <mj@atrey.karlin.mff.cuni.cz>
-Updated 2006 by Nico Schottelius <nico-kernel-nfsroot@schottelius.org>
-Updated 2006 by Horms <horms@verge.net.au>
-
-
-
-In order to use a diskless system, such as an X-terminal or printer server
-for example, it is necessary for the root filesystem to be present on a
-non-disk device. This may be an initramfs (see Documentation/filesystems/
-ramfs-rootfs-initramfs.txt), a ramdisk (see Documentation/initrd.txt) or a
-filesystem mounted via NFS. The following text describes on how to use NFS
-for the root filesystem. For the rest of this text 'client' means the
-diskless system, and 'server' means the NFS server.
-
-
-
-
-1.) Enabling nfsroot capabilities
-    -----------------------------
-
-In order to use nfsroot, NFS client support needs to be selected as
-built-in during configuration. Once this has been selected, the nfsroot
-option will become available, which should also be selected.
-
-In the networking options, kernel level autoconfiguration can be selected,
-along with the types of autoconfiguration to support. Selecting all of
-DHCP, BOOTP and RARP is safe.
-
-
-
-
-2.) Kernel command line
-    -------------------
-
-When the kernel has been loaded by a boot loader (see below) it needs to be
-told what root fs device to use. And in the case of nfsroot, where to find
-both the server and the name of the directory on the server to mount as root.
-This can be established using the following kernel command line parameters:
-
-
-root=/dev/nfs
-
-  This is necessary to enable the pseudo-NFS-device. Note that it's not a
-  real device but just a synonym to tell the kernel to use NFS instead of
-  a real device.
-
-
-nfsroot=[<server-ip>:]<root-dir>[,<nfs-options>]
-
-  If the `nfsroot' parameter is NOT given on the command line,
-  the default "/tftpboot/%s" will be used.
-
-  <server-ip>	Specifies the IP address of the NFS server.
-		The default address is determined by the `ip' parameter
-		(see below). This parameter allows the use of different
-		servers for IP autoconfiguration and NFS.
-
-  <root-dir>	Name of the directory on the server to mount as root.
-		If there is a "%s" token in the string, it will be
-		replaced by the ASCII-representation of the client's
-		IP address.
-
-  <nfs-options>	Standard NFS options. All options are separated by commas.
-		The following defaults are used:
-			port		= as given by server portmap daemon
-			rsize		= 4096
-			wsize		= 4096
-			timeo		= 7
-			retrans		= 3
-			acregmin	= 3
-			acregmax	= 60
-			acdirmin	= 30
-			acdirmax	= 60
-			flags		= hard, nointr, noposix, cto, ac
-
-
-ip=<client-ip>:<server-ip>:<gw-ip>:<netmask>:<hostname>:<device>:<autoconf>
-
-  This parameter tells the kernel how to configure IP addresses of devices
-  and also how to set up the IP routing table. It was originally called
-  `nfsaddrs', but now the boot-time IP configuration works independently of
-  NFS, so it was renamed to `ip' and the old name remained as an alias for
-  compatibility reasons.
-
-  If this parameter is missing from the kernel command line, all fields are
-  assumed to be empty, and the defaults mentioned below apply. In general
-  this means that the kernel tries to configure everything using
-  autoconfiguration.
-
-  The <autoconf> parameter can appear alone as the value to the `ip'
-  parameter (without all the ':' characters before).  If the value is
-  "ip=off" or "ip=none", no autoconfiguration will take place, otherwise
-  autoconfiguration will take place.  The most common way to use this
-  is "ip=dhcp".
-
-  <client-ip>	IP address of the client.
-
-  		Default:  Determined using autoconfiguration.
-
-  <server-ip>	IP address of the NFS server. If RARP is used to determine
-		the client address and this parameter is NOT empty only
-		replies from the specified server are accepted.
-
-		Only required for NFS root. That is autoconfiguration
-		will not be triggered if it is missing and NFS root is not
-		in operation.
-
-		Default: Determined using autoconfiguration.
-		         The address of the autoconfiguration server is used.
-
-  <gw-ip>	IP address of a gateway if the server is on a different subnet.
-
-		Default: Determined using autoconfiguration.
-
-  <netmask>	Netmask for local network interface. If unspecified
-		the netmask is derived from the client IP address assuming
-		classful addressing.
-
-		Default:  Determined using autoconfiguration.
-
-  <hostname>	Name of the client. May be supplied by autoconfiguration,
-  		but its absence will not trigger autoconfiguration.
-
-  		Default: Client IP address is used in ASCII notation.
-
-  <device>	Name of network device to use.
-
-		Default: If the host only has one device, it is used.
-			 Otherwise the device is determined using
-			 autoconfiguration. This is done by sending
-			 autoconfiguration requests out of all devices,
-			 and using the device that received the first reply.
-
-  <autoconf>	Method to use for autoconfiguration. In the case of options
-                which specify multiple autoconfiguration protocols,
-		requests are sent using all protocols, and the first one
-		to reply is used.
-
-		Only autoconfiguration protocols that have been compiled
-		into the kernel will be used, regardless of the value of
-		this option.
-
-                  off or none: don't use autoconfiguration
-				(do static IP assignment instead)
-		  on or any:   use any protocol available in the kernel
-			       (default)
-		  dhcp:        use DHCP
-		  bootp:       use BOOTP
-		  rarp:        use RARP
-		  both:        use both BOOTP and RARP but not DHCP
-		               (old option kept for backwards compatibility)
-
-                Default: any
-
-
-
-
-3.) Boot Loader
-    ----------
-
-To get the kernel into memory different approaches can be used.
-They depend on various facilities being available:
-
-
-3.1)  Booting from a floppy using syslinux
-
-	When building kernels, an easy way to create a boot floppy that uses
-	syslinux is to use the zdisk or bzdisk make targets which use zimage
-      	and bzimage images respectively. Both targets accept the
-     	FDARGS parameter which can be used to set the kernel command line.
-
-	e.g.
-	   make bzdisk FDARGS="root=/dev/nfs"
-
-   	Note that the user running this command will need to have
-     	access to the floppy drive device, /dev/fd0
-
-     	For more information on syslinux, including how to create bootdisks
-     	for prebuilt kernels, see http://syslinux.zytor.com/
-
-	N.B: Previously it was possible to write a kernel directly to
-	     a floppy using dd, configure the boot device using rdev, and
-	     boot using the resulting floppy. Linux no longer supports this
-	     method of booting.
-
-3.2) Booting from a cdrom using isolinux
-
-     	When building kernels, an easy way to create a bootable cdrom that
-     	uses isolinux is to use the isoimage target which uses a bzimage
-     	image. Like zdisk and bzdisk, this target accepts the FDARGS
-     	parameter which can be used to set the kernel command line.
-
-	e.g.
-	  make isoimage FDARGS="root=/dev/nfs"
-
-     	The resulting iso image will be arch/<ARCH>/boot/image.iso
-     	This can be written to a cdrom using a variety of tools including
-     	cdrecord.
-
-	e.g.
-	  cdrecord dev=ATAPI:1,0,0 arch/i386/boot/image.iso
-
-     	For more information on isolinux, including how to create bootdisks
-     	for prebuilt kernels, see http://syslinux.zytor.com/
-
-3.2) Using LILO
-	When using LILO all the necessary command line parameters may be
-	specified using the 'append=' directive in the LILO configuration
-	file.
-
-	However, to use the 'root=' directive you also need to create
-	a dummy root device, which may be removed after LILO is run.
-
-	mknod /dev/boot255 c 0 255
-
-	For information on configuring LILO, please refer to its documentation.
-
-3.3) Using GRUB
-	When using GRUB, kernel parameter are simply appended after the kernel
-	specification: kernel <kernel> <parameters>
-
-3.4) Using loadlin
-	loadlin may be used to boot Linux from a DOS command prompt without
-	requiring a local hard disk to mount as root. This has not been
-	thoroughly tested by the authors of this document, but in general
-	it should be possible configure the kernel command line similarly
-	to the configuration of LILO.
-
-	Please refer to the loadlin documentation for further information.
-
-3.5) Using a boot ROM
-	This is probably the most elegant way of booting a diskless client.
-	With a boot ROM the kernel is loaded using the TFTP protocol. The
-	authors of this document are not aware of any no commercial boot
-	ROMs that support booting Linux over the network. However, there
-	are two free implementations of a boot ROM, netboot-nfs and
-	etherboot, both of which are available on sunsite.unc.edu, and both
-	of which contain everything you need to boot a diskless Linux client.
-
-3.6) Using pxelinux
-	Pxelinux may be used to boot linux using the PXE boot loader
-	which is present on many modern network cards.
-
-	When using pxelinux, the kernel image is specified using
-	"kernel <relative-path-below /tftpboot>". The nfsroot parameters
-	are passed to the kernel by adding them to the "append" line.
-	It is common to use serial console in conjunction with pxeliunx,
-	see Documentation/serial-console.txt for more information.
-
-	For more information on isolinux, including how to create bootdisks
-	for prebuilt kernels, see http://syslinux.zytor.com/
-
-
-
-
-4.) Credits
-    -------
-
-  The nfsroot code in the kernel and the RARP support have been written
-  by Gero Kuhlmann <gero@gkminix.han.de>.
-
-  The rest of the IP layer autoconfiguration code has been written
-  by Martin Mares <mj@atrey.karlin.mff.cuni.cz>.
-
-  In order to write the initial version of nfsroot I would like to thank
-  Jens-Uwe Mager <jum@anubis.han.de> for his help.
diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting
index 92b888d540a6..a7e9746ee7ea 100644
--- a/Documentation/filesystems/porting
+++ b/Documentation/filesystems/porting
@@ -140,7 +140,7 @@ Callers of notify_change() need ->i_mutex now.
 New super_block field "struct export_operations *s_export_op" for
 explicit support for exporting, e.g. via NFS.  The structure is fully
 documented at its declaration in include/linux/fs.h, and in
-Documentation/filesystems/Exporting.
+Documentation/filesystems/nfs/Exporting.
 
 Briefly it allows for the definition of decode_fh and encode_fh operations
 to encode and decode filehandles, and allows the filesystem to use
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 9107b387e91f..dab0f04b4264 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1017,7 +1017,7 @@ and is between 256 and 4096 characters. It is defined in the file
 			No delay
 
 	ip=		[IP_PNP]
-			See Documentation/filesystems/nfsroot.txt.
+			See Documentation/filesystems/nfs/nfsroot.txt.
 
 	ip2=		[HW] Set IO/IRQ pairs for up to 4 IntelliPort boards
 			See comment before ip2_setup() in
@@ -1538,10 +1538,10 @@ and is between 256 and 4096 characters. It is defined in the file
 			going to be removed in 2.6.29.
 
 	nfsaddrs=	[NFS]
-			See Documentation/filesystems/nfsroot.txt.
+			See Documentation/filesystems/nfs/nfsroot.txt.
 
 	nfsroot=	[NFS] nfs root filesystem for disk-less boxes.
-			See Documentation/filesystems/nfsroot.txt.
+			See Documentation/filesystems/nfs/nfsroot.txt.
 
 	nfs.callback_tcpport=
 			[NFS] set the TCP port on which the NFSv4 callback
diff --git a/fs/cifs/export.c b/fs/cifs/export.c
index 75949d6a5f1b..6177f7cca16a 100644
--- a/fs/cifs/export.c
+++ b/fs/cifs/export.c
@@ -24,7 +24,7 @@
  */
 
  /*
-  * See Documentation/filesystems/Exporting
+  * See Documentation/filesystems/nfs/Exporting
   * and examples in fs/exportfs
   *
   * Since cifs is a network file system, an "fsid" must be included for
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index 197c7db583c7..e9e175949a63 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -6,7 +6,7 @@
  * and for mapping back from file handles to dentries.
  *
  * For details on why we do all the strange and hairy things in here
- * take a look at Documentation/filesystems/Exporting.
+ * take a look at Documentation/filesystems/nfs/Exporting.
  */
 #include <linux/exportfs.h>
 #include <linux/fs.h>
diff --git a/fs/isofs/export.c b/fs/isofs/export.c
index e81a30593ba9..ed752cb38474 100644
--- a/fs/isofs/export.c
+++ b/fs/isofs/export.c
@@ -9,7 +9,7 @@
  *
  * The following files are helpful:
  *
- *     Documentation/filesystems/Exporting
+ *     Documentation/filesystems/nfs/Exporting
  *     fs/exportfs/expfs.c.
  */
 
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig
index 2a77bc25d5af..59e5673b4597 100644
--- a/fs/nfs/Kconfig
+++ b/fs/nfs/Kconfig
@@ -90,7 +90,7 @@ config ROOT_NFS
 	  If you want your system to mount its root file system via NFS,
 	  choose Y here.  This is common practice for managing systems
 	  without local permanent storage.  For details, read
-	  <file:Documentation/filesystems/nfsroot.txt>.
+	  <file:Documentation/filesystems/nfs/nfsroot.txt>.
 
 	  Most people say N here.
 
diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h
index 27e772cefb6a..dc12f416a49f 100644
--- a/include/linux/exportfs.h
+++ b/include/linux/exportfs.h
@@ -97,7 +97,7 @@ struct fid {
  * @get_name:       find the name for a given inode in a given directory
  * @get_parent:     find the parent of a given directory
  *
- * See Documentation/filesystems/Exporting for details on how to use
+ * See Documentation/filesystems/nfs/Exporting for details on how to use
  * this interface correctly.
  *
  * encode_fh:
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 70491d9035eb..0c94a1ac2946 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -166,7 +166,7 @@ config IP_PNP_DHCP
 
 	  If unsure, say Y. Note that if you want to use DHCP, a DHCP server
 	  must be operating on your network.  Read
-	  <file:Documentation/filesystems/nfsroot.txt> for details.
+	  <file:Documentation/filesystems/nfs/nfsroot.txt> for details.
 
 config IP_PNP_BOOTP
 	bool "IP: BOOTP support"
@@ -181,7 +181,7 @@ config IP_PNP_BOOTP
 	  does BOOTP itself, providing all necessary information on the kernel
 	  command line, you can say N here. If unsure, say Y. Note that if you
 	  want to use BOOTP, a BOOTP server must be operating on your network.
-	  Read <file:Documentation/filesystems/nfsroot.txt> for details.
+	  Read <file:Documentation/filesystems/nfs/nfsroot.txt> for details.
 
 config IP_PNP_RARP
 	bool "IP: RARP support"
@@ -194,7 +194,7 @@ config IP_PNP_RARP
 	  older protocol which is being obsoleted by BOOTP and DHCP), say Y
 	  here. Note that if you want to use RARP, a RARP server must be
 	  operating on your network. Read
-	  <file:Documentation/filesystems/nfsroot.txt> for details.
+	  <file:Documentation/filesystems/nfs/nfsroot.txt> for details.
 
 # not yet ready..
 #   bool '    IP: ARP support' CONFIG_IP_PNP_ARP
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index f8d04c256454..7dcbf4706099 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -1447,7 +1447,7 @@ late_initcall(ip_auto_config);
 
 /*
  *  Decode any IP configuration options in the "ip=" or "nfsaddrs=" kernel
- *  command line parameter.  See Documentation/filesystems/nfsroot.txt.
+ *  command line parameter.  See Documentation/filesystems/nfs/nfsroot.txt.
  */
 static int __init ic_proto_name(char *name)
 {
-- 
cgit v1.2.3


From c017b4be3e84176cab10eca5e6c4faeb8cfc6f3e Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Wed, 28 Oct 2009 13:33:09 +0000
Subject: kmemleak: Simplify the kmemleak_scan_area() function prototype

This function was taking non-necessary arguments which can be determined
by kmemleak. The patch also modifies the calling sites.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Christoph Lameter <cl@linux-foundation.org>
Cc: Rusty Russell <rusty@rustcorp.com.au>
---
 include/linux/kmemleak.h |  6 ++----
 kernel/module.c          |  7 ++-----
 mm/kmemleak.c            | 49 +++++++++++++++++++++---------------------------
 mm/slab.c                |  4 ++--
 4 files changed, 27 insertions(+), 39 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kmemleak.h b/include/linux/kmemleak.h
index 3c7497d46ee9..99d9a6766f7e 100644
--- a/include/linux/kmemleak.h
+++ b/include/linux/kmemleak.h
@@ -32,8 +32,7 @@ extern void kmemleak_padding(const void *ptr, unsigned long offset,
 			     size_t size) __ref;
 extern void kmemleak_not_leak(const void *ptr) __ref;
 extern void kmemleak_ignore(const void *ptr) __ref;
-extern void kmemleak_scan_area(const void *ptr, unsigned long offset,
-			       size_t length, gfp_t gfp) __ref;
+extern void kmemleak_scan_area(const void *ptr, size_t size, gfp_t gfp) __ref;
 extern void kmemleak_no_scan(const void *ptr) __ref;
 
 static inline void kmemleak_alloc_recursive(const void *ptr, size_t size,
@@ -84,8 +83,7 @@ static inline void kmemleak_not_leak(const void *ptr)
 static inline void kmemleak_ignore(const void *ptr)
 {
 }
-static inline void kmemleak_scan_area(const void *ptr, unsigned long offset,
-				      size_t length, gfp_t gfp)
+static inline void kmemleak_scan_area(const void *ptr, size_t size, gfp_t gfp)
 {
 }
 static inline void kmemleak_erase(void **ptr)
diff --git a/kernel/module.c b/kernel/module.c
index 8b7d8805819d..1eb952097077 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2043,9 +2043,7 @@ static void kmemleak_load_module(struct module *mod, Elf_Ehdr *hdr,
 	unsigned int i;
 
 	/* only scan the sections containing data */
-	kmemleak_scan_area(mod->module_core, (unsigned long)mod -
-			   (unsigned long)mod->module_core,
-			   sizeof(struct module), GFP_KERNEL);
+	kmemleak_scan_area(mod, sizeof(struct module), GFP_KERNEL);
 
 	for (i = 1; i < hdr->e_shnum; i++) {
 		if (!(sechdrs[i].sh_flags & SHF_ALLOC))
@@ -2054,8 +2052,7 @@ static void kmemleak_load_module(struct module *mod, Elf_Ehdr *hdr,
 		    && strncmp(secstrings + sechdrs[i].sh_name, ".bss", 4) != 0)
 			continue;
 
-		kmemleak_scan_area(mod->module_core, sechdrs[i].sh_addr -
-				   (unsigned long)mod->module_core,
+		kmemleak_scan_area((void *)sechdrs[i].sh_addr,
 				   sechdrs[i].sh_size, GFP_KERNEL);
 	}
 }
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index 8bf765c4f58d..96106358e042 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -119,8 +119,8 @@
 /* scanning area inside a memory block */
 struct kmemleak_scan_area {
 	struct hlist_node node;
-	unsigned long offset;
-	size_t length;
+	unsigned long start;
+	size_t size;
 };
 
 #define KMEMLEAK_GREY	0
@@ -241,8 +241,6 @@ struct early_log {
 	const void *ptr;		/* allocated/freed memory block */
 	size_t size;			/* memory block size */
 	int min_count;			/* minimum reference count */
-	unsigned long offset;		/* scan area offset */
-	size_t length;			/* scan area length */
 	unsigned long trace[MAX_TRACE];	/* stack trace */
 	unsigned int trace_len;		/* stack trace length */
 };
@@ -720,14 +718,13 @@ static void make_black_object(unsigned long ptr)
  * Add a scanning area to the object. If at least one such area is added,
  * kmemleak will only scan these ranges rather than the whole memory block.
  */
-static void add_scan_area(unsigned long ptr, unsigned long offset,
-			  size_t length, gfp_t gfp)
+static void add_scan_area(unsigned long ptr, size_t size, gfp_t gfp)
 {
 	unsigned long flags;
 	struct kmemleak_object *object;
 	struct kmemleak_scan_area *area;
 
-	object = find_and_get_object(ptr, 0);
+	object = find_and_get_object(ptr, 1);
 	if (!object) {
 		kmemleak_warn("Adding scan area to unknown object at 0x%08lx\n",
 			      ptr);
@@ -741,7 +738,7 @@ static void add_scan_area(unsigned long ptr, unsigned long offset,
 	}
 
 	spin_lock_irqsave(&object->lock, flags);
-	if (offset + length > object->size) {
+	if (ptr + size > object->pointer + object->size) {
 		kmemleak_warn("Scan area larger than object 0x%08lx\n", ptr);
 		dump_object_info(object);
 		kmem_cache_free(scan_area_cache, area);
@@ -749,8 +746,8 @@ static void add_scan_area(unsigned long ptr, unsigned long offset,
 	}
 
 	INIT_HLIST_NODE(&area->node);
-	area->offset = offset;
-	area->length = length;
+	area->start = ptr;
+	area->size = size;
 
 	hlist_add_head(&area->node, &object->area_list);
 out_unlock:
@@ -786,7 +783,7 @@ static void object_no_scan(unsigned long ptr)
  * processed later once kmemleak is fully initialized.
  */
 static void __init log_early(int op_type, const void *ptr, size_t size,
-			     int min_count, unsigned long offset, size_t length)
+			     int min_count)
 {
 	unsigned long flags;
 	struct early_log *log;
@@ -808,8 +805,6 @@ static void __init log_early(int op_type, const void *ptr, size_t size,
 	log->ptr = ptr;
 	log->size = size;
 	log->min_count = min_count;
-	log->offset = offset;
-	log->length = length;
 	if (op_type == KMEMLEAK_ALLOC)
 		log->trace_len = __save_stack_trace(log->trace);
 	crt_early_log++;
@@ -858,7 +853,7 @@ void __ref kmemleak_alloc(const void *ptr, size_t size, int min_count,
 	if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr))
 		create_object((unsigned long)ptr, size, min_count, gfp);
 	else if (atomic_read(&kmemleak_early_log))
-		log_early(KMEMLEAK_ALLOC, ptr, size, min_count, 0, 0);
+		log_early(KMEMLEAK_ALLOC, ptr, size, min_count);
 }
 EXPORT_SYMBOL_GPL(kmemleak_alloc);
 
@@ -873,7 +868,7 @@ void __ref kmemleak_free(const void *ptr)
 	if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr))
 		delete_object_full((unsigned long)ptr);
 	else if (atomic_read(&kmemleak_early_log))
-		log_early(KMEMLEAK_FREE, ptr, 0, 0, 0, 0);
+		log_early(KMEMLEAK_FREE, ptr, 0, 0);
 }
 EXPORT_SYMBOL_GPL(kmemleak_free);
 
@@ -888,7 +883,7 @@ void __ref kmemleak_free_part(const void *ptr, size_t size)
 	if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr))
 		delete_object_part((unsigned long)ptr, size);
 	else if (atomic_read(&kmemleak_early_log))
-		log_early(KMEMLEAK_FREE_PART, ptr, size, 0, 0, 0);
+		log_early(KMEMLEAK_FREE_PART, ptr, size, 0);
 }
 EXPORT_SYMBOL_GPL(kmemleak_free_part);
 
@@ -903,7 +898,7 @@ void __ref kmemleak_not_leak(const void *ptr)
 	if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr))
 		make_gray_object((unsigned long)ptr);
 	else if (atomic_read(&kmemleak_early_log))
-		log_early(KMEMLEAK_NOT_LEAK, ptr, 0, 0, 0, 0);
+		log_early(KMEMLEAK_NOT_LEAK, ptr, 0, 0);
 }
 EXPORT_SYMBOL(kmemleak_not_leak);
 
@@ -919,22 +914,21 @@ void __ref kmemleak_ignore(const void *ptr)
 	if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr))
 		make_black_object((unsigned long)ptr);
 	else if (atomic_read(&kmemleak_early_log))
-		log_early(KMEMLEAK_IGNORE, ptr, 0, 0, 0, 0);
+		log_early(KMEMLEAK_IGNORE, ptr, 0, 0);
 }
 EXPORT_SYMBOL(kmemleak_ignore);
 
 /*
  * Limit the range to be scanned in an allocated memory block.
  */
-void __ref kmemleak_scan_area(const void *ptr, unsigned long offset,
-			      size_t length, gfp_t gfp)
+void __ref kmemleak_scan_area(const void *ptr, size_t size, gfp_t gfp)
 {
 	pr_debug("%s(0x%p)\n", __func__, ptr);
 
 	if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr))
-		add_scan_area((unsigned long)ptr, offset, length, gfp);
+		add_scan_area((unsigned long)ptr, size, gfp);
 	else if (atomic_read(&kmemleak_early_log))
-		log_early(KMEMLEAK_SCAN_AREA, ptr, 0, 0, offset, length);
+		log_early(KMEMLEAK_SCAN_AREA, ptr, size, 0);
 }
 EXPORT_SYMBOL(kmemleak_scan_area);
 
@@ -948,7 +942,7 @@ void __ref kmemleak_no_scan(const void *ptr)
 	if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr))
 		object_no_scan((unsigned long)ptr);
 	else if (atomic_read(&kmemleak_early_log))
-		log_early(KMEMLEAK_NO_SCAN, ptr, 0, 0, 0, 0);
+		log_early(KMEMLEAK_NO_SCAN, ptr, 0, 0);
 }
 EXPORT_SYMBOL(kmemleak_no_scan);
 
@@ -1075,9 +1069,9 @@ static void scan_object(struct kmemleak_object *object)
 		}
 	} else
 		hlist_for_each_entry(area, elem, &object->area_list, node)
-			scan_block((void *)(object->pointer + area->offset),
-				   (void *)(object->pointer + area->offset
-					    + area->length), object, 0);
+			scan_block((void *)area->start,
+				   (void *)(area->start + area->size),
+				   object, 0);
 out:
 	spin_unlock_irqrestore(&object->lock, flags);
 }
@@ -1642,8 +1636,7 @@ void __init kmemleak_init(void)
 			kmemleak_ignore(log->ptr);
 			break;
 		case KMEMLEAK_SCAN_AREA:
-			kmemleak_scan_area(log->ptr, log->offset, log->length,
-					   GFP_KERNEL);
+			kmemleak_scan_area(log->ptr, log->size, GFP_KERNEL);
 			break;
 		case KMEMLEAK_NO_SCAN:
 			kmemleak_no_scan(log->ptr);
diff --git a/mm/slab.c b/mm/slab.c
index 646db3085193..d2713a944ebd 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -2584,8 +2584,8 @@ static struct slab *alloc_slabmgmt(struct kmem_cache *cachep, void *objp,
 		 * kmemleak does not treat the ->s_mem pointer as a reference
 		 * to the object. Otherwise we will not report the leak.
 		 */
-		kmemleak_scan_area(slabp, offsetof(struct slab, list),
-				   sizeof(struct list_head), local_flags);
+		kmemleak_scan_area(&slabp->list, sizeof(struct list_head),
+				   local_flags);
 		if (!slabp)
 			return NULL;
 	} else {
-- 
cgit v1.2.3


From dd17c8f72993f9461e9c19250e3f155d6d99df22 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Thu, 29 Oct 2009 22:34:15 +0900
Subject: percpu: remove per_cpu__ prefix.

Now that the return from alloc_percpu is compatible with the address
of per-cpu vars, it makes sense to hand around the address of per-cpu
variables.  To make this sane, we remove the per_cpu__ prefix we used
created to stop people accidentally using these vars directly.

Now we have sparse, we can use that (next patch).

tj: * Updated to convert stuff which were missed by or added after the
      original patch.

    * Kill per_cpu_var() macro.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Christoph Lameter <cl@linux-foundation.org>
---
 arch/blackfin/mach-common/entry.S       |  4 ++--
 arch/cris/arch-v10/kernel/entry.S       |  2 +-
 arch/cris/arch-v32/mm/mmu.S             |  2 +-
 arch/ia64/include/asm/percpu.h          |  4 ++--
 arch/ia64/kernel/ia64_ksyms.c           |  4 ++--
 arch/ia64/mm/discontig.c                |  2 +-
 arch/microblaze/include/asm/entry.h     |  2 +-
 arch/parisc/lib/fixup.S                 |  8 +++----
 arch/powerpc/platforms/pseries/hvCall.S |  2 +-
 arch/sparc/kernel/nmi.c                 |  6 +++---
 arch/sparc/kernel/rtrap_64.S            |  8 +++----
 arch/x86/include/asm/percpu.h           | 37 +++++++++++++++------------------
 arch/x86/include/asm/system.h           |  8 +++----
 arch/x86/kernel/apic/nmi.c              |  6 +++---
 arch/x86/kernel/head_32.S               |  6 +++---
 arch/x86/kernel/vmlinux.lds.S           |  4 ++--
 arch/x86/xen/xen-asm_32.S               |  4 ++--
 include/asm-generic/percpu.h            | 12 +++++------
 include/linux/percpu-defs.h             | 18 ++++++----------
 include/linux/percpu.h                  |  5 ++---
 include/linux/vmstat.h                  |  8 +++----
 kernel/rcutorture.c                     |  8 +++----
 kernel/trace/trace.c                    |  6 +++---
 kernel/trace/trace_functions_graph.c    |  4 ++--
 24 files changed, 80 insertions(+), 90 deletions(-)

(limited to 'include/linux')

diff --git a/arch/blackfin/mach-common/entry.S b/arch/blackfin/mach-common/entry.S
index 1e7cac23e25f..a3ea7e9fe43b 100644
--- a/arch/blackfin/mach-common/entry.S
+++ b/arch/blackfin/mach-common/entry.S
@@ -835,8 +835,8 @@ ENDPROC(_resume)
 
 ENTRY(_ret_from_exception)
 #ifdef CONFIG_IPIPE
-	p2.l = _per_cpu__ipipe_percpu_domain;
-	p2.h = _per_cpu__ipipe_percpu_domain;
+	p2.l = _ipipe_percpu_domain;
+	p2.h = _ipipe_percpu_domain;
 	r0.l = _ipipe_root;
 	r0.h = _ipipe_root;
 	r2 = [p2];
diff --git a/arch/cris/arch-v10/kernel/entry.S b/arch/cris/arch-v10/kernel/entry.S
index 2c18d08cd913..c52bef39e250 100644
--- a/arch/cris/arch-v10/kernel/entry.S
+++ b/arch/cris/arch-v10/kernel/entry.S
@@ -358,7 +358,7 @@ mmu_bus_fault:
 1:	btstq	12, $r1		   ; Refill?
 	bpl	2f
 	lsrq	24, $r1     ; Get PGD index (bit 24-31)
-	move.d  [per_cpu__current_pgd], $r0 ; PGD for the current process
+	move.d  [current_pgd], $r0 ; PGD for the current process
 	move.d	[$r0+$r1.d], $r0   ; Get PMD
 	beq	2f
 	nop
diff --git a/arch/cris/arch-v32/mm/mmu.S b/arch/cris/arch-v32/mm/mmu.S
index 2238d154bde3..f125d912e140 100644
--- a/arch/cris/arch-v32/mm/mmu.S
+++ b/arch/cris/arch-v32/mm/mmu.S
@@ -115,7 +115,7 @@
 #ifdef CONFIG_SMP
 	move    $s7, $acr	; PGD
 #else
-	move.d  per_cpu__current_pgd, $acr ; PGD
+	move.d  current_pgd, $acr ; PGD
 #endif
 	; Look up PMD in PGD
 	lsrq	24, $r0	; Get PMD index into PGD (bit 24-31)
diff --git a/arch/ia64/include/asm/percpu.h b/arch/ia64/include/asm/percpu.h
index 30cf46534dd2..f7c00a5e0e2b 100644
--- a/arch/ia64/include/asm/percpu.h
+++ b/arch/ia64/include/asm/percpu.h
@@ -9,7 +9,7 @@
 #define PERCPU_ENOUGH_ROOM PERCPU_PAGE_SIZE
 
 #ifdef __ASSEMBLY__
-# define THIS_CPU(var)	(per_cpu__##var)  /* use this to mark accesses to per-CPU variables... */
+# define THIS_CPU(var)	(var)  /* use this to mark accesses to per-CPU variables... */
 #else /* !__ASSEMBLY__ */
 
 
@@ -39,7 +39,7 @@ extern void *per_cpu_init(void);
  * On the positive side, using __ia64_per_cpu_var() instead of __get_cpu_var() is slightly
  * more efficient.
  */
-#define __ia64_per_cpu_var(var)	per_cpu__##var
+#define __ia64_per_cpu_var(var)	var
 
 #include <asm-generic/percpu.h>
 
diff --git a/arch/ia64/kernel/ia64_ksyms.c b/arch/ia64/kernel/ia64_ksyms.c
index 461b99902bf6..7f4a0ed24152 100644
--- a/arch/ia64/kernel/ia64_ksyms.c
+++ b/arch/ia64/kernel/ia64_ksyms.c
@@ -30,9 +30,9 @@ EXPORT_SYMBOL(max_low_pfn);	/* defined by bootmem.c, but not exported by generic
 #endif
 
 #include <asm/processor.h>
-EXPORT_SYMBOL(per_cpu__ia64_cpu_info);
+EXPORT_SYMBOL(ia64_cpu_info);
 #ifdef CONFIG_SMP
-EXPORT_SYMBOL(per_cpu__local_per_cpu_offset);
+EXPORT_SYMBOL(local_per_cpu_offset);
 #endif
 
 #include <asm/uaccess.h>
diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c
index 19c4b2195dce..8d586d1e2515 100644
--- a/arch/ia64/mm/discontig.c
+++ b/arch/ia64/mm/discontig.c
@@ -459,7 +459,7 @@ static void __init initialize_pernode_data(void)
 		cpu = 0;
 		node = node_cpuid[cpu].nid;
 		cpu0_cpu_info = (struct cpuinfo_ia64 *)(__phys_per_cpu_start +
-			((char *)&per_cpu__ia64_cpu_info - __per_cpu_start));
+			((char *)&ia64_cpu_info - __per_cpu_start));
 		cpu0_cpu_info->node_data = mem_data[node].node_data;
 	}
 #endif /* CONFIG_SMP */
diff --git a/arch/microblaze/include/asm/entry.h b/arch/microblaze/include/asm/entry.h
index 61abbd232640..ec89f2ad0fe1 100644
--- a/arch/microblaze/include/asm/entry.h
+++ b/arch/microblaze/include/asm/entry.h
@@ -21,7 +21,7 @@
  * places
  */
 
-#define PER_CPU(var) per_cpu__##var
+#define PER_CPU(var) var
 
 # ifndef __ASSEMBLY__
 DECLARE_PER_CPU(unsigned int, KSP); /* Saved kernel stack pointer */
diff --git a/arch/parisc/lib/fixup.S b/arch/parisc/lib/fixup.S
index d172d4245cdc..f8c45cc2947d 100644
--- a/arch/parisc/lib/fixup.S
+++ b/arch/parisc/lib/fixup.S
@@ -36,8 +36,8 @@
 #endif
 	/* t2 = &__per_cpu_offset[smp_processor_id()]; */
 	LDREGX \t2(\t1),\t2 
-	addil LT%per_cpu__exception_data,%r27
-	LDREG RT%per_cpu__exception_data(%r1),\t1
+	addil LT%exception_data,%r27
+	LDREG RT%exception_data(%r1),\t1
 	/* t1 = &__get_cpu_var(exception_data) */
 	add,l \t1,\t2,\t1
 	/* t1 = t1->fault_ip */
@@ -46,8 +46,8 @@
 #else
 	.macro  get_fault_ip t1 t2
 	/* t1 = &__get_cpu_var(exception_data) */
-	addil LT%per_cpu__exception_data,%r27
-	LDREG RT%per_cpu__exception_data(%r1),\t2
+	addil LT%exception_data,%r27
+	LDREG RT%exception_data(%r1),\t2
 	/* t1 = t2->fault_ip */
 	LDREG EXCDATA_IP(\t2), \t1
 	.endm
diff --git a/arch/powerpc/platforms/pseries/hvCall.S b/arch/powerpc/platforms/pseries/hvCall.S
index c1427b3634ec..580f789cae7f 100644
--- a/arch/powerpc/platforms/pseries/hvCall.S
+++ b/arch/powerpc/platforms/pseries/hvCall.S
@@ -55,7 +55,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_PURR);				\
 	/* calculate address of stat structure r4 = opcode */	\
 	srdi	r4,r4,2;		/* index into array */	\
 	mulli	r4,r4,HCALL_STAT_SIZE;				\
-	LOAD_REG_ADDR(r7, per_cpu__hcall_stats);		\
+	LOAD_REG_ADDR(r7, hcall_stats);				\
 	add	r4,r4,r7;					\
 	ld	r7,PACA_DATA_OFFSET(r13); /* per cpu offset */	\
 	add	r4,r4,r7;					\
diff --git a/arch/sparc/kernel/nmi.c b/arch/sparc/kernel/nmi.c
index f30f4a1ead23..2ad288ff99a4 100644
--- a/arch/sparc/kernel/nmi.c
+++ b/arch/sparc/kernel/nmi.c
@@ -112,13 +112,13 @@ notrace __kprobes void perfctr_irq(int irq, struct pt_regs *regs)
 		touched = 1;
 	}
 	if (!touched && __get_cpu_var(last_irq_sum) == sum) {
-		__this_cpu_inc(per_cpu_var(alert_counter));
-		if (__this_cpu_read(per_cpu_var(alert_counter)) == 30 * nmi_hz)
+		__this_cpu_inc(alert_counter);
+		if (__this_cpu_read(alert_counter) == 30 * nmi_hz)
 			die_nmi("BUG: NMI Watchdog detected LOCKUP",
 				regs, panic_on_timeout);
 	} else {
 		__get_cpu_var(last_irq_sum) = sum;
-		__this_cpu_write(per_cpu_var(alert_counter), 0);
+		__this_cpu_write(alert_counter, 0);
 	}
 	if (__get_cpu_var(wd_enabled)) {
 		write_pic(picl_value(nmi_hz));
diff --git a/arch/sparc/kernel/rtrap_64.S b/arch/sparc/kernel/rtrap_64.S
index fd3cee4d117c..1ddec403f512 100644
--- a/arch/sparc/kernel/rtrap_64.S
+++ b/arch/sparc/kernel/rtrap_64.S
@@ -149,11 +149,11 @@ rtrap_nmi:	ldx			[%sp + PTREGS_OFF + PT_V9_TSTATE], %l1
 rtrap_irq:
 rtrap:
 #ifndef CONFIG_SMP
-		sethi			%hi(per_cpu____cpu_data), %l0
-		lduw			[%l0 + %lo(per_cpu____cpu_data)], %l1
+		sethi			%hi(__cpu_data), %l0
+		lduw			[%l0 + %lo(__cpu_data)], %l1
 #else
-		sethi			%hi(per_cpu____cpu_data), %l0
-		or			%l0, %lo(per_cpu____cpu_data), %l0
+		sethi			%hi(__cpu_data), %l0
+		or			%l0, %lo(__cpu_data), %l0
 		lduw			[%l0 + %g5], %l1
 #endif
 		cmp			%l1, 0
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 0c44196b78ac..4c170ccc72ed 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -25,19 +25,18 @@
  */
 #ifdef CONFIG_SMP
 #define PER_CPU(var, reg)						\
-	__percpu_mov_op %__percpu_seg:per_cpu__this_cpu_off, reg;	\
-	lea per_cpu__##var(reg), reg
-#define PER_CPU_VAR(var)	%__percpu_seg:per_cpu__##var
+	__percpu_mov_op %__percpu_seg:this_cpu_off, reg;		\
+	lea var(reg), reg
+#define PER_CPU_VAR(var)	%__percpu_seg:var
 #else /* ! SMP */
-#define PER_CPU(var, reg)						\
-	__percpu_mov_op $per_cpu__##var, reg
-#define PER_CPU_VAR(var)	per_cpu__##var
+#define PER_CPU(var, reg)	__percpu_mov_op $var, reg
+#define PER_CPU_VAR(var)	var
 #endif	/* SMP */
 
 #ifdef CONFIG_X86_64_SMP
 #define INIT_PER_CPU_VAR(var)  init_per_cpu__##var
 #else
-#define INIT_PER_CPU_VAR(var)  per_cpu__##var
+#define INIT_PER_CPU_VAR(var)  var
 #endif
 
 #else /* ...!ASSEMBLY */
@@ -60,12 +59,12 @@
  * There also must be an entry in vmlinux_64.lds.S
  */
 #define DECLARE_INIT_PER_CPU(var) \
-       extern typeof(per_cpu_var(var)) init_per_cpu_var(var)
+       extern typeof(var) init_per_cpu_var(var)
 
 #ifdef CONFIG_X86_64_SMP
 #define init_per_cpu_var(var)  init_per_cpu__##var
 #else
-#define init_per_cpu_var(var)  per_cpu_var(var)
+#define init_per_cpu_var(var)  var
 #endif
 
 /* For arch-specific code, we can use direct single-insn ops (they
@@ -142,16 +141,14 @@ do {							\
  * per-thread variables implemented as per-cpu variables and thus
  * stable for the duration of the respective task.
  */
-#define percpu_read(var)	percpu_from_op("mov", per_cpu__##var,	\
-					       "m" (per_cpu__##var))
-#define percpu_read_stable(var)	percpu_from_op("mov", per_cpu__##var,	\
-					       "p" (&per_cpu__##var))
-#define percpu_write(var, val)	percpu_to_op("mov", per_cpu__##var, val)
-#define percpu_add(var, val)	percpu_to_op("add", per_cpu__##var, val)
-#define percpu_sub(var, val)	percpu_to_op("sub", per_cpu__##var, val)
-#define percpu_and(var, val)	percpu_to_op("and", per_cpu__##var, val)
-#define percpu_or(var, val)	percpu_to_op("or", per_cpu__##var, val)
-#define percpu_xor(var, val)	percpu_to_op("xor", per_cpu__##var, val)
+#define percpu_read(var)		percpu_from_op("mov", var, "m" (var))
+#define percpu_read_stable(var)		percpu_from_op("mov", var, "p" (&(var)))
+#define percpu_write(var, val)		percpu_to_op("mov", var, val)
+#define percpu_add(var, val)		percpu_to_op("add", var, val)
+#define percpu_sub(var, val)		percpu_to_op("sub", var, val)
+#define percpu_and(var, val)		percpu_to_op("and", var, val)
+#define percpu_or(var, val)		percpu_to_op("or", var, val)
+#define percpu_xor(var, val)		percpu_to_op("xor", var, val)
 
 #define __this_cpu_read_1(pcp)		percpu_from_op("mov", (pcp), "m"(pcp))
 #define __this_cpu_read_2(pcp)		percpu_from_op("mov", (pcp), "m"(pcp))
@@ -236,7 +233,7 @@ do {							\
 ({									\
 	int old__;							\
 	asm volatile("btr %2,"__percpu_arg(1)"\n\tsbbl %0,%0"		\
-		     : "=r" (old__), "+m" (per_cpu__##var)		\
+		     : "=r" (old__), "+m" (var)				\
 		     : "dIr" (bit));					\
 	old__;								\
 })
diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h
index f08f97374892..de10c19d9558 100644
--- a/arch/x86/include/asm/system.h
+++ b/arch/x86/include/asm/system.h
@@ -31,7 +31,7 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
 	"movl %P[task_canary](%[next]), %%ebx\n\t"			\
 	"movl %%ebx, "__percpu_arg([stack_canary])"\n\t"
 #define __switch_canary_oparam						\
-	, [stack_canary] "=m" (per_cpu_var(stack_canary.canary))
+	, [stack_canary] "=m" (stack_canary.canary)
 #define __switch_canary_iparam						\
 	, [task_canary] "i" (offsetof(struct task_struct, stack_canary))
 #else	/* CC_STACKPROTECTOR */
@@ -113,7 +113,7 @@ do {									\
 	"movq %P[task_canary](%%rsi),%%r8\n\t"				  \
 	"movq %%r8,"__percpu_arg([gs_canary])"\n\t"
 #define __switch_canary_oparam						  \
-	, [gs_canary] "=m" (per_cpu_var(irq_stack_union.stack_canary))
+	, [gs_canary] "=m" (irq_stack_union.stack_canary)
 #define __switch_canary_iparam						  \
 	, [task_canary] "i" (offsetof(struct task_struct, stack_canary))
 #else	/* CC_STACKPROTECTOR */
@@ -134,7 +134,7 @@ do {									\
 	     __switch_canary						  \
 	     "movq %P[thread_info](%%rsi),%%r8\n\t"			  \
 	     "movq %%rax,%%rdi\n\t" 					  \
-	     "testl  %[_tif_fork],%P[ti_flags](%%r8)\n\t"	  \
+	     "testl  %[_tif_fork],%P[ti_flags](%%r8)\n\t"		  \
 	     "jnz   ret_from_fork\n\t"					  \
 	     RESTORE_CONTEXT						  \
 	     : "=a" (last)					  	  \
@@ -144,7 +144,7 @@ do {									\
 	       [ti_flags] "i" (offsetof(struct thread_info, flags)),	  \
 	       [_tif_fork] "i" (_TIF_FORK),			  	  \
 	       [thread_info] "i" (offsetof(struct task_struct, stack)),   \
-	       [current_task] "m" (per_cpu_var(current_task))		  \
+	       [current_task] "m" (current_task)			  \
 	       __switch_canary_iparam					  \
 	     : "memory", "cc" __EXTRA_CLOBBER)
 #endif
diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c
index e631cc4416f7..45404379d173 100644
--- a/arch/x86/kernel/apic/nmi.c
+++ b/arch/x86/kernel/apic/nmi.c
@@ -437,8 +437,8 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
 		 * Ayiee, looks like this CPU is stuck ...
 		 * wait a few IRQs (5 seconds) before doing the oops ...
 		 */
-		__this_cpu_inc(per_cpu_var(alert_counter));
-		if (__this_cpu_read(per_cpu_var(alert_counter)) == 5 * nmi_hz)
+		__this_cpu_inc(alert_counter);
+		if (__this_cpu_read(alert_counter) == 5 * nmi_hz)
 			/*
 			 * die_nmi will return ONLY if NOTIFY_STOP happens..
 			 */
@@ -446,7 +446,7 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
 				regs, panic_on_timeout);
 	} else {
 		__get_cpu_var(last_irq_sum) = sum;
-		__this_cpu_write(per_cpu_var(alert_counter), 0);
+		__this_cpu_write(alert_counter, 0);
 	}
 
 	/* see if the nmi watchdog went off */
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 050c278481b1..fd39eaf83b84 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -438,8 +438,8 @@ is386:	movl $2,%ecx		# set MP
 	 */
 	cmpb $0,ready
 	jne 1f
-	movl $per_cpu__gdt_page,%eax
-	movl $per_cpu__stack_canary,%ecx
+	movl $gdt_page,%eax
+	movl $stack_canary,%ecx
 	movw %cx, 8 * GDT_ENTRY_STACK_CANARY + 2(%eax)
 	shrl $16, %ecx
 	movb %cl, 8 * GDT_ENTRY_STACK_CANARY + 4(%eax)
@@ -702,7 +702,7 @@ idt_descr:
 	.word 0				# 32 bit align gdt_desc.address
 ENTRY(early_gdt_descr)
 	.word GDT_ENTRIES*8-1
-	.long per_cpu__gdt_page		/* Overwritten for secondary CPUs */
+	.long gdt_page			/* Overwritten for secondary CPUs */
 
 /*
  * The boot_gdt must mirror the equivalent in setup.S and is
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 92929fb3f9fa..ecb92717c412 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -312,7 +312,7 @@ SECTIONS
  * Per-cpu symbols which need to be offset from __per_cpu_load
  * for the boot processor.
  */
-#define INIT_PER_CPU(x) init_per_cpu__##x = per_cpu__##x + __per_cpu_load
+#define INIT_PER_CPU(x) init_per_cpu__##x = x + __per_cpu_load
 INIT_PER_CPU(gdt_page);
 INIT_PER_CPU(irq_stack_union);
 
@@ -323,7 +323,7 @@ INIT_PER_CPU(irq_stack_union);
 	   "kernel image bigger than KERNEL_IMAGE_SIZE");
 
 #ifdef CONFIG_SMP
-. = ASSERT((per_cpu__irq_stack_union == 0),
+. = ASSERT((irq_stack_union == 0),
            "irq_stack_union is not at start of per-cpu area");
 #endif
 
diff --git a/arch/x86/xen/xen-asm_32.S b/arch/x86/xen/xen-asm_32.S
index 88e15deb8b82..22a2093b5862 100644
--- a/arch/x86/xen/xen-asm_32.S
+++ b/arch/x86/xen/xen-asm_32.S
@@ -90,9 +90,9 @@ ENTRY(xen_iret)
 	GET_THREAD_INFO(%eax)
 	movl TI_cpu(%eax), %eax
 	movl __per_cpu_offset(,%eax,4), %eax
-	mov per_cpu__xen_vcpu(%eax), %eax
+	mov xen_vcpu(%eax), %eax
 #else
-	movl per_cpu__xen_vcpu, %eax
+	movl xen_vcpu, %eax
 #endif
 
 	/* check IF state we're restoring */
diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h
index 8087b90d4673..ca6f0491412b 100644
--- a/include/asm-generic/percpu.h
+++ b/include/asm-generic/percpu.h
@@ -50,11 +50,11 @@ extern unsigned long __per_cpu_offset[NR_CPUS];
  * offset.
  */
 #define per_cpu(var, cpu) \
-	(*SHIFT_PERCPU_PTR(&per_cpu_var(var), per_cpu_offset(cpu)))
+	(*SHIFT_PERCPU_PTR(&(var), per_cpu_offset(cpu)))
 #define __get_cpu_var(var) \
-	(*SHIFT_PERCPU_PTR(&per_cpu_var(var), my_cpu_offset))
+	(*SHIFT_PERCPU_PTR(&(var), my_cpu_offset))
 #define __raw_get_cpu_var(var) \
-	(*SHIFT_PERCPU_PTR(&per_cpu_var(var), __my_cpu_offset))
+	(*SHIFT_PERCPU_PTR(&(var), __my_cpu_offset))
 
 #define this_cpu_ptr(ptr) SHIFT_PERCPU_PTR(ptr, my_cpu_offset)
 #define __this_cpu_ptr(ptr) SHIFT_PERCPU_PTR(ptr, __my_cpu_offset)
@@ -66,9 +66,9 @@ extern void setup_per_cpu_areas(void);
 
 #else /* ! SMP */
 
-#define per_cpu(var, cpu)			(*((void)(cpu), &per_cpu_var(var)))
-#define __get_cpu_var(var)			per_cpu_var(var)
-#define __raw_get_cpu_var(var)			per_cpu_var(var)
+#define per_cpu(var, cpu)			(*((void)(cpu), &(var)))
+#define __get_cpu_var(var)			(var)
+#define __raw_get_cpu_var(var)			(var)
 #define this_cpu_ptr(ptr) per_cpu_ptr(ptr, 0)
 #define __this_cpu_ptr(ptr) this_cpu_ptr(ptr)
 
diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h
index 5a5d6ce4bd55..ee99f6c2cdcd 100644
--- a/include/linux/percpu-defs.h
+++ b/include/linux/percpu-defs.h
@@ -1,12 +1,6 @@
 #ifndef _LINUX_PERCPU_DEFS_H
 #define _LINUX_PERCPU_DEFS_H
 
-/*
- * Determine the real variable name from the name visible in the
- * kernel sources.
- */
-#define per_cpu_var(var) per_cpu__##var
-
 /*
  * Base implementations of per-CPU variable declarations and definitions, where
  * the section in which the variable is to be placed is provided by the
@@ -56,24 +50,24 @@
  */
 #define DECLARE_PER_CPU_SECTION(type, name, sec)			\
 	extern __PCPU_DUMMY_ATTRS char __pcpu_scope_##name;		\
-	extern __PCPU_ATTRS(sec) __typeof__(type) per_cpu__##name
+	extern __PCPU_ATTRS(sec) __typeof__(type) name
 
 #define DEFINE_PER_CPU_SECTION(type, name, sec)				\
 	__PCPU_DUMMY_ATTRS char __pcpu_scope_##name;			\
 	extern __PCPU_DUMMY_ATTRS char __pcpu_unique_##name;		\
 	__PCPU_DUMMY_ATTRS char __pcpu_unique_##name;			\
 	__PCPU_ATTRS(sec) PER_CPU_DEF_ATTRIBUTES __weak			\
-	__typeof__(type) per_cpu__##name
+	__typeof__(type) name
 #else
 /*
  * Normal declaration and definition macros.
  */
 #define DECLARE_PER_CPU_SECTION(type, name, sec)			\
-	extern __PCPU_ATTRS(sec) __typeof__(type) per_cpu__##name
+	extern __PCPU_ATTRS(sec) __typeof__(type) name
 
 #define DEFINE_PER_CPU_SECTION(type, name, sec)				\
 	__PCPU_ATTRS(sec) PER_CPU_DEF_ATTRIBUTES			\
-	__typeof__(type) per_cpu__##name
+	__typeof__(type) name
 #endif
 
 /*
@@ -137,8 +131,8 @@
 /*
  * Intermodule exports for per-CPU variables.
  */
-#define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(per_cpu__##var)
-#define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(per_cpu__##var)
+#define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(var)
+#define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(var)
 
 
 #endif /* _LINUX_PERCPU_DEFS_H */
diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index 522f421ec213..e12410e55e05 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -182,7 +182,7 @@ static inline void *pcpu_lpage_remapped(void *kaddr)
 #ifndef percpu_read
 # define percpu_read(var)						\
   ({									\
-	typeof(per_cpu_var(var)) __tmp_var__;				\
+	typeof(var) __tmp_var__;					\
 	__tmp_var__ = get_cpu_var(var);					\
 	put_cpu_var(var);						\
 	__tmp_var__;							\
@@ -253,8 +253,7 @@ do {									\
 
 /*
  * Optimized manipulation for memory allocated through the per cpu
- * allocator or for addresses of per cpu variables (can be determined
- * using per_cpu_var(xx).
+ * allocator or for addresses of per cpu variables.
  *
  * These operation guarantee exclusivity of access for other operations
  * on the *same* processor. The assumption is that per cpu data is only
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index d85889710f9b..3e489fda11a1 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -76,22 +76,22 @@ DECLARE_PER_CPU(struct vm_event_state, vm_event_states);
 
 static inline void __count_vm_event(enum vm_event_item item)
 {
-	__this_cpu_inc(per_cpu_var(vm_event_states).event[item]);
+	__this_cpu_inc(vm_event_states.event[item]);
 }
 
 static inline void count_vm_event(enum vm_event_item item)
 {
-	this_cpu_inc(per_cpu_var(vm_event_states).event[item]);
+	this_cpu_inc(vm_event_states.event[item]);
 }
 
 static inline void __count_vm_events(enum vm_event_item item, long delta)
 {
-	__this_cpu_add(per_cpu_var(vm_event_states).event[item], delta);
+	__this_cpu_add(vm_event_states.event[item], delta);
 }
 
 static inline void count_vm_events(enum vm_event_item item, long delta)
 {
-	this_cpu_add(per_cpu_var(vm_event_states).event[item], delta);
+	this_cpu_add(vm_event_states.event[item], delta);
 }
 
 extern void all_vm_events(unsigned long *);
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index 178967b6434e..e339ab349121 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@ -731,13 +731,13 @@ static void rcu_torture_timer(unsigned long unused)
 		/* Should not happen, but... */
 		pipe_count = RCU_TORTURE_PIPE_LEN;
 	}
-	__this_cpu_inc(per_cpu_var(rcu_torture_count)[pipe_count]);
+	__this_cpu_inc(rcu_torture_count[pipe_count]);
 	completed = cur_ops->completed() - completed;
 	if (completed > RCU_TORTURE_PIPE_LEN) {
 		/* Should not happen, but... */
 		completed = RCU_TORTURE_PIPE_LEN;
 	}
-	__this_cpu_inc(per_cpu_var(rcu_torture_batch)[completed]);
+	__this_cpu_inc(rcu_torture_batch[completed]);
 	preempt_enable();
 	cur_ops->readunlock(idx);
 }
@@ -786,13 +786,13 @@ rcu_torture_reader(void *arg)
 			/* Should not happen, but... */
 			pipe_count = RCU_TORTURE_PIPE_LEN;
 		}
-		__this_cpu_inc(per_cpu_var(rcu_torture_count)[pipe_count]);
+		__this_cpu_inc(rcu_torture_count[pipe_count]);
 		completed = cur_ops->completed() - completed;
 		if (completed > RCU_TORTURE_PIPE_LEN) {
 			/* Should not happen, but... */
 			completed = RCU_TORTURE_PIPE_LEN;
 		}
-		__this_cpu_inc(per_cpu_var(rcu_torture_batch)[completed]);
+		__this_cpu_inc(rcu_torture_batch[completed]);
 		preempt_enable();
 		cur_ops->readunlock(idx);
 		schedule();
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 85a5ed70b5b2..b808177af816 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -91,12 +91,12 @@ DEFINE_PER_CPU(int, ftrace_cpu_disabled);
 static inline void ftrace_disable_cpu(void)
 {
 	preempt_disable();
-	__this_cpu_inc(per_cpu_var(ftrace_cpu_disabled));
+	__this_cpu_inc(ftrace_cpu_disabled);
 }
 
 static inline void ftrace_enable_cpu(void)
 {
-	__this_cpu_dec(per_cpu_var(ftrace_cpu_disabled));
+	__this_cpu_dec(ftrace_cpu_disabled);
 	preempt_enable();
 }
 
@@ -1085,7 +1085,7 @@ trace_function(struct trace_array *tr,
 	struct ftrace_entry *entry;
 
 	/* If we are reading the ring buffer, don't trace */
-	if (unlikely(__this_cpu_read(per_cpu_var(ftrace_cpu_disabled))))
+	if (unlikely(__this_cpu_read(ftrace_cpu_disabled)))
 		return;
 
 	event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 90a6daa10962..8614e3241ff8 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -176,7 +176,7 @@ static int __trace_graph_entry(struct trace_array *tr,
 	struct ring_buffer *buffer = tr->buffer;
 	struct ftrace_graph_ent_entry *entry;
 
-	if (unlikely(__this_cpu_read(per_cpu_var(ftrace_cpu_disabled))))
+	if (unlikely(__this_cpu_read(ftrace_cpu_disabled)))
 		return 0;
 
 	event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_ENT,
@@ -240,7 +240,7 @@ static void __trace_graph_return(struct trace_array *tr,
 	struct ring_buffer *buffer = tr->buffer;
 	struct ftrace_graph_ret_entry *entry;
 
-	if (unlikely(__this_cpu_read(per_cpu_var(ftrace_cpu_disabled))))
+	if (unlikely(__this_cpu_read(ftrace_cpu_disabled)))
 		return;
 
 	event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_RET,
-- 
cgit v1.2.3


From f7b64fe806029e0a0454df132eec3c5ab576102c Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 29 Oct 2009 22:34:15 +0900
Subject: percpu: make access macros universal

Now that per_cpu__ prefix is gone, there's no distinction between
static and dynamic percpu variables.  Make get_cpu_var() take dynamic
percpu variables and ensure that all macros have parentheses around
the parameter evaluation and evaluate the variable parameter only once
such that any expression which evaluates to percpu address can be used
safely.

Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/percpu.h | 23 ++++++++++++++---------
 1 file changed, 14 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index e12410e55e05..f965f833a643 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -27,10 +27,13 @@
  * we force a syntax error here if it isn't.
  */
 #define get_cpu_var(var) (*({				\
-	extern int simple_identifier_##var(void);	\
 	preempt_disable();				\
 	&__get_cpu_var(var); }))
-#define put_cpu_var(var) preempt_enable()
+
+#define put_cpu_var(var) do {				\
+	(void)(var);					\
+	preempt_enable();				\
+} while (0)
 
 #ifdef CONFIG_SMP
 
@@ -182,17 +185,19 @@ static inline void *pcpu_lpage_remapped(void *kaddr)
 #ifndef percpu_read
 # define percpu_read(var)						\
   ({									\
-	typeof(var) __tmp_var__;					\
-	__tmp_var__ = get_cpu_var(var);					\
-	put_cpu_var(var);						\
-	__tmp_var__;							\
+	typeof(var) *pr_ptr__ = &(var);					\
+	typeof(var) pr_ret__;						\
+	pr_ret__ = get_cpu_var(*pr_ptr__);				\
+	put_cpu_var(*pr_ptr__);						\
+	pr_ret__;							\
   })
 #endif
 
 #define __percpu_generic_to_op(var, val, op)				\
 do {									\
-	get_cpu_var(var) op val;					\
-	put_cpu_var(var);						\
+	typeof(var) *pgto_ptr__ = &(var);				\
+	get_cpu_var(*pgto_ptr__) op val;				\
+	put_cpu_var(*pgto_ptr__);					\
 } while (0)
 
 #ifndef percpu_write
@@ -304,7 +309,7 @@ do {									\
 #define _this_cpu_generic_to_op(pcp, val, op)				\
 do {									\
 	preempt_disable();						\
-	*__this_cpu_ptr(&pcp) op val;					\
+	*__this_cpu_ptr(&(pcp)) op val;					\
 	preempt_enable();						\
 } while (0)
 
-- 
cgit v1.2.3


From e0fdb0e050eae331046385643618f12452aa7e73 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Thu, 29 Oct 2009 22:34:15 +0900
Subject: percpu: add __percpu for sparse.

We have to make __kernel "__attribute__((address_space(0)))" so we can
cast to it.

tj: * put_cpu_var() update.

    * Annotations added to dynamic allocator interface.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/asm-generic/percpu.h |  4 +++-
 include/linux/compiler.h     |  4 +++-
 include/linux/percpu-defs.h  |  2 +-
 include/linux/percpu.h       | 18 +++++++++++-------
 4 files changed, 18 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h
index ca6f0491412b..fded453fd25c 100644
--- a/include/asm-generic/percpu.h
+++ b/include/asm-generic/percpu.h
@@ -41,7 +41,9 @@ extern unsigned long __per_cpu_offset[NR_CPUS];
  * Only S390 provides its own means of moving the pointer.
  */
 #ifndef SHIFT_PERCPU_PTR
-#define SHIFT_PERCPU_PTR(__p, __offset)	RELOC_HIDE((__p), (__offset))
+/* Weird cast keeps both GCC and sparse happy. */
+#define SHIFT_PERCPU_PTR(__p, __offset)				\
+	RELOC_HIDE((typeof(*(__p)) __kernel __force *)(__p), (__offset))
 #endif
 
 /*
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 04fb5135b4e1..abba8045c6ef 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -5,7 +5,7 @@
 
 #ifdef __CHECKER__
 # define __user		__attribute__((noderef, address_space(1)))
-# define __kernel	/* default address space */
+# define __kernel	__attribute__((address_space(0)))
 # define __safe		__attribute__((safe))
 # define __force	__attribute__((force))
 # define __nocast	__attribute__((nocast))
@@ -15,6 +15,7 @@
 # define __acquire(x)	__context__(x,1)
 # define __release(x)	__context__(x,-1)
 # define __cond_lock(x,c)	((c) ? ({ __acquire(x); 1; }) : 0)
+# define __percpu	__attribute__((noderef, address_space(3)))
 extern void __chk_user_ptr(const volatile void __user *);
 extern void __chk_io_ptr(const volatile void __iomem *);
 #else
@@ -32,6 +33,7 @@ extern void __chk_io_ptr(const volatile void __iomem *);
 # define __acquire(x) (void)0
 # define __release(x) (void)0
 # define __cond_lock(x,c) (c)
+# define __percpu
 #endif
 
 #ifdef __KERNEL__
diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h
index ee99f6c2cdcd..0fa0cb524250 100644
--- a/include/linux/percpu-defs.h
+++ b/include/linux/percpu-defs.h
@@ -12,7 +12,7 @@
  * that section.
  */
 #define __PCPU_ATTRS(sec)						\
-	__attribute__((section(PER_CPU_BASE_SECTION sec)))		\
+	__percpu __attribute__((section(PER_CPU_BASE_SECTION sec)))	\
 	PER_CPU_ATTRIBUTES
 
 #define __PCPU_DUMMY_ATTRS						\
diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index f965f833a643..2c0d31a3f6b6 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -30,8 +30,12 @@
 	preempt_disable();				\
 	&__get_cpu_var(var); }))
 
+/*
+ * The weird & is necessary because sparse considers (void)(var) to be
+ * a direct dereference of percpu variable (var).
+ */
 #define put_cpu_var(var) do {				\
-	(void)(var);					\
+	(void)&(var);					\
 	preempt_enable();				\
 } while (0)
 
@@ -130,9 +134,9 @@ extern int __init pcpu_page_first_chunk(size_t reserved_size,
  */
 #define per_cpu_ptr(ptr, cpu)	SHIFT_PERCPU_PTR((ptr), per_cpu_offset((cpu)))
 
-extern void *__alloc_reserved_percpu(size_t size, size_t align);
-extern void *__alloc_percpu(size_t size, size_t align);
-extern void free_percpu(void *__pdata);
+extern void __percpu *__alloc_reserved_percpu(size_t size, size_t align);
+extern void __percpu *__alloc_percpu(size_t size, size_t align);
+extern void free_percpu(void __percpu *__pdata);
 
 #ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA
 extern void __init setup_per_cpu_areas(void);
@@ -142,7 +146,7 @@ extern void __init setup_per_cpu_areas(void);
 
 #define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); (ptr); })
 
-static inline void *__alloc_percpu(size_t size, size_t align)
+static inline void __percpu *__alloc_percpu(size_t size, size_t align)
 {
 	/*
 	 * Can't easily make larger alignment work with kmalloc.  WARN
@@ -153,7 +157,7 @@ static inline void *__alloc_percpu(size_t size, size_t align)
 	return kzalloc(size, GFP_KERNEL);
 }
 
-static inline void free_percpu(void *p)
+static inline void free_percpu(void __percpu *p)
 {
 	kfree(p);
 }
@@ -168,7 +172,7 @@ static inline void *pcpu_lpage_remapped(void *kaddr)
 #endif /* CONFIG_SMP */
 
 #define alloc_percpu(type)	\
-	(typeof(type) *)__alloc_percpu(sizeof(type), __alignof__(type))
+	(typeof(type) __percpu *)__alloc_percpu(sizeof(type), __alignof__(type))
 
 /*
  * Optional methods for optimized non-lvalue per-cpu variable access.
-- 
cgit v1.2.3


From 545695fb41da117928ab946067a42d9e15fd009d Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 29 Oct 2009 22:34:15 +0900
Subject: percpu: make accessors check for percpu pointer in sparse

The previous patch made sparse warn about percpu variables being used
directly without going through percpu accessors.  This patch
implements the other half - checking whether non percpu variable is
passed into percpu accessors.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Al Viro <viro@zeniv.linux.org.uk>
---
 include/asm-generic/percpu.h |  6 ++++--
 include/linux/percpu-defs.h  | 20 ++++++++++++++++++--
 include/linux/percpu.h       |  2 ++
 3 files changed, 24 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h
index fded453fd25c..04f91c2d3f7b 100644
--- a/include/asm-generic/percpu.h
+++ b/include/asm-generic/percpu.h
@@ -42,8 +42,10 @@ extern unsigned long __per_cpu_offset[NR_CPUS];
  */
 #ifndef SHIFT_PERCPU_PTR
 /* Weird cast keeps both GCC and sparse happy. */
-#define SHIFT_PERCPU_PTR(__p, __offset)				\
-	RELOC_HIDE((typeof(*(__p)) __kernel __force *)(__p), (__offset))
+#define SHIFT_PERCPU_PTR(__p, __offset)	({				\
+	__verify_pcpu_ptr((__p));					\
+	RELOC_HIDE((typeof(*(__p)) __kernel __force *)(__p), (__offset)); \
+})
 #endif
 
 /*
diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h
index 0fa0cb524250..1fa36eb54b6a 100644
--- a/include/linux/percpu-defs.h
+++ b/include/linux/percpu-defs.h
@@ -18,6 +18,16 @@
 #define __PCPU_DUMMY_ATTRS						\
 	__attribute__((section(".discard"), unused))
 
+/*
+ * Macro which verifies @ptr is a percpu pointer without evaluating
+ * @ptr.  This is to be used in percpu accessors to verify that the
+ * input parameter is a percpu pointer.
+ */
+#define __verify_pcpu_ptr(ptr)	do {					\
+	void __percpu *__vpp_verify = (typeof(ptr))NULL;		\
+	(void)__vpp_verify;						\
+} while (0)
+
 /*
  * s390 and alpha modules require percpu variables to be defined as
  * weak to force the compiler to generate GOT based external
@@ -129,10 +139,16 @@
 	__aligned(PAGE_SIZE)
 
 /*
- * Intermodule exports for per-CPU variables.
+ * Intermodule exports for per-CPU variables.  sparse forgets about
+ * address space across EXPORT_SYMBOL(), change EXPORT_SYMBOL() to
+ * noop if __CHECKER__.
  */
+#ifndef __CHECKER__
 #define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(var)
 #define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(var)
-
+#else
+#define EXPORT_PER_CPU_SYMBOL(var)
+#define EXPORT_PER_CPU_SYMBOL_GPL(var)
+#endif
 
 #endif /* _LINUX_PERCPU_DEFS_H */
diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index 2c0d31a3f6b6..42878f0cd0e2 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -237,6 +237,7 @@ extern void __bad_size_call_parameter(void);
 
 #define __pcpu_size_call_return(stem, variable)				\
 ({	typeof(variable) pscr_ret__;					\
+	__verify_pcpu_ptr(&(variable));					\
 	switch(sizeof(variable)) {					\
 	case 1: pscr_ret__ = stem##1(variable);break;			\
 	case 2: pscr_ret__ = stem##2(variable);break;			\
@@ -250,6 +251,7 @@ extern void __bad_size_call_parameter(void);
 
 #define __pcpu_size_call(stem, variable, ...)				\
 do {									\
+	__verify_pcpu_ptr(&(variable));					\
 	switch(sizeof(variable)) {					\
 		case 1: stem##1(variable, __VA_ARGS__);break;		\
 		case 2: stem##2(variable, __VA_ARGS__);break;		\
-- 
cgit v1.2.3


From 8c10cbdb4af642d9a2efb45ea89251aaab905360 Mon Sep 17 00:00:00 2001
From: Benny Halevy <bhalevy@panasas.com>
Date: Mon, 19 Oct 2009 12:04:53 +0200
Subject: nfsd: use STATEID_FMT and STATEID_VAL for printing stateids

Signed-off-by: Benny Halevy <bhalevy@panasas.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 fs/nfsd/nfs4state.c        | 46 ++++++++++++++++------------------------------
 include/linux/nfsd/state.h |  7 +++++++
 2 files changed, 23 insertions(+), 30 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 42dab9587afe..c8b621a120cd 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -2416,11 +2416,8 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta
 
 	memcpy(&open->op_delegate_stateid, &dp->dl_stateid, sizeof(dp->dl_stateid));
 
-	dprintk("NFSD: delegation stateid=(%08x/%08x/%08x/%08x)\n\n",
-	             dp->dl_stateid.si_boot,
-	             dp->dl_stateid.si_stateownerid,
-	             dp->dl_stateid.si_fileid,
-	             dp->dl_stateid.si_generation);
+	dprintk("NFSD: delegation stateid=" STATEID_FMT "\n",
+		STATEID_VAL(&dp->dl_stateid));
 out:
 	if (open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS
 			&& flag == NFS4_OPEN_DELEGATE_NONE
@@ -2510,9 +2507,8 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
 
 	status = nfs_ok;
 
-	dprintk("nfs4_process_open2: stateid=(%08x/%08x/%08x/%08x)\n",
-	            stp->st_stateid.si_boot, stp->st_stateid.si_stateownerid,
-	            stp->st_stateid.si_fileid, stp->st_stateid.si_generation);
+	dprintk("%s: stateid=" STATEID_FMT "\n", __func__,
+		STATEID_VAL(&stp->st_stateid));
 out:
 	if (fp)
 		put_nfs4_file(fp);
@@ -2678,9 +2674,8 @@ STALE_STATEID(stateid_t *stateid)
 {
 	if (time_after((unsigned long)boot_time,
 			(unsigned long)stateid->si_boot)) {
-		dprintk("NFSD: stale stateid (%08x/%08x/%08x/%08x)!\n",
-			stateid->si_boot, stateid->si_stateownerid,
-			stateid->si_fileid, stateid->si_generation);
+		dprintk("NFSD: stale stateid " STATEID_FMT "!\n",
+			STATEID_VAL(stateid));
 		return 1;
 	}
 	return 0;
@@ -2692,9 +2687,8 @@ EXPIRED_STATEID(stateid_t *stateid)
 	if (time_before((unsigned long)boot_time,
 			((unsigned long)stateid->si_boot)) &&
 	    time_before((unsigned long)(stateid->si_boot + lease_time), get_seconds())) {
-		dprintk("NFSD: expired stateid (%08x/%08x/%08x/%08x)!\n",
-			stateid->si_boot, stateid->si_stateownerid,
-			stateid->si_fileid, stateid->si_generation);
+		dprintk("NFSD: expired stateid " STATEID_FMT "!\n",
+			STATEID_VAL(stateid));
 		return 1;
 	}
 	return 0;
@@ -2708,9 +2702,8 @@ stateid_error_map(stateid_t *stateid)
 	if (EXPIRED_STATEID(stateid))
 		return nfserr_expired;
 
-	dprintk("NFSD: bad stateid (%08x/%08x/%08x/%08x)!\n",
-		stateid->si_boot, stateid->si_stateownerid,
-		stateid->si_fileid, stateid->si_generation);
+	dprintk("NFSD: bad stateid " STATEID_FMT "!\n",
+		STATEID_VAL(stateid));
 	return nfserr_bad_stateid;
 }
 
@@ -2896,10 +2889,8 @@ nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid,
 	struct svc_fh *current_fh = &cstate->current_fh;
 	__be32 status;
 
-	dprintk("NFSD: preprocess_seqid_op: seqid=%d " 
-			"stateid = (%08x/%08x/%08x/%08x)\n", seqid,
-		stateid->si_boot, stateid->si_stateownerid, stateid->si_fileid,
-		stateid->si_generation);
+	dprintk("NFSD: %s: seqid=%d stateid = " STATEID_FMT "\n", __func__,
+		seqid, STATEID_VAL(stateid));
 
 	*stpp = NULL;
 	*sopp = NULL;
@@ -3031,12 +3022,8 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	sop->so_confirmed = 1;
 	update_stateid(&stp->st_stateid);
 	memcpy(&oc->oc_resp_stateid, &stp->st_stateid, sizeof(stateid_t));
-	dprintk("NFSD: nfsd4_open_confirm: success, seqid=%d " 
-		"stateid=(%08x/%08x/%08x/%08x)\n", oc->oc_seqid,
-		         stp->st_stateid.si_boot,
-		         stp->st_stateid.si_stateownerid,
-		         stp->st_stateid.si_fileid,
-		         stp->st_stateid.si_generation);
+	dprintk("NFSD: %s: success, seqid=%d stateid=" STATEID_FMT "\n",
+		__func__, oc->oc_seqid, STATEID_VAL(&stp->st_stateid));
 
 	nfsd4_create_clid_dir(sop->so_client);
 out:
@@ -3295,9 +3282,8 @@ find_delegation_stateid(struct inode *ino, stateid_t *stid)
 	struct nfs4_file *fp;
 	struct nfs4_delegation *dl;
 
-	dprintk("NFSD:find_delegation_stateid stateid=(%08x/%08x/%08x/%08x)\n",
-                    stid->si_boot, stid->si_stateownerid,
-                    stid->si_fileid, stid->si_generation);
+	dprintk("NFSD: %s: stateid=" STATEID_FMT "\n", __func__,
+		STATEID_VAL(stid));
 
 	fp = find_file(ino);
 	if (!fp)
diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h
index b38d11324189..5aadf8aa3a97 100644
--- a/include/linux/nfsd/state.h
+++ b/include/linux/nfsd/state.h
@@ -60,6 +60,13 @@ typedef struct {
 #define si_stateownerid   si_opaque.so_stateownerid
 #define si_fileid         si_opaque.so_fileid
 
+#define STATEID_FMT	"(%08x/%08x/%08x/%08x)"
+#define STATEID_VAL(s) \
+	(s)->si_boot, \
+	(s)->si_stateownerid, \
+	(s)->si_fileid, \
+	(s)->si_generation
+
 struct nfsd4_cb_sequence {
 	/* args/res */
 	u32			cbs_minorversion;
-- 
cgit v1.2.3


From 417608c20a4c8397bc5307d949ec01ea0a0dd8e5 Mon Sep 17 00:00:00 2001
From: Eli Cohen <eli@mellanox.co.il>
Date: Thu, 12 Nov 2009 11:19:44 -0800
Subject: IB/mlx4: Remove limitation on LSO header size

Current code has a limitation: an LSO header is not allowed to cross a
64 byte boundary.  This patch removes this limitation by setting the
WQE RR for large headers thus allowing LSO headers of any size.  The
extra buffer reserved for MLX4_IB_QP_LSO QPs has been doubled, from 64
to 128 bytes, assuming this is reasonable upper limit for header
length.  Also, this patch will cause IB_DEVICE_UD_TSO to be set only
for HCA FW versions that set MLX4_DEV_CAP_FLAG_BLH; e.g. FW version
2.6.000 and higher.

Signed-off-by: Eli Cohen <eli@mellanox.co.il>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/hw/mlx4/main.c |  2 +-
 drivers/infiniband/hw/mlx4/qp.c   | 24 ++++++++++++------------
 drivers/net/mlx4/fw.c             |  1 +
 include/linux/mlx4/device.h       |  1 +
 4 files changed, 15 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 3cb3f47a10b8..e596537ff353 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -103,7 +103,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
 		props->device_cap_flags |= IB_DEVICE_UD_AV_PORT_ENFORCE;
 	if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_IPOIB_CSUM)
 		props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM;
-	if (dev->dev->caps.max_gso_sz)
+	if (dev->dev->caps.max_gso_sz && dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BLH)
 		props->device_cap_flags |= IB_DEVICE_UD_TSO;
 	if (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_RESERVED_LKEY)
 		props->device_cap_flags |= IB_DEVICE_LOCAL_DMA_LKEY;
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 518d561970aa..847030c89a8d 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -54,7 +54,8 @@ enum {
 	/*
 	 * Largest possible UD header: send with GRH and immediate data.
 	 */
-	MLX4_IB_UD_HEADER_SIZE		= 72
+	MLX4_IB_UD_HEADER_SIZE		= 72,
+	MLX4_IB_LSO_HEADER_SPARE	= 128,
 };
 
 struct mlx4_ib_sqp {
@@ -67,7 +68,8 @@ struct mlx4_ib_sqp {
 };
 
 enum {
-	MLX4_IB_MIN_SQ_STRIDE = 6
+	MLX4_IB_MIN_SQ_STRIDE	= 6,
+	MLX4_IB_CACHE_LINE_SIZE	= 64,
 };
 
 static const __be32 mlx4_ib_opcode[] = {
@@ -261,7 +263,7 @@ static int send_wqe_overhead(enum ib_qp_type type, u32 flags)
 	case IB_QPT_UD:
 		return sizeof (struct mlx4_wqe_ctrl_seg) +
 			sizeof (struct mlx4_wqe_datagram_seg) +
-			((flags & MLX4_IB_QP_LSO) ? 64 : 0);
+			((flags & MLX4_IB_QP_LSO) ? MLX4_IB_LSO_HEADER_SPARE : 0);
 	case IB_QPT_UC:
 		return sizeof (struct mlx4_wqe_ctrl_seg) +
 			sizeof (struct mlx4_wqe_raddr_seg);
@@ -1466,16 +1468,12 @@ static void __set_data_seg(struct mlx4_wqe_data_seg *dseg, struct ib_sge *sg)
 
 static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe, struct ib_send_wr *wr,
 			 struct mlx4_ib_qp *qp, unsigned *lso_seg_len,
-			 __be32 *lso_hdr_sz)
+			 __be32 *lso_hdr_sz, __be32 *blh)
 {
 	unsigned halign = ALIGN(sizeof *wqe + wr->wr.ud.hlen, 16);
 
-	/*
-	 * This is a temporary limitation and will be removed in
-	 * a forthcoming FW release:
-	 */
-	if (unlikely(halign > 64))
-		return -EINVAL;
+	if (unlikely(halign > MLX4_IB_CACHE_LINE_SIZE))
+		*blh = cpu_to_be32(1 << 6);
 
 	if (unlikely(!(qp->flags & MLX4_IB_QP_LSO) &&
 		     wr->num_sge > qp->sq.max_gs - (halign >> 4)))
@@ -1521,6 +1519,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 	__be32 dummy;
 	__be32 *lso_wqe;
 	__be32 uninitialized_var(lso_hdr_sz);
+	__be32 blh;
 	int i;
 
 	spin_lock_irqsave(&qp->sq.lock, flags);
@@ -1529,6 +1528,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 
 	for (nreq = 0; wr; ++nreq, wr = wr->next) {
 		lso_wqe = &dummy;
+		blh = 0;
 
 		if (mlx4_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) {
 			err = -ENOMEM;
@@ -1615,7 +1615,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 			size += sizeof (struct mlx4_wqe_datagram_seg) / 16;
 
 			if (wr->opcode == IB_WR_LSO) {
-				err = build_lso_seg(wqe, wr, qp, &seglen, &lso_hdr_sz);
+				err = build_lso_seg(wqe, wr, qp, &seglen, &lso_hdr_sz, &blh);
 				if (unlikely(err)) {
 					*bad_wr = wr;
 					goto out;
@@ -1686,7 +1686,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 		}
 
 		ctrl->owner_opcode = mlx4_ib_opcode[wr->opcode] |
-			(ind & qp->sq.wqe_cnt ? cpu_to_be32(1 << 31) : 0);
+			(ind & qp->sq.wqe_cnt ? cpu_to_be32(1 << 31) : 0) | blh;
 
 		stamp = ind + qp->sq_spare_wqes;
 		ind += DIV_ROUND_UP(size * 16, 1U << qp->sq.wqe_shift);
diff --git a/drivers/net/mlx4/fw.c b/drivers/net/mlx4/fw.c
index 3c16602172fc..7194be3a2894 100644
--- a/drivers/net/mlx4/fw.c
+++ b/drivers/net/mlx4/fw.c
@@ -90,6 +90,7 @@ static void dump_dev_cap_flags(struct mlx4_dev *dev, u32 flags)
 		[ 9] = "Q_Key violation counter",
 		[10] = "VMM",
 		[12] = "DPDP",
+		[15] = "Big LSO headers",
 		[16] = "MW support",
 		[17] = "APM support",
 		[18] = "Atomic ops support",
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index ce7cc6c7bcbb..e92d1bfdb330 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -61,6 +61,7 @@ enum {
 	MLX4_DEV_CAP_FLAG_BAD_PKEY_CNTR	= 1 <<  8,
 	MLX4_DEV_CAP_FLAG_BAD_QKEY_CNTR	= 1 <<  9,
 	MLX4_DEV_CAP_FLAG_DPDP		= 1 << 12,
+	MLX4_DEV_CAP_FLAG_BLH		= 1 << 15,
 	MLX4_DEV_CAP_FLAG_MEM_WINDOW	= 1 << 16,
 	MLX4_DEV_CAP_FLAG_APM		= 1 << 17,
 	MLX4_DEV_CAP_FLAG_ATOMIC	= 1 << 18,
-- 
cgit v1.2.3


From 0a3adadee42f2865bb867b8c5f4955b7def9baad Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Wed, 4 Nov 2009 18:12:35 -0500
Subject: nfsd: make fs/nfsd/vfs.h for common includes

None of this stuff is used outside nfsd, so move it out of the common
linux include directory.

Actually, probably none of the stuff in include/linux/nfsd/nfsd.h really
belongs there, so later we may remove that file entirely.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 fs/nfsd/lockd.c           |  1 +
 fs/nfsd/nfs2acl.c         |  1 +
 fs/nfsd/nfs3acl.c         |  1 +
 fs/nfsd/nfs3proc.c        |  1 +
 fs/nfsd/nfs4proc.c        |  1 +
 fs/nfsd/nfs4recover.c     |  1 +
 fs/nfsd/nfs4state.c       |  1 +
 fs/nfsd/nfs4xdr.c         |  1 +
 fs/nfsd/nfsfh.c           |  1 +
 fs/nfsd/nfsproc.c         |  1 +
 fs/nfsd/nfssvc.c          |  1 +
 fs/nfsd/vfs.c             |  1 +
 fs/nfsd/vfs.h             | 98 +++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/nfsd/nfsd.h | 87 +----------------------------------------
 14 files changed, 111 insertions(+), 86 deletions(-)
 create mode 100644 fs/nfsd/vfs.h

(limited to 'include/linux')

diff --git a/fs/nfsd/lockd.c b/fs/nfsd/lockd.c
index b2786a5f9afe..812bc64874f6 100644
--- a/fs/nfsd/lockd.c
+++ b/fs/nfsd/lockd.c
@@ -16,6 +16,7 @@
 #include <linux/sunrpc/svc.h>
 #include <linux/nfsd/nfsd.h>
 #include <linux/lockd/bind.h>
+#include "vfs.h"
 
 #define NFSDDBG_FACILITY		NFSDDBG_LOCKD
 
diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
index e2a17f0a96a7..38c883d48b02 100644
--- a/fs/nfsd/nfs2acl.c
+++ b/fs/nfsd/nfs2acl.c
@@ -14,6 +14,7 @@
 #include <linux/nfsd/xdr3.h>
 #include <linux/posix_acl.h>
 #include <linux/nfsacl.h>
+#include "vfs.h"
 
 #define NFSDDBG_FACILITY		NFSDDBG_PROC
 #define RETURN_STATUS(st)	{ resp->status = (st); return (st); }
diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c
index ff73596eb550..526d85a65f76 100644
--- a/fs/nfsd/nfs3acl.c
+++ b/fs/nfsd/nfs3acl.c
@@ -13,6 +13,7 @@
 #include <linux/nfsd/xdr3.h>
 #include <linux/posix_acl.h>
 #include <linux/nfsacl.h>
+#include "vfs.h"
 
 #define RETURN_STATUS(st)	{ resp->status = (st); return (st); }
 
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index a713c418a922..1a259d313e14 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -25,6 +25,7 @@
 #include <linux/nfsd/cache.h>
 #include <linux/nfsd/xdr3.h>
 #include <linux/nfs3.h>
+#include "vfs.h"
 
 #define NFSDDBG_FACILITY		NFSDDBG_PROC
 
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index bebc0c2e1b0a..60a93cdefef5 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -48,6 +48,7 @@
 #include <linux/nfsd/xdr4.h>
 #include <linux/nfs4_acl.h>
 #include <linux/sunrpc/gss_api.h>
+#include "vfs.h"
 
 #define NFSDDBG_FACILITY		NFSDDBG_PROC
 
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index b5348405046b..c7a6b245c7ad 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -47,6 +47,7 @@
 #include <linux/crypto.h>
 #include <linux/sched.h>
 #include <linux/mount.h>
+#include "vfs.h"
 
 #define NFSDDBG_FACILITY                NFSDDBG_PROC
 
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index c8b621a120cd..850960e5d626 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -56,6 +56,7 @@
 #include <linux/module.h>
 #include <linux/sunrpc/svcauth_gss.h>
 #include <linux/sunrpc/clnt.h>
+#include "vfs.h"
 
 #define NFSDDBG_FACILITY                NFSDDBG_PROC
 
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 0fbd50cee1f6..db0fc55670b3 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -57,6 +57,7 @@
 #include <linux/nfs4_acl.h>
 #include <linux/sunrpc/gss_api.h>
 #include <linux/sunrpc/svcauth_gss.h>
+#include "vfs.h"
 
 #define NFSDDBG_FACILITY		NFSDDBG_XDR
 
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index 01965b2f3a76..d0d8a217a3ea 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -22,6 +22,7 @@
 #include <linux/sunrpc/svc.h>
 #include <linux/sunrpc/svcauth_gss.h>
 #include <linux/nfsd/nfsd.h>
+#include "vfs.h"
 #include "auth.h"
 
 #define NFSDDBG_FACILITY		NFSDDBG_FH
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index c5393d1b8955..6c967e1ba37b 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -24,6 +24,7 @@
 #include <linux/nfsd/nfsd.h>
 #include <linux/nfsd/cache.h>
 #include <linux/nfsd/xdr.h>
+#include "vfs.h"
 
 typedef struct svc_rqst	svc_rqst;
 typedef struct svc_buf	svc_buf;
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 67ea83eedd43..2944b31dcbe6 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -35,6 +35,7 @@
 #include <linux/lockd/bind.h>
 #include <linux/nfsacl.h>
 #include <linux/seq_file.h>
+#include "vfs.h"
 
 #define NFSDDBG_FACILITY	NFSDDBG_SVC
 
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 638573968dcf..a7038ede671a 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -56,6 +56,7 @@
 #endif /* CONFIG_NFSD_V4 */
 #include <linux/jhash.h>
 #include <linux/ima.h>
+#include "vfs.h"
 
 #include <asm/uaccess.h>
 
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
new file mode 100644
index 000000000000..b8011fd2fcab
--- /dev/null
+++ b/fs/nfsd/vfs.h
@@ -0,0 +1,98 @@
+/*
+ * Copyright (C) 1995-1997 Olaf Kirch <okir@monad.swb.de>
+ */
+
+#ifndef LINUX_NFSD_VFS_H
+#define LINUX_NFSD_VFS_H
+
+/*
+ * Flags for nfsd_permission
+ */
+#define NFSD_MAY_NOP		0
+#define NFSD_MAY_EXEC		1 /* == MAY_EXEC */
+#define NFSD_MAY_WRITE		2 /* == MAY_WRITE */
+#define NFSD_MAY_READ		4 /* == MAY_READ */
+#define NFSD_MAY_SATTR		8
+#define NFSD_MAY_TRUNC		16
+#define NFSD_MAY_LOCK		32
+#define NFSD_MAY_OWNER_OVERRIDE	64
+#define NFSD_MAY_LOCAL_ACCESS	128 /* IRIX doing local access check on device special file*/
+#define NFSD_MAY_BYPASS_GSS_ON_ROOT 256
+
+#define NFSD_MAY_CREATE		(NFSD_MAY_EXEC|NFSD_MAY_WRITE)
+#define NFSD_MAY_REMOVE		(NFSD_MAY_EXEC|NFSD_MAY_WRITE|NFSD_MAY_TRUNC)
+
+/*
+ * Callback function for readdir
+ */
+typedef int (*nfsd_dirop_t)(struct inode *, struct dentry *, int, int);
+
+/* nfsd/vfs.c */
+int		fh_lock_parent(struct svc_fh *, struct dentry *);
+int		nfsd_racache_init(int);
+void		nfsd_racache_shutdown(void);
+int		nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp,
+		                struct svc_export **expp);
+__be32		nfsd_lookup(struct svc_rqst *, struct svc_fh *,
+				const char *, unsigned int, struct svc_fh *);
+__be32		 nfsd_lookup_dentry(struct svc_rqst *, struct svc_fh *,
+				const char *, unsigned int,
+				struct svc_export **, struct dentry **);
+__be32		nfsd_setattr(struct svc_rqst *, struct svc_fh *,
+				struct iattr *, int, time_t);
+#ifdef CONFIG_NFSD_V4
+__be32          nfsd4_set_nfs4_acl(struct svc_rqst *, struct svc_fh *,
+                    struct nfs4_acl *);
+int             nfsd4_get_nfs4_acl(struct svc_rqst *, struct dentry *, struct nfs4_acl **);
+#endif /* CONFIG_NFSD_V4 */
+__be32		nfsd_create(struct svc_rqst *, struct svc_fh *,
+				char *name, int len, struct iattr *attrs,
+				int type, dev_t rdev, struct svc_fh *res);
+#ifdef CONFIG_NFSD_V3
+__be32		nfsd_access(struct svc_rqst *, struct svc_fh *, u32 *, u32 *);
+__be32		nfsd_create_v3(struct svc_rqst *, struct svc_fh *,
+				char *name, int len, struct iattr *attrs,
+				struct svc_fh *res, int createmode,
+				u32 *verifier, int *truncp, int *created);
+__be32		nfsd_commit(struct svc_rqst *, struct svc_fh *,
+				loff_t, unsigned long);
+#endif /* CONFIG_NFSD_V3 */
+__be32		nfsd_open(struct svc_rqst *, struct svc_fh *, int,
+				int, struct file **);
+void		nfsd_close(struct file *);
+__be32 		nfsd_read(struct svc_rqst *, struct svc_fh *, struct file *,
+				loff_t, struct kvec *, int, unsigned long *);
+__be32 		nfsd_write(struct svc_rqst *, struct svc_fh *,struct file *,
+				loff_t, struct kvec *,int, unsigned long *, int *);
+__be32		nfsd_readlink(struct svc_rqst *, struct svc_fh *,
+				char *, int *);
+__be32		nfsd_symlink(struct svc_rqst *, struct svc_fh *,
+				char *name, int len, char *path, int plen,
+				struct svc_fh *res, struct iattr *);
+__be32		nfsd_link(struct svc_rqst *, struct svc_fh *,
+				char *, int, struct svc_fh *);
+__be32		nfsd_rename(struct svc_rqst *,
+				struct svc_fh *, char *, int,
+				struct svc_fh *, char *, int);
+__be32		nfsd_remove(struct svc_rqst *,
+				struct svc_fh *, char *, int);
+__be32		nfsd_unlink(struct svc_rqst *, struct svc_fh *, int type,
+				char *name, int len);
+int		nfsd_truncate(struct svc_rqst *, struct svc_fh *,
+				unsigned long size);
+__be32		nfsd_readdir(struct svc_rqst *, struct svc_fh *,
+			     loff_t *, struct readdir_cd *, filldir_t);
+__be32		nfsd_statfs(struct svc_rqst *, struct svc_fh *,
+				struct kstatfs *, int access);
+
+int		nfsd_notify_change(struct inode *, struct iattr *);
+__be32		nfsd_permission(struct svc_rqst *, struct svc_export *,
+				struct dentry *, int);
+int		nfsd_sync_dir(struct dentry *dp);
+
+#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
+struct posix_acl *nfsd_get_posix_acl(struct svc_fh *, int);
+int nfsd_set_posix_acl(struct svc_fh *, int, struct posix_acl *);
+#endif
+
+#endif /* LINUX_NFSD_VFS_H */
diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h
index 510ffdd5020e..e4518d090a8c 100644
--- a/include/linux/nfsd/nfsd.h
+++ b/include/linux/nfsd/nfsd.h
@@ -25,30 +25,10 @@
  */
 #define NFSD_SUPPORTED_MINOR_VERSION	1
 
-/*
- * Flags for nfsd_permission
- */
-#define NFSD_MAY_NOP		0
-#define NFSD_MAY_EXEC		1 /* == MAY_EXEC */
-#define NFSD_MAY_WRITE		2 /* == MAY_WRITE */
-#define NFSD_MAY_READ		4 /* == MAY_READ */
-#define NFSD_MAY_SATTR		8
-#define NFSD_MAY_TRUNC		16
-#define NFSD_MAY_LOCK		32
-#define NFSD_MAY_OWNER_OVERRIDE	64
-#define NFSD_MAY_LOCAL_ACCESS	128 /* IRIX doing local access check on device special file*/
-#define NFSD_MAY_BYPASS_GSS_ON_ROOT 256
-
-#define NFSD_MAY_CREATE		(NFSD_MAY_EXEC|NFSD_MAY_WRITE)
-#define NFSD_MAY_REMOVE		(NFSD_MAY_EXEC|NFSD_MAY_WRITE|NFSD_MAY_TRUNC)
-
-/*
- * Callback function for readdir
- */
 struct readdir_cd {
 	__be32			err;	/* 0, nfserr, or nfserr_eof */
 };
-typedef int (*nfsd_dirop_t)(struct inode *, struct dentry *, int, int);
+
 
 extern struct svc_program	nfsd_program;
 extern struct svc_version	nfsd_version2, nfsd_version3,
@@ -73,69 +53,6 @@ int		nfsd_nrpools(void);
 int		nfsd_get_nrthreads(int n, int *);
 int		nfsd_set_nrthreads(int n, int *);
 
-/* nfsd/vfs.c */
-int		fh_lock_parent(struct svc_fh *, struct dentry *);
-int		nfsd_racache_init(int);
-void		nfsd_racache_shutdown(void);
-int		nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp,
-		                struct svc_export **expp);
-__be32		nfsd_lookup(struct svc_rqst *, struct svc_fh *,
-				const char *, unsigned int, struct svc_fh *);
-__be32		 nfsd_lookup_dentry(struct svc_rqst *, struct svc_fh *,
-				const char *, unsigned int,
-				struct svc_export **, struct dentry **);
-__be32		nfsd_setattr(struct svc_rqst *, struct svc_fh *,
-				struct iattr *, int, time_t);
-#ifdef CONFIG_NFSD_V4
-__be32          nfsd4_set_nfs4_acl(struct svc_rqst *, struct svc_fh *,
-                    struct nfs4_acl *);
-int             nfsd4_get_nfs4_acl(struct svc_rqst *, struct dentry *, struct nfs4_acl **);
-#endif /* CONFIG_NFSD_V4 */
-__be32		nfsd_create(struct svc_rqst *, struct svc_fh *,
-				char *name, int len, struct iattr *attrs,
-				int type, dev_t rdev, struct svc_fh *res);
-#ifdef CONFIG_NFSD_V3
-__be32		nfsd_access(struct svc_rqst *, struct svc_fh *, u32 *, u32 *);
-__be32		nfsd_create_v3(struct svc_rqst *, struct svc_fh *,
-				char *name, int len, struct iattr *attrs,
-				struct svc_fh *res, int createmode,
-				u32 *verifier, int *truncp, int *created);
-__be32		nfsd_commit(struct svc_rqst *, struct svc_fh *,
-				loff_t, unsigned long);
-#endif /* CONFIG_NFSD_V3 */
-__be32		nfsd_open(struct svc_rqst *, struct svc_fh *, int,
-				int, struct file **);
-void		nfsd_close(struct file *);
-__be32 		nfsd_read(struct svc_rqst *, struct svc_fh *, struct file *,
-				loff_t, struct kvec *, int, unsigned long *);
-__be32 		nfsd_write(struct svc_rqst *, struct svc_fh *,struct file *,
-				loff_t, struct kvec *,int, unsigned long *, int *);
-__be32		nfsd_readlink(struct svc_rqst *, struct svc_fh *,
-				char *, int *);
-__be32		nfsd_symlink(struct svc_rqst *, struct svc_fh *,
-				char *name, int len, char *path, int plen,
-				struct svc_fh *res, struct iattr *);
-__be32		nfsd_link(struct svc_rqst *, struct svc_fh *,
-				char *, int, struct svc_fh *);
-__be32		nfsd_rename(struct svc_rqst *,
-				struct svc_fh *, char *, int,
-				struct svc_fh *, char *, int);
-__be32		nfsd_remove(struct svc_rqst *,
-				struct svc_fh *, char *, int);
-__be32		nfsd_unlink(struct svc_rqst *, struct svc_fh *, int type,
-				char *name, int len);
-int		nfsd_truncate(struct svc_rqst *, struct svc_fh *,
-				unsigned long size);
-__be32		nfsd_readdir(struct svc_rqst *, struct svc_fh *,
-			     loff_t *, struct readdir_cd *, filldir_t);
-__be32		nfsd_statfs(struct svc_rqst *, struct svc_fh *,
-				struct kstatfs *, int access);
-
-int		nfsd_notify_change(struct inode *, struct iattr *);
-__be32		nfsd_permission(struct svc_rqst *, struct svc_export *,
-				struct dentry *, int);
-int		nfsd_sync_dir(struct dentry *dp);
-
 #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
 #ifdef CONFIG_NFSD_V2_ACL
 extern struct svc_version nfsd_acl_version2;
@@ -147,8 +64,6 @@ extern struct svc_version nfsd_acl_version3;
 #else
 #define nfsd_acl_version3 NULL
 #endif
-struct posix_acl *nfsd_get_posix_acl(struct svc_fh *, int);
-int nfsd_set_posix_acl(struct svc_fh *, int, struct posix_acl *);
 #endif
 
 enum vers_op {NFSD_SET, NFSD_CLEAR, NFSD_TEST, NFSD_AVAIL };
-- 
cgit v1.2.3


From 5db53f3e80dee2d9dff5e534f9e9fe1db17c9936 Mon Sep 17 00:00:00 2001
From: Joern Engel <joern@logfs.org>
Date: Fri, 20 Nov 2009 20:13:39 +0100
Subject: [LogFS] add new flash file system

This is a new flash file system. See
Documentation/filesystems/logfs.txt

Signed-off-by: Joern Engel <joern@logfs.org>
---
 Documentation/filesystems/00-INDEX  |    2 +
 Documentation/filesystems/logfs.txt |  241 ++++
 fs/Kconfig                          |    1 +
 fs/Makefile                         |    1 +
 fs/logfs/Kconfig                    |   17 +
 fs/logfs/Makefile                   |   13 +
 fs/logfs/compr.c                    |   95 ++
 fs/logfs/dev_bdev.c                 |  263 ++++
 fs/logfs/dev_mtd.c                  |  253 ++++
 fs/logfs/dir.c                      |  818 +++++++++++++
 fs/logfs/file.c                     |  263 ++++
 fs/logfs/gc.c                       |  730 ++++++++++++
 fs/logfs/inode.c                    |  417 +++++++
 fs/logfs/journal.c                  |  879 ++++++++++++++
 fs/logfs/logfs.h                    |  722 +++++++++++
 fs/logfs/logfs_abi.h                |  627 ++++++++++
 fs/logfs/readwrite.c                | 2246 +++++++++++++++++++++++++++++++++++
 fs/logfs/segment.c                  |  924 ++++++++++++++
 fs/logfs/super.c                    |  634 ++++++++++
 include/linux/btree-128.h           |  109 ++
 include/linux/btree-type.h          |  147 +++
 include/linux/btree.h               |  243 ++++
 lib/Kconfig                         |    3 +
 lib/Makefile                        |    1 +
 lib/btree.c                         |  797 +++++++++++++
 25 files changed, 10446 insertions(+)
 create mode 100644 Documentation/filesystems/logfs.txt
 create mode 100644 fs/logfs/Kconfig
 create mode 100644 fs/logfs/Makefile
 create mode 100644 fs/logfs/compr.c
 create mode 100644 fs/logfs/dev_bdev.c
 create mode 100644 fs/logfs/dev_mtd.c
 create mode 100644 fs/logfs/dir.c
 create mode 100644 fs/logfs/file.c
 create mode 100644 fs/logfs/gc.c
 create mode 100644 fs/logfs/inode.c
 create mode 100644 fs/logfs/journal.c
 create mode 100644 fs/logfs/logfs.h
 create mode 100644 fs/logfs/logfs_abi.h
 create mode 100644 fs/logfs/readwrite.c
 create mode 100644 fs/logfs/segment.c
 create mode 100644 fs/logfs/super.c
 create mode 100644 include/linux/btree-128.h
 create mode 100644 include/linux/btree-type.h
 create mode 100644 include/linux/btree.h
 create mode 100644 lib/btree.c

(limited to 'include/linux')

diff --git a/Documentation/filesystems/00-INDEX b/Documentation/filesystems/00-INDEX
index f15621ee5599..d362aa543b27 100644
--- a/Documentation/filesystems/00-INDEX
+++ b/Documentation/filesystems/00-INDEX
@@ -62,6 +62,8 @@ jfs.txt
 	- info and mount options for the JFS filesystem.
 locks.txt
 	- info on file locking implementations, flock() vs. fcntl(), etc.
+logfs.txt
+	- info on the LogFS flash filesystem.
 mandatory-locking.txt
 	- info on the Linux implementation of Sys V mandatory file locking.
 ncpfs.txt
diff --git a/Documentation/filesystems/logfs.txt b/Documentation/filesystems/logfs.txt
new file mode 100644
index 000000000000..e64c94ba401a
--- /dev/null
+++ b/Documentation/filesystems/logfs.txt
@@ -0,0 +1,241 @@
+
+The LogFS Flash Filesystem
+==========================
+
+Specification
+=============
+
+Superblocks
+-----------
+
+Two superblocks exist at the beginning and end of the filesystem.
+Each superblock is 256 Bytes large, with another 3840 Bytes reserved
+for future purposes, making a total of 4096 Bytes.
+
+Superblock locations may differ for MTD and block devices.  On MTD the
+first non-bad block contains a superblock in the first 4096 Bytes and
+the last non-bad block contains a superblock in the last 4096 Bytes.
+On block devices, the first 4096 Bytes of the device contain the first
+superblock and the last aligned 4096 Byte-block contains the second
+superblock.
+
+For the most part, the superblocks can be considered read-only.  They
+are written only to correct errors detected within the superblocks,
+move the journal and change the filesystem parameters through tunefs.
+As a result, the superblock does not contain any fields that require
+constant updates, like the amount of free space, etc.
+
+Segments
+--------
+
+The space in the device is split up into equal-sized segments.
+Segments are the primary write unit of LogFS.  Within each segments,
+writes happen from front (low addresses) to back (high addresses.  If
+only a partial segment has been written, the segment number, the
+current position within and optionally a write buffer are stored in
+the journal.
+
+Segments are erased as a whole.  Therefore Garbage Collection may be
+required to completely free a segment before doing so.
+
+Journal
+--------
+
+The journal contains all global information about the filesystem that
+is subject to frequent change.  At mount time, it has to be scanned
+for the most recent commit entry, which contains a list of pointers to
+all currently valid entries.
+
+Object Store
+------------
+
+All space except for the superblocks and journal is part of the object
+store.  Each segment contains a segment header and a number of
+objects, each consisting of the object header and the payload.
+Objects are either inodes, directory entries (dentries), file data
+blocks or indirect blocks.
+
+Levels
+------
+
+Garbage collection (GC) may fail if all data is written
+indiscriminately.  One requirement of GC is that data is seperated
+roughly according to the distance between the tree root and the data.
+Effectively that means all file data is on level 0, indirect blocks
+are on levels 1, 2, 3 4 or 5 for 1x, 2x, 3x, 4x or 5x indirect blocks,
+respectively.  Inode file data is on level 6 for the inodes and 7-11
+for indirect blocks.
+
+Each segment contains objects of a single level only.  As a result,
+each level requires its own seperate segment to be open for writing.
+
+Inode File
+----------
+
+All inodes are stored in a special file, the inode file.  Single
+exception is the inode file's inode (master inode) which for obvious
+reasons is stored in the journal instead.  Instead of data blocks, the
+leaf nodes of the inode files are inodes.
+
+Aliases
+-------
+
+Writes in LogFS are done by means of a wandering tree.  A naïve
+implementation would require that for each write or a block, all
+parent blocks are written as well, since the block pointers have
+changed.  Such an implementation would not be very efficient.
+
+In LogFS, the block pointer changes are cached in the journal by means
+of alias entries.  Each alias consists of its logical address - inode
+number, block index, level and child number (index into block) - and
+the changed data.  Any 8-byte word can be changes in this manner.
+
+Currently aliases are used for block pointers, file size, file used
+bytes and the height of an inodes indirect tree.
+
+Segment Aliases
+---------------
+
+Related to regular aliases, these are used to handle bad blocks.
+Initially, bad blocks are handled by moving the affected segment
+content to a spare segment and noting this move in the journal with a
+segment alias, a simple (to, from) tupel.  GC will later empty this
+segment and the alias can be removed again.  This is used on MTD only.
+
+Vim
+---
+
+By cleverly predicting the life time of data, it is possible to
+seperate long-living data from short-living data and thereby reduce
+the GC overhead later.  Each type of distinc life expectency (vim) can
+have a seperate segment open for writing.  Each (level, vim) tupel can
+be open just once.  If an open segment with unknown vim is encountered
+at mount time, it is closed and ignored henceforth.
+
+Indirect Tree
+-------------
+
+Inodes in LogFS are similar to FFS-style filesystems with direct and
+indirect block pointers.  One difference is that LogFS uses a single
+indirect pointer that can be either a 1x, 2x, etc. indirect pointer.
+A height field in the inode defines the height of the indirect tree
+and thereby the indirection of the pointer.
+
+Another difference is the addressing of indirect blocks.  In LogFS,
+the first 16 pointers in the first indirect block are left empty,
+corresponding to the 16 direct pointers in the inode.  In ext2 (maybe
+others as well) the first pointer in the first indirect block
+corresponds to logical block 12, skipping the 12 direct pointers.
+So where ext2 is using arithmetic to better utilize space, LogFS keeps
+arithmetic simple and uses compression to save space.
+
+Compression
+-----------
+
+Both file data and metadata can be compressed.  Compression for file
+data can be enabled with chattr +c and disabled with chattr -c.  Doing
+so has no effect on existing data, but new data will be stored
+accordingly.  New inodes will inherit the compression flag of the
+parent directory.
+
+Metadata is always compressed.  However, the space accounting ignores
+this and charges for the uncompressed size.  Failing to do so could
+result in GC failures when, after moving some data, indirect blocks
+compress worse than previously.  Even on a 100% full medium, GC may
+not consume any extra space, so the compression gains are lost space
+to the user.
+
+However, they are not lost space to the filesystem internals.  By
+cheating the user for those bytes, the filesystem gained some slack
+space and GC will run less often and faster.
+
+Garbage Collection and Wear Leveling
+------------------------------------
+
+Garbage collection is invoked whenever the number of free segments
+falls below a threshold.  The best (known) candidate is picked based
+on the least amount of valid data contained in the segment.  All
+remaining valid data is copied elsewhere, thereby invalidating it.
+
+The GC code also checks for aliases and writes then back if their
+number gets too large.
+
+Wear leveling is done by occasionally picking a suboptimal segment for
+garbage collection.  If a stale segments erase count is significantly
+lower than the active segments' erase counts, it will be picked.  Wear
+leveling is rate limited, so it will never monopolize the device for
+more than one segment worth at a time.
+
+Values for "occasionally", "significantly lower" are compile time
+constants.
+
+Hashed directories
+------------------
+
+To satisfy efficient lookup(), directory entries are hashed and
+located based on the hash.  In order to both support large directories
+and not be overly inefficient for small directories, several hash
+tables of increasing size are used.  For each table, the hash value
+modulo the table size gives the table index.
+
+Tables sizes are chosen to limit the number of indirect blocks with a
+fully populated table to 0, 1, 2 or 3 respectively.  So the first
+table contains 16 entries, the second 512-16, etc.
+
+The last table is special in several ways.  First its size depends on
+the effective 32bit limit on telldir/seekdir cookies.  Since logfs
+uses the upper half of the address space for indirect blocks, the size
+is limited to 2^31.  Secondly the table contains hash buckets with 16
+entries each.
+
+Using single-entry buckets would result in birthday "attacks".  At
+just 2^16 used entries, hash collisions would be likely (P >= 0.5).
+My math skills are insufficient to do the combinatorics for the 17x
+collisions necessary to overflow a bucket, but testing showed that in
+10,000 runs the lowest directory fill before a bucket overflow was
+188,057,130 entries with an average of 315,149,915 entries.  So for
+directory sizes of up to a million, bucket overflows should be
+virtually impossible under normal circumstances.
+
+With carefully chosen filenames, it is obviously possible to cause an
+overflow with just 21 entries (4 higher tables + 16 entries + 1).  So
+there may be a security concern if a malicious user has write access
+to a directory.
+
+Open For Discussion
+===================
+
+Device Address Space
+--------------------
+
+A device address space is used for caching.  Both block devices and
+MTD provide functions to either read a single page or write a segment.
+Partial segments may be written for data integrity, but where possible
+complete segments are written for performance on simple block device
+flash media.
+
+Meta Inodes
+-----------
+
+Inodes are stored in the inode file, which is just a regular file for
+most purposes.  At umount time, however, the inode file needs to
+remain open until all dirty inodes are written.  So
+generic_shutdown_super() may not close this inode, but shouldn't
+complain about remaining inodes due to the inode file either.  Same
+goes for mapping inode of the device address space.
+
+Currently logfs uses a hack that essentially copies part of fs/inode.c
+code over.  A general solution would be preferred.
+
+Indirect block mapping
+----------------------
+
+With compression, the block device (or mapping inode) cannot be used
+to cache indirect blocks.  Some other place is required.  Currently
+logfs uses the top half of each inode's address space.  The low 8TB
+(on 32bit) are filled with file data, the high 8TB are used for
+indirect blocks.
+
+One problem is that 16TB files created on 64bit systems actually have
+data in the top 8TB.  But files >16TB would cause problems anyway, so
+only the limit has changed.
diff --git a/fs/Kconfig b/fs/Kconfig
index 64d44efad7a5..7405f071be67 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -177,6 +177,7 @@ source "fs/efs/Kconfig"
 source "fs/jffs2/Kconfig"
 # UBIFS File system configuration
 source "fs/ubifs/Kconfig"
+source "fs/logfs/Kconfig"
 source "fs/cramfs/Kconfig"
 source "fs/squashfs/Kconfig"
 source "fs/freevxfs/Kconfig"
diff --git a/fs/Makefile b/fs/Makefile
index af6d04700d9c..c3633aa46911 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -99,6 +99,7 @@ obj-$(CONFIG_NTFS_FS)		+= ntfs/
 obj-$(CONFIG_UFS_FS)		+= ufs/
 obj-$(CONFIG_EFS_FS)		+= efs/
 obj-$(CONFIG_JFFS2_FS)		+= jffs2/
+obj-$(CONFIG_LOGFS)		+= logfs/
 obj-$(CONFIG_UBIFS_FS)		+= ubifs/
 obj-$(CONFIG_AFFS_FS)		+= affs/
 obj-$(CONFIG_ROMFS_FS)		+= romfs/
diff --git a/fs/logfs/Kconfig b/fs/logfs/Kconfig
new file mode 100644
index 000000000000..daf9a9b32dd3
--- /dev/null
+++ b/fs/logfs/Kconfig
@@ -0,0 +1,17 @@
+config LOGFS
+	tristate "LogFS file system (EXPERIMENTAL)"
+	depends on (MTD || BLOCK) && EXPERIMENTAL
+	select ZLIB_INFLATE
+	select ZLIB_DEFLATE
+	select CRC32
+	select BTREE
+	help
+	  Flash filesystem aimed to scale efficiently to large devices.
+	  In comparison to JFFS2 it offers significantly faster mount
+	  times and potentially less RAM usage, although the latter has
+	  not been measured yet.
+
+	  In its current state it is still very experimental and should
+	  not be used for other than testing purposes.
+
+	  If unsure, say N.
diff --git a/fs/logfs/Makefile b/fs/logfs/Makefile
new file mode 100644
index 000000000000..4820027787ee
--- /dev/null
+++ b/fs/logfs/Makefile
@@ -0,0 +1,13 @@
+obj-$(CONFIG_LOGFS)	+= logfs.o
+
+logfs-y	+= compr.o
+logfs-y	+= dir.o
+logfs-y	+= file.o
+logfs-y	+= gc.o
+logfs-y	+= inode.o
+logfs-y	+= journal.o
+logfs-y	+= readwrite.o
+logfs-y	+= segment.o
+logfs-y	+= super.o
+logfs-$(CONFIG_BLOCK)	+= dev_bdev.o
+logfs-$(CONFIG_MTD)	+= dev_mtd.o
diff --git a/fs/logfs/compr.c b/fs/logfs/compr.c
new file mode 100644
index 000000000000..44bbfd249abc
--- /dev/null
+++ b/fs/logfs/compr.c
@@ -0,0 +1,95 @@
+/*
+ * fs/logfs/compr.c	- compression routines
+ *
+ * As should be obvious for Linux kernel code, license is GPLv2
+ *
+ * Copyright (c) 2005-2008 Joern Engel <joern@logfs.org>
+ */
+#include "logfs.h"
+#include <linux/vmalloc.h>
+#include <linux/zlib.h>
+
+#define COMPR_LEVEL 3
+
+static DEFINE_MUTEX(compr_mutex);
+static struct z_stream_s stream;
+
+int logfs_compress(void *in, void *out, size_t inlen, size_t outlen)
+{
+	int err, ret;
+
+	ret = -EIO;
+	mutex_lock(&compr_mutex);
+	err = zlib_deflateInit(&stream, COMPR_LEVEL);
+	if (err != Z_OK)
+		goto error;
+
+	stream.next_in = in;
+	stream.avail_in = inlen;
+	stream.total_in = 0;
+	stream.next_out = out;
+	stream.avail_out = outlen;
+	stream.total_out = 0;
+
+	err = zlib_deflate(&stream, Z_FINISH);
+	if (err != Z_STREAM_END)
+		goto error;
+
+	err = zlib_deflateEnd(&stream);
+	if (err != Z_OK)
+		goto error;
+
+	if (stream.total_out >= stream.total_in)
+		goto error;
+
+	ret = stream.total_out;
+error:
+	mutex_unlock(&compr_mutex);
+	return ret;
+}
+
+int logfs_uncompress(void *in, void *out, size_t inlen, size_t outlen)
+{
+	int err, ret;
+
+	ret = -EIO;
+	mutex_lock(&compr_mutex);
+	err = zlib_inflateInit(&stream);
+	if (err != Z_OK)
+		goto error;
+
+	stream.next_in = in;
+	stream.avail_in = inlen;
+	stream.total_in = 0;
+	stream.next_out = out;
+	stream.avail_out = outlen;
+	stream.total_out = 0;
+
+	err = zlib_inflate(&stream, Z_FINISH);
+	if (err != Z_STREAM_END)
+		goto error;
+
+	err = zlib_inflateEnd(&stream);
+	if (err != Z_OK)
+		goto error;
+
+	ret = 0;
+error:
+	mutex_unlock(&compr_mutex);
+	return ret;
+}
+
+int __init logfs_compr_init(void)
+{
+	size_t size = max(zlib_deflate_workspacesize(),
+			zlib_inflate_workspacesize());
+	stream.workspace = vmalloc(size);
+	if (!stream.workspace)
+		return -ENOMEM;
+	return 0;
+}
+
+void logfs_compr_exit(void)
+{
+	vfree(stream.workspace);
+}
diff --git a/fs/logfs/dev_bdev.c b/fs/logfs/dev_bdev.c
new file mode 100644
index 000000000000..58a057b6e1af
--- /dev/null
+++ b/fs/logfs/dev_bdev.c
@@ -0,0 +1,263 @@
+/*
+ * fs/logfs/dev_bdev.c	- Device access methods for block devices
+ *
+ * As should be obvious for Linux kernel code, license is GPLv2
+ *
+ * Copyright (c) 2005-2008 Joern Engel <joern@logfs.org>
+ */
+#include "logfs.h"
+#include <linux/bio.h>
+#include <linux/blkdev.h>
+#include <linux/buffer_head.h>
+
+#define PAGE_OFS(ofs) ((ofs) & (PAGE_SIZE-1))
+
+static void request_complete(struct bio *bio, int err)
+{
+	complete((struct completion *)bio->bi_private);
+}
+
+static int sync_request(struct page *page, struct block_device *bdev, int rw)
+{
+	struct bio bio;
+	struct bio_vec bio_vec;
+	struct completion complete;
+
+	bio_init(&bio);
+	bio.bi_io_vec = &bio_vec;
+	bio_vec.bv_page = page;
+	bio_vec.bv_len = PAGE_SIZE;
+	bio_vec.bv_offset = 0;
+	bio.bi_vcnt = 1;
+	bio.bi_idx = 0;
+	bio.bi_size = PAGE_SIZE;
+	bio.bi_bdev = bdev;
+	bio.bi_sector = page->index * (PAGE_SIZE >> 9);
+	init_completion(&complete);
+	bio.bi_private = &complete;
+	bio.bi_end_io = request_complete;
+
+	submit_bio(rw, &bio);
+	generic_unplug_device(bdev_get_queue(bdev));
+	wait_for_completion(&complete);
+	return test_bit(BIO_UPTODATE, &bio.bi_flags) ? 0 : -EIO;
+}
+
+static int bdev_readpage(void *_sb, struct page *page)
+{
+	struct super_block *sb = _sb;
+	struct block_device *bdev = logfs_super(sb)->s_bdev;
+	int err;
+
+	err = sync_request(page, bdev, READ);
+	if (err) {
+		ClearPageUptodate(page);
+		SetPageError(page);
+	} else {
+		SetPageUptodate(page);
+		ClearPageError(page);
+	}
+	unlock_page(page);
+	return err;
+}
+
+static DECLARE_WAIT_QUEUE_HEAD(wq);
+
+static void writeseg_end_io(struct bio *bio, int err)
+{
+	const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
+	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
+	struct super_block *sb = bio->bi_private;
+	struct logfs_super *super = logfs_super(sb);
+	struct page *page;
+
+	BUG_ON(!uptodate); /* FIXME: Retry io or write elsewhere */
+	BUG_ON(err);
+	BUG_ON(bio->bi_vcnt == 0);
+	do {
+		page = bvec->bv_page;
+		if (--bvec >= bio->bi_io_vec)
+			prefetchw(&bvec->bv_page->flags);
+
+		end_page_writeback(page);
+	} while (bvec >= bio->bi_io_vec);
+	bio_put(bio);
+	if (atomic_dec_and_test(&super->s_pending_writes))
+		wake_up(&wq);
+}
+
+static int __bdev_writeseg(struct super_block *sb, u64 ofs, pgoff_t index,
+		size_t nr_pages)
+{
+	struct logfs_super *super = logfs_super(sb);
+	struct address_space *mapping = super->s_mapping_inode->i_mapping;
+	struct bio *bio;
+	struct page *page;
+	struct request_queue *q = bdev_get_queue(sb->s_bdev);
+	unsigned int max_pages = queue_max_hw_sectors(q) >> (PAGE_SHIFT - 9);
+	int i;
+
+	bio = bio_alloc(GFP_NOFS, max_pages);
+	BUG_ON(!bio); /* FIXME: handle this */
+
+	for (i = 0; i < nr_pages; i++) {
+		if (i >= max_pages) {
+			/* Block layer cannot split bios :( */
+			bio->bi_vcnt = i;
+			bio->bi_idx = 0;
+			bio->bi_size = i * PAGE_SIZE;
+			bio->bi_bdev = super->s_bdev;
+			bio->bi_sector = ofs >> 9;
+			bio->bi_private = sb;
+			bio->bi_end_io = writeseg_end_io;
+			atomic_inc(&super->s_pending_writes);
+			submit_bio(WRITE, bio);
+
+			ofs += i * PAGE_SIZE;
+			index += i;
+			nr_pages -= i;
+			i = 0;
+
+			bio = bio_alloc(GFP_NOFS, max_pages);
+			BUG_ON(!bio);
+		}
+		page = find_lock_page(mapping, index + i);
+		BUG_ON(!page);
+		bio->bi_io_vec[i].bv_page = page;
+		bio->bi_io_vec[i].bv_len = PAGE_SIZE;
+		bio->bi_io_vec[i].bv_offset = 0;
+
+		BUG_ON(PageWriteback(page));
+		set_page_writeback(page);
+		unlock_page(page);
+	}
+	bio->bi_vcnt = nr_pages;
+	bio->bi_idx = 0;
+	bio->bi_size = nr_pages * PAGE_SIZE;
+	bio->bi_bdev = super->s_bdev;
+	bio->bi_sector = ofs >> 9;
+	bio->bi_private = sb;
+	bio->bi_end_io = writeseg_end_io;
+	atomic_inc(&super->s_pending_writes);
+	submit_bio(WRITE, bio);
+	return 0;
+}
+
+static void bdev_writeseg(struct super_block *sb, u64 ofs, size_t len)
+{
+	struct logfs_super *super = logfs_super(sb);
+	int head;
+
+	BUG_ON(super->s_flags & LOGFS_SB_FLAG_RO);
+
+	if (len == 0) {
+		/* This can happen when the object fit perfectly into a
+		 * segment, the segment gets written per sync and subsequently
+		 * closed.
+		 */
+		return;
+	}
+	head = ofs & (PAGE_SIZE - 1);
+	if (head) {
+		ofs -= head;
+		len += head;
+	}
+	len = PAGE_ALIGN(len);
+	__bdev_writeseg(sb, ofs, ofs >> PAGE_SHIFT, len >> PAGE_SHIFT);
+	generic_unplug_device(bdev_get_queue(logfs_super(sb)->s_bdev));
+}
+
+static int bdev_erase(struct super_block *sb, loff_t to, size_t len)
+{
+	struct logfs_super *super = logfs_super(sb);
+	struct address_space *mapping = super->s_mapping_inode->i_mapping;
+	struct page *page;
+	pgoff_t index = to >> PAGE_SHIFT;
+	int i, nr_pages = len >> PAGE_SHIFT;
+
+	BUG_ON(to & (PAGE_SIZE - 1));
+	BUG_ON(len & (PAGE_SIZE - 1));
+
+	if (logfs_super(sb)->s_flags & LOGFS_SB_FLAG_RO)
+		return -EROFS;
+
+	for (i = 0; i < nr_pages; i++) {
+		page = find_get_page(mapping, index + i);
+		if (page) {
+			memset(page_address(page), 0xFF, PAGE_SIZE);
+			page_cache_release(page);
+		}
+	}
+	return 0;
+}
+
+static void bdev_sync(struct super_block *sb)
+{
+	struct logfs_super *super = logfs_super(sb);
+
+	wait_event(wq, atomic_read(&super->s_pending_writes) == 0);
+}
+
+static struct page *bdev_find_first_sb(struct super_block *sb, u64 *ofs)
+{
+	struct logfs_super *super = logfs_super(sb);
+	struct address_space *mapping = super->s_mapping_inode->i_mapping;
+	filler_t *filler = bdev_readpage;
+
+	*ofs = 0;
+	return read_cache_page(mapping, 0, filler, sb);
+}
+
+static struct page *bdev_find_last_sb(struct super_block *sb, u64 *ofs)
+{
+	struct logfs_super *super = logfs_super(sb);
+	struct address_space *mapping = super->s_mapping_inode->i_mapping;
+	filler_t *filler = bdev_readpage;
+	u64 pos = (super->s_bdev->bd_inode->i_size & ~0xfffULL) - 0x1000;
+	pgoff_t index = pos >> PAGE_SHIFT;
+
+	*ofs = pos;
+	return read_cache_page(mapping, index, filler, sb);
+}
+
+static int bdev_write_sb(struct super_block *sb, struct page *page)
+{
+	struct block_device *bdev = logfs_super(sb)->s_bdev;
+
+	/* Nothing special to do for block devices. */
+	return sync_request(page, bdev, WRITE);
+}
+
+static void bdev_put_device(struct super_block *sb)
+{
+	close_bdev_exclusive(logfs_super(sb)->s_bdev, FMODE_READ|FMODE_WRITE);
+}
+
+static const struct logfs_device_ops bd_devops = {
+	.find_first_sb	= bdev_find_first_sb,
+	.find_last_sb	= bdev_find_last_sb,
+	.write_sb	= bdev_write_sb,
+	.readpage	= bdev_readpage,
+	.writeseg	= bdev_writeseg,
+	.erase		= bdev_erase,
+	.sync		= bdev_sync,
+	.put_device	= bdev_put_device,
+};
+
+int logfs_get_sb_bdev(struct file_system_type *type, int flags,
+		const char *devname, struct vfsmount *mnt)
+{
+	struct block_device *bdev;
+
+	bdev = open_bdev_exclusive(devname, FMODE_READ|FMODE_WRITE, type);
+	if (IS_ERR(bdev))
+		return PTR_ERR(bdev);
+
+	if (MAJOR(bdev->bd_dev) == MTD_BLOCK_MAJOR) {
+		int mtdnr = MINOR(bdev->bd_dev);
+		close_bdev_exclusive(bdev, FMODE_READ|FMODE_WRITE);
+		return logfs_get_sb_mtd(type, flags, mtdnr, mnt);
+	}
+
+	return logfs_get_sb_device(type, flags, NULL, bdev, &bd_devops, mnt);
+}
diff --git a/fs/logfs/dev_mtd.c b/fs/logfs/dev_mtd.c
new file mode 100644
index 000000000000..68e99d046c23
--- /dev/null
+++ b/fs/logfs/dev_mtd.c
@@ -0,0 +1,253 @@
+/*
+ * fs/logfs/dev_mtd.c	- Device access methods for MTD
+ *
+ * As should be obvious for Linux kernel code, license is GPLv2
+ *
+ * Copyright (c) 2005-2008 Joern Engel <joern@logfs.org>
+ */
+#include "logfs.h"
+#include <linux/completion.h>
+#include <linux/mount.h>
+#include <linux/sched.h>
+
+#define PAGE_OFS(ofs) ((ofs) & (PAGE_SIZE-1))
+
+static int mtd_read(struct super_block *sb, loff_t ofs, size_t len, void *buf)
+{
+	struct mtd_info *mtd = logfs_super(sb)->s_mtd;
+	size_t retlen;
+	int ret;
+
+	ret = mtd->read(mtd, ofs, len, &retlen, buf);
+	BUG_ON(ret == -EINVAL);
+	if (ret)
+		return ret;
+
+	/* Not sure if we should loop instead. */
+	if (retlen != len)
+		return -EIO;
+
+	return 0;
+}
+
+static int mtd_write(struct super_block *sb, loff_t ofs, size_t len, void *buf)
+{
+	struct logfs_super *super = logfs_super(sb);
+	struct mtd_info *mtd = super->s_mtd;
+	size_t retlen;
+	loff_t page_start, page_end;
+	int ret;
+
+	if (super->s_flags & LOGFS_SB_FLAG_RO)
+		return -EROFS;
+
+	BUG_ON((ofs >= mtd->size) || (len > mtd->size - ofs));
+	BUG_ON(ofs != (ofs >> super->s_writeshift) << super->s_writeshift);
+	BUG_ON(len > PAGE_CACHE_SIZE);
+	page_start = ofs & PAGE_CACHE_MASK;
+	page_end = PAGE_CACHE_ALIGN(ofs + len) - 1;
+	ret = mtd->write(mtd, ofs, len, &retlen, buf);
+	if (ret || (retlen != len))
+		return -EIO;
+
+	return 0;
+}
+
+/*
+ * For as long as I can remember (since about 2001) mtd->erase has been an
+ * asynchronous interface lacking the first driver to actually use the
+ * asynchronous properties.  So just to prevent the first implementor of such
+ * a thing from breaking logfs in 2350, we do the usual pointless dance to
+ * declare a completion variable and wait for completion before returning
+ * from mtd_erase().  What an excercise in futility!
+ */
+static void logfs_erase_callback(struct erase_info *ei)
+{
+	complete((struct completion *)ei->priv);
+}
+
+static int mtd_erase_mapping(struct super_block *sb, loff_t ofs, size_t len)
+{
+	struct logfs_super *super = logfs_super(sb);
+	struct address_space *mapping = super->s_mapping_inode->i_mapping;
+	struct page *page;
+	pgoff_t index = ofs >> PAGE_SHIFT;
+
+	for (index = ofs >> PAGE_SHIFT; index < (ofs + len) >> PAGE_SHIFT; index++) {
+		page = find_get_page(mapping, index);
+		if (!page)
+			continue;
+		memset(page_address(page), 0xFF, PAGE_SIZE);
+		page_cache_release(page);
+	}
+	return 0;
+}
+
+static int mtd_erase(struct super_block *sb, loff_t ofs, size_t len)
+{
+	struct mtd_info *mtd = logfs_super(sb)->s_mtd;
+	struct erase_info ei;
+	DECLARE_COMPLETION_ONSTACK(complete);
+	int ret;
+
+	BUG_ON(len % mtd->erasesize);
+	if (logfs_super(sb)->s_flags & LOGFS_SB_FLAG_RO)
+		return -EROFS;
+
+	memset(&ei, 0, sizeof(ei));
+	ei.mtd = mtd;
+	ei.addr = ofs;
+	ei.len = len;
+	ei.callback = logfs_erase_callback;
+	ei.priv = (long)&complete;
+	ret = mtd->erase(mtd, &ei);
+	if (ret)
+		return -EIO;
+
+	wait_for_completion(&complete);
+	if (ei.state != MTD_ERASE_DONE)
+		return -EIO;
+	return mtd_erase_mapping(sb, ofs, len);
+}
+
+static void mtd_sync(struct super_block *sb)
+{
+	struct mtd_info *mtd = logfs_super(sb)->s_mtd;
+
+	if (mtd->sync)
+		mtd->sync(mtd);
+}
+
+static int mtd_readpage(void *_sb, struct page *page)
+{
+	struct super_block *sb = _sb;
+	int err;
+
+	err = mtd_read(sb, page->index << PAGE_SHIFT, PAGE_SIZE,
+			page_address(page));
+	if (err == -EUCLEAN) {
+		err = 0;
+		/* FIXME: force GC this segment */
+	}
+	if (err) {
+		ClearPageUptodate(page);
+		SetPageError(page);
+	} else {
+		SetPageUptodate(page);
+		ClearPageError(page);
+	}
+	unlock_page(page);
+	return err;
+}
+
+static struct page *mtd_find_first_sb(struct super_block *sb, u64 *ofs)
+{
+	struct logfs_super *super = logfs_super(sb);
+	struct address_space *mapping = super->s_mapping_inode->i_mapping;
+	filler_t *filler = mtd_readpage;
+	struct mtd_info *mtd = super->s_mtd;
+
+	if (!mtd->block_isbad)
+		return NULL;
+
+	*ofs = 0;
+	while (mtd->block_isbad(mtd, *ofs)) {
+		*ofs += mtd->erasesize;
+		if (*ofs >= mtd->size)
+			return NULL;
+	}
+	BUG_ON(*ofs & ~PAGE_MASK);
+	return read_cache_page(mapping, *ofs >> PAGE_SHIFT, filler, sb);
+}
+
+static struct page *mtd_find_last_sb(struct super_block *sb, u64 *ofs)
+{
+	struct logfs_super *super = logfs_super(sb);
+	struct address_space *mapping = super->s_mapping_inode->i_mapping;
+	filler_t *filler = mtd_readpage;
+	struct mtd_info *mtd = super->s_mtd;
+
+	if (!mtd->block_isbad)
+		return NULL;
+
+	*ofs = mtd->size - mtd->erasesize;
+	while (mtd->block_isbad(mtd, *ofs)) {
+		*ofs -= mtd->erasesize;
+		if (*ofs <= 0)
+			return NULL;
+	}
+	*ofs = *ofs + mtd->erasesize - 0x1000;
+	BUG_ON(*ofs & ~PAGE_MASK);
+	return read_cache_page(mapping, *ofs >> PAGE_SHIFT, filler, sb);
+}
+
+static int __mtd_writeseg(struct super_block *sb, u64 ofs, pgoff_t index,
+		size_t nr_pages)
+{
+	struct logfs_super *super = logfs_super(sb);
+	struct address_space *mapping = super->s_mapping_inode->i_mapping;
+	struct page *page;
+	int i, err;
+
+	for (i = 0; i < nr_pages; i++) {
+		page = find_lock_page(mapping, index + i);
+		BUG_ON(!page);
+
+		err = mtd_write(sb, page->index << PAGE_SHIFT, PAGE_SIZE,
+				page_address(page));
+		unlock_page(page);
+		page_cache_release(page);
+		if (err)
+			return err;
+	}
+	return 0;
+}
+
+static void mtd_writeseg(struct super_block *sb, u64 ofs, size_t len)
+{
+	struct logfs_super *super = logfs_super(sb);
+	int head;
+
+	if (super->s_flags & LOGFS_SB_FLAG_RO)
+		return;
+
+	if (len == 0) {
+		/* This can happen when the object fit perfectly into a
+		 * segment, the segment gets written per sync and subsequently
+		 * closed.
+		 */
+		return;
+	}
+	head = ofs & (PAGE_SIZE - 1);
+	if (head) {
+		ofs -= head;
+		len += head;
+	}
+	len = PAGE_ALIGN(len);
+	__mtd_writeseg(sb, ofs, ofs >> PAGE_SHIFT, len >> PAGE_SHIFT);
+}
+
+static void mtd_put_device(struct super_block *sb)
+{
+	put_mtd_device(logfs_super(sb)->s_mtd);
+}
+
+static const struct logfs_device_ops mtd_devops = {
+	.find_first_sb	= mtd_find_first_sb,
+	.find_last_sb	= mtd_find_last_sb,
+	.readpage	= mtd_readpage,
+	.writeseg	= mtd_writeseg,
+	.erase		= mtd_erase,
+	.sync		= mtd_sync,
+	.put_device	= mtd_put_device,
+};
+
+int logfs_get_sb_mtd(struct file_system_type *type, int flags,
+		int mtdnr, struct vfsmount *mnt)
+{
+	struct mtd_info *mtd;
+	const struct logfs_device_ops *devops = &mtd_devops;
+
+	mtd = get_mtd_device(NULL, mtdnr);
+	return logfs_get_sb_device(type, flags, mtd, NULL, devops, mnt);
+}
diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c
new file mode 100644
index 000000000000..89104e6f81c4
--- /dev/null
+++ b/fs/logfs/dir.c
@@ -0,0 +1,818 @@
+/*
+ * fs/logfs/dir.c	- directory-related code
+ *
+ * As should be obvious for Linux kernel code, license is GPLv2
+ *
+ * Copyright (c) 2005-2008 Joern Engel <joern@logfs.org>
+ */
+#include "logfs.h"
+
+
+/*
+ * Atomic dir operations
+ *
+ * Directory operations are by default not atomic.  Dentries and Inodes are
+ * created/removed/altered in seperate operations.  Therefore we need to do
+ * a small amount of journaling.
+ *
+ * Create, link, mkdir, mknod and symlink all share the same function to do
+ * the work: __logfs_create.  This function works in two atomic steps:
+ * 1. allocate inode (remember in journal)
+ * 2. allocate dentry (clear journal)
+ *
+ * As we can only get interrupted between the two, when the inode we just
+ * created is simply stored in the anchor.  On next mount, if we were
+ * interrupted, we delete the inode.  From a users point of view the
+ * operation never happened.
+ *
+ * Unlink and rmdir also share the same function: unlink.  Again, this
+ * function works in two atomic steps
+ * 1. remove dentry (remember inode in journal)
+ * 2. unlink inode (clear journal)
+ *
+ * And again, on the next mount, if we were interrupted, we delete the inode.
+ * From a users point of view the operation succeeded.
+ *
+ * Rename is the real pain to deal with, harder than all the other methods
+ * combined.  Depending on the circumstances we can run into three cases.
+ * A "target rename" where the target dentry already existed, a "local
+ * rename" where both parent directories are identical or a "cross-directory
+ * rename" in the remaining case.
+ *
+ * Local rename is atomic, as the old dentry is simply rewritten with a new
+ * name.
+ *
+ * Cross-directory rename works in two steps, similar to __logfs_create and
+ * logfs_unlink:
+ * 1. Write new dentry (remember old dentry in journal)
+ * 2. Remove old dentry (clear journal)
+ *
+ * Here we remember a dentry instead of an inode.  On next mount, if we were
+ * interrupted, we delete the dentry.  From a users point of view, the
+ * operation succeeded.
+ *
+ * Target rename works in three atomic steps:
+ * 1. Attach old inode to new dentry (remember old dentry and new inode)
+ * 2. Remove old dentry (still remember the new inode)
+ * 3. Remove victim inode
+ *
+ * Here we remember both an inode an a dentry.  If we get interrupted
+ * between steps 1 and 2, we delete both the dentry and the inode.  If
+ * we get interrupted between steps 2 and 3, we delete just the inode.
+ * In either case, the remaining objects are deleted on next mount.  From
+ * a users point of view, the operation succeeded.
+ */
+
+static int write_dir(struct inode *dir, struct logfs_disk_dentry *dd,
+		loff_t pos)
+{
+	return logfs_inode_write(dir, dd, sizeof(*dd), pos, WF_LOCK, NULL);
+}
+
+static int write_inode(struct inode *inode)
+{
+	return __logfs_write_inode(inode, WF_LOCK);
+}
+
+static s64 dir_seek_data(struct inode *inode, s64 pos)
+{
+	s64 new_pos = logfs_seek_data(inode, pos);
+
+	return max(pos, new_pos - 1);
+}
+
+static int beyond_eof(struct inode *inode, loff_t bix)
+{
+	loff_t pos = bix << inode->i_sb->s_blocksize_bits;
+	return pos >= i_size_read(inode);
+}
+
+/*
+ * Prime value was chosen to be roughly 256 + 26.  r5 hash uses 11,
+ * so short names (len <= 9) don't even occupy the complete 32bit name
+ * space.  A prime >256 ensures short names quickly spread the 32bit
+ * name space.  Add about 26 for the estimated amount of information
+ * of each character and pick a prime nearby, preferrably a bit-sparse
+ * one.
+ */
+static u32 hash_32(const char *s, int len, u32 seed)
+{
+	u32 hash = seed;
+	int i;
+
+	for (i = 0; i < len; i++)
+		hash = hash * 293 + s[i];
+	return hash;
+}
+
+/*
+ * We have to satisfy several conflicting requirements here.  Small
+ * directories should stay fairly compact and not require too many
+ * indirect blocks.  The number of possible locations for a given hash
+ * should be small to make lookup() fast.  And we should try hard not
+ * to overflow the 32bit name space or nfs and 32bit host systems will
+ * be unhappy.
+ *
+ * So we use the following scheme.  First we reduce the hash to 0..15
+ * and try a direct block.  If that is occupied we reduce the hash to
+ * 16..255 and try an indirect block.  Same for 2x and 3x indirect
+ * blocks.  Lastly we reduce the hash to 0x800_0000 .. 0xffff_ffff,
+ * but use buckets containing eight entries instead of a single one.
+ *
+ * Using 16 entries should allow for a reasonable amount of hash
+ * collisions, so the 32bit name space can be packed fairly tight
+ * before overflowing.  Oh and currently we don't overflow but return
+ * and error.
+ *
+ * How likely are collisions?  Doing the appropriate math is beyond me
+ * and the Bronstein textbook.  But running a test program to brute
+ * force collisions for a couple of days showed that on average the
+ * first collision occurs after 598M entries, with 290M being the
+ * smallest result.  Obviously 21 entries could already cause a
+ * collision if all entries are carefully chosen.
+ */
+static pgoff_t hash_index(u32 hash, int round)
+{
+	switch (round) {
+	case 0:
+		return hash % I0_BLOCKS;
+	case 1:
+		return I0_BLOCKS + hash % (I1_BLOCKS - I0_BLOCKS);
+	case 2:
+		return I1_BLOCKS + hash % (I2_BLOCKS - I1_BLOCKS);
+	case 3:
+		return I2_BLOCKS + hash % (I3_BLOCKS - I2_BLOCKS);
+	case 4 ... 19:
+		return I3_BLOCKS + 16 * (hash % (((1<<31) - I3_BLOCKS) / 16))
+			+ round - 4;
+	}
+	BUG();
+}
+
+static struct page *logfs_get_dd_page(struct inode *dir, struct dentry *dentry)
+{
+	struct qstr *name = &dentry->d_name;
+	struct page *page;
+	struct logfs_disk_dentry *dd;
+	u32 hash = hash_32(name->name, name->len, 0);
+	pgoff_t index;
+	int round;
+
+	if (name->len > LOGFS_MAX_NAMELEN)
+		return ERR_PTR(-ENAMETOOLONG);
+
+	for (round = 0; round < 20; round++) {
+		index = hash_index(hash, round);
+
+		if (beyond_eof(dir, index))
+			return NULL;
+		if (!logfs_exist_block(dir, index))
+			continue;
+		page = read_cache_page(dir->i_mapping, index,
+				(filler_t *)logfs_readpage, NULL);
+		if (IS_ERR(page))
+			return page;
+		dd = kmap_atomic(page, KM_USER0);
+		BUG_ON(dd->namelen == 0);
+
+		if (name->len != be16_to_cpu(dd->namelen) ||
+				memcmp(name->name, dd->name, name->len)) {
+			kunmap_atomic(dd, KM_USER0);
+			page_cache_release(page);
+			continue;
+		}
+
+		kunmap_atomic(dd, KM_USER0);
+		return page;
+	}
+	return NULL;
+}
+
+static int logfs_remove_inode(struct inode *inode)
+{
+	int ret;
+
+	inode->i_nlink--;
+	ret = write_inode(inode);
+	LOGFS_BUG_ON(ret, inode->i_sb);
+	return ret;
+}
+
+static void abort_transaction(struct inode *inode, struct logfs_transaction *ta)
+{
+	if (logfs_inode(inode)->li_block)
+		logfs_inode(inode)->li_block->ta = NULL;
+	kfree(ta);
+}
+
+static int logfs_unlink(struct inode *dir, struct dentry *dentry)
+{
+	struct logfs_super *super = logfs_super(dir->i_sb);
+	struct inode *inode = dentry->d_inode;
+	struct logfs_transaction *ta;
+	struct page *page;
+	pgoff_t index;
+	int ret;
+
+	ta = kzalloc(sizeof(*ta), GFP_KERNEL);
+	if (!ta)
+		return -ENOMEM;
+
+	ta->state = UNLINK_1;
+	ta->ino = inode->i_ino;
+
+	inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
+
+	page = logfs_get_dd_page(dir, dentry);
+	if (!page)
+		return -ENOENT;
+	if (IS_ERR(page))
+		return PTR_ERR(page);
+	index = page->index;
+	page_cache_release(page);
+
+	mutex_lock(&super->s_dirop_mutex);
+	logfs_add_transaction(dir, ta);
+
+	ret = logfs_delete(dir, index, NULL);
+	if (!ret)
+		ret = write_inode(dir);
+
+	if (ret) {
+		abort_transaction(dir, ta);
+		printk(KERN_ERR"LOGFS: unable to delete inode\n");
+		goto out;
+	}
+
+	ta->state = UNLINK_2;
+	logfs_add_transaction(inode, ta);
+	ret = logfs_remove_inode(inode);
+out:
+	mutex_unlock(&super->s_dirop_mutex);
+	return ret;
+}
+
+static inline int logfs_empty_dir(struct inode *dir)
+{
+	u64 data;
+
+	data = logfs_seek_data(dir, 0) << dir->i_sb->s_blocksize_bits;
+	return data >= i_size_read(dir);
+}
+
+static int logfs_rmdir(struct inode *dir, struct dentry *dentry)
+{
+	struct inode *inode = dentry->d_inode;
+
+	if (!logfs_empty_dir(inode))
+		return -ENOTEMPTY;
+
+	return logfs_unlink(dir, dentry);
+}
+
+/* FIXME: readdir currently has it's own dir_walk code.  I don't see a good
+ * way to combine the two copies */
+#define IMPLICIT_NODES 2
+static int __logfs_readdir(struct file *file, void *buf, filldir_t filldir)
+{
+	struct inode *dir = file->f_dentry->d_inode;
+	loff_t pos = file->f_pos - IMPLICIT_NODES;
+	struct page *page;
+	struct logfs_disk_dentry *dd;
+	int full;
+
+	BUG_ON(pos < 0);
+	for (;; pos++) {
+		if (beyond_eof(dir, pos))
+			break;
+		if (!logfs_exist_block(dir, pos)) {
+			/* deleted dentry */
+			pos = dir_seek_data(dir, pos);
+			continue;
+		}
+		page = read_cache_page(dir->i_mapping, pos,
+				(filler_t *)logfs_readpage, NULL);
+		if (IS_ERR(page))
+			return PTR_ERR(page);
+		dd = kmap_atomic(page, KM_USER0);
+		BUG_ON(dd->namelen == 0);
+
+		full = filldir(buf, (char *)dd->name, be16_to_cpu(dd->namelen),
+				pos, be64_to_cpu(dd->ino), dd->type);
+		kunmap_atomic(dd, KM_USER0);
+		page_cache_release(page);
+		if (full)
+			break;
+	}
+
+	file->f_pos = pos + IMPLICIT_NODES;
+	return 0;
+}
+
+static int logfs_readdir(struct file *file, void *buf, filldir_t filldir)
+{
+	struct inode *inode = file->f_dentry->d_inode;
+	ino_t pino = parent_ino(file->f_dentry);
+	int err;
+
+	if (file->f_pos < 0)
+		return -EINVAL;
+
+	if (file->f_pos == 0) {
+		if (filldir(buf, ".", 1, 1, inode->i_ino, DT_DIR) < 0)
+			return 0;
+		file->f_pos++;
+	}
+	if (file->f_pos == 1) {
+		if (filldir(buf, "..", 2, 2, pino, DT_DIR) < 0)
+			return 0;
+		file->f_pos++;
+	}
+
+	err = __logfs_readdir(file, buf, filldir);
+	return err;
+}
+
+static void logfs_set_name(struct logfs_disk_dentry *dd, struct qstr *name)
+{
+	dd->namelen = cpu_to_be16(name->len);
+	memcpy(dd->name, name->name, name->len);
+}
+
+static struct dentry *logfs_lookup(struct inode *dir, struct dentry *dentry,
+		struct nameidata *nd)
+{
+	struct page *page;
+	struct logfs_disk_dentry *dd;
+	pgoff_t index;
+	u64 ino = 0;
+	struct inode *inode;
+
+	page = logfs_get_dd_page(dir, dentry);
+	if (IS_ERR(page))
+		return ERR_CAST(page);
+	if (!page) {
+		d_add(dentry, NULL);
+		return NULL;
+	}
+	index = page->index;
+	dd = kmap_atomic(page, KM_USER0);
+	ino = be64_to_cpu(dd->ino);
+	kunmap_atomic(dd, KM_USER0);
+	page_cache_release(page);
+
+	inode = logfs_iget(dir->i_sb, ino);
+	if (IS_ERR(inode)) {
+		printk(KERN_ERR"LogFS: Cannot read inode #%llx for dentry (%lx, %lx)n",
+				ino, dir->i_ino, index);
+		return ERR_CAST(inode);
+	}
+	return d_splice_alias(inode, dentry);
+}
+
+static void grow_dir(struct inode *dir, loff_t index)
+{
+	index = (index + 1) << dir->i_sb->s_blocksize_bits;
+	if (i_size_read(dir) < index)
+		i_size_write(dir, index);
+}
+
+static int logfs_write_dir(struct inode *dir, struct dentry *dentry,
+		struct inode *inode)
+{
+	struct page *page;
+	struct logfs_disk_dentry *dd;
+	u32 hash = hash_32(dentry->d_name.name, dentry->d_name.len, 0);
+	pgoff_t index;
+	int round, err;
+
+	for (round = 0; round < 20; round++) {
+		index = hash_index(hash, round);
+
+		if (logfs_exist_block(dir, index))
+			continue;
+		page = find_or_create_page(dir->i_mapping, index, GFP_KERNEL);
+		if (!page)
+			return -ENOMEM;
+
+		dd = kmap_atomic(page, KM_USER0);
+		memset(dd, 0, sizeof(*dd));
+		dd->ino = cpu_to_be64(inode->i_ino);
+		dd->type = logfs_type(inode);
+		logfs_set_name(dd, &dentry->d_name);
+		kunmap_atomic(dd, KM_USER0);
+
+		err = logfs_write_buf(dir, page, WF_LOCK);
+		unlock_page(page);
+		page_cache_release(page);
+		if (!err)
+			grow_dir(dir, index);
+		return err;
+	}
+	/* FIXME: Is there a better return value?  In most cases neither
+	 * the filesystem nor the directory are full.  But we have had
+	 * too many collisions for this particular hash and no fallback.
+	 */
+	return -ENOSPC;
+}
+
+static int __logfs_create(struct inode *dir, struct dentry *dentry,
+		struct inode *inode, const char *dest, long destlen)
+{
+	struct logfs_super *super = logfs_super(dir->i_sb);
+	struct logfs_inode *li = logfs_inode(inode);
+	struct logfs_transaction *ta;
+	int ret;
+
+	ta = kzalloc(sizeof(*ta), GFP_KERNEL);
+	if (!ta)
+		return -ENOMEM;
+
+	ta->state = CREATE_1;
+	ta->ino = inode->i_ino;
+	mutex_lock(&super->s_dirop_mutex);
+	logfs_add_transaction(inode, ta);
+
+	if (dest) {
+		/* symlink */
+		ret = logfs_inode_write(inode, dest, destlen, 0, WF_LOCK, NULL);
+		if (!ret)
+			ret = write_inode(inode);
+	} else {
+		/* creat/mkdir/mknod */
+		ret = write_inode(inode);
+	}
+	if (ret) {
+		abort_transaction(inode, ta);
+		li->li_flags |= LOGFS_IF_STILLBORN;
+		/* FIXME: truncate symlink */
+		inode->i_nlink--;
+		iput(inode);
+		goto out;
+	}
+
+	ta->state = CREATE_2;
+	logfs_add_transaction(dir, ta);
+	ret = logfs_write_dir(dir, dentry, inode);
+	/* sync directory */
+	if (!ret)
+		ret = write_inode(dir);
+
+	if (ret) {
+		logfs_del_transaction(dir, ta);
+		ta->state = CREATE_2;
+		logfs_add_transaction(inode, ta);
+		logfs_remove_inode(inode);
+		iput(inode);
+		goto out;
+	}
+	d_instantiate(dentry, inode);
+out:
+	mutex_unlock(&super->s_dirop_mutex);
+	return ret;
+}
+
+static int logfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+{
+	struct inode *inode;
+
+	/*
+	 * FIXME: why do we have to fill in S_IFDIR, while the mode is
+	 * correct for mknod, creat, etc.?  Smells like the vfs *should*
+	 * do it for us but for some reason fails to do so.
+	 */
+	inode = logfs_new_inode(dir, S_IFDIR | mode);
+	if (IS_ERR(inode))
+		return PTR_ERR(inode);
+
+	inode->i_op = &logfs_dir_iops;
+	inode->i_fop = &logfs_dir_fops;
+
+	return __logfs_create(dir, dentry, inode, NULL, 0);
+}
+
+static int logfs_create(struct inode *dir, struct dentry *dentry, int mode,
+		struct nameidata *nd)
+{
+	struct inode *inode;
+
+	inode = logfs_new_inode(dir, mode);
+	if (IS_ERR(inode))
+		return PTR_ERR(inode);
+
+	inode->i_op = &logfs_reg_iops;
+	inode->i_fop = &logfs_reg_fops;
+	inode->i_mapping->a_ops = &logfs_reg_aops;
+
+	return __logfs_create(dir, dentry, inode, NULL, 0);
+}
+
+static int logfs_mknod(struct inode *dir, struct dentry *dentry, int mode,
+		dev_t rdev)
+{
+	struct inode *inode;
+
+	if (dentry->d_name.len > LOGFS_MAX_NAMELEN)
+		return -ENAMETOOLONG;
+
+	inode = logfs_new_inode(dir, mode);
+	if (IS_ERR(inode))
+		return PTR_ERR(inode);
+
+	init_special_inode(inode, mode, rdev);
+
+	return __logfs_create(dir, dentry, inode, NULL, 0);
+}
+
+static int logfs_symlink(struct inode *dir, struct dentry *dentry,
+		const char *target)
+{
+	struct inode *inode;
+	size_t destlen = strlen(target) + 1;
+
+	if (destlen > dir->i_sb->s_blocksize)
+		return -ENAMETOOLONG;
+
+	inode = logfs_new_inode(dir, S_IFLNK | 0777);
+	if (IS_ERR(inode))
+		return PTR_ERR(inode);
+
+	inode->i_op = &logfs_symlink_iops;
+	inode->i_mapping->a_ops = &logfs_reg_aops;
+
+	return __logfs_create(dir, dentry, inode, target, destlen);
+}
+
+static int logfs_permission(struct inode *inode, int mask)
+{
+	return generic_permission(inode, mask, NULL);
+}
+
+static int logfs_link(struct dentry *old_dentry, struct inode *dir,
+		struct dentry *dentry)
+{
+	struct inode *inode = old_dentry->d_inode;
+
+	if (inode->i_nlink >= LOGFS_LINK_MAX)
+		return -EMLINK;
+
+	inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
+	atomic_inc(&inode->i_count);
+	inode->i_nlink++;
+	mark_inode_dirty_sync(inode);
+
+	return __logfs_create(dir, dentry, inode, NULL, 0);
+}
+
+static int logfs_get_dd(struct inode *dir, struct dentry *dentry,
+		struct logfs_disk_dentry *dd, loff_t *pos)
+{
+	struct page *page;
+	void *map;
+
+	page = logfs_get_dd_page(dir, dentry);
+	if (IS_ERR(page))
+		return PTR_ERR(page);
+	*pos = page->index;
+	map = kmap_atomic(page, KM_USER0);
+	memcpy(dd, map, sizeof(*dd));
+	kunmap_atomic(map, KM_USER0);
+	page_cache_release(page);
+	return 0;
+}
+
+static int logfs_delete_dd(struct inode *dir, loff_t pos)
+{
+	/*
+	 * Getting called with pos somewhere beyond eof is either a goofup
+	 * within this file or means someone maliciously edited the
+	 * (crc-protected) journal.
+	 */
+	BUG_ON(beyond_eof(dir, pos));
+	dir->i_ctime = dir->i_mtime = CURRENT_TIME;
+	log_dir(" Delete dentry (%lx, %llx)\n", dir->i_ino, pos);
+	return logfs_delete(dir, pos, NULL);
+}
+
+/*
+ * Cross-directory rename, target does not exist.  Just a little nasty.
+ * Create a new dentry in the target dir, then remove the old dentry,
+ * all the while taking care to remember our operation in the journal.
+ */
+static int logfs_rename_cross(struct inode *old_dir, struct dentry *old_dentry,
+			      struct inode *new_dir, struct dentry *new_dentry)
+{
+	struct logfs_super *super = logfs_super(old_dir->i_sb);
+	struct logfs_disk_dentry dd;
+	struct logfs_transaction *ta;
+	loff_t pos;
+	int err;
+
+	/* 1. locate source dd */
+	err = logfs_get_dd(old_dir, old_dentry, &dd, &pos);
+	if (err)
+		return err;
+
+	ta = kzalloc(sizeof(*ta), GFP_KERNEL);
+	if (!ta)
+		return -ENOMEM;
+
+	ta->state = CROSS_RENAME_1;
+	ta->dir = old_dir->i_ino;
+	ta->pos = pos;
+
+	/* 2. write target dd */
+	mutex_lock(&super->s_dirop_mutex);
+	logfs_add_transaction(new_dir, ta);
+	err = logfs_write_dir(new_dir, new_dentry, old_dentry->d_inode);
+	if (!err)
+		err = write_inode(new_dir);
+
+	if (err) {
+		super->s_rename_dir = 0;
+		super->s_rename_pos = 0;
+		abort_transaction(new_dir, ta);
+		goto out;
+	}
+
+	/* 3. remove source dd */
+	ta->state = CROSS_RENAME_2;
+	logfs_add_transaction(old_dir, ta);
+	err = logfs_delete_dd(old_dir, pos);
+	if (!err)
+		err = write_inode(old_dir);
+	LOGFS_BUG_ON(err, old_dir->i_sb);
+out:
+	mutex_unlock(&super->s_dirop_mutex);
+	return err;
+}
+
+static int logfs_replace_inode(struct inode *dir, struct dentry *dentry,
+		struct logfs_disk_dentry *dd, struct inode *inode)
+{
+	loff_t pos;
+	int err;
+
+	err = logfs_get_dd(dir, dentry, dd, &pos);
+	if (err)
+		return err;
+	dd->ino = cpu_to_be64(inode->i_ino);
+	dd->type = logfs_type(inode);
+
+	err = write_dir(dir, dd, pos);
+	if (err)
+		return err;
+	log_dir("Replace dentry (%lx, %llx) %s -> %llx\n", dir->i_ino, pos,
+			dd->name, be64_to_cpu(dd->ino));
+	return write_inode(dir);
+}
+
+/* Target dentry exists - the worst case.  We need to attach the source
+ * inode to the target dentry, then remove the orphaned target inode and
+ * source dentry.
+ */
+static int logfs_rename_target(struct inode *old_dir, struct dentry *old_dentry,
+			       struct inode *new_dir, struct dentry *new_dentry)
+{
+	struct logfs_super *super = logfs_super(old_dir->i_sb);
+	struct inode *old_inode = old_dentry->d_inode;
+	struct inode *new_inode = new_dentry->d_inode;
+	int isdir = S_ISDIR(old_inode->i_mode);
+	struct logfs_disk_dentry dd;
+	struct logfs_transaction *ta;
+	loff_t pos;
+	int err;
+
+	BUG_ON(isdir != S_ISDIR(new_inode->i_mode));
+	if (isdir) {
+		if (!logfs_empty_dir(new_inode))
+			return -ENOTEMPTY;
+	}
+
+	/* 1. locate source dd */
+	err = logfs_get_dd(old_dir, old_dentry, &dd, &pos);
+	if (err)
+		return err;
+
+	ta = kzalloc(sizeof(*ta), GFP_KERNEL);
+	if (!ta)
+		return -ENOMEM;
+
+	ta->state = TARGET_RENAME_1;
+	ta->dir = old_dir->i_ino;
+	ta->pos = pos;
+	ta->ino = new_inode->i_ino;
+
+	/* 2. attach source inode to target dd */
+	mutex_lock(&super->s_dirop_mutex);
+	logfs_add_transaction(new_dir, ta);
+	err = logfs_replace_inode(new_dir, new_dentry, &dd, old_inode);
+	if (err) {
+		super->s_rename_dir = 0;
+		super->s_rename_pos = 0;
+		super->s_victim_ino = 0;
+		abort_transaction(new_dir, ta);
+		goto out;
+	}
+
+	/* 3. remove source dd */
+	ta->state = TARGET_RENAME_2;
+	logfs_add_transaction(old_dir, ta);
+	err = logfs_delete_dd(old_dir, pos);
+	if (!err)
+		err = write_inode(old_dir);
+	LOGFS_BUG_ON(err, old_dir->i_sb);
+
+	/* 4. remove target inode */
+	ta->state = TARGET_RENAME_3;
+	logfs_add_transaction(new_inode, ta);
+	err = logfs_remove_inode(new_inode);
+
+out:
+	mutex_unlock(&super->s_dirop_mutex);
+	return err;
+}
+
+static int logfs_rename(struct inode *old_dir, struct dentry *old_dentry,
+			struct inode *new_dir, struct dentry *new_dentry)
+{
+	if (new_dentry->d_inode)
+		return logfs_rename_target(old_dir, old_dentry,
+					   new_dir, new_dentry);
+	return logfs_rename_cross(old_dir, old_dentry, new_dir, new_dentry);
+}
+
+/* No locking done here, as this is called before .get_sb() returns. */
+int logfs_replay_journal(struct super_block *sb)
+{
+	struct logfs_super *super = logfs_super(sb);
+	struct inode *inode;
+	u64 ino, pos;
+	int err;
+
+	if (super->s_victim_ino) {
+		/* delete victim inode */
+		ino = super->s_victim_ino;
+		printk(KERN_INFO"LogFS: delete unmapped inode #%llx\n", ino);
+		inode = logfs_iget(sb, ino);
+		if (IS_ERR(inode))
+			goto fail;
+
+		LOGFS_BUG_ON(i_size_read(inode) > 0, sb);
+		super->s_victim_ino = 0;
+		err = logfs_remove_inode(inode);
+		iput(inode);
+		if (err) {
+			super->s_victim_ino = ino;
+			goto fail;
+		}
+	}
+	if (super->s_rename_dir) {
+		/* delete old dd from rename */
+		ino = super->s_rename_dir;
+		pos = super->s_rename_pos;
+		printk(KERN_INFO"LogFS: delete unbacked dentry (%llx, %llx)\n",
+				ino, pos);
+		inode = logfs_iget(sb, ino);
+		if (IS_ERR(inode))
+			goto fail;
+
+		super->s_rename_dir = 0;
+		super->s_rename_pos = 0;
+		err = logfs_delete_dd(inode, pos);
+		iput(inode);
+		if (err) {
+			super->s_rename_dir = ino;
+			super->s_rename_pos = pos;
+			goto fail;
+		}
+	}
+	return 0;
+fail:
+	LOGFS_BUG(sb);
+	return -EIO;
+}
+
+const struct inode_operations logfs_symlink_iops = {
+	.readlink	= generic_readlink,
+	.follow_link	= page_follow_link_light,
+};
+
+const struct inode_operations logfs_dir_iops = {
+	.create		= logfs_create,
+	.link		= logfs_link,
+	.lookup		= logfs_lookup,
+	.mkdir		= logfs_mkdir,
+	.mknod		= logfs_mknod,
+	.rename		= logfs_rename,
+	.rmdir		= logfs_rmdir,
+	.permission	= logfs_permission,
+	.symlink	= logfs_symlink,
+	.unlink		= logfs_unlink,
+};
+const struct file_operations logfs_dir_fops = {
+	.fsync		= logfs_fsync,
+	.ioctl		= logfs_ioctl,
+	.readdir	= logfs_readdir,
+	.read		= generic_read_dir,
+};
diff --git a/fs/logfs/file.c b/fs/logfs/file.c
new file mode 100644
index 000000000000..370f367a933e
--- /dev/null
+++ b/fs/logfs/file.c
@@ -0,0 +1,263 @@
+/*
+ * fs/logfs/file.c	- prepare_write, commit_write and friends
+ *
+ * As should be obvious for Linux kernel code, license is GPLv2
+ *
+ * Copyright (c) 2005-2008 Joern Engel <joern@logfs.org>
+ */
+#include "logfs.h"
+#include <linux/sched.h>
+#include <linux/writeback.h>
+
+static int logfs_write_begin(struct file *file, struct address_space *mapping,
+		loff_t pos, unsigned len, unsigned flags,
+		struct page **pagep, void **fsdata)
+{
+	struct inode *inode = mapping->host;
+	struct page *page;
+	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
+
+	page = grab_cache_page_write_begin(mapping, index, flags);
+	if (!page)
+		return -ENOMEM;
+	*pagep = page;
+
+	if ((len == PAGE_CACHE_SIZE) || PageUptodate(page))
+		return 0;
+	if ((pos & PAGE_CACHE_MASK) >= i_size_read(inode)) {
+		unsigned start = pos & (PAGE_CACHE_SIZE - 1);
+		unsigned end = start + len;
+
+		/* Reading beyond i_size is simple: memset to zero */
+		zero_user_segments(page, 0, start, end, PAGE_CACHE_SIZE);
+		return 0;
+	}
+	return logfs_readpage_nolock(page);
+}
+
+static int logfs_write_end(struct file *file, struct address_space *mapping,
+		loff_t pos, unsigned len, unsigned copied, struct page *page,
+		void *fsdata)
+{
+	struct inode *inode = mapping->host;
+	pgoff_t index = page->index;
+	unsigned start = pos & (PAGE_CACHE_SIZE - 1);
+	unsigned end = start + copied;
+	int ret = 0;
+
+	BUG_ON(PAGE_CACHE_SIZE != inode->i_sb->s_blocksize);
+	BUG_ON(page->index > I3_BLOCKS);
+
+	if (copied < len) {
+		/*
+		 * Short write of a non-initialized paged.  Just tell userspace
+		 * to retry the entire page.
+		 */
+		if (!PageUptodate(page)) {
+			copied = 0;
+			goto out;
+		}
+	}
+	if (copied == 0)
+		goto out; /* FIXME: do we need to update inode? */
+
+	if (i_size_read(inode) < (index << PAGE_CACHE_SHIFT) + end) {
+		i_size_write(inode, (index << PAGE_CACHE_SHIFT) + end);
+		mark_inode_dirty_sync(inode);
+	}
+
+	SetPageUptodate(page);
+	if (!PageDirty(page)) {
+		if (!get_page_reserve(inode, page))
+			__set_page_dirty_nobuffers(page);
+		else
+			ret = logfs_write_buf(inode, page, WF_LOCK);
+	}
+out:
+	unlock_page(page);
+	page_cache_release(page);
+	return ret ? ret : copied;
+}
+
+int logfs_readpage(struct file *file, struct page *page)
+{
+	int ret;
+
+	ret = logfs_readpage_nolock(page);
+	unlock_page(page);
+	return ret;
+}
+
+/* Clear the page's dirty flag in the radix tree. */
+/* TODO: mucking with PageWriteback is silly.  Add a generic function to clear
+ * the dirty bit from the radix tree for filesystems that don't have to wait
+ * for page writeback to finish (i.e. any compressing filesystem).
+ */
+static void clear_radix_tree_dirty(struct page *page)
+{
+	BUG_ON(PagePrivate(page) || page->private);
+	set_page_writeback(page);
+	end_page_writeback(page);
+}
+
+static int __logfs_writepage(struct page *page)
+{
+	struct inode *inode = page->mapping->host;
+	int err;
+
+	err = logfs_write_buf(inode, page, WF_LOCK);
+	if (err)
+		set_page_dirty(page);
+	else
+		clear_radix_tree_dirty(page);
+	unlock_page(page);
+	return err;
+}
+
+static int logfs_writepage(struct page *page, struct writeback_control *wbc)
+{
+	struct inode *inode = page->mapping->host;
+	loff_t i_size = i_size_read(inode);
+	pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
+	unsigned offset;
+	u64 bix;
+	level_t level;
+
+	log_file("logfs_writepage(%lx, %lx, %p)\n", inode->i_ino, page->index,
+			page);
+
+	logfs_unpack_index(page->index, &bix, &level);
+
+	/* Indirect blocks are never truncated */
+	if (level != 0)
+		return __logfs_writepage(page);
+
+	/*
+	 * TODO: everything below is a near-verbatim copy of nobh_writepage().
+	 * The relevant bits should be factored out after logfs is merged.
+	 */
+
+	/* Is the page fully inside i_size? */
+	if (bix < end_index)
+		return __logfs_writepage(page);
+
+	 /* Is the page fully outside i_size? (truncate in progress) */
+	offset = i_size & (PAGE_CACHE_SIZE-1);
+	if (bix > end_index || offset == 0) {
+		unlock_page(page);
+		return 0; /* don't care */
+	}
+
+	/*
+	 * The page straddles i_size.  It must be zeroed out on each and every
+	 * writepage invokation because it may be mmapped.  "A file is mapped
+	 * in multiples of the page size.  For a file that is not a multiple of
+	 * the  page size, the remaining memory is zeroed when mapped, and
+	 * writes to that region are not written out to the file."
+	 */
+	zero_user_segment(page, offset, PAGE_CACHE_SIZE);
+	return __logfs_writepage(page);
+}
+
+static void logfs_invalidatepage(struct page *page, unsigned long offset)
+{
+	move_page_to_btree(page);
+	BUG_ON(PagePrivate(page) || page->private);
+}
+
+static int logfs_releasepage(struct page *page, gfp_t only_xfs_uses_this)
+{
+	return 0; /* None of these are easy to release */
+}
+
+
+int logfs_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
+		unsigned long arg)
+{
+	struct logfs_inode *li = logfs_inode(inode);
+	unsigned int oldflags, flags;
+	int err;
+
+	switch (cmd) {
+	case FS_IOC_GETFLAGS:
+		flags = li->li_flags & LOGFS_FL_USER_VISIBLE;
+		return put_user(flags, (int __user *)arg);
+	case FS_IOC_SETFLAGS:
+		if (IS_RDONLY(inode))
+			return -EROFS;
+
+		if (!is_owner_or_cap(inode))
+			return -EACCES;
+
+		err = get_user(flags, (int __user *)arg);
+		if (err)
+			return err;
+
+		mutex_lock(&inode->i_mutex);
+		oldflags = li->li_flags;
+		flags &= LOGFS_FL_USER_MODIFIABLE;
+		flags |= oldflags & ~LOGFS_FL_USER_MODIFIABLE;
+		li->li_flags = flags;
+		mutex_unlock(&inode->i_mutex);
+
+		inode->i_ctime = CURRENT_TIME;
+		mark_inode_dirty_sync(inode);
+		return 0;
+
+	default:
+		return -ENOTTY;
+	}
+}
+
+int logfs_fsync(struct file *file, struct dentry *dentry, int datasync)
+{
+	struct super_block *sb = dentry->d_inode->i_sb;
+	struct logfs_super *super = logfs_super(sb);
+
+	/* FIXME: write anchor */
+	super->s_devops->sync(sb);
+	return 0;
+}
+
+static int logfs_setattr(struct dentry *dentry, struct iattr *attr)
+{
+	struct inode *inode = dentry->d_inode;
+	int err = 0;
+
+	if (attr->ia_valid & ATTR_SIZE)
+		err = logfs_truncate(inode, attr->ia_size);
+	attr->ia_valid &= ~ATTR_SIZE;
+
+	if (!err)
+		err = inode_change_ok(inode, attr);
+	if (!err)
+		err = inode_setattr(inode, attr);
+	return err;
+}
+
+const struct inode_operations logfs_reg_iops = {
+	.setattr	= logfs_setattr,
+};
+
+const struct file_operations logfs_reg_fops = {
+	.aio_read	= generic_file_aio_read,
+	.aio_write	= generic_file_aio_write,
+	.fsync		= logfs_fsync,
+	.ioctl		= logfs_ioctl,
+	.llseek		= generic_file_llseek,
+	.mmap		= generic_file_readonly_mmap,
+	.open		= generic_file_open,
+	.read		= do_sync_read,
+	.write		= do_sync_write,
+};
+
+const struct address_space_operations logfs_reg_aops = {
+	.invalidatepage	= logfs_invalidatepage,
+	.readpage	= logfs_readpage,
+	.releasepage	= logfs_releasepage,
+	.set_page_dirty	= __set_page_dirty_nobuffers,
+	.writepage	= logfs_writepage,
+	.writepages	= generic_writepages,
+	.write_begin	= logfs_write_begin,
+	.write_end	= logfs_write_end,
+};
diff --git a/fs/logfs/gc.c b/fs/logfs/gc.c
new file mode 100644
index 000000000000..b3656c44190e
--- /dev/null
+++ b/fs/logfs/gc.c
@@ -0,0 +1,730 @@
+/*
+ * fs/logfs/gc.c	- garbage collection code
+ *
+ * As should be obvious for Linux kernel code, license is GPLv2
+ *
+ * Copyright (c) 2005-2008 Joern Engel <joern@logfs.org>
+ */
+#include "logfs.h"
+#include <linux/sched.h>
+
+/*
+ * Wear leveling needs to kick in when the difference between low erase
+ * counts and high erase counts gets too big.  A good value for "too big"
+ * may be somewhat below 10% of maximum erase count for the device.
+ * Why not 397, to pick a nice round number with no specific meaning? :)
+ *
+ * WL_RATELIMIT is the minimum time between two wear level events.  A huge
+ * number of segments may fulfil the requirements for wear leveling at the
+ * same time.  If that happens we don't want to cause a latency from hell,
+ * but just gently pick one segment every so often and minimize overhead.
+ */
+#define WL_DELTA 397
+#define WL_RATELIMIT 100
+#define MAX_OBJ_ALIASES	2600
+#define SCAN_RATIO 512	/* number of scanned segments per gc'd segment */
+#define LIST_SIZE 64	/* base size of candidate lists */
+#define SCAN_ROUNDS 128	/* maximum number of complete medium scans */
+#define SCAN_ROUNDS_HIGH 4 /* maximum number of higher-level scans */
+
+static int no_free_segments(struct super_block *sb)
+{
+	struct logfs_super *super = logfs_super(sb);
+
+	return super->s_free_list.count;
+}
+
+/* journal has distance -1, top-most ifile layer distance 0 */
+static u8 root_distance(struct super_block *sb, gc_level_t __gc_level)
+{
+	struct logfs_super *super = logfs_super(sb);
+	u8 gc_level = (__force u8)__gc_level;
+
+	switch (gc_level) {
+	case 0: /* fall through */
+	case 1: /* fall through */
+	case 2: /* fall through */
+	case 3:
+		/* file data or indirect blocks */
+		return super->s_ifile_levels + super->s_iblock_levels - gc_level;
+	case 6: /* fall through */
+	case 7: /* fall through */
+	case 8: /* fall through */
+	case 9:
+		/* inode file data or indirect blocks */
+		return super->s_ifile_levels - (gc_level - 6);
+	default:
+		printk(KERN_ERR"LOGFS: segment of unknown level %x found\n",
+				gc_level);
+		WARN_ON(1);
+		return super->s_ifile_levels + super->s_iblock_levels;
+	}
+}
+
+static int segment_is_reserved(struct super_block *sb, u32 segno)
+{
+	struct logfs_super *super = logfs_super(sb);
+	struct logfs_area *area;
+	void *reserved;
+	int i;
+
+	/* Some segments are reserved.  Just pretend they were all valid */
+	reserved = btree_lookup32(&super->s_reserved_segments, segno);
+	if (reserved)
+		return 1;
+
+	/* Currently open segments */
+	for_each_area(i) {
+		area = super->s_area[i];
+		if (area->a_is_open && area->a_segno == segno)
+			return 1;
+	}
+
+	return 0;
+}
+
+static void logfs_mark_segment_bad(struct super_block *sb, u32 segno)
+{
+	BUG();
+}
+
+/*
+ * Returns the bytes consumed by valid objects in this segment.  Object headers
+ * are counted, the segment header is not.
+ */
+static u32 logfs_valid_bytes(struct super_block *sb, u32 segno, u32 *ec,
+		gc_level_t *gc_level)
+{
+	struct logfs_segment_entry se;
+	u32 ec_level;
+
+	logfs_get_segment_entry(sb, segno, &se);
+	if (se.ec_level == cpu_to_be32(BADSEG) ||
+			se.valid == cpu_to_be32(RESERVED))
+		return RESERVED;
+
+	ec_level = be32_to_cpu(se.ec_level);
+	*ec = ec_level >> 4;
+	*gc_level = GC_LEVEL(ec_level & 0xf);
+	return be32_to_cpu(se.valid);
+}
+
+static void logfs_cleanse_block(struct super_block *sb, u64 ofs, u64 ino,
+		u64 bix, gc_level_t gc_level)
+{
+	struct inode *inode;
+	int err, cookie;
+
+	inode = logfs_safe_iget(sb, ino, &cookie);
+	err = logfs_rewrite_block(inode, bix, ofs, gc_level, 0);
+	BUG_ON(err);
+	logfs_safe_iput(inode, cookie);
+}
+
+static u32 logfs_gc_segment(struct super_block *sb, u32 segno, u8 dist)
+{
+	struct logfs_super *super = logfs_super(sb);
+	struct logfs_segment_header sh;
+	struct logfs_object_header oh;
+	u64 ofs, ino, bix;
+	u32 seg_ofs, logical_segno, cleaned = 0;
+	int err, len, valid;
+	gc_level_t gc_level;
+
+	LOGFS_BUG_ON(segment_is_reserved(sb, segno), sb);
+
+	btree_insert32(&super->s_reserved_segments, segno, (void *)1, GFP_NOFS);
+	err = wbuf_read(sb, dev_ofs(sb, segno, 0), sizeof(sh), &sh);
+	BUG_ON(err);
+	gc_level = GC_LEVEL(sh.level);
+	logical_segno = be32_to_cpu(sh.segno);
+	if (sh.crc != logfs_crc32(&sh, sizeof(sh), 4)) {
+		logfs_mark_segment_bad(sb, segno);
+		cleaned = -1;
+		goto out;
+	}
+
+	for (seg_ofs = LOGFS_SEGMENT_HEADERSIZE;
+			seg_ofs + sizeof(oh) < super->s_segsize; ) {
+		ofs = dev_ofs(sb, logical_segno, seg_ofs);
+		err = wbuf_read(sb, dev_ofs(sb, segno, seg_ofs), sizeof(oh),
+				&oh);
+		BUG_ON(err);
+
+		if (!memchr_inv(&oh, 0xff, sizeof(oh)))
+			break;
+
+		if (oh.crc != logfs_crc32(&oh, sizeof(oh) - 4, 4)) {
+			logfs_mark_segment_bad(sb, segno);
+			cleaned = super->s_segsize - 1;
+			goto out;
+		}
+
+		ino = be64_to_cpu(oh.ino);
+		bix = be64_to_cpu(oh.bix);
+		len = sizeof(oh) + be16_to_cpu(oh.len);
+		valid = logfs_is_valid_block(sb, ofs, ino, bix, gc_level);
+		if (valid == 1) {
+			logfs_cleanse_block(sb, ofs, ino, bix, gc_level);
+			cleaned += len;
+		} else if (valid == 2) {
+			/* Will be invalid upon journal commit */
+			cleaned += len;
+		}
+		seg_ofs += len;
+	}
+out:
+	btree_remove32(&super->s_reserved_segments, segno);
+	return cleaned;
+}
+
+static struct gc_candidate *add_list(struct gc_candidate *cand,
+		struct candidate_list *list)
+{
+	struct rb_node **p = &list->rb_tree.rb_node;
+	struct rb_node *parent = NULL;
+	struct gc_candidate *cur;
+	int comp;
+
+	cand->list = list;
+	while (*p) {
+		parent = *p;
+		cur = rb_entry(parent, struct gc_candidate, rb_node);
+
+		if (list->sort_by_ec)
+			comp = cand->erase_count < cur->erase_count;
+		else
+			comp = cand->valid < cur->valid;
+
+		if (comp)
+			p = &parent->rb_left;
+		else
+			p = &parent->rb_right;
+	}
+	rb_link_node(&cand->rb_node, parent, p);
+	rb_insert_color(&cand->rb_node, &list->rb_tree);
+
+	if (list->count <= list->maxcount) {
+		list->count++;
+		return NULL;
+	}
+	cand = rb_entry(rb_last(&list->rb_tree), struct gc_candidate, rb_node);
+	rb_erase(&cand->rb_node, &list->rb_tree);
+	cand->list = NULL;
+	return cand;
+}
+
+static void remove_from_list(struct gc_candidate *cand)
+{
+	struct candidate_list *list = cand->list;
+
+	rb_erase(&cand->rb_node, &list->rb_tree);
+	list->count--;
+}
+
+static void free_candidate(struct super_block *sb, struct gc_candidate *cand)
+{
+	struct logfs_super *super = logfs_super(sb);
+
+	btree_remove32(&super->s_cand_tree, cand->segno);
+	kfree(cand);
+}
+
+u32 get_best_cand(struct super_block *sb, struct candidate_list *list, u32 *ec)
+{
+	struct gc_candidate *cand;
+	u32 segno;
+
+	BUG_ON(list->count == 0);
+
+	cand = rb_entry(rb_first(&list->rb_tree), struct gc_candidate, rb_node);
+	remove_from_list(cand);
+	segno = cand->segno;
+	if (ec)
+		*ec = cand->erase_count;
+	free_candidate(sb, cand);
+	return segno;
+}
+
+/*
+ * We have several lists to manage segments with.  The reserve_list is used to
+ * deal with bad blocks.  We try to keep the best (lowest ec) segments on this
+ * list.
+ * The free_list contains free segments for normal usage.  It usually gets the
+ * second pick after the reserve_list.  But when the free_list is running short
+ * it is more important to keep the free_list full than to keep a reserve.
+ *
+ * Segments that are not free are put onto a per-level low_list.  If we have
+ * to run garbage collection, we pick a candidate from there.  All segments on
+ * those lists should have at least some free space so GC will make progress.
+ *
+ * And last we have the ec_list, which is used to pick segments for wear
+ * leveling.
+ *
+ * If all appropriate lists are full, we simply free the candidate and forget
+ * about that segment for a while.  We have better candidates for each purpose.
+ */
+static void __add_candidate(struct super_block *sb, struct gc_candidate *cand)
+{
+	struct logfs_super *super = logfs_super(sb);
+	u32 full = super->s_segsize - LOGFS_SEGMENT_RESERVE;
+
+	if (cand->valid == 0) {
+		/* 100% free segments */
+		log_gc_noisy("add reserve segment %x (ec %x) at %llx\n",
+				cand->segno, cand->erase_count,
+				dev_ofs(sb, cand->segno, 0));
+		cand = add_list(cand, &super->s_reserve_list);
+		if (cand) {
+			log_gc_noisy("add free segment %x (ec %x) at %llx\n",
+					cand->segno, cand->erase_count,
+					dev_ofs(sb, cand->segno, 0));
+			cand = add_list(cand, &super->s_free_list);
+		}
+	} else {
+		/* good candidates for Garbage Collection */
+		if (cand->valid < full)
+			cand = add_list(cand, &super->s_low_list[cand->dist]);
+		/* good candidates for wear leveling,
+		 * segments that were recently written get ignored */
+		if (cand)
+			cand = add_list(cand, &super->s_ec_list);
+	}
+	if (cand)
+		free_candidate(sb, cand);
+}
+
+static int add_candidate(struct super_block *sb, u32 segno, u32 valid, u32 ec,
+		u8 dist)
+{
+	struct logfs_super *super = logfs_super(sb);
+	struct gc_candidate *cand;
+
+	cand = kmalloc(sizeof(*cand), GFP_NOFS);
+	if (!cand)
+		return -ENOMEM;
+
+	cand->segno = segno;
+	cand->valid = valid;
+	cand->erase_count = ec;
+	cand->dist = dist;
+
+	btree_insert32(&super->s_cand_tree, segno, cand, GFP_NOFS);
+	__add_candidate(sb, cand);
+	return 0;
+}
+
+static void remove_segment_from_lists(struct super_block *sb, u32 segno)
+{
+	struct logfs_super *super = logfs_super(sb);
+	struct gc_candidate *cand;
+
+	cand = btree_lookup32(&super->s_cand_tree, segno);
+	if (cand) {
+		remove_from_list(cand);
+		free_candidate(sb, cand);
+	}
+}
+
+static void scan_segment(struct super_block *sb, u32 segno)
+{
+	u32 valid, ec = 0;
+	gc_level_t gc_level = 0;
+	u8 dist;
+
+	if (segment_is_reserved(sb, segno))
+		return;
+
+	remove_segment_from_lists(sb, segno);
+	valid = logfs_valid_bytes(sb, segno, &ec, &gc_level);
+	if (valid == RESERVED)
+		return;
+
+	dist = root_distance(sb, gc_level);
+	add_candidate(sb, segno, valid, ec, dist);
+}
+
+static struct gc_candidate *first_in_list(struct candidate_list *list)
+{
+	if (list->count == 0)
+		return NULL;
+	return rb_entry(rb_first(&list->rb_tree), struct gc_candidate, rb_node);
+}
+
+/*
+ * Find the best segment for garbage collection.  Main criterion is
+ * the segment requiring the least effort to clean.  Secondary
+ * criterion is to GC on the lowest level available.
+ *
+ * So we search the least effort segment on the lowest level first,
+ * then move up and pick another segment iff is requires significantly
+ * less effort.  Hence the LOGFS_MAX_OBJECTSIZE in the comparison.
+ */
+static struct gc_candidate *get_candidate(struct super_block *sb)
+{
+	struct logfs_super *super = logfs_super(sb);
+	int i, max_dist;
+	struct gc_candidate *cand = NULL, *this;
+
+	max_dist = min(no_free_segments(sb), LOGFS_NO_AREAS);
+
+	for (i = max_dist; i >= 0; i--) {
+		this = first_in_list(&super->s_low_list[i]);
+		if (!this)
+			continue;
+		if (!cand)
+			cand = this;
+		if (this->valid + LOGFS_MAX_OBJECTSIZE <= cand->valid)
+			cand = this;
+	}
+	return cand;
+}
+
+static int __logfs_gc_once(struct super_block *sb, struct gc_candidate *cand)
+{
+	struct logfs_super *super = logfs_super(sb);
+	gc_level_t gc_level;
+	u32 cleaned, valid, segno, ec;
+	u8 dist;
+
+	if (!cand) {
+		log_gc("GC attempted, but no candidate found\n");
+		return 0;
+	}
+
+	segno = cand->segno;
+	dist = cand->dist;
+	valid = logfs_valid_bytes(sb, segno, &ec, &gc_level);
+	free_candidate(sb, cand);
+	log_gc("GC segment #%02x at %llx, %x required, %x free, %x valid, %llx free\n",
+			segno, (u64)segno << super->s_segshift,
+			dist, no_free_segments(sb), valid,
+			super->s_free_bytes);
+	cleaned = logfs_gc_segment(sb, segno, dist);
+	log_gc("GC segment #%02x complete - now %x valid\n", segno,
+			valid - cleaned);
+	BUG_ON(cleaned != valid);
+	return 1;
+}
+
+static int logfs_gc_once(struct super_block *sb)
+{
+	struct gc_candidate *cand;
+
+	cand = get_candidate(sb);
+	if (cand)
+		remove_from_list(cand);
+	return __logfs_gc_once(sb, cand);
+}
+
+/* returns 1 if a wrap occurs, 0 otherwise */
+static int logfs_scan_some(struct super_block *sb)
+{
+	struct logfs_super *super = logfs_super(sb);
+	u32 segno;
+	int i, ret = 0;
+
+	segno = super->s_sweeper;
+	for (i = SCAN_RATIO; i > 0; i--) {
+		segno++;
+		if (segno >= super->s_no_segs) {
+			segno = 0;
+			ret = 1;
+			/* Break out of the loop.  We want to read a single
+			 * block from the segment size on next invocation if
+			 * SCAN_RATIO is set to match block size
+			 */
+			break;
+		}
+
+		scan_segment(sb, segno);
+	}
+	super->s_sweeper = segno;
+	return ret;
+}
+
+/*
+ * In principle, this function should loop forever, looking for GC candidates
+ * and moving data.  LogFS is designed in such a way that this loop is
+ * guaranteed to terminate.
+ *
+ * Limiting the loop to some iterations serves purely to catch cases when
+ * these guarantees have failed.  An actual endless loop is an obvious bug
+ * and should be reported as such.
+ */
+static void __logfs_gc_pass(struct super_block *sb, int target)
+{
+	struct logfs_super *super = logfs_super(sb);
+	struct logfs_block *block;
+	int round, progress, last_progress = 0;
+
+	if (no_free_segments(sb) >= target &&
+			super->s_no_object_aliases < MAX_OBJ_ALIASES)
+		return;
+
+	log_gc("__logfs_gc_pass(%x)\n", target);
+	for (round = 0; round < SCAN_ROUNDS; ) {
+		if (no_free_segments(sb) >= target)
+			goto write_alias;
+
+		/* Sync in-memory state with on-medium state in case they
+		 * diverged */
+		logfs_write_anchor(super->s_master_inode);
+		round += logfs_scan_some(sb);
+		if (no_free_segments(sb) >= target)
+			goto write_alias;
+		progress = logfs_gc_once(sb);
+		if (progress)
+			last_progress = round;
+		else if (round - last_progress > 2)
+			break;
+		continue;
+
+		/*
+		 * The goto logic is nasty, I just don't know a better way to
+		 * code it.  GC is supposed to ensure two things:
+		 * 1. Enough free segments are available.
+		 * 2. The number of aliases is bounded.
+		 * When 1. is achieved, we take a look at 2. and write back
+		 * some alias-containing blocks, if necessary.  However, after
+		 * each such write we need to go back to 1., as writes can
+		 * consume free segments.
+		 */
+write_alias:
+		if (super->s_no_object_aliases < MAX_OBJ_ALIASES)
+			return;
+		if (list_empty(&super->s_object_alias)) {
+			/* All aliases are still in btree */
+			return;
+		}
+		log_gc("Write back one alias\n");
+		block = list_entry(super->s_object_alias.next,
+				struct logfs_block, alias_list);
+		block->ops->write_block(block);
+		/*
+		 * To round off the nasty goto logic, we reset round here.  It
+		 * is a safety-net for GC not making any progress and limited
+		 * to something reasonably small.  If incremented it for every
+		 * single alias, the loop could terminate rather quickly.
+		 */
+		round = 0;
+	}
+	LOGFS_BUG(sb);
+}
+
+static int wl_ratelimit(struct super_block *sb, u64 *next_event)
+{
+	struct logfs_super *super = logfs_super(sb);
+
+	if (*next_event < super->s_gec) {
+		*next_event = super->s_gec + WL_RATELIMIT;
+		return 0;
+	}
+	return 1;
+}
+
+static void logfs_wl_pass(struct super_block *sb)
+{
+	struct logfs_super *super = logfs_super(sb);
+	struct gc_candidate *wl_cand, *free_cand;
+
+	if (wl_ratelimit(sb, &super->s_wl_gec_ostore))
+		return;
+
+	wl_cand = first_in_list(&super->s_ec_list);
+	if (!wl_cand)
+		return;
+	free_cand = first_in_list(&super->s_free_list);
+	if (!free_cand)
+		return;
+
+	if (wl_cand->erase_count < free_cand->erase_count + WL_DELTA) {
+		remove_from_list(wl_cand);
+		__logfs_gc_once(sb, wl_cand);
+	}
+}
+
+/*
+ * The journal needs wear leveling as well.  But moving the journal is an
+ * expensive operation so we try to avoid it as much as possible.  And if we
+ * have to do it, we move the whole journal, not individual segments.
+ *
+ * Ratelimiting is not strictly necessary here, it mainly serves to avoid the
+ * calculations.  First we check whether moving the journal would be a
+ * significant improvement.  That means that a) the current journal segments
+ * have more wear than the future journal segments and b) the current journal
+ * segments have more wear than normal ostore segments.
+ * Rationale for b) is that we don't have to move the journal if it is aging
+ * less than the ostore, even if the reserve segments age even less (they are
+ * excluded from wear leveling, after all).
+ * Next we check that the superblocks have less wear than the journal.  Since
+ * moving the journal requires writing the superblocks, we have to protect the
+ * superblocks even more than the journal.
+ *
+ * Also we double the acceptable wear difference, compared to ostore wear
+ * leveling.  Journal data is read and rewritten rapidly, comparatively.  So
+ * soft errors have much less time to accumulate and we allow the journal to
+ * be a bit worse than the ostore.
+ */
+static void logfs_journal_wl_pass(struct super_block *sb)
+{
+	struct logfs_super *super = logfs_super(sb);
+	struct gc_candidate *cand;
+	u32 min_journal_ec = -1, max_reserve_ec = 0;
+	int i;
+
+	if (wl_ratelimit(sb, &super->s_wl_gec_journal))
+		return;
+
+	if (super->s_reserve_list.count < super->s_no_journal_segs) {
+		/* Reserve is not full enough to move complete journal */
+		return;
+	}
+
+	journal_for_each(i)
+		if (super->s_journal_seg[i])
+			min_journal_ec = min(min_journal_ec,
+					super->s_journal_ec[i]);
+	cand = rb_entry(rb_first(&super->s_free_list.rb_tree),
+			struct gc_candidate, rb_node);
+	max_reserve_ec = cand->erase_count;
+	for (i = 0; i < 2; i++) {
+		struct logfs_segment_entry se;
+		u32 segno = seg_no(sb, super->s_sb_ofs[i]);
+		u32 ec;
+
+		logfs_get_segment_entry(sb, segno, &se);
+		ec = be32_to_cpu(se.ec_level) >> 4;
+		max_reserve_ec = max(max_reserve_ec, ec);
+	}
+
+	if (min_journal_ec > max_reserve_ec + 2 * WL_DELTA) {
+		do_logfs_journal_wl_pass(sb);
+	}
+}
+
+void logfs_gc_pass(struct super_block *sb)
+{
+	struct logfs_super *super = logfs_super(sb);
+
+	//BUG_ON(mutex_trylock(&logfs_super(sb)->s_w_mutex));
+	/* Write journal before free space is getting saturated with dirty
+	 * objects.
+	 */
+	if (super->s_dirty_used_bytes + super->s_dirty_free_bytes
+			+ LOGFS_MAX_OBJECTSIZE >= super->s_free_bytes)
+		logfs_write_anchor(super->s_master_inode);
+	__logfs_gc_pass(sb, logfs_super(sb)->s_total_levels);
+	logfs_wl_pass(sb);
+	logfs_journal_wl_pass(sb);
+}
+
+static int check_area(struct super_block *sb, int i)
+{
+	struct logfs_super *super = logfs_super(sb);
+	struct logfs_area *area = super->s_area[i];
+	struct logfs_object_header oh;
+	u32 segno = area->a_segno;
+	u32 ofs = area->a_used_bytes;
+	__be32 crc;
+	int err;
+
+	if (!area->a_is_open)
+		return 0;
+
+	for (ofs = area->a_used_bytes;
+	     ofs <= super->s_segsize - sizeof(oh);
+	     ofs += (u32)be16_to_cpu(oh.len) + sizeof(oh)) {
+		err = wbuf_read(sb, dev_ofs(sb, segno, ofs), sizeof(oh), &oh);
+		if (err)
+			return err;
+
+		if (!memchr_inv(&oh, 0xff, sizeof(oh)))
+			break;
+
+		crc = logfs_crc32(&oh, sizeof(oh) - 4, 4);
+		if (crc != oh.crc) {
+			printk(KERN_INFO "interrupted header at %llx\n",
+					dev_ofs(sb, segno, ofs));
+			return 0;
+		}
+	}
+	if (ofs != area->a_used_bytes) {
+		printk(KERN_INFO "%x bytes unaccounted data found at %llx\n",
+				ofs - area->a_used_bytes,
+				dev_ofs(sb, segno, area->a_used_bytes));
+		area->a_used_bytes = ofs;
+	}
+	return 0;
+}
+
+int logfs_check_areas(struct super_block *sb)
+{
+	int i, err;
+
+	for_each_area(i) {
+		err = check_area(sb, i);
+		if (err)
+			return err;
+	}
+	return 0;
+}
+
+static void logfs_init_candlist(struct candidate_list *list, int maxcount,
+		int sort_by_ec)
+{
+	list->count = 0;
+	list->maxcount = maxcount;
+	list->sort_by_ec = sort_by_ec;
+	list->rb_tree = RB_ROOT;
+}
+
+int logfs_init_gc(struct super_block *sb)
+{
+	struct logfs_super *super = logfs_super(sb);
+	int i;
+
+	btree_init_mempool32(&super->s_cand_tree, super->s_btree_pool);
+	logfs_init_candlist(&super->s_free_list, LIST_SIZE + SCAN_RATIO, 1);
+	logfs_init_candlist(&super->s_reserve_list,
+			super->s_bad_seg_reserve, 1);
+	for_each_area(i)
+		logfs_init_candlist(&super->s_low_list[i], LIST_SIZE, 0);
+	logfs_init_candlist(&super->s_ec_list, LIST_SIZE, 1);
+	return 0;
+}
+
+static void logfs_cleanup_list(struct super_block *sb,
+		struct candidate_list *list)
+{
+	struct gc_candidate *cand;
+
+	while (list->count) {
+		cand = rb_entry(list->rb_tree.rb_node, struct gc_candidate,
+				rb_node);
+		remove_from_list(cand);
+		free_candidate(sb, cand);
+	}
+	BUG_ON(list->rb_tree.rb_node);
+}
+
+void logfs_cleanup_gc(struct super_block *sb)
+{
+	struct logfs_super *super = logfs_super(sb);
+	int i;
+
+	if (!super->s_free_list.count)
+		return;
+
+	/*
+	 * FIXME: The btree may still contain a single empty node.  So we
+	 * call the grim visitor to clean up that mess.  Btree code should
+	 * do it for us, really.
+	 */
+	btree_grim_visitor32(&super->s_cand_tree, 0, NULL);
+	logfs_cleanup_list(sb, &super->s_free_list);
+	logfs_cleanup_list(sb, &super->s_reserve_list);
+	for_each_area(i)
+		logfs_cleanup_list(sb, &super->s_low_list[i]);
+	logfs_cleanup_list(sb, &super->s_ec_list);
+}
diff --git a/fs/logfs/inode.c b/fs/logfs/inode.c
new file mode 100644
index 000000000000..6d08b3762641
--- /dev/null
+++ b/fs/logfs/inode.c
@@ -0,0 +1,417 @@
+/*
+ * fs/logfs/inode.c	- inode handling code
+ *
+ * As should be obvious for Linux kernel code, license is GPLv2
+ *
+ * Copyright (c) 2005-2008 Joern Engel <joern@logfs.org>
+ */
+#include "logfs.h"
+#include <linux/writeback.h>
+#include <linux/backing-dev.h>
+
+/*
+ * How soon to reuse old inode numbers?  LogFS doesn't store deleted inodes
+ * on the medium.  It therefore also lacks a method to store the previous
+ * generation number for deleted inodes.  Instead a single generation number
+ * is stored which will be used for new inodes.  Being just a 32bit counter,
+ * this can obvious wrap relatively quickly.  So we only reuse inodes if we
+ * know that a fair number of inodes can be created before we have to increment
+ * the generation again - effectively adding some bits to the counter.
+ * But being too aggressive here means we keep a very large and very sparse
+ * inode file, wasting space on indirect blocks.
+ * So what is a good value?  Beats me.  64k seems moderately bad on both
+ * fronts, so let's use that for now...
+ *
+ * NFS sucks, as everyone already knows.
+ */
+#define INOS_PER_WRAP (0x10000)
+
+/*
+ * Logfs' requirement to read inodes for garbage collection makes life a bit
+ * harder.  GC may have to read inodes that are in I_FREEING state, when they
+ * are being written out - and waiting for GC to make progress, naturally.
+ *
+ * So we cannot just call iget() or some variant of it, but first have to check
+ * wether the inode in question might be in I_FREEING state.  Therefore we
+ * maintain our own per-sb list of "almost deleted" inodes and check against
+ * that list first.  Normally this should be at most 1-2 entries long.
+ *
+ * Also, inodes have logfs-specific reference counting on top of what the vfs
+ * does.  When .destroy_inode is called, normally the reference count will drop
+ * to zero and the inode gets deleted.  But if GC accessed the inode, its
+ * refcount will remain nonzero and final deletion will have to wait.
+ *
+ * As a result we have two sets of functions to get/put inodes:
+ * logfs_safe_iget/logfs_safe_iput	- safe to call from GC context
+ * logfs_iget/iput			- normal version
+ */
+static struct kmem_cache *logfs_inode_cache;
+
+static DEFINE_SPINLOCK(logfs_inode_lock);
+
+static void logfs_inode_setops(struct inode *inode)
+{
+	switch (inode->i_mode & S_IFMT) {
+	case S_IFDIR:
+		inode->i_op = &logfs_dir_iops;
+		inode->i_fop = &logfs_dir_fops;
+		inode->i_mapping->a_ops = &logfs_reg_aops;
+		break;
+	case S_IFREG:
+		inode->i_op = &logfs_reg_iops;
+		inode->i_fop = &logfs_reg_fops;
+		inode->i_mapping->a_ops = &logfs_reg_aops;
+		break;
+	case S_IFLNK:
+		inode->i_op = &logfs_symlink_iops;
+		inode->i_mapping->a_ops = &logfs_reg_aops;
+		break;
+	case S_IFSOCK:	/* fall through */
+	case S_IFBLK:	/* fall through */
+	case S_IFCHR:	/* fall through */
+	case S_IFIFO:
+		init_special_inode(inode, inode->i_mode, inode->i_rdev);
+		break;
+	default:
+		BUG();
+	}
+}
+
+static struct inode *__logfs_iget(struct super_block *sb, ino_t ino)
+{
+	struct inode *inode = iget_locked(sb, ino);
+	int err;
+
+	if (!inode)
+		return ERR_PTR(-ENOMEM);
+	if (!(inode->i_state & I_NEW))
+		return inode;
+
+	err = logfs_read_inode(inode);
+	if (err || inode->i_nlink == 0) {
+		/* inode->i_nlink == 0 can be true when called from
+		 * block validator */
+		/* set i_nlink to 0 to prevent caching */
+		inode->i_nlink = 0;
+		logfs_inode(inode)->li_flags |= LOGFS_IF_ZOMBIE;
+		iget_failed(inode);
+		if (!err)
+			err = -ENOENT;
+		return ERR_PTR(err);
+	}
+
+	logfs_inode_setops(inode);
+	unlock_new_inode(inode);
+	return inode;
+}
+
+struct inode *logfs_iget(struct super_block *sb, ino_t ino)
+{
+	BUG_ON(ino == LOGFS_INO_MASTER);
+	BUG_ON(ino == LOGFS_INO_SEGFILE);
+	return __logfs_iget(sb, ino);
+}
+
+/*
+ * is_cached is set to 1 if we hand out a cached inode, 0 otherwise.
+ * this allows logfs_iput to do the right thing later
+ */
+struct inode *logfs_safe_iget(struct super_block *sb, ino_t ino, int *is_cached)
+{
+	struct logfs_super *super = logfs_super(sb);
+	struct logfs_inode *li;
+
+	if (ino == LOGFS_INO_MASTER)
+		return super->s_master_inode;
+	if (ino == LOGFS_INO_SEGFILE)
+		return super->s_segfile_inode;
+
+	spin_lock(&logfs_inode_lock);
+	list_for_each_entry(li, &super->s_freeing_list, li_freeing_list)
+		if (li->vfs_inode.i_ino == ino) {
+			li->li_refcount++;
+			spin_unlock(&logfs_inode_lock);
+			*is_cached = 1;
+			return &li->vfs_inode;
+		}
+	spin_unlock(&logfs_inode_lock);
+
+	*is_cached = 0;
+	return __logfs_iget(sb, ino);
+}
+
+static void __logfs_destroy_inode(struct inode *inode)
+{
+	struct logfs_inode *li = logfs_inode(inode);
+
+	BUG_ON(li->li_block);
+	list_del(&li->li_freeing_list);
+	kmem_cache_free(logfs_inode_cache, li);
+}
+
+static void logfs_destroy_inode(struct inode *inode)
+{
+	struct logfs_inode *li = logfs_inode(inode);
+
+	BUG_ON(list_empty(&li->li_freeing_list));
+	spin_lock(&logfs_inode_lock);
+	li->li_refcount--;
+	if (li->li_refcount == 0)
+		__logfs_destroy_inode(inode);
+	spin_unlock(&logfs_inode_lock);
+}
+
+void logfs_safe_iput(struct inode *inode, int is_cached)
+{
+	if (inode->i_ino == LOGFS_INO_MASTER)
+		return;
+	if (inode->i_ino == LOGFS_INO_SEGFILE)
+		return;
+
+	if (is_cached) {
+		logfs_destroy_inode(inode);
+		return;
+	}
+
+	iput(inode);
+}
+
+static void logfs_init_inode(struct super_block *sb, struct inode *inode)
+{
+	struct logfs_inode *li = logfs_inode(inode);
+	int i;
+
+	li->li_flags	= 0;
+	li->li_height	= 0;
+	li->li_used_bytes = 0;
+	li->li_block	= NULL;
+	inode->i_uid	= 0;
+	inode->i_gid	= 0;
+	inode->i_size	= 0;
+	inode->i_blocks	= 0;
+	inode->i_ctime	= CURRENT_TIME;
+	inode->i_mtime	= CURRENT_TIME;
+	inode->i_nlink	= 1;
+	INIT_LIST_HEAD(&li->li_freeing_list);
+
+	for (i = 0; i < LOGFS_EMBEDDED_FIELDS; i++)
+		li->li_data[i] = 0;
+
+	return;
+}
+
+static struct inode *logfs_alloc_inode(struct super_block *sb)
+{
+	struct logfs_inode *li;
+
+	li = kmem_cache_alloc(logfs_inode_cache, GFP_NOFS);
+	if (!li)
+		return NULL;
+	logfs_init_inode(sb, &li->vfs_inode);
+	return &li->vfs_inode;
+}
+
+/*
+ * In logfs inodes are written to an inode file.  The inode file, like any
+ * other file, is managed with a inode.  The inode file's inode, aka master
+ * inode, requires special handling in several respects.  First, it cannot be
+ * written to the inode file, so it is stored in the journal instead.
+ *
+ * Secondly, this inode cannot be written back and destroyed before all other
+ * inodes have been written.  The ordering is important.  Linux' VFS is happily
+ * unaware of the ordering constraint and would ordinarily destroy the master
+ * inode at umount time while other inodes are still in use and dirty.  Not
+ * good.
+ *
+ * So logfs makes sure the master inode is not written until all other inodes
+ * have been destroyed.  Sadly, this method has another side-effect.  The VFS
+ * will notice one remaining inode and print a frightening warning message.
+ * Worse, it is impossible to judge whether such a warning was caused by the
+ * master inode or any other inodes have leaked as well.
+ *
+ * Our attempt of solving this is with logfs_new_meta_inode() below.  Its
+ * purpose is to create a new inode that will not trigger the warning if such
+ * an inode is still in use.  An ugly hack, no doubt.  Suggections for
+ * improvement are welcome.
+ */
+struct inode *logfs_new_meta_inode(struct super_block *sb, u64 ino)
+{
+	struct inode *inode;
+
+	inode = logfs_alloc_inode(sb);
+	if (!inode)
+		return ERR_PTR(-ENOMEM);
+
+	inode->i_mode = S_IFREG;
+	inode->i_ino = ino;
+	inode->i_sb = sb;
+
+	/* This is a blatant copy of alloc_inode code.  We'd need alloc_inode
+	 * to be nonstatic, alas. */
+	{
+		struct address_space * const mapping = &inode->i_data;
+
+		mapping->a_ops = &logfs_reg_aops;
+		mapping->host = inode;
+		mapping->flags = 0;
+		mapping_set_gfp_mask(mapping, GFP_NOFS);
+		mapping->assoc_mapping = NULL;
+		mapping->backing_dev_info = &default_backing_dev_info;
+		inode->i_mapping = mapping;
+		inode->i_nlink = 1;
+	}
+
+	return inode;
+}
+
+struct inode *logfs_read_meta_inode(struct super_block *sb, u64 ino)
+{
+	struct inode *inode;
+	int err;
+
+	inode = logfs_new_meta_inode(sb, ino);
+	if (IS_ERR(inode))
+		return inode;
+
+	err = logfs_read_inode(inode);
+	if (err) {
+		destroy_meta_inode(inode);
+		return ERR_PTR(err);
+	}
+	logfs_inode_setops(inode);
+	return inode;
+}
+
+static int logfs_write_inode(struct inode *inode, int do_sync)
+{
+	int ret;
+	long flags = WF_LOCK;
+
+	/* Can only happen if creat() failed.  Safe to skip. */
+	if (logfs_inode(inode)->li_flags & LOGFS_IF_STILLBORN)
+		return 0;
+
+	ret = __logfs_write_inode(inode, flags);
+	LOGFS_BUG_ON(ret, inode->i_sb);
+	return ret;
+}
+
+void destroy_meta_inode(struct inode *inode)
+{
+	if (inode) {
+		if (inode->i_data.nrpages)
+			truncate_inode_pages(&inode->i_data, 0);
+		logfs_clear_inode(inode);
+		kmem_cache_free(logfs_inode_cache, logfs_inode(inode));
+	}
+}
+
+/* called with inode_lock held */
+static void logfs_drop_inode(struct inode *inode)
+{
+	struct logfs_super *super = logfs_super(inode->i_sb);
+	struct logfs_inode *li = logfs_inode(inode);
+
+	spin_lock(&logfs_inode_lock);
+	list_move(&li->li_freeing_list, &super->s_freeing_list);
+	spin_unlock(&logfs_inode_lock);
+	generic_drop_inode(inode);
+}
+
+static void logfs_set_ino_generation(struct super_block *sb,
+		struct inode *inode)
+{
+	struct logfs_super *super = logfs_super(sb);
+	u64 ino;
+
+	mutex_lock(&super->s_journal_mutex);
+	ino = logfs_seek_hole(super->s_master_inode, super->s_last_ino);
+	super->s_last_ino = ino;
+	super->s_inos_till_wrap--;
+	if (super->s_inos_till_wrap < 0) {
+		super->s_last_ino = LOGFS_RESERVED_INOS;
+		super->s_generation++;
+		super->s_inos_till_wrap = INOS_PER_WRAP;
+	}
+	inode->i_ino = ino;
+	inode->i_generation = super->s_generation;
+	mutex_unlock(&super->s_journal_mutex);
+}
+
+struct inode *logfs_new_inode(struct inode *dir, int mode)
+{
+	struct super_block *sb = dir->i_sb;
+	struct inode *inode;
+
+	inode = new_inode(sb);
+	if (!inode)
+		return ERR_PTR(-ENOMEM);
+
+	logfs_init_inode(sb, inode);
+
+	/* inherit parent flags */
+	logfs_inode(inode)->li_flags |=
+		logfs_inode(dir)->li_flags & LOGFS_FL_INHERITED;
+
+	inode->i_mode = mode;
+	logfs_set_ino_generation(sb, inode);
+
+	inode->i_uid = current_fsuid();
+	inode->i_gid = current_fsgid();
+	if (dir->i_mode & S_ISGID) {
+		inode->i_gid = dir->i_gid;
+		if (S_ISDIR(mode))
+			inode->i_mode |= S_ISGID;
+	}
+
+	logfs_inode_setops(inode);
+	insert_inode_hash(inode);
+
+	return inode;
+}
+
+static void logfs_init_once(void *_li)
+{
+	struct logfs_inode *li = _li;
+	int i;
+
+	li->li_flags = 0;
+	li->li_used_bytes = 0;
+	li->li_refcount = 1;
+	for (i = 0; i < LOGFS_EMBEDDED_FIELDS; i++)
+		li->li_data[i] = 0;
+	inode_init_once(&li->vfs_inode);
+}
+
+static int logfs_sync_fs(struct super_block *sb, int wait)
+{
+	/* FIXME: write anchor */
+	logfs_super(sb)->s_devops->sync(sb);
+	return 0;
+}
+
+const struct super_operations logfs_super_operations = {
+	.alloc_inode	= logfs_alloc_inode,
+	.clear_inode	= logfs_clear_inode,
+	.delete_inode	= logfs_delete_inode,
+	.destroy_inode	= logfs_destroy_inode,
+	.drop_inode	= logfs_drop_inode,
+	.write_inode	= logfs_write_inode,
+	.statfs		= logfs_statfs,
+	.sync_fs	= logfs_sync_fs,
+};
+
+int logfs_init_inode_cache(void)
+{
+	logfs_inode_cache = kmem_cache_create("logfs_inode_cache",
+			sizeof(struct logfs_inode), 0, SLAB_RECLAIM_ACCOUNT,
+			logfs_init_once);
+	if (!logfs_inode_cache)
+		return -ENOMEM;
+	return 0;
+}
+
+void logfs_destroy_inode_cache(void)
+{
+	kmem_cache_destroy(logfs_inode_cache);
+}
diff --git a/fs/logfs/journal.c b/fs/logfs/journal.c
new file mode 100644
index 000000000000..7a023dbba9f8
--- /dev/null
+++ b/fs/logfs/journal.c
@@ -0,0 +1,879 @@
+/*
+ * fs/logfs/journal.c	- journal handling code
+ *
+ * As should be obvious for Linux kernel code, license is GPLv2
+ *
+ * Copyright (c) 2005-2008 Joern Engel <joern@logfs.org>
+ */
+#include "logfs.h"
+
+static void logfs_calc_free(struct super_block *sb)
+{
+	struct logfs_super *super = logfs_super(sb);
+	u64 reserve, no_segs = super->s_no_segs;
+	s64 free;
+	int i;
+
+	/* superblock segments */
+	no_segs -= 2;
+	super->s_no_journal_segs = 0;
+	/* journal */
+	journal_for_each(i)
+		if (super->s_journal_seg[i]) {
+			no_segs--;
+			super->s_no_journal_segs++;
+		}
+
+	/* open segments plus one extra per level for GC */
+	no_segs -= 2 * super->s_total_levels;
+
+	free = no_segs * (super->s_segsize - LOGFS_SEGMENT_RESERVE);
+	free -= super->s_used_bytes;
+	/* just a bit extra */
+	free -= super->s_total_levels * 4096;
+
+	/* Bad blocks are 'paid' for with speed reserve - the filesystem
+	 * simply gets slower as bad blocks accumulate.  Until the bad blocks
+	 * exceed the speed reserve - then the filesystem gets smaller.
+	 */
+	reserve = super->s_bad_segments + super->s_bad_seg_reserve;
+	reserve *= super->s_segsize - LOGFS_SEGMENT_RESERVE;
+	reserve = max(reserve, super->s_speed_reserve);
+	free -= reserve;
+	if (free < 0)
+		free = 0;
+
+	super->s_free_bytes = free;
+}
+
+static void reserve_sb_and_journal(struct super_block *sb)
+{
+	struct logfs_super *super = logfs_super(sb);
+	struct btree_head32 *head = &super->s_reserved_segments;
+	int i, err;
+
+	err = btree_insert32(head, seg_no(sb, super->s_sb_ofs[0]), (void *)1,
+			GFP_KERNEL);
+	BUG_ON(err);
+
+	err = btree_insert32(head, seg_no(sb, super->s_sb_ofs[1]), (void *)1,
+			GFP_KERNEL);
+	BUG_ON(err);
+
+	journal_for_each(i) {
+		if (!super->s_journal_seg[i])
+			continue;
+		err = btree_insert32(head, super->s_journal_seg[i], (void *)1,
+				GFP_KERNEL);
+		BUG_ON(err);
+	}
+}
+
+static void read_dynsb(struct super_block *sb,
+		struct logfs_je_dynsb *dynsb)
+{
+	struct logfs_super *super = logfs_super(sb);
+
+	super->s_gec		= be64_to_cpu(dynsb->ds_gec);
+	super->s_sweeper	= be64_to_cpu(dynsb->ds_sweeper);
+	super->s_victim_ino	= be64_to_cpu(dynsb->ds_victim_ino);
+	super->s_rename_dir	= be64_to_cpu(dynsb->ds_rename_dir);
+	super->s_rename_pos	= be64_to_cpu(dynsb->ds_rename_pos);
+	super->s_used_bytes	= be64_to_cpu(dynsb->ds_used_bytes);
+	super->s_generation	= be32_to_cpu(dynsb->ds_generation);
+}
+
+static void read_anchor(struct super_block *sb,
+		struct logfs_je_anchor *da)
+{
+	struct logfs_super *super = logfs_super(sb);
+	struct inode *inode = super->s_master_inode;
+	struct logfs_inode *li = logfs_inode(inode);
+	int i;
+
+	super->s_last_ino = be64_to_cpu(da->da_last_ino);
+	li->li_flags	= 0;
+	li->li_height	= da->da_height;
+	i_size_write(inode, be64_to_cpu(da->da_size));
+	li->li_used_bytes = be64_to_cpu(da->da_used_bytes);
+
+	for (i = 0; i < LOGFS_EMBEDDED_FIELDS; i++)
+		li->li_data[i] = be64_to_cpu(da->da_data[i]);
+}
+
+static void read_erasecount(struct super_block *sb,
+		struct logfs_je_journal_ec *ec)
+{
+	struct logfs_super *super = logfs_super(sb);
+	int i;
+
+	journal_for_each(i)
+		super->s_journal_ec[i] = be32_to_cpu(ec->ec[i]);
+}
+
+static int read_area(struct super_block *sb, struct logfs_je_area *a)
+{
+	struct logfs_super *super = logfs_super(sb);
+	struct logfs_area *area = super->s_area[a->gc_level];
+	u64 ofs;
+	u32 writemask = ~(super->s_writesize - 1);
+
+	if (a->gc_level >= LOGFS_NO_AREAS)
+		return -EIO;
+	if (a->vim != VIM_DEFAULT)
+		return -EIO; /* TODO: close area and continue */
+
+	area->a_used_bytes = be32_to_cpu(a->used_bytes);
+	area->a_written_bytes = area->a_used_bytes & writemask;
+	area->a_segno = be32_to_cpu(a->segno);
+	if (area->a_segno)
+		area->a_is_open = 1;
+
+	ofs = dev_ofs(sb, area->a_segno, area->a_written_bytes);
+	if (super->s_writesize > 1)
+		logfs_buf_recover(area, ofs, a + 1, super->s_writesize);
+	else
+		logfs_buf_recover(area, ofs, NULL, 0);
+	return 0;
+}
+
+static void *unpack(void *from, void *to)
+{
+	struct logfs_journal_header *jh = from;
+	void *data = from + sizeof(struct logfs_journal_header);
+	int err;
+	size_t inlen, outlen;
+
+	inlen = be16_to_cpu(jh->h_len);
+	outlen = be16_to_cpu(jh->h_datalen);
+
+	if (jh->h_compr == COMPR_NONE)
+		memcpy(to, data, inlen);
+	else {
+		err = logfs_uncompress(data, to, inlen, outlen);
+		BUG_ON(err);
+	}
+	return to;
+}
+
+static int __read_je_header(struct super_block *sb, u64 ofs,
+		struct logfs_journal_header *jh)
+{
+	struct logfs_super *super = logfs_super(sb);
+	size_t bufsize = max_t(size_t, sb->s_blocksize, super->s_writesize)
+		+ MAX_JOURNAL_HEADER;
+	u16 type, len, datalen;
+	int err;
+
+	/* read header only */
+	err = wbuf_read(sb, ofs, sizeof(*jh), jh);
+	if (err)
+		return err;
+	type = be16_to_cpu(jh->h_type);
+	len = be16_to_cpu(jh->h_len);
+	datalen = be16_to_cpu(jh->h_datalen);
+	if (len > sb->s_blocksize)
+		return -EIO;
+	if ((type < JE_FIRST) || (type > JE_LAST))
+		return -EIO;
+	if (datalen > bufsize)
+		return -EIO;
+	return 0;
+}
+
+static int __read_je_payload(struct super_block *sb, u64 ofs,
+		struct logfs_journal_header *jh)
+{
+	u16 len;
+	int err;
+
+	len = be16_to_cpu(jh->h_len);
+	err = wbuf_read(sb, ofs + sizeof(*jh), len, jh + 1);
+	if (err)
+		return err;
+	if (jh->h_crc != logfs_crc32(jh, len + sizeof(*jh), 4)) {
+		/* Old code was confused.  It forgot about the header length
+		 * and stopped calculating the crc 16 bytes before the end
+		 * of data - ick!
+		 * FIXME: Remove this hack once the old code is fixed.
+		 */
+		if (jh->h_crc == logfs_crc32(jh, len, 4))
+			WARN_ON_ONCE(1);
+		else
+			return -EIO;
+	}
+	return 0;
+}
+
+/*
+ * jh needs to be large enough to hold the complete entry, not just the header
+ */
+static int __read_je(struct super_block *sb, u64 ofs,
+		struct logfs_journal_header *jh)
+{
+	int err;
+
+	err = __read_je_header(sb, ofs, jh);
+	if (err)
+		return err;
+	return __read_je_payload(sb, ofs, jh);
+}
+
+static int read_je(struct super_block *sb, u64 ofs)
+{
+	struct logfs_super *super = logfs_super(sb);
+	struct logfs_journal_header *jh = super->s_compressed_je;
+	void *scratch = super->s_je;
+	u16 type, datalen;
+	int err;
+
+	err = __read_je(sb, ofs, jh);
+	if (err)
+		return err;
+	type = be16_to_cpu(jh->h_type);
+	datalen = be16_to_cpu(jh->h_datalen);
+
+	switch (type) {
+	case JE_DYNSB:
+		read_dynsb(sb, unpack(jh, scratch));
+		break;
+	case JE_ANCHOR:
+		read_anchor(sb, unpack(jh, scratch));
+		break;
+	case JE_ERASECOUNT:
+		read_erasecount(sb, unpack(jh, scratch));
+		break;
+	case JE_AREA:
+		read_area(sb, unpack(jh, scratch));
+		break;
+	case JE_OBJ_ALIAS:
+		err = logfs_load_object_aliases(sb, unpack(jh, scratch),
+				datalen);
+		break;
+	default:
+		WARN_ON_ONCE(1);
+		return -EIO;
+	}
+	return err;
+}
+
+static int logfs_read_segment(struct super_block *sb, u32 segno)
+{
+	struct logfs_super *super = logfs_super(sb);
+	struct logfs_journal_header *jh = super->s_compressed_je;
+	u64 ofs, seg_ofs = dev_ofs(sb, segno, 0);
+	u32 h_ofs, last_ofs = 0;
+	u16 len, datalen, last_len;
+	int i, err;
+
+	/* search for most recent commit */
+	for (h_ofs = 0; h_ofs < super->s_segsize; h_ofs += sizeof(*jh)) {
+		ofs = seg_ofs + h_ofs;
+		err = __read_je_header(sb, ofs, jh);
+		if (err)
+			continue;
+		if (jh->h_type != cpu_to_be16(JE_COMMIT))
+			continue;
+		err = __read_je_payload(sb, ofs, jh);
+		if (err)
+			continue;
+		len = be16_to_cpu(jh->h_len);
+		datalen = be16_to_cpu(jh->h_datalen);
+		if ((datalen > sizeof(super->s_je_array)) ||
+				(datalen % sizeof(__be64)))
+			continue;
+		last_ofs = h_ofs;
+		last_len = datalen;
+		h_ofs += ALIGN(len, sizeof(*jh)) - sizeof(*jh);
+	}
+	/* read commit */
+	if (last_ofs == 0)
+		return -ENOENT;
+	ofs = seg_ofs + last_ofs;
+	log_journal("Read commit from %llx\n", ofs);
+	err = __read_je(sb, ofs, jh);
+	BUG_ON(err); /* We should have caught it in the scan loop already */
+	if (err)
+		return err;
+	/* uncompress */
+	unpack(jh, super->s_je_array);
+	super->s_no_je = last_len / sizeof(__be64);
+	/* iterate over array */
+	for (i = 0; i < super->s_no_je; i++) {
+		err = read_je(sb, be64_to_cpu(super->s_je_array[i]));
+		if (err)
+			return err;
+	}
+	super->s_journal_area->a_segno = segno;
+	return 0;
+}
+
+static u64 read_gec(struct super_block *sb, u32 segno)
+{
+	struct logfs_segment_header sh;
+	__be32 crc;
+	int err;
+
+	if (!segno)
+		return 0;
+	err = wbuf_read(sb, dev_ofs(sb, segno, 0), sizeof(sh), &sh);
+	if (err)
+		return 0;
+	crc = logfs_crc32(&sh, sizeof(sh), 4);
+	if (crc != sh.crc) {
+		WARN_ON(sh.gec != cpu_to_be64(0xffffffffffffffffull));
+		/* Most likely it was just erased */
+		return 0;
+	}
+	return be64_to_cpu(sh.gec);
+}
+
+static int logfs_read_journal(struct super_block *sb)
+{
+	struct logfs_super *super = logfs_super(sb);
+	u64 gec[LOGFS_JOURNAL_SEGS], max;
+	u32 segno;
+	int i, max_i;
+
+	max = 0;
+	max_i = -1;
+	journal_for_each(i) {
+		segno = super->s_journal_seg[i];
+		gec[i] = read_gec(sb, super->s_journal_seg[i]);
+		if (gec[i] > max) {
+			max = gec[i];
+			max_i = i;
+		}
+	}
+	if (max_i == -1)
+		return -EIO;
+	/* FIXME: Try older segments in case of error */
+	return logfs_read_segment(sb, super->s_journal_seg[max_i]);
+}
+
+/*
+ * First search the current segment (outer loop), then pick the next segment
+ * in the array, skipping any zero entries (inner loop).
+ */
+static void journal_get_free_segment(struct logfs_area *area)
+{
+	struct logfs_super *super = logfs_super(area->a_sb);
+	int i;
+
+	journal_for_each(i) {
+		if (area->a_segno != super->s_journal_seg[i])
+			continue;
+
+		do {
+			i++;
+			if (i == LOGFS_JOURNAL_SEGS)
+				i = 0;
+		} while (!super->s_journal_seg[i]);
+
+		area->a_segno = super->s_journal_seg[i];
+		area->a_erase_count = ++(super->s_journal_ec[i]);
+		log_journal("Journal now at %x (ec %x)\n", area->a_segno,
+				area->a_erase_count);
+		return;
+	}
+	BUG();
+}
+
+static void journal_get_erase_count(struct logfs_area *area)
+{
+	/* erase count is stored globally and incremented in
+	 * journal_get_free_segment() - nothing to do here */
+}
+
+static int journal_erase_segment(struct logfs_area *area)
+{
+	struct super_block *sb = area->a_sb;
+	struct logfs_segment_header sh;
+	u64 ofs;
+	int err;
+
+	err = logfs_erase_segment(sb, area->a_segno);
+	if (err)
+		return err;
+
+	sh.pad = 0;
+	sh.type = SEG_JOURNAL;
+	sh.level = 0;
+	sh.segno = cpu_to_be32(area->a_segno);
+	sh.ec = cpu_to_be32(area->a_erase_count);
+	sh.gec = cpu_to_be64(logfs_super(sb)->s_gec);
+	sh.crc = logfs_crc32(&sh, sizeof(sh), 4);
+
+	/* This causes a bug in segment.c.  Not yet. */
+	//logfs_set_segment_erased(sb, area->a_segno, area->a_erase_count, 0);
+
+	ofs = dev_ofs(sb, area->a_segno, 0);
+	area->a_used_bytes = ALIGN(sizeof(sh), 16);
+	logfs_buf_write(area, ofs, &sh, sizeof(sh));
+	return 0;
+}
+
+static size_t __logfs_write_header(struct logfs_super *super,
+		struct logfs_journal_header *jh, size_t len, size_t datalen,
+		u16 type, u8 compr)
+{
+	jh->h_len	= cpu_to_be16(len);
+	jh->h_type	= cpu_to_be16(type);
+	jh->h_version	= cpu_to_be16(++super->s_last_version);
+	jh->h_datalen	= cpu_to_be16(datalen);
+	jh->h_compr	= compr;
+	jh->h_pad[0]	= 'H';
+	jh->h_pad[1]	= 'A';
+	jh->h_pad[2]	= 'T';
+	jh->h_crc	= logfs_crc32(jh, len + sizeof(*jh), 4);
+	return ALIGN(len, 16) + sizeof(*jh);
+}
+
+static size_t logfs_write_header(struct logfs_super *super,
+		struct logfs_journal_header *jh, size_t datalen, u16 type)
+{
+	size_t len = datalen;
+
+	return __logfs_write_header(super, jh, len, datalen, type, COMPR_NONE);
+}
+
+static inline size_t logfs_journal_erasecount_size(struct logfs_super *super)
+{
+	return LOGFS_JOURNAL_SEGS * sizeof(__be32);
+}
+
+static void *logfs_write_erasecount(struct super_block *sb, void *_ec,
+		u16 *type, size_t *len)
+{
+	struct logfs_super *super = logfs_super(sb);
+	struct logfs_je_journal_ec *ec = _ec;
+	int i;
+
+	journal_for_each(i)
+		ec->ec[i] = cpu_to_be32(super->s_journal_ec[i]);
+	*type = JE_ERASECOUNT;
+	*len = logfs_journal_erasecount_size(super);
+	return ec;
+}
+
+static void account_shadow(void *_shadow, unsigned long _sb, u64 ignore,
+		size_t ignore2)
+{
+	struct logfs_shadow *shadow = _shadow;
+	struct super_block *sb = (void *)_sb;
+	struct logfs_super *super = logfs_super(sb);
+
+	/* consume new space */
+	super->s_free_bytes	  -= shadow->new_len;
+	super->s_used_bytes	  += shadow->new_len;
+	super->s_dirty_used_bytes -= shadow->new_len;
+
+	/* free up old space */
+	super->s_free_bytes	  += shadow->old_len;
+	super->s_used_bytes	  -= shadow->old_len;
+	super->s_dirty_free_bytes -= shadow->old_len;
+
+	logfs_set_segment_used(sb, shadow->old_ofs, -shadow->old_len);
+	logfs_set_segment_used(sb, shadow->new_ofs, shadow->new_len);
+
+	log_journal("account_shadow(%llx, %llx, %x) %llx->%llx %x->%x\n",
+			shadow->ino, shadow->bix, shadow->gc_level,
+			shadow->old_ofs, shadow->new_ofs,
+			shadow->old_len, shadow->new_len);
+	mempool_free(shadow, super->s_shadow_pool);
+}
+
+static void account_shadows(struct super_block *sb)
+{
+	struct logfs_super *super = logfs_super(sb);
+	struct inode *inode = super->s_master_inode;
+	struct logfs_inode *li = logfs_inode(inode);
+	struct shadow_tree *tree = &super->s_shadow_tree;
+
+	btree_grim_visitor64(&tree->new, (unsigned long)sb, account_shadow);
+	btree_grim_visitor64(&tree->old, (unsigned long)sb, account_shadow);
+
+	if (li->li_block) {
+		/*
+		 * We never actually use the structure, when attached to the
+		 * master inode.  But it is easier to always free it here than
+		 * to have checks in several places elsewhere when allocating
+		 * it.
+		 */
+		li->li_block->ops->free_block(sb, li->li_block);
+	}
+	BUG_ON((s64)li->li_used_bytes < 0);
+}
+
+static void *__logfs_write_anchor(struct super_block *sb, void *_da,
+		u16 *type, size_t *len)
+{
+	struct logfs_super *super = logfs_super(sb);
+	struct logfs_je_anchor *da = _da;
+	struct inode *inode = super->s_master_inode;
+	struct logfs_inode *li = logfs_inode(inode);
+	int i;
+
+	da->da_height	= li->li_height;
+	da->da_last_ino = cpu_to_be64(super->s_last_ino);
+	da->da_size	= cpu_to_be64(i_size_read(inode));
+	da->da_used_bytes = cpu_to_be64(li->li_used_bytes);
+	for (i = 0; i < LOGFS_EMBEDDED_FIELDS; i++)
+		da->da_data[i] = cpu_to_be64(li->li_data[i]);
+	*type = JE_ANCHOR;
+	*len = sizeof(*da);
+	return da;
+}
+
+static void *logfs_write_dynsb(struct super_block *sb, void *_dynsb,
+		u16 *type, size_t *len)
+{
+	struct logfs_super *super = logfs_super(sb);
+	struct logfs_je_dynsb *dynsb = _dynsb;
+
+	dynsb->ds_gec		= cpu_to_be64(super->s_gec);
+	dynsb->ds_sweeper	= cpu_to_be64(super->s_sweeper);
+	dynsb->ds_victim_ino	= cpu_to_be64(super->s_victim_ino);
+	dynsb->ds_rename_dir	= cpu_to_be64(super->s_rename_dir);
+	dynsb->ds_rename_pos	= cpu_to_be64(super->s_rename_pos);
+	dynsb->ds_used_bytes	= cpu_to_be64(super->s_used_bytes);
+	dynsb->ds_generation	= cpu_to_be32(super->s_generation);
+	*type = JE_DYNSB;
+	*len = sizeof(*dynsb);
+	return dynsb;
+}
+
+static void write_wbuf(struct super_block *sb, struct logfs_area *area,
+		void *wbuf)
+{
+	struct logfs_super *super = logfs_super(sb);
+	struct address_space *mapping = super->s_mapping_inode->i_mapping;
+	u64 ofs;
+	pgoff_t index;
+	int page_ofs;
+	struct page *page;
+
+	ofs = dev_ofs(sb, area->a_segno,
+			area->a_used_bytes & ~(super->s_writesize - 1));
+	index = ofs >> PAGE_SHIFT;
+	page_ofs = ofs & (PAGE_SIZE - 1);
+
+	page = find_lock_page(mapping, index);
+	BUG_ON(!page);
+	memcpy(wbuf, page_address(page) + page_ofs, super->s_writesize);
+	unlock_page(page);
+}
+
+static void *logfs_write_area(struct super_block *sb, void *_a,
+		u16 *type, size_t *len)
+{
+	struct logfs_super *super = logfs_super(sb);
+	struct logfs_area *area = super->s_area[super->s_sum_index];
+	struct logfs_je_area *a = _a;
+
+	a->vim = VIM_DEFAULT;
+	a->gc_level = super->s_sum_index;
+	a->used_bytes = cpu_to_be32(area->a_used_bytes);
+	a->segno = cpu_to_be32(area->a_segno);
+	if (super->s_writesize > 1)
+		write_wbuf(sb, area, a + 1);
+
+	*type = JE_AREA;
+	*len = sizeof(*a) + super->s_writesize;
+	return a;
+}
+
+static void *logfs_write_commit(struct super_block *sb, void *h,
+		u16 *type, size_t *len)
+{
+	struct logfs_super *super = logfs_super(sb);
+
+	*type = JE_COMMIT;
+	*len = super->s_no_je * sizeof(__be64);
+	return super->s_je_array;
+}
+
+static size_t __logfs_write_je(struct super_block *sb, void *buf, u16 type,
+		size_t len)
+{
+	struct logfs_super *super = logfs_super(sb);
+	void *header = super->s_compressed_je;
+	void *data = header + sizeof(struct logfs_journal_header);
+	ssize_t compr_len, pad_len;
+	u8 compr = COMPR_ZLIB;
+
+	if (len == 0)
+		return logfs_write_header(super, header, 0, type);
+
+	compr_len = logfs_compress(buf, data, len, sb->s_blocksize);
+	if (compr_len < 0 || type == JE_ANCHOR) {
+		BUG_ON(len > sb->s_blocksize);
+		memcpy(data, buf, len);
+		compr_len = len;
+		compr = COMPR_NONE;
+	}
+
+	pad_len = ALIGN(compr_len, 16);
+	memset(data + compr_len, 0, pad_len - compr_len);
+
+	return __logfs_write_header(super, header, compr_len, len, type, compr);
+}
+
+static s64 logfs_get_free_bytes(struct logfs_area *area, size_t *bytes,
+		int must_pad)
+{
+	u32 writesize = logfs_super(area->a_sb)->s_writesize;
+	s32 ofs;
+	int ret;
+
+	ret = logfs_open_area(area, *bytes);
+	if (ret)
+		return -EAGAIN;
+
+	ofs = area->a_used_bytes;
+	area->a_used_bytes += *bytes;
+
+	if (must_pad) {
+		area->a_used_bytes = ALIGN(area->a_used_bytes, writesize);
+		*bytes = area->a_used_bytes - ofs;
+	}
+
+	return dev_ofs(area->a_sb, area->a_segno, ofs);
+}
+
+static int logfs_write_je_buf(struct super_block *sb, void *buf, u16 type,
+		size_t buf_len)
+{
+	struct logfs_super *super = logfs_super(sb);
+	struct logfs_area *area = super->s_journal_area;
+	struct logfs_journal_header *jh = super->s_compressed_je;
+	size_t len;
+	int must_pad = 0;
+	s64 ofs;
+
+	len = __logfs_write_je(sb, buf, type, buf_len);
+	if (jh->h_type == cpu_to_be16(JE_COMMIT))
+		must_pad = 1;
+
+	ofs = logfs_get_free_bytes(area, &len, must_pad);
+	if (ofs < 0)
+		return ofs;
+	logfs_buf_write(area, ofs, super->s_compressed_je, len);
+	super->s_je_array[super->s_no_je++] = cpu_to_be64(ofs);
+	return 0;
+}
+
+static int logfs_write_je(struct super_block *sb,
+		void* (*write)(struct super_block *sb, void *scratch,
+			u16 *type, size_t *len))
+{
+	void *buf;
+	size_t len;
+	u16 type;
+
+	buf = write(sb, logfs_super(sb)->s_je, &type, &len);
+	return logfs_write_je_buf(sb, buf, type, len);
+}
+
+int write_alias_journal(struct super_block *sb, u64 ino, u64 bix,
+		level_t level, int child_no, __be64 val)
+{
+	struct logfs_super *super = logfs_super(sb);
+	struct logfs_obj_alias *oa = super->s_je;
+	int err = 0, fill = super->s_je_fill;
+
+	log_aliases("logfs_write_obj_aliases #%x(%llx, %llx, %x, %x) %llx\n",
+			fill, ino, bix, level, child_no, be64_to_cpu(val));
+	oa[fill].ino = cpu_to_be64(ino);
+	oa[fill].bix = cpu_to_be64(bix);
+	oa[fill].val = val;
+	oa[fill].level = (__force u8)level;
+	oa[fill].child_no = cpu_to_be16(child_no);
+	fill++;
+	if (fill >= sb->s_blocksize / sizeof(*oa)) {
+		err = logfs_write_je_buf(sb, oa, JE_OBJ_ALIAS, sb->s_blocksize);
+		fill = 0;
+	}
+
+	super->s_je_fill = fill;
+	return err;
+}
+
+static int logfs_write_obj_aliases(struct super_block *sb)
+{
+	struct logfs_super *super = logfs_super(sb);
+	int err;
+
+	log_journal("logfs_write_obj_aliases: %d aliases to write\n",
+			super->s_no_object_aliases);
+	super->s_je_fill = 0;
+	err = logfs_write_obj_aliases_pagecache(sb);
+	if (err)
+		return err;
+
+	if (super->s_je_fill)
+		err = logfs_write_je_buf(sb, super->s_je, JE_OBJ_ALIAS,
+				super->s_je_fill
+				* sizeof(struct logfs_obj_alias));
+	return err;
+}
+
+/*
+ * Write all journal entries.  The goto logic ensures that all journal entries
+ * are written whenever a new segment is used.  It is ugly and potentially a
+ * bit wasteful, but robustness is more important.  With this we can *always*
+ * erase all journal segments except the one containing the most recent commit.
+ */
+void logfs_write_anchor(struct inode *inode)
+{
+	struct super_block *sb = inode->i_sb;
+	struct logfs_super *super = logfs_super(sb);
+	struct logfs_area *area = super->s_journal_area;
+	int i, err;
+
+	BUG_ON(logfs_super(sb)->s_flags & LOGFS_SB_FLAG_SHUTDOWN);
+	mutex_lock(&super->s_journal_mutex);
+
+	/* Do this first or suffer corruption */
+	logfs_sync_segments(sb);
+	account_shadows(sb);
+
+again:
+	super->s_no_je = 0;
+	for_each_area(i) {
+		if (!super->s_area[i]->a_is_open)
+			continue;
+		super->s_sum_index = i;
+		err = logfs_write_je(sb, logfs_write_area);
+		if (err)
+			goto again;
+	}
+	err = logfs_write_obj_aliases(sb);
+	if (err)
+		goto again;
+	err = logfs_write_je(sb, logfs_write_erasecount);
+	if (err)
+		goto again;
+	err = logfs_write_je(sb, __logfs_write_anchor);
+	if (err)
+		goto again;
+	err = logfs_write_je(sb, logfs_write_dynsb);
+	if (err)
+		goto again;
+	/*
+	 * Order is imperative.  First we sync all writes, including the
+	 * non-committed journal writes.  Then we write the final commit and
+	 * sync the current journal segment.
+	 * There is a theoretical bug here.  Syncing the journal segment will
+	 * write a number of journal entries and the final commit.  All these
+	 * are written in a single operation.  If the device layer writes the
+	 * data back-to-front, the commit will precede the other journal
+	 * entries, leaving a race window.
+	 * Two fixes are possible.  Preferred is to fix the device layer to
+	 * ensure writes happen front-to-back.  Alternatively we can insert
+	 * another logfs_sync_area() super->s_devops->sync() combo before
+	 * writing the commit.
+	 */
+	/*
+	 * On another subject, super->s_devops->sync is usually not necessary.
+	 * Unless called from sys_sync or friends, a barrier would suffice.
+	 */
+	super->s_devops->sync(sb);
+	err = logfs_write_je(sb, logfs_write_commit);
+	if (err)
+		goto again;
+	log_journal("Write commit to %llx\n",
+			be64_to_cpu(super->s_je_array[super->s_no_je - 1]));
+	logfs_sync_area(area);
+	BUG_ON(area->a_used_bytes != area->a_written_bytes);
+	super->s_devops->sync(sb);
+
+	mutex_unlock(&super->s_journal_mutex);
+	return;
+}
+
+void do_logfs_journal_wl_pass(struct super_block *sb)
+{
+	struct logfs_super *super = logfs_super(sb);
+	struct logfs_area *area = super->s_journal_area;
+	u32 segno, ec;
+	int i, err;
+
+	log_journal("Journal requires wear-leveling.\n");
+	/* Drop old segments */
+	journal_for_each(i)
+		if (super->s_journal_seg[i]) {
+			logfs_set_segment_unreserved(sb,
+					super->s_journal_seg[i],
+					super->s_journal_ec[i]);
+			super->s_journal_seg[i] = 0;
+			super->s_journal_ec[i] = 0;
+		}
+	/* Get new segments */
+	for (i = 0; i < super->s_no_journal_segs; i++) {
+		segno = get_best_cand(sb, &super->s_reserve_list, &ec);
+		super->s_journal_seg[i] = segno;
+		super->s_journal_ec[i] = ec;
+		logfs_set_segment_reserved(sb, segno);
+	}
+	/* Manually move journal_area */
+	area->a_segno = super->s_journal_seg[0];
+	area->a_is_open = 0;
+	area->a_used_bytes = 0;
+	/* Write journal */
+	logfs_write_anchor(super->s_master_inode);
+	/* Write superblocks */
+	err = logfs_write_sb(sb);
+	BUG_ON(err);
+}
+
+static const struct logfs_area_ops journal_area_ops = {
+	.get_free_segment	= journal_get_free_segment,
+	.get_erase_count	= journal_get_erase_count,
+	.erase_segment		= journal_erase_segment,
+};
+
+int logfs_init_journal(struct super_block *sb)
+{
+	struct logfs_super *super = logfs_super(sb);
+	size_t bufsize = max_t(size_t, sb->s_blocksize, super->s_writesize)
+		+ MAX_JOURNAL_HEADER;
+	int ret = -ENOMEM;
+
+	mutex_init(&super->s_journal_mutex);
+	btree_init_mempool32(&super->s_reserved_segments, super->s_btree_pool);
+
+	super->s_je = kzalloc(bufsize, GFP_KERNEL);
+	if (!super->s_je)
+		return ret;
+
+	super->s_compressed_je = kzalloc(bufsize, GFP_KERNEL);
+	if (!super->s_compressed_je)
+		return ret;
+
+	super->s_master_inode = logfs_new_meta_inode(sb, LOGFS_INO_MASTER);
+	if (IS_ERR(super->s_master_inode))
+		return PTR_ERR(super->s_master_inode);
+
+	ret = logfs_read_journal(sb);
+	if (ret)
+		return -EIO;
+
+	reserve_sb_and_journal(sb);
+	logfs_calc_free(sb);
+
+	super->s_journal_area->a_ops = &journal_area_ops;
+	return 0;
+}
+
+void logfs_cleanup_journal(struct super_block *sb)
+{
+	struct logfs_super *super = logfs_super(sb);
+
+	btree_grim_visitor32(&super->s_reserved_segments, 0, NULL);
+	destroy_meta_inode(super->s_master_inode);
+	super->s_master_inode = NULL;
+
+	kfree(super->s_compressed_je);
+	kfree(super->s_je);
+}
diff --git a/fs/logfs/logfs.h b/fs/logfs/logfs.h
new file mode 100644
index 000000000000..e3082abe9e3b
--- /dev/null
+++ b/fs/logfs/logfs.h
@@ -0,0 +1,722 @@
+/*
+ * fs/logfs/logfs.h
+ *
+ * As should be obvious for Linux kernel code, license is GPLv2
+ *
+ * Copyright (c) 2005-2008 Joern Engel <joern@logfs.org>
+ *
+ * Private header for logfs.
+ */
+#ifndef FS_LOGFS_LOGFS_H
+#define FS_LOGFS_LOGFS_H
+
+#undef __CHECK_ENDIAN__
+#define __CHECK_ENDIAN__
+
+#include <linux/btree.h>
+#include <linux/crc32.h>
+#include <linux/fs.h>
+#include <linux/kernel.h>
+#include <linux/mempool.h>
+#include <linux/pagemap.h>
+#include <linux/mtd/mtd.h>
+#include "logfs_abi.h"
+
+#define LOGFS_DEBUG_SUPER	(0x0001)
+#define LOGFS_DEBUG_SEGMENT	(0x0002)
+#define LOGFS_DEBUG_JOURNAL	(0x0004)
+#define LOGFS_DEBUG_DIR		(0x0008)
+#define LOGFS_DEBUG_FILE	(0x0010)
+#define LOGFS_DEBUG_INODE	(0x0020)
+#define LOGFS_DEBUG_READWRITE	(0x0040)
+#define LOGFS_DEBUG_GC		(0x0080)
+#define LOGFS_DEBUG_GC_NOISY	(0x0100)
+#define LOGFS_DEBUG_ALIASES	(0x0200)
+#define LOGFS_DEBUG_BLOCKMOVE	(0x0400)
+#define LOGFS_DEBUG_ALL		(0xffffffff)
+
+#define LOGFS_DEBUG		(0x01)
+/*
+ * To enable specific log messages, simply define LOGFS_DEBUG to match any
+ * or all of the above.
+ */
+#ifndef LOGFS_DEBUG
+#define LOGFS_DEBUG		(0)
+#endif
+
+#define log_cond(cond, fmt, arg...) do {	\
+	if (cond)				\
+		printk(KERN_DEBUG fmt, ##arg);	\
+} while (0)
+
+#define log_super(fmt, arg...) \
+	log_cond(LOGFS_DEBUG & LOGFS_DEBUG_SUPER, fmt, ##arg)
+#define log_segment(fmt, arg...) \
+	log_cond(LOGFS_DEBUG & LOGFS_DEBUG_SEGMENT, fmt, ##arg)
+#define log_journal(fmt, arg...) \
+	log_cond(LOGFS_DEBUG & LOGFS_DEBUG_JOURNAL, fmt, ##arg)
+#define log_dir(fmt, arg...) \
+	log_cond(LOGFS_DEBUG & LOGFS_DEBUG_DIR, fmt, ##arg)
+#define log_file(fmt, arg...) \
+	log_cond(LOGFS_DEBUG & LOGFS_DEBUG_FILE, fmt, ##arg)
+#define log_inode(fmt, arg...) \
+	log_cond(LOGFS_DEBUG & LOGFS_DEBUG_INODE, fmt, ##arg)
+#define log_readwrite(fmt, arg...) \
+	log_cond(LOGFS_DEBUG & LOGFS_DEBUG_READWRITE, fmt, ##arg)
+#define log_gc(fmt, arg...) \
+	log_cond(LOGFS_DEBUG & LOGFS_DEBUG_GC, fmt, ##arg)
+#define log_gc_noisy(fmt, arg...) \
+	log_cond(LOGFS_DEBUG & LOGFS_DEBUG_GC_NOISY, fmt, ##arg)
+#define log_aliases(fmt, arg...) \
+	log_cond(LOGFS_DEBUG & LOGFS_DEBUG_ALIASES, fmt, ##arg)
+#define log_blockmove(fmt, arg...) \
+	log_cond(LOGFS_DEBUG & LOGFS_DEBUG_BLOCKMOVE, fmt, ##arg)
+
+#define PG_pre_locked		PG_owner_priv_1
+#define PagePreLocked(page)	test_bit(PG_pre_locked, &(page)->flags)
+#define SetPagePreLocked(page)	set_bit(PG_pre_locked, &(page)->flags)
+#define ClearPagePreLocked(page) clear_bit(PG_pre_locked, &(page)->flags)
+
+/* FIXME: This should really be somewhere in the 64bit area. */
+#define LOGFS_LINK_MAX		(1<<30)
+
+/* Read-only filesystem */
+#define LOGFS_SB_FLAG_RO	0x0001
+#define LOGFS_SB_FLAG_SEG_ALIAS	0x0002
+#define LOGFS_SB_FLAG_OBJ_ALIAS	0x0004
+#define LOGFS_SB_FLAG_SHUTDOWN	0x0008
+
+/* Write Control Flags */
+#define WF_LOCK			0x01 /* take write lock */
+#define WF_WRITE		0x02 /* write block */
+#define WF_DELETE		0x04 /* delete old block */
+
+typedef u8 __bitwise level_t;
+typedef u8 __bitwise gc_level_t;
+
+#define LEVEL(level) ((__force level_t)(level))
+#define GC_LEVEL(gc_level) ((__force gc_level_t)(gc_level))
+
+#define SUBLEVEL(level) ( (void)((level) == LEVEL(1)),	\
+		(__force level_t)((__force u8)(level) - 1) )
+
+/**
+ * struct logfs_area - area management information
+ *
+ * @a_sb:			the superblock this area belongs to
+ * @a_is_open:			1 if the area is currently open, else 0
+ * @a_segno:			segment number of area
+ * @a_written_bytes:		number of bytes already written back
+ * @a_used_bytes:		number of used bytes
+ * @a_ops:			area operations (either journal or ostore)
+ * @a_erase_count:		erase count
+ * @a_level:			GC level
+ */
+struct logfs_area { /* a segment open for writing */
+	struct super_block *a_sb;
+	int	a_is_open;
+	u32	a_segno;
+	u32	a_written_bytes;
+	u32	a_used_bytes;
+	const struct logfs_area_ops *a_ops;
+	u32	a_erase_count;
+	gc_level_t a_level;
+};
+
+/**
+ * struct logfs_area_ops - area operations
+ *
+ * @get_free_segment:		fill area->ofs with the offset of a free segment
+ * @get_erase_count:		fill area->erase_count (needs area->ofs)
+ * @erase_segment:		erase and setup segment
+ */
+struct logfs_area_ops {
+	void	(*get_free_segment)(struct logfs_area *area);
+	void	(*get_erase_count)(struct logfs_area *area);
+	int	(*erase_segment)(struct logfs_area *area);
+};
+
+/**
+ * struct logfs_device_ops - device access operations
+ *
+ * @readpage:			read one page (mm page)
+ * @writeseg:			write one segment.  may be a partial segment
+ * @erase:			erase one segment
+ * @read:			read from the device
+ * @erase:			erase part of the device
+ */
+struct logfs_device_ops {
+	struct page *(*find_first_sb)(struct super_block *sb, u64 *ofs);
+	struct page *(*find_last_sb)(struct super_block *sb, u64 *ofs);
+	int (*write_sb)(struct super_block *sb, struct page *page);
+	int (*readpage)(void *_sb, struct page *page);
+	void (*writeseg)(struct super_block *sb, u64 ofs, size_t len);
+	int (*erase)(struct super_block *sb, loff_t ofs, size_t len);
+	void (*sync)(struct super_block *sb);
+	void (*put_device)(struct super_block *sb);
+};
+
+/**
+ * struct candidate_list - list of similar candidates
+ */
+struct candidate_list {
+	struct rb_root rb_tree;
+	int count;
+	int maxcount;
+	int sort_by_ec;
+};
+
+/**
+ * struct gc_candidate - "candidate" segment to be garbage collected next
+ *
+ * @list:			list (either free of low)
+ * @segno:			segment number
+ * @valid:			number of valid bytes
+ * @erase_count:		erase count of segment
+ * @dist:			distance from tree root
+ *
+ * Candidates can be on two lists.  The free list contains electees rather
+ * than candidates - segments that no longer contain any valid data.  The
+ * low list contains candidates to be picked for GC.  It should be kept
+ * short.  It is not required to always pick a perfect candidate.  In the
+ * worst case GC will have to move more data than absolutely necessary.
+ */
+struct gc_candidate {
+	struct rb_node rb_node;
+	struct candidate_list *list;
+	u32	segno;
+	u32	valid;
+	u32	erase_count;
+	u8	dist;
+};
+
+/**
+ * struct logfs_journal_entry - temporary structure used during journal scan
+ *
+ * @used:
+ * @version:			normalized version
+ * @len:			length
+ * @offset:			offset
+ */
+struct logfs_journal_entry {
+	int used;
+	s16 version;
+	u16 len;
+	u16 datalen;
+	u64 offset;
+};
+
+enum transaction_state {
+	CREATE_1 = 1,
+	CREATE_2,
+	UNLINK_1,
+	UNLINK_2,
+	CROSS_RENAME_1,
+	CROSS_RENAME_2,
+	TARGET_RENAME_1,
+	TARGET_RENAME_2,
+	TARGET_RENAME_3
+};
+
+/**
+ * struct logfs_transaction - essential fields to support atomic dirops
+ *
+ * @ino:			target inode
+ * @dir:			inode of directory containing dentry
+ * @pos:			pos of dentry in directory
+ */
+struct logfs_transaction {
+	enum transaction_state state;
+	u64	 ino;
+	u64	 dir;
+	u64	 pos;
+};
+
+/**
+ * struct logfs_shadow - old block in the shadow of a not-yet-committed new one
+ * @old_ofs:			offset of old block on medium
+ * @new_ofs:			offset of new block on medium
+ * @ino:			inode number
+ * @bix:			block index
+ * @old_len:			size of old block, including header
+ * @new_len:			size of new block, including header
+ * @level:			block level
+ */
+struct logfs_shadow {
+	u64 old_ofs;
+	u64 new_ofs;
+	u64 ino;
+	u64 bix;
+	int old_len;
+	int new_len;
+	gc_level_t gc_level;
+};
+
+/**
+ * struct shadow_tree
+ * @new:			shadows where old_ofs==0, indexed by new_ofs
+ * @old:			shadows where old_ofs!=0, indexed by old_ofs
+ */
+struct shadow_tree {
+	struct btree_head64 new;
+	struct btree_head64 old;
+};
+
+struct object_alias_item {
+	struct list_head list;
+	__be64 val;
+	int child_no;
+};
+
+/**
+ * struct logfs_block - contains any block state
+ * @type:			indirect block or inode
+ * @full:			number of fully populated children
+ * @partial:			number of partially populated children
+ *
+ * Most blocks are directly represented by page cache pages.  But when a block
+ * becomes dirty, is part of a transaction, contains aliases or is otherwise
+ * special, a struct logfs_block is allocated to track the additional state.
+ * Inodes are very similar to indirect blocks, so they can also get one of
+ * these structures added when appropriate.
+ */
+#define BLOCK_INDIRECT	1	/* Indirect block */
+#define BLOCK_INODE	2	/* Inode */
+struct logfs_block_ops;
+struct logfs_block {
+	struct list_head alias_list;
+	struct list_head item_list;
+	struct super_block *sb;
+	u64 ino;
+	u64 bix;
+	level_t level;
+	struct page *page;
+	struct inode *inode;
+	struct logfs_transaction *ta;
+	unsigned long alias_map[LOGFS_BLOCK_FACTOR / BITS_PER_LONG];
+	struct logfs_block_ops *ops;
+	int full;
+	int partial;
+	int reserved_bytes;
+};
+
+typedef int write_alias_t(struct super_block *sb, u64 ino, u64 bix,
+		level_t level, int child_no, __be64 val);
+struct logfs_block_ops {
+	void	(*write_block)(struct logfs_block *block);
+	gc_level_t	(*block_level)(struct logfs_block *block);
+	void	(*free_block)(struct super_block *sb, struct logfs_block*block);
+	int	(*write_alias)(struct super_block *sb,
+			struct logfs_block *block,
+			write_alias_t *write_one_alias);
+};
+
+struct logfs_super {
+	struct mtd_info *s_mtd;			/* underlying device */
+	struct block_device *s_bdev;		/* underlying device */
+	const struct logfs_device_ops *s_devops;/* device access */
+	struct inode	*s_master_inode;	/* inode file */
+	struct inode	*s_segfile_inode;	/* segment file */
+	struct inode *s_mapping_inode;		/* device mapping */
+	atomic_t s_pending_writes;		/* outstanting bios */
+	long	 s_flags;
+	mempool_t *s_btree_pool;		/* for btree nodes */
+	mempool_t *s_alias_pool;		/* aliases in segment.c */
+	u64	 s_feature_incompat;
+	u64	 s_feature_ro_compat;
+	u64	 s_feature_compat;
+	u64	 s_feature_flags;
+	u64	 s_sb_ofs[2];
+	/* alias.c fields */
+	struct btree_head32 s_segment_alias;	/* remapped segments */
+	int	 s_no_object_aliases;
+	struct list_head s_object_alias;	/* remapped objects */
+	struct btree_head128 s_object_alias_tree; /* remapped objects */
+	struct mutex s_object_alias_mutex;
+	/* dir.c fields */
+	struct mutex s_dirop_mutex;		/* for creat/unlink/rename */
+	u64	 s_victim_ino;			/* used for atomic dir-ops */
+	u64	 s_rename_dir;			/* source directory ino */
+	u64	 s_rename_pos;			/* position of source dd */
+	/* gc.c fields */
+	long	 s_segsize;			/* size of a segment */
+	int	 s_segshift;			/* log2 of segment size */
+	long	 s_segmask;			/* 1 << s_segshift - 1 */
+	long	 s_no_segs;			/* segments on device */
+	long	 s_no_journal_segs;		/* segments used for journal */
+	long	 s_no_blocks;			/* blocks per segment */
+	long	 s_writesize;			/* minimum write size */
+	int	 s_writeshift;			/* log2 of write size */
+	u64	 s_size;			/* filesystem size */
+	struct logfs_area *s_area[LOGFS_NO_AREAS];	/* open segment array */
+	u64	 s_gec;				/* global erase count */
+	u64	 s_wl_gec_ostore;		/* time of last wl event */
+	u64	 s_wl_gec_journal;		/* time of last wl event */
+	u64	 s_sweeper;			/* current sweeper pos */
+	u8	 s_ifile_levels;		/* max level of ifile */
+	u8	 s_iblock_levels;		/* max level of regular files */
+	u8	 s_data_levels;			/* # of segments to leaf block*/
+	u8	 s_total_levels;		/* sum of above three */
+	struct btree_head32 s_cand_tree;	/* all candidates */
+	struct candidate_list s_free_list;	/* 100% free segments */
+	struct candidate_list s_reserve_list;	/* Bad segment reserve */
+	struct candidate_list s_low_list[LOGFS_NO_AREAS];/* good candidates */
+	struct candidate_list s_ec_list;	/* wear level candidates */
+	struct btree_head32 s_reserved_segments;/* sb, journal, bad, etc. */
+	/* inode.c fields */
+	u64	 s_last_ino;			/* highest ino used */
+	long	 s_inos_till_wrap;
+	u32	 s_generation;			/* i_generation for new files */
+	struct list_head s_freeing_list;	/* inodes being freed */
+	/* journal.c fields */
+	struct mutex s_journal_mutex;
+	void	*s_je;				/* journal entry to compress */
+	void	*s_compressed_je;		/* block to write to journal */
+	u32	 s_journal_seg[LOGFS_JOURNAL_SEGS]; /* journal segments */
+	u32	 s_journal_ec[LOGFS_JOURNAL_SEGS]; /* journal erasecounts */
+	u64	 s_last_version;
+	struct logfs_area *s_journal_area;	/* open journal segment */
+	__be64	s_je_array[64];
+	int	s_no_je;
+
+	int	 s_sum_index;			/* for the 12 summaries */
+	struct shadow_tree s_shadow_tree;
+	int	 s_je_fill;			/* index of current je */
+	/* readwrite.c fields */
+	struct mutex s_write_mutex;
+	int	 s_lock_count;
+	mempool_t *s_block_pool;		/* struct logfs_block pool */
+	mempool_t *s_shadow_pool;		/* struct logfs_shadow pool */
+	/*
+	 * Space accounting:
+	 * - s_used_bytes specifies space used to store valid data objects.
+	 * - s_dirty_used_bytes is space used to store non-committed data
+	 *   objects.  Those objects have already been written themselves,
+	 *   but they don't become valid until all indirect blocks up to the
+	 *   journal have been written as well.
+	 * - s_dirty_free_bytes is space used to store the old copy of a
+	 *   replaced object, as long as the replacement is non-committed.
+	 *   In other words, it is the amount of space freed when all dirty
+	 *   blocks are written back.
+	 * - s_free_bytes is the amount of free space available for any
+	 *   purpose.
+	 * - s_root_reserve is the amount of free space available only to
+	 *   the root user.  Non-privileged users can no longer write once
+	 *   this watermark has been reached.
+	 * - s_speed_reserve is space which remains unused to speed up
+	 *   garbage collection performance.
+	 * - s_dirty_pages is the space reserved for currently dirty pages.
+	 *   It is a pessimistic estimate, so some/most will get freed on
+	 *   page writeback.
+	 *
+	 * s_used_bytes + s_free_bytes + s_speed_reserve = total usable size
+	 */
+	u64	 s_free_bytes;
+	u64	 s_used_bytes;
+	u64	 s_dirty_free_bytes;
+	u64	 s_dirty_used_bytes;
+	u64	 s_root_reserve;
+	u64	 s_speed_reserve;
+	u64	 s_dirty_pages;
+	/* Bad block handling:
+	 * - s_bad_seg_reserve is a number of segments usually kept
+	 *   free.  When encountering bad blocks, the affected segment's data
+	 *   is _temporarily_ moved to a reserved segment.
+	 * - s_bad_segments is the number of known bad segments.
+	 */
+	u32	 s_bad_seg_reserve;
+	u32	 s_bad_segments;
+};
+
+/**
+ * struct logfs_inode - in-memory inode
+ *
+ * @vfs_inode:			struct inode
+ * @li_data:			data pointers
+ * @li_used_bytes:		number of used bytes
+ * @li_freeing_list:		used to track inodes currently being freed
+ * @li_flags:			inode flags
+ * @li_refcount:		number of internal (GC-induced) references
+ */
+struct logfs_inode {
+	struct inode vfs_inode;
+	u64	li_data[LOGFS_EMBEDDED_FIELDS];
+	u64	li_used_bytes;
+	struct list_head li_freeing_list;
+	struct logfs_block *li_block;
+	u32	li_flags;
+	u8	li_height;
+	int	li_refcount;
+};
+
+#define journal_for_each(__i) for (__i = 0; __i < LOGFS_JOURNAL_SEGS; __i++)
+#define for_each_area(__i) for (__i = 0; __i < LOGFS_NO_AREAS; __i++)
+#define for_each_area_down(__i) for (__i = LOGFS_NO_AREAS - 1; __i >= 0; __i--)
+
+/* compr.c */
+int logfs_compress(void *in, void *out, size_t inlen, size_t outlen);
+int logfs_uncompress(void *in, void *out, size_t inlen, size_t outlen);
+int __init logfs_compr_init(void);
+void logfs_compr_exit(void);
+
+/* dev_bdev.c */
+#ifdef CONFIG_BLOCK
+int logfs_get_sb_bdev(struct file_system_type *type, int flags,
+		const char *devname, struct vfsmount *mnt);
+#else
+static inline int logfs_get_sb_bdev(struct file_system_type *type, int flags,
+		const char *devname, struct vfsmount *mnt)
+{
+	return -ENODEV;
+}
+#endif
+
+/* dev_mtd.c */
+#ifdef CONFIG_MTD
+int logfs_get_sb_mtd(struct file_system_type *type, int flags,
+		int mtdnr, struct vfsmount *mnt);
+#else
+static inline int logfs_get_sb_mtd(struct file_system_type *type, int flags,
+		int mtdnr, struct vfsmount *mnt)
+{
+	return -ENODEV;
+}
+#endif
+
+/* dir.c */
+extern const struct inode_operations logfs_symlink_iops;
+extern const struct inode_operations logfs_dir_iops;
+extern const struct file_operations logfs_dir_fops;
+int logfs_replay_journal(struct super_block *sb);
+
+/* file.c */
+extern const struct inode_operations logfs_reg_iops;
+extern const struct file_operations logfs_reg_fops;
+extern const struct address_space_operations logfs_reg_aops;
+int logfs_readpage(struct file *file, struct page *page);
+int logfs_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
+		unsigned long arg);
+int logfs_fsync(struct file *file, struct dentry *dentry, int datasync);
+
+/* gc.c */
+u32 get_best_cand(struct super_block *sb, struct candidate_list *list, u32 *ec);
+void logfs_gc_pass(struct super_block *sb);
+int logfs_check_areas(struct super_block *sb);
+int logfs_init_gc(struct super_block *sb);
+void logfs_cleanup_gc(struct super_block *sb);
+
+/* inode.c */
+extern const struct super_operations logfs_super_operations;
+struct inode *logfs_iget(struct super_block *sb, ino_t ino);
+struct inode *logfs_safe_iget(struct super_block *sb, ino_t ino, int *cookie);
+void logfs_safe_iput(struct inode *inode, int cookie);
+struct inode *logfs_new_inode(struct inode *dir, int mode);
+struct inode *logfs_new_meta_inode(struct super_block *sb, u64 ino);
+struct inode *logfs_read_meta_inode(struct super_block *sb, u64 ino);
+int logfs_init_inode_cache(void);
+void logfs_destroy_inode_cache(void);
+void destroy_meta_inode(struct inode *inode);
+void logfs_set_blocks(struct inode *inode, u64 no);
+/* these logically belong into inode.c but actually reside in readwrite.c */
+int logfs_read_inode(struct inode *inode);
+int __logfs_write_inode(struct inode *inode, long flags);
+void logfs_delete_inode(struct inode *inode);
+void logfs_clear_inode(struct inode *inode);
+
+/* journal.c */
+void logfs_write_anchor(struct inode *inode);
+int logfs_init_journal(struct super_block *sb);
+void logfs_cleanup_journal(struct super_block *sb);
+int write_alias_journal(struct super_block *sb, u64 ino, u64 bix,
+		level_t level, int child_no, __be64 val);
+void do_logfs_journal_wl_pass(struct super_block *sb);
+
+/* readwrite.c */
+pgoff_t logfs_pack_index(u64 bix, level_t level);
+void logfs_unpack_index(pgoff_t index, u64 *bix, level_t *level);
+int logfs_inode_write(struct inode *inode, const void *buf, size_t count,
+		loff_t bix, long flags, struct shadow_tree *shadow_tree);
+int logfs_readpage_nolock(struct page *page);
+int logfs_write_buf(struct inode *inode, struct page *page, long flags);
+int logfs_delete(struct inode *inode, pgoff_t index,
+		struct shadow_tree *shadow_tree);
+int logfs_rewrite_block(struct inode *inode, u64 bix, u64 ofs,
+		gc_level_t gc_level, long flags);
+int logfs_is_valid_block(struct super_block *sb, u64 ofs, u64 ino, u64 bix,
+		gc_level_t gc_level);
+int logfs_truncate(struct inode *inode, u64 size);
+u64 logfs_seek_hole(struct inode *inode, u64 bix);
+u64 logfs_seek_data(struct inode *inode, u64 bix);
+int logfs_open_segfile(struct super_block *sb);
+int logfs_init_rw(struct super_block *sb);
+void logfs_cleanup_rw(struct super_block *sb);
+void logfs_add_transaction(struct inode *inode, struct logfs_transaction *ta);
+void logfs_del_transaction(struct inode *inode, struct logfs_transaction *ta);
+void logfs_write_block(struct logfs_block *block, long flags);
+int logfs_write_obj_aliases_pagecache(struct super_block *sb);
+void logfs_get_segment_entry(struct super_block *sb, u32 segno,
+		struct logfs_segment_entry *se);
+void logfs_set_segment_used(struct super_block *sb, u64 ofs, int increment);
+void logfs_set_segment_erased(struct super_block *sb, u32 segno, u32 ec,
+		gc_level_t gc_level);
+void logfs_set_segment_reserved(struct super_block *sb, u32 segno);
+void logfs_set_segment_unreserved(struct super_block *sb, u32 segno, u32 ec);
+struct logfs_block *__alloc_block(struct super_block *sb,
+		u64 ino, u64 bix, level_t level);
+void __free_block(struct super_block *sb, struct logfs_block *block);
+void btree_write_block(struct logfs_block *block);
+void initialize_block_counters(struct page *page, struct logfs_block *block,
+		__be64 *array, int page_is_empty);
+int logfs_exist_block(struct inode *inode, u64 bix);
+int get_page_reserve(struct inode *inode, struct page *page);
+extern struct logfs_block_ops indirect_block_ops;
+
+/* segment.c */
+int logfs_erase_segment(struct super_block *sb, u32 ofs);
+int wbuf_read(struct super_block *sb, u64 ofs, size_t len, void *buf);
+int logfs_segment_read(struct inode *inode, struct page *page, u64 ofs, u64 bix,
+		level_t level);
+int logfs_segment_write(struct inode *inode, struct page *page,
+		struct logfs_shadow *shadow);
+int logfs_segment_delete(struct inode *inode, struct logfs_shadow *shadow);
+int logfs_load_object_aliases(struct super_block *sb,
+		struct logfs_obj_alias *oa, int count);
+void move_page_to_btree(struct page *page);
+int logfs_init_mapping(struct super_block *sb);
+void logfs_sync_area(struct logfs_area *area);
+void logfs_sync_segments(struct super_block *sb);
+
+/* area handling */
+int logfs_init_areas(struct super_block *sb);
+void logfs_cleanup_areas(struct super_block *sb);
+int logfs_open_area(struct logfs_area *area, size_t bytes);
+void __logfs_buf_write(struct logfs_area *area, u64 ofs, void *buf, size_t len,
+		int use_filler);
+
+static inline void logfs_buf_write(struct logfs_area *area, u64 ofs,
+		void *buf, size_t len)
+{
+	__logfs_buf_write(area, ofs, buf, len, 0);
+}
+
+static inline void logfs_buf_recover(struct logfs_area *area, u64 ofs,
+		void *buf, size_t len)
+{
+	__logfs_buf_write(area, ofs, buf, len, 1);
+}
+
+/* super.c */
+struct page *emergency_read_begin(struct address_space *mapping, pgoff_t index);
+void emergency_read_end(struct page *page);
+void logfs_crash_dump(struct super_block *sb);
+void *memchr_inv(const void *s, int c, size_t n);
+int logfs_statfs(struct dentry *dentry, struct kstatfs *stats);
+int logfs_get_sb_device(struct file_system_type *type, int flags,
+		struct mtd_info *mtd, struct block_device *bdev,
+		const struct logfs_device_ops *devops, struct vfsmount *mnt);
+int logfs_check_ds(struct logfs_disk_super *ds);
+int logfs_write_sb(struct super_block *sb);
+
+static inline struct logfs_super *logfs_super(struct super_block *sb)
+{
+	return sb->s_fs_info;
+}
+
+static inline struct logfs_inode *logfs_inode(struct inode *inode)
+{
+	return container_of(inode, struct logfs_inode, vfs_inode);
+}
+
+static inline void logfs_set_ro(struct super_block *sb)
+{
+	logfs_super(sb)->s_flags |= LOGFS_SB_FLAG_RO;
+}
+
+#define LOGFS_BUG(sb) do {					\
+	struct super_block *__sb = sb;				\
+	logfs_crash_dump(__sb);					\
+	logfs_super(__sb)->s_flags |= LOGFS_SB_FLAG_RO;		\
+	BUG();							\
+} while (0)
+
+#define LOGFS_BUG_ON(condition, sb) \
+	do { if (unlikely(condition)) LOGFS_BUG((sb)); } while (0)
+
+static inline __be32 logfs_crc32(void *data, size_t len, size_t skip)
+{
+	return cpu_to_be32(crc32(~0, data+skip, len-skip));
+}
+
+static inline u8 logfs_type(struct inode *inode)
+{
+	return (inode->i_mode >> 12) & 15;
+}
+
+static inline pgoff_t logfs_index(struct super_block *sb, u64 pos)
+{
+	return pos >> sb->s_blocksize_bits;
+}
+
+static inline u64 dev_ofs(struct super_block *sb, u32 segno, u32 ofs)
+{
+	return ((u64)segno << logfs_super(sb)->s_segshift) + ofs;
+}
+
+static inline u32 seg_no(struct super_block *sb, u64 ofs)
+{
+	return ofs >> logfs_super(sb)->s_segshift;
+}
+
+static inline u32 seg_ofs(struct super_block *sb, u64 ofs)
+{
+	return ofs & logfs_super(sb)->s_segmask;
+}
+
+static inline u64 seg_align(struct super_block *sb, u64 ofs)
+{
+	return ofs & ~logfs_super(sb)->s_segmask;
+}
+
+static inline struct logfs_block *logfs_block(struct page *page)
+{
+	return (void *)page->private;
+}
+
+static inline level_t shrink_level(gc_level_t __level)
+{
+	u8 level = (__force u8)__level;
+
+	if (level >= LOGFS_MAX_LEVELS)
+		level -= LOGFS_MAX_LEVELS;
+	return (__force level_t)level;
+}
+
+static inline gc_level_t expand_level(u64 ino, level_t __level)
+{
+	u8 level = (__force u8)__level;
+
+	if (ino == LOGFS_INO_MASTER) {
+		/* ifile has seperate areas */
+		level += LOGFS_MAX_LEVELS;
+	}
+	return (__force gc_level_t)level;
+}
+
+static inline int logfs_block_shift(struct super_block *sb, level_t level)
+{
+	level = shrink_level((__force gc_level_t)level);
+	return (__force int)level * (sb->s_blocksize_bits - 3);
+}
+
+static inline u64 logfs_block_mask(struct super_block *sb, level_t level)
+{
+	return ~0ull << logfs_block_shift(sb, level);
+}
+
+static inline struct logfs_area *get_area(struct super_block *sb,
+		gc_level_t gc_level)
+{
+	return logfs_super(sb)->s_area[(__force u8)gc_level];
+}
+
+#endif
diff --git a/fs/logfs/logfs_abi.h b/fs/logfs/logfs_abi.h
new file mode 100644
index 000000000000..5d3782ddecc8
--- /dev/null
+++ b/fs/logfs/logfs_abi.h
@@ -0,0 +1,627 @@
+/*
+ * fs/logfs/logfs_abi.h
+ *
+ * As should be obvious for Linux kernel code, license is GPLv2
+ *
+ * Copyright (c) 2005-2008 Joern Engel <joern@logfs.org>
+ *
+ * Public header for logfs.
+ */
+#ifndef FS_LOGFS_LOGFS_ABI_H
+#define FS_LOGFS_LOGFS_ABI_H
+
+/* For out-of-kernel compiles */
+#ifndef BUILD_BUG_ON
+#define BUILD_BUG_ON(condition) /**/
+#endif
+
+#define SIZE_CHECK(type, size)					\
+static inline void check_##type(void)				\
+{								\
+	BUILD_BUG_ON(sizeof(struct type) != (size));		\
+}
+
+/*
+ * Throughout the logfs code, we're constantly dealing with blocks at
+ * various positions or offsets.  To remove confusion, we stricly
+ * distinguish between a "position" - the logical position within a
+ * file and an "offset" - the physical location within the device.
+ *
+ * Any usage of the term offset for a logical location or position for
+ * a physical one is a bug and should get fixed.
+ */
+
+/*
+ * Block are allocated in one of several segments depending on their
+ * level.  The following levels are used:
+ *  0	- regular data block
+ *  1	- i1 indirect blocks
+ *  2	- i2 indirect blocks
+ *  3	- i3 indirect blocks
+ *  4	- i4 indirect blocks
+ *  5	- i5 indirect blocks
+ *  6	- ifile data blocks
+ *  7	- ifile i1 indirect blocks
+ *  8	- ifile i2 indirect blocks
+ *  9	- ifile i3 indirect blocks
+ * 10	- ifile i4 indirect blocks
+ * 11	- ifile i5 indirect blocks
+ * Potential levels to be used in the future:
+ * 12	- gc recycled blocks, long-lived data
+ * 13	- replacement blocks, short-lived data
+ *
+ * Levels 1-11 are necessary for robust gc operations and help seperate
+ * short-lived metadata from longer-lived file data.  In the future,
+ * file data should get seperated into several segments based on simple
+ * heuristics.  Old data recycled during gc operation is expected to be
+ * long-lived.  New data is of uncertain life expectancy.  New data
+ * used to replace older blocks in existing files is expected to be
+ * short-lived.
+ */
+
+
+/* Magic numbers.  64bit for superblock, 32bit for statfs f_type */
+#define LOGFS_MAGIC		0xb21f205ac97e8168ull
+#define LOGFS_MAGIC_U32		0xc97e8168u
+
+/*
+ * Various blocksize related macros.  Blocksize is currently fixed at 4KiB.
+ * Sooner or later that should become configurable and the macros replaced
+ * by something superblock-dependent.  Pointers in indirect blocks are and
+ * will remain 64bit.
+ *
+ * LOGFS_BLOCKSIZE	- self-explaining
+ * LOGFS_BLOCK_FACTOR	- number of pointers per indirect block
+ * LOGFS_BLOCK_BITS	- log2 of LOGFS_BLOCK_FACTOR, used for shifts
+ */
+#define LOGFS_BLOCKSIZE		(4096ull)
+#define LOGFS_BLOCK_FACTOR	(LOGFS_BLOCKSIZE / sizeof(u64))
+#define LOGFS_BLOCK_BITS	(9)
+
+/*
+ * Number of blocks at various levels of indirection.  There are 16 direct
+ * block pointers plus a single indirect pointer.
+ */
+#define I0_BLOCKS		(16)
+#define I1_BLOCKS		LOGFS_BLOCK_FACTOR
+#define I2_BLOCKS		(LOGFS_BLOCK_FACTOR * I1_BLOCKS)
+#define I3_BLOCKS		(LOGFS_BLOCK_FACTOR * I2_BLOCKS)
+#define I4_BLOCKS		(LOGFS_BLOCK_FACTOR * I3_BLOCKS)
+#define I5_BLOCKS		(LOGFS_BLOCK_FACTOR * I4_BLOCKS)
+
+#define INDIRECT_INDEX		I0_BLOCKS
+#define LOGFS_EMBEDDED_FIELDS	(I0_BLOCKS + 1)
+
+/*
+ * Sizes at which files require another level of indirection.  Files smaller
+ * than LOGFS_EMBEDDED_SIZE can be completely stored in the inode itself,
+ * similar like ext2 fast symlinks.
+ *
+ * Data at a position smaller than LOGFS_I0_SIZE is accessed through the
+ * direct pointers, else through the 1x indirect pointer and so forth.
+ */
+#define LOGFS_EMBEDDED_SIZE	(LOGFS_EMBEDDED_FIELDS * sizeof(u64))
+#define LOGFS_I0_SIZE		(I0_BLOCKS * LOGFS_BLOCKSIZE)
+#define LOGFS_I1_SIZE		(I1_BLOCKS * LOGFS_BLOCKSIZE)
+#define LOGFS_I2_SIZE		(I2_BLOCKS * LOGFS_BLOCKSIZE)
+#define LOGFS_I3_SIZE		(I3_BLOCKS * LOGFS_BLOCKSIZE)
+#define LOGFS_I4_SIZE		(I4_BLOCKS * LOGFS_BLOCKSIZE)
+#define LOGFS_I5_SIZE		(I5_BLOCKS * LOGFS_BLOCKSIZE)
+
+/*
+ * Each indirect block pointer must have this flag set, if all block pointers
+ * behind it are set, i.e. there is no hole hidden in the shadow of this
+ * indirect block pointer.
+ */
+#define LOGFS_FULLY_POPULATED (1ULL << 63)
+#define pure_ofs(ofs) (ofs & ~LOGFS_FULLY_POPULATED)
+
+/*
+ * LogFS needs to seperate data into levels.  Each level is defined as the
+ * maximal possible distance from the master inode (inode of the inode file).
+ * Data blocks reside on level 0, 1x indirect block on level 1, etc.
+ * Inodes reside on level 6, indirect blocks for the inode file on levels 7-11.
+ * This effort is necessary to guarantee garbage collection to always make
+ * progress.
+ *
+ * LOGFS_MAX_INDIRECT is the maximal indirection through indirect blocks,
+ * LOGFS_MAX_LEVELS is one more for the actual data level of a file.  It is
+ * the maximal number of levels for one file.
+ * LOGFS_NO_AREAS is twice that, as the inode file and regular files are
+ * effectively stacked on top of each other.
+ */
+#define LOGFS_MAX_INDIRECT	(5)
+#define LOGFS_MAX_LEVELS	(LOGFS_MAX_INDIRECT + 1)
+#define LOGFS_NO_AREAS		(2 * LOGFS_MAX_LEVELS)
+
+/* Maximum size of filenames */
+#define LOGFS_MAX_NAMELEN	(255)
+
+/* Number of segments in the primary journal. */
+#define LOGFS_JOURNAL_SEGS	(16)
+
+/* Maximum number of free/erased/etc. segments in journal entries */
+#define MAX_CACHED_SEGS		(64)
+
+
+/*
+ * LOGFS_OBJECT_HEADERSIZE is the size of a single header in the object store,
+ * LOGFS_MAX_OBJECTSIZE the size of the largest possible object, including
+ * its header,
+ * LOGFS_SEGMENT_RESERVE is the amount of space reserved for each segment for
+ * its segment header and the padded space at the end when no further objects
+ * fit.
+ */
+#define LOGFS_OBJECT_HEADERSIZE	(0x1c)
+#define LOGFS_SEGMENT_HEADERSIZE (0x18)
+#define LOGFS_MAX_OBJECTSIZE	(LOGFS_OBJECT_HEADERSIZE + LOGFS_BLOCKSIZE)
+#define LOGFS_SEGMENT_RESERVE	\
+	(LOGFS_SEGMENT_HEADERSIZE + LOGFS_MAX_OBJECTSIZE - 1)
+
+/*
+ * Segment types:
+ * SEG_SUPER	- Data or indirect block
+ * SEG_JOURNAL	- Inode
+ * SEG_OSTORE	- Dentry
+ */
+enum {
+	SEG_SUPER	= 0x01,
+	SEG_JOURNAL	= 0x02,
+	SEG_OSTORE	= 0x03,
+};
+
+/**
+ * struct logfs_segment_header - per-segment header in the ostore
+ *
+ * @crc:			crc32 of header (there is no data)
+ * @pad:			unused, must be 0
+ * @type:			segment type, see above
+ * @level:			GC level for all objects in this segment
+ * @segno:			segment number
+ * @ec:				erase count for this segment
+ * @gec:			global erase count at time of writing
+ */
+struct logfs_segment_header {
+	__be32	crc;
+	__be16	pad;
+	__u8	type;
+	__u8	level;
+	__be32	segno;
+	__be32	ec;
+	__be64	gec;
+};
+
+SIZE_CHECK(logfs_segment_header, LOGFS_SEGMENT_HEADERSIZE);
+
+/**
+ * struct logfs_disk_super - on-medium superblock
+ *
+ * @ds_magic:			magic number, must equal LOGFS_MAGIC
+ * @ds_crc:			crc32 of structure starting with the next field
+ * @ds_ifile_levels:		maximum number of levels for ifile
+ * @ds_iblock_levels:		maximum number of levels for regular files
+ * @ds_data_levels:		number of seperate levels for data
+ * @pad0:			reserved, must be 0
+ * @ds_feature_incompat:	incompatible filesystem features
+ * @ds_feature_ro_compat:	read-only compatible filesystem features
+ * @ds_feature_compat:		compatible filesystem features
+ * @ds_flags:			flags
+ * @ds_segment_shift:		log2 of segment size
+ * @ds_block_shift:		log2 of block size
+ * @ds_write_shift:		log2 of write size
+ * @pad1:			reserved, must be 0
+ * @ds_journal_seg:		segments used by primary journal
+ * @ds_root_reserve:		bytes reserved for the superuser
+ * @ds_speed_reserve:		bytes reserved to speed up GC
+ * @ds_bad_seg_reserve:		number of segments reserved to handle bad blocks
+ * @pad2:			reserved, must be 0
+ * @pad3:			reserved, must be 0
+ *
+ * Contains only read-only fields.  Read-write fields like the amount of used
+ * space is tracked in the dynamic superblock, which is stored in the journal.
+ */
+struct logfs_disk_super {
+	struct logfs_segment_header ds_sh;
+	__be64	ds_magic;
+
+	__be32	ds_crc;
+	__u8	ds_ifile_levels;
+	__u8	ds_iblock_levels;
+	__u8	ds_data_levels;
+	__u8	ds_segment_shift;
+	__u8	ds_block_shift;
+	__u8	ds_write_shift;
+	__u8	pad0[6];
+
+	__be64	ds_filesystem_size;
+	__be32	ds_segment_size;
+	__be32  ds_bad_seg_reserve;
+
+	__be64	ds_feature_incompat;
+	__be64	ds_feature_ro_compat;
+
+	__be64	ds_feature_compat;
+	__be64	ds_feature_flags;
+
+	__be64	ds_root_reserve;
+	__be64  ds_speed_reserve;
+
+	__be32	ds_journal_seg[LOGFS_JOURNAL_SEGS];
+
+	__be64	ds_super_ofs[2];
+	__be64	pad3[8];
+};
+
+SIZE_CHECK(logfs_disk_super, 256);
+
+/*
+ * Object types:
+ * OBJ_BLOCK	- Data or indirect block
+ * OBJ_INODE	- Inode
+ * OBJ_DENTRY	- Dentry
+ */
+enum {
+	OBJ_BLOCK	= 0x04,
+	OBJ_INODE	= 0x05,
+	OBJ_DENTRY	= 0x06,
+};
+
+/**
+ * struct logfs_object_header - per-object header in the ostore
+ *
+ * @crc:			crc32 of header, excluding data_crc
+ * @len:			length of data
+ * @type:			object type, see above
+ * @compr:			compression type
+ * @ino:			inode number
+ * @bix:			block index
+ * @data_crc:			crc32 of payload
+ */
+struct logfs_object_header {
+	__be32	crc;
+	__be16	len;
+	__u8	type;
+	__u8	compr;
+	__be64	ino;
+	__be64	bix;
+	__be32	data_crc;
+} __attribute__((packed));
+
+SIZE_CHECK(logfs_object_header, LOGFS_OBJECT_HEADERSIZE);
+
+/*
+ * Reserved inode numbers:
+ * LOGFS_INO_MASTER	- master inode (for inode file)
+ * LOGFS_INO_ROOT	- root directory
+ * LOGFS_INO_SEGFILE	- per-segment used bytes and erase count
+ */
+enum {
+	LOGFS_INO_MAPPING	= 0x00,
+	LOGFS_INO_MASTER	= 0x01,
+	LOGFS_INO_ROOT		= 0x02,
+	LOGFS_INO_SEGFILE	= 0x03,
+	LOGFS_RESERVED_INOS	= 0x10,
+};
+
+/*
+ * Inode flags.  High bits should never be written to the medium.  They are
+ * reserved for in-memory usage.
+ * Low bits should either remain in sync with the corresponding FS_*_FL or
+ * reuse slots that obviously don't make sense for logfs.
+ *
+ * LOGFS_IF_DIRTY	Inode must be written back
+ * LOGFS_IF_ZOMBIE	Inode has been deleted
+ * LOGFS_IF_STILLBORN	-ENOSPC happened when creating inode
+ */
+#define LOGFS_IF_COMPRESSED	0x00000004 /* == FS_COMPR_FL */
+#define LOGFS_IF_DIRTY		0x20000000
+#define LOGFS_IF_ZOMBIE		0x40000000
+#define LOGFS_IF_STILLBORN	0x80000000
+
+/* Flags available to chattr */
+#define LOGFS_FL_USER_VISIBLE	(LOGFS_IF_COMPRESSED)
+#define LOGFS_FL_USER_MODIFIABLE (LOGFS_IF_COMPRESSED)
+/* Flags inherited from parent directory on file/directory creation */
+#define LOGFS_FL_INHERITED	(LOGFS_IF_COMPRESSED)
+
+/**
+ * struct logfs_disk_inode - on-medium inode
+ *
+ * @di_mode:			file mode
+ * @di_pad:			reserved, must be 0
+ * @di_flags:			inode flags, see above
+ * @di_uid:			user id
+ * @di_gid:			group id
+ * @di_ctime:			change time
+ * @di_mtime:			modify time
+ * @di_refcount:		reference count (aka nlink or link count)
+ * @di_generation:		inode generation, for nfs
+ * @di_used_bytes:		number of bytes used
+ * @di_size:			file size
+ * @di_data:			data pointers
+ */
+struct logfs_disk_inode {
+	__be16	di_mode;
+	__u8	di_height;
+	__u8	di_pad;
+	__be32	di_flags;
+	__be32	di_uid;
+	__be32	di_gid;
+
+	__be64	di_ctime;
+	__be64	di_mtime;
+
+	__be64	di_atime;
+	__be32	di_refcount;
+	__be32	di_generation;
+
+	__be64	di_used_bytes;
+	__be64	di_size;
+
+	__be64	di_data[LOGFS_EMBEDDED_FIELDS];
+};
+
+SIZE_CHECK(logfs_disk_inode, 200);
+
+#define INODE_POINTER_OFS \
+	(offsetof(struct logfs_disk_inode, di_data) / sizeof(__be64))
+#define INODE_USED_OFS \
+	(offsetof(struct logfs_disk_inode, di_used_bytes) / sizeof(__be64))
+#define INODE_SIZE_OFS \
+	(offsetof(struct logfs_disk_inode, di_size) / sizeof(__be64))
+#define INODE_HEIGHT_OFS	(0)
+
+/**
+ * struct logfs_disk_dentry - on-medium dentry structure
+ *
+ * @ino:			inode number
+ * @namelen:			length of file name
+ * @type:			file type, identical to bits 12..15 of mode
+ * @name:			file name
+ */
+/* FIXME: add 6 bytes of padding to remove the __packed */
+struct logfs_disk_dentry {
+	__be64	ino;
+	__be16	namelen;
+	__u8	type;
+	__u8	name[LOGFS_MAX_NAMELEN];
+} __attribute__((packed));
+
+SIZE_CHECK(logfs_disk_dentry, 266);
+
+#define RESERVED		0xffffffff
+#define BADSEG			0xffffffff
+/**
+ * struct logfs_segment_entry - segment file entry
+ *
+ * @ec_level:			erase count and level
+ * @valid:			number of valid bytes
+ *
+ * Segment file contains one entry for every segment.  ec_level contains the
+ * erasecount in the upper 28 bits and the level in the lower 4 bits.  An
+ * ec_level of BADSEG (-1) identifies bad segments.  valid contains the number
+ * of valid bytes or RESERVED (-1 again) if the segment is used for either the
+ * superblock or the journal, or when the segment is bad.
+ */
+struct logfs_segment_entry {
+	__be32	ec_level;
+	__be32	valid;
+};
+
+SIZE_CHECK(logfs_segment_entry, 8);
+
+/**
+ * struct logfs_journal_header - header for journal entries (JEs)
+ *
+ * @h_crc:			crc32 of journal entry
+ * @h_len:			length of compressed journal entry,
+ *				not including header
+ * @h_datalen:			length of uncompressed data
+ * @h_type:			JE type
+ * @h_version:			unnormalized version of journal entry
+ * @h_compr:			compression type
+ * @h_pad:			reserved
+ */
+struct logfs_journal_header {
+	__be32	h_crc;
+	__be16	h_len;
+	__be16	h_datalen;
+	__be16	h_type;
+	__be16	h_version;
+	__u8	h_compr;
+	__u8	h_pad[3];
+};
+
+SIZE_CHECK(logfs_journal_header, 16);
+
+/*
+ * Life expectency of data.
+ * VIM_DEFAULT		- default vim
+ * VIM_SEGFILE		- for segment file only - very short-living
+ * VIM_GC		- GC'd data - likely long-living
+ */
+enum logfs_vim {
+	VIM_DEFAULT	= 0,
+	VIM_SEGFILE	= 1,
+};
+
+/**
+ * struct logfs_je_area - wbuf header
+ *
+ * @segno:			segment number of area
+ * @used_bytes:			number of bytes already used
+ * @gc_level:			GC level
+ * @vim:			life expectancy of data
+ *
+ * "Areas" are segments currently being used for writing.  There is at least
+ * one area per GC level.  Several may be used to seperate long-living from
+ * short-living data.  If an area with unknown vim is encountered, it can
+ * simply be closed.
+ * The write buffer immediately follow this header.
+ */
+struct logfs_je_area {
+	__be32	segno;
+	__be32	used_bytes;
+	__u8	gc_level;
+	__u8	vim;
+} __attribute__((packed));
+
+SIZE_CHECK(logfs_je_area, 10);
+
+#define MAX_JOURNAL_HEADER \
+	(sizeof(struct logfs_journal_header) + sizeof(struct logfs_je_area))
+
+/**
+ * struct logfs_je_dynsb - dynamic superblock
+ *
+ * @ds_gec:			global erase count
+ * @ds_sweeper:			current position of GC "sweeper"
+ * @ds_rename_dir:		source directory ino (see dir.c documentation)
+ * @ds_rename_pos:		position of source dd (see dir.c documentation)
+ * @ds_victim_ino:		victims of incomplete dir operation (see dir.c)
+ * @ds_victim_ino:		parent inode of victim (see dir.c)
+ * @ds_used_bytes:		number of used bytes
+ */
+struct logfs_je_dynsb {
+	__be64	ds_gec;
+	__be64	ds_sweeper;
+
+	__be64	ds_rename_dir;
+	__be64	ds_rename_pos;
+
+	__be64	ds_victim_ino;
+	__be64	ds_victim_parent; /* XXX */
+
+	__be64	ds_used_bytes;
+	__be32	ds_generation;
+	__be32	pad;
+};
+
+SIZE_CHECK(logfs_je_dynsb, 64);
+
+/**
+ * struct logfs_je_anchor - anchor of filesystem tree, aka master inode
+ *
+ * @da_size:			size of inode file
+ * @da_last_ino:		last created inode
+ * @da_used_bytes:		number of bytes used
+ * @da_data:			data pointers
+ */
+struct logfs_je_anchor {
+	__be64	da_size;
+	__be64	da_last_ino;
+
+	__be64	da_used_bytes;
+	u8	da_height;
+	u8	pad[7];
+
+	__be64	da_data[LOGFS_EMBEDDED_FIELDS];
+};
+
+SIZE_CHECK(logfs_je_anchor, 168);
+
+/**
+ * struct logfs_je_spillout - spillout entry (from 1st to 2nd journal)
+ *
+ * @so_segment:			segments used for 2nd journal
+ *
+ * Length of the array is given by h_len field in the header.
+ */
+struct logfs_je_spillout {
+	__be64	so_segment[0];
+};
+
+SIZE_CHECK(logfs_je_spillout, 0);
+
+/**
+ * struct logfs_je_journal_ec - erase counts for all journal segments
+ *
+ * @ec:				erase count
+ *
+ * Length of the array is given by h_len field in the header.
+ */
+struct logfs_je_journal_ec {
+	__be32	ec[0];
+};
+
+SIZE_CHECK(logfs_je_journal_ec, 0);
+
+/**
+ * struct logfs_je_free_segments - list of free segmetns with erase count
+ */
+struct logfs_je_free_segments {
+	__be32	segno;
+	__be32	ec;
+};
+
+SIZE_CHECK(logfs_je_free_segments, 8);
+
+/**
+ * struct logfs_seg_alias - list of segment aliases
+ */
+struct logfs_seg_alias {
+	__be32	old_segno;
+	__be32	new_segno;
+};
+
+SIZE_CHECK(logfs_seg_alias, 8);
+
+/**
+ * struct logfs_obj_alias - list of object aliases
+ */
+struct logfs_obj_alias {
+	__be64	ino;
+	__be64	bix;
+	__be64	val;
+	u8	level;
+	u8	pad[5];
+	__be16	child_no;
+};
+
+SIZE_CHECK(logfs_obj_alias, 32);
+
+/**
+ * Compression types.
+ *
+ * COMPR_NONE	- uncompressed
+ * COMPR_ZLIB	- compressed with zlib
+ */
+enum {
+	COMPR_NONE	= 0,
+	COMPR_ZLIB	= 1,
+};
+
+/*
+ * Journal entries come in groups of 16.  First group contains unique
+ * entries, next groups contain one entry per level
+ *
+ * JE_FIRST	- smallest possible journal entry number
+ *
+ * JEG_BASE	- base group, containing unique entries
+ * JE_COMMIT	- commit entry, validates all previous entries
+ * JE_DYNSB	- dynamic superblock, anything that ought to be in the
+ *		  superblock but cannot because it is read-write data
+ * JE_ANCHOR	- anchor aka master inode aka inode file's inode
+ * JE_ERASECOUNT  erasecounts for all journal segments
+ * JE_SPILLOUT	- unused
+ * JE_SEG_ALIAS	- aliases segments
+ * JE_AREA	- area description
+ *
+ * JE_LAST	- largest possible journal entry number
+ */
+enum {
+	JE_FIRST	= 0x01,
+
+	JEG_BASE	= 0x00,
+	JE_COMMIT	= 0x02,
+	JE_DYNSB	= 0x03,
+	JE_ANCHOR	= 0x04,
+	JE_ERASECOUNT	= 0x05,
+	JE_SPILLOUT	= 0x06,
+	JE_OBJ_ALIAS	= 0x0d,
+	JE_AREA		= 0x0e,
+
+	JE_LAST		= 0x0e,
+};
+
+#endif
diff --git a/fs/logfs/readwrite.c b/fs/logfs/readwrite.c
new file mode 100644
index 000000000000..1dbe6e8cccec
--- /dev/null
+++ b/fs/logfs/readwrite.c
@@ -0,0 +1,2246 @@
+/*
+ * fs/logfs/readwrite.c
+ *
+ * As should be obvious for Linux kernel code, license is GPLv2
+ *
+ * Copyright (c) 2005-2008 Joern Engel <joern@logfs.org>
+ *
+ *
+ * Actually contains five sets of very similar functions:
+ * read		read blocks from a file
+ * seek_hole	find next hole
+ * seek_data	find next data block
+ * valid	check whether a block still belongs to a file
+ * write	write blocks to a file
+ * delete	delete a block (for directories and ifile)
+ * rewrite	move existing blocks of a file to a new location (gc helper)
+ * truncate	truncate a file
+ */
+#include "logfs.h"
+#include <linux/sched.h>
+
+static u64 adjust_bix(u64 bix, level_t level)
+{
+	switch (level) {
+	case 0:
+		return bix;
+	case LEVEL(1):
+		return max_t(u64, bix, I0_BLOCKS);
+	case LEVEL(2):
+		return max_t(u64, bix, I1_BLOCKS);
+	case LEVEL(3):
+		return max_t(u64, bix, I2_BLOCKS);
+	case LEVEL(4):
+		return max_t(u64, bix, I3_BLOCKS);
+	case LEVEL(5):
+		return max_t(u64, bix, I4_BLOCKS);
+	default:
+		WARN_ON(1);
+		return bix;
+	}
+}
+
+static inline u64 maxbix(u8 height)
+{
+	return 1ULL << (LOGFS_BLOCK_BITS * height);
+}
+
+/**
+ * The inode address space is cut in two halves.  Lower half belongs to data
+ * pages, upper half to indirect blocks.  If the high bit (INDIRECT_BIT) is
+ * set, the actual block index (bix) and level can be derived from the page
+ * index.
+ *
+ * The lowest three bits of the block index are set to 0 after packing and
+ * unpacking.  Since the lowest n bits (9 for 4KiB blocksize) are ignored
+ * anyway this is harmless.
+ */
+#define ARCH_SHIFT	(BITS_PER_LONG - 32)
+#define INDIRECT_BIT	(0x80000000UL << ARCH_SHIFT)
+#define LEVEL_SHIFT	(28 + ARCH_SHIFT)
+static inline pgoff_t first_indirect_block(void)
+{
+	return INDIRECT_BIT | (1ULL << LEVEL_SHIFT);
+}
+
+pgoff_t logfs_pack_index(u64 bix, level_t level)
+{
+	pgoff_t index;
+
+	BUG_ON(bix >= INDIRECT_BIT);
+	if (level == 0)
+		return bix;
+
+	index  = INDIRECT_BIT;
+	index |= (__force long)level << LEVEL_SHIFT;
+	index |= bix >> ((__force u8)level * LOGFS_BLOCK_BITS);
+	return index;
+}
+
+void logfs_unpack_index(pgoff_t index, u64 *bix, level_t *level)
+{
+	u8 __level;
+
+	if (!(index & INDIRECT_BIT)) {
+		*bix = index;
+		*level = 0;
+		return;
+	}
+
+	__level = (index & ~INDIRECT_BIT) >> LEVEL_SHIFT;
+	*level = LEVEL(__level);
+	*bix = (index << (__level * LOGFS_BLOCK_BITS)) & ~INDIRECT_BIT;
+	*bix = adjust_bix(*bix, *level);
+	return;
+}
+#undef ARCH_SHIFT
+#undef INDIRECT_BIT
+#undef LEVEL_SHIFT
+
+/*
+ * Time is stored as nanoseconds since the epoch.
+ */
+static struct timespec be64_to_timespec(__be64 betime)
+{
+	return ns_to_timespec(be64_to_cpu(betime));
+}
+
+static __be64 timespec_to_be64(struct timespec tsp)
+{
+	return cpu_to_be64((u64)tsp.tv_sec * NSEC_PER_SEC + tsp.tv_nsec);
+}
+
+static void logfs_disk_to_inode(struct logfs_disk_inode *di, struct inode*inode)
+{
+	struct logfs_inode *li = logfs_inode(inode);
+	int i;
+
+	inode->i_mode	= be16_to_cpu(di->di_mode);
+	li->li_height	= di->di_height;
+	li->li_flags	= be32_to_cpu(di->di_flags);
+	inode->i_uid	= be32_to_cpu(di->di_uid);
+	inode->i_gid	= be32_to_cpu(di->di_gid);
+	inode->i_size	= be64_to_cpu(di->di_size);
+	logfs_set_blocks(inode, be64_to_cpu(di->di_used_bytes));
+	inode->i_atime	= be64_to_timespec(di->di_atime);
+	inode->i_ctime	= be64_to_timespec(di->di_ctime);
+	inode->i_mtime	= be64_to_timespec(di->di_mtime);
+	inode->i_nlink	= be32_to_cpu(di->di_refcount);
+	inode->i_generation = be32_to_cpu(di->di_generation);
+
+	switch (inode->i_mode & S_IFMT) {
+	case S_IFSOCK:	/* fall through */
+	case S_IFBLK:	/* fall through */
+	case S_IFCHR:	/* fall through */
+	case S_IFIFO:
+		inode->i_rdev = be64_to_cpu(di->di_data[0]);
+		break;
+	case S_IFDIR:	/* fall through */
+	case S_IFREG:	/* fall through */
+	case S_IFLNK:
+		for (i = 0; i < LOGFS_EMBEDDED_FIELDS; i++)
+			li->li_data[i] = be64_to_cpu(di->di_data[i]);
+		break;
+	default:
+		BUG();
+	}
+}
+
+static void logfs_inode_to_disk(struct inode *inode, struct logfs_disk_inode*di)
+{
+	struct logfs_inode *li = logfs_inode(inode);
+	int i;
+
+	di->di_mode	= cpu_to_be16(inode->i_mode);
+	di->di_height	= li->li_height;
+	di->di_pad	= 0;
+	di->di_flags	= cpu_to_be32(li->li_flags);
+	di->di_uid	= cpu_to_be32(inode->i_uid);
+	di->di_gid	= cpu_to_be32(inode->i_gid);
+	di->di_size	= cpu_to_be64(i_size_read(inode));
+	di->di_used_bytes = cpu_to_be64(li->li_used_bytes);
+	di->di_atime	= timespec_to_be64(inode->i_atime);
+	di->di_ctime	= timespec_to_be64(inode->i_ctime);
+	di->di_mtime	= timespec_to_be64(inode->i_mtime);
+	di->di_refcount	= cpu_to_be32(inode->i_nlink);
+	di->di_generation = cpu_to_be32(inode->i_generation);
+
+	switch (inode->i_mode & S_IFMT) {
+	case S_IFSOCK:	/* fall through */
+	case S_IFBLK:	/* fall through */
+	case S_IFCHR:	/* fall through */
+	case S_IFIFO:
+		di->di_data[0] = cpu_to_be64(inode->i_rdev);
+		break;
+	case S_IFDIR:	/* fall through */
+	case S_IFREG:	/* fall through */
+	case S_IFLNK:
+		for (i = 0; i < LOGFS_EMBEDDED_FIELDS; i++)
+			di->di_data[i] = cpu_to_be64(li->li_data[i]);
+		break;
+	default:
+		BUG();
+	}
+}
+
+static void __logfs_set_blocks(struct inode *inode)
+{
+	struct super_block *sb = inode->i_sb;
+	struct logfs_inode *li = logfs_inode(inode);
+
+	inode->i_blocks = ULONG_MAX;
+	if (li->li_used_bytes >> sb->s_blocksize_bits < ULONG_MAX)
+		inode->i_blocks = ALIGN(li->li_used_bytes, 512) >> 9;
+}
+
+void logfs_set_blocks(struct inode *inode, u64 bytes)
+{
+	struct logfs_inode *li = logfs_inode(inode);
+
+	li->li_used_bytes = bytes;
+	__logfs_set_blocks(inode);
+}
+
+static void prelock_page(struct super_block *sb, struct page *page, int lock)
+{
+	struct logfs_super *super = logfs_super(sb);
+
+	BUG_ON(!PageLocked(page));
+	if (lock) {
+		BUG_ON(PagePreLocked(page));
+		SetPagePreLocked(page);
+	} else {
+		/* We are in GC path. */
+		if (PagePreLocked(page))
+			super->s_lock_count++;
+		else
+			SetPagePreLocked(page);
+	}
+}
+
+static void preunlock_page(struct super_block *sb, struct page *page, int lock)
+{
+	struct logfs_super *super = logfs_super(sb);
+
+	BUG_ON(!PageLocked(page));
+	if (lock)
+		ClearPagePreLocked(page);
+	else {
+		/* We are in GC path. */
+		BUG_ON(!PagePreLocked(page));
+		if (super->s_lock_count)
+			super->s_lock_count--;
+		else
+			ClearPagePreLocked(page);
+	}
+}
+
+/*
+ * Logfs is prone to an AB-BA deadlock where one task tries to acquire
+ * s_write_mutex with a locked page and GC tries to get that page while holding
+ * s_write_mutex.
+ * To solve this issue logfs will ignore the page lock iff the page in question
+ * is waiting for s_write_mutex.  We annotate this fact by setting PG_pre_locked
+ * in addition to PG_locked.
+ */
+static void logfs_get_wblocks(struct super_block *sb, struct page *page,
+		int lock)
+{
+	struct logfs_super *super = logfs_super(sb);
+
+	if (page)
+		prelock_page(sb, page, lock);
+
+	if (lock) {
+		mutex_lock(&super->s_write_mutex);
+		logfs_gc_pass(sb);
+		/* FIXME: We also have to check for shadowed space
+		 * and mempool fill grade */
+	}
+}
+
+static void logfs_put_wblocks(struct super_block *sb, struct page *page,
+		int lock)
+{
+	struct logfs_super *super = logfs_super(sb);
+
+	if (page)
+		preunlock_page(sb, page, lock);
+	/* Order matters - we must clear PG_pre_locked before releasing
+	 * s_write_mutex or we could race against another task. */
+	if (lock)
+		mutex_unlock(&super->s_write_mutex);
+}
+
+static struct page *logfs_get_read_page(struct inode *inode, u64 bix,
+		level_t level)
+{
+	return find_or_create_page(inode->i_mapping,
+			logfs_pack_index(bix, level), GFP_NOFS);
+}
+
+static void logfs_put_read_page(struct page *page)
+{
+	unlock_page(page);
+	page_cache_release(page);
+}
+
+static void logfs_lock_write_page(struct page *page)
+{
+	int loop = 0;
+
+	while (unlikely(!trylock_page(page))) {
+		if (loop++ > 0x1000) {
+			/* Has been observed once so far... */
+			printk(KERN_ERR "stack at %p\n", &loop);
+			BUG();
+		}
+		if (PagePreLocked(page)) {
+			/* Holder of page lock is waiting for us, it
+			 * is safe to use this page. */
+			break;
+		}
+		/* Some other process has this page locked and has
+		 * nothing to do with us.  Wait for it to finish.
+		 */
+		schedule();
+	}
+	BUG_ON(!PageLocked(page));
+}
+
+static struct page *logfs_get_write_page(struct inode *inode, u64 bix,
+		level_t level)
+{
+	struct address_space *mapping = inode->i_mapping;
+	pgoff_t index = logfs_pack_index(bix, level);
+	struct page *page;
+	int err;
+
+repeat:
+	page = find_get_page(mapping, index);
+	if (!page) {
+		page = __page_cache_alloc(GFP_NOFS);
+		if (!page)
+			return NULL;
+		err = add_to_page_cache_lru(page, mapping, index, GFP_NOFS);
+		if (unlikely(err)) {
+			page_cache_release(page);
+			if (err == -EEXIST)
+				goto repeat;
+			return NULL;
+		}
+	} else logfs_lock_write_page(page);
+	BUG_ON(!PageLocked(page));
+	return page;
+}
+
+static void logfs_unlock_write_page(struct page *page)
+{
+	if (!PagePreLocked(page))
+		unlock_page(page);
+}
+
+static void logfs_put_write_page(struct page *page)
+{
+	logfs_unlock_write_page(page);
+	page_cache_release(page);
+}
+
+static struct page *logfs_get_page(struct inode *inode, u64 bix, level_t level,
+		int rw)
+{
+	if (rw == READ)
+		return logfs_get_read_page(inode, bix, level);
+	else
+		return logfs_get_write_page(inode, bix, level);
+}
+
+static void logfs_put_page(struct page *page, int rw)
+{
+	if (rw == READ)
+		logfs_put_read_page(page);
+	else
+		logfs_put_write_page(page);
+}
+
+static unsigned long __get_bits(u64 val, int skip, int no)
+{
+	u64 ret = val;
+
+	ret >>= skip * no;
+	ret <<= 64 - no;
+	ret >>= 64 - no;
+	return ret;
+}
+
+static unsigned long get_bits(u64 val, level_t skip)
+{
+	return __get_bits(val, (__force int)skip, LOGFS_BLOCK_BITS);
+}
+
+static inline void init_shadow_tree(struct super_block *sb,
+		struct shadow_tree *tree)
+{
+	struct logfs_super *super = logfs_super(sb);
+
+	btree_init_mempool64(&tree->new, super->s_btree_pool);
+	btree_init_mempool64(&tree->old, super->s_btree_pool);
+}
+
+static void indirect_write_block(struct logfs_block *block)
+{
+	struct page *page;
+	struct inode *inode;
+	int ret;
+
+	page = block->page;
+	inode = page->mapping->host;
+	logfs_lock_write_page(page);
+	ret = logfs_write_buf(inode, page, 0);
+	logfs_unlock_write_page(page);
+	/*
+	 * This needs some rework.  Unless you want your filesystem to run
+	 * completely synchronously (you don't), the filesystem will always
+	 * report writes as 'successful' before the actual work has been
+	 * done.  The actual work gets done here and this is where any errors
+	 * will show up.  And there isn't much we can do about it, really.
+	 *
+	 * Some attempts to fix the errors (move from bad blocks, retry io,...)
+	 * have already been done, so anything left should be either a broken
+	 * device or a bug somewhere in logfs itself.  Being relatively new,
+	 * the odds currently favor a bug, so for now the line below isn't
+	 * entirely tasteles.
+	 */
+	BUG_ON(ret);
+}
+
+static void inode_write_block(struct logfs_block *block)
+{
+	struct inode *inode;
+	int ret;
+
+	inode = block->inode;
+	if (inode->i_ino == LOGFS_INO_MASTER)
+		logfs_write_anchor(inode);
+	else {
+		ret = __logfs_write_inode(inode, 0);
+		/* see indirect_write_block comment */
+		BUG_ON(ret);
+	}
+}
+
+static gc_level_t inode_block_level(struct logfs_block *block)
+{
+	BUG_ON(block->inode->i_ino == LOGFS_INO_MASTER);
+	return GC_LEVEL(LOGFS_MAX_LEVELS);
+}
+
+static gc_level_t indirect_block_level(struct logfs_block *block)
+{
+	struct page *page;
+	struct inode *inode;
+	u64 bix;
+	level_t level;
+
+	page = block->page;
+	inode = page->mapping->host;
+	logfs_unpack_index(page->index, &bix, &level);
+	return expand_level(inode->i_ino, level);
+}
+
+/*
+ * This silences a false, yet annoying gcc warning.  I hate it when my editor
+ * jumps into bitops.h each time I recompile this file.
+ * TODO: Complain to gcc folks about this and upgrade compiler.
+ */
+static unsigned long fnb(const unsigned long *addr,
+		unsigned long size, unsigned long offset)
+{
+	return find_next_bit(addr, size, offset);
+}
+
+static __be64 inode_val0(struct inode *inode)
+{
+	struct logfs_inode *li = logfs_inode(inode);
+	u64 val;
+
+	/*
+	 * Explicit shifting generates good code, but must match the format
+	 * of the structure.  Add some paranoia just in case.
+	 */
+	BUILD_BUG_ON(offsetof(struct logfs_disk_inode, di_mode) != 0);
+	BUILD_BUG_ON(offsetof(struct logfs_disk_inode, di_height) != 2);
+	BUILD_BUG_ON(offsetof(struct logfs_disk_inode, di_flags) != 4);
+
+	val =	(u64)inode->i_mode << 48 |
+		(u64)li->li_height << 40 |
+		(u64)li->li_flags;
+	return cpu_to_be64(val);
+}
+
+static int inode_write_alias(struct super_block *sb,
+		struct logfs_block *block, write_alias_t *write_one_alias)
+{
+	struct inode *inode = block->inode;
+	struct logfs_inode *li = logfs_inode(inode);
+	unsigned long pos;
+	u64 ino , bix;
+	__be64 val;
+	level_t level;
+	int err;
+
+	for (pos = 0; ; pos++) {
+		pos = fnb(block->alias_map, LOGFS_BLOCK_FACTOR, pos);
+		if (pos >= LOGFS_EMBEDDED_FIELDS + INODE_POINTER_OFS)
+			return 0;
+
+		switch (pos) {
+		case INODE_HEIGHT_OFS:
+			val = inode_val0(inode);
+			break;
+		case INODE_USED_OFS:
+			val = cpu_to_be64(li->li_used_bytes);;
+			break;
+		case INODE_SIZE_OFS:
+			val = cpu_to_be64(i_size_read(inode));
+			break;
+		case INODE_POINTER_OFS ... INODE_POINTER_OFS + LOGFS_EMBEDDED_FIELDS - 1:
+			val = cpu_to_be64(li->li_data[pos - INODE_POINTER_OFS]);
+			break;
+		default:
+			BUG();
+		}
+
+		ino = LOGFS_INO_MASTER;
+		bix = inode->i_ino;
+		level = LEVEL(0);
+		err = write_one_alias(sb, ino, bix, level, pos, val);
+		if (err)
+			return err;
+	}
+}
+
+static int indirect_write_alias(struct super_block *sb,
+		struct logfs_block *block, write_alias_t *write_one_alias)
+{
+	unsigned long pos;
+	struct page *page = block->page;
+	u64 ino , bix;
+	__be64 *child, val;
+	level_t level;
+	int err;
+
+	for (pos = 0; ; pos++) {
+		pos = fnb(block->alias_map, LOGFS_BLOCK_FACTOR, pos);
+		if (pos >= LOGFS_BLOCK_FACTOR)
+			return 0;
+
+		ino = page->mapping->host->i_ino;
+		logfs_unpack_index(page->index, &bix, &level);
+		child = kmap_atomic(page, KM_USER0);
+		val = child[pos];
+		kunmap_atomic(child, KM_USER0);
+		err = write_one_alias(sb, ino, bix, level, pos, val);
+		if (err)
+			return err;
+	}
+}
+
+int logfs_write_obj_aliases_pagecache(struct super_block *sb)
+{
+	struct logfs_super *super = logfs_super(sb);
+	struct logfs_block *block;
+	int err;
+
+	list_for_each_entry(block, &super->s_object_alias, alias_list) {
+		err = block->ops->write_alias(sb, block, write_alias_journal);
+		if (err)
+			return err;
+	}
+	return 0;
+}
+
+void __free_block(struct super_block *sb, struct logfs_block *block)
+{
+	BUG_ON(!list_empty(&block->item_list));
+	list_del(&block->alias_list);
+	mempool_free(block, logfs_super(sb)->s_block_pool);
+}
+
+static void inode_free_block(struct super_block *sb, struct logfs_block *block)
+{
+	struct inode *inode = block->inode;
+
+	logfs_inode(inode)->li_block = NULL;
+	__free_block(sb, block);
+}
+
+static void indirect_free_block(struct super_block *sb,
+		struct logfs_block *block)
+{
+	ClearPagePrivate(block->page);
+	block->page->private = 0;
+	__free_block(sb, block);
+}
+
+
+static struct logfs_block_ops inode_block_ops = {
+	.write_block = inode_write_block,
+	.block_level = inode_block_level,
+	.free_block = inode_free_block,
+	.write_alias = inode_write_alias,
+};
+
+struct logfs_block_ops indirect_block_ops = {
+	.write_block = indirect_write_block,
+	.block_level = indirect_block_level,
+	.free_block = indirect_free_block,
+	.write_alias = indirect_write_alias,
+};
+
+struct logfs_block *__alloc_block(struct super_block *sb,
+		u64 ino, u64 bix, level_t level)
+{
+	struct logfs_super *super = logfs_super(sb);
+	struct logfs_block *block;
+
+	block = mempool_alloc(super->s_block_pool, GFP_NOFS);
+	memset(block, 0, sizeof(*block));
+	INIT_LIST_HEAD(&block->alias_list);
+	INIT_LIST_HEAD(&block->item_list);
+	block->sb = sb;
+	block->ino = ino;
+	block->bix = bix;
+	block->level = level;
+	return block;
+}
+
+static void alloc_inode_block(struct inode *inode)
+{
+	struct logfs_inode *li = logfs_inode(inode);
+	struct logfs_block *block;
+
+	if (li->li_block)
+		return;
+
+	block = __alloc_block(inode->i_sb, LOGFS_INO_MASTER, inode->i_ino, 0);
+	block->inode = inode;
+	li->li_block = block;
+	block->ops = &inode_block_ops;
+}
+
+void initialize_block_counters(struct page *page, struct logfs_block *block,
+		__be64 *array, int page_is_empty)
+{
+	u64 ptr;
+	int i, start;
+
+	block->partial = 0;
+	block->full = 0;
+	start = 0;
+	if (page->index < first_indirect_block()) {
+		/* Counters are pointless on level 0 */
+		return;
+	}
+	if (page->index == first_indirect_block()) {
+		/* Skip unused pointers */
+		start = I0_BLOCKS;
+		block->full = I0_BLOCKS;
+	}
+	if (!page_is_empty) {
+		for (i = start; i < LOGFS_BLOCK_FACTOR; i++) {
+			ptr = be64_to_cpu(array[i]);
+			if (ptr)
+				block->partial++;
+			if (ptr & LOGFS_FULLY_POPULATED)
+				block->full++;
+		}
+	}
+}
+
+static void alloc_data_block(struct inode *inode, struct page *page)
+{
+	struct logfs_block *block;
+	u64 bix;
+	level_t level;
+
+	if (PagePrivate(page))
+		return;
+
+	logfs_unpack_index(page->index, &bix, &level);
+	block = __alloc_block(inode->i_sb, inode->i_ino, bix, level);
+	block->page = page;
+	SetPagePrivate(page);
+	page->private = (unsigned long)block;
+	block->ops = &indirect_block_ops;
+}
+
+static void alloc_indirect_block(struct inode *inode, struct page *page,
+		int page_is_empty)
+{
+	struct logfs_block *block;
+	__be64 *array;
+
+	if (PagePrivate(page))
+		return;
+
+	alloc_data_block(inode, page);
+
+	block = logfs_block(page);
+	array = kmap_atomic(page, KM_USER0);
+	initialize_block_counters(page, block, array, page_is_empty);
+	kunmap_atomic(array, KM_USER0);
+}
+
+static void block_set_pointer(struct page *page, int index, u64 ptr)
+{
+	struct logfs_block *block = logfs_block(page);
+	__be64 *array;
+	u64 oldptr;
+
+	BUG_ON(!block);
+	array = kmap_atomic(page, KM_USER0);
+	oldptr = be64_to_cpu(array[index]);
+	array[index] = cpu_to_be64(ptr);
+	kunmap_atomic(array, KM_USER0);
+	SetPageUptodate(page);
+
+	block->full += !!(ptr & LOGFS_FULLY_POPULATED)
+		- !!(oldptr & LOGFS_FULLY_POPULATED);
+	block->partial += !!ptr - !!oldptr;
+}
+
+static u64 block_get_pointer(struct page *page, int index)
+{
+	__be64 *block;
+	u64 ptr;
+
+	block = kmap_atomic(page, KM_USER0);
+	ptr = be64_to_cpu(block[index]);
+	kunmap_atomic(block, KM_USER0);
+	return ptr;
+}
+
+static int logfs_read_empty(struct page *page)
+{
+	zero_user_segment(page, 0, PAGE_CACHE_SIZE);
+	return 0;
+}
+
+static int logfs_read_direct(struct inode *inode, struct page *page)
+{
+	struct logfs_inode *li = logfs_inode(inode);
+	pgoff_t index = page->index;
+	u64 block;
+
+	block = li->li_data[index];
+	if (!block)
+		return logfs_read_empty(page);
+
+	return logfs_segment_read(inode, page, block, index, 0);
+}
+
+static int logfs_read_loop(struct inode *inode, struct page *page,
+		int rw_context)
+{
+	struct logfs_inode *li = logfs_inode(inode);
+	u64 bix, bofs = li->li_data[INDIRECT_INDEX];
+	level_t level, target_level;
+	int ret;
+	struct page *ipage;
+
+	logfs_unpack_index(page->index, &bix, &target_level);
+	if (!bofs)
+		return logfs_read_empty(page);
+
+	if (bix >= maxbix(li->li_height))
+		return logfs_read_empty(page);
+
+	for (level = LEVEL(li->li_height);
+			(__force u8)level > (__force u8)target_level;
+			level = SUBLEVEL(level)){
+		ipage = logfs_get_page(inode, bix, level, rw_context);
+		if (!ipage)
+			return -ENOMEM;
+
+		ret = logfs_segment_read(inode, ipage, bofs, bix, level);
+		if (ret) {
+			logfs_put_read_page(ipage);
+			return ret;
+		}
+
+		bofs = block_get_pointer(ipage, get_bits(bix, SUBLEVEL(level)));
+		logfs_put_page(ipage, rw_context);
+		if (!bofs)
+			return logfs_read_empty(page);
+	}
+
+	return logfs_segment_read(inode, page, bofs, bix, 0);
+}
+
+static int logfs_read_block(struct inode *inode, struct page *page,
+		int rw_context)
+{
+	pgoff_t index = page->index;
+
+	if (index < I0_BLOCKS)
+		return logfs_read_direct(inode, page);
+	return logfs_read_loop(inode, page, rw_context);
+}
+
+static int logfs_exist_loop(struct inode *inode, u64 bix)
+{
+	struct logfs_inode *li = logfs_inode(inode);
+	u64 bofs = li->li_data[INDIRECT_INDEX];
+	level_t level;
+	int ret;
+	struct page *ipage;
+
+	if (!bofs)
+		return 0;
+	if (bix >= maxbix(li->li_height))
+		return 0;
+
+	for (level = LEVEL(li->li_height); level != 0; level = SUBLEVEL(level)) {
+		ipage = logfs_get_read_page(inode, bix, level);
+		if (!ipage)
+			return -ENOMEM;
+
+		ret = logfs_segment_read(inode, ipage, bofs, bix, level);
+		if (ret) {
+			logfs_put_read_page(ipage);
+			return ret;
+		}
+
+		bofs = block_get_pointer(ipage, get_bits(bix, SUBLEVEL(level)));
+		logfs_put_read_page(ipage);
+		if (!bofs)
+			return 0;
+	}
+
+	return 1;
+}
+
+int logfs_exist_block(struct inode *inode, u64 bix)
+{
+	struct logfs_inode *li = logfs_inode(inode);
+
+	if (bix < I0_BLOCKS)
+		return !!li->li_data[bix];
+	return logfs_exist_loop(inode, bix);
+}
+
+static u64 seek_holedata_direct(struct inode *inode, u64 bix, int data)
+{
+	struct logfs_inode *li = logfs_inode(inode);
+
+	for (; bix < I0_BLOCKS; bix++)
+		if (data ^ (li->li_data[bix] == 0))
+			return bix;
+	return I0_BLOCKS;
+}
+
+static u64 seek_holedata_loop(struct inode *inode, u64 bix, int data)
+{
+	struct logfs_inode *li = logfs_inode(inode);
+	__be64 *rblock;
+	u64 increment, bofs = li->li_data[INDIRECT_INDEX];
+	level_t level;
+	int ret, slot;
+	struct page *page;
+
+	BUG_ON(!bofs);
+
+	for (level = LEVEL(li->li_height); level != 0; level = SUBLEVEL(level)) {
+		increment = 1 << (LOGFS_BLOCK_BITS * ((__force u8)level-1));
+		page = logfs_get_read_page(inode, bix, level);
+		if (!page)
+			return bix;
+
+		ret = logfs_segment_read(inode, page, bofs, bix, level);
+		if (ret) {
+			logfs_put_read_page(page);
+			return bix;
+		}
+
+		slot = get_bits(bix, SUBLEVEL(level));
+		rblock = kmap_atomic(page, KM_USER0);
+		while (slot < LOGFS_BLOCK_FACTOR) {
+			if (data && (rblock[slot] != 0))
+				break;
+			if (!data && !(be64_to_cpu(rblock[slot]) & LOGFS_FULLY_POPULATED))
+				break;
+			slot++;
+			bix += increment;
+			bix &= ~(increment - 1);
+		}
+		if (slot >= LOGFS_BLOCK_FACTOR) {
+			kunmap_atomic(rblock, KM_USER0);
+			logfs_put_read_page(page);
+			return bix;
+		}
+		bofs = be64_to_cpu(rblock[slot]);
+		kunmap_atomic(rblock, KM_USER0);
+		logfs_put_read_page(page);
+		if (!bofs) {
+			BUG_ON(data);
+			return bix;
+		}
+	}
+	return bix;
+}
+
+/**
+ * logfs_seek_hole - find next hole starting at a given block index
+ * @inode:		inode to search in
+ * @bix:		block index to start searching
+ *
+ * Returns next hole.  If the file doesn't contain any further holes, the
+ * block address next to eof is returned instead.
+ */
+u64 logfs_seek_hole(struct inode *inode, u64 bix)
+{
+	struct logfs_inode *li = logfs_inode(inode);
+
+	if (bix < I0_BLOCKS) {
+		bix = seek_holedata_direct(inode, bix, 0);
+		if (bix < I0_BLOCKS)
+			return bix;
+	}
+
+	if (!li->li_data[INDIRECT_INDEX])
+		return bix;
+	else if (li->li_data[INDIRECT_INDEX] & LOGFS_FULLY_POPULATED)
+		bix = maxbix(li->li_height);
+	else {
+		bix = seek_holedata_loop(inode, bix, 0);
+		if (bix < maxbix(li->li_height))
+			return bix;
+		/* Should not happen anymore.  But if some port writes semi-
+		 * corrupt images (as this one used to) we might run into it.
+		 */
+		WARN_ON_ONCE(bix == maxbix(li->li_height));
+	}
+
+	return bix;
+}
+
+static u64 __logfs_seek_data(struct inode *inode, u64 bix)
+{
+	struct logfs_inode *li = logfs_inode(inode);
+
+	if (bix < I0_BLOCKS) {
+		bix = seek_holedata_direct(inode, bix, 1);
+		if (bix < I0_BLOCKS)
+			return bix;
+	}
+
+	if (bix < maxbix(li->li_height)) {
+		if (!li->li_data[INDIRECT_INDEX])
+			bix = maxbix(li->li_height);
+		else
+			return seek_holedata_loop(inode, bix, 1);
+	}
+
+	return bix;
+}
+
+/**
+ * logfs_seek_data - find next data block after a given block index
+ * @inode:		inode to search in
+ * @bix:		block index to start searching
+ *
+ * Returns next data block.  If the file doesn't contain any further data
+ * blocks, the last block in the file is returned instead.
+ */
+u64 logfs_seek_data(struct inode *inode, u64 bix)
+{
+	struct super_block *sb = inode->i_sb;
+	u64 ret, end;
+
+	ret = __logfs_seek_data(inode, bix);
+	end = i_size_read(inode) >> sb->s_blocksize_bits;
+	if (ret >= end)
+		ret = max(bix, end);
+	return ret;
+}
+
+static int logfs_is_valid_direct(struct logfs_inode *li, u64 bix, u64 ofs)
+{
+	return pure_ofs(li->li_data[bix]) == ofs;
+}
+
+static int __logfs_is_valid_loop(struct inode *inode, u64 bix,
+		u64 ofs, u64 bofs)
+{
+	struct logfs_inode *li = logfs_inode(inode);
+	level_t level;
+	int ret;
+	struct page *page;
+
+	for (level = LEVEL(li->li_height); level != 0; level = SUBLEVEL(level)){
+		page = logfs_get_write_page(inode, bix, level);
+		BUG_ON(!page);
+
+		ret = logfs_segment_read(inode, page, bofs, bix, level);
+		if (ret) {
+			logfs_put_write_page(page);
+			return 0;
+		}
+
+		bofs = block_get_pointer(page, get_bits(bix, SUBLEVEL(level)));
+		logfs_put_write_page(page);
+		if (!bofs)
+			return 0;
+
+		if (pure_ofs(bofs) == ofs)
+			return 1;
+	}
+	return 0;
+}
+
+static int logfs_is_valid_loop(struct inode *inode, u64 bix, u64 ofs)
+{
+	struct logfs_inode *li = logfs_inode(inode);
+	u64 bofs = li->li_data[INDIRECT_INDEX];
+
+	if (!bofs)
+		return 0;
+
+	if (bix >= maxbix(li->li_height))
+		return 0;
+
+	if (pure_ofs(bofs) == ofs)
+		return 1;
+
+	return __logfs_is_valid_loop(inode, bix, ofs, bofs);
+}
+
+static int __logfs_is_valid_block(struct inode *inode, u64 bix, u64 ofs)
+{
+	struct logfs_inode *li = logfs_inode(inode);
+
+	if ((inode->i_nlink == 0) && atomic_read(&inode->i_count) == 1)
+		return 0;
+
+	if (bix < I0_BLOCKS)
+		return logfs_is_valid_direct(li, bix, ofs);
+	return logfs_is_valid_loop(inode, bix, ofs);
+}
+
+/**
+ * logfs_is_valid_block - check whether this block is still valid
+ *
+ * @sb	- superblock
+ * @ofs	- block physical offset
+ * @ino	- block inode number
+ * @bix	- block index
+ * @level - block level
+ *
+ * Returns 0 if the block is invalid, 1 if it is valid and 2 if it will
+ * become invalid once the journal is written.
+ */
+int logfs_is_valid_block(struct super_block *sb, u64 ofs, u64 ino, u64 bix,
+		gc_level_t gc_level)
+{
+	struct logfs_super *super = logfs_super(sb);
+	struct inode *inode;
+	int ret, cookie;
+
+	/* Umount closes a segment with free blocks remaining.  Those
+	 * blocks are by definition invalid. */
+	if (ino == -1)
+		return 0;
+
+	LOGFS_BUG_ON((u64)(u_long)ino != ino, sb);
+
+	inode = logfs_safe_iget(sb, ino, &cookie);
+	if (IS_ERR(inode))
+		goto invalid;
+
+	ret = __logfs_is_valid_block(inode, bix, ofs);
+	logfs_safe_iput(inode, cookie);
+	if (ret)
+		return ret;
+
+invalid:
+	/* Block is nominally invalid, but may still sit in the shadow tree,
+	 * waiting for a journal commit.
+	 */
+	if (btree_lookup64(&super->s_shadow_tree.old, ofs))
+		return 2;
+	return 0;
+}
+
+int logfs_readpage_nolock(struct page *page)
+{
+	struct inode *inode = page->mapping->host;
+	int ret = -EIO;
+
+	ret = logfs_read_block(inode, page, READ);
+
+	if (ret) {
+		ClearPageUptodate(page);
+		SetPageError(page);
+	} else {
+		SetPageUptodate(page);
+		ClearPageError(page);
+	}
+	flush_dcache_page(page);
+
+	return ret;
+}
+
+static int logfs_reserve_bytes(struct inode *inode, int bytes)
+{
+	struct logfs_super *super = logfs_super(inode->i_sb);
+	u64 available = super->s_free_bytes + super->s_dirty_free_bytes
+			- super->s_dirty_used_bytes - super->s_dirty_pages;
+
+	if (!bytes)
+		return 0;
+
+	if (available < bytes)
+		return -ENOSPC;
+
+	if (available < bytes + super->s_root_reserve &&
+			!capable(CAP_SYS_RESOURCE))
+		return -ENOSPC;
+
+	return 0;
+}
+
+int get_page_reserve(struct inode *inode, struct page *page)
+{
+	struct logfs_super *super = logfs_super(inode->i_sb);
+	int ret;
+
+	if (logfs_block(page) && logfs_block(page)->reserved_bytes)
+		return 0;
+
+	logfs_get_wblocks(inode->i_sb, page, WF_LOCK);
+	ret = logfs_reserve_bytes(inode, 6 * LOGFS_MAX_OBJECTSIZE);
+	if (!ret) {
+		alloc_data_block(inode, page);
+		logfs_block(page)->reserved_bytes += 6 * LOGFS_MAX_OBJECTSIZE;
+		super->s_dirty_pages += 6 * LOGFS_MAX_OBJECTSIZE;
+	}
+	logfs_put_wblocks(inode->i_sb, page, WF_LOCK);
+	return ret;
+}
+
+/*
+ * We are protected by write lock.  Push victims up to superblock level
+ * and release transaction when appropriate.
+ */
+/* FIXME: This is currently called from the wrong spots. */
+static void logfs_handle_transaction(struct inode *inode,
+		struct logfs_transaction *ta)
+{
+	struct logfs_super *super = logfs_super(inode->i_sb);
+
+	if (!ta)
+		return;
+	logfs_inode(inode)->li_block->ta = NULL;
+
+	if (inode->i_ino != LOGFS_INO_MASTER) {
+		BUG(); /* FIXME: Yes, this needs more thought */
+		/* just remember the transaction until inode is written */
+		//BUG_ON(logfs_inode(inode)->li_transaction);
+		//logfs_inode(inode)->li_transaction = ta;
+		return;
+	}
+
+	switch (ta->state) {
+	case CREATE_1: /* fall through */
+	case UNLINK_1:
+		BUG_ON(super->s_victim_ino);
+		super->s_victim_ino = ta->ino;
+		break;
+	case CREATE_2: /* fall through */
+	case UNLINK_2:
+		BUG_ON(super->s_victim_ino != ta->ino);
+		super->s_victim_ino = 0;
+		/* transaction ends here - free it */
+		kfree(ta);
+		break;
+	case CROSS_RENAME_1:
+		BUG_ON(super->s_rename_dir);
+		BUG_ON(super->s_rename_pos);
+		super->s_rename_dir = ta->dir;
+		super->s_rename_pos = ta->pos;
+		break;
+	case CROSS_RENAME_2:
+		BUG_ON(super->s_rename_dir != ta->dir);
+		BUG_ON(super->s_rename_pos != ta->pos);
+		super->s_rename_dir = 0;
+		super->s_rename_pos = 0;
+		kfree(ta);
+		break;
+	case TARGET_RENAME_1:
+		BUG_ON(super->s_rename_dir);
+		BUG_ON(super->s_rename_pos);
+		BUG_ON(super->s_victim_ino);
+		super->s_rename_dir = ta->dir;
+		super->s_rename_pos = ta->pos;
+		super->s_victim_ino = ta->ino;
+		break;
+	case TARGET_RENAME_2:
+		BUG_ON(super->s_rename_dir != ta->dir);
+		BUG_ON(super->s_rename_pos != ta->pos);
+		BUG_ON(super->s_victim_ino != ta->ino);
+		super->s_rename_dir = 0;
+		super->s_rename_pos = 0;
+		break;
+	case TARGET_RENAME_3:
+		BUG_ON(super->s_rename_dir);
+		BUG_ON(super->s_rename_pos);
+		BUG_ON(super->s_victim_ino != ta->ino);
+		super->s_victim_ino = 0;
+		kfree(ta);
+		break;
+	default:
+		BUG();
+	}
+}
+
+/*
+ * Not strictly a reservation, but rather a check that we still have enough
+ * space to satisfy the write.
+ */
+static int logfs_reserve_blocks(struct inode *inode, int blocks)
+{
+	return logfs_reserve_bytes(inode, blocks * LOGFS_MAX_OBJECTSIZE);
+}
+
+struct write_control {
+	u64 ofs;
+	long flags;
+};
+
+static struct logfs_shadow *alloc_shadow(struct inode *inode, u64 bix,
+		level_t level, u64 old_ofs)
+{
+	struct logfs_super *super = logfs_super(inode->i_sb);
+	struct logfs_shadow *shadow;
+
+	shadow = mempool_alloc(super->s_shadow_pool, GFP_NOFS);
+	memset(shadow, 0, sizeof(*shadow));
+	shadow->ino = inode->i_ino;
+	shadow->bix = bix;
+	shadow->gc_level = expand_level(inode->i_ino, level);
+	shadow->old_ofs = old_ofs & ~LOGFS_FULLY_POPULATED;
+	return shadow;
+}
+
+static void free_shadow(struct inode *inode, struct logfs_shadow *shadow)
+{
+	struct logfs_super *super = logfs_super(inode->i_sb);
+
+	mempool_free(shadow, super->s_shadow_pool);
+}
+
+/**
+ * fill_shadow_tree - Propagate shadow tree changes due to a write
+ * @inode:	Inode owning the page
+ * @page:	Struct page that was written
+ * @shadow:	Shadow for the current write
+ *
+ * Writes in logfs can result in two semi-valid objects.  The old object
+ * is still valid as long as it can be reached by following pointers on
+ * the medium.  Only when writes propagate all the way up to the journal
+ * has the new object safely replaced the old one.
+ *
+ * To handle this problem, a struct logfs_shadow is used to represent
+ * every single write.  It is attached to the indirect block, which is
+ * marked dirty.  When the indirect block is written, its shadows are
+ * handed up to the next indirect block (or inode).  Untimately they
+ * will reach the master inode and be freed upon journal commit.
+ *
+ * This function handles a single step in the propagation.  It adds the
+ * shadow for the current write to the tree, along with any shadows in
+ * the page's tree, in case it was an indirect block.  If a page is
+ * written, the inode parameter is left NULL, if an inode is written,
+ * the page parameter is left NULL.
+ */
+static void fill_shadow_tree(struct inode *inode, struct page *page,
+		struct logfs_shadow *shadow)
+{
+	struct logfs_super *super = logfs_super(inode->i_sb);
+	struct logfs_block *block = logfs_block(page);
+	struct shadow_tree *tree = &super->s_shadow_tree;
+
+	if (PagePrivate(page)) {
+		if (block->alias_map)
+			super->s_no_object_aliases -= bitmap_weight(
+					block->alias_map, LOGFS_BLOCK_FACTOR);
+		logfs_handle_transaction(inode, block->ta);
+		block->ops->free_block(inode->i_sb, block);
+	}
+	if (shadow) {
+		if (shadow->old_ofs)
+			btree_insert64(&tree->old, shadow->old_ofs, shadow,
+					GFP_NOFS);
+		else
+			btree_insert64(&tree->new, shadow->new_ofs, shadow,
+					GFP_NOFS);
+
+		super->s_dirty_used_bytes += shadow->new_len;
+		super->s_dirty_free_bytes += shadow->old_len;
+	}
+}
+
+static void logfs_set_alias(struct super_block *sb, struct logfs_block *block,
+		long child_no)
+{
+	struct logfs_super *super = logfs_super(sb);
+
+	if (block->inode && block->inode->i_ino == LOGFS_INO_MASTER) {
+		/* Aliases in the master inode are pointless. */
+		return;
+	}
+
+	if (!test_bit(child_no, block->alias_map)) {
+		set_bit(child_no, block->alias_map);
+		super->s_no_object_aliases++;
+	}
+	list_move_tail(&block->alias_list, &super->s_object_alias);
+}
+
+/*
+ * Object aliases can and often do change the size and occupied space of a
+ * file.  So not only do we have to change the pointers, we also have to
+ * change inode->i_size and li->li_used_bytes.  Which is done by setting
+ * another two object aliases for the inode itself.
+ */
+static void set_iused(struct inode *inode, struct logfs_shadow *shadow)
+{
+	struct logfs_inode *li = logfs_inode(inode);
+
+	if (shadow->new_len == shadow->old_len)
+		return;
+
+	alloc_inode_block(inode);
+	li->li_used_bytes += shadow->new_len - shadow->old_len;
+	__logfs_set_blocks(inode);
+	logfs_set_alias(inode->i_sb, li->li_block, INODE_USED_OFS);
+	logfs_set_alias(inode->i_sb, li->li_block, INODE_SIZE_OFS);
+}
+
+static int logfs_write_i0(struct inode *inode, struct page *page,
+		struct write_control *wc)
+{
+	struct logfs_shadow *shadow;
+	u64 bix;
+	level_t level;
+	int full, err = 0;
+
+	logfs_unpack_index(page->index, &bix, &level);
+	if (wc->ofs == 0)
+		if (logfs_reserve_blocks(inode, 1))
+			return -ENOSPC;
+
+	shadow = alloc_shadow(inode, bix, level, wc->ofs);
+	if (wc->flags & WF_WRITE)
+		err = logfs_segment_write(inode, page, shadow);
+	if (wc->flags & WF_DELETE)
+		logfs_segment_delete(inode, shadow);
+	if (err) {
+		free_shadow(inode, shadow);
+		return err;
+	}
+
+	set_iused(inode, shadow);
+	full = 1;
+	if (level != 0) {
+		alloc_indirect_block(inode, page, 0);
+		full = logfs_block(page)->full == LOGFS_BLOCK_FACTOR;
+	}
+	fill_shadow_tree(inode, page, shadow);
+	wc->ofs = shadow->new_ofs;
+	if (wc->ofs && full)
+		wc->ofs |= LOGFS_FULLY_POPULATED;
+	return 0;
+}
+
+static int logfs_write_direct(struct inode *inode, struct page *page,
+		long flags)
+{
+	struct logfs_inode *li = logfs_inode(inode);
+	struct write_control wc = {
+		.ofs = li->li_data[page->index],
+		.flags = flags,
+	};
+	int err;
+
+	alloc_inode_block(inode);
+
+	err = logfs_write_i0(inode, page, &wc);
+	if (err)
+		return err;
+
+	li->li_data[page->index] = wc.ofs;
+	logfs_set_alias(inode->i_sb, li->li_block,
+			page->index + INODE_POINTER_OFS);
+	return 0;
+}
+
+static int ptr_change(u64 ofs, struct page *page)
+{
+	struct logfs_block *block = logfs_block(page);
+	int empty0, empty1, full0, full1;
+
+	empty0 = ofs == 0;
+	empty1 = block->partial == 0;
+	if (empty0 != empty1)
+		return 1;
+
+	/* The !! is necessary to shrink result to int */
+	full0 = !!(ofs & LOGFS_FULLY_POPULATED);
+	full1 = block->full == LOGFS_BLOCK_FACTOR;
+	if (full0 != full1)
+		return 1;
+	return 0;
+}
+
+static int __logfs_write_rec(struct inode *inode, struct page *page,
+		struct write_control *this_wc,
+		pgoff_t bix, level_t target_level, level_t level)
+{
+	int ret, page_empty = 0;
+	int child_no = get_bits(bix, SUBLEVEL(level));
+	struct page *ipage;
+	struct write_control child_wc = {
+		.flags = this_wc->flags,
+	};
+
+	ipage = logfs_get_write_page(inode, bix, level);
+	if (!ipage)
+		return -ENOMEM;
+
+	if (this_wc->ofs) {
+		ret = logfs_segment_read(inode, ipage, this_wc->ofs, bix, level);
+		if (ret)
+			goto out;
+	} else if (!PageUptodate(ipage)) {
+		page_empty = 1;
+		logfs_read_empty(ipage);
+	}
+
+	child_wc.ofs = block_get_pointer(ipage, child_no);
+
+	if ((__force u8)level-1 > (__force u8)target_level)
+		ret = __logfs_write_rec(inode, page, &child_wc, bix,
+				target_level, SUBLEVEL(level));
+	else
+		ret = logfs_write_i0(inode, page, &child_wc);
+
+	if (ret)
+		goto out;
+
+	alloc_indirect_block(inode, ipage, page_empty);
+	block_set_pointer(ipage, child_no, child_wc.ofs);
+	/* FIXME: first condition seems superfluous */
+	if (child_wc.ofs || logfs_block(ipage)->partial)
+		this_wc->flags |= WF_WRITE;
+	/* the condition on this_wc->ofs ensures that we won't consume extra
+	 * space for indirect blocks in the future, which we cannot reserve */
+	if (!this_wc->ofs || ptr_change(this_wc->ofs, ipage))
+		ret = logfs_write_i0(inode, ipage, this_wc);
+	else
+		logfs_set_alias(inode->i_sb, logfs_block(ipage), child_no);
+out:
+	logfs_put_write_page(ipage);
+	return ret;
+}
+
+static int logfs_write_rec(struct inode *inode, struct page *page,
+		pgoff_t bix, level_t target_level, long flags)
+{
+	struct logfs_inode *li = logfs_inode(inode);
+	struct write_control wc = {
+		.ofs = li->li_data[INDIRECT_INDEX],
+		.flags = flags,
+	};
+	int ret;
+
+	alloc_inode_block(inode);
+
+	if (li->li_height > (__force u8)target_level)
+		ret = __logfs_write_rec(inode, page, &wc, bix, target_level,
+				LEVEL(li->li_height));
+	else
+		ret = logfs_write_i0(inode, page, &wc);
+	if (ret)
+		return ret;
+
+	if (li->li_data[INDIRECT_INDEX] != wc.ofs) {
+		li->li_data[INDIRECT_INDEX] = wc.ofs;
+		logfs_set_alias(inode->i_sb, li->li_block,
+				INDIRECT_INDEX + INODE_POINTER_OFS);
+	}
+	return ret;
+}
+
+void logfs_add_transaction(struct inode *inode, struct logfs_transaction *ta)
+{
+	alloc_inode_block(inode);
+	logfs_inode(inode)->li_block->ta = ta;
+}
+
+void logfs_del_transaction(struct inode *inode, struct logfs_transaction *ta)
+{
+	struct logfs_block *block = logfs_inode(inode)->li_block;
+
+	if (block && block->ta)
+		block->ta = NULL;
+}
+
+static int grow_inode(struct inode *inode, u64 bix, level_t level)
+{
+	struct logfs_inode *li = logfs_inode(inode);
+	u8 height = (__force u8)level;
+	struct page *page;
+	struct write_control wc = {
+		.flags = WF_WRITE,
+	};
+	int err;
+
+	BUG_ON(height > 5 || li->li_height > 5);
+	while (height > li->li_height || bix >= maxbix(li->li_height)) {
+		page = logfs_get_write_page(inode, I0_BLOCKS + 1,
+				LEVEL(li->li_height + 1));
+		if (!page)
+			return -ENOMEM;
+		logfs_read_empty(page);
+		alloc_indirect_block(inode, page, 1);
+		block_set_pointer(page, 0, li->li_data[INDIRECT_INDEX]);
+		err = logfs_write_i0(inode, page, &wc);
+		logfs_put_write_page(page);
+		if (err)
+			return err;
+		li->li_data[INDIRECT_INDEX] = wc.ofs;
+		wc.ofs = 0;
+		li->li_height++;
+		logfs_set_alias(inode->i_sb, li->li_block, INODE_HEIGHT_OFS);
+	}
+	return 0;
+}
+
+static int __logfs_write_buf(struct inode *inode, struct page *page, long flags)
+{
+	struct logfs_super *super = logfs_super(inode->i_sb);
+	pgoff_t index = page->index;
+	u64 bix;
+	level_t level;
+	int err;
+
+	flags |= WF_WRITE | WF_DELETE;
+	inode->i_ctime = inode->i_mtime = CURRENT_TIME;
+
+	logfs_unpack_index(index, &bix, &level);
+	if (logfs_block(page) && logfs_block(page)->reserved_bytes)
+		super->s_dirty_pages -= logfs_block(page)->reserved_bytes;
+
+	if (index < I0_BLOCKS)
+		return logfs_write_direct(inode, page, flags);
+
+	bix = adjust_bix(bix, level);
+	err = grow_inode(inode, bix, level);
+	if (err)
+		return err;
+	return logfs_write_rec(inode, page, bix, level, flags);
+}
+
+int logfs_write_buf(struct inode *inode, struct page *page, long flags)
+{
+	struct super_block *sb = inode->i_sb;
+	int ret;
+
+	logfs_get_wblocks(sb, page, flags & WF_LOCK);
+	ret = __logfs_write_buf(inode, page, flags);
+	logfs_put_wblocks(sb, page, flags & WF_LOCK);
+	return ret;
+}
+
+static int __logfs_delete(struct inode *inode, struct page *page)
+{
+	long flags = WF_DELETE;
+
+	inode->i_ctime = inode->i_mtime = CURRENT_TIME;
+
+	if (page->index < I0_BLOCKS)
+		return logfs_write_direct(inode, page, flags);
+	return logfs_write_rec(inode, page, page->index, 0, flags);
+}
+
+int logfs_delete(struct inode *inode, pgoff_t index,
+		struct shadow_tree *shadow_tree)
+{
+	struct super_block *sb = inode->i_sb;
+	struct page *page;
+	int ret;
+
+	page = logfs_get_read_page(inode, index, 0);
+	if (!page)
+		return -ENOMEM;
+
+	logfs_get_wblocks(sb, page, 1);
+	ret = __logfs_delete(inode, page);
+	logfs_put_wblocks(sb, page, 1);
+
+	logfs_put_read_page(page);
+
+	return ret;
+}
+
+/* Rewrite cannot mark the inode dirty but has to write it immediatly. */
+int logfs_rewrite_block(struct inode *inode, u64 bix, u64 ofs,
+		gc_level_t gc_level, long flags)
+{
+	level_t level = shrink_level(gc_level);
+	struct page *page;
+	int err;
+
+	page = logfs_get_write_page(inode, bix, level);
+	if (!page)
+		return -ENOMEM;
+
+	err = logfs_segment_read(inode, page, ofs, bix, level);
+	if (!err) {
+		if (level != 0)
+			alloc_indirect_block(inode, page, 0);
+		err = logfs_write_buf(inode, page, flags);
+	}
+	logfs_put_write_page(page);
+	return err;
+}
+
+static int truncate_data_block(struct inode *inode, struct page *page,
+		u64 ofs, struct logfs_shadow *shadow, u64 size)
+{
+	loff_t pageofs = page->index << inode->i_sb->s_blocksize_bits;
+	u64 bix;
+	level_t level;
+	int err;
+
+	/* Does truncation happen within this page? */
+	if (size <= pageofs || size - pageofs >= PAGE_SIZE)
+		return 0;
+
+	logfs_unpack_index(page->index, &bix, &level);
+	BUG_ON(level != 0);
+
+	err = logfs_segment_read(inode, page, ofs, bix, level);
+	if (err)
+		return err;
+
+	zero_user_segment(page, size - pageofs, PAGE_CACHE_SIZE);
+	return logfs_segment_write(inode, page, shadow);
+}
+
+static int logfs_truncate_i0(struct inode *inode, struct page *page,
+		struct write_control *wc, u64 size)
+{
+	struct logfs_shadow *shadow;
+	u64 bix;
+	level_t level;
+	int err = 0;
+
+	logfs_unpack_index(page->index, &bix, &level);
+	BUG_ON(level != 0);
+	shadow = alloc_shadow(inode, bix, level, wc->ofs);
+
+	err = truncate_data_block(inode, page, wc->ofs, shadow, size);
+	if (err) {
+		free_shadow(inode, shadow);
+		return err;
+	}
+
+	logfs_segment_delete(inode, shadow);
+	set_iused(inode, shadow);
+	fill_shadow_tree(inode, page, shadow);
+	wc->ofs = shadow->new_ofs;
+	return 0;
+}
+
+static int logfs_truncate_direct(struct inode *inode, u64 size)
+{
+	struct logfs_inode *li = logfs_inode(inode);
+	struct write_control wc;
+	struct page *page;
+	int e;
+	int err;
+
+	alloc_inode_block(inode);
+
+	for (e = I0_BLOCKS - 1; e >= 0; e--) {
+		if (size > (e+1) * LOGFS_BLOCKSIZE)
+			break;
+
+		wc.ofs = li->li_data[e];
+		if (!wc.ofs)
+			continue;
+
+		page = logfs_get_write_page(inode, e, 0);
+		if (!page)
+			return -ENOMEM;
+		err = logfs_segment_read(inode, page, wc.ofs, e, 0);
+		if (err) {
+			logfs_put_write_page(page);
+			return err;
+		}
+		err = logfs_truncate_i0(inode, page, &wc, size);
+		logfs_put_write_page(page);
+		if (err)
+			return err;
+
+		li->li_data[e] = wc.ofs;
+	}
+	return 0;
+}
+
+/* FIXME: these need to become per-sb once we support different blocksizes */
+static u64 __logfs_step[] = {
+	1,
+	I1_BLOCKS,
+	I2_BLOCKS,
+	I3_BLOCKS,
+};
+
+static u64 __logfs_start_index[] = {
+	I0_BLOCKS,
+	I1_BLOCKS,
+	I2_BLOCKS,
+	I3_BLOCKS
+};
+
+static inline u64 logfs_step(level_t level)
+{
+	return __logfs_step[(__force u8)level];
+}
+
+static inline u64 logfs_factor(u8 level)
+{
+	return __logfs_step[level] * LOGFS_BLOCKSIZE;
+}
+
+static inline u64 logfs_start_index(level_t level)
+{
+	return __logfs_start_index[(__force u8)level];
+}
+
+static void logfs_unpack_raw_index(pgoff_t index, u64 *bix, level_t *level)
+{
+	logfs_unpack_index(index, bix, level);
+	if (*bix <= logfs_start_index(SUBLEVEL(*level)))
+		*bix = 0;
+}
+
+static int __logfs_truncate_rec(struct inode *inode, struct page *ipage,
+		struct write_control *this_wc, u64 size)
+{
+	int truncate_happened = 0;
+	int e, err = 0;
+	u64 bix, child_bix, next_bix;
+	level_t level;
+	struct page *page;
+	struct write_control child_wc = { /* FIXME: flags */ };
+
+	logfs_unpack_raw_index(ipage->index, &bix, &level);
+	err = logfs_segment_read(inode, ipage, this_wc->ofs, bix, level);
+	if (err)
+		return err;
+
+	for (e = LOGFS_BLOCK_FACTOR - 1; e >= 0; e--) {
+		child_bix = bix + e * logfs_step(SUBLEVEL(level));
+		next_bix = child_bix + logfs_step(SUBLEVEL(level));
+		if (size > next_bix * LOGFS_BLOCKSIZE)
+			break;
+
+		child_wc.ofs = pure_ofs(block_get_pointer(ipage, e));
+		if (!child_wc.ofs)
+			continue;
+
+		page = logfs_get_write_page(inode, child_bix, SUBLEVEL(level));
+		if (!page)
+			return -ENOMEM;
+
+		if ((__force u8)level > 1)
+			err = __logfs_truncate_rec(inode, page, &child_wc, size);
+		else
+			err = logfs_truncate_i0(inode, page, &child_wc, size);
+		logfs_put_write_page(page);
+		if (err)
+			return err;
+
+		truncate_happened = 1;
+		alloc_indirect_block(inode, ipage, 0);
+		block_set_pointer(ipage, e, child_wc.ofs);
+	}
+
+	if (!truncate_happened) {
+		printk("ineffectual truncate (%lx, %lx, %llx)\n", inode->i_ino, ipage->index, size);
+		return 0;
+	}
+
+	this_wc->flags = WF_DELETE;
+	if (logfs_block(ipage)->partial)
+		this_wc->flags |= WF_WRITE;
+
+	return logfs_write_i0(inode, ipage, this_wc);
+}
+
+static int logfs_truncate_rec(struct inode *inode, u64 size)
+{
+	struct logfs_inode *li = logfs_inode(inode);
+	struct write_control wc = {
+		.ofs = li->li_data[INDIRECT_INDEX],
+	};
+	struct page *page;
+	int err;
+
+	alloc_inode_block(inode);
+
+	if (!wc.ofs)
+		return 0;
+
+	page = logfs_get_write_page(inode, 0, LEVEL(li->li_height));
+	if (!page)
+		return -ENOMEM;
+
+	err = __logfs_truncate_rec(inode, page, &wc, size);
+	logfs_put_write_page(page);
+	if (err)
+		return err;
+
+	if (li->li_data[INDIRECT_INDEX] != wc.ofs)
+		li->li_data[INDIRECT_INDEX] = wc.ofs;
+	return 0;
+}
+
+static int __logfs_truncate(struct inode *inode, u64 size)
+{
+	int ret;
+
+	if (size >= logfs_factor(logfs_inode(inode)->li_height))
+		return 0;
+
+	ret = logfs_truncate_rec(inode, size);
+	if (ret)
+		return ret;
+
+	return logfs_truncate_direct(inode, size);
+}
+
+int logfs_truncate(struct inode *inode, u64 size)
+{
+	struct super_block *sb = inode->i_sb;
+	int err;
+
+	logfs_get_wblocks(sb, NULL, 1);
+	err = __logfs_truncate(inode, size);
+	if (!err)
+		err = __logfs_write_inode(inode, 0);
+	logfs_put_wblocks(sb, NULL, 1);
+
+	if (!err)
+		err = vmtruncate(inode, size);
+
+	/* I don't trust error recovery yet. */
+	WARN_ON(err);
+	return err;
+}
+
+static void move_page_to_inode(struct inode *inode, struct page *page)
+{
+	struct logfs_inode *li = logfs_inode(inode);
+	struct logfs_block *block = logfs_block(page);
+
+	if (!block)
+		return;
+
+	log_blockmove("move_page_to_inode(%llx, %llx, %x)\n",
+			block->ino, block->bix, block->level);
+	BUG_ON(li->li_block);
+	block->ops = &inode_block_ops;
+	block->inode = inode;
+	li->li_block = block;
+
+	block->page = NULL;
+	page->private = 0;
+	ClearPagePrivate(page);
+}
+
+static void move_inode_to_page(struct page *page, struct inode *inode)
+{
+	struct logfs_inode *li = logfs_inode(inode);
+	struct logfs_block *block = li->li_block;
+
+	if (!block)
+		return;
+
+	log_blockmove("move_inode_to_page(%llx, %llx, %x)\n",
+			block->ino, block->bix, block->level);
+	BUG_ON(PagePrivate(page));
+	block->ops = &indirect_block_ops;
+	block->page = page;
+	page->private = (unsigned long)block;
+	SetPagePrivate(page);
+
+	block->inode = NULL;
+	li->li_block = NULL;
+}
+
+int logfs_read_inode(struct inode *inode)
+{
+	struct super_block *sb = inode->i_sb;
+	struct logfs_super *super = logfs_super(sb);
+	struct inode *master_inode = super->s_master_inode;
+	struct page *page;
+	struct logfs_disk_inode *di;
+	u64 ino = inode->i_ino;
+
+	if (ino << sb->s_blocksize_bits > i_size_read(master_inode))
+		return -ENODATA;
+	if (!logfs_exist_block(master_inode, ino))
+		return -ENODATA;
+
+	page = read_cache_page(master_inode->i_mapping, ino,
+			(filler_t *)logfs_readpage, NULL);
+	if (IS_ERR(page))
+		return PTR_ERR(page);
+
+	di = kmap_atomic(page, KM_USER0);
+	logfs_disk_to_inode(di, inode);
+	kunmap_atomic(di, KM_USER0);
+	move_page_to_inode(inode, page);
+	page_cache_release(page);
+	return 0;
+}
+
+/* Caller must logfs_put_write_page(page); */
+static struct page *inode_to_page(struct inode *inode)
+{
+	struct inode *master_inode = logfs_super(inode->i_sb)->s_master_inode;
+	struct logfs_disk_inode *di;
+	struct page *page;
+
+	BUG_ON(inode->i_ino == LOGFS_INO_MASTER);
+
+	page = logfs_get_write_page(master_inode, inode->i_ino, 0);
+	if (!page)
+		return NULL;
+
+	di = kmap_atomic(page, KM_USER0);
+	logfs_inode_to_disk(inode, di);
+	kunmap_atomic(di, KM_USER0);
+	move_inode_to_page(page, inode);
+	return page;
+}
+
+/* Cheaper version of write_inode.  All changes are concealed in
+ * aliases, which are moved back.  No write to the medium happens.
+ */
+void logfs_clear_inode(struct inode *inode)
+{
+	struct super_block *sb = inode->i_sb;
+	struct logfs_inode *li = logfs_inode(inode);
+	struct logfs_block *block = li->li_block;
+	struct page *page;
+
+	/* Only deleted files may be dirty at this point */
+	BUG_ON(inode->i_state & I_DIRTY && inode->i_nlink);
+	if (!block)
+		return;
+	if ((logfs_super(sb)->s_flags & LOGFS_SB_FLAG_SHUTDOWN)) {
+		block->ops->free_block(inode->i_sb, block);
+		return;
+	}
+
+	BUG_ON(inode->i_ino < LOGFS_RESERVED_INOS);
+	page = inode_to_page(inode);
+	BUG_ON(!page); /* FIXME: Use emergency page */
+	logfs_put_write_page(page);
+}
+
+static int do_write_inode(struct inode *inode)
+{
+	struct super_block *sb = inode->i_sb;
+	struct inode *master_inode = logfs_super(sb)->s_master_inode;
+	loff_t size = (inode->i_ino + 1) << inode->i_sb->s_blocksize_bits;
+	struct page *page;
+	int err;
+
+	BUG_ON(inode->i_ino == LOGFS_INO_MASTER);
+	/* FIXME: lock inode */
+
+	if (i_size_read(master_inode) < size)
+		i_size_write(master_inode, size);
+
+	/* TODO: Tell vfs this inode is clean now */
+
+	page = inode_to_page(inode);
+	if (!page)
+		return -ENOMEM;
+
+	/* FIXME: transaction is part of logfs_block now.  Is that enough? */
+	err = logfs_write_buf(master_inode, page, 0);
+	logfs_put_write_page(page);
+	return err;
+}
+
+static void logfs_mod_segment_entry(struct super_block *sb, u32 segno,
+		int write,
+		void (*change_se)(struct logfs_segment_entry *, long),
+		long arg)
+{
+	struct logfs_super *super = logfs_super(sb);
+	struct inode *inode;
+	struct page *page;
+	struct logfs_segment_entry *se;
+	pgoff_t page_no;
+	int child_no;
+
+	page_no = segno >> (sb->s_blocksize_bits - 3);
+	child_no = segno & ((sb->s_blocksize >> 3) - 1);
+
+	inode = super->s_segfile_inode;
+	page = logfs_get_write_page(inode, page_no, 0);
+	BUG_ON(!page); /* FIXME: We need some reserve page for this case */
+	if (!PageUptodate(page))
+		logfs_read_block(inode, page, WRITE);
+
+	if (write)
+		alloc_indirect_block(inode, page, 0);
+	se = kmap_atomic(page, KM_USER0);
+	change_se(se + child_no, arg);
+	if (write) {
+		logfs_set_alias(sb, logfs_block(page), child_no);
+		BUG_ON((int)be32_to_cpu(se[child_no].valid) > super->s_segsize);
+	}
+	kunmap_atomic(se, KM_USER0);
+
+	logfs_put_write_page(page);
+}
+
+static void __get_segment_entry(struct logfs_segment_entry *se, long _target)
+{
+	struct logfs_segment_entry *target = (void *)_target;
+
+	*target = *se;
+}
+
+void logfs_get_segment_entry(struct super_block *sb, u32 segno,
+		struct logfs_segment_entry *se)
+{
+	logfs_mod_segment_entry(sb, segno, 0, __get_segment_entry, (long)se);
+}
+
+static void __set_segment_used(struct logfs_segment_entry *se, long increment)
+{
+	u32 valid;
+
+	valid = be32_to_cpu(se->valid);
+	valid += increment;
+	se->valid = cpu_to_be32(valid);
+}
+
+void logfs_set_segment_used(struct super_block *sb, u64 ofs, int increment)
+{
+	struct logfs_super *super = logfs_super(sb);
+	u32 segno = ofs >> super->s_segshift;
+
+	if (!increment)
+		return;
+
+	logfs_mod_segment_entry(sb, segno, 1, __set_segment_used, increment);
+}
+
+static void __set_segment_erased(struct logfs_segment_entry *se, long ec_level)
+{
+	se->ec_level = cpu_to_be32(ec_level);
+}
+
+void logfs_set_segment_erased(struct super_block *sb, u32 segno, u32 ec,
+		gc_level_t gc_level)
+{
+	u32 ec_level = ec << 4 | (__force u8)gc_level;
+
+	logfs_mod_segment_entry(sb, segno, 1, __set_segment_erased, ec_level);
+}
+
+static void __set_segment_reserved(struct logfs_segment_entry *se, long ignore)
+{
+	se->valid = cpu_to_be32(RESERVED);
+}
+
+void logfs_set_segment_reserved(struct super_block *sb, u32 segno)
+{
+	logfs_mod_segment_entry(sb, segno, 1, __set_segment_reserved, 0);
+}
+
+static void __set_segment_unreserved(struct logfs_segment_entry *se,
+		long ec_level)
+{
+	se->valid = 0;
+	se->ec_level = cpu_to_be32(ec_level);
+}
+
+void logfs_set_segment_unreserved(struct super_block *sb, u32 segno, u32 ec)
+{
+	u32 ec_level = ec << 4;
+
+	logfs_mod_segment_entry(sb, segno, 1, __set_segment_unreserved,
+			ec_level);
+}
+
+int __logfs_write_inode(struct inode *inode, long flags)
+{
+	struct super_block *sb = inode->i_sb;
+	int ret;
+
+	logfs_get_wblocks(sb, NULL, flags & WF_LOCK);
+	ret = do_write_inode(inode);
+	logfs_put_wblocks(sb, NULL, flags & WF_LOCK);
+	return ret;
+}
+
+static int do_delete_inode(struct inode *inode)
+{
+	struct super_block *sb = inode->i_sb;
+	struct inode *master_inode = logfs_super(sb)->s_master_inode;
+	struct page *page;
+	int ret;
+
+	page = logfs_get_write_page(master_inode, inode->i_ino, 0);
+	if (!page)
+		return -ENOMEM;
+
+	move_inode_to_page(page, inode);
+
+	logfs_get_wblocks(sb, page, 1);
+	ret = __logfs_delete(master_inode, page);
+	logfs_put_wblocks(sb, page, 1);
+
+	logfs_put_write_page(page);
+	return ret;
+}
+
+/*
+ * ZOMBIE inodes have already been deleted before and should remain dead,
+ * if it weren't for valid checking.  No need to kill them again here.
+ */
+void logfs_delete_inode(struct inode *inode)
+{
+	struct logfs_inode *li = logfs_inode(inode);
+
+	if (!(li->li_flags & LOGFS_IF_ZOMBIE)) {
+		li->li_flags |= LOGFS_IF_ZOMBIE;
+		if (i_size_read(inode) > 0)
+			logfs_truncate(inode, 0);
+		do_delete_inode(inode);
+	}
+	truncate_inode_pages(&inode->i_data, 0);
+	clear_inode(inode);
+}
+
+void btree_write_block(struct logfs_block *block)
+{
+	struct inode *inode;
+	struct page *page;
+	int err, cookie;
+
+	inode = logfs_safe_iget(block->sb, block->ino, &cookie);
+	page = logfs_get_write_page(inode, block->bix, block->level);
+
+	err = logfs_readpage_nolock(page);
+	BUG_ON(err);
+	BUG_ON(!PagePrivate(page));
+	BUG_ON(logfs_block(page) != block);
+	err = __logfs_write_buf(inode, page, 0);
+	BUG_ON(err);
+	BUG_ON(PagePrivate(page) || page->private);
+
+	logfs_put_write_page(page);
+	logfs_safe_iput(inode, cookie);
+}
+
+/**
+ * logfs_inode_write - write inode or dentry objects
+ *
+ * @inode:		parent inode (ifile or directory)
+ * @buf:		object to write (inode or dentry)
+ * @n:			object size
+ * @_pos:		object number (file position in blocks/objects)
+ * @flags:		write flags
+ * @lock:		0 if write lock is already taken, 1 otherwise
+ * @shadow_tree:	shadow below this inode
+ *
+ * FIXME: All caller of this put a 200-300 byte variable on the stack,
+ * only to call here and do a memcpy from that stack variable.  A good
+ * example of wasted performance and stack space.
+ */
+int logfs_inode_write(struct inode *inode, const void *buf, size_t count,
+		loff_t bix, long flags, struct shadow_tree *shadow_tree)
+{
+	loff_t pos = bix << inode->i_sb->s_blocksize_bits;
+	int err;
+	struct page *page;
+	void *pagebuf;
+
+	BUG_ON(pos & (LOGFS_BLOCKSIZE-1));
+	BUG_ON(count > LOGFS_BLOCKSIZE);
+	page = logfs_get_write_page(inode, bix, 0);
+	if (!page)
+		return -ENOMEM;
+
+	pagebuf = kmap_atomic(page, KM_USER0);
+	memcpy(pagebuf, buf, count);
+	flush_dcache_page(page);
+	kunmap_atomic(pagebuf, KM_USER0);
+
+	if (i_size_read(inode) < pos + LOGFS_BLOCKSIZE)
+		i_size_write(inode, pos + LOGFS_BLOCKSIZE);
+
+	err = logfs_write_buf(inode, page, flags);
+	logfs_put_write_page(page);
+	return err;
+}
+
+int logfs_open_segfile(struct super_block *sb)
+{
+	struct logfs_super *super = logfs_super(sb);
+	struct inode *inode;
+
+	inode = logfs_read_meta_inode(sb, LOGFS_INO_SEGFILE);
+	if (IS_ERR(inode))
+		return PTR_ERR(inode);
+	super->s_segfile_inode = inode;
+	return 0;
+}
+
+int logfs_init_rw(struct super_block *sb)
+{
+	struct logfs_super *super = logfs_super(sb);
+	int min_fill = 3 * super->s_no_blocks;
+
+	INIT_LIST_HEAD(&super->s_object_alias);
+	mutex_init(&super->s_write_mutex);
+	super->s_block_pool = mempool_create_kmalloc_pool(min_fill,
+			sizeof(struct logfs_block));
+	super->s_shadow_pool = mempool_create_kmalloc_pool(min_fill,
+			sizeof(struct logfs_shadow));
+	return 0;
+}
+
+void logfs_cleanup_rw(struct super_block *sb)
+{
+	struct logfs_super *super = logfs_super(sb);
+
+	destroy_meta_inode(super->s_segfile_inode);
+	if (super->s_block_pool)
+		mempool_destroy(super->s_block_pool);
+	if (super->s_shadow_pool)
+		mempool_destroy(super->s_shadow_pool);
+}
diff --git a/fs/logfs/segment.c b/fs/logfs/segment.c
new file mode 100644
index 000000000000..5f58b74516ca
--- /dev/null
+++ b/fs/logfs/segment.c
@@ -0,0 +1,924 @@
+/*
+ * fs/logfs/segment.c	- Handling the Object Store
+ *
+ * As should be obvious for Linux kernel code, license is GPLv2
+ *
+ * Copyright (c) 2005-2008 Joern Engel <joern@logfs.org>
+ *
+ * Object store or ostore makes up the complete device with exception of
+ * the superblock and journal areas.  Apart from its own metadata it stores
+ * three kinds of objects: inodes, dentries and blocks, both data and indirect.
+ */
+#include "logfs.h"
+
+static int logfs_mark_segment_bad(struct super_block *sb, u32 segno)
+{
+	struct logfs_super *super = logfs_super(sb);
+	struct btree_head32 *head = &super->s_reserved_segments;
+	int err;
+
+	err = btree_insert32(head, segno, (void *)1, GFP_NOFS);
+	if (err)
+		return err;
+	logfs_super(sb)->s_bad_segments++;
+	/* FIXME: write to journal */
+	return 0;
+}
+
+int logfs_erase_segment(struct super_block *sb, u32 segno)
+{
+	struct logfs_super *super = logfs_super(sb);
+
+	super->s_gec++;
+
+	return super->s_devops->erase(sb, (u64)segno << super->s_segshift,
+			super->s_segsize);
+}
+
+static s64 logfs_get_free_bytes(struct logfs_area *area, size_t bytes)
+{
+	s32 ofs;
+
+	logfs_open_area(area, bytes);
+
+	ofs = area->a_used_bytes;
+	area->a_used_bytes += bytes;
+	BUG_ON(area->a_used_bytes >= logfs_super(area->a_sb)->s_segsize);
+
+	return dev_ofs(area->a_sb, area->a_segno, ofs);
+}
+
+static struct page *get_mapping_page(struct super_block *sb, pgoff_t index,
+		int use_filler)
+{
+	struct logfs_super *super = logfs_super(sb);
+	struct address_space *mapping = super->s_mapping_inode->i_mapping;
+	filler_t *filler = super->s_devops->readpage;
+	struct page *page;
+
+	BUG_ON(mapping_gfp_mask(mapping) & __GFP_FS);
+	if (use_filler)
+		page = read_cache_page(mapping, index, filler, sb);
+	else {
+		page = find_or_create_page(mapping, index, GFP_NOFS);
+		unlock_page(page);
+	}
+	return page;
+}
+
+void __logfs_buf_write(struct logfs_area *area, u64 ofs, void *buf, size_t len,
+		int use_filler)
+{
+	pgoff_t index = ofs >> PAGE_SHIFT;
+	struct page *page;
+	long offset = ofs & (PAGE_SIZE-1);
+	long copylen;
+
+	/* Only logfs_wbuf_recover may use len==0 */
+	BUG_ON(!len && !use_filler);
+	do {
+		copylen = min((ulong)len, PAGE_SIZE - offset);
+
+		page = get_mapping_page(area->a_sb, index, use_filler);
+		SetPageUptodate(page);
+		BUG_ON(!page); /* FIXME: reserve a pool */
+		memcpy(page_address(page) + offset, buf, copylen);
+		SetPagePrivate(page);
+		page_cache_release(page);
+
+		buf += copylen;
+		len -= copylen;
+		offset = 0;
+		index++;
+	} while (len);
+}
+
+/*
+ * bdev_writeseg will write full pages.  Memset the tail to prevent data leaks.
+ */
+static void pad_wbuf(struct logfs_area *area, int final)
+{
+	struct super_block *sb = area->a_sb;
+	struct logfs_super *super = logfs_super(sb);
+	struct page *page;
+	u64 ofs = dev_ofs(sb, area->a_segno, area->a_used_bytes);
+	pgoff_t index = ofs >> PAGE_SHIFT;
+	long offset = ofs & (PAGE_SIZE-1);
+	u32 len = PAGE_SIZE - offset;
+
+	if (len == PAGE_SIZE) {
+		/* The math in this function can surely use some love */
+		len = 0;
+	}
+	if (len) {
+		BUG_ON(area->a_used_bytes >= super->s_segsize);
+
+		page = get_mapping_page(area->a_sb, index, 0);
+		BUG_ON(!page); /* FIXME: reserve a pool */
+		memset(page_address(page) + offset, 0xff, len);
+		SetPagePrivate(page);
+		page_cache_release(page);
+	}
+
+	if (!final)
+		return;
+
+	area->a_used_bytes += len;
+	for ( ; area->a_used_bytes < super->s_segsize;
+			area->a_used_bytes += PAGE_SIZE) {
+		/* Memset another page */
+		index++;
+		page = get_mapping_page(area->a_sb, index, 0);
+		BUG_ON(!page); /* FIXME: reserve a pool */
+		memset(page_address(page), 0xff, PAGE_SIZE);
+		SetPagePrivate(page);
+		page_cache_release(page);
+	}
+}
+
+/*
+ * We have to be careful with the alias tree.  Since lookup is done by bix,
+ * it needs to be normalized, so 14, 15, 16, etc. all match when dealing with
+ * indirect blocks.  So always use it through accessor functions.
+ */
+static void *alias_tree_lookup(struct super_block *sb, u64 ino, u64 bix,
+		level_t level)
+{
+	struct btree_head128 *head = &logfs_super(sb)->s_object_alias_tree;
+	pgoff_t index = logfs_pack_index(bix, level);
+
+	return btree_lookup128(head, ino, index);
+}
+
+static int alias_tree_insert(struct super_block *sb, u64 ino, u64 bix,
+		level_t level, void *val)
+{
+	struct btree_head128 *head = &logfs_super(sb)->s_object_alias_tree;
+	pgoff_t index = logfs_pack_index(bix, level);
+
+	return btree_insert128(head, ino, index, val, GFP_NOFS);
+}
+
+static int btree_write_alias(struct super_block *sb, struct logfs_block *block,
+		write_alias_t *write_one_alias)
+{
+	struct object_alias_item *item;
+	int err;
+
+	list_for_each_entry(item, &block->item_list, list) {
+		err = write_alias_journal(sb, block->ino, block->bix,
+				block->level, item->child_no, item->val);
+		if (err)
+			return err;
+	}
+	return 0;
+}
+
+static gc_level_t btree_block_level(struct logfs_block *block)
+{
+	return expand_level(block->ino, block->level);
+}
+
+static struct logfs_block_ops btree_block_ops = {
+	.write_block	= btree_write_block,
+	.block_level	= btree_block_level,
+	.free_block	= __free_block,
+	.write_alias	= btree_write_alias,
+};
+
+int logfs_load_object_aliases(struct super_block *sb,
+		struct logfs_obj_alias *oa, int count)
+{
+	struct logfs_super *super = logfs_super(sb);
+	struct logfs_block *block;
+	struct object_alias_item *item;
+	u64 ino, bix;
+	level_t level;
+	int i, err;
+
+	super->s_flags |= LOGFS_SB_FLAG_OBJ_ALIAS;
+	count /= sizeof(*oa);
+	for (i = 0; i < count; i++) {
+		item = mempool_alloc(super->s_alias_pool, GFP_NOFS);
+		if (!item)
+			return -ENOMEM;
+		memset(item, 0, sizeof(*item));
+
+		super->s_no_object_aliases++;
+		item->val = oa[i].val;
+		item->child_no = be16_to_cpu(oa[i].child_no);
+
+		ino = be64_to_cpu(oa[i].ino);
+		bix = be64_to_cpu(oa[i].bix);
+		level = LEVEL(oa[i].level);
+
+		log_aliases("logfs_load_object_aliases(%llx, %llx, %x, %x) %llx\n",
+				ino, bix, level, item->child_no,
+				be64_to_cpu(item->val));
+		block = alias_tree_lookup(sb, ino, bix, level);
+		if (!block) {
+			block = __alloc_block(sb, ino, bix, level);
+			block->ops = &btree_block_ops;
+			err = alias_tree_insert(sb, ino, bix, level, block);
+			BUG_ON(err); /* mempool empty */
+		}
+		if (test_and_set_bit(item->child_no, block->alias_map)) {
+			printk(KERN_ERR"LogFS: Alias collision detected\n");
+			return -EIO;
+		}
+		list_move_tail(&block->alias_list, &super->s_object_alias);
+		list_add(&item->list, &block->item_list);
+	}
+	return 0;
+}
+
+static void kill_alias(void *_block, unsigned long ignore0,
+		u64 ignore1, u64 ignore2, size_t ignore3)
+{
+	struct logfs_block *block = _block;
+	struct super_block *sb = block->sb;
+	struct logfs_super *super = logfs_super(sb);
+	struct object_alias_item *item;
+
+	while (!list_empty(&block->item_list)) {
+		item = list_entry(block->item_list.next, typeof(*item), list);
+		list_del(&item->list);
+		mempool_free(item, super->s_alias_pool);
+	}
+	block->ops->free_block(sb, block);
+}
+
+static int obj_type(struct inode *inode, level_t level)
+{
+	if (level == 0) {
+		if (S_ISDIR(inode->i_mode))
+			return OBJ_DENTRY;
+		if (inode->i_ino == LOGFS_INO_MASTER)
+			return OBJ_INODE;
+	}
+	return OBJ_BLOCK;
+}
+
+static int obj_len(struct super_block *sb, int obj_type)
+{
+	switch (obj_type) {
+	case OBJ_DENTRY:
+		return sizeof(struct logfs_disk_dentry);
+	case OBJ_INODE:
+		return sizeof(struct logfs_disk_inode);
+	case OBJ_BLOCK:
+		return sb->s_blocksize;
+	default:
+		BUG();
+	}
+}
+
+static int __logfs_segment_write(struct inode *inode, void *buf,
+		struct logfs_shadow *shadow, int type, int len, int compr)
+{
+	struct logfs_area *area;
+	struct super_block *sb = inode->i_sb;
+	s64 ofs;
+	struct logfs_object_header h;
+	int acc_len;
+
+	if (shadow->gc_level == 0)
+		acc_len = len;
+	else
+		acc_len = obj_len(sb, type);
+
+	area = get_area(sb, shadow->gc_level);
+	ofs = logfs_get_free_bytes(area, len + LOGFS_OBJECT_HEADERSIZE);
+	LOGFS_BUG_ON(ofs <= 0, sb);
+	/*
+	 * Order is important.  logfs_get_free_bytes(), by modifying the
+	 * segment file, may modify the content of the very page we're about
+	 * to write now.  Which is fine, as long as the calculated crc and
+	 * written data still match.  So do the modifications _before_
+	 * calculating the crc.
+	 */
+
+	h.len	= cpu_to_be16(len);
+	h.type	= type;
+	h.compr	= compr;
+	h.ino	= cpu_to_be64(inode->i_ino);
+	h.bix	= cpu_to_be64(shadow->bix);
+	h.crc	= logfs_crc32(&h, sizeof(h) - 4, 4);
+	h.data_crc = logfs_crc32(buf, len, 0);
+
+	logfs_buf_write(area, ofs, &h, sizeof(h));
+	logfs_buf_write(area, ofs + LOGFS_OBJECT_HEADERSIZE, buf, len);
+
+	shadow->new_ofs = ofs;
+	shadow->new_len = acc_len + LOGFS_OBJECT_HEADERSIZE;
+
+	return 0;
+}
+
+static s64 logfs_segment_write_compress(struct inode *inode, void *buf,
+		struct logfs_shadow *shadow, int type, int len)
+{
+	struct super_block *sb = inode->i_sb;
+	void *compressor_buf = logfs_super(sb)->s_compressed_je;
+	ssize_t compr_len;
+	int ret;
+
+	mutex_lock(&logfs_super(sb)->s_journal_mutex);
+	compr_len = logfs_compress(buf, compressor_buf, len, len);
+
+	if (compr_len >= 0) {
+		ret = __logfs_segment_write(inode, compressor_buf, shadow,
+				type, compr_len, COMPR_ZLIB);
+	} else {
+		ret = __logfs_segment_write(inode, buf, shadow, type, len,
+				COMPR_NONE);
+	}
+	mutex_unlock(&logfs_super(sb)->s_journal_mutex);
+	return ret;
+}
+
+/**
+ * logfs_segment_write - write data block to object store
+ * @inode:		inode containing data
+ *
+ * Returns an errno or zero.
+ */
+int logfs_segment_write(struct inode *inode, struct page *page,
+		struct logfs_shadow *shadow)
+{
+	struct super_block *sb = inode->i_sb;
+	struct logfs_super *super = logfs_super(sb);
+	int do_compress, type, len;
+	int ret;
+	void *buf;
+
+	BUG_ON(logfs_super(sb)->s_flags & LOGFS_SB_FLAG_SHUTDOWN);
+	do_compress = logfs_inode(inode)->li_flags & LOGFS_IF_COMPRESSED;
+	if (shadow->gc_level != 0) {
+		/* temporarily disable compression for indirect blocks */
+		do_compress = 0;
+	}
+
+	type = obj_type(inode, shrink_level(shadow->gc_level));
+	len = obj_len(sb, type);
+	buf = kmap(page);
+	if (do_compress)
+		ret = logfs_segment_write_compress(inode, buf, shadow, type,
+				len);
+	else
+		ret = __logfs_segment_write(inode, buf, shadow, type, len,
+				COMPR_NONE);
+	kunmap(page);
+
+	log_segment("logfs_segment_write(%llx, %llx, %x) %llx->%llx %x->%x\n",
+			shadow->ino, shadow->bix, shadow->gc_level,
+			shadow->old_ofs, shadow->new_ofs,
+			shadow->old_len, shadow->new_len);
+	/* this BUG_ON did catch a locking bug.  useful */
+	BUG_ON(!(shadow->new_ofs & (super->s_segsize - 1)));
+	return ret;
+}
+
+int wbuf_read(struct super_block *sb, u64 ofs, size_t len, void *buf)
+{
+	pgoff_t index = ofs >> PAGE_SHIFT;
+	struct page *page;
+	long offset = ofs & (PAGE_SIZE-1);
+	long copylen;
+
+	while (len) {
+		copylen = min((ulong)len, PAGE_SIZE - offset);
+
+		page = get_mapping_page(sb, index, 1);
+		if (IS_ERR(page))
+			return PTR_ERR(page);
+		memcpy(buf, page_address(page) + offset, copylen);
+		page_cache_release(page);
+
+		buf += copylen;
+		len -= copylen;
+		offset = 0;
+		index++;
+	}
+	return 0;
+}
+
+/*
+ * The "position" of indirect blocks is ambiguous.  It can be the position
+ * of any data block somewhere behind this indirect block.  So we need to
+ * normalize the positions through logfs_block_mask() before comparing.
+ */
+static int check_pos(struct super_block *sb, u64 pos1, u64 pos2, level_t level)
+{
+	return	(pos1 & logfs_block_mask(sb, level)) !=
+		(pos2 & logfs_block_mask(sb, level));
+}
+
+#if 0
+static int read_seg_header(struct super_block *sb, u64 ofs,
+		struct logfs_segment_header *sh)
+{
+	__be32 crc;
+	int err;
+
+	err = wbuf_read(sb, ofs, sizeof(*sh), sh);
+	if (err)
+		return err;
+	crc = logfs_crc32(sh, sizeof(*sh), 4);
+	if (crc != sh->crc) {
+		printk(KERN_ERR"LOGFS: header crc error at %llx: expected %x, "
+				"got %x\n", ofs, be32_to_cpu(sh->crc),
+				be32_to_cpu(crc));
+		return -EIO;
+	}
+	return 0;
+}
+#endif
+
+static int read_obj_header(struct super_block *sb, u64 ofs,
+		struct logfs_object_header *oh)
+{
+	__be32 crc;
+	int err;
+
+	err = wbuf_read(sb, ofs, sizeof(*oh), oh);
+	if (err)
+		return err;
+	crc = logfs_crc32(oh, sizeof(*oh) - 4, 4);
+	if (crc != oh->crc) {
+		printk(KERN_ERR"LOGFS: header crc error at %llx: expected %x, "
+				"got %x\n", ofs, be32_to_cpu(oh->crc),
+				be32_to_cpu(crc));
+		return -EIO;
+	}
+	return 0;
+}
+
+static void move_btree_to_page(struct inode *inode, struct page *page,
+		__be64 *data)
+{
+	struct super_block *sb = inode->i_sb;
+	struct logfs_super *super = logfs_super(sb);
+	struct btree_head128 *head = &super->s_object_alias_tree;
+	struct logfs_block *block;
+	struct object_alias_item *item, *next;
+
+	if (!(super->s_flags & LOGFS_SB_FLAG_OBJ_ALIAS))
+		return;
+
+	block = btree_remove128(head, inode->i_ino, page->index);
+	if (!block)
+		return;
+
+	log_blockmove("move_btree_to_page(%llx, %llx, %x)\n",
+			block->ino, block->bix, block->level);
+	list_for_each_entry_safe(item, next, &block->item_list, list) {
+		data[item->child_no] = item->val;
+		list_del(&item->list);
+		mempool_free(item, super->s_alias_pool);
+	}
+	block->page = page;
+	SetPagePrivate(page);
+	page->private = (unsigned long)block;
+	block->ops = &indirect_block_ops;
+	initialize_block_counters(page, block, data, 0);
+}
+
+/*
+ * This silences a false, yet annoying gcc warning.  I hate it when my editor
+ * jumps into bitops.h each time I recompile this file.
+ * TODO: Complain to gcc folks about this and upgrade compiler.
+ */
+static unsigned long fnb(const unsigned long *addr,
+		unsigned long size, unsigned long offset)
+{
+	return find_next_bit(addr, size, offset);
+}
+
+void move_page_to_btree(struct page *page)
+{
+	struct logfs_block *block = logfs_block(page);
+	struct super_block *sb = block->sb;
+	struct logfs_super *super = logfs_super(sb);
+	struct object_alias_item *item;
+	unsigned long pos;
+	__be64 *child;
+	int err;
+
+	if (super->s_flags & LOGFS_SB_FLAG_SHUTDOWN) {
+		block->ops->free_block(sb, block);
+		return;
+	}
+	log_blockmove("move_page_to_btree(%llx, %llx, %x)\n",
+			block->ino, block->bix, block->level);
+	super->s_flags |= LOGFS_SB_FLAG_OBJ_ALIAS;
+
+	for (pos = 0; ; pos++) {
+		pos = fnb(block->alias_map, LOGFS_BLOCK_FACTOR, pos);
+		if (pos >= LOGFS_BLOCK_FACTOR)
+			break;
+
+		item = mempool_alloc(super->s_alias_pool, GFP_NOFS);
+		BUG_ON(!item); /* mempool empty */
+		memset(item, 0, sizeof(*item));
+
+		child = kmap_atomic(page, KM_USER0);
+		item->val = child[pos];
+		kunmap_atomic(child, KM_USER0);
+		item->child_no = pos;
+		list_add(&item->list, &block->item_list);
+	}
+	block->page = NULL;
+	ClearPagePrivate(page);
+	page->private = 0;
+	block->ops = &btree_block_ops;
+	err = alias_tree_insert(block->sb, block->ino, block->bix, block->level,
+			block);
+	BUG_ON(err); /* mempool empty */
+	ClearPageUptodate(page);
+}
+
+static int __logfs_segment_read(struct inode *inode, void *buf,
+		u64 ofs, u64 bix, level_t level)
+{
+	struct super_block *sb = inode->i_sb;
+	void *compressor_buf = logfs_super(sb)->s_compressed_je;
+	struct logfs_object_header oh;
+	__be32 crc;
+	u16 len;
+	int err, block_len;
+
+	block_len = obj_len(sb, obj_type(inode, level));
+	err = read_obj_header(sb, ofs, &oh);
+	if (err)
+		goto out_err;
+
+	err = -EIO;
+	if (be64_to_cpu(oh.ino) != inode->i_ino
+			|| check_pos(sb, be64_to_cpu(oh.bix), bix, level)) {
+		printk(KERN_ERR"LOGFS: (ino, bix) don't match at %llx: "
+				"expected (%lx, %llx), got (%llx, %llx)\n",
+				ofs, inode->i_ino, bix,
+				be64_to_cpu(oh.ino), be64_to_cpu(oh.bix));
+		goto out_err;
+	}
+
+	len = be16_to_cpu(oh.len);
+
+	switch (oh.compr) {
+	case COMPR_NONE:
+		err = wbuf_read(sb, ofs + LOGFS_OBJECT_HEADERSIZE, len, buf);
+		if (err)
+			goto out_err;
+		crc = logfs_crc32(buf, len, 0);
+		if (crc != oh.data_crc) {
+			printk(KERN_ERR"LOGFS: uncompressed data crc error at "
+					"%llx: expected %x, got %x\n", ofs,
+					be32_to_cpu(oh.data_crc),
+					be32_to_cpu(crc));
+			goto out_err;
+		}
+		break;
+	case COMPR_ZLIB:
+		mutex_lock(&logfs_super(sb)->s_journal_mutex);
+		err = wbuf_read(sb, ofs + LOGFS_OBJECT_HEADERSIZE, len,
+				compressor_buf);
+		if (err) {
+			mutex_unlock(&logfs_super(sb)->s_journal_mutex);
+			goto out_err;
+		}
+		crc = logfs_crc32(compressor_buf, len, 0);
+		if (crc != oh.data_crc) {
+			printk(KERN_ERR"LOGFS: compressed data crc error at "
+					"%llx: expected %x, got %x\n", ofs,
+					be32_to_cpu(oh.data_crc),
+					be32_to_cpu(crc));
+			mutex_unlock(&logfs_super(sb)->s_journal_mutex);
+			goto out_err;
+		}
+		err = logfs_uncompress(compressor_buf, buf, len, block_len);
+		mutex_unlock(&logfs_super(sb)->s_journal_mutex);
+		if (err) {
+			printk(KERN_ERR"LOGFS: uncompress error at %llx\n", ofs);
+			goto out_err;
+		}
+		break;
+	default:
+		LOGFS_BUG(sb);
+		err = -EIO;
+		goto out_err;
+	}
+	return 0;
+
+out_err:
+	logfs_set_ro(sb);
+	printk(KERN_ERR"LOGFS: device is read-only now\n");
+	LOGFS_BUG(sb);
+	return err;
+}
+
+/**
+ * logfs_segment_read - read data block from object store
+ * @inode:		inode containing data
+ * @buf:		data buffer
+ * @ofs:		physical data offset
+ * @bix:		block index
+ * @level:		block level
+ *
+ * Returns 0 on success or a negative errno.
+ */
+int logfs_segment_read(struct inode *inode, struct page *page,
+		u64 ofs, u64 bix, level_t level)
+{
+	int err;
+	void *buf;
+
+	if (PageUptodate(page))
+		return 0;
+
+	ofs &= ~LOGFS_FULLY_POPULATED;
+
+	buf = kmap(page);
+	err = __logfs_segment_read(inode, buf, ofs, bix, level);
+	if (!err) {
+		move_btree_to_page(inode, page, buf);
+		SetPageUptodate(page);
+	}
+	kunmap(page);
+	log_segment("logfs_segment_read(%lx, %llx, %x) %llx (%d)\n",
+			inode->i_ino, bix, level, ofs, err);
+	return err;
+}
+
+int logfs_segment_delete(struct inode *inode, struct logfs_shadow *shadow)
+{
+	struct super_block *sb = inode->i_sb;
+	struct logfs_object_header h;
+	u16 len;
+	int err;
+
+	BUG_ON(logfs_super(sb)->s_flags & LOGFS_SB_FLAG_SHUTDOWN);
+	BUG_ON(shadow->old_ofs & LOGFS_FULLY_POPULATED);
+	if (!shadow->old_ofs)
+		return 0;
+
+	log_segment("logfs_segment_delete(%llx, %llx, %x) %llx->%llx %x->%x\n",
+			shadow->ino, shadow->bix, shadow->gc_level,
+			shadow->old_ofs, shadow->new_ofs,
+			shadow->old_len, shadow->new_len);
+	err = read_obj_header(sb, shadow->old_ofs, &h);
+	LOGFS_BUG_ON(err, sb);
+	LOGFS_BUG_ON(be64_to_cpu(h.ino) != inode->i_ino, sb);
+	LOGFS_BUG_ON(check_pos(sb, shadow->bix, be64_to_cpu(h.bix),
+				shrink_level(shadow->gc_level)), sb);
+
+	if (shadow->gc_level == 0)
+		len = be16_to_cpu(h.len);
+	else
+		len = obj_len(sb, h.type);
+	shadow->old_len = len + sizeof(h);
+	return 0;
+}
+
+static void freeseg(struct super_block *sb, u32 segno)
+{
+	struct logfs_super *super = logfs_super(sb);
+	struct address_space *mapping = super->s_mapping_inode->i_mapping;
+	struct page *page;
+	u64 ofs, start, end;
+
+	start = dev_ofs(sb, segno, 0);
+	end = dev_ofs(sb, segno + 1, 0);
+	for (ofs = start; ofs < end; ofs += PAGE_SIZE) {
+		page = find_get_page(mapping, ofs >> PAGE_SHIFT);
+		if (!page)
+			continue;
+		ClearPagePrivate(page);
+		page_cache_release(page);
+	}
+}
+
+int logfs_open_area(struct logfs_area *area, size_t bytes)
+{
+	struct super_block *sb = area->a_sb;
+	struct logfs_super *super = logfs_super(sb);
+	int err, closed = 0;
+
+	if (area->a_is_open && area->a_used_bytes + bytes <= super->s_segsize)
+		return 0;
+
+	if (area->a_is_open) {
+		u64 ofs = dev_ofs(sb, area->a_segno, area->a_written_bytes);
+		u32 len = super->s_segsize - area->a_written_bytes;
+
+		log_gc("logfs_close_area(%x)\n", area->a_segno);
+		pad_wbuf(area, 1);
+		super->s_devops->writeseg(area->a_sb, ofs, len);
+		freeseg(sb, area->a_segno);
+		closed = 1;
+	}
+
+	area->a_used_bytes = 0;
+	area->a_written_bytes = 0;
+again:
+	area->a_ops->get_free_segment(area);
+	area->a_ops->get_erase_count(area);
+
+	log_gc("logfs_open_area(%x, %x)\n", area->a_segno, area->a_level);
+	err = area->a_ops->erase_segment(area);
+	if (err) {
+		printk(KERN_WARNING "LogFS: Error erasing segment %x\n",
+				area->a_segno);
+		logfs_mark_segment_bad(sb, area->a_segno);
+		goto again;
+	}
+	area->a_is_open = 1;
+	return closed;
+}
+
+void logfs_sync_area(struct logfs_area *area)
+{
+	struct super_block *sb = area->a_sb;
+	struct logfs_super *super = logfs_super(sb);
+	u64 ofs = dev_ofs(sb, area->a_segno, area->a_written_bytes);
+	u32 len = (area->a_used_bytes - area->a_written_bytes);
+
+	if (super->s_writesize)
+		len &= ~(super->s_writesize - 1);
+	if (len == 0)
+		return;
+	pad_wbuf(area, 0);
+	super->s_devops->writeseg(sb, ofs, len);
+	area->a_written_bytes += len;
+}
+
+void logfs_sync_segments(struct super_block *sb)
+{
+	struct logfs_super *super = logfs_super(sb);
+	int i;
+
+	for_each_area(i)
+		logfs_sync_area(super->s_area[i]);
+}
+
+/*
+ * Pick a free segment to be used for this area.  Effectively takes a
+ * candidate from the free list (not really a candidate anymore).
+ */
+static void ostore_get_free_segment(struct logfs_area *area)
+{
+	struct super_block *sb = area->a_sb;
+	struct logfs_super *super = logfs_super(sb);
+
+	if (super->s_free_list.count == 0) {
+		printk(KERN_ERR"LOGFS: ran out of free segments\n");
+		LOGFS_BUG(sb);
+	}
+
+	area->a_segno = get_best_cand(sb, &super->s_free_list, NULL);
+}
+
+static void ostore_get_erase_count(struct logfs_area *area)
+{
+	struct logfs_segment_entry se;
+	u32 ec_level;
+
+	logfs_get_segment_entry(area->a_sb, area->a_segno, &se);
+	BUG_ON(se.ec_level == cpu_to_be32(BADSEG) ||
+			se.valid == cpu_to_be32(RESERVED));
+
+	ec_level = be32_to_cpu(se.ec_level);
+	area->a_erase_count = (ec_level >> 4) + 1;
+}
+
+static int ostore_erase_segment(struct logfs_area *area)
+{
+	struct super_block *sb = area->a_sb;
+	struct logfs_segment_header sh;
+	u64 ofs;
+	int err;
+
+	err = logfs_erase_segment(sb, area->a_segno);
+	if (err)
+		return err;
+
+	sh.pad = 0;
+	sh.type = SEG_OSTORE;
+	sh.level = (__force u8)area->a_level;
+	sh.segno = cpu_to_be32(area->a_segno);
+	sh.ec = cpu_to_be32(area->a_erase_count);
+	sh.gec = cpu_to_be64(logfs_super(sb)->s_gec);
+	sh.crc = logfs_crc32(&sh, sizeof(sh), 4);
+
+	logfs_set_segment_erased(sb, area->a_segno, area->a_erase_count,
+			area->a_level);
+
+	ofs = dev_ofs(sb, area->a_segno, 0);
+	area->a_used_bytes = sizeof(sh);
+	logfs_buf_write(area, ofs, &sh, sizeof(sh));
+	return 0;
+}
+
+static const struct logfs_area_ops ostore_area_ops = {
+	.get_free_segment	= ostore_get_free_segment,
+	.get_erase_count	= ostore_get_erase_count,
+	.erase_segment		= ostore_erase_segment,
+};
+
+static void free_area(struct logfs_area *area)
+{
+	if (area)
+		freeseg(area->a_sb, area->a_segno);
+	kfree(area);
+}
+
+static struct logfs_area *alloc_area(struct super_block *sb)
+{
+	struct logfs_area *area;
+
+	area = kzalloc(sizeof(*area), GFP_KERNEL);
+	if (!area)
+		return NULL;
+
+	area->a_sb = sb;
+	return area;
+}
+
+static void map_invalidatepage(struct page *page, unsigned long l)
+{
+	BUG();
+}
+
+static int map_releasepage(struct page *page, gfp_t g)
+{
+	/* Don't release these pages */
+	return 0;
+}
+
+static const struct address_space_operations mapping_aops = {
+	.invalidatepage = map_invalidatepage,
+	.releasepage	= map_releasepage,
+	.set_page_dirty = __set_page_dirty_nobuffers,
+};
+
+int logfs_init_mapping(struct super_block *sb)
+{
+	struct logfs_super *super = logfs_super(sb);
+	struct address_space *mapping;
+	struct inode *inode;
+
+	inode = logfs_new_meta_inode(sb, LOGFS_INO_MAPPING);
+	if (IS_ERR(inode))
+		return PTR_ERR(inode);
+	super->s_mapping_inode = inode;
+	mapping = inode->i_mapping;
+	mapping->a_ops = &mapping_aops;
+	/* Would it be possible to use __GFP_HIGHMEM as well? */
+	mapping_set_gfp_mask(mapping, GFP_NOFS);
+	return 0;
+}
+
+int logfs_init_areas(struct super_block *sb)
+{
+	struct logfs_super *super = logfs_super(sb);
+	int i = -1;
+
+	super->s_alias_pool = mempool_create_kmalloc_pool(600,
+			sizeof(struct object_alias_item));
+	if (!super->s_alias_pool)
+		return -ENOMEM;
+
+	super->s_journal_area = alloc_area(sb);
+	if (!super->s_journal_area)
+		goto err;
+
+	for_each_area(i) {
+		super->s_area[i] = alloc_area(sb);
+		if (!super->s_area[i])
+			goto err;
+		super->s_area[i]->a_level = GC_LEVEL(i);
+		super->s_area[i]->a_ops = &ostore_area_ops;
+	}
+	btree_init_mempool128(&super->s_object_alias_tree,
+			super->s_btree_pool);
+	return 0;
+
+err:
+	for (i--; i >= 0; i--)
+		free_area(super->s_area[i]);
+	free_area(super->s_journal_area);
+	mempool_destroy(super->s_alias_pool);
+	return -ENOMEM;
+}
+
+void logfs_cleanup_areas(struct super_block *sb)
+{
+	struct logfs_super *super = logfs_super(sb);
+	int i;
+
+	btree_grim_visitor128(&super->s_object_alias_tree, 0, kill_alias);
+	for_each_area(i)
+		free_area(super->s_area[i]);
+	free_area(super->s_journal_area);
+	destroy_meta_inode(super->s_mapping_inode);
+}
diff --git a/fs/logfs/super.c b/fs/logfs/super.c
new file mode 100644
index 000000000000..d128a2c1c8d1
--- /dev/null
+++ b/fs/logfs/super.c
@@ -0,0 +1,634 @@
+/*
+ * fs/logfs/super.c
+ *
+ * As should be obvious for Linux kernel code, license is GPLv2
+ *
+ * Copyright (c) 2005-2008 Joern Engel <joern@logfs.org>
+ *
+ * Generally contains mount/umount code and also serves as a dump area for
+ * any functions that don't fit elsewhere and neither justify a file of their
+ * own.
+ */
+#include "logfs.h"
+#include <linux/bio.h>
+#include <linux/mtd/mtd.h>
+#include <linux/statfs.h>
+#include <linux/buffer_head.h>
+
+static DEFINE_MUTEX(emergency_mutex);
+static struct page *emergency_page;
+
+struct page *emergency_read_begin(struct address_space *mapping, pgoff_t index)
+{
+	filler_t *filler = (filler_t *)mapping->a_ops->readpage;
+	struct page *page;
+	int err;
+
+	page = read_cache_page(mapping, index, filler, NULL);
+	if (page)
+		return page;
+
+	/* No more pages available, switch to emergency page */
+	printk(KERN_INFO"Logfs: Using emergency page\n");
+	mutex_lock(&emergency_mutex);
+	err = filler(NULL, emergency_page);
+	if (err) {
+		mutex_unlock(&emergency_mutex);
+		printk(KERN_EMERG"Logfs: Error reading emergency page\n");
+		return ERR_PTR(err);
+	}
+	return emergency_page;
+}
+
+void emergency_read_end(struct page *page)
+{
+	if (page == emergency_page)
+		mutex_unlock(&emergency_mutex);
+	else
+		page_cache_release(page);
+}
+
+static void dump_segfile(struct super_block *sb)
+{
+	struct logfs_super *super = logfs_super(sb);
+	struct logfs_segment_entry se;
+	u32 segno;
+
+	for (segno = 0; segno < super->s_no_segs; segno++) {
+		logfs_get_segment_entry(sb, segno, &se);
+		printk("%3x: %6x %8x", segno, be32_to_cpu(se.ec_level),
+				be32_to_cpu(se.valid));
+		if (++segno < super->s_no_segs) {
+			logfs_get_segment_entry(sb, segno, &se);
+			printk(" %6x %8x", be32_to_cpu(se.ec_level),
+					be32_to_cpu(se.valid));
+		}
+		if (++segno < super->s_no_segs) {
+			logfs_get_segment_entry(sb, segno, &se);
+			printk(" %6x %8x", be32_to_cpu(se.ec_level),
+					be32_to_cpu(se.valid));
+		}
+		if (++segno < super->s_no_segs) {
+			logfs_get_segment_entry(sb, segno, &se);
+			printk(" %6x %8x", be32_to_cpu(se.ec_level),
+					be32_to_cpu(se.valid));
+		}
+		printk("\n");
+	}
+}
+
+/*
+ * logfs_crash_dump - dump debug information to device
+ *
+ * The LogFS superblock only occupies part of a segment.  This function will
+ * write as much debug information as it can gather into the spare space.
+ */
+void logfs_crash_dump(struct super_block *sb)
+{
+	dump_segfile(sb);
+}
+
+/*
+ * TODO: move to lib/string.c
+ */
+/**
+ * memchr_inv - Find a character in an area of memory.
+ * @s: The memory area
+ * @c: The byte to search for
+ * @n: The size of the area.
+ *
+ * returns the address of the first character other than @c, or %NULL
+ * if the whole buffer contains just @c.
+ */
+void *memchr_inv(const void *s, int c, size_t n)
+{
+	const unsigned char *p = s;
+	while (n-- != 0)
+		if ((unsigned char)c != *p++)
+			return (void *)(p - 1);
+
+	return NULL;
+}
+
+/*
+ * FIXME: There should be a reserve for root, similar to ext2.
+ */
+int logfs_statfs(struct dentry *dentry, struct kstatfs *stats)
+{
+	struct super_block *sb = dentry->d_sb;
+	struct logfs_super *super = logfs_super(sb);
+
+	stats->f_type		= LOGFS_MAGIC_U32;
+	stats->f_bsize		= sb->s_blocksize;
+	stats->f_blocks		= super->s_size >> LOGFS_BLOCK_BITS >> 3;
+	stats->f_bfree		= super->s_free_bytes >> sb->s_blocksize_bits;
+	stats->f_bavail		= super->s_free_bytes >> sb->s_blocksize_bits;
+	stats->f_files		= 0;
+	stats->f_ffree		= 0;
+	stats->f_namelen	= LOGFS_MAX_NAMELEN;
+	return 0;
+}
+
+static int logfs_sb_set(struct super_block *sb, void *_super)
+{
+	struct logfs_super *super = _super;
+
+	sb->s_fs_info = super;
+	sb->s_mtd = super->s_mtd;
+	sb->s_bdev = super->s_bdev;
+	return 0;
+}
+
+static int logfs_sb_test(struct super_block *sb, void *_super)
+{
+	struct logfs_super *super = _super;
+	struct mtd_info *mtd = super->s_mtd;
+
+	if (mtd && sb->s_mtd == mtd)
+		return 1;
+	if (super->s_bdev && sb->s_bdev == super->s_bdev)
+		return 1;
+	return 0;
+}
+
+static void set_segment_header(struct logfs_segment_header *sh, u8 type,
+		u8 level, u32 segno, u32 ec)
+{
+	sh->pad = 0;
+	sh->type = type;
+	sh->level = level;
+	sh->segno = cpu_to_be32(segno);
+	sh->ec = cpu_to_be32(ec);
+	sh->gec = cpu_to_be64(segno);
+	sh->crc = logfs_crc32(sh, LOGFS_SEGMENT_HEADERSIZE, 4);
+}
+
+static void logfs_write_ds(struct super_block *sb, struct logfs_disk_super *ds,
+		u32 segno, u32 ec)
+{
+	struct logfs_super *super = logfs_super(sb);
+	struct logfs_segment_header *sh = &ds->ds_sh;
+	int i;
+
+	memset(ds, 0, sizeof(*ds));
+	set_segment_header(sh, SEG_SUPER, 0, segno, ec);
+
+	ds->ds_ifile_levels	= super->s_ifile_levels;
+	ds->ds_iblock_levels	= super->s_iblock_levels;
+	ds->ds_data_levels	= super->s_data_levels; /* XXX: Remove */
+	ds->ds_segment_shift	= super->s_segshift;
+	ds->ds_block_shift	= sb->s_blocksize_bits;
+	ds->ds_write_shift	= super->s_writeshift;
+	ds->ds_filesystem_size	= cpu_to_be64(super->s_size);
+	ds->ds_segment_size	= cpu_to_be32(super->s_segsize);
+	ds->ds_bad_seg_reserve	= cpu_to_be32(super->s_bad_seg_reserve);
+	ds->ds_feature_incompat	= cpu_to_be64(super->s_feature_incompat);
+	ds->ds_feature_ro_compat= cpu_to_be64(super->s_feature_ro_compat);
+	ds->ds_feature_compat	= cpu_to_be64(super->s_feature_compat);
+	ds->ds_feature_flags	= cpu_to_be64(super->s_feature_flags);
+	ds->ds_root_reserve	= cpu_to_be64(super->s_root_reserve);
+	ds->ds_speed_reserve	= cpu_to_be64(super->s_speed_reserve);
+	journal_for_each(i)
+		ds->ds_journal_seg[i] = cpu_to_be32(super->s_journal_seg[i]);
+	ds->ds_magic		= cpu_to_be64(LOGFS_MAGIC);
+	ds->ds_crc = logfs_crc32(ds, sizeof(*ds),
+			LOGFS_SEGMENT_HEADERSIZE + 12);
+}
+
+static int write_one_sb(struct super_block *sb,
+		struct page *(*find_sb)(struct super_block *sb, u64 *ofs))
+{
+	struct logfs_super *super = logfs_super(sb);
+	struct logfs_disk_super *ds;
+	struct logfs_segment_entry se;
+	struct page *page;
+	u64 ofs;
+	u32 ec, segno;
+	int err;
+
+	page = find_sb(sb, &ofs);
+	if (!page)
+		return -EIO;
+	ds = page_address(page);
+	segno = seg_no(sb, ofs);
+	logfs_get_segment_entry(sb, segno, &se);
+	ec = be32_to_cpu(se.ec_level) >> 4;
+	ec++;
+	logfs_set_segment_erased(sb, segno, ec, 0);
+	logfs_write_ds(sb, ds, segno, ec);
+	err = super->s_devops->write_sb(sb, page);
+	page_cache_release(page);
+	return err;
+}
+
+int logfs_write_sb(struct super_block *sb)
+{
+	struct logfs_super *super = logfs_super(sb);
+	int err;
+
+	/* First superblock */
+	err = write_one_sb(sb, super->s_devops->find_first_sb);
+	if (err)
+		return err;
+
+	/* Last superblock */
+	err = write_one_sb(sb, super->s_devops->find_last_sb);
+	if (err)
+		return err;
+	return 0;
+}
+
+static int ds_cmp(const void *ds0, const void *ds1)
+{
+	size_t len = sizeof(struct logfs_disk_super);
+
+	/* We know the segment headers differ, so ignore them */
+	len -= LOGFS_SEGMENT_HEADERSIZE;
+	ds0 += LOGFS_SEGMENT_HEADERSIZE;
+	ds1 += LOGFS_SEGMENT_HEADERSIZE;
+	return memcmp(ds0, ds1, len);
+}
+
+static int logfs_recover_sb(struct super_block *sb)
+{
+	struct logfs_super *super = logfs_super(sb);
+	struct logfs_disk_super _ds0, *ds0 = &_ds0;
+	struct logfs_disk_super _ds1, *ds1 = &_ds1;
+	int err, valid0, valid1;
+
+	/* read first superblock */
+	err = wbuf_read(sb, super->s_sb_ofs[0], sizeof(*ds0), ds0);
+	if (err)
+		return err;
+	/* read last superblock */
+	err = wbuf_read(sb, super->s_sb_ofs[1], sizeof(*ds1), ds1);
+	if (err)
+		return err;
+	valid0 = logfs_check_ds(ds0) == 0;
+	valid1 = logfs_check_ds(ds1) == 0;
+
+	if (!valid0 && valid1) {
+		printk(KERN_INFO"First superblock is invalid - fixing.\n");
+		return write_one_sb(sb, super->s_devops->find_first_sb);
+	}
+	if (valid0 && !valid1) {
+		printk(KERN_INFO"Last superblock is invalid - fixing.\n");
+		return write_one_sb(sb, super->s_devops->find_last_sb);
+	}
+	if (valid0 && valid1 && ds_cmp(ds0, ds1)) {
+		printk(KERN_INFO"Superblocks don't match - fixing.\n");
+		return write_one_sb(sb, super->s_devops->find_last_sb);
+	}
+	/* If neither is valid now, something's wrong.  Didn't we properly
+	 * check them before?!? */
+	BUG_ON(!valid0 && !valid1);
+	return 0;
+}
+
+static int logfs_make_writeable(struct super_block *sb)
+{
+	int err;
+
+	/* Repair any broken superblock copies */
+	err = logfs_recover_sb(sb);
+	if (err)
+		return err;
+
+	/* Check areas for trailing unaccounted data */
+	err = logfs_check_areas(sb);
+	if (err)
+		return err;
+
+	err = logfs_open_segfile(sb);
+	if (err)
+		return err;
+
+	/* Do one GC pass before any data gets dirtied */
+	logfs_gc_pass(sb);
+
+	/* after all initializations are done, replay the journal
+	 * for rw-mounts, if necessary */
+	err = logfs_replay_journal(sb);
+	if (err)
+		return err;
+
+	return 0;
+}
+
+static int logfs_get_sb_final(struct super_block *sb, struct vfsmount *mnt)
+{
+	struct inode *rootdir;
+	int err;
+
+	/* root dir */
+	rootdir = logfs_iget(sb, LOGFS_INO_ROOT);
+	if (IS_ERR(rootdir))
+		goto fail;
+
+	sb->s_root = d_alloc_root(rootdir);
+	if (!sb->s_root)
+		goto fail;
+
+	/* FIXME: check for read-only mounts */
+	err = logfs_make_writeable(sb);
+	if (err)
+		goto fail2;
+
+	log_super("LogFS: Finished mounting\n");
+	simple_set_mnt(mnt, sb);
+	return 0;
+
+fail2:
+	iput(rootdir);
+fail:
+	iput(logfs_super(sb)->s_master_inode);
+	return -EIO;
+}
+
+int logfs_check_ds(struct logfs_disk_super *ds)
+{
+	struct logfs_segment_header *sh = &ds->ds_sh;
+
+	if (ds->ds_magic != cpu_to_be64(LOGFS_MAGIC))
+		return -EINVAL;
+	if (sh->crc != logfs_crc32(sh, LOGFS_SEGMENT_HEADERSIZE, 4))
+		return -EINVAL;
+	if (ds->ds_crc != logfs_crc32(ds, sizeof(*ds),
+				LOGFS_SEGMENT_HEADERSIZE + 12))
+		return -EINVAL;
+	return 0;
+}
+
+static struct page *find_super_block(struct super_block *sb)
+{
+	struct logfs_super *super = logfs_super(sb);
+	struct page *first, *last;
+
+	first = super->s_devops->find_first_sb(sb, &super->s_sb_ofs[0]);
+	if (!first || IS_ERR(first))
+		return NULL;
+	last = super->s_devops->find_last_sb(sb, &super->s_sb_ofs[1]);
+	if (!last || IS_ERR(first)) {
+		page_cache_release(first);
+		return NULL;
+	}
+
+	if (!logfs_check_ds(page_address(first))) {
+		page_cache_release(last);
+		return first;
+	}
+
+	/* First one didn't work, try the second superblock */
+	if (!logfs_check_ds(page_address(last))) {
+		page_cache_release(first);
+		return last;
+	}
+
+	/* Neither worked, sorry folks */
+	page_cache_release(first);
+	page_cache_release(last);
+	return NULL;
+}
+
+static int __logfs_read_sb(struct super_block *sb)
+{
+	struct logfs_super *super = logfs_super(sb);
+	struct page *page;
+	struct logfs_disk_super *ds;
+	int i;
+
+	page = find_super_block(sb);
+	if (!page)
+		return -EIO;
+
+	ds = page_address(page);
+	super->s_size = be64_to_cpu(ds->ds_filesystem_size);
+	super->s_root_reserve = be64_to_cpu(ds->ds_root_reserve);
+	super->s_speed_reserve = be64_to_cpu(ds->ds_speed_reserve);
+	super->s_bad_seg_reserve = be32_to_cpu(ds->ds_bad_seg_reserve);
+	super->s_segsize = 1 << ds->ds_segment_shift;
+	super->s_segmask = (1 << ds->ds_segment_shift) - 1;
+	super->s_segshift = ds->ds_segment_shift;
+	sb->s_blocksize = 1 << ds->ds_block_shift;
+	sb->s_blocksize_bits = ds->ds_block_shift;
+	super->s_writesize = 1 << ds->ds_write_shift;
+	super->s_writeshift = ds->ds_write_shift;
+	super->s_no_segs = super->s_size >> super->s_segshift;
+	super->s_no_blocks = super->s_segsize >> sb->s_blocksize_bits;
+	super->s_feature_incompat = be64_to_cpu(ds->ds_feature_incompat);
+	super->s_feature_ro_compat = be64_to_cpu(ds->ds_feature_ro_compat);
+	super->s_feature_compat = be64_to_cpu(ds->ds_feature_compat);
+	super->s_feature_flags = be64_to_cpu(ds->ds_feature_flags);
+
+	journal_for_each(i)
+		super->s_journal_seg[i] = be32_to_cpu(ds->ds_journal_seg[i]);
+
+	super->s_ifile_levels = ds->ds_ifile_levels;
+	super->s_iblock_levels = ds->ds_iblock_levels;
+	super->s_data_levels = ds->ds_data_levels;
+	super->s_total_levels = super->s_ifile_levels + super->s_iblock_levels
+		+ super->s_data_levels;
+	page_cache_release(page);
+	return 0;
+}
+
+static int logfs_read_sb(struct super_block *sb)
+{
+	struct logfs_super *super = logfs_super(sb);
+	int ret;
+
+	super->s_btree_pool = mempool_create(32, btree_alloc, btree_free, NULL);
+	if (!super->s_btree_pool)
+		return -ENOMEM;
+
+	btree_init_mempool64(&super->s_shadow_tree.new, super->s_btree_pool);
+	btree_init_mempool64(&super->s_shadow_tree.old, super->s_btree_pool);
+
+	ret = logfs_init_mapping(sb);
+	if (ret)
+		return ret;
+
+	ret = __logfs_read_sb(sb);
+	if (ret)
+		return ret;
+
+	mutex_init(&super->s_dirop_mutex);
+	mutex_init(&super->s_object_alias_mutex);
+	INIT_LIST_HEAD(&super->s_freeing_list);
+
+	ret = logfs_init_rw(sb);
+	if (ret)
+		return ret;
+
+	ret = logfs_init_areas(sb);
+	if (ret)
+		return ret;
+
+	ret = logfs_init_gc(sb);
+	if (ret)
+		return ret;
+
+	ret = logfs_init_journal(sb);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+static void logfs_kill_sb(struct super_block *sb)
+{
+	struct logfs_super *super = logfs_super(sb);
+
+	log_super("LogFS: Start unmounting\n");
+	/* Alias entries slow down mount, so evict as many as possible */
+	sync_filesystem(sb);
+	logfs_write_anchor(super->s_master_inode);
+
+	/*
+	 * From this point on alias entries are simply dropped - and any
+	 * writes to the object store are considered bugs.
+	 */
+	super->s_flags |= LOGFS_SB_FLAG_SHUTDOWN;
+	log_super("LogFS: Now in shutdown\n");
+	generic_shutdown_super(sb);
+
+	BUG_ON(super->s_dirty_used_bytes || super->s_dirty_free_bytes);
+
+	logfs_cleanup_gc(sb);
+	logfs_cleanup_journal(sb);
+	logfs_cleanup_areas(sb);
+	logfs_cleanup_rw(sb);
+	super->s_devops->put_device(sb);
+	mempool_destroy(super->s_btree_pool);
+	mempool_destroy(super->s_alias_pool);
+	kfree(super);
+	log_super("LogFS: Finished unmounting\n");
+}
+
+int logfs_get_sb_device(struct file_system_type *type, int flags,
+		struct mtd_info *mtd, struct block_device *bdev,
+		const struct logfs_device_ops *devops, struct vfsmount *mnt)
+{
+	struct logfs_super *super;
+	struct super_block *sb;
+	int err = -ENOMEM;
+	static int mount_count;
+
+	log_super("LogFS: Start mount %x\n", mount_count++);
+	super = kzalloc(sizeof(*super), GFP_KERNEL);
+	if (!super)
+		goto err0;
+
+	super->s_mtd	= mtd;
+	super->s_bdev	= bdev;
+	err = -EINVAL;
+	sb = sget(type, logfs_sb_test, logfs_sb_set, super);
+	if (IS_ERR(sb))
+		goto err0;
+
+	if (sb->s_root) {
+		/* Device is already in use */
+		err = 0;
+		simple_set_mnt(mnt, sb);
+		goto err0;
+	}
+
+	super->s_devops = devops;
+
+	/*
+	 * sb->s_maxbytes is limited to 8TB.  On 32bit systems, the page cache
+	 * only covers 16TB and the upper 8TB are used for indirect blocks.
+	 * On 64bit system we could bump up the limit, but that would make
+	 * the filesystem incompatible with 32bit systems.
+	 */
+	sb->s_maxbytes	= (1ull << 43) - 1;
+	sb->s_op	= &logfs_super_operations;
+	sb->s_flags	= flags | MS_NOATIME;
+
+	err = logfs_read_sb(sb);
+	if (err)
+		goto err1;
+
+	sb->s_flags |= MS_ACTIVE;
+	err = logfs_get_sb_final(sb, mnt);
+	if (err)
+		goto err1;
+	return 0;
+
+err1:
+	up_write(&sb->s_umount);
+	deactivate_super(sb);
+	return err;
+err0:
+	kfree(super);
+	//devops->put_device(sb);
+	return err;
+}
+
+static int logfs_get_sb(struct file_system_type *type, int flags,
+		const char *devname, void *data, struct vfsmount *mnt)
+{
+	ulong mtdnr;
+
+	if (!devname)
+		return logfs_get_sb_bdev(type, flags, devname, mnt);
+	if (strncmp(devname, "mtd", 3))
+		return logfs_get_sb_bdev(type, flags, devname, mnt);
+
+	{
+		char *garbage;
+		mtdnr = simple_strtoul(devname+3, &garbage, 0);
+		if (*garbage)
+			return -EINVAL;
+	}
+
+	return logfs_get_sb_mtd(type, flags, mtdnr, mnt);
+}
+
+static struct file_system_type logfs_fs_type = {
+	.owner		= THIS_MODULE,
+	.name		= "logfs",
+	.get_sb		= logfs_get_sb,
+	.kill_sb	= logfs_kill_sb,
+	.fs_flags	= FS_REQUIRES_DEV,
+
+};
+
+static int __init logfs_init(void)
+{
+	int ret;
+
+	emergency_page = alloc_pages(GFP_KERNEL, 0);
+	if (!emergency_page)
+		return -ENOMEM;
+
+	ret = logfs_compr_init();
+	if (ret)
+		goto out1;
+
+	ret = logfs_init_inode_cache();
+	if (ret)
+		goto out2;
+
+	return register_filesystem(&logfs_fs_type);
+out2:
+	logfs_compr_exit();
+out1:
+	__free_pages(emergency_page, 0);
+	return ret;
+}
+
+static void __exit logfs_exit(void)
+{
+	unregister_filesystem(&logfs_fs_type);
+	logfs_destroy_inode_cache();
+	logfs_compr_exit();
+	__free_pages(emergency_page, 0);
+}
+
+module_init(logfs_init);
+module_exit(logfs_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Joern Engel <joern@logfs.org>");
+MODULE_DESCRIPTION("scalable flash filesystem");
diff --git a/include/linux/btree-128.h b/include/linux/btree-128.h
new file mode 100644
index 000000000000..0b3414c4c928
--- /dev/null
+++ b/include/linux/btree-128.h
@@ -0,0 +1,109 @@
+extern struct btree_geo btree_geo128;
+
+struct btree_head128 { struct btree_head h; };
+
+static inline void btree_init_mempool128(struct btree_head128 *head,
+					 mempool_t *mempool)
+{
+	btree_init_mempool(&head->h, mempool);
+}
+
+static inline int btree_init128(struct btree_head128 *head)
+{
+	return btree_init(&head->h);
+}
+
+static inline void btree_destroy128(struct btree_head128 *head)
+{
+	btree_destroy(&head->h);
+}
+
+static inline void *btree_lookup128(struct btree_head128 *head, u64 k1, u64 k2)
+{
+	u64 key[2] = {k1, k2};
+	return btree_lookup(&head->h, &btree_geo128, (unsigned long *)&key);
+}
+
+static inline void *btree_get_prev128(struct btree_head128 *head,
+				      u64 *k1, u64 *k2)
+{
+	u64 key[2] = {*k1, *k2};
+	void *val;
+
+	val = btree_get_prev(&head->h, &btree_geo128,
+			     (unsigned long *)&key);
+	*k1 = key[0];
+	*k2 = key[1];
+	return val;
+}
+
+static inline int btree_insert128(struct btree_head128 *head, u64 k1, u64 k2,
+				  void *val, gfp_t gfp)
+{
+	u64 key[2] = {k1, k2};
+	return btree_insert(&head->h, &btree_geo128,
+			    (unsigned long *)&key, val, gfp);
+}
+
+static inline int btree_update128(struct btree_head128 *head, u64 k1, u64 k2,
+				  void *val)
+{
+	u64 key[2] = {k1, k2};
+	return btree_update(&head->h, &btree_geo128,
+			    (unsigned long *)&key, val);
+}
+
+static inline void *btree_remove128(struct btree_head128 *head, u64 k1, u64 k2)
+{
+	u64 key[2] = {k1, k2};
+	return btree_remove(&head->h, &btree_geo128, (unsigned long *)&key);
+}
+
+static inline void *btree_last128(struct btree_head128 *head, u64 *k1, u64 *k2)
+{
+	u64 key[2];
+	void *val;
+
+	val = btree_last(&head->h, &btree_geo128, (unsigned long *)&key[0]);
+	if (val) {
+		*k1 = key[0];
+		*k2 = key[1];
+	}
+
+	return val;
+}
+
+static inline int btree_merge128(struct btree_head128 *target,
+				 struct btree_head128 *victim,
+				 gfp_t gfp)
+{
+	return btree_merge(&target->h, &victim->h, &btree_geo128, gfp);
+}
+
+void visitor128(void *elem, unsigned long opaque, unsigned long *__key,
+		size_t index, void *__func);
+
+typedef void (*visitor128_t)(void *elem, unsigned long opaque,
+			     u64 key1, u64 key2, size_t index);
+
+static inline size_t btree_visitor128(struct btree_head128 *head,
+				      unsigned long opaque,
+				      visitor128_t func2)
+{
+	return btree_visitor(&head->h, &btree_geo128, opaque,
+			     visitor128, func2);
+}
+
+static inline size_t btree_grim_visitor128(struct btree_head128 *head,
+					   unsigned long opaque,
+					   visitor128_t func2)
+{
+	return btree_grim_visitor(&head->h, &btree_geo128, opaque,
+				  visitor128, func2);
+}
+
+#define btree_for_each_safe128(head, k1, k2, val)	\
+	for (val = btree_last128(head, &k1, &k2);	\
+	     val;					\
+	     val = btree_get_prev128(head, &k1, &k2))
+
diff --git a/include/linux/btree-type.h b/include/linux/btree-type.h
new file mode 100644
index 000000000000..9a1147ef8563
--- /dev/null
+++ b/include/linux/btree-type.h
@@ -0,0 +1,147 @@
+#define __BTREE_TP(pfx, type, sfx)	pfx ## type ## sfx
+#define _BTREE_TP(pfx, type, sfx)	__BTREE_TP(pfx, type, sfx)
+#define BTREE_TP(pfx)			_BTREE_TP(pfx, BTREE_TYPE_SUFFIX,)
+#define BTREE_FN(name)			BTREE_TP(btree_ ## name)
+#define BTREE_TYPE_HEAD			BTREE_TP(struct btree_head)
+#define VISITOR_FN			BTREE_TP(visitor)
+#define VISITOR_FN_T			_BTREE_TP(visitor, BTREE_TYPE_SUFFIX, _t)
+
+BTREE_TYPE_HEAD {
+	struct btree_head h;
+};
+
+static inline void BTREE_FN(init_mempool)(BTREE_TYPE_HEAD *head,
+					  mempool_t *mempool)
+{
+	btree_init_mempool(&head->h, mempool);
+}
+
+static inline int BTREE_FN(init)(BTREE_TYPE_HEAD *head)
+{
+	return btree_init(&head->h);
+}
+
+static inline void BTREE_FN(destroy)(BTREE_TYPE_HEAD *head)
+{
+	btree_destroy(&head->h);
+}
+
+static inline int BTREE_FN(merge)(BTREE_TYPE_HEAD *target,
+				  BTREE_TYPE_HEAD *victim,
+				  gfp_t gfp)
+{
+	return btree_merge(&target->h, &victim->h, BTREE_TYPE_GEO, gfp);
+}
+
+#if (BITS_PER_LONG > BTREE_TYPE_BITS)
+static inline void *BTREE_FN(lookup)(BTREE_TYPE_HEAD *head, BTREE_KEYTYPE key)
+{
+	unsigned long _key = key;
+	return btree_lookup(&head->h, BTREE_TYPE_GEO, &_key);
+}
+
+static inline int BTREE_FN(insert)(BTREE_TYPE_HEAD *head, BTREE_KEYTYPE key,
+				   void *val, gfp_t gfp)
+{
+	unsigned long _key = key;
+	return btree_insert(&head->h, BTREE_TYPE_GEO, &_key, val, gfp);
+}
+
+static inline int BTREE_FN(update)(BTREE_TYPE_HEAD *head, BTREE_KEYTYPE key,
+		void *val)
+{
+	unsigned long _key = key;
+	return btree_update(&head->h, BTREE_TYPE_GEO, &_key, val);
+}
+
+static inline void *BTREE_FN(remove)(BTREE_TYPE_HEAD *head, BTREE_KEYTYPE key)
+{
+	unsigned long _key = key;
+	return btree_remove(&head->h, BTREE_TYPE_GEO, &_key);
+}
+
+static inline void *BTREE_FN(last)(BTREE_TYPE_HEAD *head, BTREE_KEYTYPE *key)
+{
+	unsigned long _key;
+	void *val = btree_last(&head->h, BTREE_TYPE_GEO, &_key);
+	if (val)
+		*key = _key;
+	return val;
+}
+
+static inline void *BTREE_FN(get_prev)(BTREE_TYPE_HEAD *head, BTREE_KEYTYPE *key)
+{
+	unsigned long _key = *key;
+	void *val = btree_get_prev(&head->h, BTREE_TYPE_GEO, &_key);
+	if (val)
+		*key = _key;
+	return val;
+}
+#else
+static inline void *BTREE_FN(lookup)(BTREE_TYPE_HEAD *head, BTREE_KEYTYPE key)
+{
+	return btree_lookup(&head->h, BTREE_TYPE_GEO, (unsigned long *)&key);
+}
+
+static inline int BTREE_FN(insert)(BTREE_TYPE_HEAD *head, BTREE_KEYTYPE key,
+			   void *val, gfp_t gfp)
+{
+	return btree_insert(&head->h, BTREE_TYPE_GEO, (unsigned long *)&key,
+			    val, gfp);
+}
+
+static inline int BTREE_FN(update)(BTREE_TYPE_HEAD *head, BTREE_KEYTYPE key,
+		void *val)
+{
+	return btree_update(&head->h, BTREE_TYPE_GEO, (unsigned long *)&key, val);
+}
+
+static inline void *BTREE_FN(remove)(BTREE_TYPE_HEAD *head, BTREE_KEYTYPE key)
+{
+	return btree_remove(&head->h, BTREE_TYPE_GEO, (unsigned long *)&key);
+}
+
+static inline void *BTREE_FN(last)(BTREE_TYPE_HEAD *head, BTREE_KEYTYPE *key)
+{
+	return btree_last(&head->h, BTREE_TYPE_GEO, (unsigned long *)key);
+}
+
+static inline void *BTREE_FN(get_prev)(BTREE_TYPE_HEAD *head, BTREE_KEYTYPE *key)
+{
+	return btree_get_prev(&head->h, BTREE_TYPE_GEO, (unsigned long *)key);
+}
+#endif
+
+void VISITOR_FN(void *elem, unsigned long opaque, unsigned long *key,
+		size_t index, void *__func);
+
+typedef void (*VISITOR_FN_T)(void *elem, unsigned long opaque,
+			     BTREE_KEYTYPE key, size_t index);
+
+static inline size_t BTREE_FN(visitor)(BTREE_TYPE_HEAD *head,
+				       unsigned long opaque,
+				       VISITOR_FN_T func2)
+{
+	return btree_visitor(&head->h, BTREE_TYPE_GEO, opaque,
+			     visitorl, func2);
+}
+
+static inline size_t BTREE_FN(grim_visitor)(BTREE_TYPE_HEAD *head,
+					    unsigned long opaque,
+					    VISITOR_FN_T func2)
+{
+	return btree_grim_visitor(&head->h, BTREE_TYPE_GEO, opaque,
+				  visitorl, func2);
+}
+
+#undef VISITOR_FN
+#undef VISITOR_FN_T
+#undef __BTREE_TP
+#undef _BTREE_TP
+#undef BTREE_TP
+#undef BTREE_FN
+#undef BTREE_TYPE_HEAD
+#undef BTREE_TYPE_SUFFIX
+#undef BTREE_TYPE_GEO
+#undef BTREE_KEYTYPE
+#undef BTREE_TYPE_BITS
diff --git a/include/linux/btree.h b/include/linux/btree.h
new file mode 100644
index 000000000000..65b5bb058324
--- /dev/null
+++ b/include/linux/btree.h
@@ -0,0 +1,243 @@
+#ifndef BTREE_H
+#define BTREE_H
+
+#include <linux/kernel.h>
+#include <linux/mempool.h>
+
+/**
+ * DOC: B+Tree basics
+ *
+ * A B+Tree is a data structure for looking up arbitrary (currently allowing
+ * unsigned long, u32, u64 and 2 * u64) keys into pointers. The data structure
+ * is described at http://en.wikipedia.org/wiki/B-tree, we currently do not
+ * use binary search to find the key on lookups.
+ *
+ * Each B+Tree consists of a head, that contains bookkeeping information and
+ * a variable number (starting with zero) nodes. Each node contains the keys
+ * and pointers to sub-nodes, or, for leaf nodes, the keys and values for the
+ * tree entries.
+ *
+ * Each node in this implementation has the following layout:
+ * [key1, key2, ..., keyN] [val1, val2, ..., valN]
+ *
+ * Each key here is an array of unsigned longs, geo->no_longs in total. The
+ * number of keys and values (N) is geo->no_pairs.
+ */
+
+/**
+ * struct btree_head - btree head
+ *
+ * @node: the first node in the tree
+ * @mempool: mempool used for node allocations
+ * @height: current of the tree
+ */
+struct btree_head {
+	unsigned long *node;
+	mempool_t *mempool;
+	int height;
+};
+
+/* btree geometry */
+struct btree_geo;
+
+/**
+ * btree_alloc - allocate function for the mempool
+ * @gfp_mask: gfp mask for the allocation
+ * @pool_data: unused
+ */
+void *btree_alloc(gfp_t gfp_mask, void *pool_data);
+
+/**
+ * btree_free - free function for the mempool
+ * @element: the element to free
+ * @pool_data: unused
+ */
+void btree_free(void *element, void *pool_data);
+
+/**
+ * btree_init_mempool - initialise a btree with given mempool
+ *
+ * @head: the btree head to initialise
+ * @mempool: the mempool to use
+ *
+ * When this function is used, there is no need to destroy
+ * the mempool.
+ */
+void btree_init_mempool(struct btree_head *head, mempool_t *mempool);
+
+/**
+ * btree_init - initialise a btree
+ *
+ * @head: the btree head to initialise
+ *
+ * This function allocates the memory pool that the
+ * btree needs. Returns zero or a negative error code
+ * (-%ENOMEM) when memory allocation fails.
+ *
+ */
+int __must_check btree_init(struct btree_head *head);
+
+/**
+ * btree_destroy - destroy mempool
+ *
+ * @head: the btree head to destroy
+ *
+ * This function destroys the internal memory pool, use only
+ * when using btree_init(), not with btree_init_mempool().
+ */
+void btree_destroy(struct btree_head *head);
+
+/**
+ * btree_lookup - look up a key in the btree
+ *
+ * @head: the btree to look in
+ * @geo: the btree geometry
+ * @key: the key to look up
+ *
+ * This function returns the value for the given key, or %NULL.
+ */
+void *btree_lookup(struct btree_head *head, struct btree_geo *geo,
+		   unsigned long *key);
+
+/**
+ * btree_insert - insert an entry into the btree
+ *
+ * @head: the btree to add to
+ * @geo: the btree geometry
+ * @key: the key to add (must not already be present)
+ * @val: the value to add (must not be %NULL)
+ * @gfp: allocation flags for node allocations
+ *
+ * This function returns 0 if the item could be added, or an
+ * error code if it failed (may fail due to memory pressure).
+ */
+int __must_check btree_insert(struct btree_head *head, struct btree_geo *geo,
+			      unsigned long *key, void *val, gfp_t gfp);
+/**
+ * btree_update - update an entry in the btree
+ *
+ * @head: the btree to update
+ * @geo: the btree geometry
+ * @key: the key to update
+ * @val: the value to change it to (must not be %NULL)
+ *
+ * This function returns 0 if the update was successful, or
+ * -%ENOENT if the key could not be found.
+ */
+int btree_update(struct btree_head *head, struct btree_geo *geo,
+		 unsigned long *key, void *val);
+/**
+ * btree_remove - remove an entry from the btree
+ *
+ * @head: the btree to update
+ * @geo: the btree geometry
+ * @key: the key to remove
+ *
+ * This function returns the removed entry, or %NULL if the key
+ * could not be found.
+ */
+void *btree_remove(struct btree_head *head, struct btree_geo *geo,
+		   unsigned long *key);
+
+/**
+ * btree_merge - merge two btrees
+ *
+ * @target: the tree that gets all the entries
+ * @victim: the tree that gets merged into @target
+ * @geo: the btree geometry
+ * @gfp: allocation flags
+ *
+ * The two trees @target and @victim may not contain the same keys,
+ * that is a bug and triggers a BUG(). This function returns zero
+ * if the trees were merged successfully, and may return a failure
+ * when memory allocation fails, in which case both trees might have
+ * been partially merged, i.e. some entries have been moved from
+ * @victim to @target.
+ */
+int btree_merge(struct btree_head *target, struct btree_head *victim,
+		struct btree_geo *geo, gfp_t gfp);
+
+/**
+ * btree_last - get last entry in btree
+ *
+ * @head: btree head
+ * @geo: btree geometry
+ * @key: last key
+ *
+ * Returns the last entry in the btree, and sets @key to the key
+ * of that entry; returns NULL if the tree is empty, in that case
+ * key is not changed.
+ */
+void *btree_last(struct btree_head *head, struct btree_geo *geo,
+		 unsigned long *key);
+
+/**
+ * btree_get_prev - get previous entry
+ *
+ * @head: btree head
+ * @geo: btree geometry
+ * @key: pointer to key
+ *
+ * The function returns the next item right before the value pointed to by
+ * @key, and updates @key with its key, or returns %NULL when there is no
+ * entry with a key smaller than the given key.
+ */
+void *btree_get_prev(struct btree_head *head, struct btree_geo *geo,
+		     unsigned long *key);
+
+
+/* internal use, use btree_visitor{l,32,64,128} */
+size_t btree_visitor(struct btree_head *head, struct btree_geo *geo,
+		     unsigned long opaque,
+		     void (*func)(void *elem, unsigned long opaque,
+				  unsigned long *key, size_t index,
+				  void *func2),
+		     void *func2);
+
+/* internal use, use btree_grim_visitor{l,32,64,128} */
+size_t btree_grim_visitor(struct btree_head *head, struct btree_geo *geo,
+			  unsigned long opaque,
+			  void (*func)(void *elem, unsigned long opaque,
+				       unsigned long *key,
+				       size_t index, void *func2),
+			  void *func2);
+
+
+#include <linux/btree-128.h>
+
+extern struct btree_geo btree_geo32;
+#define BTREE_TYPE_SUFFIX l
+#define BTREE_TYPE_BITS BITS_PER_LONG
+#define BTREE_TYPE_GEO &btree_geo32
+#define BTREE_KEYTYPE unsigned long
+#include <linux/btree-type.h>
+
+#define btree_for_each_safel(head, key, val)	\
+	for (val = btree_lastl(head, &key);	\
+	     val;				\
+	     val = btree_get_prevl(head, &key))
+
+#define BTREE_TYPE_SUFFIX 32
+#define BTREE_TYPE_BITS 32
+#define BTREE_TYPE_GEO &btree_geo32
+#define BTREE_KEYTYPE u32
+#include <linux/btree-type.h>
+
+#define btree_for_each_safe32(head, key, val)	\
+	for (val = btree_last32(head, &key);	\
+	     val;				\
+	     val = btree_get_prev32(head, &key))
+
+extern struct btree_geo btree_geo64;
+#define BTREE_TYPE_SUFFIX 64
+#define BTREE_TYPE_BITS 64
+#define BTREE_TYPE_GEO &btree_geo64
+#define BTREE_KEYTYPE u64
+#include <linux/btree-type.h>
+
+#define btree_for_each_safe64(head, key, val)	\
+	for (val = btree_last64(head, &key);	\
+	     val;				\
+	     val = btree_get_prev64(head, &key))
+
+#endif
diff --git a/lib/Kconfig b/lib/Kconfig
index bb1326d3839c..277fbfb233b9 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -156,6 +156,9 @@ config TEXTSEARCH_BM
 config TEXTSEARCH_FSM
 	tristate
 
+config BTREE
+	boolean
+
 config HAS_IOMEM
 	boolean
 	depends on !NO_IOMEM
diff --git a/lib/Makefile b/lib/Makefile
index 2e78277eff9d..cff82612e98b 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -41,6 +41,7 @@ lib-$(CONFIG_GENERIC_FIND_NEXT_BIT) += find_next_bit.o
 obj-$(CONFIG_GENERIC_FIND_LAST_BIT) += find_last_bit.o
 obj-$(CONFIG_GENERIC_HWEIGHT) += hweight.o
 obj-$(CONFIG_LOCK_KERNEL) += kernel_lock.o
+obj-$(CONFIG_BTREE) += btree.o
 obj-$(CONFIG_DEBUG_PREEMPT) += smp_processor_id.o
 obj-$(CONFIG_DEBUG_LIST) += list_debug.o
 obj-$(CONFIG_DEBUG_OBJECTS) += debugobjects.o
diff --git a/lib/btree.c b/lib/btree.c
new file mode 100644
index 000000000000..41859a820218
--- /dev/null
+++ b/lib/btree.c
@@ -0,0 +1,797 @@
+/*
+ * lib/btree.c	- Simple In-memory B+Tree
+ *
+ * As should be obvious for Linux kernel code, license is GPLv2
+ *
+ * Copyright (c) 2007-2008 Joern Engel <joern@logfs.org>
+ * Bits and pieces stolen from Peter Zijlstra's code, which is
+ * Copyright 2007, Red Hat Inc. Peter Zijlstra <pzijlstr@redhat.com>
+ * GPLv2
+ *
+ * see http://programming.kicks-ass.net/kernel-patches/vma_lookup/btree.patch
+ *
+ * A relatively simple B+Tree implementation.  I have written it as a learning
+ * excercise to understand how B+Trees work.  Turned out to be useful as well.
+ *
+ * B+Trees can be used similar to Linux radix trees (which don't have anything
+ * in common with textbook radix trees, beware).  Prerequisite for them working
+ * well is that access to a random tree node is much faster than a large number
+ * of operations within each node.
+ *
+ * Disks have fulfilled the prerequisite for a long time.  More recently DRAM
+ * has gained similar properties, as memory access times, when measured in cpu
+ * cycles, have increased.  Cacheline sizes have increased as well, which also
+ * helps B+Trees.
+ *
+ * Compared to radix trees, B+Trees are more efficient when dealing with a
+ * sparsely populated address space.  Between 25% and 50% of the memory is
+ * occupied with valid pointers.  When densely populated, radix trees contain
+ * ~98% pointers - hard to beat.  Very sparse radix trees contain only ~2%
+ * pointers.
+ *
+ * This particular implementation stores pointers identified by a long value.
+ * Storing NULL pointers is illegal, lookup will return NULL when no entry
+ * was found.
+ *
+ * A tricks was used that is not commonly found in textbooks.  The lowest
+ * values are to the right, not to the left.  All used slots within a node
+ * are on the left, all unused slots contain NUL values.  Most operations
+ * simply loop once over all slots and terminate on the first NUL.
+ */
+
+#include <linux/btree.h>
+#include <linux/cache.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+
+#define MAX(a, b) ((a) > (b) ? (a) : (b))
+#define NODESIZE MAX(L1_CACHE_BYTES, 128)
+
+struct btree_geo {
+	int keylen;
+	int no_pairs;
+	int no_longs;
+};
+
+struct btree_geo btree_geo32 = {
+	.keylen = 1,
+	.no_pairs = NODESIZE / sizeof(long) / 2,
+	.no_longs = NODESIZE / sizeof(long) / 2,
+};
+EXPORT_SYMBOL_GPL(btree_geo32);
+
+#define LONG_PER_U64 (64 / BITS_PER_LONG)
+struct btree_geo btree_geo64 = {
+	.keylen = LONG_PER_U64,
+	.no_pairs = NODESIZE / sizeof(long) / (1 + LONG_PER_U64),
+	.no_longs = LONG_PER_U64 * (NODESIZE / sizeof(long) / (1 + LONG_PER_U64)),
+};
+EXPORT_SYMBOL_GPL(btree_geo64);
+
+struct btree_geo btree_geo128 = {
+	.keylen = 2 * LONG_PER_U64,
+	.no_pairs = NODESIZE / sizeof(long) / (1 + 2 * LONG_PER_U64),
+	.no_longs = 2 * LONG_PER_U64 * (NODESIZE / sizeof(long) / (1 + 2 * LONG_PER_U64)),
+};
+EXPORT_SYMBOL_GPL(btree_geo128);
+
+static struct kmem_cache *btree_cachep;
+
+void *btree_alloc(gfp_t gfp_mask, void *pool_data)
+{
+	return kmem_cache_alloc(btree_cachep, gfp_mask);
+}
+EXPORT_SYMBOL_GPL(btree_alloc);
+
+void btree_free(void *element, void *pool_data)
+{
+	kmem_cache_free(btree_cachep, element);
+}
+EXPORT_SYMBOL_GPL(btree_free);
+
+static unsigned long *btree_node_alloc(struct btree_head *head, gfp_t gfp)
+{
+	unsigned long *node;
+
+	node = mempool_alloc(head->mempool, gfp);
+	memset(node, 0, NODESIZE);
+	return node;
+}
+
+static int longcmp(const unsigned long *l1, const unsigned long *l2, size_t n)
+{
+	size_t i;
+
+	for (i = 0; i < n; i++) {
+		if (l1[i] < l2[i])
+			return -1;
+		if (l1[i] > l2[i])
+			return 1;
+	}
+	return 0;
+}
+
+static unsigned long *longcpy(unsigned long *dest, const unsigned long *src,
+		size_t n)
+{
+	size_t i;
+
+	for (i = 0; i < n; i++)
+		dest[i] = src[i];
+	return dest;
+}
+
+static unsigned long *longset(unsigned long *s, unsigned long c, size_t n)
+{
+	size_t i;
+
+	for (i = 0; i < n; i++)
+		s[i] = c;
+	return s;
+}
+
+static void dec_key(struct btree_geo *geo, unsigned long *key)
+{
+	unsigned long val;
+	int i;
+
+	for (i = geo->keylen - 1; i >= 0; i--) {
+		val = key[i];
+		key[i] = val - 1;
+		if (val)
+			break;
+	}
+}
+
+static unsigned long *bkey(struct btree_geo *geo, unsigned long *node, int n)
+{
+	return &node[n * geo->keylen];
+}
+
+static void *bval(struct btree_geo *geo, unsigned long *node, int n)
+{
+	return (void *)node[geo->no_longs + n];
+}
+
+static void setkey(struct btree_geo *geo, unsigned long *node, int n,
+		   unsigned long *key)
+{
+	longcpy(bkey(geo, node, n), key, geo->keylen);
+}
+
+static void setval(struct btree_geo *geo, unsigned long *node, int n,
+		   void *val)
+{
+	node[geo->no_longs + n] = (unsigned long) val;
+}
+
+static void clearpair(struct btree_geo *geo, unsigned long *node, int n)
+{
+	longset(bkey(geo, node, n), 0, geo->keylen);
+	node[geo->no_longs + n] = 0;
+}
+
+static inline void __btree_init(struct btree_head *head)
+{
+	head->node = NULL;
+	head->height = 0;
+}
+
+void btree_init_mempool(struct btree_head *head, mempool_t *mempool)
+{
+	__btree_init(head);
+	head->mempool = mempool;
+}
+EXPORT_SYMBOL_GPL(btree_init_mempool);
+
+int btree_init(struct btree_head *head)
+{
+	__btree_init(head);
+	head->mempool = mempool_create(0, btree_alloc, btree_free, NULL);
+	if (!head->mempool)
+		return -ENOMEM;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(btree_init);
+
+void btree_destroy(struct btree_head *head)
+{
+	mempool_destroy(head->mempool);
+	head->mempool = NULL;
+}
+EXPORT_SYMBOL_GPL(btree_destroy);
+
+void *btree_last(struct btree_head *head, struct btree_geo *geo,
+		 unsigned long *key)
+{
+	int height = head->height;
+	unsigned long *node = head->node;
+
+	if (height == 0)
+		return NULL;
+
+	for ( ; height > 1; height--)
+		node = bval(geo, node, 0);
+
+	longcpy(key, bkey(geo, node, 0), geo->keylen);
+	return bval(geo, node, 0);
+}
+EXPORT_SYMBOL_GPL(btree_last);
+
+static int keycmp(struct btree_geo *geo, unsigned long *node, int pos,
+		  unsigned long *key)
+{
+	return longcmp(bkey(geo, node, pos), key, geo->keylen);
+}
+
+static int keyzero(struct btree_geo *geo, unsigned long *key)
+{
+	int i;
+
+	for (i = 0; i < geo->keylen; i++)
+		if (key[i])
+			return 0;
+
+	return 1;
+}
+
+void *btree_lookup(struct btree_head *head, struct btree_geo *geo,
+		unsigned long *key)
+{
+	int i, height = head->height;
+	unsigned long *node = head->node;
+
+	if (height == 0)
+		return NULL;
+
+	for ( ; height > 1; height--) {
+		for (i = 0; i < geo->no_pairs; i++)
+			if (keycmp(geo, node, i, key) <= 0)
+				break;
+		if (i == geo->no_pairs)
+			return NULL;
+		node = bval(geo, node, i);
+		if (!node)
+			return NULL;
+	}
+
+	if (!node)
+		return NULL;
+
+	for (i = 0; i < geo->no_pairs; i++)
+		if (keycmp(geo, node, i, key) == 0)
+			return bval(geo, node, i);
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(btree_lookup);
+
+int btree_update(struct btree_head *head, struct btree_geo *geo,
+		 unsigned long *key, void *val)
+{
+	int i, height = head->height;
+	unsigned long *node = head->node;
+
+	if (height == 0)
+		return -ENOENT;
+
+	for ( ; height > 1; height--) {
+		for (i = 0; i < geo->no_pairs; i++)
+			if (keycmp(geo, node, i, key) <= 0)
+				break;
+		if (i == geo->no_pairs)
+			return -ENOENT;
+		node = bval(geo, node, i);
+		if (!node)
+			return -ENOENT;
+	}
+
+	if (!node)
+		return -ENOENT;
+
+	for (i = 0; i < geo->no_pairs; i++)
+		if (keycmp(geo, node, i, key) == 0) {
+			setval(geo, node, i, val);
+			return 0;
+		}
+	return -ENOENT;
+}
+EXPORT_SYMBOL_GPL(btree_update);
+
+/*
+ * Usually this function is quite similar to normal lookup.  But the key of
+ * a parent node may be smaller than the smallest key of all its siblings.
+ * In such a case we cannot just return NULL, as we have only proven that no
+ * key smaller than __key, but larger than this parent key exists.
+ * So we set __key to the parent key and retry.  We have to use the smallest
+ * such parent key, which is the last parent key we encountered.
+ */
+void *btree_get_prev(struct btree_head *head, struct btree_geo *geo,
+		     unsigned long *__key)
+{
+	int i, height;
+	unsigned long *node, *oldnode;
+	unsigned long *retry_key = NULL, key[geo->keylen];
+
+	if (keyzero(geo, __key))
+		return NULL;
+
+	if (head->height == 0)
+		return NULL;
+retry:
+	longcpy(key, __key, geo->keylen);
+	dec_key(geo, key);
+
+	node = head->node;
+	for (height = head->height ; height > 1; height--) {
+		for (i = 0; i < geo->no_pairs; i++)
+			if (keycmp(geo, node, i, key) <= 0)
+				break;
+		if (i == geo->no_pairs)
+			goto miss;
+		oldnode = node;
+		node = bval(geo, node, i);
+		if (!node)
+			goto miss;
+		retry_key = bkey(geo, oldnode, i);
+	}
+
+	if (!node)
+		goto miss;
+
+	for (i = 0; i < geo->no_pairs; i++) {
+		if (keycmp(geo, node, i, key) <= 0) {
+			if (bval(geo, node, i)) {
+				longcpy(__key, bkey(geo, node, i), geo->keylen);
+				return bval(geo, node, i);
+			} else
+				goto miss;
+		}
+	}
+miss:
+	if (retry_key) {
+		__key = retry_key;
+		retry_key = NULL;
+		goto retry;
+	}
+	return NULL;
+}
+
+static int getpos(struct btree_geo *geo, unsigned long *node,
+		unsigned long *key)
+{
+	int i;
+
+	for (i = 0; i < geo->no_pairs; i++) {
+		if (keycmp(geo, node, i, key) <= 0)
+			break;
+	}
+	return i;
+}
+
+static int getfill(struct btree_geo *geo, unsigned long *node, int start)
+{
+	int i;
+
+	for (i = start; i < geo->no_pairs; i++)
+		if (!bval(geo, node, i))
+			break;
+	return i;
+}
+
+/*
+ * locate the correct leaf node in the btree
+ */
+static unsigned long *find_level(struct btree_head *head, struct btree_geo *geo,
+		unsigned long *key, int level)
+{
+	unsigned long *node = head->node;
+	int i, height;
+
+	for (height = head->height; height > level; height--) {
+		for (i = 0; i < geo->no_pairs; i++)
+			if (keycmp(geo, node, i, key) <= 0)
+				break;
+
+		if ((i == geo->no_pairs) || !bval(geo, node, i)) {
+			/* right-most key is too large, update it */
+			/* FIXME: If the right-most key on higher levels is
+			 * always zero, this wouldn't be necessary. */
+			i--;
+			setkey(geo, node, i, key);
+		}
+		BUG_ON(i < 0);
+		node = bval(geo, node, i);
+	}
+	BUG_ON(!node);
+	return node;
+}
+
+static int btree_grow(struct btree_head *head, struct btree_geo *geo,
+		      gfp_t gfp)
+{
+	unsigned long *node;
+	int fill;
+
+	node = btree_node_alloc(head, gfp);
+	if (!node)
+		return -ENOMEM;
+	if (head->node) {
+		fill = getfill(geo, head->node, 0);
+		setkey(geo, node, 0, bkey(geo, head->node, fill - 1));
+		setval(geo, node, 0, head->node);
+	}
+	head->node = node;
+	head->height++;
+	return 0;
+}
+
+static void btree_shrink(struct btree_head *head, struct btree_geo *geo)
+{
+	unsigned long *node;
+	int fill;
+
+	if (head->height <= 1)
+		return;
+
+	node = head->node;
+	fill = getfill(geo, node, 0);
+	BUG_ON(fill > 1);
+	head->node = bval(geo, node, 0);
+	head->height--;
+	mempool_free(node, head->mempool);
+}
+
+static int btree_insert_level(struct btree_head *head, struct btree_geo *geo,
+			      unsigned long *key, void *val, int level,
+			      gfp_t gfp)
+{
+	unsigned long *node;
+	int i, pos, fill, err;
+
+	BUG_ON(!val);
+	if (head->height < level) {
+		err = btree_grow(head, geo, gfp);
+		if (err)
+			return err;
+	}
+
+retry:
+	node = find_level(head, geo, key, level);
+	pos = getpos(geo, node, key);
+	fill = getfill(geo, node, pos);
+	/* two identical keys are not allowed */
+	BUG_ON(pos < fill && keycmp(geo, node, pos, key) == 0);
+
+	if (fill == geo->no_pairs) {
+		/* need to split node */
+		unsigned long *new;
+
+		new = btree_node_alloc(head, gfp);
+		if (!new)
+			return -ENOMEM;
+		err = btree_insert_level(head, geo,
+				bkey(geo, node, fill / 2 - 1),
+				new, level + 1, gfp);
+		if (err) {
+			mempool_free(new, head->mempool);
+			return err;
+		}
+		for (i = 0; i < fill / 2; i++) {
+			setkey(geo, new, i, bkey(geo, node, i));
+			setval(geo, new, i, bval(geo, node, i));
+			setkey(geo, node, i, bkey(geo, node, i + fill / 2));
+			setval(geo, node, i, bval(geo, node, i + fill / 2));
+			clearpair(geo, node, i + fill / 2);
+		}
+		if (fill & 1) {
+			setkey(geo, node, i, bkey(geo, node, fill - 1));
+			setval(geo, node, i, bval(geo, node, fill - 1));
+			clearpair(geo, node, fill - 1);
+		}
+		goto retry;
+	}
+	BUG_ON(fill >= geo->no_pairs);
+
+	/* shift and insert */
+	for (i = fill; i > pos; i--) {
+		setkey(geo, node, i, bkey(geo, node, i - 1));
+		setval(geo, node, i, bval(geo, node, i - 1));
+	}
+	setkey(geo, node, pos, key);
+	setval(geo, node, pos, val);
+
+	return 0;
+}
+
+int btree_insert(struct btree_head *head, struct btree_geo *geo,
+		unsigned long *key, void *val, gfp_t gfp)
+{
+	return btree_insert_level(head, geo, key, val, 1, gfp);
+}
+EXPORT_SYMBOL_GPL(btree_insert);
+
+static void *btree_remove_level(struct btree_head *head, struct btree_geo *geo,
+		unsigned long *key, int level);
+static void merge(struct btree_head *head, struct btree_geo *geo, int level,
+		unsigned long *left, int lfill,
+		unsigned long *right, int rfill,
+		unsigned long *parent, int lpos)
+{
+	int i;
+
+	for (i = 0; i < rfill; i++) {
+		/* Move all keys to the left */
+		setkey(geo, left, lfill + i, bkey(geo, right, i));
+		setval(geo, left, lfill + i, bval(geo, right, i));
+	}
+	/* Exchange left and right child in parent */
+	setval(geo, parent, lpos, right);
+	setval(geo, parent, lpos + 1, left);
+	/* Remove left (formerly right) child from parent */
+	btree_remove_level(head, geo, bkey(geo, parent, lpos), level + 1);
+	mempool_free(right, head->mempool);
+}
+
+static void rebalance(struct btree_head *head, struct btree_geo *geo,
+		unsigned long *key, int level, unsigned long *child, int fill)
+{
+	unsigned long *parent, *left = NULL, *right = NULL;
+	int i, no_left, no_right;
+
+	if (fill == 0) {
+		/* Because we don't steal entries from a neigbour, this case
+		 * can happen.  Parent node contains a single child, this
+		 * node, so merging with a sibling never happens.
+		 */
+		btree_remove_level(head, geo, key, level + 1);
+		mempool_free(child, head->mempool);
+		return;
+	}
+
+	parent = find_level(head, geo, key, level + 1);
+	i = getpos(geo, parent, key);
+	BUG_ON(bval(geo, parent, i) != child);
+
+	if (i > 0) {
+		left = bval(geo, parent, i - 1);
+		no_left = getfill(geo, left, 0);
+		if (fill + no_left <= geo->no_pairs) {
+			merge(head, geo, level,
+					left, no_left,
+					child, fill,
+					parent, i - 1);
+			return;
+		}
+	}
+	if (i + 1 < getfill(geo, parent, i)) {
+		right = bval(geo, parent, i + 1);
+		no_right = getfill(geo, right, 0);
+		if (fill + no_right <= geo->no_pairs) {
+			merge(head, geo, level,
+					child, fill,
+					right, no_right,
+					parent, i);
+			return;
+		}
+	}
+	/*
+	 * We could also try to steal one entry from the left or right
+	 * neighbor.  By not doing so we changed the invariant from
+	 * "all nodes are at least half full" to "no two neighboring
+	 * nodes can be merged".  Which means that the average fill of
+	 * all nodes is still half or better.
+	 */
+}
+
+static void *btree_remove_level(struct btree_head *head, struct btree_geo *geo,
+		unsigned long *key, int level)
+{
+	unsigned long *node;
+	int i, pos, fill;
+	void *ret;
+
+	if (level > head->height) {
+		/* we recursed all the way up */
+		head->height = 0;
+		head->node = NULL;
+		return NULL;
+	}
+
+	node = find_level(head, geo, key, level);
+	pos = getpos(geo, node, key);
+	fill = getfill(geo, node, pos);
+	if ((level == 1) && (keycmp(geo, node, pos, key) != 0))
+		return NULL;
+	ret = bval(geo, node, pos);
+
+	/* remove and shift */
+	for (i = pos; i < fill - 1; i++) {
+		setkey(geo, node, i, bkey(geo, node, i + 1));
+		setval(geo, node, i, bval(geo, node, i + 1));
+	}
+	clearpair(geo, node, fill - 1);
+
+	if (fill - 1 < geo->no_pairs / 2) {
+		if (level < head->height)
+			rebalance(head, geo, key, level, node, fill - 1);
+		else if (fill - 1 == 1)
+			btree_shrink(head, geo);
+	}
+
+	return ret;
+}
+
+void *btree_remove(struct btree_head *head, struct btree_geo *geo,
+		unsigned long *key)
+{
+	if (head->height == 0)
+		return NULL;
+
+	return btree_remove_level(head, geo, key, 1);
+}
+EXPORT_SYMBOL_GPL(btree_remove);
+
+int btree_merge(struct btree_head *target, struct btree_head *victim,
+		struct btree_geo *geo, gfp_t gfp)
+{
+	unsigned long key[geo->keylen];
+	unsigned long dup[geo->keylen];
+	void *val;
+	int err;
+
+	BUG_ON(target == victim);
+
+	if (!(target->node)) {
+		/* target is empty, just copy fields over */
+		target->node = victim->node;
+		target->height = victim->height;
+		__btree_init(victim);
+		return 0;
+	}
+
+	/* TODO: This needs some optimizations.  Currently we do three tree
+	 * walks to remove a single object from the victim.
+	 */
+	for (;;) {
+		if (!btree_last(victim, geo, key))
+			break;
+		val = btree_lookup(victim, geo, key);
+		err = btree_insert(target, geo, key, val, gfp);
+		if (err)
+			return err;
+		/* We must make a copy of the key, as the original will get
+		 * mangled inside btree_remove. */
+		longcpy(dup, key, geo->keylen);
+		btree_remove(victim, geo, dup);
+	}
+	return 0;
+}
+EXPORT_SYMBOL_GPL(btree_merge);
+
+static size_t __btree_for_each(struct btree_head *head, struct btree_geo *geo,
+			       unsigned long *node, unsigned long opaque,
+			       void (*func)(void *elem, unsigned long opaque,
+					    unsigned long *key, size_t index,
+					    void *func2),
+			       void *func2, int reap, int height, size_t count)
+{
+	int i;
+	unsigned long *child;
+
+	for (i = 0; i < geo->no_pairs; i++) {
+		child = bval(geo, node, i);
+		if (!child)
+			break;
+		if (height > 1)
+			count = __btree_for_each(head, geo, child, opaque,
+					func, func2, reap, height - 1, count);
+		else
+			func(child, opaque, bkey(geo, node, i), count++,
+					func2);
+	}
+	if (reap)
+		mempool_free(node, head->mempool);
+	return count;
+}
+
+static void empty(void *elem, unsigned long opaque, unsigned long *key,
+		  size_t index, void *func2)
+{
+}
+
+void visitorl(void *elem, unsigned long opaque, unsigned long *key,
+	      size_t index, void *__func)
+{
+	visitorl_t func = __func;
+
+	func(elem, opaque, *key, index);
+}
+EXPORT_SYMBOL_GPL(visitorl);
+
+void visitor32(void *elem, unsigned long opaque, unsigned long *__key,
+	       size_t index, void *__func)
+{
+	visitor32_t func = __func;
+	u32 *key = (void *)__key;
+
+	func(elem, opaque, *key, index);
+}
+EXPORT_SYMBOL_GPL(visitor32);
+
+void visitor64(void *elem, unsigned long opaque, unsigned long *__key,
+	       size_t index, void *__func)
+{
+	visitor64_t func = __func;
+	u64 *key = (void *)__key;
+
+	func(elem, opaque, *key, index);
+}
+EXPORT_SYMBOL_GPL(visitor64);
+
+void visitor128(void *elem, unsigned long opaque, unsigned long *__key,
+		size_t index, void *__func)
+{
+	visitor128_t func = __func;
+	u64 *key = (void *)__key;
+
+	func(elem, opaque, key[0], key[1], index);
+}
+EXPORT_SYMBOL_GPL(visitor128);
+
+size_t btree_visitor(struct btree_head *head, struct btree_geo *geo,
+		     unsigned long opaque,
+		     void (*func)(void *elem, unsigned long opaque,
+		     		  unsigned long *key,
+		     		  size_t index, void *func2),
+		     void *func2)
+{
+	size_t count = 0;
+
+	if (!func2)
+		func = empty;
+	if (head->node)
+		count = __btree_for_each(head, geo, head->node, opaque, func,
+				func2, 0, head->height, 0);
+	return count;
+}
+EXPORT_SYMBOL_GPL(btree_visitor);
+
+size_t btree_grim_visitor(struct btree_head *head, struct btree_geo *geo,
+			  unsigned long opaque,
+			  void (*func)(void *elem, unsigned long opaque,
+				       unsigned long *key,
+				       size_t index, void *func2),
+			  void *func2)
+{
+	size_t count = 0;
+
+	if (!func2)
+		func = empty;
+	if (head->node)
+		count = __btree_for_each(head, geo, head->node, opaque, func,
+				func2, 1, head->height, 0);
+	__btree_init(head);
+	return count;
+}
+EXPORT_SYMBOL_GPL(btree_grim_visitor);
+
+static int __init btree_module_init(void)
+{
+	btree_cachep = kmem_cache_create("btree_node", NODESIZE, 0,
+			SLAB_HWCACHE_ALIGN, NULL);
+	return 0;
+}
+
+static void __exit btree_module_exit(void)
+{
+	kmem_cache_destroy(btree_cachep);
+}
+
+/* If core code starts using btree, initialization should happen even earlier */
+module_init(btree_module_init);
+module_exit(btree_module_exit);
+
+MODULE_AUTHOR("Joern Engel <joern@logfs.org>");
+MODULE_AUTHOR("Johannes Berg <johannes@sipsolutions.net>");
+MODULE_LICENSE("GPL");
-- 
cgit v1.2.3


From 78c210efdefe07131f91ed512a3308b15bb14e2f Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Thu, 6 Aug 2009 15:41:34 -0400
Subject: Revert "knfsd: avoid overloading the CPU scheduler with enormous load
 averages"

This reverts commit 59a252ff8c0f2fa32c896f69d56ae33e641ce7ad.

This helps in an entirely cached workload but not necessarily in
workloads that require waiting on disk.

Conflicts:

	include/linux/sunrpc/svc.h
	net/sunrpc/svc_xprt.c

Reported-by: Simon Kirby <sim@hostway.ca>
Tested-by: Jesper Krogh <jesper@krogh.cc>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 include/linux/sunrpc/svc.h |  3 ---
 net/sunrpc/svc_xprt.c      | 31 +++++++++----------------------
 2 files changed, 9 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 52e8cb0a7569..d1567d627557 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -29,7 +29,6 @@ struct svc_pool_stats {
 	unsigned long	packets;
 	unsigned long	sockets_queued;
 	unsigned long	threads_woken;
-	unsigned long	overloads_avoided;
 	unsigned long	threads_timedout;
 };
 
@@ -50,7 +49,6 @@ struct svc_pool {
 	struct list_head	sp_sockets;	/* pending sockets */
 	unsigned int		sp_nrthreads;	/* # of threads in pool */
 	struct list_head	sp_all_threads;	/* all server threads */
-	int			sp_nwaking;	/* number of threads woken but not yet active */
 	struct svc_pool_stats	sp_stats;	/* statistics on pool operation */
 } ____cacheline_aligned_in_smp;
 
@@ -284,7 +282,6 @@ struct svc_rqst {
 						 * cache pages */
 	wait_queue_head_t	rq_wait;	/* synchronization */
 	struct task_struct	*rq_task;	/* service thread */
-	int			rq_waking;	/* 1 if thread is being woken */
 };
 
 /*
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index df124f78ee48..2c58b75a236f 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -16,8 +16,6 @@
 
 #define RPCDBG_FACILITY	RPCDBG_SVCXPRT
 
-#define SVC_MAX_WAKING 5
-
 static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt);
 static int svc_deferred_recv(struct svc_rqst *rqstp);
 static struct cache_deferred_req *svc_defer(struct cache_req *req);
@@ -306,7 +304,6 @@ void svc_xprt_enqueue(struct svc_xprt *xprt)
 	struct svc_pool *pool;
 	struct svc_rqst	*rqstp;
 	int cpu;
-	int thread_avail;
 
 	if (!(xprt->xpt_flags &
 	      ((1<<XPT_CONN)|(1<<XPT_DATA)|(1<<XPT_CLOSE)|(1<<XPT_DEFERRED))))
@@ -318,6 +315,12 @@ void svc_xprt_enqueue(struct svc_xprt *xprt)
 
 	spin_lock_bh(&pool->sp_lock);
 
+	if (!list_empty(&pool->sp_threads) &&
+	    !list_empty(&pool->sp_sockets))
+		printk(KERN_ERR
+		       "svc_xprt_enqueue: "
+		       "threads and transports both waiting??\n");
+
 	if (test_bit(XPT_DEAD, &xprt->xpt_flags)) {
 		/* Don't enqueue dead transports */
 		dprintk("svc: transport %p is dead, not enqueued\n", xprt);
@@ -358,15 +361,7 @@ void svc_xprt_enqueue(struct svc_xprt *xprt)
 	}
 
  process:
-	/* Work out whether threads are available */
-	thread_avail = !list_empty(&pool->sp_threads);	/* threads are asleep */
-	if (pool->sp_nwaking >= SVC_MAX_WAKING) {
-		/* too many threads are runnable and trying to wake up */
-		thread_avail = 0;
-		pool->sp_stats.overloads_avoided++;
-	}
-
-	if (thread_avail) {
+	if (!list_empty(&pool->sp_threads)) {
 		rqstp = list_entry(pool->sp_threads.next,
 				   struct svc_rqst,
 				   rq_list);
@@ -381,8 +376,6 @@ void svc_xprt_enqueue(struct svc_xprt *xprt)
 		svc_xprt_get(xprt);
 		rqstp->rq_reserved = serv->sv_max_mesg;
 		atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved);
-		rqstp->rq_waking = 1;
-		pool->sp_nwaking++;
 		pool->sp_stats.threads_woken++;
 		BUG_ON(xprt->xpt_pool != pool);
 		wake_up(&rqstp->rq_wait);
@@ -651,11 +644,6 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
 		return -EINTR;
 
 	spin_lock_bh(&pool->sp_lock);
-	if (rqstp->rq_waking) {
-		rqstp->rq_waking = 0;
-		pool->sp_nwaking--;
-		BUG_ON(pool->sp_nwaking < 0);
-	}
 	xprt = svc_xprt_dequeue(pool);
 	if (xprt) {
 		rqstp->rq_xprt = xprt;
@@ -1204,16 +1192,15 @@ static int svc_pool_stats_show(struct seq_file *m, void *p)
 	struct svc_pool *pool = p;
 
 	if (p == SEQ_START_TOKEN) {
-		seq_puts(m, "# pool packets-arrived sockets-enqueued threads-woken overloads-avoided threads-timedout\n");
+		seq_puts(m, "# pool packets-arrived sockets-enqueued threads-woken threads-timedout\n");
 		return 0;
 	}
 
-	seq_printf(m, "%u %lu %lu %lu %lu %lu\n",
+	seq_printf(m, "%u %lu %lu %lu %lu\n",
 		pool->sp_id,
 		pool->sp_stats.packets,
 		pool->sp_stats.sockets_queued,
 		pool->sp_stats.threads_woken,
-		pool->sp_stats.overloads_avoided,
 		pool->sp_stats.threads_timedout);
 
 	return 0;
-- 
cgit v1.2.3


From e169cfbef46d62e042614ffafa8880eed1d894bb Mon Sep 17 00:00:00 2001
From: Grant Likely <grant.likely@secretlab.ca>
Date: Mon, 23 Nov 2009 14:53:09 -0700
Subject: of/flattree: merge find_flat_dt_string and initial_boot_params

Merge common code between Microblaze and PowerPC.

Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
Reviewed-by: Wolfram Sang <w.sang@pengutronix.de>
Tested-by: Michal Simek <monstr@monstr.eu>
---
 arch/microblaze/Kconfig       |  1 +
 arch/microblaze/kernel/prom.c |  8 --------
 arch/powerpc/Kconfig          |  1 +
 arch/powerpc/kernel/prom.c    | 12 ------------
 drivers/of/Kconfig            |  4 ++++
 drivers/of/Makefile           |  1 +
 drivers/of/fdt.c              | 21 +++++++++++++++++++++
 include/linux/of_fdt.h        |  4 ++++
 8 files changed, 32 insertions(+), 20 deletions(-)
 create mode 100644 drivers/of/fdt.c

(limited to 'include/linux')

diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig
index bbd8327f1890..f39c9275a29b 100644
--- a/arch/microblaze/Kconfig
+++ b/arch/microblaze/Kconfig
@@ -111,6 +111,7 @@ config CMDLINE_FORCE
 
 config OF
 	def_bool y
+	select OF_FLATTREE
 
 config PROC_DEVICETREE
 	bool "Support for device tree in /proc"
diff --git a/arch/microblaze/kernel/prom.c b/arch/microblaze/kernel/prom.c
index b817df172aa9..06d620ab4168 100644
--- a/arch/microblaze/kernel/prom.c
+++ b/arch/microblaze/kernel/prom.c
@@ -47,17 +47,9 @@ static int __initdata dt_root_size_cells;
 
 typedef u32 cell_t;
 
-static struct boot_param_header *initial_boot_params;
-
 /* export that to outside world */
 struct device_node *of_chosen;
 
-static inline char *find_flat_dt_string(u32 offset)
-{
-	return ((char *)initial_boot_params) +
-		initial_boot_params->off_dt_strings + offset;
-}
-
 /**
  * This function is used to scan the flattened device-tree, it is
  * used to extract the memory informations at boot before we can
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 2ba14e77296c..2a75c6ae2a8b 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -163,6 +163,7 @@ config PPC_OF
 
 config OF
 	def_bool y
+	select OF_FLATTREE
 
 config PPC_UDBG_16550
 	bool
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 4ec300862466..fccf7e49bb28 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -73,12 +73,6 @@ unsigned long tce_alloc_start, tce_alloc_end;
 
 typedef u32 cell_t;
 
-#if 0
-static struct boot_param_header *initial_boot_params __initdata;
-#else
-struct boot_param_header *initial_boot_params;
-#endif
-
 extern struct device_node *allnodes;	/* temporary while merging */
 
 extern rwlock_t devtree_lock;	/* temporary while merging */
@@ -86,12 +80,6 @@ extern rwlock_t devtree_lock;	/* temporary while merging */
 /* export that to outside world */
 struct device_node *of_chosen;
 
-static inline char *find_flat_dt_string(u32 offset)
-{
-	return ((char *)initial_boot_params) +
-		initial_boot_params->off_dt_strings + offset;
-}
-
 /**
  * This function is used to scan the flattened device-tree, it is
  * used to extract the memory informations at boot before we can
diff --git a/drivers/of/Kconfig b/drivers/of/Kconfig
index d2fa27c5c1b2..462825e03123 100644
--- a/drivers/of/Kconfig
+++ b/drivers/of/Kconfig
@@ -1,3 +1,7 @@
+config OF_FLATTREE
+	bool
+	depends on OF
+
 config OF_DEVICE
 	def_bool y
 	depends on OF && (SPARC || PPC_OF || MICROBLAZE)
diff --git a/drivers/of/Makefile b/drivers/of/Makefile
index bdfb5f5d4b06..f232cc98ce00 100644
--- a/drivers/of/Makefile
+++ b/drivers/of/Makefile
@@ -1,4 +1,5 @@
 obj-y = base.o
+obj-$(CONFIG_OF_FLATTREE) += fdt.o
 obj-$(CONFIG_OF_DEVICE) += device.o platform.o
 obj-$(CONFIG_OF_GPIO)   += gpio.o
 obj-$(CONFIG_OF_I2C)	+= of_i2c.o
diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
new file mode 100644
index 000000000000..9faa9a5cbdf0
--- /dev/null
+++ b/drivers/of/fdt.c
@@ -0,0 +1,21 @@
+/*
+ * Functions for working with the Flattened Device Tree data format
+ *
+ * Copyright 2009 Benjamin Herrenschmidt, IBM Corp
+ * benh@kernel.crashing.org
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ */
+
+#include <linux/of.h>
+#include <linux/of_fdt.h>
+
+struct boot_param_header *initial_boot_params;
+
+char *find_flat_dt_string(u32 offset)
+{
+	return ((char *)initial_boot_params) +
+		initial_boot_params->off_dt_strings + offset;
+}
diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h
index 41d432b13553..d1a79f3da789 100644
--- a/include/linux/of_fdt.h
+++ b/include/linux/of_fdt.h
@@ -57,7 +57,11 @@ struct boot_param_header {
 	u32	dt_struct_size;		/* size of the DT structure block */
 };
 
+/* TBD: Temporary export of fdt globals - remove when code fully merged */
+extern struct boot_param_header *initial_boot_params;
+
 /* For scanning the flat device-tree at boot time */
+extern char *find_flat_dt_string(u32 offset);
 extern int __init of_scan_flat_dt(int (*it)(unsigned long node,
 					    const char *uname, int depth,
 					    void *data),
-- 
cgit v1.2.3


From 31a6a87dfc34fbf02aef9a160adf558ec56d3ccd Mon Sep 17 00:00:00 2001
From: Grant Likely <grant.likely@secretlab.ca>
Date: Mon, 23 Nov 2009 19:49:38 -0700
Subject: of/flattree: remove __init annotations from the header file

__init annotation belongs in the .c file, not the header.

Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
Reviewed-by: Wolfram Sang <w.sang@pengutronix.de>
Tested-by: Michal Simek <monstr@monstr.eu>
---
 include/linux/of_fdt.h | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h
index d1a79f3da789..81231e04e8f3 100644
--- a/include/linux/of_fdt.h
+++ b/include/linux/of_fdt.h
@@ -62,15 +62,13 @@ extern struct boot_param_header *initial_boot_params;
 
 /* For scanning the flat device-tree at boot time */
 extern char *find_flat_dt_string(u32 offset);
-extern int __init of_scan_flat_dt(int (*it)(unsigned long node,
-					    const char *uname, int depth,
-					    void *data),
-				  void *data);
-extern void __init *of_get_flat_dt_prop(unsigned long node, const char *name,
-					unsigned long *size);
-extern int __init of_flat_dt_is_compatible(unsigned long node,
-					   const char *name);
-extern unsigned long __init of_get_flat_dt_root(void);
+extern int of_scan_flat_dt(int (*it)(unsigned long node, const char *uname,
+				     int depth, void *data),
+			   void *data);
+extern void *of_get_flat_dt_prop(unsigned long node, const char *name,
+				 unsigned long *size);
+extern int of_flat_dt_is_compatible(unsigned long node, const char *name);
+extern unsigned long of_get_flat_dt_root(void);
 
 /* Other Prototypes */
 extern void finish_device_tree(void);
-- 
cgit v1.2.3


From bbd33931a08362f78266a4016211a35947b91041 Mon Sep 17 00:00:00 2001
From: Grant Likely <grant.likely@secretlab.ca>
Date: Mon, 23 Nov 2009 20:07:00 -0700
Subject: of/flattree: Merge unflatten_dt_node

Merge common code between PowerPC and MicroBlaze

Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
Reviewed-by: Wolfram Sang <w.sang@pengutronix.de>
Tested-by: Michal Simek <monstr@monstr.eu>
---
 arch/microblaze/kernel/prom.c | 195 ----------------------------------------
 arch/powerpc/kernel/prom.c    | 194 ----------------------------------------
 drivers/of/fdt.c              | 200 ++++++++++++++++++++++++++++++++++++++++++
 include/linux/of_fdt.h        |   4 +
 4 files changed, 204 insertions(+), 389 deletions(-)

(limited to 'include/linux')

diff --git a/arch/microblaze/kernel/prom.c b/arch/microblaze/kernel/prom.c
index eb27bd3a39b4..021770abfbd7 100644
--- a/arch/microblaze/kernel/prom.c
+++ b/arch/microblaze/kernel/prom.c
@@ -50,201 +50,6 @@ typedef u32 cell_t;
 /* export that to outside world */
 struct device_node *of_chosen;
 
-static void *__init unflatten_dt_alloc(unsigned long *mem, unsigned long size,
-					unsigned long align)
-{
-	void *res;
-
-	*mem = _ALIGN(*mem, align);
-	res = (void *)*mem;
-	*mem += size;
-
-	return res;
-}
-
-static unsigned long __init unflatten_dt_node(unsigned long mem,
-					unsigned long *p,
-					struct device_node *dad,
-					struct device_node ***allnextpp,
-					unsigned long fpsize)
-{
-	struct device_node *np;
-	struct property *pp, **prev_pp = NULL;
-	char *pathp;
-	u32 tag;
-	unsigned int l, allocl;
-	int has_name = 0;
-	int new_format = 0;
-
-	tag = *((u32 *)(*p));
-	if (tag != OF_DT_BEGIN_NODE) {
-		printk("Weird tag at start of node: %x\n", tag);
-		return mem;
-	}
-	*p += 4;
-	pathp = (char *)*p;
-	l = allocl = strlen(pathp) + 1;
-	*p = _ALIGN(*p + l, 4);
-
-	/* version 0x10 has a more compact unit name here instead of the full
-	 * path. we accumulate the full path size using "fpsize", we'll rebuild
-	 * it later. We detect this because the first character of the name is
-	 * not '/'.
-	 */
-	if ((*pathp) != '/') {
-		new_format = 1;
-		if (fpsize == 0) {
-			/* root node: special case. fpsize accounts for path
-			 * plus terminating zero. root node only has '/', so
-			 * fpsize should be 2, but we want to avoid the first
-			 * level nodes to have two '/' so we use fpsize 1 here
-			 */
-			fpsize = 1;
-			allocl = 2;
-		} else {
-			/* account for '/' and path size minus terminal 0
-			 * already in 'l'
-			 */
-			fpsize += l;
-			allocl = fpsize;
-		}
-	}
-
-	np = unflatten_dt_alloc(&mem, sizeof(struct device_node) + allocl,
-				__alignof__(struct device_node));
-	if (allnextpp) {
-		memset(np, 0, sizeof(*np));
-		np->full_name = ((char *)np) + sizeof(struct device_node);
-		if (new_format) {
-			char *p2 = np->full_name;
-			/* rebuild full path for new format */
-			if (dad && dad->parent) {
-				strcpy(p2, dad->full_name);
-#ifdef DEBUG
-				if ((strlen(p2) + l + 1) != allocl) {
-					pr_debug("%s: p: %d, l: %d, a: %d\n",
-						pathp, (int)strlen(p2),
-						l, allocl);
-				}
-#endif
-				p2 += strlen(p2);
-			}
-			*(p2++) = '/';
-			memcpy(p2, pathp, l);
-		} else
-			memcpy(np->full_name, pathp, l);
-		prev_pp = &np->properties;
-		**allnextpp = np;
-		*allnextpp = &np->allnext;
-		if (dad != NULL) {
-			np->parent = dad;
-			/* we temporarily use the next field as `last_child'*/
-			if (dad->next == NULL)
-				dad->child = np;
-			else
-				dad->next->sibling = np;
-			dad->next = np;
-		}
-		kref_init(&np->kref);
-	}
-	while (1) {
-		u32 sz, noff;
-		char *pname;
-
-		tag = *((u32 *)(*p));
-		if (tag == OF_DT_NOP) {
-			*p += 4;
-			continue;
-		}
-		if (tag != OF_DT_PROP)
-			break;
-		*p += 4;
-		sz = *((u32 *)(*p));
-		noff = *((u32 *)((*p) + 4));
-		*p += 8;
-		if (initial_boot_params->version < 0x10)
-			*p = _ALIGN(*p, sz >= 8 ? 8 : 4);
-
-		pname = find_flat_dt_string(noff);
-		if (pname == NULL) {
-			printk(KERN_INFO
-				"Can't find property name in list !\n");
-			break;
-		}
-		if (strcmp(pname, "name") == 0)
-			has_name = 1;
-		l = strlen(pname) + 1;
-		pp = unflatten_dt_alloc(&mem, sizeof(struct property),
-					__alignof__(struct property));
-		if (allnextpp) {
-			if (strcmp(pname, "linux,phandle") == 0) {
-				np->node = *((u32 *)*p);
-				if (np->linux_phandle == 0)
-					np->linux_phandle = np->node;
-			}
-			if (strcmp(pname, "ibm,phandle") == 0)
-				np->linux_phandle = *((u32 *)*p);
-			pp->name = pname;
-			pp->length = sz;
-			pp->value = (void *)*p;
-			*prev_pp = pp;
-			prev_pp = &pp->next;
-		}
-		*p = _ALIGN((*p) + sz, 4);
-	}
-	/* with version 0x10 we may not have the name property, recreate
-	 * it here from the unit name if absent
-	 */
-	if (!has_name) {
-		char *p1 = pathp, *ps = pathp, *pa = NULL;
-		int sz;
-
-		while (*p1) {
-			if ((*p1) == '@')
-				pa = p1;
-			if ((*p1) == '/')
-				ps = p1 + 1;
-			p1++;
-		}
-		if (pa < ps)
-			pa = p1;
-		sz = (pa - ps) + 1;
-		pp = unflatten_dt_alloc(&mem, sizeof(struct property) + sz,
-					__alignof__(struct property));
-		if (allnextpp) {
-			pp->name = "name";
-			pp->length = sz;
-			pp->value = pp + 1;
-			*prev_pp = pp;
-			prev_pp = &pp->next;
-			memcpy(pp->value, ps, sz - 1);
-			((char *)pp->value)[sz - 1] = 0;
-			pr_debug("fixed up name for %s -> %s\n", pathp,
-				(char *)pp->value);
-		}
-	}
-	if (allnextpp) {
-		*prev_pp = NULL;
-		np->name = of_get_property(np, "name", NULL);
-		np->type = of_get_property(np, "device_type", NULL);
-
-		if (!np->name)
-			np->name = "<NULL>";
-		if (!np->type)
-			np->type = "<NULL>";
-	}
-	while (tag == OF_DT_BEGIN_NODE) {
-		mem = unflatten_dt_node(mem, p, np, allnextpp, fpsize);
-		tag = *((u32 *)(*p));
-	}
-	if (tag != OF_DT_END_NODE) {
-		printk(KERN_INFO "Weird tag at end of node: %x\n", tag);
-		return mem;
-	}
-	*p += 4;
-	return mem;
-}
-
 /**
  * unflattens the device-tree passed by the firmware, creating the
  * tree of struct device_node. It also fills the "name" and "type"
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 413e608863dd..a102a0a33ed1 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -80,200 +80,6 @@ extern rwlock_t devtree_lock;	/* temporary while merging */
 /* export that to outside world */
 struct device_node *of_chosen;
 
-static void *__init unflatten_dt_alloc(unsigned long *mem, unsigned long size,
-				       unsigned long align)
-{
-	void *res;
-
-	*mem = _ALIGN(*mem, align);
-	res = (void *)*mem;
-	*mem += size;
-
-	return res;
-}
-
-static unsigned long __init unflatten_dt_node(unsigned long mem,
-					      unsigned long *p,
-					      struct device_node *dad,
-					      struct device_node ***allnextpp,
-					      unsigned long fpsize)
-{
-	struct device_node *np;
-	struct property *pp, **prev_pp = NULL;
-	char *pathp;
-	u32 tag;
-	unsigned int l, allocl;
-	int has_name = 0;
-	int new_format = 0;
-
-	tag = *((u32 *)(*p));
-	if (tag != OF_DT_BEGIN_NODE) {
-		printk("Weird tag at start of node: %x\n", tag);
-		return mem;
-	}
-	*p += 4;
-	pathp = (char *)*p;
-	l = allocl = strlen(pathp) + 1;
-	*p = _ALIGN(*p + l, 4);
-
-	/* version 0x10 has a more compact unit name here instead of the full
-	 * path. we accumulate the full path size using "fpsize", we'll rebuild
-	 * it later. We detect this because the first character of the name is
-	 * not '/'.
-	 */
-	if ((*pathp) != '/') {
-		new_format = 1;
-		if (fpsize == 0) {
-			/* root node: special case. fpsize accounts for path
-			 * plus terminating zero. root node only has '/', so
-			 * fpsize should be 2, but we want to avoid the first
-			 * level nodes to have two '/' so we use fpsize 1 here
-			 */
-			fpsize = 1;
-			allocl = 2;
-		} else {
-			/* account for '/' and path size minus terminal 0
-			 * already in 'l'
-			 */
-			fpsize += l;
-			allocl = fpsize;
-		}
-	}
-
-
-	np = unflatten_dt_alloc(&mem, sizeof(struct device_node) + allocl,
-				__alignof__(struct device_node));
-	if (allnextpp) {
-		memset(np, 0, sizeof(*np));
-		np->full_name = ((char*)np) + sizeof(struct device_node);
-		if (new_format) {
-			char *p = np->full_name;
-			/* rebuild full path for new format */
-			if (dad && dad->parent) {
-				strcpy(p, dad->full_name);
-#ifdef DEBUG
-				if ((strlen(p) + l + 1) != allocl) {
-					DBG("%s: p: %d, l: %d, a: %d\n",
-					    pathp, (int)strlen(p), l, allocl);
-				}
-#endif
-				p += strlen(p);
-			}
-			*(p++) = '/';
-			memcpy(p, pathp, l);
-		} else
-			memcpy(np->full_name, pathp, l);
-		prev_pp = &np->properties;
-		**allnextpp = np;
-		*allnextpp = &np->allnext;
-		if (dad != NULL) {
-			np->parent = dad;
-			/* we temporarily use the next field as `last_child'*/
-			if (dad->next == 0)
-				dad->child = np;
-			else
-				dad->next->sibling = np;
-			dad->next = np;
-		}
-		kref_init(&np->kref);
-	}
-	while(1) {
-		u32 sz, noff;
-		char *pname;
-
-		tag = *((u32 *)(*p));
-		if (tag == OF_DT_NOP) {
-			*p += 4;
-			continue;
-		}
-		if (tag != OF_DT_PROP)
-			break;
-		*p += 4;
-		sz = *((u32 *)(*p));
-		noff = *((u32 *)((*p) + 4));
-		*p += 8;
-		if (initial_boot_params->version < 0x10)
-			*p = _ALIGN(*p, sz >= 8 ? 8 : 4);
-
-		pname = find_flat_dt_string(noff);
-		if (pname == NULL) {
-			printk("Can't find property name in list !\n");
-			break;
-		}
-		if (strcmp(pname, "name") == 0)
-			has_name = 1;
-		l = strlen(pname) + 1;
-		pp = unflatten_dt_alloc(&mem, sizeof(struct property),
-					__alignof__(struct property));
-		if (allnextpp) {
-			if (strcmp(pname, "linux,phandle") == 0) {
-				np->node = *((u32 *)*p);
-				if (np->linux_phandle == 0)
-					np->linux_phandle = np->node;
-			}
-			if (strcmp(pname, "ibm,phandle") == 0)
-				np->linux_phandle = *((u32 *)*p);
-			pp->name = pname;
-			pp->length = sz;
-			pp->value = (void *)*p;
-			*prev_pp = pp;
-			prev_pp = &pp->next;
-		}
-		*p = _ALIGN((*p) + sz, 4);
-	}
-	/* with version 0x10 we may not have the name property, recreate
-	 * it here from the unit name if absent
-	 */
-	if (!has_name) {
-		char *p = pathp, *ps = pathp, *pa = NULL;
-		int sz;
-
-		while (*p) {
-			if ((*p) == '@')
-				pa = p;
-			if ((*p) == '/')
-				ps = p + 1;
-			p++;
-		}
-		if (pa < ps)
-			pa = p;
-		sz = (pa - ps) + 1;
-		pp = unflatten_dt_alloc(&mem, sizeof(struct property) + sz,
-					__alignof__(struct property));
-		if (allnextpp) {
-			pp->name = "name";
-			pp->length = sz;
-			pp->value = pp + 1;
-			*prev_pp = pp;
-			prev_pp = &pp->next;
-			memcpy(pp->value, ps, sz - 1);
-			((char *)pp->value)[sz - 1] = 0;
-			DBG("fixed up name for %s -> %s\n", pathp,
-				(char *)pp->value);
-		}
-	}
-	if (allnextpp) {
-		*prev_pp = NULL;
-		np->name = of_get_property(np, "name", NULL);
-		np->type = of_get_property(np, "device_type", NULL);
-
-		if (!np->name)
-			np->name = "<NULL>";
-		if (!np->type)
-			np->type = "<NULL>";
-	}
-	while (tag == OF_DT_BEGIN_NODE) {
-		mem = unflatten_dt_node(mem, p, np, allnextpp, fpsize);
-		tag = *((u32 *)(*p));
-	}
-	if (tag != OF_DT_END_NODE) {
-		printk("Weird tag at end of node: %x\n", tag);
-		return mem;
-	}
-	*p += 4;
-	return mem;
-}
-
 static int __init early_parse_mem(char *p)
 {
 	if (!p)
diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
index 5cdd958db9af..6852ecf6d1e1 100644
--- a/drivers/of/fdt.c
+++ b/drivers/of/fdt.c
@@ -166,3 +166,203 @@ int __init of_flat_dt_is_compatible(unsigned long node, const char *compat)
 	return 0;
 }
 
+static void *__init unflatten_dt_alloc(unsigned long *mem, unsigned long size,
+				       unsigned long align)
+{
+	void *res;
+
+	*mem = _ALIGN(*mem, align);
+	res = (void *)*mem;
+	*mem += size;
+
+	return res;
+}
+
+/**
+ * unflatten_dt_node - Alloc and populate a device_node from the flat tree
+ * @p: pointer to node in flat tree
+ * @dad: Parent struct device_node
+ * @allnextpp: pointer to ->allnext from last allocated device_node
+ * @fpsize: Size of the node path up at the current depth.
+ */
+unsigned long __init unflatten_dt_node(unsigned long mem,
+					unsigned long *p,
+					struct device_node *dad,
+					struct device_node ***allnextpp,
+					unsigned long fpsize)
+{
+	struct device_node *np;
+	struct property *pp, **prev_pp = NULL;
+	char *pathp;
+	u32 tag;
+	unsigned int l, allocl;
+	int has_name = 0;
+	int new_format = 0;
+
+	tag = *((u32 *)(*p));
+	if (tag != OF_DT_BEGIN_NODE) {
+		pr_err("Weird tag at start of node: %x\n", tag);
+		return mem;
+	}
+	*p += 4;
+	pathp = (char *)*p;
+	l = allocl = strlen(pathp) + 1;
+	*p = _ALIGN(*p + l, 4);
+
+	/* version 0x10 has a more compact unit name here instead of the full
+	 * path. we accumulate the full path size using "fpsize", we'll rebuild
+	 * it later. We detect this because the first character of the name is
+	 * not '/'.
+	 */
+	if ((*pathp) != '/') {
+		new_format = 1;
+		if (fpsize == 0) {
+			/* root node: special case. fpsize accounts for path
+			 * plus terminating zero. root node only has '/', so
+			 * fpsize should be 2, but we want to avoid the first
+			 * level nodes to have two '/' so we use fpsize 1 here
+			 */
+			fpsize = 1;
+			allocl = 2;
+		} else {
+			/* account for '/' and path size minus terminal 0
+			 * already in 'l'
+			 */
+			fpsize += l;
+			allocl = fpsize;
+		}
+	}
+
+	np = unflatten_dt_alloc(&mem, sizeof(struct device_node) + allocl,
+				__alignof__(struct device_node));
+	if (allnextpp) {
+		memset(np, 0, sizeof(*np));
+		np->full_name = ((char *)np) + sizeof(struct device_node);
+		if (new_format) {
+			char *fn = np->full_name;
+			/* rebuild full path for new format */
+			if (dad && dad->parent) {
+				strcpy(fn, dad->full_name);
+#ifdef DEBUG
+				if ((strlen(fn) + l + 1) != allocl) {
+					pr_debug("%s: p: %d, l: %d, a: %d\n",
+						pathp, (int)strlen(fn),
+						l, allocl);
+				}
+#endif
+				fn += strlen(fn);
+			}
+			*(fn++) = '/';
+			memcpy(fn, pathp, l);
+		} else
+			memcpy(np->full_name, pathp, l);
+		prev_pp = &np->properties;
+		**allnextpp = np;
+		*allnextpp = &np->allnext;
+		if (dad != NULL) {
+			np->parent = dad;
+			/* we temporarily use the next field as `last_child'*/
+			if (dad->next == NULL)
+				dad->child = np;
+			else
+				dad->next->sibling = np;
+			dad->next = np;
+		}
+		kref_init(&np->kref);
+	}
+	while (1) {
+		u32 sz, noff;
+		char *pname;
+
+		tag = *((u32 *)(*p));
+		if (tag == OF_DT_NOP) {
+			*p += 4;
+			continue;
+		}
+		if (tag != OF_DT_PROP)
+			break;
+		*p += 4;
+		sz = *((u32 *)(*p));
+		noff = *((u32 *)((*p) + 4));
+		*p += 8;
+		if (initial_boot_params->version < 0x10)
+			*p = _ALIGN(*p, sz >= 8 ? 8 : 4);
+
+		pname = find_flat_dt_string(noff);
+		if (pname == NULL) {
+			pr_info("Can't find property name in list !\n");
+			break;
+		}
+		if (strcmp(pname, "name") == 0)
+			has_name = 1;
+		l = strlen(pname) + 1;
+		pp = unflatten_dt_alloc(&mem, sizeof(struct property),
+					__alignof__(struct property));
+		if (allnextpp) {
+			if (strcmp(pname, "linux,phandle") == 0) {
+				np->node = *((u32 *)*p);
+				if (np->linux_phandle == 0)
+					np->linux_phandle = np->node;
+			}
+			if (strcmp(pname, "ibm,phandle") == 0)
+				np->linux_phandle = *((u32 *)*p);
+			pp->name = pname;
+			pp->length = sz;
+			pp->value = (void *)*p;
+			*prev_pp = pp;
+			prev_pp = &pp->next;
+		}
+		*p = _ALIGN((*p) + sz, 4);
+	}
+	/* with version 0x10 we may not have the name property, recreate
+	 * it here from the unit name if absent
+	 */
+	if (!has_name) {
+		char *p1 = pathp, *ps = pathp, *pa = NULL;
+		int sz;
+
+		while (*p1) {
+			if ((*p1) == '@')
+				pa = p1;
+			if ((*p1) == '/')
+				ps = p1 + 1;
+			p1++;
+		}
+		if (pa < ps)
+			pa = p1;
+		sz = (pa - ps) + 1;
+		pp = unflatten_dt_alloc(&mem, sizeof(struct property) + sz,
+					__alignof__(struct property));
+		if (allnextpp) {
+			pp->name = "name";
+			pp->length = sz;
+			pp->value = pp + 1;
+			*prev_pp = pp;
+			prev_pp = &pp->next;
+			memcpy(pp->value, ps, sz - 1);
+			((char *)pp->value)[sz - 1] = 0;
+			pr_debug("fixed up name for %s -> %s\n", pathp,
+				(char *)pp->value);
+		}
+	}
+	if (allnextpp) {
+		*prev_pp = NULL;
+		np->name = of_get_property(np, "name", NULL);
+		np->type = of_get_property(np, "device_type", NULL);
+
+		if (!np->name)
+			np->name = "<NULL>";
+		if (!np->type)
+			np->type = "<NULL>";
+	}
+	while (tag == OF_DT_BEGIN_NODE) {
+		mem = unflatten_dt_node(mem, p, np, allnextpp, fpsize);
+		tag = *((u32 *)(*p));
+	}
+	if (tag != OF_DT_END_NODE) {
+		pr_err("Weird tag at end of node: %x\n", tag);
+		return mem;
+	}
+	*p += 4;
+	return mem;
+}
diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h
index 81231e04e8f3..ace9068e07e8 100644
--- a/include/linux/of_fdt.h
+++ b/include/linux/of_fdt.h
@@ -69,6 +69,10 @@ extern void *of_get_flat_dt_prop(unsigned long node, const char *name,
 				 unsigned long *size);
 extern int of_flat_dt_is_compatible(unsigned long node, const char *name);
 extern unsigned long of_get_flat_dt_root(void);
+extern unsigned long unflatten_dt_node(unsigned long mem, unsigned long *p,
+					struct device_node *dad,
+					struct device_node ***allnextpp,
+					unsigned long fpsize);
 
 /* Other Prototypes */
 extern void finish_device_tree(void);
-- 
cgit v1.2.3


From 41f880091c15b039ffcc8b3d831656b81517a6d3 Mon Sep 17 00:00:00 2001
From: Grant Likely <grant.likely@secretlab.ca>
Date: Mon, 23 Nov 2009 20:07:01 -0700
Subject: of/flattree: Merge unflatten_device_tree

Merge common code between PowerPC and MicroBlaze

Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
Reviewed-by: Wolfram Sang <w.sang@pengutronix.de>
Tested-by: Michal Simek <monstr@monstr.eu>
---
 arch/microblaze/include/asm/prom.h |  1 -
 arch/microblaze/kernel/prom.c      | 49 -----------------------------------
 arch/powerpc/kernel/prom.c         | 50 ------------------------------------
 drivers/of/fdt.c                   | 52 ++++++++++++++++++++++++++++++++++++++
 include/linux/of.h                 |  3 +++
 include/linux/of_fdt.h             |  4 ---
 6 files changed, 55 insertions(+), 104 deletions(-)

(limited to 'include/linux')

diff --git a/arch/microblaze/include/asm/prom.h b/arch/microblaze/include/asm/prom.h
index ef3ec1d6ceb3..07d1063f9aae 100644
--- a/arch/microblaze/include/asm/prom.h
+++ b/arch/microblaze/include/asm/prom.h
@@ -37,7 +37,6 @@ extern struct device_node *of_chosen;
 
 #define HAVE_ARCH_DEVTREE_FIXUPS
 
-extern struct device_node *allnodes;	/* temporary while merging */
 extern rwlock_t devtree_lock;	/* temporary while merging */
 
 /* For updating the device tree at runtime */
diff --git a/arch/microblaze/kernel/prom.c b/arch/microblaze/kernel/prom.c
index 021770abfbd7..901d538c15ef 100644
--- a/arch/microblaze/kernel/prom.c
+++ b/arch/microblaze/kernel/prom.c
@@ -50,55 +50,6 @@ typedef u32 cell_t;
 /* export that to outside world */
 struct device_node *of_chosen;
 
-/**
- * unflattens the device-tree passed by the firmware, creating the
- * tree of struct device_node. It also fills the "name" and "type"
- * pointers of the nodes so the normal device-tree walking functions
- * can be used (this used to be done by finish_device_tree)
- */
-void __init unflatten_device_tree(void)
-{
-	unsigned long start, mem, size;
-	struct device_node **allnextp = &allnodes;
-
-	pr_debug(" -> unflatten_device_tree()\n");
-
-	/* First pass, scan for size */
-	start = ((unsigned long)initial_boot_params) +
-		initial_boot_params->off_dt_struct;
-	size = unflatten_dt_node(0, &start, NULL, NULL, 0);
-	size = (size | 3) + 1;
-
-	pr_debug("  size is %lx, allocating...\n", size);
-
-	/* Allocate memory for the expanded device tree */
-	mem = lmb_alloc(size + 4, __alignof__(struct device_node));
-	mem = (unsigned long) __va(mem);
-
-	((u32 *)mem)[size / 4] = 0xdeadbeef;
-
-	pr_debug("  unflattening %lx...\n", mem);
-
-	/* Second pass, do actual unflattening */
-	start = ((unsigned long)initial_boot_params) +
-		initial_boot_params->off_dt_struct;
-	unflatten_dt_node(mem, &start, NULL, &allnextp, 0);
-	if (*((u32 *)start) != OF_DT_END)
-		printk(KERN_WARNING "Weird tag at end of tree: %08x\n",
-			*((u32 *)start));
-	if (((u32 *)mem)[size / 4] != 0xdeadbeef)
-		printk(KERN_WARNING "End of tree marker overwritten: %08x\n",
-			((u32 *)mem)[size / 4]);
-	*allnextp = NULL;
-
-	/* Get pointer to OF "/chosen" node for use everywhere */
-	of_chosen = of_find_node_by_path("/chosen");
-	if (of_chosen == NULL)
-		of_chosen = of_find_node_by_path("/chosen@0");
-
-	pr_debug(" <- unflatten_device_tree()\n");
-}
-
 #define early_init_dt_scan_drconf_memory(node) 0
 
 static int __init early_init_dt_scan_cpus(unsigned long node,
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index a102a0a33ed1..1280f3484ad3 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -73,8 +73,6 @@ unsigned long tce_alloc_start, tce_alloc_end;
 
 typedef u32 cell_t;
 
-extern struct device_node *allnodes;	/* temporary while merging */
-
 extern rwlock_t devtree_lock;	/* temporary while merging */
 
 /* export that to outside world */
@@ -119,54 +117,6 @@ static void __init move_device_tree(void)
 	DBG("<- move_device_tree\n");
 }
 
-/**
- * unflattens the device-tree passed by the firmware, creating the
- * tree of struct device_node. It also fills the "name" and "type"
- * pointers of the nodes so the normal device-tree walking functions
- * can be used (this used to be done by finish_device_tree)
- */
-void __init unflatten_device_tree(void)
-{
-	unsigned long start, mem, size;
-	struct device_node **allnextp = &allnodes;
-
-	DBG(" -> unflatten_device_tree()\n");
-
-	/* First pass, scan for size */
-	start = ((unsigned long)initial_boot_params) +
-		initial_boot_params->off_dt_struct;
-	size = unflatten_dt_node(0, &start, NULL, NULL, 0);
-	size = (size | 3) + 1;
-
-	DBG("  size is %lx, allocating...\n", size);
-
-	/* Allocate memory for the expanded device tree */
-	mem = lmb_alloc(size + 4, __alignof__(struct device_node));
-	mem = (unsigned long) __va(mem);
-
-	((u32 *)mem)[size / 4] = 0xdeadbeef;
-
-	DBG("  unflattening %lx...\n", mem);
-
-	/* Second pass, do actual unflattening */
-	start = ((unsigned long)initial_boot_params) +
-		initial_boot_params->off_dt_struct;
-	unflatten_dt_node(mem, &start, NULL, &allnextp, 0);
-	if (*((u32 *)start) != OF_DT_END)
-		printk(KERN_WARNING "Weird tag at end of tree: %08x\n", *((u32 *)start));
-	if (((u32 *)mem)[size / 4] != 0xdeadbeef)
-		printk(KERN_WARNING "End of tree marker overwritten: %08x\n",
-		       ((u32 *)mem)[size / 4] );
-	*allnextp = NULL;
-
-	/* Get pointer to OF "/chosen" node for use everywhere */
-	of_chosen = of_find_node_by_path("/chosen");
-	if (of_chosen == NULL)
-		of_chosen = of_find_node_by_path("/chosen@0");
-
-	DBG(" <- unflatten_device_tree()\n");
-}
-
 /*
  * ibm,pa-features is a per-cpu property that contains a string of
  * attribute descriptors, each of which has a 2 byte header plus up
diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
index 6852ecf6d1e1..43d236cbc17b 100644
--- a/drivers/of/fdt.c
+++ b/drivers/of/fdt.c
@@ -9,6 +9,8 @@
  * version 2 as published by the Free Software Foundation.
  */
 
+#include <linux/kernel.h>
+#include <linux/lmb.h>
 #include <linux/of.h>
 #include <linux/of_fdt.h>
 
@@ -366,3 +368,53 @@ unsigned long __init unflatten_dt_node(unsigned long mem,
 	*p += 4;
 	return mem;
 }
+
+/**
+ * unflatten_device_tree - create tree of device_nodes from flat blob
+ *
+ * unflattens the device-tree passed by the firmware, creating the
+ * tree of struct device_node. It also fills the "name" and "type"
+ * pointers of the nodes so the normal device-tree walking functions
+ * can be used.
+ */
+void __init unflatten_device_tree(void)
+{
+	unsigned long start, mem, size;
+	struct device_node **allnextp = &allnodes;
+
+	pr_debug(" -> unflatten_device_tree()\n");
+
+	/* First pass, scan for size */
+	start = ((unsigned long)initial_boot_params) +
+		initial_boot_params->off_dt_struct;
+	size = unflatten_dt_node(0, &start, NULL, NULL, 0);
+	size = (size | 3) + 1;
+
+	pr_debug("  size is %lx, allocating...\n", size);
+
+	/* Allocate memory for the expanded device tree */
+	mem = lmb_alloc(size + 4, __alignof__(struct device_node));
+	mem = (unsigned long) __va(mem);
+
+	((u32 *)mem)[size / 4] = 0xdeadbeef;
+
+	pr_debug("  unflattening %lx...\n", mem);
+
+	/* Second pass, do actual unflattening */
+	start = ((unsigned long)initial_boot_params) +
+		initial_boot_params->off_dt_struct;
+	unflatten_dt_node(mem, &start, NULL, &allnextp, 0);
+	if (*((u32 *)start) != OF_DT_END)
+		pr_warning("Weird tag at end of tree: %08x\n", *((u32 *)start));
+	if (((u32 *)mem)[size / 4] != 0xdeadbeef)
+		pr_warning("End of tree marker overwritten: %08x\n",
+			   ((u32 *)mem)[size / 4]);
+	*allnextp = NULL;
+
+	/* Get pointer to OF "/chosen" node for use everywhere */
+	of_chosen = of_find_node_by_path("/chosen");
+	if (of_chosen == NULL)
+		of_chosen = of_find_node_by_path("/chosen@0");
+
+	pr_debug(" <- unflatten_device_tree()\n");
+}
diff --git a/include/linux/of.h b/include/linux/of.h
index e7facd8fbce8..bec215792c4f 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -63,6 +63,9 @@ struct device_node {
 #endif
 };
 
+/* Pointer for first entry in chain of all nodes. */
+extern struct device_node *allnodes;
+
 static inline int of_node_check_flag(struct device_node *n, unsigned long flag)
 {
 	return test_bit(flag, &n->_flags);
diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h
index ace9068e07e8..81231e04e8f3 100644
--- a/include/linux/of_fdt.h
+++ b/include/linux/of_fdt.h
@@ -69,10 +69,6 @@ extern void *of_get_flat_dt_prop(unsigned long node, const char *name,
 				 unsigned long *size);
 extern int of_flat_dt_is_compatible(unsigned long node, const char *name);
 extern unsigned long of_get_flat_dt_root(void);
-extern unsigned long unflatten_dt_node(unsigned long mem, unsigned long *p,
-					struct device_node *dad,
-					struct device_node ***allnextpp,
-					unsigned long fpsize);
 
 /* Other Prototypes */
 extern void finish_device_tree(void);
-- 
cgit v1.2.3


From 2be09cb993826b52c9fc1d44747c20dd43a50038 Mon Sep 17 00:00:00 2001
From: Grant Likely <grant.likely@secretlab.ca>
Date: Mon, 23 Nov 2009 20:16:46 -0700
Subject: of: remove special case definition of of_read_ulong()

Special case of of_read_ulong() was defined for PPC32 to toss away
all but the last 32 bits when a large number value was read, and the
'normal' version for ppc64 just #defined of_read_ulong to of_read_number
which causes compiler warnings on MicroBlaze and other 32 bit
architectures because it returns a u64 instead of a ulong.

This patch fixes the problem by defining a common implementation of
of_read_ulong() that works everywhere.

Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
Reviewed-by: Wolfram Sang <w.sang@pengutronix.de>
Tested-by: Michal Simek <monstr@monstr.eu>
---
 include/linux/of.h | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/of.h b/include/linux/of.h
index bec215792c4f..d4c014a35ea5 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -113,14 +113,11 @@ static inline u64 of_read_number(const u32 *cell, int size)
 }
 
 /* Like of_read_number, but we want an unsigned long result */
-#ifdef CONFIG_PPC32
 static inline unsigned long of_read_ulong(const u32 *cell, int size)
 {
-	return cell[size-1];
+	/* toss away upper bits if unsigned long is smaller than u64 */
+	return of_read_number(cell, size);
 }
-#else
-#define of_read_ulong(cell, size)	of_read_number(cell, size)
-#endif
 
 #include <asm/prom.h>
 
-- 
cgit v1.2.3


From 3cf602532c535ec655725e9833378e04c9fd7783 Mon Sep 17 00:00:00 2001
From: Amul Kumar Saha <amul.saha@samsung.com>
Date: Wed, 21 Oct 2009 17:00:05 +0530
Subject: mtd: OneNAND OTP support rework

What is OTP in OneNAND?
The device includes,
1. one block-sized OTP (One Time Programmable) area and
2. user-controlled 1st block OTP(Block 0)
that can be used to increase system security or to provide
identification capabilities.

What is done?
In OneNAND, one block of the NAND Array is set aside as an OTP
memory area, and 1st Block (Block 0) can be used as OTP area.
This area, available to the user, can be configured and locked
with secured user information. The OTP block can be read,
programmed and locked using the same operations as any other NAND
Flash Array memory block. After issuing an OTP-Lock, OTP block
cannot be erased. OTP block is fully-guaranteed to be a good
block.

Why it is done?
Locking the 1st Block OTP has the effect of a 'Write-protect' to
guard against accidental re-programming of data stored in the 1st
block and OTP Block.

Which problem it solves?
OTP support is provided in the existing implementation of
OneNAND/Flex-OneNAND driver, but it is not working with OneNAND
devices. Have observed the following in current OTP OneNAND Implmentation,
1. DataSheet specific sequence to lock the OTP Area is not followed.
2. Certain functions are quiet generic to cope with OTP specific activity.
This patch re-implements OTP support for OneNAND device.

How it is done?
For all blocks, 8th word is available to the user.
However, in case of OTP Block, 8th word of sector 0, page 0 is reserved as
OTP Locking Bit area. Therefore, in case of OTP Block, user usage on this
area is prohibited. Condition specific values are entered in the 8th word,
sector0, page 0 of the OTP block during the process of issuing an OTP-Lock.
The possible conditions are:
1. Only 1st Block Lock
2. Only OTP Block Lock
3. Lock both the 1st Block and the OTP Block

What Other feature additions have been done in this patch?
This patch adds feature for:
1. Only 1st Block Lock
2. Lock both the 1st Block and the OTP Blocks

Re-implemented OTP support for OneNAND
Added following features to OneNAND
	1. Lock only 1st Block in OneNAND
	2. Lock BOTH 1st Block and OTP Block in OneNAND

[comments were slightly tweaked by Artem]

Signed-off-by: Amul Kumar Saha <amul.saha@samsung.com>
Reviewed-by: Adrian Hunter <adrian.hunter@nokia.com>
Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/onenand/onenand_base.c | 292 +++++++++++++++++++++++++++++++++----
 include/linux/mtd/onenand.h        |   4 +-
 2 files changed, 264 insertions(+), 32 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/onenand/onenand_base.c b/drivers/mtd/onenand/onenand_base.c
index 6e250f3a4a16..7bd6ad3ff30a 100644
--- a/drivers/mtd/onenand/onenand_base.c
+++ b/drivers/mtd/onenand/onenand_base.c
@@ -1,17 +1,19 @@
 /*
  *  linux/drivers/mtd/onenand/onenand_base.c
  *
- *  Copyright (C) 2005-2007 Samsung Electronics
+ *  Copyright © 2005-2009 Samsung Electronics
+ *  Copyright © 2007 Nokia Corporation
+ *
  *  Kyungmin Park <kyungmin.park@samsung.com>
  *
  *  Credits:
  *	Adrian Hunter <ext-adrian.hunter@nokia.com>:
  *	auto-placement support, read-while load support, various fixes
- *	Copyright (C) Nokia Corporation, 2007
  *
  *	Vishak G <vishak.g at samsung.com>, Rohit Hagargundgi <h.rohit at samsung.com>
  *	Flex-OneNAND support
- *	Copyright (C) Samsung Electronics, 2008
+ *	Amul Kumar Saha <amul.saha at samsung.com>
+ *	OTP support
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -43,6 +45,18 @@ MODULE_PARM_DESC(flex_bdry,	"SLC Boundary information for Flex-OneNAND"
 				"    : 0->Set boundary in unlocked status"
 				"    : 1->Set boundary in locked status");
 
+/* Default OneNAND/Flex-OneNAND OTP options*/
+static int otp;
+
+module_param(otp, int, 0400);
+MODULE_PARM_DESC(otp,	"Corresponding behaviour of OneNAND in OTP"
+			"Syntax : otp=LOCK_TYPE"
+			"LOCK_TYPE : Keys issued, for specific OTP Lock type"
+			"	   : 0 -> Default (No Blocks Locked)"
+			"	   : 1 -> OTP Block lock"
+			"	   : 2 -> 1st Block lock"
+			"	   : 3 -> BOTH OTP Block and 1st Block lock");
+
 /**
  *  onenand_oob_128 - oob info for Flex-Onenand with 4KB page
  *  For now, we expose only 64 out of 80 ecc bytes
@@ -2591,6 +2605,208 @@ static void onenand_unlock_all(struct mtd_info *mtd)
 
 #ifdef CONFIG_MTD_ONENAND_OTP
 
+/**
+ * onenand_otp_command - Send OTP specific command to OneNAND device
+ * @param mtd	 MTD device structure
+ * @param cmd	 the command to be sent
+ * @param addr	 offset to read from or write to
+ * @param len	 number of bytes to read or write
+ */
+static int onenand_otp_command(struct mtd_info *mtd, int cmd, loff_t addr,
+				size_t len)
+{
+	struct onenand_chip *this = mtd->priv;
+	int value, block, page;
+
+	/* Address translation */
+	switch (cmd) {
+	case ONENAND_CMD_OTP_ACCESS:
+		block = (int) (addr >> this->erase_shift);
+		page = -1;
+		break;
+
+	default:
+		block = (int) (addr >> this->erase_shift);
+		page = (int) (addr >> this->page_shift);
+
+		if (ONENAND_IS_2PLANE(this)) {
+			/* Make the even block number */
+			block &= ~1;
+			/* Is it the odd plane? */
+			if (addr & this->writesize)
+				block++;
+			page >>= 1;
+		}
+		page &= this->page_mask;
+		break;
+	}
+
+	if (block != -1) {
+		/* Write 'DFS, FBA' of Flash */
+		value = onenand_block_address(this, block);
+		this->write_word(value, this->base +
+				ONENAND_REG_START_ADDRESS1);
+	}
+
+	if (page != -1) {
+		/* Now we use page size operation */
+		int sectors = 4, count = 4;
+		int dataram;
+
+		switch (cmd) {
+		default:
+			if (ONENAND_IS_2PLANE(this) && cmd == ONENAND_CMD_PROG)
+				cmd = ONENAND_CMD_2X_PROG;
+			dataram = ONENAND_CURRENT_BUFFERRAM(this);
+			break;
+		}
+
+		/* Write 'FPA, FSA' of Flash */
+		value = onenand_page_address(page, sectors);
+		this->write_word(value, this->base +
+				ONENAND_REG_START_ADDRESS8);
+
+		/* Write 'BSA, BSC' of DataRAM */
+		value = onenand_buffer_address(dataram, sectors, count);
+		this->write_word(value, this->base + ONENAND_REG_START_BUFFER);
+	}
+
+	/* Interrupt clear */
+	this->write_word(ONENAND_INT_CLEAR, this->base + ONENAND_REG_INTERRUPT);
+
+	/* Write command */
+	this->write_word(cmd, this->base + ONENAND_REG_COMMAND);
+
+	return 0;
+}
+
+/**
+ * onenand_otp_write_oob_nolock - [Internal] OneNAND write out-of-band, specific to OTP
+ * @param mtd		MTD device structure
+ * @param to		offset to write to
+ * @param len		number of bytes to write
+ * @param retlen	pointer to variable to store the number of written bytes
+ * @param buf		the data to write
+ *
+ * OneNAND write out-of-band only for OTP
+ */
+static int onenand_otp_write_oob_nolock(struct mtd_info *mtd, loff_t to,
+				    struct mtd_oob_ops *ops)
+{
+	struct onenand_chip *this = mtd->priv;
+	int column, ret = 0, oobsize;
+	int written = 0;
+	u_char *oobbuf;
+	size_t len = ops->ooblen;
+	const u_char *buf = ops->oobbuf;
+	int block, value, status;
+
+	to += ops->ooboffs;
+
+	/* Initialize retlen, in case of early exit */
+	ops->oobretlen = 0;
+
+	oobsize = mtd->oobsize;
+
+	column = to & (mtd->oobsize - 1);
+
+	oobbuf = this->oob_buf;
+
+	/* Loop until all data write */
+	while (written < len) {
+		int thislen = min_t(int, oobsize, len - written);
+
+		cond_resched();
+
+		block = (int) (to >> this->erase_shift);
+		/*
+		 * Write 'DFS, FBA' of Flash
+		 * Add: F100h DQ=DFS, FBA
+		 */
+
+		value = onenand_block_address(this, block);
+		this->write_word(value, this->base +
+				ONENAND_REG_START_ADDRESS1);
+
+		/*
+		 * Select DataRAM for DDP
+		 * Add: F101h DQ=DBS
+		 */
+
+		value = onenand_bufferram_address(this, block);
+		this->write_word(value, this->base +
+				ONENAND_REG_START_ADDRESS2);
+		ONENAND_SET_NEXT_BUFFERRAM(this);
+
+		/*
+		 * Enter OTP access mode
+		 */
+		this->command(mtd, ONENAND_CMD_OTP_ACCESS, 0, 0);
+		this->wait(mtd, FL_OTPING);
+
+		/* We send data to spare ram with oobsize
+		 * to prevent byte access */
+		memcpy(oobbuf + column, buf, thislen);
+
+		/*
+		 * Write Data into DataRAM
+		 * Add: 8th Word
+		 * in sector0/spare/page0
+		 * DQ=XXFCh
+		 */
+		this->write_bufferram(mtd, ONENAND_SPARERAM,
+					oobbuf, 0, mtd->oobsize);
+
+		onenand_otp_command(mtd, ONENAND_CMD_PROGOOB, to, mtd->oobsize);
+		onenand_update_bufferram(mtd, to, 0);
+		if (ONENAND_IS_2PLANE(this)) {
+			ONENAND_SET_BUFFERRAM1(this);
+			onenand_update_bufferram(mtd, to + this->writesize, 0);
+		}
+
+		ret = this->wait(mtd, FL_WRITING);
+		if (ret) {
+			printk(KERN_ERR "%s: write failed %d\n", __func__, ret);
+			break;
+		}
+
+		/* Exit OTP access mode */
+		this->command(mtd, ONENAND_CMD_RESET, 0, 0);
+		this->wait(mtd, FL_RESETING);
+
+		status = this->read_word(this->base + ONENAND_REG_CTRL_STATUS);
+		status &= 0x60;
+
+		if (status == 0x60) {
+			printk(KERN_DEBUG "\nBLOCK\tSTATUS\n");
+			printk(KERN_DEBUG "1st Block\tLOCKED\n");
+			printk(KERN_DEBUG "OTP Block\tLOCKED\n");
+		} else if (status == 0x20) {
+			printk(KERN_DEBUG "\nBLOCK\tSTATUS\n");
+			printk(KERN_DEBUG "1st Block\tLOCKED\n");
+			printk(KERN_DEBUG "OTP Block\tUN-LOCKED\n");
+		} else if (status == 0x40) {
+			printk(KERN_DEBUG "\nBLOCK\tSTATUS\n");
+			printk(KERN_DEBUG "1st Block\tUN-LOCKED\n");
+			printk(KERN_DEBUG "OTP Block\tLOCKED\n");
+		} else {
+			printk(KERN_DEBUG "Reboot to check\n");
+		}
+
+		written += thislen;
+		if (written == len)
+			break;
+
+		to += mtd->writesize;
+		buf += thislen;
+		column = 0;
+	}
+
+	ops->oobretlen = written;
+
+	return ret;
+}
+
 /* Internal OTP operation */
 typedef int (*otp_op_t)(struct mtd_info *mtd, loff_t form, size_t len,
 		size_t *retlen, u_char *buf);
@@ -2693,11 +2909,11 @@ static int do_otp_lock(struct mtd_info *mtd, loff_t from, size_t len,
 	struct mtd_oob_ops ops;
 	int ret;
 
-	/* Enter OTP access mode */
-	this->command(mtd, ONENAND_CMD_OTP_ACCESS, 0, 0);
-	this->wait(mtd, FL_OTPING);
-
 	if (FLEXONENAND(this)) {
+
+		/* Enter OTP access mode */
+		this->command(mtd, ONENAND_CMD_OTP_ACCESS, 0, 0);
+		this->wait(mtd, FL_OTPING);
 		/*
 		 * For Flex-OneNAND, we write lock mark to 1st word of sector 4 of
 		 * main area of page 49.
@@ -2708,19 +2924,19 @@ static int do_otp_lock(struct mtd_info *mtd, loff_t from, size_t len,
 		ops.oobbuf = NULL;
 		ret = onenand_write_ops_nolock(mtd, mtd->writesize * 49, &ops);
 		*retlen = ops.retlen;
+
+		/* Exit OTP access mode */
+		this->command(mtd, ONENAND_CMD_RESET, 0, 0);
+		this->wait(mtd, FL_RESETING);
 	} else {
 		ops.mode = MTD_OOB_PLACE;
 		ops.ooblen = len;
 		ops.oobbuf = buf;
 		ops.ooboffs = 0;
-		ret = onenand_write_oob_nolock(mtd, from, &ops);
+		ret = onenand_otp_write_oob_nolock(mtd, from, &ops);
 		*retlen = ops.oobretlen;
 	}
 
-	/* Exit OTP access mode */
-	this->command(mtd, ONENAND_CMD_RESET, 0, 0);
-	this->wait(mtd, FL_RESETING);
-
 	return ret;
 }
 
@@ -2751,16 +2967,21 @@ static int onenand_otp_walk(struct mtd_info *mtd, loff_t from, size_t len,
 	if (density < ONENAND_DEVICE_DENSITY_512Mb)
 		otp_pages = 20;
 	else
-		otp_pages = 10;
+		otp_pages = 50;
 
 	if (mode == MTD_OTP_FACTORY) {
 		from += mtd->writesize * otp_pages;
-		otp_pages = 64 - otp_pages;
+		otp_pages = ONENAND_PAGES_PER_BLOCK - otp_pages;
 	}
 
 	/* Check User/Factory boundary */
-	if (((mtd->writesize * otp_pages) - (from + len)) < 0)
-		return 0;
+	if (mode == MTD_OTP_USER) {
+		if (((mtd->writesize * otp_pages) - (from + len)) < 0)
+			return 0;
+	} else {
+		if (((mtd->writesize * otp_pages) - len) < 0)
+			return 0;
+	}
 
 	onenand_get_device(mtd, FL_OTPING);
 	while (len > 0 && otp_pages > 0) {
@@ -2783,13 +3004,12 @@ static int onenand_otp_walk(struct mtd_info *mtd, loff_t from, size_t len,
 			*retlen += sizeof(struct otp_info);
 		} else {
 			size_t tmp_retlen;
-			int size = len;
 
 			ret = action(mtd, from, len, &tmp_retlen, buf);
 
-			buf += size;
-			len -= size;
-			*retlen += size;
+			buf += tmp_retlen;
+			len -= tmp_retlen;
+			*retlen += tmp_retlen;
 
 			if (ret)
 				break;
@@ -2902,20 +3122,10 @@ static int onenand_lock_user_prot_reg(struct mtd_info *mtd, loff_t from,
 	u_char *buf = FLEXONENAND(this) ? this->page_buf : this->oob_buf;
 	size_t retlen;
 	int ret;
+	unsigned int otp_lock_offset = ONENAND_OTP_LOCK_OFFSET;
 
 	memset(buf, 0xff, FLEXONENAND(this) ? this->writesize
 						 : mtd->oobsize);
-	/*
-	 * Note: OTP lock operation
-	 *       OTP block : 0xXXFC
-	 *       1st block : 0xXXF3 (If chip support)
-	 *       Both      : 0xXXF0 (If chip support)
-	 */
-	if (FLEXONENAND(this))
-		buf[FLEXONENAND_OTP_LOCK_OFFSET] = 0xFC;
-	else
-		buf[ONENAND_OTP_LOCK_OFFSET] = 0xFC;
-
 	/*
 	 * Write lock mark to 8th word of sector0 of page0 of the spare0.
 	 * We write 16 bytes spare area instead of 2 bytes.
@@ -2926,10 +3136,30 @@ static int onenand_lock_user_prot_reg(struct mtd_info *mtd, loff_t from,
 	from = 0;
 	len = FLEXONENAND(this) ? mtd->writesize : 16;
 
+	/*
+	 * Note: OTP lock operation
+	 *       OTP block : 0xXXFC			XX 1111 1100
+	 *       1st block : 0xXXF3 (If chip support)	XX 1111 0011
+	 *       Both      : 0xXXF0 (If chip support)	XX 1111 0000
+	 */
+	if (FLEXONENAND(this))
+		otp_lock_offset = FLEXONENAND_OTP_LOCK_OFFSET;
+
+	/* ONENAND_OTP_AREA | ONENAND_OTP_BLOCK0 | ONENAND_OTP_AREA_BLOCK0 */
+	if (otp == 1)
+		buf[otp_lock_offset] = 0xFC;
+	else if (otp == 2)
+		buf[otp_lock_offset] = 0xF3;
+	else if (otp == 3)
+		buf[otp_lock_offset] = 0xF0;
+	else if (otp != 0)
+		printk(KERN_DEBUG "[OneNAND] Invalid option selected for OTP\n");
+
 	ret = onenand_otp_walk(mtd, from, len, &retlen, buf, do_otp_lock, MTD_OTP_USER);
 
 	return ret ? : retlen;
 }
+
 #endif	/* CONFIG_MTD_ONENAND_OTP */
 
 /**
diff --git a/include/linux/mtd/onenand.h b/include/linux/mtd/onenand.h
index f57e29e17bb0..5509eb06b326 100644
--- a/include/linux/mtd/onenand.h
+++ b/include/linux/mtd/onenand.h
@@ -1,7 +1,7 @@
 /*
  *  linux/include/linux/mtd/onenand.h
  *
- *  Copyright (C) 2005-2007 Samsung Electronics
+ *  Copyright © 2005-2009 Samsung Electronics
  *  Kyungmin Park <kyungmin.park@samsung.com>
  *
  * This program is free software; you can redistribute it and/or modify
@@ -137,6 +137,8 @@ struct onenand_chip {
 /*
  * Helper macros
  */
+#define ONENAND_PAGES_PER_BLOCK        (1<<6)
+
 #define ONENAND_CURRENT_BUFFERRAM(this)		(this->bufferram_index)
 #define ONENAND_NEXT_BUFFERRAM(this)		(this->bufferram_index ^ 1)
 #define ONENAND_SET_NEXT_BUFFERRAM(this)	(this->bufferram_index ^= 1)
-- 
cgit v1.2.3


From 1c63aca32903efc219fb9df72bae5344f3e54ed5 Mon Sep 17 00:00:00 2001
From: Akinobu Mita <akinobu.mita@gmail.com>
Date: Thu, 22 Oct 2009 16:53:32 +0900
Subject: mtd: Add __nand_calculate_ecc() to NAND ECC functions

Add __nand_calculate_ecc() which does not take struct mtd_info.
The built-in 256/512 software ECC calculation and correction tester
will use it.

Signed-off-by: Akinobu Mita <akinobu.mita@gmail.com>
Acked-by: Vimal Singh <vimalsingh@ti.com>
Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/nand/nand_ecc.c  | 25 ++++++++++++++++++++-----
 include/linux/mtd/nand_ecc.h | 10 ++++++++--
 2 files changed, 28 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/nand/nand_ecc.c b/drivers/mtd/nand/nand_ecc.c
index db7ae9d6a296..809fb53304ae 100644
--- a/drivers/mtd/nand/nand_ecc.c
+++ b/drivers/mtd/nand/nand_ecc.c
@@ -150,20 +150,19 @@ static const char addressbits[256] = {
 };
 
 /**
- * nand_calculate_ecc - [NAND Interface] Calculate 3-byte ECC for 256/512-byte
+ * __nand_calculate_ecc - [NAND Interface] Calculate 3-byte ECC for 256/512-byte
  *			 block
- * @mtd:	MTD block structure
  * @buf:	input buffer with raw data
+ * @eccsize:	data bytes per ecc step (256 or 512)
  * @code:	output buffer with ECC
  */
-int nand_calculate_ecc(struct mtd_info *mtd, const unsigned char *buf,
+void __nand_calculate_ecc(const unsigned char *buf, unsigned int eccsize,
 		       unsigned char *code)
 {
 	int i;
 	const uint32_t *bp = (uint32_t *)buf;
 	/* 256 or 512 bytes/ecc  */
-	const uint32_t eccsize_mult =
-			(((struct nand_chip *)mtd->priv)->ecc.size) >> 8;
+	const uint32_t eccsize_mult = eccsize >> 8;
 	uint32_t cur;		/* current value in buffer */
 	/* rp0..rp15..rp17 are the various accumulated parities (per byte) */
 	uint32_t rp0, rp1, rp2, rp3, rp4, rp5, rp6, rp7;
@@ -412,6 +411,22 @@ int nand_calculate_ecc(struct mtd_info *mtd, const unsigned char *buf,
 		    (invparity[par & 0x55] << 2) |
 		    (invparity[rp17] << 1) |
 		    (invparity[rp16] << 0);
+}
+EXPORT_SYMBOL(__nand_calculate_ecc);
+
+/**
+ * nand_calculate_ecc - [NAND Interface] Calculate 3-byte ECC for 256/512-byte
+ *			 block
+ * @mtd:	MTD block structure
+ * @buf:	input buffer with raw data
+ * @code:	output buffer with ECC
+ */
+int nand_calculate_ecc(struct mtd_info *mtd, const unsigned char *buf,
+		       unsigned char *code)
+{
+	__nand_calculate_ecc(buf,
+			((struct nand_chip *)mtd->priv)->ecc.size, code);
+
 	return 0;
 }
 EXPORT_SYMBOL(nand_calculate_ecc);
diff --git a/include/linux/mtd/nand_ecc.h b/include/linux/mtd/nand_ecc.h
index 052ea8ca2434..41bc013571d0 100644
--- a/include/linux/mtd/nand_ecc.h
+++ b/include/linux/mtd/nand_ecc.h
@@ -16,7 +16,13 @@
 struct mtd_info;
 
 /*
- * Calculate 3 byte ECC code for 256 byte block
+ * Calculate 3 byte ECC code for eccsize byte block
+ */
+void __nand_calculate_ecc(const u_char *dat, unsigned int eccsize,
+				u_char *ecc_code);
+
+/*
+ * Calculate 3 byte ECC code for 256/512 byte block
  */
 int nand_calculate_ecc(struct mtd_info *mtd, const u_char *dat, u_char *ecc_code);
 
@@ -27,7 +33,7 @@ int __nand_correct_data(u_char *dat, u_char *read_ecc, u_char *calc_ecc,
 			unsigned int eccsize);
 
 /*
- * Detect and correct a 1 bit error for 256 byte block
+ * Detect and correct a 1 bit error for 256/512 byte block
  */
 int nand_correct_data(struct mtd_info *mtd, u_char *dat, u_char *read_ecc, u_char *calc_ecc);
 
-- 
cgit v1.2.3


From 72073027ee95d059eb5a064da4a978efab36d4ab Mon Sep 17 00:00:00 2001
From: Mika Korhonen <ext-mika.2.korhonen@nokia.com>
Date: Fri, 23 Oct 2009 07:50:43 +0200
Subject: mtd: OneNAND: multiblock erase support

Add support for multiblock erase command. OneNANDs (excluding Flex-OneNAND)
are capable of simultaneous erase of up to 64 eraseblocks which is much faster.

This changes the erase requests for regions covering multiple eraseblocks
to be performed using multiblock erase.

Signed-off-by: Mika Korhonen <ext-mika.2.korhonen@nokia.com>
Reviewed-by: Adrian Hunter <adrian.hunter@nokia.com>
Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/onenand/omap2.c        |  22 ++++-
 drivers/mtd/onenand/onenand_base.c | 173 ++++++++++++++++++++++++++++++++++++-
 include/linux/mtd/flashchip.h      |   4 +-
 include/linux/mtd/onenand_regs.h   |   2 +
 4 files changed, 194 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/onenand/omap2.c b/drivers/mtd/onenand/omap2.c
index 0108ed42e877..2dafd0949be5 100644
--- a/drivers/mtd/onenand/omap2.c
+++ b/drivers/mtd/onenand/omap2.c
@@ -112,10 +112,24 @@ static int omap2_onenand_wait(struct mtd_info *mtd, int state)
 	unsigned long timeout;
 	u32 syscfg;
 
-	if (state == FL_RESETING) {
-		int i;
+	if (state == FL_RESETING || state == FL_PREPARING_ERASE ||
+	    state == FL_VERIFYING_ERASE) {
+		int i = 21;
+		unsigned int intr_flags = ONENAND_INT_MASTER;
+
+		switch (state) {
+		case FL_RESETING:
+			intr_flags |= ONENAND_INT_RESET;
+			break;
+		case FL_PREPARING_ERASE:
+			intr_flags |= ONENAND_INT_ERASE;
+			break;
+		case FL_VERIFYING_ERASE:
+			i = 101;
+			break;
+		}
 
-		for (i = 0; i < 20; i++) {
+		while (--i) {
 			udelay(1);
 			intr = read_reg(c, ONENAND_REG_INTERRUPT);
 			if (intr & ONENAND_INT_MASTER)
@@ -126,7 +140,7 @@ static int omap2_onenand_wait(struct mtd_info *mtd, int state)
 			wait_err("controller error", state, ctrl, intr);
 			return -EIO;
 		}
-		if (!(intr & ONENAND_INT_RESET)) {
+		if ((intr & intr_flags) != intr_flags) {
 			wait_err("timeout", state, ctrl, intr);
 			return -EIO;
 		}
diff --git a/drivers/mtd/onenand/onenand_base.c b/drivers/mtd/onenand/onenand_base.c
index 894ebadc3e69..266f471901e5 100644
--- a/drivers/mtd/onenand/onenand_base.c
+++ b/drivers/mtd/onenand/onenand_base.c
@@ -34,6 +34,13 @@
 
 #include <asm/io.h>
 
+/*
+ * Multiblock erase if number of blocks to erase is 2 or more.
+ * Maximum number of blocks for simultaneous erase is 64.
+ */
+#define MB_ERASE_MIN_BLK_COUNT 2
+#define MB_ERASE_MAX_BLK_COUNT 64
+
 /* Default Flex-OneNAND boundary and lock respectively */
 static int flex_bdry[MAX_DIES * 2] = { -1, 0, -1, 0 };
 
@@ -353,6 +360,8 @@ static int onenand_command(struct mtd_info *mtd, int cmd, loff_t addr, size_t le
 		break;
 
 	case ONENAND_CMD_ERASE:
+	case ONENAND_CMD_MULTIBLOCK_ERASE:
+	case ONENAND_CMD_ERASE_VERIFY:
 	case ONENAND_CMD_BUFFERRAM:
 	case ONENAND_CMD_OTP_ACCESS:
 		block = onenand_block(this, addr);
@@ -497,7 +506,7 @@ static int onenand_wait(struct mtd_info *mtd, int state)
 		if (interrupt & flags)
 			break;
 
-		if (state != FL_READING)
+		if (state != FL_READING && state != FL_PREPARING_ERASE)
 			cond_resched();
 	}
 	/* To get correct interrupt status in timeout case */
@@ -530,6 +539,18 @@ static int onenand_wait(struct mtd_info *mtd, int state)
 		return -EIO;
 	}
 
+	if (state == FL_PREPARING_ERASE && !(interrupt & ONENAND_INT_ERASE)) {
+		printk(KERN_ERR "%s: mb erase timeout! ctrl=0x%04x intr=0x%04x\n",
+		       __func__, ctrl, interrupt);
+		return -EIO;
+	}
+
+	if (!(interrupt & ONENAND_INT_MASTER)) {
+		printk(KERN_ERR "%s: timeout! ctrl=0x%04x intr=0x%04x\n",
+		       __func__, ctrl, interrupt);
+		return -EIO;
+	}
+
 	/* If there's controller error, it's a real error */
 	if (ctrl & ONENAND_CTRL_ERROR) {
 		printk(KERN_ERR "%s: controller error = 0x%04x\n",
@@ -2182,6 +2203,148 @@ static int onenand_block_isbad_nolock(struct mtd_info *mtd, loff_t ofs, int allo
 	return bbm->isbad_bbt(mtd, ofs, allowbbt);
 }
 
+
+static int onenand_multiblock_erase_verify(struct mtd_info *mtd,
+					   struct erase_info *instr)
+{
+	struct onenand_chip *this = mtd->priv;
+	loff_t addr = instr->addr;
+	int len = instr->len;
+	unsigned int block_size = (1 << this->erase_shift);
+	int ret = 0;
+
+	while (len) {
+		this->command(mtd, ONENAND_CMD_ERASE_VERIFY, addr, block_size);
+		ret = this->wait(mtd, FL_VERIFYING_ERASE);
+		if (ret) {
+			printk(KERN_ERR "%s: Failed verify, block %d\n",
+			       __func__, onenand_block(this, addr));
+			instr->state = MTD_ERASE_FAILED;
+			instr->fail_addr = addr;
+			return -1;
+		}
+		len -= block_size;
+		addr += block_size;
+	}
+	return 0;
+}
+
+/**
+ * onenand_multiblock_erase - [Internal] erase block(s) using multiblock erase
+ * @param mtd		MTD device structure
+ * @param instr		erase instruction
+ * @param region	erase region
+ *
+ * Erase one or more blocks up to 64 block at a time
+ */
+static int onenand_multiblock_erase(struct mtd_info *mtd,
+				    struct erase_info *instr,
+				    unsigned int block_size)
+{
+	struct onenand_chip *this = mtd->priv;
+	loff_t addr = instr->addr;
+	int len = instr->len;
+	int eb_count = 0;
+	int ret = 0;
+	int bdry_block = 0;
+
+	instr->state = MTD_ERASING;
+
+	if (ONENAND_IS_DDP(this)) {
+		loff_t bdry_addr = this->chipsize >> 1;
+		if (addr < bdry_addr && (addr + len) > bdry_addr)
+			bdry_block = bdry_addr >> this->erase_shift;
+	}
+
+	/* Pre-check bbs */
+	while (len) {
+		/* Check if we have a bad block, we do not erase bad blocks */
+		if (onenand_block_isbad_nolock(mtd, addr, 0)) {
+			printk(KERN_WARNING "%s: attempt to erase a bad block "
+			       "at addr 0x%012llx\n",
+			       __func__, (unsigned long long) addr);
+			instr->state = MTD_ERASE_FAILED;
+			return -EIO;
+		}
+		len -= block_size;
+		addr += block_size;
+	}
+
+	len = instr->len;
+	addr = instr->addr;
+
+	/* loop over 64 eb batches */
+	while (len) {
+		struct erase_info verify_instr = *instr;
+		int max_eb_count = MB_ERASE_MAX_BLK_COUNT;
+
+		verify_instr.addr = addr;
+		verify_instr.len = 0;
+
+		/* do not cross chip boundary */
+		if (bdry_block) {
+			int this_block = (addr >> this->erase_shift);
+
+			if (this_block < bdry_block) {
+				max_eb_count = min(max_eb_count,
+						   (bdry_block - this_block));
+			}
+		}
+
+		eb_count = 0;
+
+		while (len > block_size && eb_count < (max_eb_count - 1)) {
+			this->command(mtd, ONENAND_CMD_MULTIBLOCK_ERASE,
+				      addr, block_size);
+			onenand_invalidate_bufferram(mtd, addr, block_size);
+
+			ret = this->wait(mtd, FL_PREPARING_ERASE);
+			if (ret) {
+				printk(KERN_ERR "%s: Failed multiblock erase, "
+				       "block %d\n", __func__,
+				       onenand_block(this, addr));
+				instr->state = MTD_ERASE_FAILED;
+				instr->fail_addr = MTD_FAIL_ADDR_UNKNOWN;
+				return -EIO;
+			}
+
+			len -= block_size;
+			addr += block_size;
+			eb_count++;
+		}
+
+		/* last block of 64-eb series */
+		cond_resched();
+		this->command(mtd, ONENAND_CMD_ERASE, addr, block_size);
+		onenand_invalidate_bufferram(mtd, addr, block_size);
+
+		ret = this->wait(mtd, FL_ERASING);
+		/* Check if it is write protected */
+		if (ret) {
+			printk(KERN_ERR "%s: Failed erase, block %d\n",
+			       __func__, onenand_block(this, addr));
+			instr->state = MTD_ERASE_FAILED;
+			instr->fail_addr = MTD_FAIL_ADDR_UNKNOWN;
+			return -EIO;
+		}
+
+		len -= block_size;
+		addr += block_size;
+		eb_count++;
+
+		/* verify */
+		verify_instr.len = eb_count * block_size;
+		if (onenand_multiblock_erase_verify(mtd, &verify_instr)) {
+			instr->state = verify_instr.state;
+			instr->fail_addr = verify_instr.fail_addr;
+			return -EIO;
+		}
+
+	}
+	return 0;
+}
+
+
 /**
  * onenand_block_by_block_erase - [Internal] erase block(s) using regular erase
  * @param mtd		MTD device structure
@@ -2315,7 +2478,13 @@ static int onenand_erase(struct mtd_info *mtd, struct erase_info *instr)
 	/* Grab the lock and see if the device is available */
 	onenand_get_device(mtd, FL_ERASING);
 
-	ret = onenand_block_by_block_erase(mtd, instr, region, block_size);
+	if (region || instr->len < MB_ERASE_MIN_BLK_COUNT * block_size) {
+		/* region is set for Flex-OneNAND (no mb erase) */
+		ret = onenand_block_by_block_erase(mtd, instr,
+						   region, block_size);
+	} else {
+		ret = onenand_multiblock_erase(mtd, instr, block_size);
+	}
 
 	/* Deselect and wake up anyone waiting on the device */
 	onenand_release_device(mtd);
diff --git a/include/linux/mtd/flashchip.h b/include/linux/mtd/flashchip.h
index f350a4879f75..d0bf422ae374 100644
--- a/include/linux/mtd/flashchip.h
+++ b/include/linux/mtd/flashchip.h
@@ -41,9 +41,11 @@ typedef enum {
 	/* These 2 come from nand_state_t, which has been unified here */
 	FL_READING,
 	FL_CACHEDPRG,
-	/* These 2 come from onenand_state_t, which has been unified here */
+	/* These 4 come from onenand_state_t, which has been unified here */
 	FL_RESETING,
 	FL_OTPING,
+	FL_PREPARING_ERASE,
+	FL_VERIFYING_ERASE,
 
 	FL_UNKNOWN
 } flstate_t;
diff --git a/include/linux/mtd/onenand_regs.h b/include/linux/mtd/onenand_regs.h
index acadbf53a69f..cd6f3b431195 100644
--- a/include/linux/mtd/onenand_regs.h
+++ b/include/linux/mtd/onenand_regs.h
@@ -131,6 +131,8 @@
 #define ONENAND_CMD_LOCK_TIGHT		(0x2C)
 #define ONENAND_CMD_UNLOCK_ALL		(0x27)
 #define ONENAND_CMD_ERASE		(0x94)
+#define ONENAND_CMD_MULTIBLOCK_ERASE	(0x95)
+#define ONENAND_CMD_ERASE_VERIFY	(0x71)
 #define ONENAND_CMD_RESET		(0xF0)
 #define ONENAND_CMD_OTP_ACCESS		(0x65)
 #define ONENAND_CMD_READID		(0x90)
-- 
cgit v1.2.3


From b1c6e6db5bb7acad82e1c64914c6a9404dae3ee1 Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben@simtec.co.uk>
Date: Mon, 2 Nov 2009 18:12:33 +0000
Subject: mtd: nand: add option to quieten off the no device found messgae

Add NAND_SCAN_SILENT_NODEV to chip->options to the user-worrying messages
'No NAND device found!!!'. This message often worries users (was three
exclamation marks really necessary?) and especially in systems such as the
Simtec Osiris where there may be optional NAND devices which are not
known until probe time.

Revised version of the original NAND_PROBE_SPECULATIVE patch after comments
by Artem Bityutskiy about adding a whole new call.

Signed-off-by: Ben Dooks <ben@simtec.co.uk>
Signed-off-by: Simtec Linux Team <linux@simtec.co.uk>
Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/nand/nand_base.c | 3 ++-
 include/linux/mtd/nand.h     | 4 +++-
 2 files changed, 5 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index ba06473326d1..724cb2c9ad3f 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -2756,7 +2756,8 @@ int nand_scan_ident(struct mtd_info *mtd, int maxchips)
 	type = nand_get_flash_type(mtd, chip, busw, &nand_maf_id);
 
 	if (IS_ERR(type)) {
-		printk(KERN_WARNING "No NAND device found!!!\n");
+		if (!(chip->options & NAND_SCAN_SILENT_NODEV))
+			printk(KERN_WARNING "No NAND device found.\n");
 		chip->select_chip(mtd, -1);
 		return PTR_ERR(type);
 	}
diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index 2476078a032f..ccab9dfc5217 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -170,7 +170,6 @@ typedef enum {
 /* Chip does not allow subpage writes */
 #define NAND_NO_SUBPAGE_WRITE	0x00000200
 
-
 /* Options valid for Samsung large page devices */
 #define NAND_SAMSUNG_LP_OPTIONS \
 	(NAND_NO_PADDING | NAND_CACHEPRG | NAND_COPYBACK)
@@ -196,6 +195,9 @@ typedef enum {
 /* This option is defined if the board driver allocates its own buffers
    (e.g. because it needs them DMA-coherent */
 #define NAND_OWN_BUFFERS	0x00040000
+/* Chip may not exist, so silence any errors in scan */
+#define NAND_SCAN_SILENT_NODEV	0x00080000
+
 /* Options set by nand scan */
 /* Nand scan has allocated controller struct */
 #define NAND_CONTROLLER_ALLOC	0x80000000
-- 
cgit v1.2.3


From b2ef1a2bb2eb49cd7c75b22f1ea40ead0bdfdb8a Mon Sep 17 00:00:00 2001
From: Hans-Christian Egtvedt <hans-christian.egtvedt@atmel.com>
Date: Thu, 5 Nov 2009 15:53:43 +0100
Subject: mtd: move manufacturer to the common cfi.h header file

This patch moves the MANUFACTURER_ST and MANUFACTURER_INTEL to the
include/linux/mtd/cfi.h header file and renames them to CFI_MFR_ST and
CFI_MFR_INTEL. CFI_MFR_ST was already present there.

All references in drivers/mtd/chips/cfi_cmdset_0001.c are updated to reflect
this.

Signed-off-by: Hans-Christian Egtvedt <hans-christian.egtvedt@atmel.com>
Acked-by: Nicolas Pitre <nico@fluxnic.net>
Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/chips/cfi_cmdset_0001.c | 20 ++++++++++----------
 include/linux/mtd/cfi.h             |  9 +++++----
 2 files changed, 15 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/chips/cfi_cmdset_0001.c b/drivers/mtd/chips/cfi_cmdset_0001.c
index 94be67e880a7..5fbf29e1e64f 100644
--- a/drivers/mtd/chips/cfi_cmdset_0001.c
+++ b/drivers/mtd/chips/cfi_cmdset_0001.c
@@ -43,11 +43,11 @@
 // debugging, turns off buffer write mode if set to 1
 #define FORCE_WORD_WRITE 0
 
-#define MANUFACTURER_INTEL	0x0089
+/* Intel chips */
 #define I82802AB	0x00ad
 #define I82802AC	0x00ac
 #define PF38F4476	0x881c
-#define MANUFACTURER_ST         0x0020
+/* STMicroelectronics chips */
 #define M50LPW080       0x002F
 #define M50FLW080A	0x0080
 #define M50FLW080B	0x0081
@@ -308,16 +308,16 @@ static struct cfi_fixup cfi_fixup_table[] = {
 #endif
 	{ CFI_MFR_ST, 0x00ba, /* M28W320CT */ fixup_st_m28w320ct, NULL },
 	{ CFI_MFR_ST, 0x00bb, /* M28W320CB */ fixup_st_m28w320cb, NULL },
-	{ MANUFACTURER_INTEL, CFI_ID_ANY, fixup_unlock_powerup_lock, NULL, },
+	{ CFI_MFR_INTEL, CFI_ID_ANY, fixup_unlock_powerup_lock, NULL, },
 	{ 0, 0, NULL, NULL }
 };
 
 static struct cfi_fixup jedec_fixup_table[] = {
-	{ MANUFACTURER_INTEL, I82802AB,   fixup_use_fwh_lock, NULL, },
-	{ MANUFACTURER_INTEL, I82802AC,   fixup_use_fwh_lock, NULL, },
-	{ MANUFACTURER_ST,    M50LPW080,  fixup_use_fwh_lock, NULL, },
-	{ MANUFACTURER_ST,    M50FLW080A, fixup_use_fwh_lock, NULL, },
-	{ MANUFACTURER_ST,    M50FLW080B, fixup_use_fwh_lock, NULL, },
+	{ CFI_MFR_INTEL, I82802AB,   fixup_use_fwh_lock, NULL, },
+	{ CFI_MFR_INTEL, I82802AC,   fixup_use_fwh_lock, NULL, },
+	{ CFI_MFR_ST,    M50LPW080,  fixup_use_fwh_lock, NULL, },
+	{ CFI_MFR_ST,    M50FLW080A, fixup_use_fwh_lock, NULL, },
+	{ CFI_MFR_ST,    M50FLW080B, fixup_use_fwh_lock, NULL, },
 	{ 0, 0, NULL, NULL }
 };
 static struct cfi_fixup fixup_table[] = {
@@ -333,7 +333,7 @@ static struct cfi_fixup fixup_table[] = {
 static void cfi_fixup_major_minor(struct cfi_private *cfi,
 						struct cfi_pri_intelext *extp)
 {
-	if (cfi->mfr == MANUFACTURER_INTEL &&
+	if (cfi->mfr == CFI_MFR_INTEL &&
 			cfi->id == PF38F4476 && extp->MinorVersion == '3')
 		extp->MinorVersion = '1';
 }
@@ -2249,7 +2249,7 @@ static int cfi_intelext_otp_walk(struct mtd_info *mtd, loff_t from, size_t len,
 
 	/* Some chips have OTP located in the _top_ partition only.
 	   For example: Intel 28F256L18T (T means top-parameter device) */
-	if (cfi->mfr == MANUFACTURER_INTEL) {
+	if (cfi->mfr == CFI_MFR_INTEL) {
 		switch (cfi->id) {
 		case 0x880b:
 		case 0x880c:
diff --git a/include/linux/mtd/cfi.h b/include/linux/mtd/cfi.h
index 88d3d8fbf9f2..df89f4275232 100644
--- a/include/linux/mtd/cfi.h
+++ b/include/linux/mtd/cfi.h
@@ -518,10 +518,11 @@ struct cfi_fixup {
 #define CFI_MFR_ANY 0xffff
 #define CFI_ID_ANY  0xffff
 
-#define CFI_MFR_AMD 0x0001
-#define CFI_MFR_ATMEL 0x001F
-#define CFI_MFR_SAMSUNG 0x00EC
-#define CFI_MFR_ST  0x0020 	/* STMicroelectronics */
+#define CFI_MFR_AMD	0x0001
+#define CFI_MFR_INTEL	0x0089
+#define CFI_MFR_ATMEL	0x001F
+#define CFI_MFR_SAMSUNG	0x00EC
+#define CFI_MFR_ST	0x0020 /* STMicroelectronics */
 
 void cfi_fixup(struct mtd_info *mtd, struct cfi_fixup* fixups);
 
-- 
cgit v1.2.3


From 456b565cc52fbcdaa2e19ffdf40d9dd3b726d603 Mon Sep 17 00:00:00 2001
From: Simon Kagstrom <simon.kagstrom@netinsight.net>
Date: Fri, 16 Oct 2009 14:09:18 +0200
Subject: core: Add kernel message dumper to call on oopses and panics

The core functionality is implemented as per Linus suggestion from

  http://lists.infradead.org/pipermail/linux-mtd/2009-October/027620.html

(with the kmsg_dump implementation by Linus). A struct kmsg_dumper has
been added which contains a callback to dump the kernel log buffers on
crashes. The kmsg_dump function gets called from oops_exit() and panic()
and invokes this callbacks with the crash reason.

[dwmw2: Fix log_end handling]
Signed-off-by: Simon Kagstrom <simon.kagstrom@netinsight.net>
Reviewed-by: Anders Grafstrom <anders.grafstrom@netinsight.net>
Reviewed-by: Linus Torvalds <torvalds@linux-foundation.org>
Acked-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 include/linux/kmsg_dump.h |  44 +++++++++++++++++
 kernel/panic.c            |   3 ++
 kernel/printk.c           | 119 ++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 166 insertions(+)
 create mode 100644 include/linux/kmsg_dump.h

(limited to 'include/linux')

diff --git a/include/linux/kmsg_dump.h b/include/linux/kmsg_dump.h
new file mode 100644
index 000000000000..7f089ec5ef32
--- /dev/null
+++ b/include/linux/kmsg_dump.h
@@ -0,0 +1,44 @@
+/*
+ * linux/include/kmsg_dump.h
+ *
+ * Copyright (C) 2009 Net Insight AB
+ *
+ * Author: Simon Kagstrom <simon.kagstrom@netinsight.net>
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file COPYING in the main directory of this archive
+ * for more details.
+ */
+#ifndef _LINUX_KMSG_DUMP_H
+#define _LINUX_KMSG_DUMP_H
+
+#include <linux/list.h>
+
+enum kmsg_dump_reason {
+	KMSG_DUMP_OOPS,
+	KMSG_DUMP_PANIC,
+};
+
+/**
+ * struct kmsg_dumper - kernel crash message dumper structure
+ * @dump:	The callback which gets called on crashes. The buffer is passed
+ * 		as two sections, where s1 (length l1) contains the older
+ * 		messages and s2 (length l2) contains the newer.
+ * @list:	Entry in the dumper list (private)
+ * @registered:	Flag that specifies if this is already registered
+ */
+struct kmsg_dumper {
+	void (*dump)(struct kmsg_dumper *dumper, enum kmsg_dump_reason reason,
+			const char *s1, unsigned long l1,
+			const char *s2, unsigned long l2);
+	struct list_head list;
+	int registered;
+};
+
+void kmsg_dump(enum kmsg_dump_reason reason);
+
+int kmsg_dump_register(struct kmsg_dumper *dumper);
+
+int kmsg_dump_unregister(struct kmsg_dumper *dumper);
+
+#endif /* _LINUX_KMSG_DUMP_H */
diff --git a/kernel/panic.c b/kernel/panic.c
index bcdef26e3332..8c43226a544d 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -10,6 +10,7 @@
  */
 #include <linux/debug_locks.h>
 #include <linux/interrupt.h>
+#include <linux/kmsg_dump.h>
 #include <linux/kallsyms.h>
 #include <linux/notifier.h>
 #include <linux/module.h>
@@ -74,6 +75,7 @@ NORET_TYPE void panic(const char * fmt, ...)
 	dump_stack();
 #endif
 
+	kmsg_dump(KMSG_DUMP_PANIC);
 	/*
 	 * If we have crashed and we have a crash kernel loaded let it handle
 	 * everything else.
@@ -338,6 +340,7 @@ void oops_exit(void)
 {
 	do_oops_enter_exit();
 	print_oops_end_marker();
+	kmsg_dump(KMSG_DUMP_OOPS);
 }
 
 #ifdef WANT_WARN_ON_SLOWPATH
diff --git a/kernel/printk.c b/kernel/printk.c
index f38b07f78a4e..051d1f50648f 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -33,6 +33,7 @@
 #include <linux/bootmem.h>
 #include <linux/syscalls.h>
 #include <linux/kexec.h>
+#include <linux/kmsg_dump.h>
 
 #include <asm/uaccess.h>
 
@@ -1405,3 +1406,121 @@ bool printk_timed_ratelimit(unsigned long *caller_jiffies,
 }
 EXPORT_SYMBOL(printk_timed_ratelimit);
 #endif
+
+static DEFINE_SPINLOCK(dump_list_lock);
+static LIST_HEAD(dump_list);
+
+/**
+ * kmsg_dump_register - register a kernel log dumper.
+ * @dump: pointer to the kmsg_dumper structure
+ *
+ * Adds a kernel log dumper to the system. The dump callback in the
+ * structure will be called when the kernel oopses or panics and must be
+ * set. Returns zero on success and %-EINVAL or %-EBUSY otherwise.
+ */
+int kmsg_dump_register(struct kmsg_dumper *dumper)
+{
+	unsigned long flags;
+	int err = -EBUSY;
+
+	/* The dump callback needs to be set */
+	if (!dumper->dump)
+		return -EINVAL;
+
+	spin_lock_irqsave(&dump_list_lock, flags);
+	/* Don't allow registering multiple times */
+	if (!dumper->registered) {
+		dumper->registered = 1;
+		list_add_tail(&dumper->list, &dump_list);
+		err = 0;
+	}
+	spin_unlock_irqrestore(&dump_list_lock, flags);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(kmsg_dump_register);
+
+/**
+ * kmsg_dump_unregister - unregister a kmsg dumper.
+ * @dump: pointer to the kmsg_dumper structure
+ *
+ * Removes a dump device from the system. Returns zero on success and
+ * %-EINVAL otherwise.
+ */
+int kmsg_dump_unregister(struct kmsg_dumper *dumper)
+{
+	unsigned long flags;
+	int err = -EINVAL;
+
+	spin_lock_irqsave(&dump_list_lock, flags);
+	if (dumper->registered) {
+		dumper->registered = 0;
+		list_del(&dumper->list);
+		err = 0;
+	}
+	spin_unlock_irqrestore(&dump_list_lock, flags);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(kmsg_dump_unregister);
+
+static const char const *kmsg_reasons[] = {
+	[KMSG_DUMP_OOPS]	= "oops",
+	[KMSG_DUMP_PANIC]	= "panic",
+};
+
+static const char *kmsg_to_str(enum kmsg_dump_reason reason)
+{
+	if (reason >= ARRAY_SIZE(kmsg_reasons) || reason < 0)
+		return "unknown";
+
+	return kmsg_reasons[reason];
+}
+
+/**
+ * kmsg_dump - dump kernel log to kernel message dumpers.
+ * @reason: the reason (oops, panic etc) for dumping
+ *
+ * Iterate through each of the dump devices and call the oops/panic
+ * callbacks with the log buffer.
+ */
+void kmsg_dump(enum kmsg_dump_reason reason)
+{
+	unsigned long end;
+	unsigned chars;
+	struct kmsg_dumper *dumper;
+	const char *s1, *s2;
+	unsigned long l1, l2;
+	unsigned long flags;
+
+	/* Theoretically, the log could move on after we do this, but
+	   there's not a lot we can do about that. The new messages
+	   will overwrite the start of what we dump. */
+	spin_lock_irqsave(&logbuf_lock, flags);
+	end = log_end & LOG_BUF_MASK;
+	chars = logged_chars;
+	spin_unlock_irqrestore(&logbuf_lock, flags);
+
+	if (logged_chars > end) {
+		s1 = log_buf + log_buf_len - logged_chars + end;
+		l1 = logged_chars - end;
+
+		s2 = log_buf;
+		l2 = end;
+	} else {
+		s1 = "";
+		l1 = 0;
+
+		s2 = log_buf + end - logged_chars;
+		l2 = logged_chars;
+	}
+
+	if (!spin_trylock_irqsave(&dump_list_lock, flags)) {
+		printk(KERN_ERR "dump_kmsg: dump list lock is held during %s, skipping dump\n",
+				kmsg_to_str(reason));
+		return;
+	}
+	list_for_each_entry(dumper, &dump_list, list)
+		dumper->dump(dumper, reason, s1, l1, s2, l2);
+	spin_unlock_irqrestore(&dump_list_lock, flags);
+}
-- 
cgit v1.2.3


From 595dd3d8bf953254d8d2f30f99c54fe09c470040 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <randy.dunlap@oracle.com>
Date: Tue, 1 Dec 2009 10:52:02 -0800
Subject: kmsg_dump: fix build for CONFIG_PRINTK=n

kmsg_dump() fails to build when CONFIG_PRINTK=n; provide stubs
for the kmsg_dump*() functions when CONFIG_PRINTK=n.

kernel/printk.c: In function 'kmsg_dump':
kernel/printk.c:1501: error: 'log_buf_len' undeclared (first use in this function)
kernel/printk.c:1502: error: 'logged_chars' undeclared (first use in this function)
kernel/printk.c:1506: error: 'log_buf' undeclared (first use in this function)

Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Acked-by: Simon Kagstrom <simon.kagstrom@netinsight.net>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 include/linux/kmsg_dump.h | 16 ++++++++++++++++
 kernel/printk.c           |  2 +-
 2 files changed, 17 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/kmsg_dump.h b/include/linux/kmsg_dump.h
index 7f089ec5ef32..e32aa268efac 100644
--- a/include/linux/kmsg_dump.h
+++ b/include/linux/kmsg_dump.h
@@ -35,10 +35,26 @@ struct kmsg_dumper {
 	int registered;
 };
 
+#ifdef CONFIG_PRINTK
 void kmsg_dump(enum kmsg_dump_reason reason);
 
 int kmsg_dump_register(struct kmsg_dumper *dumper);
 
 int kmsg_dump_unregister(struct kmsg_dumper *dumper);
+#else
+static inline void kmsg_dump(enum kmsg_dump_reason reason)
+{
+}
+
+static inline int kmsg_dump_register(struct kmsg_dumper *dumper)
+{
+	return -EINVAL;
+}
+
+static inline int kmsg_dump_unregister(struct kmsg_dumper *dumper)
+{
+	return -EINVAL;
+}
+#endif
 
 #endif /* _LINUX_KMSG_DUMP_H */
diff --git a/kernel/printk.c b/kernel/printk.c
index 051d1f50648f..2a564570f822 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -1405,7 +1405,6 @@ bool printk_timed_ratelimit(unsigned long *caller_jiffies,
 	return false;
 }
 EXPORT_SYMBOL(printk_timed_ratelimit);
-#endif
 
 static DEFINE_SPINLOCK(dump_list_lock);
 static LIST_HEAD(dump_list);
@@ -1524,3 +1523,4 @@ void kmsg_dump(enum kmsg_dump_reason reason)
 		dumper->dump(dumper, reason, s1, l1, s2, l2);
 	spin_unlock_irqrestore(&dump_list_lock, flags);
 }
+#endif
-- 
cgit v1.2.3


From d2eecb03936878ec574ade5532fa83df7d75dde7 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Mon, 7 Dec 2009 10:36:20 -0500
Subject: ext4: Use slab allocator for sub-page sized allocations

Now that the SLUB seems to be fixed so that it respects the requested
alignment, use kmem_cache_alloc() to allocator if the block size of
the buffer heads to be allocated is less than the page size.
Previously, we were using 16k page on a Power system for each buffer,
even when the file system was using 1k or 4k block size.

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/jbd2/journal.c    | 132 +++++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/jbd2.h |  11 +----
 2 files changed, 134 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index ac0d027595d0..c03d4dce4d76 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -39,6 +39,8 @@
 #include <linux/seq_file.h>
 #include <linux/math64.h>
 #include <linux/hash.h>
+#include <linux/log2.h>
+#include <linux/vmalloc.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/jbd2.h>
@@ -93,6 +95,7 @@ EXPORT_SYMBOL(jbd2_journal_begin_ordered_truncate);
 
 static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *);
 static void __journal_abort_soft (journal_t *journal, int errno);
+static int jbd2_journal_create_slab(size_t slab_size);
 
 /*
  * Helper function used to manage commit timeouts
@@ -1248,6 +1251,13 @@ int jbd2_journal_load(journal_t *journal)
 		}
 	}
 
+	/*
+	 * Create a slab for this blocksize
+	 */
+	err = jbd2_journal_create_slab(be32_to_cpu(sb->s_blocksize));
+	if (err)
+		return err;
+
 	/* Let the recovery code check whether it needs to recover any
 	 * data from the journal. */
 	if (jbd2_journal_recover(journal))
@@ -1806,6 +1816,127 @@ size_t journal_tag_bytes(journal_t *journal)
 		return JBD2_TAG_SIZE32;
 }
 
+/*
+ * JBD memory management
+ *
+ * These functions are used to allocate block-sized chunks of memory
+ * used for making copies of buffer_head data.  Very often it will be
+ * page-sized chunks of data, but sometimes it will be in
+ * sub-page-size chunks.  (For example, 16k pages on Power systems
+ * with a 4k block file system.)  For blocks smaller than a page, we
+ * use a SLAB allocator.  There are slab caches for each block size,
+ * which are allocated at mount time, if necessary, and we only free
+ * (all of) the slab caches when/if the jbd2 module is unloaded.  For
+ * this reason we don't need to a mutex to protect access to
+ * jbd2_slab[] allocating or releasing memory; only in
+ * jbd2_journal_create_slab().
+ */
+#define JBD2_MAX_SLABS 8
+static struct kmem_cache *jbd2_slab[JBD2_MAX_SLABS];
+static DECLARE_MUTEX(jbd2_slab_create_sem);
+
+static const char *jbd2_slab_names[JBD2_MAX_SLABS] = {
+	"jbd2_1k", "jbd2_2k", "jbd2_4k", "jbd2_8k",
+	"jbd2_16k", "jbd2_32k", "jbd2_64k", "jbd2_128k"
+};
+
+
+static void jbd2_journal_destroy_slabs(void)
+{
+	int i;
+
+	for (i = 0; i < JBD2_MAX_SLABS; i++) {
+		if (jbd2_slab[i])
+			kmem_cache_destroy(jbd2_slab[i]);
+		jbd2_slab[i] = NULL;
+	}
+}
+
+static int jbd2_journal_create_slab(size_t size)
+{
+	int i = order_base_2(size) - 10;
+	size_t slab_size;
+
+	if (size == PAGE_SIZE)
+		return 0;
+
+	if (i >= JBD2_MAX_SLABS)
+		return -EINVAL;
+
+	if (unlikely(i < 0))
+		i = 0;
+	down(&jbd2_slab_create_sem);
+	if (jbd2_slab[i]) {
+		up(&jbd2_slab_create_sem);
+		return 0;	/* Already created */
+	}
+
+	slab_size = 1 << (i+10);
+	jbd2_slab[i] = kmem_cache_create(jbd2_slab_names[i], slab_size,
+					 slab_size, 0, NULL);
+	up(&jbd2_slab_create_sem);
+	if (!jbd2_slab[i]) {
+		printk(KERN_EMERG "JBD2: no memory for jbd2_slab cache\n");
+		return -ENOMEM;
+	}
+	return 0;
+}
+
+static struct kmem_cache *get_slab(size_t size)
+{
+	int i = order_base_2(size) - 10;
+
+	BUG_ON(i >= JBD2_MAX_SLABS);
+	if (unlikely(i < 0))
+		i = 0;
+	BUG_ON(jbd2_slab[i] == 0);
+	return jbd2_slab[i];
+}
+
+void *jbd2_alloc(size_t size, gfp_t flags)
+{
+	void *ptr;
+
+	BUG_ON(size & (size-1)); /* Must be a power of 2 */
+
+	flags |= __GFP_REPEAT;
+	if (size == PAGE_SIZE)
+		ptr = (void *)__get_free_pages(flags, 0);
+	else if (size > PAGE_SIZE) {
+		int order = get_order(size);
+
+		if (order < 3)
+			ptr = (void *)__get_free_pages(flags, order);
+		else
+			ptr = vmalloc(size);
+	} else
+		ptr = kmem_cache_alloc(get_slab(size), flags);
+
+	/* Check alignment; SLUB has gotten this wrong in the past,
+	 * and this can lead to user data corruption! */
+	BUG_ON(((unsigned long) ptr) & (size-1));
+
+	return ptr;
+}
+
+void jbd2_free(void *ptr, size_t size)
+{
+	if (size == PAGE_SIZE) {
+		free_pages((unsigned long)ptr, 0);
+		return;
+	}
+	if (size > PAGE_SIZE) {
+		int order = get_order(size);
+
+		if (order < 3)
+			free_pages((unsigned long)ptr, order);
+		else
+			vfree(ptr);
+		return;
+	}
+	kmem_cache_free(get_slab(size), ptr);
+};
+
 /*
  * Journal_head storage management
  */
@@ -2204,6 +2335,7 @@ static void jbd2_journal_destroy_caches(void)
 	jbd2_journal_destroy_revoke_caches();
 	jbd2_journal_destroy_jbd2_journal_head_cache();
 	jbd2_journal_destroy_handle_cache();
+	jbd2_journal_destroy_slabs();
 }
 
 static int __init journal_init(void)
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index 638ce4554c76..8ada2a129d08 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -69,15 +69,8 @@ extern u8 jbd2_journal_enable_debug;
 #define jbd_debug(f, a...)	/**/
 #endif
 
-static inline void *jbd2_alloc(size_t size, gfp_t flags)
-{
-	return (void *)__get_free_pages(flags, get_order(size));
-}
-
-static inline void jbd2_free(void *ptr, size_t size)
-{
-	free_pages((unsigned long)ptr, get_order(size));
-};
+extern void *jbd2_alloc(size_t size, gfp_t flags);
+extern void jbd2_free(void *ptr, size_t size);
 
 #define JBD2_MIN_JOURNAL_BLOCKS 1024
 
-- 
cgit v1.2.3


From 85438592f179c126ad4cb9a280046d4f0a501e6d Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 18 Nov 2009 17:53:21 +0900
Subject: percpu: remove compile warnings caused by __verify_pcpu_ptr()

If percpu pointer is const, __verify_pcpu_ptr() triggers warnings like
the following.

 drivers/net/loopback.c: In function 'loopback_get_stats':
 drivers/net/loopback.c:109: warning: initialization discards qualifiers from pointer target type

Fix it by adding const to the verification target pointer used in
__verify_pcpu_ptr().

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
---
 include/linux/percpu-defs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h
index 1fa36eb54b6a..68567c0b3a5d 100644
--- a/include/linux/percpu-defs.h
+++ b/include/linux/percpu-defs.h
@@ -24,7 +24,7 @@
  * input parameter is a percpu pointer.
  */
 #define __verify_pcpu_ptr(ptr)	do {					\
-	void __percpu *__vpp_verify = (typeof(ptr))NULL;		\
+	const void __percpu *__vpp_verify = (typeof(ptr))NULL;		\
 	(void)__vpp_verify;						\
 } while (0)
 
-- 
cgit v1.2.3


From 876fba43cc810e3c37ce26995933f9547b83cb0e Mon Sep 17 00:00:00 2001
From: Jean Delvare <khali@linux-fr.org>
Date: Wed, 11 Nov 2009 15:22:15 +0100
Subject: ACPI: add const to acpi_check_resource_conflict()

acpi_check_resource_conflict() doesn't change the resource
it operates on, so the res parameter can be marked const.

Signed-off-by: Jean Delvare <khali@linux-fr.org>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 drivers/acpi/osl.c   | 2 +-
 include/linux/acpi.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c
index 7c1c59ea9ec6..02e8464e480f 100644
--- a/drivers/acpi/osl.c
+++ b/drivers/acpi/osl.c
@@ -1118,7 +1118,7 @@ __setup("acpi_enforce_resources=", acpi_enforce_resources_setup);
 
 /* Check for resource conflicts between ACPI OperationRegions and native
  * drivers */
-int acpi_check_resource_conflict(struct resource *res)
+int acpi_check_resource_conflict(const struct resource *res)
 {
 	struct acpi_res_list *res_list_elem;
 	int ioport;
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index dfcd920c3e54..c920d2def4d3 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -240,7 +240,7 @@ extern int pnpacpi_disabled;
 #define PXM_INVAL	(-1)
 #define NID_INVAL	(-1)
 
-int acpi_check_resource_conflict(struct resource *res);
+int acpi_check_resource_conflict(const struct resource *res);
 
 int acpi_check_region(resource_size_t start, resource_size_t n,
 		      const char *name);
-- 
cgit v1.2.3


From cc9b2e9f6603190c009e5d2629ce8e3f99571346 Mon Sep 17 00:00:00 2001
From: James Bottomley <James.Bottomley@suse.de>
Date: Thu, 26 Nov 2009 09:50:20 -0600
Subject: [SCSI] enclosure: fix oops while iterating enclosure_status array

Based on patch originally by Jeff Mahoney <jeffm@suse.com>

 enclosure_status is expected to be a NULL terminated array of strings
 but isn't actually NULL terminated. When writing an invalid value to
 /sys/class/enclosure/.../.../status, it goes off the end of the array
 and Oopses.


Fix by making the assumption true and adding NULL at the end.

Reported-by: Artur Wojcik <artur.wojcik@intel.com>
Signed-off-by: James Bottomley <James.Bottomley@suse.de>
---
 drivers/misc/enclosure.c  | 1 +
 include/linux/enclosure.h | 2 ++
 2 files changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/misc/enclosure.c b/drivers/misc/enclosure.c
index e9eae4a78402..1eac626e710a 100644
--- a/drivers/misc/enclosure.c
+++ b/drivers/misc/enclosure.c
@@ -391,6 +391,7 @@ static const char *const enclosure_status [] = {
 	[ENCLOSURE_STATUS_NOT_INSTALLED] = "not installed",
 	[ENCLOSURE_STATUS_UNKNOWN] = "unknown",
 	[ENCLOSURE_STATUS_UNAVAILABLE] = "unavailable",
+	[ENCLOSURE_STATUS_MAX] = NULL,
 };
 
 static const char *const enclosure_type [] = {
diff --git a/include/linux/enclosure.h b/include/linux/enclosure.h
index 90d1c2184112..9a33c5f7e126 100644
--- a/include/linux/enclosure.h
+++ b/include/linux/enclosure.h
@@ -42,6 +42,8 @@ enum enclosure_status {
 	ENCLOSURE_STATUS_NOT_INSTALLED,
 	ENCLOSURE_STATUS_UNKNOWN,
 	ENCLOSURE_STATUS_UNAVAILABLE,
+	/* last element for counting purposes */
+	ENCLOSURE_STATUS_MAX
 };
 
 /* SFF-8485 activity light settings */
-- 
cgit v1.2.3


From f7b3a8355ba6cad251297844a0bdd08898ea36e0 Mon Sep 17 00:00:00 2001
From: Grant Likely <grant.likely@secretlab.ca>
Date: Tue, 24 Nov 2009 03:26:58 -0700
Subject: of/flattree: Merge early_init_dt_check_for_initrd()

Merge common code between PowerPC and Microblaze

Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
Tested-by: Wolfram Sang <w.sang@pengutronix.de>
Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/microblaze/kernel/prom.c | 32 --------------------------------
 arch/powerpc/kernel/prom.c    | 30 ------------------------------
 drivers/of/fdt.c              | 37 +++++++++++++++++++++++++++++++++++++
 include/linux/of_fdt.h        |  1 +
 4 files changed, 38 insertions(+), 62 deletions(-)

(limited to 'include/linux')

diff --git a/arch/microblaze/kernel/prom.c b/arch/microblaze/kernel/prom.c
index a38e3733a09c..7959495b1d00 100644
--- a/arch/microblaze/kernel/prom.c
+++ b/arch/microblaze/kernel/prom.c
@@ -113,38 +113,6 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
 	return 0;
 }
 
-#ifdef CONFIG_BLK_DEV_INITRD
-static void __init early_init_dt_check_for_initrd(unsigned long node)
-{
-	unsigned long l;
-	u32 *prop;
-
-	pr_debug("Looking for initrd properties... ");
-
-	prop = of_get_flat_dt_prop(node, "linux,initrd-start", &l);
-	if (prop) {
-		initrd_start = (unsigned long)
-					__va((u32)of_read_ulong(prop, l/4));
-
-		prop = of_get_flat_dt_prop(node, "linux,initrd-end", &l);
-		if (prop) {
-			initrd_end = (unsigned long)
-					__va((u32)of_read_ulong(prop, 1/4));
-			initrd_below_start_ok = 1;
-		} else {
-			initrd_start = 0;
-		}
-	}
-
-	pr_debug("initrd_start=0x%lx  initrd_end=0x%lx\n",
-					initrd_start, initrd_end);
-}
-#else
-static inline void early_init_dt_check_for_initrd(unsigned long node)
-{
-}
-#endif /* CONFIG_BLK_DEV_INITRD */
-
 static int __init early_init_dt_scan_chosen(unsigned long node,
 				const char *uname, int depth, void *data)
 {
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 7f8856655144..1ecd6c6ecabd 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -373,36 +373,6 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
 	return 0;
 }
 
-#ifdef CONFIG_BLK_DEV_INITRD
-static void __init early_init_dt_check_for_initrd(unsigned long node)
-{
-	unsigned long l;
-	u32 *prop;
-
-	DBG("Looking for initrd properties... ");
-
-	prop = of_get_flat_dt_prop(node, "linux,initrd-start", &l);
-	if (prop) {
-		initrd_start = (unsigned long)__va(of_read_ulong(prop, l/4));
-
-		prop = of_get_flat_dt_prop(node, "linux,initrd-end", &l);
-		if (prop) {
-			initrd_end = (unsigned long)
-					__va(of_read_ulong(prop, l/4));
-			initrd_below_start_ok = 1;
-		} else {
-			initrd_start = 0;
-		}
-	}
-
-	DBG("initrd_start=0x%lx  initrd_end=0x%lx\n", initrd_start, initrd_end);
-}
-#else
-static inline void early_init_dt_check_for_initrd(unsigned long node)
-{
-}
-#endif /* CONFIG_BLK_DEV_INITRD */
-
 static int __init early_init_dt_scan_chosen(unsigned long node,
 					    const char *uname, int depth, void *data)
 {
diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
index 43d236cbc17b..6ad98e85dc93 100644
--- a/drivers/of/fdt.c
+++ b/drivers/of/fdt.c
@@ -11,6 +11,7 @@
 
 #include <linux/kernel.h>
 #include <linux/lmb.h>
+#include <linux/initrd.h>
 #include <linux/of.h>
 #include <linux/of_fdt.h>
 
@@ -369,6 +370,42 @@ unsigned long __init unflatten_dt_node(unsigned long mem,
 	return mem;
 }
 
+#ifdef CONFIG_BLK_DEV_INITRD
+/**
+ * early_init_dt_check_for_initrd - Decode initrd location from flat tree
+ * @node: reference to node containing initrd location ('chosen')
+ */
+void __init early_init_dt_check_for_initrd(unsigned long node)
+{
+	unsigned long len;
+	u32 *prop;
+
+	pr_debug("Looking for initrd properties... ");
+
+	prop = of_get_flat_dt_prop(node, "linux,initrd-start", &len);
+	if (prop) {
+		initrd_start = (unsigned long)
+				__va(of_read_ulong(prop, len/4));
+
+		prop = of_get_flat_dt_prop(node, "linux,initrd-end", &len);
+		if (prop) {
+			initrd_end = (unsigned long)
+				__va(of_read_ulong(prop, len/4));
+			initrd_below_start_ok = 1;
+		} else {
+			initrd_start = 0;
+		}
+	}
+
+	pr_debug("initrd_start=0x%lx  initrd_end=0x%lx\n",
+		 initrd_start, initrd_end);
+}
+#else
+inline void early_init_dt_check_for_initrd(unsigned long node)
+{
+}
+#endif /* CONFIG_BLK_DEV_INITRD */
+
 /**
  * unflatten_device_tree - create tree of device_nodes from flat blob
  *
diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h
index 81231e04e8f3..ec2db8278c3f 100644
--- a/include/linux/of_fdt.h
+++ b/include/linux/of_fdt.h
@@ -69,6 +69,7 @@ extern void *of_get_flat_dt_prop(unsigned long node, const char *name,
 				 unsigned long *size);
 extern int of_flat_dt_is_compatible(unsigned long node, const char *name);
 extern unsigned long of_get_flat_dt_root(void);
+extern void early_init_dt_check_for_initrd(unsigned long node);
 
 /* Other Prototypes */
 extern void finish_device_tree(void);
-- 
cgit v1.2.3


From f00abd94918c9780f9d2d961fc0e419c11457922 Mon Sep 17 00:00:00 2001
From: Grant Likely <grant.likely@secretlab.ca>
Date: Tue, 24 Nov 2009 03:27:10 -0700
Subject: of/flattree: Merge earlyinit_dt_scan_root()

Merge common code between PowerPC and Microblaze

Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Tested-by: Wolfram Sang <w.sang@pengutronix.de>
---
 arch/microblaze/kernel/prom.c | 23 -----------------------
 arch/powerpc/kernel/prom.c    | 24 ------------------------
 drivers/of/fdt.c              | 26 ++++++++++++++++++++++++++
 include/linux/of_fdt.h        |  6 ++++++
 4 files changed, 32 insertions(+), 47 deletions(-)

(limited to 'include/linux')

diff --git a/arch/microblaze/kernel/prom.c b/arch/microblaze/kernel/prom.c
index 7959495b1d00..189179a9b554 100644
--- a/arch/microblaze/kernel/prom.c
+++ b/arch/microblaze/kernel/prom.c
@@ -42,9 +42,6 @@
 #include <asm/sections.h>
 #include <asm/pci-bridge.h>
 
-static int __initdata dt_root_addr_cells;
-static int __initdata dt_root_size_cells;
-
 typedef u32 cell_t;
 
 /* export that to outside world */
@@ -158,26 +155,6 @@ static int __init early_init_dt_scan_chosen(unsigned long node,
 	return 1;
 }
 
-static int __init early_init_dt_scan_root(unsigned long node,
-				const char *uname, int depth, void *data)
-{
-	u32 *prop;
-
-	if (depth != 0)
-		return 0;
-
-	prop = of_get_flat_dt_prop(node, "#size-cells", NULL);
-	dt_root_size_cells = (prop == NULL) ? 1 : *prop;
-	pr_debug("dt_root_size_cells = %x\n", dt_root_size_cells);
-
-	prop = of_get_flat_dt_prop(node, "#address-cells", NULL);
-	dt_root_addr_cells = (prop == NULL) ? 2 : *prop;
-	pr_debug("dt_root_addr_cells = %x\n", dt_root_addr_cells);
-
-	/* break now */
-	return 1;
-}
-
 static u64 __init dt_mem_next_cell(int s, cell_t **cellp)
 {
 	cell_t *p = *cellp;
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 1ecd6c6ecabd..78f65a4d8b03 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -61,10 +61,6 @@
 #define DBG(fmt...)
 #endif
 
-
-static int __initdata dt_root_addr_cells;
-static int __initdata dt_root_size_cells;
-
 #ifdef CONFIG_PPC64
 int __initdata iommu_is_off;
 int __initdata iommu_force_on;
@@ -436,26 +432,6 @@ static int __init early_init_dt_scan_chosen(unsigned long node,
 	return 1;
 }
 
-static int __init early_init_dt_scan_root(unsigned long node,
-					  const char *uname, int depth, void *data)
-{
-	u32 *prop;
-
-	if (depth != 0)
-		return 0;
-
-	prop = of_get_flat_dt_prop(node, "#size-cells", NULL);
-	dt_root_size_cells = (prop == NULL) ? 1 : *prop;
-	DBG("dt_root_size_cells = %x\n", dt_root_size_cells);
-
-	prop = of_get_flat_dt_prop(node, "#address-cells", NULL);
-	dt_root_addr_cells = (prop == NULL) ? 2 : *prop;
-	DBG("dt_root_addr_cells = %x\n", dt_root_addr_cells);
-	
-	/* break now */
-	return 1;
-}
-
 static u64 __init dt_mem_next_cell(int s, cell_t **cellp)
 {
 	cell_t *p = *cellp;
diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
index 6ad98e85dc93..be200be47269 100644
--- a/drivers/of/fdt.c
+++ b/drivers/of/fdt.c
@@ -15,6 +15,9 @@
 #include <linux/of.h>
 #include <linux/of_fdt.h>
 
+int __initdata dt_root_addr_cells;
+int __initdata dt_root_size_cells;
+
 struct boot_param_header *initial_boot_params;
 
 char *find_flat_dt_string(u32 offset)
@@ -406,6 +409,29 @@ inline void early_init_dt_check_for_initrd(unsigned long node)
 }
 #endif /* CONFIG_BLK_DEV_INITRD */
 
+/**
+ * early_init_dt_scan_root - fetch the top level address and size cells
+ */
+int __init early_init_dt_scan_root(unsigned long node, const char *uname,
+				   int depth, void *data)
+{
+	u32 *prop;
+
+	if (depth != 0)
+		return 0;
+
+	prop = of_get_flat_dt_prop(node, "#size-cells", NULL);
+	dt_root_size_cells = (prop == NULL) ? 1 : *prop;
+	pr_debug("dt_root_size_cells = %x\n", dt_root_size_cells);
+
+	prop = of_get_flat_dt_prop(node, "#address-cells", NULL);
+	dt_root_addr_cells = (prop == NULL) ? 2 : *prop;
+	pr_debug("dt_root_addr_cells = %x\n", dt_root_addr_cells);
+
+	/* break now */
+	return 1;
+}
+
 /**
  * unflatten_device_tree - create tree of device_nodes from flat blob
  *
diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h
index ec2db8278c3f..828c3cdaea78 100644
--- a/include/linux/of_fdt.h
+++ b/include/linux/of_fdt.h
@@ -58,6 +58,8 @@ struct boot_param_header {
 };
 
 /* TBD: Temporary export of fdt globals - remove when code fully merged */
+extern int __initdata dt_root_addr_cells;
+extern int __initdata dt_root_size_cells;
 extern struct boot_param_header *initial_boot_params;
 
 /* For scanning the flat device-tree at boot time */
@@ -71,6 +73,10 @@ extern int of_flat_dt_is_compatible(unsigned long node, const char *name);
 extern unsigned long of_get_flat_dt_root(void);
 extern void early_init_dt_check_for_initrd(unsigned long node);
 
+/* Early flat tree scan hooks */
+extern int early_init_dt_scan_root(unsigned long node, const char *uname,
+				   int depth, void *data);
+
 /* Other Prototypes */
 extern void finish_device_tree(void);
 extern void unflatten_device_tree(void);
-- 
cgit v1.2.3


From 83f7a06eb479e2aeb83536e77a2cb14cc2285e32 Mon Sep 17 00:00:00 2001
From: Grant Likely <grant.likely@secretlab.ca>
Date: Tue, 24 Nov 2009 03:37:56 -0700
Subject: of/flattree: merge dt_mem_next_cell

Merge common code between PowerPC and Microblaze

Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Tested-by: Wolfram Sang <w.sang@pengutronix.de>
---
 arch/microblaze/kernel/prom.c | 8 --------
 arch/powerpc/kernel/prom.c    | 8 --------
 drivers/of/fdt.c              | 8 ++++++++
 include/linux/of_fdt.h        | 1 +
 4 files changed, 9 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/arch/microblaze/kernel/prom.c b/arch/microblaze/kernel/prom.c
index 189179a9b554..e0f4c34ed0f2 100644
--- a/arch/microblaze/kernel/prom.c
+++ b/arch/microblaze/kernel/prom.c
@@ -155,14 +155,6 @@ static int __init early_init_dt_scan_chosen(unsigned long node,
 	return 1;
 }
 
-static u64 __init dt_mem_next_cell(int s, cell_t **cellp)
-{
-	cell_t *p = *cellp;
-
-	*cellp = p + s;
-	return of_read_number(p, s);
-}
-
 static int __init early_init_dt_scan_memory(unsigned long node,
 				const char *uname, int depth, void *data)
 {
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 78f65a4d8b03..048e3a3e9876 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -432,14 +432,6 @@ static int __init early_init_dt_scan_chosen(unsigned long node,
 	return 1;
 }
 
-static u64 __init dt_mem_next_cell(int s, cell_t **cellp)
-{
-	cell_t *p = *cellp;
-
-	*cellp = p + s;
-	return of_read_number(p, s);
-}
-
 #ifdef CONFIG_PPC_PSERIES
 /*
  * Interpret the ibm,dynamic-memory property in the
diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
index be200be47269..ebce509b0886 100644
--- a/drivers/of/fdt.c
+++ b/drivers/of/fdt.c
@@ -432,6 +432,14 @@ int __init early_init_dt_scan_root(unsigned long node, const char *uname,
 	return 1;
 }
 
+u64 __init dt_mem_next_cell(int s, u32 **cellp)
+{
+	u32 *p = *cellp;
+
+	*cellp = p + s;
+	return of_read_number(p, s);
+}
+
 /**
  * unflatten_device_tree - create tree of device_nodes from flat blob
  *
diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h
index 828c3cdaea78..d1a37e56031e 100644
--- a/include/linux/of_fdt.h
+++ b/include/linux/of_fdt.h
@@ -72,6 +72,7 @@ extern void *of_get_flat_dt_prop(unsigned long node, const char *name,
 extern int of_flat_dt_is_compatible(unsigned long node, const char *name);
 extern unsigned long of_get_flat_dt_root(void);
 extern void early_init_dt_check_for_initrd(unsigned long node);
+extern u64 dt_mem_next_cell(int s, u32 **cellp);
 
 /* Early flat tree scan hooks */
 extern int early_init_dt_scan_root(unsigned long node, const char *uname,
-- 
cgit v1.2.3


From 86e032213424958b45564d0cc96b3316641a49d3 Mon Sep 17 00:00:00 2001
From: Grant Likely <grant.likely@secretlab.ca>
Date: Thu, 10 Dec 2009 23:42:21 -0700
Subject: of/flattree: merge early_init_dt_scan_chosen()

Merge common code between PowerPC and Microblaze.  This patch
splits the arch-specific stuff out into a new function,
early_init_dt_scan_chosen_arch().

Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
Tested-by: Wolfram Sang <w.sang@pengutronix.de>
Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/microblaze/kernel/prom.c | 44 ++---------------------------------------
 arch/powerpc/kernel/prom.c    | 46 ++++++++++---------------------------------
 drivers/of/fdt.c              | 38 +++++++++++++++++++++++++++++++++++
 include/linux/of_fdt.h        |  3 +++
 4 files changed, 53 insertions(+), 78 deletions(-)

(limited to 'include/linux')

diff --git a/arch/microblaze/kernel/prom.c b/arch/microblaze/kernel/prom.c
index 50d8b09d5e3f..5505bcffd7dd 100644
--- a/arch/microblaze/kernel/prom.c
+++ b/arch/microblaze/kernel/prom.c
@@ -108,49 +108,9 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
 	return 0;
 }
 
-static int __init early_init_dt_scan_chosen(unsigned long node,
-				const char *uname, int depth, void *data)
+void __init early_init_dt_scan_chosen_arch(unsigned long node)
 {
-	unsigned long l;
-	char *p;
-
-	pr_debug("search \"chosen\", depth: %d, uname: %s\n", depth, uname);
-
-	if (depth != 1 ||
-		(strcmp(uname, "chosen") != 0 &&
-				strcmp(uname, "chosen@0") != 0))
-		return 0;
-
-#ifdef CONFIG_KEXEC
-	lprop = (u64 *)of_get_flat_dt_prop(node,
-				"linux,crashkernel-base", NULL);
-	if (lprop)
-		crashk_res.start = *lprop;
-
-	lprop = (u64 *)of_get_flat_dt_prop(node,
-				"linux,crashkernel-size", NULL);
-	if (lprop)
-		crashk_res.end = crashk_res.start + *lprop - 1;
-#endif
-
-	early_init_dt_check_for_initrd(node);
-
-	/* Retreive command line */
-	p = of_get_flat_dt_prop(node, "bootargs", &l);
-	if (p != NULL && l > 0)
-		strlcpy(cmd_line, p, min((int)l, COMMAND_LINE_SIZE));
-
-#ifdef CONFIG_CMDLINE
-#ifndef CONFIG_CMDLINE_FORCE
-	if (p == NULL || l == 0 || (l == 1 && (*p) == 0))
-#endif
-		strlcpy(cmd_line, CONFIG_CMDLINE, COMMAND_LINE_SIZE);
-#endif /* CONFIG_CMDLINE */
-
-	pr_debug("Command line is: %s\n", cmd_line);
-
-	/* break now */
-	return 1;
+	/* No Microblaze specific code here */
 }
 
 static int __init early_init_dt_scan_memory(unsigned long node,
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 4e3181cded44..877fad9b3745 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -367,18 +367,9 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
 	return 0;
 }
 
-static int __init early_init_dt_scan_chosen(unsigned long node,
-					    const char *uname, int depth, void *data)
+void __init early_init_dt_scan_chosen_arch(unsigned long node)
 {
 	unsigned long *lprop;
-	unsigned long l;
-	char *p;
-
-	DBG("search \"chosen\", depth: %d, uname: %s\n", depth, uname);
-
-	if (depth != 1 ||
-	    (strcmp(uname, "chosen") != 0 && strcmp(uname, "chosen@0") != 0))
-		return 0;
 
 #ifdef CONFIG_PPC64
 	/* check if iommu is forced on or off */
@@ -389,17 +380,17 @@ static int __init early_init_dt_scan_chosen(unsigned long node,
 #endif
 
 	/* mem=x on the command line is the preferred mechanism */
- 	lprop = of_get_flat_dt_prop(node, "linux,memory-limit", NULL);
- 	if (lprop)
- 		memory_limit = *lprop;
+	lprop = of_get_flat_dt_prop(node, "linux,memory-limit", NULL);
+	if (lprop)
+		memory_limit = *lprop;
 
 #ifdef CONFIG_PPC64
- 	lprop = of_get_flat_dt_prop(node, "linux,tce-alloc-start", NULL);
- 	if (lprop)
- 		tce_alloc_start = *lprop;
- 	lprop = of_get_flat_dt_prop(node, "linux,tce-alloc-end", NULL);
- 	if (lprop)
- 		tce_alloc_end = *lprop;
+	lprop = of_get_flat_dt_prop(node, "linux,tce-alloc-start", NULL);
+	if (lprop)
+		tce_alloc_start = *lprop;
+	lprop = of_get_flat_dt_prop(node, "linux,tce-alloc-end", NULL);
+	if (lprop)
+		tce_alloc_end = *lprop;
 #endif
 
 #ifdef CONFIG_KEXEC
@@ -411,23 +402,6 @@ static int __init early_init_dt_scan_chosen(unsigned long node,
 	if (lprop)
 		crashk_res.end = crashk_res.start + *lprop - 1;
 #endif
-
-	early_init_dt_check_for_initrd(node);
-
-	/* Retreive command line */
- 	p = of_get_flat_dt_prop(node, "bootargs", &l);
-	if (p != NULL && l > 0)
-		strlcpy(cmd_line, p, min((int)l, COMMAND_LINE_SIZE));
-
-#ifdef CONFIG_CMDLINE
-	if (p == NULL || l == 0 || (l == 1 && (*p) == 0))
-		strlcpy(cmd_line, CONFIG_CMDLINE, COMMAND_LINE_SIZE);
-#endif /* CONFIG_CMDLINE */
-
-	DBG("Command line is: %s\n", cmd_line);
-
-	/* break now */
-	return 1;
 }
 
 #ifdef CONFIG_PPC_PSERIES
diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
index ebce509b0886..616a4767a950 100644
--- a/drivers/of/fdt.c
+++ b/drivers/of/fdt.c
@@ -15,6 +15,10 @@
 #include <linux/of.h>
 #include <linux/of_fdt.h>
 
+#ifdef CONFIG_PPC
+#include <asm/machdep.h>
+#endif /* CONFIG_PPC */
+
 int __initdata dt_root_addr_cells;
 int __initdata dt_root_size_cells;
 
@@ -440,6 +444,40 @@ u64 __init dt_mem_next_cell(int s, u32 **cellp)
 	return of_read_number(p, s);
 }
 
+int __init early_init_dt_scan_chosen(unsigned long node, const char *uname,
+				     int depth, void *data)
+{
+	unsigned long l;
+	char *p;
+
+	pr_debug("search \"chosen\", depth: %d, uname: %s\n", depth, uname);
+
+	if (depth != 1 ||
+	    (strcmp(uname, "chosen") != 0 && strcmp(uname, "chosen@0") != 0))
+		return 0;
+
+	early_init_dt_check_for_initrd(node);
+
+	/* Retreive command line */
+	p = of_get_flat_dt_prop(node, "bootargs", &l);
+	if (p != NULL && l > 0)
+		strlcpy(cmd_line, p, min((int)l, COMMAND_LINE_SIZE));
+
+#ifdef CONFIG_CMDLINE
+#ifndef CONFIG_CMDLINE_FORCE
+	if (p == NULL || l == 0 || (l == 1 && (*p) == 0))
+#endif
+		strlcpy(cmd_line, CONFIG_CMDLINE, COMMAND_LINE_SIZE);
+#endif /* CONFIG_CMDLINE */
+
+	early_init_dt_scan_chosen_arch(node);
+
+	pr_debug("Command line is: %s\n", cmd_line);
+
+	/* break now */
+	return 1;
+}
+
 /**
  * unflatten_device_tree - create tree of device_nodes from flat blob
  *
diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h
index d1a37e56031e..8118d4559dd5 100644
--- a/include/linux/of_fdt.h
+++ b/include/linux/of_fdt.h
@@ -71,6 +71,9 @@ extern void *of_get_flat_dt_prop(unsigned long node, const char *name,
 				 unsigned long *size);
 extern int of_flat_dt_is_compatible(unsigned long node, const char *name);
 extern unsigned long of_get_flat_dt_root(void);
+extern void early_init_dt_scan_chosen_arch(unsigned long node);
+extern int early_init_dt_scan_chosen(unsigned long node, const char *uname,
+				     int depth, void *data);
 extern void early_init_dt_check_for_initrd(unsigned long node);
 extern u64 dt_mem_next_cell(int s, u32 **cellp);
 
-- 
cgit v1.2.3


From a88f6667078412e5eff37ead68a043ee0ec9f1da Mon Sep 17 00:00:00 2001
From: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Date: Thu, 10 Dec 2009 18:35:15 +0100
Subject: dmaengine: clarify the meaning of the DMA_CTRL_ACK flag

DMA_CTRL_ACK's description applies to its clear state, not to its set state.

Signed-off-by: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 include/linux/dmaengine.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index 2b9f2ac7ed60..78784982b33e 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -74,7 +74,7 @@ enum dma_transaction_type {
  *  control completion, and communicate status.
  * @DMA_PREP_INTERRUPT - trigger an interrupt (callback) upon completion of
  *  this transaction
- * @DMA_CTRL_ACK - the descriptor cannot be reused until the client
+ * @DMA_CTRL_ACK - if clear, the descriptor cannot be reused until the client
  *  acknowledges receipt, i.e. has has a chance to establish any dependency
  *  chains
  * @DMA_COMPL_SKIP_SRC_UNMAP - set to disable dma-unmapping the source buffer(s)
-- 
cgit v1.2.3


From 03889384cee7a198a79447c1ea6aca2c8e54d155 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Fri, 11 Dec 2009 09:48:22 -0500
Subject: tracing: Add trace_dump_stack()

I've been asked a few times about how to find out what is calling
some location in the kernel. One way is to use dynamic function tracing
and implement the func_stack_trace. But this only finds out who is
calling a particular function. It does not tell you who is calling
that function and entering a specific if conditional.

I have myself implemented a quick version of trace_dump_stack() for
this purpose a few times, and just needed it now. This is when I realized
that this would be a good tool to have in the kernel like trace_printk().

Using trace_dump_stack() is similar to dump_stack() except that it
writes to the trace buffer instead and can be used in critical locations.

For example:

@@ -5485,8 +5485,12 @@ need_resched_nonpreemptible:
 	if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) {
 		if (unlikely(signal_pending_state(prev->state, prev)))
 			prev->state = TASK_RUNNING;
-		else
+		else {
 			deactivate_task(rq, prev, 1);
+			trace_printk("Deactivating task %s:%d\n",
+				     prev->comm, prev->pid);
+			trace_dump_stack();
+		}
 		switch_count = &prev->nvcsw;
 	}

Produces:

           <...>-3249  [001]   296.105269: schedule: Deactivating task ntpd:3249
           <...>-3249  [001]   296.105270: <stack trace>
 => schedule
 => schedule_hrtimeout_range
 => poll_schedule_timeout
 => do_select
 => core_sys_select
 => sys_select
 => system_call_fastpath

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/kernel.h |  2 ++
 kernel/trace/trace.c   | 16 ++++++++++++++++
 2 files changed, 18 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 3fa4c590cf12..5ad4199fb073 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -492,6 +492,8 @@ extern int
 __trace_printk(unsigned long ip, const char *fmt, ...)
 	__attribute__ ((format (printf, 2, 3)));
 
+extern void trace_dump_stack(void);
+
 /*
  * The double __builtin_constant_p is because gcc will give us an error
  * if we try to allocate the static variable to fmt if it is not a
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 88bd9ae2a9ed..f531301b7a3b 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1151,6 +1151,22 @@ void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
 	__ftrace_trace_stack(tr->buffer, flags, skip, pc);
 }
 
+/**
+ * trace_dump_stack - record a stack back trace in the trace buffer
+ */
+void trace_dump_stack(void)
+{
+	unsigned long flags;
+
+	if (tracing_disabled || tracing_selftest_running)
+		return 0;
+
+	local_save_flags(flags);
+
+	/* skipping 3 traces, seems to get us at the caller of this function */
+	__ftrace_trace_stack(global_trace.buffer, flags, 3, preempt_count());
+}
+
 void
 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
 {
-- 
cgit v1.2.3


From 967c9ef9b8c3bdec1bd3a380edac19e0b9fbeadc Mon Sep 17 00:00:00 2001
From: Matthew Garrett <mjg@redhat.com>
Date: Fri, 11 Dec 2009 22:00:57 -0800
Subject: Input: i8042 - allow installing platform filters for incoming data

Some hardware (such as Dell laptops) signal a variety of events through
the i8042 controller, even if these don't map to keyboard events. Add
support for drivers to filter the i8042 event stream in order to respond
to these events and (if appropriate) block them from entering the input
stream.

Signed-off-by: Matthew Garrett <mjg@redhat.com>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/input/serio/i8042.c | 58 ++++++++++++++++++++++++++++++++++++++++++---
 include/linux/i8042.h       | 18 +++++++++++++-
 2 files changed, 72 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/input/serio/i8042.c b/drivers/input/serio/i8042.c
index 634da68f7f35..d84a36e545f6 100644
--- a/drivers/input/serio/i8042.c
+++ b/drivers/input/serio/i8042.c
@@ -126,6 +126,8 @@ static unsigned char i8042_suppress_kbd_ack;
 static struct platform_device *i8042_platform_device;
 
 static irqreturn_t i8042_interrupt(int irq, void *dev_id);
+static bool (*i8042_platform_filter)(unsigned char data, unsigned char str,
+				     struct serio *serio);
 
 void i8042_lock_chip(void)
 {
@@ -139,6 +141,48 @@ void i8042_unlock_chip(void)
 }
 EXPORT_SYMBOL(i8042_unlock_chip);
 
+int i8042_install_filter(bool (*filter)(unsigned char data, unsigned char str,
+					struct serio *serio))
+{
+	unsigned long flags;
+	int ret = 0;
+
+	spin_lock_irqsave(&i8042_lock, flags);
+
+	if (i8042_platform_filter) {
+		ret = -EBUSY;
+		goto out;
+	}
+
+	i8042_platform_filter = filter;
+
+out:
+	spin_unlock_irqrestore(&i8042_lock, flags);
+	return ret;
+}
+EXPORT_SYMBOL(i8042_install_filter);
+
+int i8042_remove_filter(bool (*filter)(unsigned char data, unsigned char str,
+				       struct serio *port))
+{
+	unsigned long flags;
+	int ret = 0;
+
+	spin_lock_irqsave(&i8042_lock, flags);
+
+	if (i8042_platform_filter != filter) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	i8042_platform_filter = NULL;
+
+out:
+	spin_unlock_irqrestore(&i8042_lock, flags);
+	return ret;
+}
+EXPORT_SYMBOL(i8042_remove_filter);
+
 /*
  * The i8042_wait_read() and i8042_wait_write functions wait for the i8042 to
  * be ready for reading values from it / writing values to it.
@@ -373,7 +417,8 @@ static void i8042_stop(struct serio *serio)
  * It is called from i8042_interrupt and thus is running with interrupts
  * off and i8042_lock held.
  */
-static bool i8042_filter(unsigned char data, unsigned char str)
+static bool i8042_filter(unsigned char data, unsigned char str,
+			 struct serio *serio)
 {
 	if (unlikely(i8042_suppress_kbd_ack)) {
 		if ((~str & I8042_STR_AUXDATA) &&
@@ -384,6 +429,11 @@ static bool i8042_filter(unsigned char data, unsigned char str)
 		}
 	}
 
+	if (i8042_platform_filter && i8042_platform_filter(data, str, serio)) {
+		dbg("Filtered out by platfrom filter\n");
+		return true;
+	}
+
 	return false;
 }
 
@@ -396,6 +446,7 @@ static bool i8042_filter(unsigned char data, unsigned char str)
 static irqreturn_t i8042_interrupt(int irq, void *dev_id)
 {
 	struct i8042_port *port;
+	struct serio *serio;
 	unsigned long flags;
 	unsigned char str, data;
 	unsigned int dfl;
@@ -462,18 +513,19 @@ static irqreturn_t i8042_interrupt(int irq, void *dev_id)
 	}
 
 	port = &i8042_ports[port_no];
+	serio = port->exists ? port->serio : NULL;
 
 	dbg("%02x <- i8042 (interrupt, %d, %d%s%s)",
 	    data, port_no, irq,
 	    dfl & SERIO_PARITY ? ", bad parity" : "",
 	    dfl & SERIO_TIMEOUT ? ", timeout" : "");
 
-	filtered = i8042_filter(data, str);
+	filtered = i8042_filter(data, str, serio);
 
 	spin_unlock_irqrestore(&i8042_lock, flags);
 
 	if (likely(port->exists && !filtered))
-		serio_interrupt(port->serio, data, dfl);
+		serio_interrupt(serio, data, dfl);
 
  out:
 	return IRQ_RETVAL(ret);
diff --git a/include/linux/i8042.h b/include/linux/i8042.h
index 60c3360ef6ad..9bf6870ee5f4 100644
--- a/include/linux/i8042.h
+++ b/include/linux/i8042.h
@@ -39,6 +39,10 @@ void i8042_lock_chip(void);
 void i8042_unlock_chip(void);
 int i8042_command(unsigned char *param, int command);
 bool i8042_check_port_owner(const struct serio *);
+int i8042_install_filter(bool (*filter)(unsigned char data, unsigned char str,
+					struct serio *serio));
+int i8042_remove_filter(bool (*filter)(unsigned char data, unsigned char str,
+				       struct serio *serio));
 
 #else
 
@@ -52,7 +56,7 @@ void i8042_unlock_chip(void)
 
 int i8042_command(unsigned char *param, int command)
 {
-	return -ENOSYS;
+	return -ENODEV;
 }
 
 bool i8042_check_port_owner(const struct serio *serio)
@@ -60,6 +64,18 @@ bool i8042_check_port_owner(const struct serio *serio)
 	return false;
 }
 
+int i8042_install_filter(bool (*filter)(unsigned char data, unsigned char str,
+					struct serio *serio))
+{
+	return -ENODEV;
+}
+
+int i8042_remove_filter(bool (*filter)(unsigned char data, unsigned char str,
+				       struct serio *serio))
+{
+	return -ENODEV;
+}
+
 #endif
 
 #endif
-- 
cgit v1.2.3


From 01fc0ac198eabcbf460e1ed058860a935b6c2c9a Mon Sep 17 00:00:00 2001
From: Sam Ravnborg <sam@ravnborg.org>
Date: Sun, 19 Apr 2009 21:57:19 +0200
Subject: kbuild: move bounds.h to include/generated

Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Michal Marek <mmarek@suse.cz>
---
 .gitignore                 | 1 -
 Kbuild                     | 2 +-
 Makefile                   | 2 +-
 include/linux/mmzone.h     | 2 +-
 include/linux/page-flags.h | 2 +-
 kernel/bounds.c            | 2 +-
 6 files changed, 5 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/.gitignore b/.gitignore
index 946c7ec5c922..36d9cd6d4281 100644
--- a/.gitignore
+++ b/.gitignore
@@ -52,7 +52,6 @@ include/linux/autoconf.h
 include/linux/compile.h
 include/linux/version.h
 include/linux/utsrelease.h
-include/linux/bounds.h
 include/generated
 
 # stgit generated dirs
diff --git a/Kbuild b/Kbuild
index f056b4feee51..1165d7a5ca4a 100644
--- a/Kbuild
+++ b/Kbuild
@@ -8,7 +8,7 @@
 #####
 # 1) Generate bounds.h
 
-bounds-file := include/linux/bounds.h
+bounds-file := include/generated/bounds.h
 
 always  := $(bounds-file)
 targets := $(bounds-file) kernel/bounds.s
diff --git a/Makefile b/Makefile
index 07711786dc95..b58e9312ce30 100644
--- a/Makefile
+++ b/Makefile
@@ -1197,7 +1197,7 @@ MRPROPER_DIRS  += include/config include2 usr/include include/generated
 MRPROPER_FILES += .config .config.old include/asm .version .old_version \
                   include/linux/autoconf.h include/linux/version.h      \
                   include/linux/utsrelease.h                            \
-                  include/linux/bounds.h include/asm*/asm-offsets.h     \
+                  include/asm*/asm-offsets.h                            \
 		  Module.symvers Module.markers tags TAGS cscope*
 
 # clean - Delete most, but leave enough to build external modules
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 6f7561730d88..30fe668c2542 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -15,7 +15,7 @@
 #include <linux/seqlock.h>
 #include <linux/nodemask.h>
 #include <linux/pageblock-flags.h>
-#include <linux/bounds.h>
+#include <generated/bounds.h>
 #include <asm/atomic.h>
 #include <asm/page.h>
 
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 6b202b173955..ef36725aa515 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -8,7 +8,7 @@
 #include <linux/types.h>
 #ifndef __GENERATING_BOUNDS_H
 #include <linux/mm_types.h>
-#include <linux/bounds.h>
+#include <generated/bounds.h>
 #endif /* !__GENERATING_BOUNDS_H */
 
 /*
diff --git a/kernel/bounds.c b/kernel/bounds.c
index 3c5301381837..98a51f26c136 100644
--- a/kernel/bounds.c
+++ b/kernel/bounds.c
@@ -12,7 +12,7 @@
 
 void foo(void)
 {
-	/* The enum constants to put into include/linux/bounds.h */
+	/* The enum constants to put into include/generated/bounds.h */
 	DEFINE(NR_PAGEFLAGS, __NR_PAGEFLAGS);
 	DEFINE(MAX_NR_ZONES, __MAX_NR_ZONES);
 	/* End of constants */
-- 
cgit v1.2.3


From 98b8788ae91694499d1995035625bea16a4db0c4 Mon Sep 17 00:00:00 2001
From: Sam Ravnborg <sam@ravnborg.org>
Date: Sun, 18 Oct 2009 00:39:40 +0200
Subject: drop explicit include of autoconf.h

kbuild.h forces include of autoconf.h on the
commandline using -include - so we do not need to
include the file explicit.

Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
Signed-off-by: Michal Marek <mmarek@suse.cz>
---
 arch/cris/arch-v32/kernel/head.S                | 1 -
 arch/cris/kernel/asm-offsets.c                  | 1 -
 arch/cris/kernel/vmlinux.lds.S                  | 1 -
 arch/ia64/kvm/asm-offsets.c                     | 1 -
 drivers/accessibility/braille/braille_console.c | 1 -
 drivers/hid/hid-lg.h                            | 2 --
 drivers/platform/x86/compal-laptop.c            | 1 -
 drivers/staging/iio/ring_sw.h                   | 1 -
 include/linux/mmdebug.h                         | 2 --
 9 files changed, 11 deletions(-)

(limited to 'include/linux')

diff --git a/arch/cris/arch-v32/kernel/head.S b/arch/cris/arch-v32/kernel/head.S
index 3db478eb5155..76266f80a5f1 100644
--- a/arch/cris/arch-v32/kernel/head.S
+++ b/arch/cris/arch-v32/kernel/head.S
@@ -10,7 +10,6 @@
  * The macros found in mmu_defs_asm.h uses the ## concatenation operator, so
  * -traditional must not be used when assembling this file.
  */
-#include <linux/autoconf.h>
 #include <arch/memmap.h>
 #include <hwregs/reg_rdwr.h>
 #include <hwregs/intr_vect.h>
diff --git a/arch/cris/kernel/asm-offsets.c b/arch/cris/kernel/asm-offsets.c
index ddd6fbbe75de..dd7b8e983221 100644
--- a/arch/cris/kernel/asm-offsets.c
+++ b/arch/cris/kernel/asm-offsets.c
@@ -1,6 +1,5 @@
 #include <linux/sched.h>
 #include <asm/thread_info.h>
-#include <linux/autoconf.h>
 
 /*
  * Generate definitions needed by assembly language modules.
diff --git a/arch/cris/kernel/vmlinux.lds.S b/arch/cris/kernel/vmlinux.lds.S
index bbfda67d2907..d49d17d2a14f 100644
--- a/arch/cris/kernel/vmlinux.lds.S
+++ b/arch/cris/kernel/vmlinux.lds.S
@@ -8,7 +8,6 @@
  * the kernel has booted.
  */
 
-#include <linux/autoconf.h>
 #include <asm-generic/vmlinux.lds.h>
 #include <asm/page.h>
 
diff --git a/arch/ia64/kvm/asm-offsets.c b/arch/ia64/kvm/asm-offsets.c
index 0c3564a7a033..9324c875caf5 100644
--- a/arch/ia64/kvm/asm-offsets.c
+++ b/arch/ia64/kvm/asm-offsets.c
@@ -22,7 +22,6 @@
  *
  */
 
-#include <linux/autoconf.h>
 #include <linux/kvm_host.h>
 #include <linux/kbuild.h>
 
diff --git a/drivers/accessibility/braille/braille_console.c b/drivers/accessibility/braille/braille_console.c
index d672cfe7ca59..cb423f5aef24 100644
--- a/drivers/accessibility/braille/braille_console.c
+++ b/drivers/accessibility/braille/braille_console.c
@@ -21,7 +21,6 @@
  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
-#include <linux/autoconf.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
diff --git a/drivers/hid/hid-lg.h b/drivers/hid/hid-lg.h
index 27ae750ca878..bf31592eaf79 100644
--- a/drivers/hid/hid-lg.h
+++ b/drivers/hid/hid-lg.h
@@ -1,8 +1,6 @@
 #ifndef __HID_LG_H
 #define __HID_LG_H
 
-#include <linux/autoconf.h>
-
 #ifdef CONFIG_LOGITECH_FF
 int lgff_init(struct hid_device *hdev);
 #else
diff --git a/drivers/platform/x86/compal-laptop.c b/drivers/platform/x86/compal-laptop.c
index 11003bba10d3..1a387e79f719 100644
--- a/drivers/platform/x86/compal-laptop.c
+++ b/drivers/platform/x86/compal-laptop.c
@@ -51,7 +51,6 @@
 #include <linux/dmi.h>
 #include <linux/backlight.h>
 #include <linux/platform_device.h>
-#include <linux/autoconf.h>
 
 #define COMPAL_DRIVER_VERSION "0.2.6"
 
diff --git a/drivers/staging/iio/ring_sw.h b/drivers/staging/iio/ring_sw.h
index f0b86f02cd80..fd677f008365 100644
--- a/drivers/staging/iio/ring_sw.h
+++ b/drivers/staging/iio/ring_sw.h
@@ -29,7 +29,6 @@
  * driver requests - some may support multiple options */
 
 
-#include <linux/autoconf.h>
 #include "iio.h"
 #include "ring_generic.h"
 
diff --git a/include/linux/mmdebug.h b/include/linux/mmdebug.h
index 8a5509877192..ee24ef8ab616 100644
--- a/include/linux/mmdebug.h
+++ b/include/linux/mmdebug.h
@@ -1,8 +1,6 @@
 #ifndef LINUX_MM_DEBUG_H
 #define LINUX_MM_DEBUG_H 1
 
-#include <linux/autoconf.h>
-
 #ifdef CONFIG_DEBUG_VM
 #define VM_BUG_ON(cond) BUG_ON(cond)
 #else
-- 
cgit v1.2.3


From 273b281fa22c293963ee3e6eec418f5dda2dbc83 Mon Sep 17 00:00:00 2001
From: Sam Ravnborg <sam@ravnborg.org>
Date: Sun, 18 Oct 2009 00:52:28 +0200
Subject: kbuild: move utsrelease.h to include/generated

Fix up all users of utsrelease.h

Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
Signed-off-by: Michal Marek <mmarek@suse.cz>
---
 .gitignore                                   | 1 -
 Makefile                                     | 5 ++---
 arch/alpha/boot/bootp.c                      | 2 +-
 arch/alpha/boot/bootpz.c                     | 2 +-
 arch/alpha/boot/main.c                       | 2 +-
 arch/frv/kernel/setup.c                      | 2 +-
 arch/powerpc/platforms/52xx/efika.c          | 2 +-
 arch/powerpc/platforms/amigaone/setup.c      | 2 +-
 arch/powerpc/platforms/chrp/setup.c          | 2 +-
 arch/powerpc/platforms/powermac/bootx_init.c | 2 +-
 arch/x86/boot/header.S                       | 2 +-
 arch/x86/boot/version.c                      | 2 +-
 drivers/staging/panel/panel.c                | 2 +-
 include/linux/vermagic.h                     | 2 +-
 init/version.c                               | 2 +-
 kernel/kexec.c                               | 2 +-
 kernel/trace/trace.c                         | 2 +-
 17 files changed, 17 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/.gitignore b/.gitignore
index c6c19ea6ea96..002d5304968b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -47,7 +47,6 @@ Module.symvers
 #
 include/config
 include/linux/version.h
-include/linux/utsrelease.h
 include/generated
 
 # stgit generated dirs
diff --git a/Makefile b/Makefile
index 3bdd932e3d88..860224d7cbcf 100644
--- a/Makefile
+++ b/Makefile
@@ -968,7 +968,7 @@ endif
 # prepare2 creates a makefile if using a separate output directory
 prepare2: prepare3 outputmakefile
 
-prepare1: prepare2 include/linux/version.h include/linux/utsrelease.h \
+prepare1: prepare2 include/linux/version.h include/generated/utsrelease.h \
                    include/config/auto.conf
 	$(cmd_crmodverdir)
 
@@ -1005,7 +1005,7 @@ endef
 include/linux/version.h: $(srctree)/Makefile FORCE
 	$(call filechk,version.h)
 
-include/linux/utsrelease.h: include/config/kernel.release FORCE
+include/generated/utsrelease.h: include/config/kernel.release FORCE
 	$(call filechk,utsrelease.h)
 
 PHONY += headerdep
@@ -1151,7 +1151,6 @@ CLEAN_FILES +=	vmlinux System.map \
 MRPROPER_DIRS  += include/config usr/include include/generated
 MRPROPER_FILES += .config .config.old .version .old_version             \
                   include/linux/version.h                               \
-                  include/linux/utsrelease.h                            \
 		  Module.symvers Module.markers tags TAGS cscope*
 
 # clean - Delete most, but leave enough to build external modules
diff --git a/arch/alpha/boot/bootp.c b/arch/alpha/boot/bootp.c
index 3af21c789339..3c8d1b25c661 100644
--- a/arch/alpha/boot/bootp.c
+++ b/arch/alpha/boot/bootp.c
@@ -9,7 +9,7 @@
  */
 #include <linux/kernel.h>
 #include <linux/string.h>
-#include <linux/utsrelease.h>
+#include <generated/utsrelease.h>
 #include <linux/mm.h>
 
 #include <asm/system.h>
diff --git a/arch/alpha/boot/bootpz.c b/arch/alpha/boot/bootpz.c
index 1036b515e20c..ade3f129dc27 100644
--- a/arch/alpha/boot/bootpz.c
+++ b/arch/alpha/boot/bootpz.c
@@ -11,7 +11,7 @@
  */
 #include <linux/kernel.h>
 #include <linux/string.h>
-#include <linux/utsrelease.h>
+#include <generated/utsrelease.h>
 #include <linux/mm.h>
 
 #include <asm/system.h>
diff --git a/arch/alpha/boot/main.c b/arch/alpha/boot/main.c
index 89f3be071ae5..644b7db55438 100644
--- a/arch/alpha/boot/main.c
+++ b/arch/alpha/boot/main.c
@@ -7,7 +7,7 @@
  */
 #include <linux/kernel.h>
 #include <linux/string.h>
-#include <linux/utsrelease.h>
+#include <generated/utsrelease.h>
 #include <linux/mm.h>
 
 #include <asm/system.h>
diff --git a/arch/frv/kernel/setup.c b/arch/frv/kernel/setup.c
index 55e4fab7c0bc..75cf7f4b2fa8 100644
--- a/arch/frv/kernel/setup.c
+++ b/arch/frv/kernel/setup.c
@@ -10,7 +10,7 @@
  * 2 of the License, or (at your option) any later version.
  */
 
-#include <linux/utsrelease.h>
+#include <generated/utsrelease.h>
 #include <linux/kernel.h>
 #include <linux/sched.h>
 #include <linux/delay.h>
diff --git a/arch/powerpc/platforms/52xx/efika.c b/arch/powerpc/platforms/52xx/efika.c
index bcc69e1f77c1..45c0cb9b67e6 100644
--- a/arch/powerpc/platforms/52xx/efika.c
+++ b/arch/powerpc/platforms/52xx/efika.c
@@ -10,7 +10,7 @@
  */
 
 #include <linux/init.h>
-#include <linux/utsrelease.h>
+#include <generated/utsrelease.h>
 #include <linux/pci.h>
 #include <linux/of.h>
 #include <asm/prom.h>
diff --git a/arch/powerpc/platforms/amigaone/setup.c b/arch/powerpc/platforms/amigaone/setup.c
index 9290a7a442d0..fb4eb0df054c 100644
--- a/arch/powerpc/platforms/amigaone/setup.c
+++ b/arch/powerpc/platforms/amigaone/setup.c
@@ -14,7 +14,7 @@
 
 #include <linux/kernel.h>
 #include <linux/seq_file.h>
-#include <linux/utsrelease.h>
+#include <generated/utsrelease.h>
 
 #include <asm/machdep.h>
 #include <asm/cputable.h>
diff --git a/arch/powerpc/platforms/chrp/setup.c b/arch/powerpc/platforms/chrp/setup.c
index cd4ad9aea760..0a6f5ab8aab3 100644
--- a/arch/powerpc/platforms/chrp/setup.c
+++ b/arch/powerpc/platforms/chrp/setup.c
@@ -23,7 +23,7 @@
 #include <linux/reboot.h>
 #include <linux/init.h>
 #include <linux/pci.h>
-#include <linux/utsrelease.h>
+#include <generated/utsrelease.h>
 #include <linux/adb.h>
 #include <linux/module.h>
 #include <linux/delay.h>
diff --git a/arch/powerpc/platforms/powermac/bootx_init.c b/arch/powerpc/platforms/powermac/bootx_init.c
index cf660916ae0b..9dd789a7370d 100644
--- a/arch/powerpc/platforms/powermac/bootx_init.c
+++ b/arch/powerpc/platforms/powermac/bootx_init.c
@@ -12,7 +12,7 @@
 #include <linux/kernel.h>
 #include <linux/string.h>
 #include <linux/init.h>
-#include <linux/utsrelease.h>
+#include <generated/utsrelease.h>
 #include <asm/sections.h>
 #include <asm/prom.h>
 #include <asm/page.h>
diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S
index b31cc54b4641..93e689f4bd86 100644
--- a/arch/x86/boot/header.S
+++ b/arch/x86/boot/header.S
@@ -16,7 +16,7 @@
  */
 
 #include <asm/segment.h>
-#include <linux/utsrelease.h>
+#include <generated/utsrelease.h>
 #include <asm/boot.h>
 #include <asm/e820.h>
 #include <asm/page_types.h>
diff --git a/arch/x86/boot/version.c b/arch/x86/boot/version.c
index 4d88763e39cb..2b15aa488ffb 100644
--- a/arch/x86/boot/version.c
+++ b/arch/x86/boot/version.c
@@ -13,7 +13,7 @@
  */
 
 #include "boot.h"
-#include <linux/utsrelease.h>
+#include <generated/utsrelease.h>
 #include <generated/compile.h>
 
 const char kernel_version[] =
diff --git a/drivers/staging/panel/panel.c b/drivers/staging/panel/panel.c
index 4ce399b6d237..f98a52448eae 100644
--- a/drivers/staging/panel/panel.c
+++ b/drivers/staging/panel/panel.c
@@ -55,7 +55,7 @@
 #include <linux/list.h>
 #include <linux/notifier.h>
 #include <linux/reboot.h>
-#include <linux/utsrelease.h>
+#include <generated/utsrelease.h>
 
 #include <linux/io.h>
 #include <asm/uaccess.h>
diff --git a/include/linux/vermagic.h b/include/linux/vermagic.h
index 79b9837d9ca0..cf97b5b9d1fe 100644
--- a/include/linux/vermagic.h
+++ b/include/linux/vermagic.h
@@ -1,4 +1,4 @@
-#include <linux/utsrelease.h>
+#include <generated/utsrelease.h>
 #include <linux/module.h>
 
 /* Simply sanity version stamp for modules. */
diff --git a/init/version.c b/init/version.c
index 82328aaca1ef..adff586401a5 100644
--- a/init/version.c
+++ b/init/version.c
@@ -10,7 +10,7 @@
 #include <linux/module.h>
 #include <linux/uts.h>
 #include <linux/utsname.h>
-#include <linux/utsrelease.h>
+#include <generated/utsrelease.h>
 #include <linux/version.h>
 
 #ifndef CONFIG_KALLSYMS
diff --git a/kernel/kexec.c b/kernel/kexec.c
index f336e2107f98..83f54e2a6eed 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -21,7 +21,7 @@
 #include <linux/hardirq.h>
 #include <linux/elf.h>
 #include <linux/elfcore.h>
-#include <linux/utsrelease.h>
+#include <generated/utsrelease.h>
 #include <linux/utsname.h>
 #include <linux/numa.h>
 #include <linux/suspend.h>
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 88bd9ae2a9ed..bfb1b64bfa9d 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -12,7 +12,7 @@
  *  Copyright (C) 2004 William Lee Irwin III
  */
 #include <linux/ring_buffer.h>
-#include <linux/utsrelease.h>
+#include <generated/utsrelease.h>
 #include <linux/stacktrace.h>
 #include <linux/writeback.h>
 #include <linux/kallsyms.h>
-- 
cgit v1.2.3


From 7a77080dbec28ab2bceb422398601dcc53c142ad Mon Sep 17 00:00:00 2001
From: Jie Zhang <jie.zhang@analog.com>
Date: Thu, 12 Nov 2009 17:59:56 +0800
Subject: net: add net_tstamp.h to headers_install

include/linux/net_tstamp.h is userspace API for hardware time stamping
of network packets. It should be exported to userspace.

Signed-off-by: Jie Zhang <jie.zhang@analog.com>
Signed-off-by: Barry Song <barry.song@analog.com>
Signed-off-by: Patrick Ohly <patrick.ohly@gmx.de>
Signed-off-by: Michal Marek <mmarek@suse.cz>
---
 include/linux/Kbuild | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index f72914db2a11..756f831cbdd5 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -118,6 +118,7 @@ header-y += mtio.h
 header-y += ncp_no.h
 header-y += neighbour.h
 header-y += net_dropmon.h
+header-y += net_tstamp.h
 header-y += netfilter_arp.h
 header-y += netrom.h
 header-y += nfs2.h
-- 
cgit v1.2.3


From c5df7f775148723de39274537a886e9502eef336 Mon Sep 17 00:00:00 2001
From: Albert Herranz <albert_herranz@yahoo.es>
Date: Sat, 12 Dec 2009 06:31:54 +0000
Subject: powerpc: allow ioremap within reserved memory regions

Add a flag to let a platform ioremap memory regions marked as reserved.

This flag will be used later by the Nintendo Wii support code to allow
ioremapping the I/O region sitting between MEM1 and MEM2 and marked
as reserved RAM in the patch "wii: use both mem1 and mem2 as ram".

This will no longer be needed when proper discontig memory support
for 32-bit PowerPC is added to the kernel.

Signed-off-by: Albert Herranz <albert_herranz@yahoo.es>
Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
---
 arch/powerpc/mm/init_32.c    | 5 +++++
 arch/powerpc/mm/mmu_decl.h   | 1 +
 arch/powerpc/mm/pgtable_32.c | 4 +++-
 include/linux/lmb.h          | 1 +
 lib/lmb.c                    | 7 ++++++-
 5 files changed, 16 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c
index 703c7c2e0d9f..4ec900af332f 100644
--- a/arch/powerpc/mm/init_32.c
+++ b/arch/powerpc/mm/init_32.c
@@ -82,6 +82,11 @@ extern struct task_struct *current_set[NR_CPUS];
 int __map_without_bats;
 int __map_without_ltlbs;
 
+/*
+ * This tells the system to allow ioremapping memory marked as reserved.
+ */
+int __allow_ioremap_reserved;
+
 /* max amount of low RAM to map in */
 unsigned long __max_low_memory = MAX_LOW_MEM;
 
diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h
index 9aa39fe74f8a..34dacc32250d 100644
--- a/arch/powerpc/mm/mmu_decl.h
+++ b/arch/powerpc/mm/mmu_decl.h
@@ -115,6 +115,7 @@ extern void settlbcam(int index, unsigned long virt, phys_addr_t phys,
 extern void invalidate_tlbcam_entry(int index);
 
 extern int __map_without_bats;
+extern int __allow_ioremap_reserved;
 extern unsigned long ioremap_base;
 extern unsigned int rtas_data, rtas_size;
 
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index b55bbe87acb8..177e4038b43c 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -26,6 +26,7 @@
 #include <linux/vmalloc.h>
 #include <linux/init.h>
 #include <linux/highmem.h>
+#include <linux/lmb.h>
 
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
@@ -191,7 +192,8 @@ __ioremap_caller(phys_addr_t addr, unsigned long size, unsigned long flags,
 	 * Don't allow anybody to remap normal RAM that we're using.
 	 * mem_init() sets high_memory so only do the check after that.
 	 */
-	if (mem_init_done && (p < virt_to_phys(high_memory))) {
+	if (mem_init_done && (p < virt_to_phys(high_memory)) &&
+	    !(__allow_ioremap_reserved && lmb_is_region_reserved(p, size))) {
 		printk("__ioremap(): phys addr 0x%llx is RAM lr %p\n",
 		       (unsigned long long)p, __builtin_return_address(0));
 		return NULL;
diff --git a/include/linux/lmb.h b/include/linux/lmb.h
index 2442e3f3d033..ef82b8fcbddb 100644
--- a/include/linux/lmb.h
+++ b/include/linux/lmb.h
@@ -54,6 +54,7 @@ extern u64 __init lmb_phys_mem_size(void);
 extern u64 lmb_end_of_DRAM(void);
 extern void __init lmb_enforce_memory_limit(u64 memory_limit);
 extern int __init lmb_is_reserved(u64 addr);
+extern int lmb_is_region_reserved(u64 base, u64 size);
 extern int lmb_find(struct lmb_property *res);
 
 extern void lmb_dump_all(void);
diff --git a/lib/lmb.c b/lib/lmb.c
index 0343c05609f0..9cee17142b2c 100644
--- a/lib/lmb.c
+++ b/lib/lmb.c
@@ -263,7 +263,7 @@ long __init lmb_reserve(u64 base, u64 size)
 	return lmb_add_region(_rgn, base, size);
 }
 
-long __init lmb_overlaps_region(struct lmb_region *rgn, u64 base, u64 size)
+long lmb_overlaps_region(struct lmb_region *rgn, u64 base, u64 size)
 {
 	unsigned long i;
 
@@ -493,6 +493,11 @@ int __init lmb_is_reserved(u64 addr)
 	return 0;
 }
 
+int lmb_is_region_reserved(u64 base, u64 size)
+{
+	return lmb_overlaps_region(&lmb.reserved, base, size);
+}
+
 /*
  * Given a <base, len>, find which memory regions belong to this range.
  * Adjust the request and return a contiguous chunk.
-- 
cgit v1.2.3


From 87d9b4e1c52867a45331a9a5495f6448e0c68b23 Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Tue, 8 Dec 2009 11:14:20 +0800
Subject: tracing: Extract duplicate ftrace_raw_init_event_foo()

Use a generic trace_event_raw_init() function for all event's raw_init
callbacks (but kprobes) instead of defining the same version for each
of these.
This shrinks the kernel code:

   text    data     bss     dec     hex filename
5355293 1961928 7103260 14420481         dc0a01 vmlinux.o.old
5346802 1961864 7103260 14411926         dbe896 vmlinux.o

raw_init can't be removed, because ftrace events and kprobe events
use different raw_init callbacks. Though it's possible to totally
remove raw_init, I choose to leave it as it is for now.

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Cc: Jason Baron <jbaron@redhat.com>
Cc: Ingo Molnar <mingo@elte.hu>
LKML-Reference: <4B1DC48C.7080603@cn.fujitsu.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
---
 include/linux/ftrace_event.h |  1 +
 include/linux/syscalls.h     |  4 ++--
 include/trace/ftrace.h       | 35 ++++-------------------------------
 kernel/trace/trace_events.c  | 14 ++++++++++++++
 4 files changed, 21 insertions(+), 33 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 38f8d6553831..ea44b8911094 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -158,6 +158,7 @@ enum {
 	FILTER_PTR_STRING,
 };
 
+extern int trace_event_raw_init(struct ftrace_event_call *call);
 extern int trace_define_common_fields(struct ftrace_event_call *call);
 extern int trace_define_field(struct ftrace_event_call *call, const char *type,
 			      const char *name, int offset, int size,
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index bc70c5810fec..94ac28437bef 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -145,7 +145,7 @@ struct perf_event_attr;
 		.name                   = "sys_enter"#sname,		\
 		.system                 = "syscalls",			\
 		.event                  = &enter_syscall_print_##sname,	\
-		.raw_init		= init_syscall_trace,		\
+		.raw_init		= trace_event_raw_init,		\
 		.show_format		= syscall_enter_format,		\
 		.define_fields		= syscall_enter_define_fields,	\
 		.regfunc		= reg_event_syscall_enter,	\
@@ -167,7 +167,7 @@ struct perf_event_attr;
 		.name                   = "sys_exit"#sname,		\
 		.system                 = "syscalls",			\
 		.event                  = &exit_syscall_print_##sname,	\
-		.raw_init		= init_syscall_trace,		\
+		.raw_init		= trace_event_raw_init,		\
 		.show_format		= syscall_exit_format,		\
 		.define_fields		= syscall_exit_define_fields,	\
 		.regfunc		= reg_event_syscall_exit,	\
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index c4eca380204e..6055b0604c86 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -623,23 +623,12 @@ static void ftrace_profile_disable_##name(struct ftrace_event_call *unused)\
  *	.trace			= ftrace_raw_output_<call>, <-- stage 2
  * };
  *
- * static int ftrace_raw_init_event_<call>(struct ftrace_event_call *unused)
- * {
- *	int id;
- *
- *	id = register_ftrace_event(&ftrace_event_type_<call>);
- *	if (!id)
- *		return -ENODEV;
- *	event_<call>.id = id;
- *	return 0;
- * }
- *
  * static struct ftrace_event_call __used
  * __attribute__((__aligned__(4)))
  * __attribute__((section("_ftrace_events"))) event_<call> = {
  *	.name			= "<call>",
  *	.system			= "<system>",
- *	.raw_init		= ftrace_raw_init_event_<call>,
+ *	.raw_init		= trace_event_raw_init,
  *	.regfunc		= ftrace_reg_event_<call>,
  *	.unregfunc		= ftrace_unreg_event_<call>,
  *	.show_format		= ftrace_format_<call>,
@@ -647,9 +636,6 @@ static void ftrace_profile_disable_##name(struct ftrace_event_call *unused)\
  *
  */
 
-#undef TP_FMT
-#define TP_FMT(fmt, args...)	fmt "\n", ##args
-
 #ifdef CONFIG_EVENT_PROFILE
 
 #define _TRACE_PROFILE_INIT(call)					\
@@ -744,19 +730,7 @@ static void ftrace_raw_unreg_event_##call(struct ftrace_event_call *unused)\
 									\
 static struct trace_event ftrace_event_type_##call = {			\
 	.trace			= ftrace_raw_output_##call,		\
-};									\
-									\
-static int ftrace_raw_init_event_##call(struct ftrace_event_call *unused)\
-{									\
-	int id;								\
-									\
-	id = register_ftrace_event(&ftrace_event_type_##call);		\
-	if (!id)							\
-		return -ENODEV;						\
-	event_##call.id = id;						\
-	INIT_LIST_HEAD(&event_##call.fields);				\
-	return 0;							\
-}
+};
 
 #undef DEFINE_EVENT_PRINT
 #define DEFINE_EVENT_PRINT(template, name, proto, args, print)	\
@@ -776,7 +750,7 @@ __attribute__((section("_ftrace_events"))) event_##call = {		\
 	.name			= #call,				\
 	.system			= __stringify(TRACE_SYSTEM),		\
 	.event			= &ftrace_event_type_##call,		\
-	.raw_init		= ftrace_raw_init_event_##call,		\
+	.raw_init		= trace_event_raw_init,			\
 	.regfunc		= ftrace_raw_reg_event_##call,		\
 	.unregfunc		= ftrace_raw_unreg_event_##call,	\
 	.show_format		= ftrace_format_##template,		\
@@ -793,7 +767,7 @@ __attribute__((section("_ftrace_events"))) event_##call = {		\
 	.name			= #call,				\
 	.system			= __stringify(TRACE_SYSTEM),		\
 	.event			= &ftrace_event_type_##call,		\
-	.raw_init		= ftrace_raw_init_event_##call,		\
+	.raw_init		= trace_event_raw_init,			\
 	.regfunc		= ftrace_raw_reg_event_##call,		\
 	.unregfunc		= ftrace_raw_unreg_event_##call,	\
 	.show_format		= ftrace_format_##call,			\
@@ -953,7 +927,6 @@ end:									\
 	perf_swevent_put_recursion_context(rctx);			\
 end_recursion:								\
 	local_irq_restore(irq_flags);					\
-									\
 }
 
 #undef DEFINE_EVENT
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 1d18315dc836..8ed66e0d476b 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -105,6 +105,20 @@ void trace_destroy_fields(struct ftrace_event_call *call)
 	}
 }
 
+int trace_event_raw_init(struct ftrace_event_call *call)
+{
+	int id;
+
+	id = register_ftrace_event(call->event);
+	if (!id)
+		return -ENODEV;
+	call->id = id;
+	INIT_LIST_HEAD(&call->fields);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(trace_event_raw_init);
+
 static void ftrace_event_enable_disable(struct ftrace_event_call *call,
 					int enable)
 {
-- 
cgit v1.2.3


From 614a71a26ba3d97e9fa85649db69a682b78e407d Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Tue, 8 Dec 2009 11:14:36 +0800
Subject: tracing: Pull up calls to trace_define_common_fields()

Call trace_define_common_fields() in event_create_dir() only.
This avoids trace events to handle it from their define_fields
callbacks and shrinks the kernel code size:

   text    data     bss     dec     hex filename
5346802 1961864 7103260 14411926         dbe896 vmlinux.o.old
5345151 1961864 7103260 14410275         dbe223 vmlinux.o

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Jason Baron <jbaron@redhat.com>
Cc: Masami Hiramatsu <mhiramat@redhat.com>
LKML-Reference: <4B1DC49C.8000107@cn.fujitsu.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
---
 include/linux/ftrace_event.h  | 1 -
 include/trace/ftrace.h        | 4 ----
 kernel/trace/trace_events.c   | 7 ++++---
 kernel/trace/trace_export.c   | 4 ----
 kernel/trace/trace_kprobe.c   | 8 --------
 kernel/trace/trace_syscalls.c | 8 --------
 6 files changed, 4 insertions(+), 28 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index ea44b8911094..db97c64ce0e9 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -159,7 +159,6 @@ enum {
 };
 
 extern int trace_event_raw_init(struct ftrace_event_call *call);
-extern int trace_define_common_fields(struct ftrace_event_call *call);
 extern int trace_define_field(struct ftrace_event_call *call, const char *type,
 			      const char *name, int offset, int size,
 			      int is_signed, int filter_type);
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index 6055b0604c86..2af2f7a2c1bd 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -436,10 +436,6 @@ ftrace_define_fields_##call(struct ftrace_event_call *event_call)	\
 	struct ftrace_raw_##call field;					\
 	int ret;							\
 									\
-	ret = trace_define_common_fields(event_call);			\
-	if (ret)							\
-		return ret;						\
-									\
 	tstruct;							\
 									\
 	return ret;							\
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 8ed66e0d476b..97b0b3aa166d 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -78,7 +78,7 @@ EXPORT_SYMBOL_GPL(trace_define_field);
 	if (ret)							\
 		return ret;
 
-int trace_define_common_fields(struct ftrace_event_call *call)
+static int trace_define_common_fields(struct ftrace_event_call *call)
 {
 	int ret;
 	struct trace_entry ent;
@@ -91,7 +91,6 @@ int trace_define_common_fields(struct ftrace_event_call *call)
 
 	return ret;
 }
-EXPORT_SYMBOL_GPL(trace_define_common_fields);
 
 void trace_destroy_fields(struct ftrace_event_call *call)
 {
@@ -927,7 +926,9 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,
 		 		  id);
 
 	if (call->define_fields) {
-		ret = call->define_fields(call);
+		ret = trace_define_common_fields(call);
+		if (!ret)
+			ret = call->define_fields(call);
 		if (ret < 0) {
 			pr_warning("Could not initialize trace point"
 				   " events/%s\n", call->name);
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c
index dff8c84ddf17..458e5bfe26d0 100644
--- a/kernel/trace/trace_export.c
+++ b/kernel/trace/trace_export.c
@@ -184,10 +184,6 @@ ftrace_define_fields_##name(struct ftrace_event_call *event_call)	\
 	struct struct_name field;					\
 	int ret;							\
 									\
-	ret = trace_define_common_fields(event_call);			\
-	if (ret)							\
-		return ret;						\
-									\
 	tstruct;							\
 									\
 	return ret;							\
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index aff5f80b59b8..e3c80e925896 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -1113,10 +1113,6 @@ static int kprobe_event_define_fields(struct ftrace_event_call *event_call)
 	struct kprobe_trace_entry field;
 	struct trace_probe *tp = (struct trace_probe *)event_call->data;
 
-	ret = trace_define_common_fields(event_call);
-	if (!ret)
-		return ret;
-
 	DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0);
 	DEFINE_FIELD(int, nargs, FIELD_STRING_NARGS, 1);
 	/* Set argument names as fields */
@@ -1131,10 +1127,6 @@ static int kretprobe_event_define_fields(struct ftrace_event_call *event_call)
 	struct kretprobe_trace_entry field;
 	struct trace_probe *tp = (struct trace_probe *)event_call->data;
 
-	ret = trace_define_common_fields(event_call);
-	if (!ret)
-		return ret;
-
 	DEFINE_FIELD(unsigned long, func, FIELD_STRING_FUNC, 0);
 	DEFINE_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP, 0);
 	DEFINE_FIELD(int, nargs, FIELD_STRING_NARGS, 1);
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 57501d90096a..b957edd0ca3b 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -217,10 +217,6 @@ int syscall_enter_define_fields(struct ftrace_event_call *call)
 	int i;
 	int offset = offsetof(typeof(trace), args);
 
-	ret = trace_define_common_fields(call);
-	if (ret)
-		return ret;
-
 	ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER);
 	if (ret)
 		return ret;
@@ -241,10 +237,6 @@ int syscall_exit_define_fields(struct ftrace_event_call *call)
 	struct syscall_trace_exit trace;
 	int ret;
 
-	ret = trace_define_common_fields(call);
-	if (ret)
-		return ret;
-
 	ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER);
 	if (ret)
 		return ret;
-- 
cgit v1.2.3


From e00bf2ec60605eb95687b7a0c3b83c87c48541dc Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Tue, 8 Dec 2009 11:17:29 +0800
Subject: tracing: Change event->profile_count to be int type

Like total_profile_count, struct ftrace_event_call::profile_count
is protected by event_mutex, so it doesn't need to be atomic_t.

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Cc: Jason Baron <jbaron@redhat.com>
Cc: Masami Hiramatsu <mhiramat@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
LKML-Reference: <4B1DC549.5010705@cn.fujitsu.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
---
 include/linux/ftrace_event.h       | 2 +-
 include/linux/syscalls.h           | 2 --
 include/trace/ftrace.h             | 1 -
 kernel/trace/trace_event_profile.c | 6 +++---
 kernel/trace/trace_kprobe.c        | 1 -
 5 files changed, 4 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index db97c64ce0e9..2233c98d80df 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -131,7 +131,7 @@ struct ftrace_event_call {
 	void			*mod;
 	void			*data;
 
-	atomic_t		profile_count;
+	int			profile_count;
 	int			(*profile_enable)(struct ftrace_event_call *);
 	void			(*profile_disable)(struct ftrace_event_call *);
 };
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 94ac28437bef..72d69860d901 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -102,12 +102,10 @@ struct perf_event_attr;
 #ifdef CONFIG_EVENT_PROFILE
 
 #define TRACE_SYS_ENTER_PROFILE_INIT(sname)				       \
-	.profile_count = ATOMIC_INIT(-1),				       \
 	.profile_enable = prof_sysenter_enable,				       \
 	.profile_disable = prof_sysenter_disable,
 
 #define TRACE_SYS_EXIT_PROFILE_INIT(sname)				       \
-	.profile_count = ATOMIC_INIT(-1),				       \
 	.profile_enable = prof_sysexit_enable,				       \
 	.profile_disable = prof_sysexit_disable,
 #else
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index 0c21af85211c..73523151a731 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -629,7 +629,6 @@ static void ftrace_profile_disable_##name(struct ftrace_event_call *unused)\
 #ifdef CONFIG_EVENT_PROFILE
 
 #define _TRACE_PROFILE_INIT(call)					\
-	.profile_count = ATOMIC_INIT(-1),				\
 	.profile_enable = ftrace_profile_enable_##call,			\
 	.profile_disable = ftrace_profile_disable_##call,
 
diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c
index d9c60f80aa0d..9e25573242cf 100644
--- a/kernel/trace/trace_event_profile.c
+++ b/kernel/trace/trace_event_profile.c
@@ -25,7 +25,7 @@ static int ftrace_profile_enable_event(struct ftrace_event_call *event)
 	char *buf;
 	int ret = -ENOMEM;
 
-	if (atomic_inc_return(&event->profile_count))
+	if (event->profile_count++ > 0)
 		return 0;
 
 	if (!total_profile_count) {
@@ -56,7 +56,7 @@ fail_buf_nmi:
 		perf_trace_buf = NULL;
 	}
 fail_buf:
-	atomic_dec(&event->profile_count);
+	event->profile_count--;
 
 	return ret;
 }
@@ -83,7 +83,7 @@ static void ftrace_profile_disable_event(struct ftrace_event_call *event)
 {
 	char *buf, *nmi_buf;
 
-	if (!atomic_add_negative(-1, &event->profile_count))
+	if (--event->profile_count > 0)
 		return;
 
 	event->profile_disable(event);
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index e3c80e925896..6ed223447a3f 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -1426,7 +1426,6 @@ static int register_probe_event(struct trace_probe *tp)
 	call->unregfunc = probe_event_disable;
 
 #ifdef CONFIG_EVENT_PROFILE
-	atomic_set(&call->profile_count, -1);
 	call->profile_enable = probe_profile_enable;
 	call->profile_disable = probe_profile_disable;
 #endif
-- 
cgit v1.2.3


From c7cd606f60e7679c7f9eee7010f02a6f000209c1 Mon Sep 17 00:00:00 2001
From: Oliver Hartkopp <oliver@hartkopp.net>
Date: Sat, 12 Dec 2009 04:13:21 +0000
Subject: can: Fix data length code handling in rx path

A valid CAN dataframe can have a data length code (DLC) of 0 .. 8 data bytes.

When reading the CAN controllers register the 4-bit value may contain values
from 0 .. 15 which may exceed the reserved space in the socket buffer!

The ISO 11898-1 Chapter 8.4.2.3 (DLC field) says that register values > 8
should be reduced to 8 without any error reporting or frame drop.

This patch introduces a new helper macro to cast a given 4-bit data length
code (dlc) to __u8 and ensure the DLC value to be max. 8 bytes.

The different handlings in the rx path of the CAN netdevice drivers are fixed.

Signed-off-by: Oliver Hartkopp <oliver@hartkopp.net>
Signed-off-by: Wolfgang Grandegger <wg@grandegger.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/can/at91_can.c        |  2 +-
 drivers/net/can/bfin_can.c        |  2 +-
 drivers/net/can/mcp251x.c         | 13 +++----------
 drivers/net/can/mscan/mscan.c     |  3 ++-
 drivers/net/can/sja1000/sja1000.c | 18 ++++++++----------
 drivers/net/can/ti_hecc.c         |  2 +-
 drivers/net/can/usb/ems_usb.c     |  2 +-
 include/linux/can/dev.h           |  9 +++++++++
 8 files changed, 26 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/can/at91_can.c b/drivers/net/can/at91_can.c
index cbe3fce53e3b..d0ec17878ffc 100644
--- a/drivers/net/can/at91_can.c
+++ b/drivers/net/can/at91_can.c
@@ -474,7 +474,7 @@ static void at91_read_mb(struct net_device *dev, unsigned int mb,
 	reg_msr = at91_read(priv, AT91_MSR(mb));
 	if (reg_msr & AT91_MSR_MRTR)
 		cf->can_id |= CAN_RTR_FLAG;
-	cf->can_dlc = min_t(__u8, (reg_msr >> 16) & 0xf, 8);
+	cf->can_dlc = get_can_dlc((reg_msr >> 16) & 0xf);
 
 	*(u32 *)(cf->data + 0) = at91_read(priv, AT91_MDL(mb));
 	*(u32 *)(cf->data + 4) = at91_read(priv, AT91_MDH(mb));
diff --git a/drivers/net/can/bfin_can.c b/drivers/net/can/bfin_can.c
index c7fc1de28173..0ec1524523cc 100644
--- a/drivers/net/can/bfin_can.c
+++ b/drivers/net/can/bfin_can.c
@@ -392,7 +392,7 @@ static void bfin_can_rx(struct net_device *dev, u16 isrc)
 		cf->can_id |= CAN_RTR_FLAG;
 
 	/* get data length code */
-	cf->can_dlc = bfin_read16(&reg->chl[obj].dlc);
+	cf->can_dlc = get_can_dlc(bfin_read16(&reg->chl[obj].dlc) & 0xF);
 
 	/* get payload */
 	for (i = 0; i < 8; i += 2) {
diff --git a/drivers/net/can/mcp251x.c b/drivers/net/can/mcp251x.c
index 78b1b69b2921..9c5a1537939c 100644
--- a/drivers/net/can/mcp251x.c
+++ b/drivers/net/can/mcp251x.c
@@ -403,9 +403,8 @@ static void mcp251x_hw_rx_frame(struct spi_device *spi, u8 *buf,
 
 		for (i = 1; i < RXBDAT_OFF; i++)
 			buf[i] = mcp251x_read_reg(spi, RXBCTRL(buf_idx) + i);
-		len = buf[RXBDLC_OFF] & RXBDLC_LEN_MASK;
-		if (len > 8)
-			len = 8;
+
+		len = get_can_dlc(buf[RXBDLC_OFF] & RXBDLC_LEN_MASK);
 		for (; i < (RXBDAT_OFF + len); i++)
 			buf[i] = mcp251x_read_reg(spi, RXBCTRL(buf_idx) + i);
 	} else {
@@ -455,13 +454,7 @@ static void mcp251x_hw_rx(struct spi_device *spi, int buf_idx)
 			(buf[RXBSIDL_OFF] >> RXBSIDL_SHIFT);
 	}
 	/* Data length */
-	frame->can_dlc = buf[RXBDLC_OFF] & RXBDLC_LEN_MASK;
-	if (frame->can_dlc > 8) {
-		dev_warn(&spi->dev, "invalid frame recevied\n");
-		priv->net->stats.rx_errors++;
-		dev_kfree_skb(skb);
-		return;
-	}
+	frame->can_dlc = get_can_dlc(buf[RXBDLC_OFF] & RXBDLC_LEN_MASK);
 	memcpy(frame->data, buf + RXBDAT_OFF, frame->can_dlc);
 
 	priv->net->stats.rx_packets++;
diff --git a/drivers/net/can/mscan/mscan.c b/drivers/net/can/mscan/mscan.c
index bb06dfb58f25..07346f880ca6 100644
--- a/drivers/net/can/mscan/mscan.c
+++ b/drivers/net/can/mscan/mscan.c
@@ -297,7 +297,8 @@ static void mscan_get_rx_frame(struct net_device *dev, struct can_frame *frame)
 	frame->can_id |= can_id >> 1;
 	if (can_id & 1)
 		frame->can_id |= CAN_RTR_FLAG;
-	frame->can_dlc = in_8(&regs->rx.dlr) & 0xf;
+
+	frame->can_dlc = get_can_dlc(in_8(&regs->rx.dlr) & 0xf);
 
 	if (!(frame->can_id & CAN_RTR_FLAG)) {
 		void __iomem *data = &regs->rx.dsr1_0;
diff --git a/drivers/net/can/sja1000/sja1000.c b/drivers/net/can/sja1000/sja1000.c
index b4ba88a31075..542a4f7255b4 100644
--- a/drivers/net/can/sja1000/sja1000.c
+++ b/drivers/net/can/sja1000/sja1000.c
@@ -293,15 +293,14 @@ static void sja1000_rx(struct net_device *dev)
 	uint8_t fi;
 	uint8_t dreg;
 	canid_t id;
-	uint8_t dlc;
 	int i;
 
+	/* create zero'ed CAN frame buffer */
 	skb = alloc_can_skb(dev, &cf);
 	if (skb == NULL)
 		return;
 
 	fi = priv->read_reg(priv, REG_FI);
-	dlc = fi & 0x0F;
 
 	if (fi & FI_FF) {
 		/* extended frame format (EFF) */
@@ -318,16 +317,15 @@ static void sja1000_rx(struct net_device *dev)
 		    | (priv->read_reg(priv, REG_ID2) >> 5);
 	}
 
-	if (fi & FI_RTR)
+	if (fi & FI_RTR) {
 		id |= CAN_RTR_FLAG;
+	} else {
+		cf->can_dlc = get_can_dlc(fi & 0x0F);
+		for (i = 0; i < cf->can_dlc; i++)
+			cf->data[i] = priv->read_reg(priv, dreg++);
+	}
 
 	cf->can_id = id;
-	cf->can_dlc = dlc;
-	for (i = 0; i < dlc; i++)
-		cf->data[i] = priv->read_reg(priv, dreg++);
-
-	while (i < 8)
-		cf->data[i++] = 0;
 
 	/* release receive buffer */
 	priv->write_reg(priv, REG_CMR, CMD_RRB);
@@ -335,7 +333,7 @@ static void sja1000_rx(struct net_device *dev)
 	netif_rx(skb);
 
 	stats->rx_packets++;
-	stats->rx_bytes += dlc;
+	stats->rx_bytes += cf->can_dlc;
 }
 
 static int sja1000_err(struct net_device *dev, uint8_t isrc, uint8_t status)
diff --git a/drivers/net/can/ti_hecc.c b/drivers/net/can/ti_hecc.c
index 07e8016b17ec..5c993c2da528 100644
--- a/drivers/net/can/ti_hecc.c
+++ b/drivers/net/can/ti_hecc.c
@@ -552,7 +552,7 @@ static int ti_hecc_rx_pkt(struct ti_hecc_priv *priv, int mbxno)
 	data = hecc_read_mbx(priv, mbxno, HECC_CANMCF);
 	if (data & HECC_CANMCF_RTR)
 		cf->can_id |= CAN_RTR_FLAG;
-	cf->can_dlc = data & 0xF;
+	cf->can_dlc = get_can_dlc(data & 0xF);
 	data = hecc_read_mbx(priv, mbxno, HECC_CANMDL);
 	*(u32 *)(cf->data) = cpu_to_be32(data);
 	if (cf->can_dlc > 4) {
diff --git a/drivers/net/can/usb/ems_usb.c b/drivers/net/can/usb/ems_usb.c
index 591eb0eb1c2b..efbb05c71bf4 100644
--- a/drivers/net/can/usb/ems_usb.c
+++ b/drivers/net/can/usb/ems_usb.c
@@ -316,7 +316,7 @@ static void ems_usb_rx_can_msg(struct ems_usb *dev, struct ems_cpc_msg *msg)
 		return;
 
 	cf->can_id = le32_to_cpu(msg->msg.can_msg.id);
-	cf->can_dlc = min_t(u8, msg->msg.can_msg.length, 8);
+	cf->can_dlc = get_can_dlc(msg->msg.can_msg.length & 0xF);
 
 	if (msg->type == CPC_MSG_TYPE_EXT_CAN_FRAME ||
 	    msg->type == CPC_MSG_TYPE_EXT_RTR_FRAME)
diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h
index 1ed2a5cc03f5..3db7767d2a17 100644
--- a/include/linux/can/dev.h
+++ b/include/linux/can/dev.h
@@ -51,6 +51,15 @@ struct can_priv {
 	struct sk_buff **echo_skb;
 };
 
+/*
+ * get_can_dlc(value) - helper macro to cast a given data length code (dlc)
+ * to __u8 and ensure the dlc value to be max. 8 bytes.
+ *
+ * To be used in the CAN netdriver receive path to ensure conformance with
+ * ISO 11898-1 Chapter 8.4.2.3 (DLC field)
+ */
+#define get_can_dlc(i)	(min_t(__u8, (i), 8))
+
 struct net_device *alloc_candev(int sizeof_priv, unsigned int echo_skb_max);
 void free_candev(struct net_device *dev);
 
-- 
cgit v1.2.3


From 4056c9a344d60ee96471a5f3b0a3c8a90371c8fd Mon Sep 17 00:00:00 2001
From: Boaz Harrosh <bharrosh@panasas.com>
Date: Thu, 3 Dec 2009 20:28:04 +0200
Subject: nfsd: Remove unused dprintk

This doesn't appear to be useful.

Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 include/linux/nfsd/nfsfh.h | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nfsd/nfsfh.h b/include/linux/nfsd/nfsfh.h
index 8f641c908450..2973e1135343 100644
--- a/include/linux/nfsd/nfsfh.h
+++ b/include/linux/nfsd/nfsfh.h
@@ -20,7 +20,6 @@
 # include <linux/fs.h>
 #endif
 #include <linux/nfsd/const.h>
-#include <linux/nfsd/debug.h>
 
 /*
  * This is the old "dentry style" Linux NFSv2 file handle.
@@ -329,9 +328,6 @@ fh_lock_nested(struct svc_fh *fhp, unsigned int subclass)
 	struct dentry	*dentry = fhp->fh_dentry;
 	struct inode	*inode;
 
-	dfprintk(FILEOP, "nfsd: fh_lock(%s) locked = %d\n",
-			SVCFH_fmt(fhp), fhp->fh_locked);
-
 	BUG_ON(!dentry);
 
 	if (fhp->fh_locked) {
-- 
cgit v1.2.3


From a600ffcbb3743cf1296bee2a41d4824c719d7181 Mon Sep 17 00:00:00 2001
From: Boaz Harrosh <bharrosh@panasas.com>
Date: Thu, 3 Dec 2009 20:28:35 +0200
Subject: sunrpc: Clean never used include files

Remove include of two headers never used by this file.
Doing so exposed a missing #include <linux/types.h> in
include/linux/sunrpc/rpc_rdma.h.

I did not see any other users dependency but if exist they
should be fixed since these headers are totally irrelevant
to here.

Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 include/linux/sunrpc/debug.h    | 3 ---
 include/linux/sunrpc/rpc_rdma.h | 2 ++
 2 files changed, 2 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/debug.h b/include/linux/sunrpc/debug.h
index 10709cbe96fd..c2786f20016f 100644
--- a/include/linux/sunrpc/debug.h
+++ b/include/linux/sunrpc/debug.h
@@ -28,9 +28,6 @@
 
 #ifdef __KERNEL__
 
-#include <linux/timer.h>
-#include <linux/workqueue.h>
-
 /*
  * Enable RPC debugging/profiling.
  */
diff --git a/include/linux/sunrpc/rpc_rdma.h b/include/linux/sunrpc/rpc_rdma.h
index 87b895d5c786..b78f16b1dea3 100644
--- a/include/linux/sunrpc/rpc_rdma.h
+++ b/include/linux/sunrpc/rpc_rdma.h
@@ -40,6 +40,8 @@
 #ifndef _LINUX_SUNRPC_RPC_RDMA_H
 #define _LINUX_SUNRPC_RPC_RDMA_H
 
+#include <linux/types.h>
+
 struct rpcrdma_segment {
 	__be32 rs_handle;	/* Registered memory handle */
 	__be32 rs_length;	/* Length of the chunk in bytes */
-- 
cgit v1.2.3


From d703158229329af7152d159753f849aa7bd55ee6 Mon Sep 17 00:00:00 2001
From: Boaz Harrosh <bharrosh@panasas.com>
Date: Thu, 3 Dec 2009 20:28:47 +0200
Subject: nfsd: Fix independence of a few nfsd related headers

An header should be compilation independent, .i.e pull in
any header who's declarations are directly used by this header.
And not let users re-include all it's dependencies all over
again.

[At the end of the day what's the use of a header if it does
 not have more then one user?]

Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 include/linux/nfs_xdr.h | 1 +
 include/linux/nfsacl.h  | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 62f63fb0c4c8..00a0c8170816 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -2,6 +2,7 @@
 #define _LINUX_NFS_XDR_H
 
 #include <linux/nfsacl.h>
+#include <linux/nfs3.h>
 
 /*
  * To change the maximum rsize and wsize supported by the NFS client, adjust
diff --git a/include/linux/nfsacl.h b/include/linux/nfsacl.h
index 43011b69297c..f321b578edeb 100644
--- a/include/linux/nfsacl.h
+++ b/include/linux/nfsacl.h
@@ -29,6 +29,7 @@
 #ifdef __KERNEL__
 
 #include <linux/posix_acl.h>
+#include <linux/sunrpc/xdr.h>
 
 /* Maximum number of ACL entries over NFS */
 #define NFS_ACL_MAX_ENTRIES	1024
-- 
cgit v1.2.3


From 72579ac9cd68081108277c31781b127d0f420d61 Mon Sep 17 00:00:00 2001
From: Boaz Harrosh <bharrosh@panasas.com>
Date: Thu, 3 Dec 2009 20:28:59 +0200
Subject: nfsd: Headers Independence and include cleanups

* Add includes that are directly used by headers
* Remove includes that are not needed

These are the changes made:

[xdr.h]
struct nfsd_readdirres has an embedded struct readdir_cd from nfsd.h
fixing that we can drop other includes

[xdr4.h]
embedded types defined both at state.h and nfsd.h

[syscall.h]
After export.h fix none of these stuff is needed.
fix extra space in # include <> statement

[stats.h]
does not need <linux/nfs4.h> but was export to user-mode
so I don't touch it

[state.h]
embedded types from nfsfh.h like struct knfsd_fh. bringing that
eliminates the need for all other includes

[nfsfh.h]
directly manipulating types from sunrpc/svc.h.
Removed Other unused headers.

[nfsd.h]
removed unused headers include

[export.h]
lots of sunrpc/svc.h types and a single prototype declaration
with pointer from nfsfh.h, but all users of export.h do need
nfsfh.h any way. remove now un-needed include.

[const.h]
Unfixed (not independent)

[cache.h]
could do with a forward declaration of "struct svc_rqst;"
from sunrpc/svc.h but all users absolutely will need
sunrpc/svc.h it is easier overall this way.

Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 include/linux/nfsd/cache.h   | 3 +--
 include/linux/nfsd/export.h  | 2 +-
 include/linux/nfsd/nfsd.h    | 4 ----
 include/linux/nfsd/nfsfh.h   | 3 +--
 include/linux/nfsd/state.h   | 4 +---
 include/linux/nfsd/syscall.h | 8 +-------
 include/linux/nfsd/xdr.h     | 3 +--
 include/linux/nfsd/xdr4.h    | 3 ++-
 8 files changed, 8 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nfsd/cache.h b/include/linux/nfsd/cache.h
index 3a3f58934f5e..a165425dea41 100644
--- a/include/linux/nfsd/cache.h
+++ b/include/linux/nfsd/cache.h
@@ -10,8 +10,7 @@
 #ifndef NFSCACHE_H
 #define NFSCACHE_H
 
-#include <linux/in.h>
-#include <linux/uio.h>
+#include <linux/sunrpc/svc.h>
 
 /*
  * Representation of a reply cache entry.
diff --git a/include/linux/nfsd/export.h b/include/linux/nfsd/export.h
index a6d9ef2bb34a..ef3d416fcf67 100644
--- a/include/linux/nfsd/export.h
+++ b/include/linux/nfsd/export.h
@@ -12,7 +12,7 @@
 
 # include <linux/types.h>
 #ifdef __KERNEL__
-# include <linux/in.h>
+# include <linux/nfsd/nfsfh.h>
 #endif
 
 /*
diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h
index e4518d090a8c..74f67c2aca34 100644
--- a/include/linux/nfsd/nfsd.h
+++ b/include/linux/nfsd/nfsd.h
@@ -11,13 +11,9 @@
 #define LINUX_NFSD_NFSD_H
 
 #include <linux/types.h>
-#include <linux/unistd.h>
-#include <linux/fs.h>
-#include <linux/posix_acl.h>
 #include <linux/mount.h>
 
 #include <linux/nfsd/debug.h>
-#include <linux/nfsd/nfsfh.h>
 #include <linux/nfsd/export.h>
 #include <linux/nfsd/stats.h>
 /*
diff --git a/include/linux/nfsd/nfsfh.h b/include/linux/nfsd/nfsfh.h
index 2973e1135343..49523edbc510 100644
--- a/include/linux/nfsd/nfsfh.h
+++ b/include/linux/nfsd/nfsfh.h
@@ -16,8 +16,7 @@
 
 # include <linux/types.h>
 #ifdef __KERNEL__
-# include <linux/string.h>
-# include <linux/fs.h>
+# include <linux/sunrpc/svc.h>
 #endif
 #include <linux/nfsd/const.h>
 
diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h
index 5aadf8aa3a97..2af75686e0d3 100644
--- a/include/linux/nfsd/state.h
+++ b/include/linux/nfsd/state.h
@@ -37,9 +37,7 @@
 #ifndef _NFSD4_STATE_H
 #define _NFSD4_STATE_H
 
-#include <linux/list.h>
-#include <linux/kref.h>
-#include <linux/sunrpc/clnt.h>
+#include <linux/nfsd/nfsfh.h>
 
 typedef struct {
 	u32             cl_boot;
diff --git a/include/linux/nfsd/syscall.h b/include/linux/nfsd/syscall.h
index 7a3b565b898f..812bc1e160dc 100644
--- a/include/linux/nfsd/syscall.h
+++ b/include/linux/nfsd/syscall.h
@@ -9,14 +9,8 @@
 #ifndef NFSD_SYSCALL_H
 #define NFSD_SYSCALL_H
 
-# include <linux/types.h>
-#ifdef __KERNEL__
-# include <linux/in.h>
-#endif 
-#include <linux/posix_types.h>
-#include <linux/nfsd/const.h>
+#include <linux/types.h>
 #include <linux/nfsd/export.h>
-#include <linux/nfsd/nfsfh.h>
 
 /*
  * Version of the syscall interface
diff --git a/include/linux/nfsd/xdr.h b/include/linux/nfsd/xdr.h
index a0132ef58f21..58f824d854c2 100644
--- a/include/linux/nfsd/xdr.h
+++ b/include/linux/nfsd/xdr.h
@@ -7,9 +7,8 @@
 #ifndef LINUX_NFSD_H
 #define LINUX_NFSD_H
 
-#include <linux/fs.h>
 #include <linux/vfs.h>
-#include <linux/nfs.h>
+#include <linux/nfsd/nfsd.h>
 
 struct nfsd_fhandle {
 	struct svc_fh		fh;
diff --git a/include/linux/nfsd/xdr4.h b/include/linux/nfsd/xdr4.h
index 73164c2b3d29..1bf266239c7e 100644
--- a/include/linux/nfsd/xdr4.h
+++ b/include/linux/nfsd/xdr4.h
@@ -39,7 +39,8 @@
 #ifndef _LINUX_NFSD_XDR4_H
 #define _LINUX_NFSD_XDR4_H
 
-#include <linux/nfs4.h>
+#include <linux/nfsd/state.h>
+#include <linux/nfsd/nfsd.h>
 
 #define NFSD4_MAX_TAGLEN	128
 #define XDR_LEN(n)                     (((n) + 3) & ~3)
-- 
cgit v1.2.3


From 9a74af21330c8d46efa977d088a62cc1bfa954e9 Mon Sep 17 00:00:00 2001
From: Boaz Harrosh <bharrosh@panasas.com>
Date: Thu, 3 Dec 2009 20:30:56 +0200
Subject: nfsd: Move private headers to source directory

Lots of include/linux/nfsd/* headers are only used by
nfsd module. Move them to the source directory

Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 fs/nfsd/auth.c             |   2 +-
 fs/nfsd/cache.h            |  85 +++++++
 fs/nfsd/export.c           |   3 +-
 fs/nfsd/lockd.c            |   2 +-
 fs/nfsd/nfs2acl.c          |   7 +-
 fs/nfsd/nfs3acl.c          |   7 +-
 fs/nfsd/nfs3proc.c         |   4 +-
 fs/nfsd/nfs3xdr.c          |   2 +-
 fs/nfsd/nfs4callback.c     |   4 +-
 fs/nfsd/nfs4proc.c         |   4 +-
 fs/nfsd/nfs4recover.c      |   5 +-
 fs/nfsd/nfs4state.c        |   2 +-
 fs/nfsd/nfs4xdr.c          |   3 +-
 fs/nfsd/nfscache.c         |   4 +-
 fs/nfsd/nfsctl.c           |   5 +-
 fs/nfsd/nfsd.h             | 335 +++++++++++++++++++++++++++
 fs/nfsd/nfsfh.c            |   2 +-
 fs/nfsd/nfsproc.c          |   4 +-
 fs/nfsd/nfssvc.c           |   4 +-
 fs/nfsd/nfsxdr.c           |   2 +-
 fs/nfsd/state.h            | 409 ++++++++++++++++++++++++++++++++
 fs/nfsd/stats.c            |   4 +-
 fs/nfsd/vfs.c              |  18 +-
 fs/nfsd/xdr.h              | 176 ++++++++++++++
 fs/nfsd/xdr3.h             | 346 +++++++++++++++++++++++++++
 fs/nfsd/xdr4.h             | 564 +++++++++++++++++++++++++++++++++++++++++++++
 include/linux/nfsd/cache.h |  85 -------
 include/linux/nfsd/nfsd.h  | 335 ---------------------------
 include/linux/nfsd/state.h | 409 --------------------------------
 include/linux/nfsd/xdr.h   | 176 --------------
 include/linux/nfsd/xdr3.h  | 346 ---------------------------
 include/linux/nfsd/xdr4.h  | 564 ---------------------------------------------
 32 files changed, 1963 insertions(+), 1955 deletions(-)
 create mode 100644 fs/nfsd/cache.h
 create mode 100644 fs/nfsd/nfsd.h
 create mode 100644 fs/nfsd/state.h
 create mode 100644 fs/nfsd/xdr.h
 create mode 100644 fs/nfsd/xdr3.h
 create mode 100644 fs/nfsd/xdr4.h
 delete mode 100644 include/linux/nfsd/cache.h
 delete mode 100644 include/linux/nfsd/nfsd.h
 delete mode 100644 include/linux/nfsd/state.h
 delete mode 100644 include/linux/nfsd/xdr.h
 delete mode 100644 include/linux/nfsd/xdr3.h
 delete mode 100644 include/linux/nfsd/xdr4.h

(limited to 'include/linux')

diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c
index ad354d284cf8..71209d4993d0 100644
--- a/fs/nfsd/auth.c
+++ b/fs/nfsd/auth.c
@@ -5,7 +5,7 @@
  */
 
 #include <linux/sched.h>
-#include <linux/nfsd/nfsd.h>
+#include "nfsd.h"
 #include "auth.h"
 
 int nfsexp_flags(struct svc_rqst *rqstp, struct svc_export *exp)
diff --git a/fs/nfsd/cache.h b/fs/nfsd/cache.h
new file mode 100644
index 000000000000..a165425dea41
--- /dev/null
+++ b/fs/nfsd/cache.h
@@ -0,0 +1,85 @@
+/*
+ * include/linux/nfsd/cache.h
+ *
+ * Request reply cache. This was heavily inspired by the
+ * implementation in 4.3BSD/4.4BSD.
+ *
+ * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
+ */
+
+#ifndef NFSCACHE_H
+#define NFSCACHE_H
+
+#include <linux/sunrpc/svc.h>
+
+/*
+ * Representation of a reply cache entry.
+ */
+struct svc_cacherep {
+	struct hlist_node	c_hash;
+	struct list_head	c_lru;
+
+	unsigned char		c_state,	/* unused, inprog, done */
+				c_type,		/* status, buffer */
+				c_secure : 1;	/* req came from port < 1024 */
+	struct sockaddr_in	c_addr;
+	__be32			c_xid;
+	u32			c_prot;
+	u32			c_proc;
+	u32			c_vers;
+	unsigned long		c_timestamp;
+	union {
+		struct kvec	u_vec;
+		__be32		u_status;
+	}			c_u;
+};
+
+#define c_replvec		c_u.u_vec
+#define c_replstat		c_u.u_status
+
+/* cache entry states */
+enum {
+	RC_UNUSED,
+	RC_INPROG,
+	RC_DONE
+};
+
+/* return values */
+enum {
+	RC_DROPIT,
+	RC_REPLY,
+	RC_DOIT,
+	RC_INTR
+};
+
+/*
+ * Cache types.
+ * We may want to add more types one day, e.g. for diropres and
+ * attrstat replies. Using cache entries with fixed length instead
+ * of buffer pointers may be more efficient.
+ */
+enum {
+	RC_NOCACHE,
+	RC_REPLSTAT,
+	RC_REPLBUFF,
+};
+
+/*
+ * If requests are retransmitted within this interval, they're dropped.
+ */
+#define RC_DELAY		(HZ/5)
+
+int	nfsd_reply_cache_init(void);
+void	nfsd_reply_cache_shutdown(void);
+int	nfsd_cache_lookup(struct svc_rqst *, int);
+void	nfsd_cache_update(struct svc_rqst *, int, __be32 *);
+
+#ifdef CONFIG_NFSD_V4
+void	nfsd4_set_statp(struct svc_rqst *rqstp, __be32 *statp);
+#else  /* CONFIG_NFSD_V4 */
+static inline void nfsd4_set_statp(struct svc_rqst *rqstp, __be32 *statp)
+{
+}
+#endif /* CONFIG_NFSD_V4 */
+
+#endif /* NFSCACHE_H */
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 68e63f441444..cb3dae2fcd86 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -18,10 +18,11 @@
 #include <linux/module.h>
 #include <linux/exportfs.h>
 
-#include <linux/nfsd/nfsd.h>
 #include <linux/nfsd/syscall.h>
 #include <net/ipv6.h>
 
+#include "nfsd.h"
+
 #define NFSDDBG_FACILITY	NFSDDBG_EXPORT
 
 typedef struct auth_domain	svc_client;
diff --git a/fs/nfsd/lockd.c b/fs/nfsd/lockd.c
index 801ef7104ff4..6f12777ed227 100644
--- a/fs/nfsd/lockd.c
+++ b/fs/nfsd/lockd.c
@@ -9,8 +9,8 @@
  */
 
 #include <linux/file.h>
-#include <linux/nfsd/nfsd.h>
 #include <linux/lockd/bind.h>
+#include "nfsd.h"
 #include "vfs.h"
 
 #define NFSDDBG_FACILITY		NFSDDBG_LOCKD
diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
index a54628de7715..874e2a94bf4f 100644
--- a/fs/nfsd/nfs2acl.c
+++ b/fs/nfsd/nfs2acl.c
@@ -6,10 +6,11 @@
  * Copyright (C) 2002-2003 Andreas Gruenbacher <agruen@suse.de>
  */
 
-#include <linux/nfsd/nfsd.h>
-#include <linux/nfsd/cache.h>
-#include <linux/nfsd/xdr3.h>
+#include "nfsd.h"
+/* FIXME: nfsacl.h is a broken header */
 #include <linux/nfsacl.h>
+#include "cache.h"
+#include "xdr3.h"
 #include "vfs.h"
 
 #define NFSDDBG_FACILITY		NFSDDBG_PROC
diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c
index 2f5c61bea908..c6011ddbadc0 100644
--- a/fs/nfsd/nfs3acl.c
+++ b/fs/nfsd/nfs3acl.c
@@ -6,10 +6,11 @@
  * Copyright (C) 2002-2003 Andreas Gruenbacher <agruen@suse.de>
  */
 
-#include <linux/nfsd/nfsd.h>
-#include <linux/nfsd/cache.h>
-#include <linux/nfsd/xdr3.h>
+#include "nfsd.h"
+/* FIXME: nfsacl.h is a broken header */
 #include <linux/nfsacl.h>
+#include "cache.h"
+#include "xdr3.h"
 #include "vfs.h"
 
 #define RETURN_STATUS(st)	{ resp->status = (st); return (st); }
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index b694b4304544..90b19ca75b34 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -10,8 +10,8 @@
 #include <linux/ext2_fs.h>
 #include <linux/magic.h>
 
-#include <linux/nfsd/cache.h>
-#include <linux/nfsd/xdr3.h>
+#include "cache.h"
+#include "xdr3.h"
 #include "vfs.h"
 
 #define NFSDDBG_FACILITY		NFSDDBG_PROC
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index 623e13aa6259..c523bb88c10b 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -9,7 +9,7 @@
  */
 
 #include <linux/namei.h>
-#include <linux/nfsd/xdr3.h>
+#include "xdr3.h"
 #include "auth.h"
 
 #define NFSDDBG_FACILITY		NFSDDBG_XDR
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 4fe396071b61..f7a315827638 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -34,8 +34,8 @@
  */
 
 #include <linux/sunrpc/clnt.h>
-#include <linux/nfsd/nfsd.h>
-#include <linux/nfsd/state.h>
+#include "nfsd.h"
+#include "state.h"
 
 #define NFSDDBG_FACILITY                NFSDDBG_PROC
 
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 61f682c77e7f..e2b5666f25d1 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -36,8 +36,8 @@
  */
 #include <linux/file.h>
 
-#include <linux/nfsd/cache.h>
-#include <linux/nfsd/xdr4.h>
+#include "cache.h"
+#include "xdr4.h"
 #include "vfs.h"
 
 #define NFSDDBG_FACILITY		NFSDDBG_PROC
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 48742f243c25..6744e7f2da0e 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -33,12 +33,13 @@
 *
 */
 
-#include <linux/nfsd/nfsd.h>
-#include <linux/nfsd/state.h>
 #include <linux/file.h>
 #include <linux/namei.h>
 #include <linux/crypto.h>
 #include <linux/sched.h>
+
+#include "nfsd.h"
+#include "state.h"
 #include "vfs.h"
 
 #define NFSDDBG_FACILITY                NFSDDBG_PROC
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 1fe6e29fd500..2923e6c1da18 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -36,11 +36,11 @@
 
 #include <linux/file.h>
 #include <linux/smp_lock.h>
-#include <linux/nfsd/xdr4.h>
 #include <linux/namei.h>
 #include <linux/swap.h>
 #include <linux/sunrpc/svcauth_gss.h>
 #include <linux/sunrpc/clnt.h>
+#include "xdr4.h"
 #include "vfs.h"
 
 #define NFSDDBG_FACILITY                NFSDDBG_PROC
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 2fa96821f5b5..cab978031100 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -43,10 +43,11 @@
 #include <linux/namei.h>
 #include <linux/statfs.h>
 #include <linux/utsname.h>
-#include <linux/nfsd/xdr4.h>
 #include <linux/nfsd_idmap.h>
 #include <linux/nfs4_acl.h>
 #include <linux/sunrpc/svcauth_gss.h>
+
+#include "xdr4.h"
 #include "vfs.h"
 
 #define NFSDDBG_FACILITY		NFSDDBG_XDR
diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
index 96694b8345ef..18aa9729a380 100644
--- a/fs/nfsd/nfscache.c
+++ b/fs/nfsd/nfscache.c
@@ -10,8 +10,8 @@
  * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
  */
 
-#include <linux/nfsd/nfsd.h>
-#include <linux/nfsd/cache.h>
+#include "nfsd.h"
+#include "cache.h"
 
 /* Size of reply cache. Common values are:
  * 4.3BSD:	128
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index e4f49fd6af44..0415680d3f58 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -11,12 +11,13 @@
 
 #include <linux/nfsd_idmap.h>
 #include <linux/sunrpc/svcsock.h>
-#include <linux/nfsd/nfsd.h>
-#include <linux/nfsd/cache.h>
 #include <linux/nfsd/syscall.h>
 #include <linux/lockd/lockd.h>
 #include <linux/sunrpc/clnt.h>
 
+#include "nfsd.h"
+#include "cache.h"
+
 /*
  *	We have a single directory with 9 nodes in it.
  */
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
new file mode 100644
index 000000000000..74f67c2aca34
--- /dev/null
+++ b/fs/nfsd/nfsd.h
@@ -0,0 +1,335 @@
+/*
+ * linux/include/linux/nfsd/nfsd.h
+ *
+ * Hodge-podge collection of knfsd-related stuff.
+ * I will sort this out later.
+ *
+ * Copyright (C) 1995-1997 Olaf Kirch <okir@monad.swb.de>
+ */
+
+#ifndef LINUX_NFSD_NFSD_H
+#define LINUX_NFSD_NFSD_H
+
+#include <linux/types.h>
+#include <linux/mount.h>
+
+#include <linux/nfsd/debug.h>
+#include <linux/nfsd/export.h>
+#include <linux/nfsd/stats.h>
+/*
+ * nfsd version
+ */
+#define NFSD_SUPPORTED_MINOR_VERSION	1
+
+struct readdir_cd {
+	__be32			err;	/* 0, nfserr, or nfserr_eof */
+};
+
+
+extern struct svc_program	nfsd_program;
+extern struct svc_version	nfsd_version2, nfsd_version3,
+				nfsd_version4;
+extern u32			nfsd_supported_minorversion;
+extern struct mutex		nfsd_mutex;
+extern struct svc_serv		*nfsd_serv;
+extern spinlock_t		nfsd_drc_lock;
+extern unsigned int		nfsd_drc_max_mem;
+extern unsigned int		nfsd_drc_mem_used;
+
+extern const struct seq_operations nfs_exports_op;
+
+/*
+ * Function prototypes.
+ */
+int		nfsd_svc(unsigned short port, int nrservs);
+int		nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp);
+
+int		nfsd_nrthreads(void);
+int		nfsd_nrpools(void);
+int		nfsd_get_nrthreads(int n, int *);
+int		nfsd_set_nrthreads(int n, int *);
+
+#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
+#ifdef CONFIG_NFSD_V2_ACL
+extern struct svc_version nfsd_acl_version2;
+#else
+#define nfsd_acl_version2 NULL
+#endif
+#ifdef CONFIG_NFSD_V3_ACL
+extern struct svc_version nfsd_acl_version3;
+#else
+#define nfsd_acl_version3 NULL
+#endif
+#endif
+
+enum vers_op {NFSD_SET, NFSD_CLEAR, NFSD_TEST, NFSD_AVAIL };
+int nfsd_vers(int vers, enum vers_op change);
+int nfsd_minorversion(u32 minorversion, enum vers_op change);
+void nfsd_reset_versions(void);
+int nfsd_create_serv(void);
+
+extern int nfsd_max_blksize;
+
+/* 
+ * NFSv4 State
+ */
+#ifdef CONFIG_NFSD_V4
+extern unsigned int max_delegations;
+int nfs4_state_init(void);
+void nfsd4_free_slabs(void);
+int nfs4_state_start(void);
+void nfs4_state_shutdown(void);
+time_t nfs4_lease_time(void);
+void nfs4_reset_lease(time_t leasetime);
+int nfs4_reset_recoverydir(char *recdir);
+#else
+static inline int nfs4_state_init(void) { return 0; }
+static inline void nfsd4_free_slabs(void) { }
+static inline int nfs4_state_start(void) { return 0; }
+static inline void nfs4_state_shutdown(void) { }
+static inline time_t nfs4_lease_time(void) { return 0; }
+static inline void nfs4_reset_lease(time_t leasetime) { }
+static inline int nfs4_reset_recoverydir(char *recdir) { return 0; }
+#endif
+
+/*
+ * lockd binding
+ */
+void		nfsd_lockd_init(void);
+void		nfsd_lockd_shutdown(void);
+
+
+/*
+ * These macros provide pre-xdr'ed values for faster operation.
+ */
+#define	nfs_ok			cpu_to_be32(NFS_OK)
+#define	nfserr_perm		cpu_to_be32(NFSERR_PERM)
+#define	nfserr_noent		cpu_to_be32(NFSERR_NOENT)
+#define	nfserr_io		cpu_to_be32(NFSERR_IO)
+#define	nfserr_nxio		cpu_to_be32(NFSERR_NXIO)
+#define	nfserr_eagain		cpu_to_be32(NFSERR_EAGAIN)
+#define	nfserr_acces		cpu_to_be32(NFSERR_ACCES)
+#define	nfserr_exist		cpu_to_be32(NFSERR_EXIST)
+#define	nfserr_xdev		cpu_to_be32(NFSERR_XDEV)
+#define	nfserr_nodev		cpu_to_be32(NFSERR_NODEV)
+#define	nfserr_notdir		cpu_to_be32(NFSERR_NOTDIR)
+#define	nfserr_isdir		cpu_to_be32(NFSERR_ISDIR)
+#define	nfserr_inval		cpu_to_be32(NFSERR_INVAL)
+#define	nfserr_fbig		cpu_to_be32(NFSERR_FBIG)
+#define	nfserr_nospc		cpu_to_be32(NFSERR_NOSPC)
+#define	nfserr_rofs		cpu_to_be32(NFSERR_ROFS)
+#define	nfserr_mlink		cpu_to_be32(NFSERR_MLINK)
+#define	nfserr_opnotsupp	cpu_to_be32(NFSERR_OPNOTSUPP)
+#define	nfserr_nametoolong	cpu_to_be32(NFSERR_NAMETOOLONG)
+#define	nfserr_notempty		cpu_to_be32(NFSERR_NOTEMPTY)
+#define	nfserr_dquot		cpu_to_be32(NFSERR_DQUOT)
+#define	nfserr_stale		cpu_to_be32(NFSERR_STALE)
+#define	nfserr_remote		cpu_to_be32(NFSERR_REMOTE)
+#define	nfserr_wflush		cpu_to_be32(NFSERR_WFLUSH)
+#define	nfserr_badhandle	cpu_to_be32(NFSERR_BADHANDLE)
+#define	nfserr_notsync		cpu_to_be32(NFSERR_NOT_SYNC)
+#define	nfserr_badcookie	cpu_to_be32(NFSERR_BAD_COOKIE)
+#define	nfserr_notsupp		cpu_to_be32(NFSERR_NOTSUPP)
+#define	nfserr_toosmall		cpu_to_be32(NFSERR_TOOSMALL)
+#define	nfserr_serverfault	cpu_to_be32(NFSERR_SERVERFAULT)
+#define	nfserr_badtype		cpu_to_be32(NFSERR_BADTYPE)
+#define	nfserr_jukebox		cpu_to_be32(NFSERR_JUKEBOX)
+#define	nfserr_denied		cpu_to_be32(NFSERR_DENIED)
+#define	nfserr_deadlock		cpu_to_be32(NFSERR_DEADLOCK)
+#define nfserr_expired          cpu_to_be32(NFSERR_EXPIRED)
+#define	nfserr_bad_cookie	cpu_to_be32(NFSERR_BAD_COOKIE)
+#define	nfserr_same		cpu_to_be32(NFSERR_SAME)
+#define	nfserr_clid_inuse	cpu_to_be32(NFSERR_CLID_INUSE)
+#define	nfserr_stale_clientid	cpu_to_be32(NFSERR_STALE_CLIENTID)
+#define	nfserr_resource		cpu_to_be32(NFSERR_RESOURCE)
+#define	nfserr_moved		cpu_to_be32(NFSERR_MOVED)
+#define	nfserr_nofilehandle	cpu_to_be32(NFSERR_NOFILEHANDLE)
+#define	nfserr_minor_vers_mismatch	cpu_to_be32(NFSERR_MINOR_VERS_MISMATCH)
+#define nfserr_share_denied	cpu_to_be32(NFSERR_SHARE_DENIED)
+#define nfserr_stale_stateid	cpu_to_be32(NFSERR_STALE_STATEID)
+#define nfserr_old_stateid	cpu_to_be32(NFSERR_OLD_STATEID)
+#define nfserr_bad_stateid	cpu_to_be32(NFSERR_BAD_STATEID)
+#define nfserr_bad_seqid	cpu_to_be32(NFSERR_BAD_SEQID)
+#define	nfserr_symlink		cpu_to_be32(NFSERR_SYMLINK)
+#define	nfserr_not_same		cpu_to_be32(NFSERR_NOT_SAME)
+#define	nfserr_restorefh	cpu_to_be32(NFSERR_RESTOREFH)
+#define	nfserr_attrnotsupp	cpu_to_be32(NFSERR_ATTRNOTSUPP)
+#define	nfserr_bad_xdr		cpu_to_be32(NFSERR_BAD_XDR)
+#define	nfserr_openmode		cpu_to_be32(NFSERR_OPENMODE)
+#define	nfserr_locks_held	cpu_to_be32(NFSERR_LOCKS_HELD)
+#define	nfserr_op_illegal	cpu_to_be32(NFSERR_OP_ILLEGAL)
+#define	nfserr_grace		cpu_to_be32(NFSERR_GRACE)
+#define	nfserr_no_grace		cpu_to_be32(NFSERR_NO_GRACE)
+#define	nfserr_reclaim_bad	cpu_to_be32(NFSERR_RECLAIM_BAD)
+#define	nfserr_badname		cpu_to_be32(NFSERR_BADNAME)
+#define	nfserr_cb_path_down	cpu_to_be32(NFSERR_CB_PATH_DOWN)
+#define	nfserr_locked		cpu_to_be32(NFSERR_LOCKED)
+#define	nfserr_wrongsec		cpu_to_be32(NFSERR_WRONGSEC)
+#define nfserr_badiomode		cpu_to_be32(NFS4ERR_BADIOMODE)
+#define nfserr_badlayout		cpu_to_be32(NFS4ERR_BADLAYOUT)
+#define nfserr_bad_session_digest	cpu_to_be32(NFS4ERR_BAD_SESSION_DIGEST)
+#define nfserr_badsession		cpu_to_be32(NFS4ERR_BADSESSION)
+#define nfserr_badslot			cpu_to_be32(NFS4ERR_BADSLOT)
+#define nfserr_complete_already		cpu_to_be32(NFS4ERR_COMPLETE_ALREADY)
+#define nfserr_conn_not_bound_to_session cpu_to_be32(NFS4ERR_CONN_NOT_BOUND_TO_SESSION)
+#define nfserr_deleg_already_wanted	cpu_to_be32(NFS4ERR_DELEG_ALREADY_WANTED)
+#define nfserr_back_chan_busy		cpu_to_be32(NFS4ERR_BACK_CHAN_BUSY)
+#define nfserr_layouttrylater		cpu_to_be32(NFS4ERR_LAYOUTTRYLATER)
+#define nfserr_layoutunavailable	cpu_to_be32(NFS4ERR_LAYOUTUNAVAILABLE)
+#define nfserr_nomatching_layout	cpu_to_be32(NFS4ERR_NOMATCHING_LAYOUT)
+#define nfserr_recallconflict		cpu_to_be32(NFS4ERR_RECALLCONFLICT)
+#define nfserr_unknown_layouttype	cpu_to_be32(NFS4ERR_UNKNOWN_LAYOUTTYPE)
+#define nfserr_seq_misordered		cpu_to_be32(NFS4ERR_SEQ_MISORDERED)
+#define nfserr_sequence_pos		cpu_to_be32(NFS4ERR_SEQUENCE_POS)
+#define nfserr_req_too_big		cpu_to_be32(NFS4ERR_REQ_TOO_BIG)
+#define nfserr_rep_too_big		cpu_to_be32(NFS4ERR_REP_TOO_BIG)
+#define nfserr_rep_too_big_to_cache	cpu_to_be32(NFS4ERR_REP_TOO_BIG_TO_CACHE)
+#define nfserr_retry_uncached_rep	cpu_to_be32(NFS4ERR_RETRY_UNCACHED_REP)
+#define nfserr_unsafe_compound		cpu_to_be32(NFS4ERR_UNSAFE_COMPOUND)
+#define nfserr_too_many_ops		cpu_to_be32(NFS4ERR_TOO_MANY_OPS)
+#define nfserr_op_not_in_session	cpu_to_be32(NFS4ERR_OP_NOT_IN_SESSION)
+#define nfserr_hash_alg_unsupp		cpu_to_be32(NFS4ERR_HASH_ALG_UNSUPP)
+#define nfserr_clientid_busy		cpu_to_be32(NFS4ERR_CLIENTID_BUSY)
+#define nfserr_pnfs_io_hole		cpu_to_be32(NFS4ERR_PNFS_IO_HOLE)
+#define nfserr_seq_false_retry		cpu_to_be32(NFS4ERR_SEQ_FALSE_RETRY)
+#define nfserr_bad_high_slot		cpu_to_be32(NFS4ERR_BAD_HIGH_SLOT)
+#define nfserr_deadsession		cpu_to_be32(NFS4ERR_DEADSESSION)
+#define nfserr_encr_alg_unsupp		cpu_to_be32(NFS4ERR_ENCR_ALG_UNSUPP)
+#define nfserr_pnfs_no_layout		cpu_to_be32(NFS4ERR_PNFS_NO_LAYOUT)
+#define nfserr_not_only_op		cpu_to_be32(NFS4ERR_NOT_ONLY_OP)
+#define nfserr_wrong_cred		cpu_to_be32(NFS4ERR_WRONG_CRED)
+#define nfserr_wrong_type		cpu_to_be32(NFS4ERR_WRONG_TYPE)
+#define nfserr_dirdeleg_unavail		cpu_to_be32(NFS4ERR_DIRDELEG_UNAVAIL)
+#define nfserr_reject_deleg		cpu_to_be32(NFS4ERR_REJECT_DELEG)
+#define nfserr_returnconflict		cpu_to_be32(NFS4ERR_RETURNCONFLICT)
+#define nfserr_deleg_revoked		cpu_to_be32(NFS4ERR_DELEG_REVOKED)
+
+/* error codes for internal use */
+/* if a request fails due to kmalloc failure, it gets dropped.
+ *  Client should resend eventually
+ */
+#define	nfserr_dropit		cpu_to_be32(30000)
+/* end-of-file indicator in readdir */
+#define	nfserr_eof		cpu_to_be32(30001)
+/* replay detected */
+#define	nfserr_replay_me	cpu_to_be32(11001)
+/* nfs41 replay detected */
+#define	nfserr_replay_cache	cpu_to_be32(11002)
+
+/* Check for dir entries '.' and '..' */
+#define isdotent(n, l)	(l < 3 && n[0] == '.' && (l == 1 || n[1] == '.'))
+
+/*
+ * Time of server startup
+ */
+extern struct timeval	nfssvc_boot;
+
+#ifdef CONFIG_NFSD_V4
+
+/* before processing a COMPOUND operation, we have to check that there
+ * is enough space in the buffer for XDR encode to succeed.  otherwise,
+ * we might process an operation with side effects, and be unable to
+ * tell the client that the operation succeeded.
+ *
+ * COMPOUND_SLACK_SPACE - this is the minimum bytes of buffer space
+ * needed to encode an "ordinary" _successful_ operation.  (GETATTR,
+ * READ, READDIR, and READLINK have their own buffer checks.)  if we
+ * fall below this level, we fail the next operation with NFS4ERR_RESOURCE.
+ *
+ * COMPOUND_ERR_SLACK_SPACE - this is the minimum bytes of buffer space
+ * needed to encode an operation which has failed with NFS4ERR_RESOURCE.
+ * care is taken to ensure that we never fall below this level for any
+ * reason.
+ */
+#define	COMPOUND_SLACK_SPACE		140    /* OP_GETFH */
+#define COMPOUND_ERR_SLACK_SPACE	12     /* OP_SETATTR */
+
+#define NFSD_LEASE_TIME                 (nfs4_lease_time())
+#define NFSD_LAUNDROMAT_MINTIMEOUT      10   /* seconds */
+
+/*
+ * The following attributes are currently not supported by the NFSv4 server:
+ *    ARCHIVE       (deprecated anyway)
+ *    HIDDEN        (unlikely to be supported any time soon)
+ *    MIMETYPE      (unlikely to be supported any time soon)
+ *    QUOTA_*       (will be supported in a forthcoming patch)
+ *    SYSTEM        (unlikely to be supported any time soon)
+ *    TIME_BACKUP   (unlikely to be supported any time soon)
+ *    TIME_CREATE   (unlikely to be supported any time soon)
+ */
+#define NFSD4_SUPPORTED_ATTRS_WORD0                                                         \
+(FATTR4_WORD0_SUPPORTED_ATTRS   | FATTR4_WORD0_TYPE         | FATTR4_WORD0_FH_EXPIRE_TYPE   \
+ | FATTR4_WORD0_CHANGE          | FATTR4_WORD0_SIZE         | FATTR4_WORD0_LINK_SUPPORT     \
+ | FATTR4_WORD0_SYMLINK_SUPPORT | FATTR4_WORD0_NAMED_ATTR   | FATTR4_WORD0_FSID             \
+ | FATTR4_WORD0_UNIQUE_HANDLES  | FATTR4_WORD0_LEASE_TIME   | FATTR4_WORD0_RDATTR_ERROR     \
+ | FATTR4_WORD0_ACLSUPPORT      | FATTR4_WORD0_CANSETTIME   | FATTR4_WORD0_CASE_INSENSITIVE \
+ | FATTR4_WORD0_CASE_PRESERVING | FATTR4_WORD0_CHOWN_RESTRICTED                             \
+ | FATTR4_WORD0_FILEHANDLE      | FATTR4_WORD0_FILEID       | FATTR4_WORD0_FILES_AVAIL      \
+ | FATTR4_WORD0_FILES_FREE      | FATTR4_WORD0_FILES_TOTAL  | FATTR4_WORD0_FS_LOCATIONS | FATTR4_WORD0_HOMOGENEOUS      \
+ | FATTR4_WORD0_MAXFILESIZE     | FATTR4_WORD0_MAXLINK      | FATTR4_WORD0_MAXNAME          \
+ | FATTR4_WORD0_MAXREAD         | FATTR4_WORD0_MAXWRITE     | FATTR4_WORD0_ACL)
+
+#define NFSD4_SUPPORTED_ATTRS_WORD1                                                         \
+(FATTR4_WORD1_MODE              | FATTR4_WORD1_NO_TRUNC     | FATTR4_WORD1_NUMLINKS         \
+ | FATTR4_WORD1_OWNER	        | FATTR4_WORD1_OWNER_GROUP  | FATTR4_WORD1_RAWDEV           \
+ | FATTR4_WORD1_SPACE_AVAIL     | FATTR4_WORD1_SPACE_FREE   | FATTR4_WORD1_SPACE_TOTAL      \
+ | FATTR4_WORD1_SPACE_USED      | FATTR4_WORD1_TIME_ACCESS  | FATTR4_WORD1_TIME_ACCESS_SET  \
+ | FATTR4_WORD1_TIME_DELTA   | FATTR4_WORD1_TIME_METADATA    \
+ | FATTR4_WORD1_TIME_MODIFY     | FATTR4_WORD1_TIME_MODIFY_SET | FATTR4_WORD1_MOUNTED_ON_FILEID)
+
+#define NFSD4_SUPPORTED_ATTRS_WORD2 0
+
+#define NFSD4_1_SUPPORTED_ATTRS_WORD0 \
+	NFSD4_SUPPORTED_ATTRS_WORD0
+
+#define NFSD4_1_SUPPORTED_ATTRS_WORD1 \
+	NFSD4_SUPPORTED_ATTRS_WORD1
+
+#define NFSD4_1_SUPPORTED_ATTRS_WORD2 \
+	(NFSD4_SUPPORTED_ATTRS_WORD2 | FATTR4_WORD2_SUPPATTR_EXCLCREAT)
+
+static inline u32 nfsd_suppattrs0(u32 minorversion)
+{
+	return minorversion ? NFSD4_1_SUPPORTED_ATTRS_WORD0
+			    : NFSD4_SUPPORTED_ATTRS_WORD0;
+}
+
+static inline u32 nfsd_suppattrs1(u32 minorversion)
+{
+	return minorversion ? NFSD4_1_SUPPORTED_ATTRS_WORD1
+			    : NFSD4_SUPPORTED_ATTRS_WORD1;
+}
+
+static inline u32 nfsd_suppattrs2(u32 minorversion)
+{
+	return minorversion ? NFSD4_1_SUPPORTED_ATTRS_WORD2
+			    : NFSD4_SUPPORTED_ATTRS_WORD2;
+}
+
+/* These will return ERR_INVAL if specified in GETATTR or READDIR. */
+#define NFSD_WRITEONLY_ATTRS_WORD1							    \
+(FATTR4_WORD1_TIME_ACCESS_SET   | FATTR4_WORD1_TIME_MODIFY_SET)
+
+/* These are the only attrs allowed in CREATE/OPEN/SETATTR. */
+#define NFSD_WRITEABLE_ATTRS_WORD0                                                          \
+(FATTR4_WORD0_SIZE              | FATTR4_WORD0_ACL                                         )
+#define NFSD_WRITEABLE_ATTRS_WORD1                                                          \
+(FATTR4_WORD1_MODE              | FATTR4_WORD1_OWNER         | FATTR4_WORD1_OWNER_GROUP     \
+ | FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET)
+#define NFSD_WRITEABLE_ATTRS_WORD2 0
+
+#define NFSD_SUPPATTR_EXCLCREAT_WORD0 \
+	NFSD_WRITEABLE_ATTRS_WORD0
+/*
+ * we currently store the exclusive create verifier in the v_{a,m}time
+ * attributes so the client can't set these at create time using EXCLUSIVE4_1
+ */
+#define NFSD_SUPPATTR_EXCLCREAT_WORD1 \
+	(NFSD_WRITEABLE_ATTRS_WORD1 & \
+	 ~(FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET))
+#define NFSD_SUPPATTR_EXCLCREAT_WORD2 \
+	NFSD_WRITEABLE_ATTRS_WORD2
+
+#endif /* CONFIG_NFSD_V4 */
+
+#endif /* LINUX_NFSD_NFSD_H */
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index 739948165034..0eb1c59f5ab8 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -12,7 +12,7 @@
 #include <linux/exportfs.h>
 
 #include <linux/sunrpc/svcauth_gss.h>
-#include <linux/nfsd/nfsd.h>
+#include "nfsd.h"
 #include "vfs.h"
 #include "auth.h"
 
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index b6bd9e0d7cd0..21a5f793c3d1 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -9,8 +9,8 @@
 
 #include <linux/namei.h>
 
-#include <linux/nfsd/cache.h>
-#include <linux/nfsd/xdr.h>
+#include "cache.h"
+#include "xdr.h"
 #include "vfs.h"
 
 typedef struct svc_rqst	svc_rqst;
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index b2d7ffac0357..b520ce10bd15 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -15,11 +15,11 @@
 
 #include <linux/sunrpc/stats.h>
 #include <linux/sunrpc/svcsock.h>
-#include <linux/nfsd/nfsd.h>
-#include <linux/nfsd/cache.h>
 #include <linux/lockd/bind.h>
 #include <linux/nfsacl.h>
 #include <linux/seq_file.h>
+#include "nfsd.h"
+#include "cache.h"
 #include "vfs.h"
 
 #define NFSDDBG_FACILITY	NFSDDBG_SVC
diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
index 5e0603da39e7..3bec831704af 100644
--- a/fs/nfsd/nfsxdr.c
+++ b/fs/nfsd/nfsxdr.c
@@ -6,7 +6,7 @@
  * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
  */
 
-#include <linux/nfsd/xdr.h>
+#include "xdr.h"
 #include "auth.h"
 
 #define NFSDDBG_FACILITY		NFSDDBG_XDR
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
new file mode 100644
index 000000000000..2af75686e0d3
--- /dev/null
+++ b/fs/nfsd/state.h
@@ -0,0 +1,409 @@
+/*
+ *  linux/include/nfsd/state.h
+ *
+ *  Copyright (c) 2001 The Regents of the University of Michigan.
+ *  All rights reserved.
+ *
+ *  Kendrick Smith <kmsmith@umich.edu>
+ *  Andy Adamson <andros@umich.edu>
+ *  
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions
+ *  are met:
+ *  
+ *  1. Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *  2. Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *  3. Neither the name of the University nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef _NFSD4_STATE_H
+#define _NFSD4_STATE_H
+
+#include <linux/nfsd/nfsfh.h>
+
+typedef struct {
+	u32             cl_boot;
+	u32             cl_id;
+} clientid_t;
+
+typedef struct {
+	u32             so_boot;
+	u32             so_stateownerid;
+	u32             so_fileid;
+} stateid_opaque_t;
+
+typedef struct {
+	u32                     si_generation;
+	stateid_opaque_t        si_opaque;
+} stateid_t;
+#define si_boot           si_opaque.so_boot
+#define si_stateownerid   si_opaque.so_stateownerid
+#define si_fileid         si_opaque.so_fileid
+
+#define STATEID_FMT	"(%08x/%08x/%08x/%08x)"
+#define STATEID_VAL(s) \
+	(s)->si_boot, \
+	(s)->si_stateownerid, \
+	(s)->si_fileid, \
+	(s)->si_generation
+
+struct nfsd4_cb_sequence {
+	/* args/res */
+	u32			cbs_minorversion;
+	struct nfs4_client	*cbs_clp;
+};
+
+struct nfs4_delegation {
+	struct list_head	dl_perfile;
+	struct list_head	dl_perclnt;
+	struct list_head	dl_recall_lru;  /* delegation recalled */
+	atomic_t		dl_count;       /* ref count */
+	struct nfs4_client	*dl_client;
+	struct nfs4_file	*dl_file;
+	struct file_lock	*dl_flock;
+	struct file		*dl_vfs_file;
+	u32			dl_type;
+	time_t			dl_time;
+/* For recall: */
+	u32			dl_ident;
+	stateid_t		dl_stateid;
+	struct knfsd_fh		dl_fh;
+	int			dl_retries;
+};
+
+/* client delegation callback info */
+struct nfs4_cb_conn {
+	/* SETCLIENTID info */
+	struct sockaddr_storage	cb_addr;
+	size_t			cb_addrlen;
+	u32                     cb_prog;
+	u32			cb_minorversion;
+	u32                     cb_ident;	/* minorversion 0 only */
+	/* RPC client info */
+	atomic_t		cb_set;     /* successful CB_NULL call */
+	struct rpc_clnt *       cb_client;
+};
+
+/* Maximum number of slots per session. 160 is useful for long haul TCP */
+#define NFSD_MAX_SLOTS_PER_SESSION     160
+/* Maximum number of operations per session compound */
+#define NFSD_MAX_OPS_PER_COMPOUND	16
+/* Maximum  session per slot cache size */
+#define NFSD_SLOT_CACHE_SIZE		1024
+/* Maximum number of NFSD_SLOT_CACHE_SIZE slots per session */
+#define NFSD_CACHE_SIZE_SLOTS_PER_SESSION	32
+#define NFSD_MAX_MEM_PER_SESSION  \
+		(NFSD_CACHE_SIZE_SLOTS_PER_SESSION * NFSD_SLOT_CACHE_SIZE)
+
+struct nfsd4_slot {
+	bool	sl_inuse;
+	bool	sl_cachethis;
+	u16	sl_opcnt;
+	u32	sl_seqid;
+	__be32	sl_status;
+	u32	sl_datalen;
+	char	sl_data[];
+};
+
+struct nfsd4_channel_attrs {
+	u32		headerpadsz;
+	u32		maxreq_sz;
+	u32		maxresp_sz;
+	u32		maxresp_cached;
+	u32		maxops;
+	u32		maxreqs;
+	u32		nr_rdma_attrs;
+	u32		rdma_attrs;
+};
+
+struct nfsd4_create_session {
+	clientid_t			clientid;
+	struct nfs4_sessionid		sessionid;
+	u32				seqid;
+	u32				flags;
+	struct nfsd4_channel_attrs	fore_channel;
+	struct nfsd4_channel_attrs	back_channel;
+	u32				callback_prog;
+	u32				uid;
+	u32				gid;
+};
+
+/* The single slot clientid cache structure */
+struct nfsd4_clid_slot {
+	u32				sl_seqid;
+	__be32				sl_status;
+	struct nfsd4_create_session	sl_cr_ses;
+};
+
+struct nfsd4_session {
+	struct kref		se_ref;
+	struct list_head	se_hash;	/* hash by sessionid */
+	struct list_head	se_perclnt;
+	u32			se_flags;
+	struct nfs4_client	*se_client;	/* for expire_client */
+	struct nfs4_sessionid	se_sessionid;
+	struct nfsd4_channel_attrs se_fchannel;
+	struct nfsd4_channel_attrs se_bchannel;
+	struct nfsd4_slot	*se_slots[];	/* forward channel slots */
+};
+
+static inline void
+nfsd4_put_session(struct nfsd4_session *ses)
+{
+	extern void free_session(struct kref *kref);
+	kref_put(&ses->se_ref, free_session);
+}
+
+static inline void
+nfsd4_get_session(struct nfsd4_session *ses)
+{
+	kref_get(&ses->se_ref);
+}
+
+/* formatted contents of nfs4_sessionid */
+struct nfsd4_sessionid {
+	clientid_t	clientid;
+	u32		sequence;
+	u32		reserved;
+};
+
+#define HEXDIR_LEN     33 /* hex version of 16 byte md5 of cl_name plus '\0' */
+
+/*
+ * struct nfs4_client - one per client.  Clientids live here.
+ * 	o Each nfs4_client is hashed by clientid.
+ *
+ * 	o Each nfs4_clients is also hashed by name 
+ * 	  (the opaque quantity initially sent by the client to identify itself).
+ * 	  
+ *	o cl_perclient list is used to ensure no dangling stateowner references
+ *	  when we expire the nfs4_client
+ */
+struct nfs4_client {
+	struct list_head	cl_idhash; 	/* hash by cl_clientid.id */
+	struct list_head	cl_strhash; 	/* hash by cl_name */
+	struct list_head	cl_openowners;
+	struct list_head	cl_delegations;
+	struct list_head        cl_lru;         /* tail queue */
+	struct xdr_netobj	cl_name; 	/* id generated by client */
+	char                    cl_recdir[HEXDIR_LEN]; /* recovery dir */
+	nfs4_verifier		cl_verifier; 	/* generated by client */
+	time_t                  cl_time;        /* time of last lease renewal */
+	struct sockaddr_storage	cl_addr; 	/* client ipaddress */
+	u32			cl_flavor;	/* setclientid pseudoflavor */
+	char			*cl_principal;	/* setclientid principal name */
+	struct svc_cred		cl_cred; 	/* setclientid principal */
+	clientid_t		cl_clientid;	/* generated by server */
+	nfs4_verifier		cl_confirm;	/* generated by server */
+	struct nfs4_cb_conn	cl_cb_conn;     /* callback info */
+	atomic_t		cl_count;	/* ref count */
+	u32			cl_firststate;	/* recovery dir creation */
+
+	/* for nfs41 */
+	struct list_head	cl_sessions;
+	struct nfsd4_clid_slot	cl_cs_slot;	/* create_session slot */
+	u32			cl_exchange_flags;
+	struct nfs4_sessionid	cl_sessionid;
+
+	/* for nfs41 callbacks */
+	/* We currently support a single back channel with a single slot */
+	unsigned long		cl_cb_slot_busy;
+	u32			cl_cb_seq_nr;
+	struct svc_xprt		*cl_cb_xprt;	/* 4.1 callback transport */
+	struct rpc_wait_queue	cl_cb_waitq;	/* backchannel callers may */
+						/* wait here for slots */
+};
+
+/* struct nfs4_client_reset
+ * one per old client. Populates reset_str_hashtbl. Filled from conf_id_hashtbl
+ * upon lease reset, or from upcall to state_daemon (to read in state
+ * from non-volitile storage) upon reboot.
+ */
+struct nfs4_client_reclaim {
+	struct list_head	cr_strhash;	/* hash by cr_name */
+	char			cr_recdir[HEXDIR_LEN]; /* recover dir */
+};
+
+static inline void
+update_stateid(stateid_t *stateid)
+{
+	stateid->si_generation++;
+}
+
+/* A reasonable value for REPLAY_ISIZE was estimated as follows:  
+ * The OPEN response, typically the largest, requires 
+ *   4(status) + 8(stateid) + 20(changeinfo) + 4(rflags) +  8(verifier) + 
+ *   4(deleg. type) + 8(deleg. stateid) + 4(deleg. recall flag) + 
+ *   20(deleg. space limit) + ~32(deleg. ace) = 112 bytes 
+ */
+
+#define NFSD4_REPLAY_ISIZE       112 
+
+/*
+ * Replay buffer, where the result of the last seqid-mutating operation 
+ * is cached. 
+ */
+struct nfs4_replay {
+	__be32			rp_status;
+	unsigned int		rp_buflen;
+	char			*rp_buf;
+	unsigned		intrp_allocated;
+	struct knfsd_fh		rp_openfh;
+	char			rp_ibuf[NFSD4_REPLAY_ISIZE];
+};
+
+/*
+* nfs4_stateowner can either be an open_owner, or a lock_owner
+*
+*    so_idhash:  stateid_hashtbl[] for open owner, lockstateid_hashtbl[]
+*         for lock_owner
+*    so_strhash: ownerstr_hashtbl[] for open_owner, lock_ownerstr_hashtbl[]
+*         for lock_owner
+*    so_perclient: nfs4_client->cl_perclient entry - used when nfs4_client
+*         struct is reaped.
+*    so_perfilestate: heads the list of nfs4_stateid (either open or lock) 
+*         and is used to ensure no dangling nfs4_stateid references when we 
+*         release a stateowner.
+*    so_perlockowner: (open) nfs4_stateid->st_perlockowner entry - used when
+*         close is called to reap associated byte-range locks
+*    so_close_lru: (open) stateowner is placed on this list instead of being
+*         reaped (when so_perfilestate is empty) to hold the last close replay.
+*         reaped by laundramat thread after lease period.
+*/
+struct nfs4_stateowner {
+	struct kref		so_ref;
+	struct list_head        so_idhash;   /* hash by so_id */
+	struct list_head        so_strhash;   /* hash by op_name */
+	struct list_head        so_perclient;
+	struct list_head        so_stateids;
+	struct list_head        so_perstateid; /* for lockowners only */
+	struct list_head	so_close_lru; /* tail queue */
+	time_t			so_time; /* time of placement on so_close_lru */
+	int			so_is_open_owner; /* 1=openowner,0=lockowner */
+	u32                     so_id;
+	struct nfs4_client *    so_client;
+	/* after increment in ENCODE_SEQID_OP_TAIL, represents the next
+	 * sequence id expected from the client: */
+	u32                     so_seqid;
+	struct xdr_netobj       so_owner;     /* open owner name */
+	int                     so_confirmed; /* successful OPEN_CONFIRM? */
+	struct nfs4_replay	so_replay;
+};
+
+/*
+*  nfs4_file: a file opened by some number of (open) nfs4_stateowners.
+*    o fi_perfile list is used to search for conflicting 
+*      share_acces, share_deny on the file.
+*/
+struct nfs4_file {
+	atomic_t		fi_ref;
+	struct list_head        fi_hash;    /* hash by "struct inode *" */
+	struct list_head        fi_stateids;
+	struct list_head	fi_delegations;
+	struct inode		*fi_inode;
+	u32                     fi_id;      /* used with stateowner->so_id 
+					     * for stateid_hashtbl hash */
+	bool			fi_had_conflict;
+};
+
+/*
+* nfs4_stateid can either be an open stateid or (eventually) a lock stateid
+*
+* (open)nfs4_stateid: one per (open)nfs4_stateowner, nfs4_file
+*
+* 	st_hash: stateid_hashtbl[] entry or lockstateid_hashtbl entry
+* 	st_perfile: file_hashtbl[] entry.
+* 	st_perfile_state: nfs4_stateowner->so_perfilestate
+*       st_perlockowner: (open stateid) list of lock nfs4_stateowners
+* 	st_access_bmap: used only for open stateid
+* 	st_deny_bmap: used only for open stateid
+*	st_openstp: open stateid lock stateid was derived from
+*
+* XXX: open stateids and lock stateids have diverged sufficiently that
+* we should consider defining separate structs for the two cases.
+*/
+
+struct nfs4_stateid {
+	struct list_head              st_hash; 
+	struct list_head              st_perfile;
+	struct list_head              st_perstateowner;
+	struct list_head              st_lockowners;
+	struct nfs4_stateowner      * st_stateowner;
+	struct nfs4_file            * st_file;
+	stateid_t                     st_stateid;
+	struct file                 * st_vfs_file;
+	unsigned long                 st_access_bmap;
+	unsigned long                 st_deny_bmap;
+	struct nfs4_stateid         * st_openstp;
+};
+
+/* flags for preprocess_seqid_op() */
+#define HAS_SESSION             0x00000001
+#define CONFIRM                 0x00000002
+#define OPEN_STATE              0x00000004
+#define LOCK_STATE              0x00000008
+#define RD_STATE	        0x00000010
+#define WR_STATE	        0x00000020
+#define CLOSE_STATE             0x00000040
+
+#define seqid_mutating_err(err)                       \
+	(((err) != nfserr_stale_clientid) &&    \
+	((err) != nfserr_bad_seqid) &&          \
+	((err) != nfserr_stale_stateid) &&      \
+	((err) != nfserr_bad_stateid))
+
+struct nfsd4_compound_state;
+
+extern __be32 nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate,
+		stateid_t *stateid, int flags, struct file **filp);
+extern void nfs4_lock_state(void);
+extern void nfs4_unlock_state(void);
+extern int nfs4_in_grace(void);
+extern __be32 nfs4_check_open_reclaim(clientid_t *clid);
+extern void put_nfs4_client(struct nfs4_client *clp);
+extern void nfs4_free_stateowner(struct kref *kref);
+extern int set_callback_cred(void);
+extern void nfsd4_probe_callback(struct nfs4_client *clp);
+extern void nfsd4_cb_recall(struct nfs4_delegation *dp);
+extern void nfs4_put_delegation(struct nfs4_delegation *dp);
+extern __be32 nfs4_make_rec_clidname(char *clidname, struct xdr_netobj *clname);
+extern void nfsd4_init_recdir(char *recdir_name);
+extern int nfsd4_recdir_load(void);
+extern void nfsd4_shutdown_recdir(void);
+extern int nfs4_client_to_reclaim(const char *name);
+extern int nfs4_has_reclaimed_state(const char *name, bool use_exchange_id);
+extern void nfsd4_recdir_purge_old(void);
+extern int nfsd4_create_clid_dir(struct nfs4_client *clp);
+extern void nfsd4_remove_clid_dir(struct nfs4_client *clp);
+
+static inline void
+nfs4_put_stateowner(struct nfs4_stateowner *so)
+{
+	kref_put(&so->so_ref, nfs4_free_stateowner);
+}
+
+static inline void
+nfs4_get_stateowner(struct nfs4_stateowner *so)
+{
+	kref_get(&so->so_ref);
+}
+
+#endif   /* NFSD4_STATE_H */
diff --git a/fs/nfsd/stats.c b/fs/nfsd/stats.c
index e3e411e9fe4a..3fc69dfd3091 100644
--- a/fs/nfsd/stats.c
+++ b/fs/nfsd/stats.c
@@ -25,11 +25,11 @@
 
 #include <linux/seq_file.h>
 #include <linux/module.h>
-
 #include <linux/sunrpc/stats.h>
-#include <linux/nfsd/nfsd.h>
 #include <linux/nfsd/stats.h>
 
+#include "nfsd.h"
+
 struct nfsd_stats	nfsdstats;
 struct svc_stat		nfsd_svcstats = {
 	.program	= &nfsd_program,
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 81ce108c114e..04bdba12d21b 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -22,23 +22,25 @@
 #include <linux/fcntl.h>
 #include <linux/namei.h>
 #include <linux/delay.h>
-#include <linux/nfsd/nfsd.h>
-#ifdef CONFIG_NFSD_V3
-#include <linux/nfsd/xdr3.h>
-#endif /* CONFIG_NFSD_V3 */
 #include <linux/quotaops.h>
 #include <linux/fsnotify.h>
 #include <linux/posix_acl_xattr.h>
 #include <linux/xattr.h>
+#include <linux/jhash.h>
+#include <linux/ima.h>
+#include <asm/uaccess.h>
+
+#ifdef CONFIG_NFSD_V3
+#include "xdr3.h"
+#endif /* CONFIG_NFSD_V3 */
+
 #ifdef CONFIG_NFSD_V4
 #include <linux/nfs4_acl.h>
 #include <linux/nfsd_idmap.h>
 #endif /* CONFIG_NFSD_V4 */
-#include <linux/jhash.h>
-#include <linux/ima.h>
-#include "vfs.h"
 
-#include <asm/uaccess.h>
+#include "nfsd.h"
+#include "vfs.h"
 
 #define NFSDDBG_FACILITY		NFSDDBG_FILEOP
 
diff --git a/fs/nfsd/xdr.h b/fs/nfsd/xdr.h
new file mode 100644
index 000000000000..235ee5c3be54
--- /dev/null
+++ b/fs/nfsd/xdr.h
@@ -0,0 +1,176 @@
+/*
+ * linux/include/linux/nfsd/xdr.h
+ *
+ * XDR types for nfsd. This is mainly a typing exercise.
+ */
+
+#ifndef LINUX_NFSD_H
+#define LINUX_NFSD_H
+
+#include <linux/vfs.h>
+#include "nfsd.h"
+
+struct nfsd_fhandle {
+	struct svc_fh		fh;
+};
+
+struct nfsd_sattrargs {
+	struct svc_fh		fh;
+	struct iattr		attrs;
+};
+
+struct nfsd_diropargs {
+	struct svc_fh		fh;
+	char *			name;
+	unsigned int		len;
+};
+
+struct nfsd_readargs {
+	struct svc_fh		fh;
+	__u32			offset;
+	__u32			count;
+	int			vlen;
+};
+
+struct nfsd_writeargs {
+	svc_fh			fh;
+	__u32			offset;
+	int			len;
+	int			vlen;
+};
+
+struct nfsd_createargs {
+	struct svc_fh		fh;
+	char *			name;
+	unsigned int		len;
+	struct iattr		attrs;
+};
+
+struct nfsd_renameargs {
+	struct svc_fh		ffh;
+	char *			fname;
+	unsigned int		flen;
+	struct svc_fh		tfh;
+	char *			tname;
+	unsigned int		tlen;
+};
+
+struct nfsd_readlinkargs {
+	struct svc_fh		fh;
+	char *			buffer;
+};
+	
+struct nfsd_linkargs {
+	struct svc_fh		ffh;
+	struct svc_fh		tfh;
+	char *			tname;
+	unsigned int		tlen;
+};
+
+struct nfsd_symlinkargs {
+	struct svc_fh		ffh;
+	char *			fname;
+	unsigned int		flen;
+	char *			tname;
+	unsigned int		tlen;
+	struct iattr		attrs;
+};
+
+struct nfsd_readdirargs {
+	struct svc_fh		fh;
+	__u32			cookie;
+	__u32			count;
+	__be32 *		buffer;
+};
+
+struct nfsd_attrstat {
+	struct svc_fh		fh;
+	struct kstat		stat;
+};
+
+struct nfsd_diropres  {
+	struct svc_fh		fh;
+	struct kstat		stat;
+};
+
+struct nfsd_readlinkres {
+	int			len;
+};
+
+struct nfsd_readres {
+	struct svc_fh		fh;
+	unsigned long		count;
+	struct kstat		stat;
+};
+
+struct nfsd_readdirres {
+	int			count;
+
+	struct readdir_cd	common;
+	__be32 *		buffer;
+	int			buflen;
+	__be32 *		offset;
+};
+
+struct nfsd_statfsres {
+	struct kstatfs		stats;
+};
+
+/*
+ * Storage requirements for XDR arguments and results.
+ */
+union nfsd_xdrstore {
+	struct nfsd_sattrargs	sattr;
+	struct nfsd_diropargs	dirop;
+	struct nfsd_readargs	read;
+	struct nfsd_writeargs	write;
+	struct nfsd_createargs	create;
+	struct nfsd_renameargs	rename;
+	struct nfsd_linkargs	link;
+	struct nfsd_symlinkargs	symlink;
+	struct nfsd_readdirargs	readdir;
+};
+
+#define NFS2_SVC_XDRSIZE	sizeof(union nfsd_xdrstore)
+
+
+int nfssvc_decode_void(struct svc_rqst *, __be32 *, void *);
+int nfssvc_decode_fhandle(struct svc_rqst *, __be32 *, struct nfsd_fhandle *);
+int nfssvc_decode_sattrargs(struct svc_rqst *, __be32 *,
+				struct nfsd_sattrargs *);
+int nfssvc_decode_diropargs(struct svc_rqst *, __be32 *,
+				struct nfsd_diropargs *);
+int nfssvc_decode_readargs(struct svc_rqst *, __be32 *,
+				struct nfsd_readargs *);
+int nfssvc_decode_writeargs(struct svc_rqst *, __be32 *,
+				struct nfsd_writeargs *);
+int nfssvc_decode_createargs(struct svc_rqst *, __be32 *,
+				struct nfsd_createargs *);
+int nfssvc_decode_renameargs(struct svc_rqst *, __be32 *,
+				struct nfsd_renameargs *);
+int nfssvc_decode_readlinkargs(struct svc_rqst *, __be32 *,
+				struct nfsd_readlinkargs *);
+int nfssvc_decode_linkargs(struct svc_rqst *, __be32 *,
+				struct nfsd_linkargs *);
+int nfssvc_decode_symlinkargs(struct svc_rqst *, __be32 *,
+				struct nfsd_symlinkargs *);
+int nfssvc_decode_readdirargs(struct svc_rqst *, __be32 *,
+				struct nfsd_readdirargs *);
+int nfssvc_encode_void(struct svc_rqst *, __be32 *, void *);
+int nfssvc_encode_attrstat(struct svc_rqst *, __be32 *, struct nfsd_attrstat *);
+int nfssvc_encode_diropres(struct svc_rqst *, __be32 *, struct nfsd_diropres *);
+int nfssvc_encode_readlinkres(struct svc_rqst *, __be32 *, struct nfsd_readlinkres *);
+int nfssvc_encode_readres(struct svc_rqst *, __be32 *, struct nfsd_readres *);
+int nfssvc_encode_statfsres(struct svc_rqst *, __be32 *, struct nfsd_statfsres *);
+int nfssvc_encode_readdirres(struct svc_rqst *, __be32 *, struct nfsd_readdirres *);
+
+int nfssvc_encode_entry(void *, const char *name,
+			int namlen, loff_t offset, u64 ino, unsigned int);
+
+int nfssvc_release_fhandle(struct svc_rqst *, __be32 *, struct nfsd_fhandle *);
+
+/* Helper functions for NFSv2 ACL code */
+__be32 *nfs2svc_encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp);
+__be32 *nfs2svc_decode_fh(__be32 *p, struct svc_fh *fhp);
+
+#endif /* LINUX_NFSD_H */
diff --git a/fs/nfsd/xdr3.h b/fs/nfsd/xdr3.h
new file mode 100644
index 000000000000..b330756973cf
--- /dev/null
+++ b/fs/nfsd/xdr3.h
@@ -0,0 +1,346 @@
+/*
+ * linux/include/linux/nfsd/xdr3.h
+ *
+ * XDR types for NFSv3 in nfsd.
+ *
+ * Copyright (C) 1996-1998, Olaf Kirch <okir@monad.swb.de>
+ */
+
+#ifndef _LINUX_NFSD_XDR3_H
+#define _LINUX_NFSD_XDR3_H
+
+#include "xdr.h"
+
+struct nfsd3_sattrargs {
+	struct svc_fh		fh;
+	struct iattr		attrs;
+	int			check_guard;
+	time_t			guardtime;
+};
+
+struct nfsd3_diropargs {
+	struct svc_fh		fh;
+	char *			name;
+	unsigned int		len;
+};
+
+struct nfsd3_accessargs {
+	struct svc_fh		fh;
+	unsigned int		access;
+};
+
+struct nfsd3_readargs {
+	struct svc_fh		fh;
+	__u64			offset;
+	__u32			count;
+	int			vlen;
+};
+
+struct nfsd3_writeargs {
+	svc_fh			fh;
+	__u64			offset;
+	__u32			count;
+	int			stable;
+	__u32			len;
+	int			vlen;
+};
+
+struct nfsd3_createargs {
+	struct svc_fh		fh;
+	char *			name;
+	unsigned int		len;
+	int			createmode;
+	struct iattr		attrs;
+	__be32 *		verf;
+};
+
+struct nfsd3_mknodargs {
+	struct svc_fh		fh;
+	char *			name;
+	unsigned int		len;
+	__u32			ftype;
+	__u32			major, minor;
+	struct iattr		attrs;
+};
+
+struct nfsd3_renameargs {
+	struct svc_fh		ffh;
+	char *			fname;
+	unsigned int		flen;
+	struct svc_fh		tfh;
+	char *			tname;
+	unsigned int		tlen;
+};
+
+struct nfsd3_readlinkargs {
+	struct svc_fh		fh;
+	char *			buffer;
+};
+
+struct nfsd3_linkargs {
+	struct svc_fh		ffh;
+	struct svc_fh		tfh;
+	char *			tname;
+	unsigned int		tlen;
+};
+
+struct nfsd3_symlinkargs {
+	struct svc_fh		ffh;
+	char *			fname;
+	unsigned int		flen;
+	char *			tname;
+	unsigned int		tlen;
+	struct iattr		attrs;
+};
+
+struct nfsd3_readdirargs {
+	struct svc_fh		fh;
+	__u64			cookie;
+	__u32			dircount;
+	__u32			count;
+	__be32 *		verf;
+	__be32 *		buffer;
+};
+
+struct nfsd3_commitargs {
+	struct svc_fh		fh;
+	__u64			offset;
+	__u32			count;
+};
+
+struct nfsd3_getaclargs {
+	struct svc_fh		fh;
+	int			mask;
+};
+
+struct posix_acl;
+struct nfsd3_setaclargs {
+	struct svc_fh		fh;
+	int			mask;
+	struct posix_acl	*acl_access;
+	struct posix_acl	*acl_default;
+};
+
+struct nfsd3_attrstat {
+	__be32			status;
+	struct svc_fh		fh;
+	struct kstat            stat;
+};
+
+/* LOOKUP, CREATE, MKDIR, SYMLINK, MKNOD */
+struct nfsd3_diropres  {
+	__be32			status;
+	struct svc_fh		dirfh;
+	struct svc_fh		fh;
+};
+
+struct nfsd3_accessres {
+	__be32			status;
+	struct svc_fh		fh;
+	__u32			access;
+};
+
+struct nfsd3_readlinkres {
+	__be32			status;
+	struct svc_fh		fh;
+	__u32			len;
+};
+
+struct nfsd3_readres {
+	__be32			status;
+	struct svc_fh		fh;
+	unsigned long		count;
+	int			eof;
+};
+
+struct nfsd3_writeres {
+	__be32			status;
+	struct svc_fh		fh;
+	unsigned long		count;
+	int			committed;
+};
+
+struct nfsd3_renameres {
+	__be32			status;
+	struct svc_fh		ffh;
+	struct svc_fh		tfh;
+};
+
+struct nfsd3_linkres {
+	__be32			status;
+	struct svc_fh		tfh;
+	struct svc_fh		fh;
+};
+
+struct nfsd3_readdirres {
+	__be32			status;
+	struct svc_fh		fh;
+	int			count;
+	__be32			verf[2];
+
+	struct readdir_cd	common;
+	__be32 *		buffer;
+	int			buflen;
+	__be32 *		offset;
+	__be32 *		offset1;
+	struct svc_rqst *	rqstp;
+
+};
+
+struct nfsd3_fsstatres {
+	__be32			status;
+	struct kstatfs		stats;
+	__u32			invarsec;
+};
+
+struct nfsd3_fsinfores {
+	__be32			status;
+	__u32			f_rtmax;
+	__u32			f_rtpref;
+	__u32			f_rtmult;
+	__u32			f_wtmax;
+	__u32			f_wtpref;
+	__u32			f_wtmult;
+	__u32			f_dtpref;
+	__u64			f_maxfilesize;
+	__u32			f_properties;
+};
+
+struct nfsd3_pathconfres {
+	__be32			status;
+	__u32			p_link_max;
+	__u32			p_name_max;
+	__u32			p_no_trunc;
+	__u32			p_chown_restricted;
+	__u32			p_case_insensitive;
+	__u32			p_case_preserving;
+};
+
+struct nfsd3_commitres {
+	__be32			status;
+	struct svc_fh		fh;
+};
+
+struct nfsd3_getaclres {
+	__be32			status;
+	struct svc_fh		fh;
+	int			mask;
+	struct posix_acl	*acl_access;
+	struct posix_acl	*acl_default;
+};
+
+/* dummy type for release */
+struct nfsd3_fhandle_pair {
+	__u32			dummy;
+	struct svc_fh		fh1;
+	struct svc_fh		fh2;
+};
+
+/*
+ * Storage requirements for XDR arguments and results.
+ */
+union nfsd3_xdrstore {
+	struct nfsd3_sattrargs		sattrargs;
+	struct nfsd3_diropargs		diropargs;
+	struct nfsd3_readargs		readargs;
+	struct nfsd3_writeargs		writeargs;
+	struct nfsd3_createargs		createargs;
+	struct nfsd3_renameargs		renameargs;
+	struct nfsd3_linkargs		linkargs;
+	struct nfsd3_symlinkargs	symlinkargs;
+	struct nfsd3_readdirargs	readdirargs;
+	struct nfsd3_diropres 		diropres;
+	struct nfsd3_accessres		accessres;
+	struct nfsd3_readlinkres	readlinkres;
+	struct nfsd3_readres		readres;
+	struct nfsd3_writeres		writeres;
+	struct nfsd3_renameres		renameres;
+	struct nfsd3_linkres		linkres;
+	struct nfsd3_readdirres		readdirres;
+	struct nfsd3_fsstatres		fsstatres;
+	struct nfsd3_fsinfores		fsinfores;
+	struct nfsd3_pathconfres	pathconfres;
+	struct nfsd3_commitres		commitres;
+	struct nfsd3_getaclres		getaclres;
+};
+
+#define NFS3_SVC_XDRSIZE		sizeof(union nfsd3_xdrstore)
+
+int nfs3svc_decode_fhandle(struct svc_rqst *, __be32 *, struct nfsd_fhandle *);
+int nfs3svc_decode_sattrargs(struct svc_rqst *, __be32 *,
+				struct nfsd3_sattrargs *);
+int nfs3svc_decode_diropargs(struct svc_rqst *, __be32 *,
+				struct nfsd3_diropargs *);
+int nfs3svc_decode_accessargs(struct svc_rqst *, __be32 *,
+				struct nfsd3_accessargs *);
+int nfs3svc_decode_readargs(struct svc_rqst *, __be32 *,
+				struct nfsd3_readargs *);
+int nfs3svc_decode_writeargs(struct svc_rqst *, __be32 *,
+				struct nfsd3_writeargs *);
+int nfs3svc_decode_createargs(struct svc_rqst *, __be32 *,
+				struct nfsd3_createargs *);
+int nfs3svc_decode_mkdirargs(struct svc_rqst *, __be32 *,
+				struct nfsd3_createargs *);
+int nfs3svc_decode_mknodargs(struct svc_rqst *, __be32 *,
+				struct nfsd3_mknodargs *);
+int nfs3svc_decode_renameargs(struct svc_rqst *, __be32 *,
+				struct nfsd3_renameargs *);
+int nfs3svc_decode_readlinkargs(struct svc_rqst *, __be32 *,
+				struct nfsd3_readlinkargs *);
+int nfs3svc_decode_linkargs(struct svc_rqst *, __be32 *,
+				struct nfsd3_linkargs *);
+int nfs3svc_decode_symlinkargs(struct svc_rqst *, __be32 *,
+				struct nfsd3_symlinkargs *);
+int nfs3svc_decode_readdirargs(struct svc_rqst *, __be32 *,
+				struct nfsd3_readdirargs *);
+int nfs3svc_decode_readdirplusargs(struct svc_rqst *, __be32 *,
+				struct nfsd3_readdirargs *);
+int nfs3svc_decode_commitargs(struct svc_rqst *, __be32 *,
+				struct nfsd3_commitargs *);
+int nfs3svc_encode_voidres(struct svc_rqst *, __be32 *, void *);
+int nfs3svc_encode_attrstat(struct svc_rqst *, __be32 *,
+				struct nfsd3_attrstat *);
+int nfs3svc_encode_wccstat(struct svc_rqst *, __be32 *,
+				struct nfsd3_attrstat *);
+int nfs3svc_encode_diropres(struct svc_rqst *, __be32 *,
+				struct nfsd3_diropres *);
+int nfs3svc_encode_accessres(struct svc_rqst *, __be32 *,
+				struct nfsd3_accessres *);
+int nfs3svc_encode_readlinkres(struct svc_rqst *, __be32 *,
+				struct nfsd3_readlinkres *);
+int nfs3svc_encode_readres(struct svc_rqst *, __be32 *, struct nfsd3_readres *);
+int nfs3svc_encode_writeres(struct svc_rqst *, __be32 *, struct nfsd3_writeres *);
+int nfs3svc_encode_createres(struct svc_rqst *, __be32 *,
+				struct nfsd3_diropres *);
+int nfs3svc_encode_renameres(struct svc_rqst *, __be32 *,
+				struct nfsd3_renameres *);
+int nfs3svc_encode_linkres(struct svc_rqst *, __be32 *,
+				struct nfsd3_linkres *);
+int nfs3svc_encode_readdirres(struct svc_rqst *, __be32 *,
+				struct nfsd3_readdirres *);
+int nfs3svc_encode_fsstatres(struct svc_rqst *, __be32 *,
+				struct nfsd3_fsstatres *);
+int nfs3svc_encode_fsinfores(struct svc_rqst *, __be32 *,
+				struct nfsd3_fsinfores *);
+int nfs3svc_encode_pathconfres(struct svc_rqst *, __be32 *,
+				struct nfsd3_pathconfres *);
+int nfs3svc_encode_commitres(struct svc_rqst *, __be32 *,
+				struct nfsd3_commitres *);
+
+int nfs3svc_release_fhandle(struct svc_rqst *, __be32 *,
+				struct nfsd3_attrstat *);
+int nfs3svc_release_fhandle2(struct svc_rqst *, __be32 *,
+				struct nfsd3_fhandle_pair *);
+int nfs3svc_encode_entry(void *, const char *name,
+				int namlen, loff_t offset, u64 ino,
+				unsigned int);
+int nfs3svc_encode_entry_plus(void *, const char *name,
+				int namlen, loff_t offset, u64 ino,
+				unsigned int);
+/* Helper functions for NFSv3 ACL code */
+__be32 *nfs3svc_encode_post_op_attr(struct svc_rqst *rqstp, __be32 *p,
+				struct svc_fh *fhp);
+__be32 *nfs3svc_decode_fh(__be32 *p, struct svc_fh *fhp);
+
+
+#endif /* _LINUX_NFSD_XDR3_H */
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
new file mode 100644
index 000000000000..83202a1cf07b
--- /dev/null
+++ b/fs/nfsd/xdr4.h
@@ -0,0 +1,564 @@
+/*
+ *  include/linux/nfsd/xdr4.h
+ *
+ *  Server-side types for NFSv4.
+ *
+ *  Copyright (c) 2002 The Regents of the University of Michigan.
+ *  All rights reserved.
+ *
+ *  Kendrick Smith <kmsmith@umich.edu>
+ *  Andy Adamson   <andros@umich.edu>
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions
+ *  are met:
+ *
+ *  1. Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *  2. Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *  3. Neither the name of the University nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef _LINUX_NFSD_XDR4_H
+#define _LINUX_NFSD_XDR4_H
+
+#include "state.h"
+#include "nfsd.h"
+
+#define NFSD4_MAX_TAGLEN	128
+#define XDR_LEN(n)                     (((n) + 3) & ~3)
+
+struct nfsd4_compound_state {
+	struct svc_fh		current_fh;
+	struct svc_fh		save_fh;
+	struct nfs4_stateowner	*replay_owner;
+	/* For sessions DRC */
+	struct nfsd4_session	*session;
+	struct nfsd4_slot	*slot;
+	__be32			*datap;
+	size_t			iovlen;
+	u32			minorversion;
+	u32			status;
+};
+
+static inline bool nfsd4_has_session(struct nfsd4_compound_state *cs)
+{
+	return cs->slot != NULL;
+}
+
+struct nfsd4_change_info {
+	u32		atomic;
+	bool		change_supported;
+	u32		before_ctime_sec;
+	u32		before_ctime_nsec;
+	u64		before_change;
+	u32		after_ctime_sec;
+	u32		after_ctime_nsec;
+	u64		after_change;
+};
+
+struct nfsd4_access {
+	u32		ac_req_access;      /* request */
+	u32		ac_supported;       /* response */
+	u32		ac_resp_access;     /* response */
+};
+
+struct nfsd4_close {
+	u32		cl_seqid;           /* request */
+	stateid_t	cl_stateid;         /* request+response */
+	struct nfs4_stateowner * cl_stateowner;	/* response */
+};
+
+struct nfsd4_commit {
+	u64		co_offset;          /* request */
+	u32		co_count;           /* request */
+	nfs4_verifier	co_verf;            /* response */
+};
+
+struct nfsd4_create {
+	u32		cr_namelen;         /* request */
+	char *		cr_name;            /* request */
+	u32		cr_type;            /* request */
+	union {                             /* request */
+		struct {
+			u32 namelen;
+			char *name;
+		} link;   /* NF4LNK */
+		struct {
+			u32 specdata1;
+			u32 specdata2;
+		} dev;    /* NF4BLK, NF4CHR */
+	} u;
+	u32		cr_bmval[3];        /* request */
+	struct iattr	cr_iattr;           /* request */
+	struct nfsd4_change_info  cr_cinfo; /* response */
+	struct nfs4_acl *cr_acl;
+};
+#define cr_linklen	u.link.namelen
+#define cr_linkname	u.link.name
+#define cr_specdata1	u.dev.specdata1
+#define cr_specdata2	u.dev.specdata2
+
+struct nfsd4_delegreturn {
+	stateid_t	dr_stateid;
+};
+
+struct nfsd4_getattr {
+	u32		ga_bmval[3];        /* request */
+	struct svc_fh	*ga_fhp;            /* response */
+};
+
+struct nfsd4_link {
+	u32		li_namelen;         /* request */
+	char *		li_name;            /* request */
+	struct nfsd4_change_info  li_cinfo; /* response */
+};
+
+struct nfsd4_lock_denied {
+	clientid_t	ld_clientid;
+	struct nfs4_stateowner   *ld_sop;
+	u64             ld_start;
+	u64             ld_length;
+	u32             ld_type;
+};
+
+struct nfsd4_lock {
+	/* request */
+	u32             lk_type;
+	u32             lk_reclaim;         /* boolean */
+	u64             lk_offset;
+	u64             lk_length;
+	u32             lk_is_new;
+	union {
+		struct {
+			u32             open_seqid;
+			stateid_t       open_stateid;
+			u32             lock_seqid;
+			clientid_t      clientid;
+			struct xdr_netobj owner;
+		} new;
+		struct {
+			stateid_t       lock_stateid;
+			u32             lock_seqid;
+		} old;
+	} v;
+
+	/* response */
+	union {
+		struct {
+			stateid_t               stateid;
+		} ok;
+		struct nfsd4_lock_denied        denied;
+	} u;
+	/* The lk_replay_owner is the open owner in the open_to_lock_owner
+	 * case and the lock owner otherwise: */
+	struct nfs4_stateowner *lk_replay_owner;
+};
+#define lk_new_open_seqid       v.new.open_seqid
+#define lk_new_open_stateid     v.new.open_stateid
+#define lk_new_lock_seqid       v.new.lock_seqid
+#define lk_new_clientid         v.new.clientid
+#define lk_new_owner            v.new.owner
+#define lk_old_lock_stateid     v.old.lock_stateid
+#define lk_old_lock_seqid       v.old.lock_seqid
+
+#define lk_rflags       u.ok.rflags
+#define lk_resp_stateid u.ok.stateid
+#define lk_denied       u.denied
+
+
+struct nfsd4_lockt {
+	u32				lt_type;
+	clientid_t			lt_clientid;
+	struct xdr_netobj		lt_owner;
+	u64				lt_offset;
+	u64				lt_length;
+	struct nfs4_stateowner * 	lt_stateowner;
+	struct nfsd4_lock_denied  	lt_denied;
+};
+
+ 
+struct nfsd4_locku {
+	u32             lu_type;
+	u32             lu_seqid;
+	stateid_t       lu_stateid;
+	u64             lu_offset;
+	u64             lu_length;
+	struct nfs4_stateowner  *lu_stateowner;
+};
+
+
+struct nfsd4_lookup {
+	u32		lo_len;             /* request */
+	char *		lo_name;            /* request */
+};
+
+struct nfsd4_putfh {
+	u32		pf_fhlen;           /* request */
+	char		*pf_fhval;          /* request */
+};
+
+struct nfsd4_open {
+	u32		op_claim_type;      /* request */
+	struct xdr_netobj op_fname;	    /* request - everything but CLAIM_PREV */
+	u32		op_delegate_type;   /* request - CLAIM_PREV only */
+	stateid_t       op_delegate_stateid; /* request - response */
+	u32		op_create;     	    /* request */
+	u32		op_createmode;      /* request */
+	u32		op_bmval[3];        /* request */
+	struct iattr	iattr;              /* UNCHECKED4, GUARDED4, EXCLUSIVE4_1 */
+	nfs4_verifier	verf;               /* EXCLUSIVE4 */
+	clientid_t	op_clientid;        /* request */
+	struct xdr_netobj op_owner;           /* request */
+	u32		op_seqid;           /* request */
+	u32		op_share_access;    /* request */
+	u32		op_share_deny;      /* request */
+	stateid_t	op_stateid;         /* response */
+	u32		op_recall;          /* recall */
+	struct nfsd4_change_info  op_cinfo; /* response */
+	u32		op_rflags;          /* response */
+	int		op_truncate;        /* used during processing */
+	struct nfs4_stateowner *op_stateowner; /* used during processing */
+	struct nfs4_acl *op_acl;
+};
+#define op_iattr	iattr
+#define op_verf		verf
+
+struct nfsd4_open_confirm {
+	stateid_t	oc_req_stateid		/* request */;
+	u32		oc_seqid    		/* request */;
+	stateid_t	oc_resp_stateid		/* response */;
+	struct nfs4_stateowner * oc_stateowner;	/* response */
+};
+
+struct nfsd4_open_downgrade {
+	stateid_t       od_stateid;
+	u32             od_seqid;
+	u32             od_share_access;
+	u32             od_share_deny;
+	struct nfs4_stateowner *od_stateowner;
+};
+
+
+struct nfsd4_read {
+	stateid_t	rd_stateid;         /* request */
+	u64		rd_offset;          /* request */
+	u32		rd_length;          /* request */
+	int		rd_vlen;
+	struct file     *rd_filp;
+	
+	struct svc_rqst *rd_rqstp;          /* response */
+	struct svc_fh * rd_fhp;             /* response */
+};
+
+struct nfsd4_readdir {
+	u64		rd_cookie;          /* request */
+	nfs4_verifier	rd_verf;            /* request */
+	u32		rd_dircount;        /* request */
+	u32		rd_maxcount;        /* request */
+	u32		rd_bmval[3];        /* request */
+	struct svc_rqst *rd_rqstp;          /* response */
+	struct svc_fh * rd_fhp;             /* response */
+
+	struct readdir_cd	common;
+	__be32 *		buffer;
+	int			buflen;
+	__be32 *		offset;
+};
+
+struct nfsd4_release_lockowner {
+	clientid_t        rl_clientid;
+	struct xdr_netobj rl_owner;
+};
+struct nfsd4_readlink {
+	struct svc_rqst *rl_rqstp;          /* request */
+	struct svc_fh *	rl_fhp;             /* request */
+};
+
+struct nfsd4_remove {
+	u32		rm_namelen;         /* request */
+	char *		rm_name;            /* request */
+	struct nfsd4_change_info  rm_cinfo; /* response */
+};
+
+struct nfsd4_rename {
+	u32		rn_snamelen;        /* request */
+	char *		rn_sname;           /* request */
+	u32		rn_tnamelen;        /* request */
+	char *		rn_tname;           /* request */
+	struct nfsd4_change_info  rn_sinfo; /* response */
+	struct nfsd4_change_info  rn_tinfo; /* response */
+};
+
+struct nfsd4_secinfo {
+	u32 si_namelen;					/* request */
+	char *si_name;					/* request */
+	struct svc_export *si_exp;			/* response */
+};
+
+struct nfsd4_setattr {
+	stateid_t	sa_stateid;         /* request */
+	u32		sa_bmval[3];        /* request */
+	struct iattr	sa_iattr;           /* request */
+	struct nfs4_acl *sa_acl;
+};
+
+struct nfsd4_setclientid {
+	nfs4_verifier	se_verf;            /* request */
+	u32		se_namelen;         /* request */
+	char *		se_name;            /* request */
+	u32		se_callback_prog;   /* request */
+	u32		se_callback_netid_len;  /* request */
+	char *		se_callback_netid_val;  /* request */
+	u32		se_callback_addr_len;   /* request */
+	char *		se_callback_addr_val;   /* request */
+	u32		se_callback_ident;  /* request */
+	clientid_t	se_clientid;        /* response */
+	nfs4_verifier	se_confirm;         /* response */
+};
+
+struct nfsd4_setclientid_confirm {
+	clientid_t	sc_clientid;
+	nfs4_verifier	sc_confirm;
+};
+
+/* also used for NVERIFY */
+struct nfsd4_verify {
+	u32		ve_bmval[3];        /* request */
+	u32		ve_attrlen;         /* request */
+	char *		ve_attrval;         /* request */
+};
+
+struct nfsd4_write {
+	stateid_t	wr_stateid;         /* request */
+	u64		wr_offset;          /* request */
+	u32		wr_stable_how;      /* request */
+	u32		wr_buflen;          /* request */
+	int		wr_vlen;
+
+	u32		wr_bytes_written;   /* response */
+	u32		wr_how_written;     /* response */
+	nfs4_verifier	wr_verifier;        /* response */
+};
+
+struct nfsd4_exchange_id {
+	nfs4_verifier	verifier;
+	struct xdr_netobj clname;
+	u32		flags;
+	clientid_t	clientid;
+	u32		seqid;
+	int		spa_how;
+};
+
+struct nfsd4_sequence {
+	struct nfs4_sessionid	sessionid;		/* request/response */
+	u32			seqid;			/* request/response */
+	u32			slotid;			/* request/response */
+	u32			maxslots;		/* request/response */
+	u32			cachethis;		/* request */
+#if 0
+	u32			target_maxslots;	/* response */
+	u32			status_flags;		/* response */
+#endif /* not yet */
+};
+
+struct nfsd4_destroy_session {
+	struct nfs4_sessionid	sessionid;
+};
+
+struct nfsd4_op {
+	int					opnum;
+	__be32					status;
+	union {
+		struct nfsd4_access		access;
+		struct nfsd4_close		close;
+		struct nfsd4_commit		commit;
+		struct nfsd4_create		create;
+		struct nfsd4_delegreturn	delegreturn;
+		struct nfsd4_getattr		getattr;
+		struct svc_fh *			getfh;
+		struct nfsd4_link		link;
+		struct nfsd4_lock		lock;
+		struct nfsd4_lockt		lockt;
+		struct nfsd4_locku		locku;
+		struct nfsd4_lookup		lookup;
+		struct nfsd4_verify		nverify;
+		struct nfsd4_open		open;
+		struct nfsd4_open_confirm	open_confirm;
+		struct nfsd4_open_downgrade	open_downgrade;
+		struct nfsd4_putfh		putfh;
+		struct nfsd4_read		read;
+		struct nfsd4_readdir		readdir;
+		struct nfsd4_readlink		readlink;
+		struct nfsd4_remove		remove;
+		struct nfsd4_rename		rename;
+		clientid_t			renew;
+		struct nfsd4_secinfo		secinfo;
+		struct nfsd4_setattr		setattr;
+		struct nfsd4_setclientid	setclientid;
+		struct nfsd4_setclientid_confirm setclientid_confirm;
+		struct nfsd4_verify		verify;
+		struct nfsd4_write		write;
+		struct nfsd4_release_lockowner	release_lockowner;
+
+		/* NFSv4.1 */
+		struct nfsd4_exchange_id	exchange_id;
+		struct nfsd4_create_session	create_session;
+		struct nfsd4_destroy_session	destroy_session;
+		struct nfsd4_sequence		sequence;
+	} u;
+	struct nfs4_replay *			replay;
+};
+
+struct nfsd4_compoundargs {
+	/* scratch variables for XDR decode */
+	__be32 *			p;
+	__be32 *			end;
+	struct page **			pagelist;
+	int				pagelen;
+	__be32				tmp[8];
+	__be32 *			tmpp;
+	struct tmpbuf {
+		struct tmpbuf *next;
+		void (*release)(const void *);
+		void *buf;
+	}				*to_free;
+
+	struct svc_rqst			*rqstp;
+
+	u32				taglen;
+	char *				tag;
+	u32				minorversion;
+	u32				opcnt;
+	struct nfsd4_op			*ops;
+	struct nfsd4_op			iops[8];
+};
+
+struct nfsd4_compoundres {
+	/* scratch variables for XDR encode */
+	__be32 *			p;
+	__be32 *			end;
+	struct xdr_buf *		xbuf;
+	struct svc_rqst *		rqstp;
+
+	u32				taglen;
+	char *				tag;
+	u32				opcnt;
+	__be32 *			tagp; /* tag, opcount encode location */
+	struct nfsd4_compound_state	cstate;
+};
+
+static inline bool nfsd4_is_solo_sequence(struct nfsd4_compoundres *resp)
+{
+	struct nfsd4_compoundargs *args = resp->rqstp->rq_argp;
+	return resp->opcnt == 1 && args->ops[0].opnum == OP_SEQUENCE;
+}
+
+static inline bool nfsd4_not_cached(struct nfsd4_compoundres *resp)
+{
+	return !resp->cstate.slot->sl_cachethis || nfsd4_is_solo_sequence(resp);
+}
+
+#define NFS4_SVC_XDRSIZE		sizeof(struct nfsd4_compoundargs)
+
+static inline void
+set_change_info(struct nfsd4_change_info *cinfo, struct svc_fh *fhp)
+{
+	BUG_ON(!fhp->fh_pre_saved || !fhp->fh_post_saved);
+	cinfo->atomic = 1;
+	cinfo->change_supported = IS_I_VERSION(fhp->fh_dentry->d_inode);
+	if (cinfo->change_supported) {
+		cinfo->before_change = fhp->fh_pre_change;
+		cinfo->after_change = fhp->fh_post_change;
+	} else {
+		cinfo->before_ctime_sec = fhp->fh_pre_ctime.tv_sec;
+		cinfo->before_ctime_nsec = fhp->fh_pre_ctime.tv_nsec;
+		cinfo->after_ctime_sec = fhp->fh_post_attr.ctime.tv_sec;
+		cinfo->after_ctime_nsec = fhp->fh_post_attr.ctime.tv_nsec;
+	}
+}
+
+int nfs4svc_encode_voidres(struct svc_rqst *, __be32 *, void *);
+int nfs4svc_decode_compoundargs(struct svc_rqst *, __be32 *,
+		struct nfsd4_compoundargs *);
+int nfs4svc_encode_compoundres(struct svc_rqst *, __be32 *,
+		struct nfsd4_compoundres *);
+void nfsd4_encode_operation(struct nfsd4_compoundres *, struct nfsd4_op *);
+void nfsd4_encode_replay(struct nfsd4_compoundres *resp, struct nfsd4_op *op);
+__be32 nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
+		       struct dentry *dentry, __be32 *buffer, int *countp,
+		       u32 *bmval, struct svc_rqst *, int ignore_crossmnt);
+extern __be32 nfsd4_setclientid(struct svc_rqst *rqstp,
+		struct nfsd4_compound_state *,
+		struct nfsd4_setclientid *setclid);
+extern __be32 nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
+		struct nfsd4_compound_state *,
+		struct nfsd4_setclientid_confirm *setclientid_confirm);
+extern void nfsd4_store_cache_entry(struct nfsd4_compoundres *resp);
+extern __be32 nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp,
+		struct nfsd4_sequence *seq);
+extern __be32 nfsd4_exchange_id(struct svc_rqst *rqstp,
+		struct nfsd4_compound_state *,
+struct nfsd4_exchange_id *);
+		extern __be32 nfsd4_create_session(struct svc_rqst *,
+		struct nfsd4_compound_state *,
+		struct nfsd4_create_session *);
+extern __be32 nfsd4_sequence(struct svc_rqst *,
+		struct nfsd4_compound_state *,
+		struct nfsd4_sequence *);
+extern __be32 nfsd4_destroy_session(struct svc_rqst *,
+		struct nfsd4_compound_state *,
+		struct nfsd4_destroy_session *);
+extern __be32 nfsd4_process_open1(struct nfsd4_compound_state *,
+		struct nfsd4_open *open);
+extern __be32 nfsd4_process_open2(struct svc_rqst *rqstp,
+		struct svc_fh *current_fh, struct nfsd4_open *open);
+extern __be32 nfsd4_open_confirm(struct svc_rqst *rqstp,
+		struct nfsd4_compound_state *, struct nfsd4_open_confirm *oc);
+extern __be32 nfsd4_close(struct svc_rqst *rqstp,
+		struct nfsd4_compound_state *,
+		struct nfsd4_close *close);
+extern __be32 nfsd4_open_downgrade(struct svc_rqst *rqstp,
+		struct nfsd4_compound_state *,
+		struct nfsd4_open_downgrade *od);
+extern __be32 nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *,
+		struct nfsd4_lock *lock);
+extern __be32 nfsd4_lockt(struct svc_rqst *rqstp,
+		struct nfsd4_compound_state *,
+		struct nfsd4_lockt *lockt);
+extern __be32 nfsd4_locku(struct svc_rqst *rqstp,
+		struct nfsd4_compound_state *,
+		struct nfsd4_locku *locku);
+extern __be32
+nfsd4_release_lockowner(struct svc_rqst *rqstp,
+		struct nfsd4_compound_state *,
+		struct nfsd4_release_lockowner *rlockowner);
+extern void nfsd4_release_compoundargs(struct nfsd4_compoundargs *);
+extern __be32 nfsd4_delegreturn(struct svc_rqst *rqstp,
+		struct nfsd4_compound_state *, struct nfsd4_delegreturn *dr);
+extern __be32 nfsd4_renew(struct svc_rqst *rqstp,
+			  struct nfsd4_compound_state *, clientid_t *clid);
+#endif
+
+/*
+ * Local variables:
+ *  c-basic-offset: 8
+ * End:
+ */
diff --git a/include/linux/nfsd/cache.h b/include/linux/nfsd/cache.h
deleted file mode 100644
index a165425dea41..000000000000
--- a/include/linux/nfsd/cache.h
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * include/linux/nfsd/cache.h
- *
- * Request reply cache. This was heavily inspired by the
- * implementation in 4.3BSD/4.4BSD.
- *
- * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
- */
-
-#ifndef NFSCACHE_H
-#define NFSCACHE_H
-
-#include <linux/sunrpc/svc.h>
-
-/*
- * Representation of a reply cache entry.
- */
-struct svc_cacherep {
-	struct hlist_node	c_hash;
-	struct list_head	c_lru;
-
-	unsigned char		c_state,	/* unused, inprog, done */
-				c_type,		/* status, buffer */
-				c_secure : 1;	/* req came from port < 1024 */
-	struct sockaddr_in	c_addr;
-	__be32			c_xid;
-	u32			c_prot;
-	u32			c_proc;
-	u32			c_vers;
-	unsigned long		c_timestamp;
-	union {
-		struct kvec	u_vec;
-		__be32		u_status;
-	}			c_u;
-};
-
-#define c_replvec		c_u.u_vec
-#define c_replstat		c_u.u_status
-
-/* cache entry states */
-enum {
-	RC_UNUSED,
-	RC_INPROG,
-	RC_DONE
-};
-
-/* return values */
-enum {
-	RC_DROPIT,
-	RC_REPLY,
-	RC_DOIT,
-	RC_INTR
-};
-
-/*
- * Cache types.
- * We may want to add more types one day, e.g. for diropres and
- * attrstat replies. Using cache entries with fixed length instead
- * of buffer pointers may be more efficient.
- */
-enum {
-	RC_NOCACHE,
-	RC_REPLSTAT,
-	RC_REPLBUFF,
-};
-
-/*
- * If requests are retransmitted within this interval, they're dropped.
- */
-#define RC_DELAY		(HZ/5)
-
-int	nfsd_reply_cache_init(void);
-void	nfsd_reply_cache_shutdown(void);
-int	nfsd_cache_lookup(struct svc_rqst *, int);
-void	nfsd_cache_update(struct svc_rqst *, int, __be32 *);
-
-#ifdef CONFIG_NFSD_V4
-void	nfsd4_set_statp(struct svc_rqst *rqstp, __be32 *statp);
-#else  /* CONFIG_NFSD_V4 */
-static inline void nfsd4_set_statp(struct svc_rqst *rqstp, __be32 *statp)
-{
-}
-#endif /* CONFIG_NFSD_V4 */
-
-#endif /* NFSCACHE_H */
diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h
deleted file mode 100644
index 74f67c2aca34..000000000000
--- a/include/linux/nfsd/nfsd.h
+++ /dev/null
@@ -1,335 +0,0 @@
-/*
- * linux/include/linux/nfsd/nfsd.h
- *
- * Hodge-podge collection of knfsd-related stuff.
- * I will sort this out later.
- *
- * Copyright (C) 1995-1997 Olaf Kirch <okir@monad.swb.de>
- */
-
-#ifndef LINUX_NFSD_NFSD_H
-#define LINUX_NFSD_NFSD_H
-
-#include <linux/types.h>
-#include <linux/mount.h>
-
-#include <linux/nfsd/debug.h>
-#include <linux/nfsd/export.h>
-#include <linux/nfsd/stats.h>
-/*
- * nfsd version
- */
-#define NFSD_SUPPORTED_MINOR_VERSION	1
-
-struct readdir_cd {
-	__be32			err;	/* 0, nfserr, or nfserr_eof */
-};
-
-
-extern struct svc_program	nfsd_program;
-extern struct svc_version	nfsd_version2, nfsd_version3,
-				nfsd_version4;
-extern u32			nfsd_supported_minorversion;
-extern struct mutex		nfsd_mutex;
-extern struct svc_serv		*nfsd_serv;
-extern spinlock_t		nfsd_drc_lock;
-extern unsigned int		nfsd_drc_max_mem;
-extern unsigned int		nfsd_drc_mem_used;
-
-extern const struct seq_operations nfs_exports_op;
-
-/*
- * Function prototypes.
- */
-int		nfsd_svc(unsigned short port, int nrservs);
-int		nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp);
-
-int		nfsd_nrthreads(void);
-int		nfsd_nrpools(void);
-int		nfsd_get_nrthreads(int n, int *);
-int		nfsd_set_nrthreads(int n, int *);
-
-#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
-#ifdef CONFIG_NFSD_V2_ACL
-extern struct svc_version nfsd_acl_version2;
-#else
-#define nfsd_acl_version2 NULL
-#endif
-#ifdef CONFIG_NFSD_V3_ACL
-extern struct svc_version nfsd_acl_version3;
-#else
-#define nfsd_acl_version3 NULL
-#endif
-#endif
-
-enum vers_op {NFSD_SET, NFSD_CLEAR, NFSD_TEST, NFSD_AVAIL };
-int nfsd_vers(int vers, enum vers_op change);
-int nfsd_minorversion(u32 minorversion, enum vers_op change);
-void nfsd_reset_versions(void);
-int nfsd_create_serv(void);
-
-extern int nfsd_max_blksize;
-
-/* 
- * NFSv4 State
- */
-#ifdef CONFIG_NFSD_V4
-extern unsigned int max_delegations;
-int nfs4_state_init(void);
-void nfsd4_free_slabs(void);
-int nfs4_state_start(void);
-void nfs4_state_shutdown(void);
-time_t nfs4_lease_time(void);
-void nfs4_reset_lease(time_t leasetime);
-int nfs4_reset_recoverydir(char *recdir);
-#else
-static inline int nfs4_state_init(void) { return 0; }
-static inline void nfsd4_free_slabs(void) { }
-static inline int nfs4_state_start(void) { return 0; }
-static inline void nfs4_state_shutdown(void) { }
-static inline time_t nfs4_lease_time(void) { return 0; }
-static inline void nfs4_reset_lease(time_t leasetime) { }
-static inline int nfs4_reset_recoverydir(char *recdir) { return 0; }
-#endif
-
-/*
- * lockd binding
- */
-void		nfsd_lockd_init(void);
-void		nfsd_lockd_shutdown(void);
-
-
-/*
- * These macros provide pre-xdr'ed values for faster operation.
- */
-#define	nfs_ok			cpu_to_be32(NFS_OK)
-#define	nfserr_perm		cpu_to_be32(NFSERR_PERM)
-#define	nfserr_noent		cpu_to_be32(NFSERR_NOENT)
-#define	nfserr_io		cpu_to_be32(NFSERR_IO)
-#define	nfserr_nxio		cpu_to_be32(NFSERR_NXIO)
-#define	nfserr_eagain		cpu_to_be32(NFSERR_EAGAIN)
-#define	nfserr_acces		cpu_to_be32(NFSERR_ACCES)
-#define	nfserr_exist		cpu_to_be32(NFSERR_EXIST)
-#define	nfserr_xdev		cpu_to_be32(NFSERR_XDEV)
-#define	nfserr_nodev		cpu_to_be32(NFSERR_NODEV)
-#define	nfserr_notdir		cpu_to_be32(NFSERR_NOTDIR)
-#define	nfserr_isdir		cpu_to_be32(NFSERR_ISDIR)
-#define	nfserr_inval		cpu_to_be32(NFSERR_INVAL)
-#define	nfserr_fbig		cpu_to_be32(NFSERR_FBIG)
-#define	nfserr_nospc		cpu_to_be32(NFSERR_NOSPC)
-#define	nfserr_rofs		cpu_to_be32(NFSERR_ROFS)
-#define	nfserr_mlink		cpu_to_be32(NFSERR_MLINK)
-#define	nfserr_opnotsupp	cpu_to_be32(NFSERR_OPNOTSUPP)
-#define	nfserr_nametoolong	cpu_to_be32(NFSERR_NAMETOOLONG)
-#define	nfserr_notempty		cpu_to_be32(NFSERR_NOTEMPTY)
-#define	nfserr_dquot		cpu_to_be32(NFSERR_DQUOT)
-#define	nfserr_stale		cpu_to_be32(NFSERR_STALE)
-#define	nfserr_remote		cpu_to_be32(NFSERR_REMOTE)
-#define	nfserr_wflush		cpu_to_be32(NFSERR_WFLUSH)
-#define	nfserr_badhandle	cpu_to_be32(NFSERR_BADHANDLE)
-#define	nfserr_notsync		cpu_to_be32(NFSERR_NOT_SYNC)
-#define	nfserr_badcookie	cpu_to_be32(NFSERR_BAD_COOKIE)
-#define	nfserr_notsupp		cpu_to_be32(NFSERR_NOTSUPP)
-#define	nfserr_toosmall		cpu_to_be32(NFSERR_TOOSMALL)
-#define	nfserr_serverfault	cpu_to_be32(NFSERR_SERVERFAULT)
-#define	nfserr_badtype		cpu_to_be32(NFSERR_BADTYPE)
-#define	nfserr_jukebox		cpu_to_be32(NFSERR_JUKEBOX)
-#define	nfserr_denied		cpu_to_be32(NFSERR_DENIED)
-#define	nfserr_deadlock		cpu_to_be32(NFSERR_DEADLOCK)
-#define nfserr_expired          cpu_to_be32(NFSERR_EXPIRED)
-#define	nfserr_bad_cookie	cpu_to_be32(NFSERR_BAD_COOKIE)
-#define	nfserr_same		cpu_to_be32(NFSERR_SAME)
-#define	nfserr_clid_inuse	cpu_to_be32(NFSERR_CLID_INUSE)
-#define	nfserr_stale_clientid	cpu_to_be32(NFSERR_STALE_CLIENTID)
-#define	nfserr_resource		cpu_to_be32(NFSERR_RESOURCE)
-#define	nfserr_moved		cpu_to_be32(NFSERR_MOVED)
-#define	nfserr_nofilehandle	cpu_to_be32(NFSERR_NOFILEHANDLE)
-#define	nfserr_minor_vers_mismatch	cpu_to_be32(NFSERR_MINOR_VERS_MISMATCH)
-#define nfserr_share_denied	cpu_to_be32(NFSERR_SHARE_DENIED)
-#define nfserr_stale_stateid	cpu_to_be32(NFSERR_STALE_STATEID)
-#define nfserr_old_stateid	cpu_to_be32(NFSERR_OLD_STATEID)
-#define nfserr_bad_stateid	cpu_to_be32(NFSERR_BAD_STATEID)
-#define nfserr_bad_seqid	cpu_to_be32(NFSERR_BAD_SEQID)
-#define	nfserr_symlink		cpu_to_be32(NFSERR_SYMLINK)
-#define	nfserr_not_same		cpu_to_be32(NFSERR_NOT_SAME)
-#define	nfserr_restorefh	cpu_to_be32(NFSERR_RESTOREFH)
-#define	nfserr_attrnotsupp	cpu_to_be32(NFSERR_ATTRNOTSUPP)
-#define	nfserr_bad_xdr		cpu_to_be32(NFSERR_BAD_XDR)
-#define	nfserr_openmode		cpu_to_be32(NFSERR_OPENMODE)
-#define	nfserr_locks_held	cpu_to_be32(NFSERR_LOCKS_HELD)
-#define	nfserr_op_illegal	cpu_to_be32(NFSERR_OP_ILLEGAL)
-#define	nfserr_grace		cpu_to_be32(NFSERR_GRACE)
-#define	nfserr_no_grace		cpu_to_be32(NFSERR_NO_GRACE)
-#define	nfserr_reclaim_bad	cpu_to_be32(NFSERR_RECLAIM_BAD)
-#define	nfserr_badname		cpu_to_be32(NFSERR_BADNAME)
-#define	nfserr_cb_path_down	cpu_to_be32(NFSERR_CB_PATH_DOWN)
-#define	nfserr_locked		cpu_to_be32(NFSERR_LOCKED)
-#define	nfserr_wrongsec		cpu_to_be32(NFSERR_WRONGSEC)
-#define nfserr_badiomode		cpu_to_be32(NFS4ERR_BADIOMODE)
-#define nfserr_badlayout		cpu_to_be32(NFS4ERR_BADLAYOUT)
-#define nfserr_bad_session_digest	cpu_to_be32(NFS4ERR_BAD_SESSION_DIGEST)
-#define nfserr_badsession		cpu_to_be32(NFS4ERR_BADSESSION)
-#define nfserr_badslot			cpu_to_be32(NFS4ERR_BADSLOT)
-#define nfserr_complete_already		cpu_to_be32(NFS4ERR_COMPLETE_ALREADY)
-#define nfserr_conn_not_bound_to_session cpu_to_be32(NFS4ERR_CONN_NOT_BOUND_TO_SESSION)
-#define nfserr_deleg_already_wanted	cpu_to_be32(NFS4ERR_DELEG_ALREADY_WANTED)
-#define nfserr_back_chan_busy		cpu_to_be32(NFS4ERR_BACK_CHAN_BUSY)
-#define nfserr_layouttrylater		cpu_to_be32(NFS4ERR_LAYOUTTRYLATER)
-#define nfserr_layoutunavailable	cpu_to_be32(NFS4ERR_LAYOUTUNAVAILABLE)
-#define nfserr_nomatching_layout	cpu_to_be32(NFS4ERR_NOMATCHING_LAYOUT)
-#define nfserr_recallconflict		cpu_to_be32(NFS4ERR_RECALLCONFLICT)
-#define nfserr_unknown_layouttype	cpu_to_be32(NFS4ERR_UNKNOWN_LAYOUTTYPE)
-#define nfserr_seq_misordered		cpu_to_be32(NFS4ERR_SEQ_MISORDERED)
-#define nfserr_sequence_pos		cpu_to_be32(NFS4ERR_SEQUENCE_POS)
-#define nfserr_req_too_big		cpu_to_be32(NFS4ERR_REQ_TOO_BIG)
-#define nfserr_rep_too_big		cpu_to_be32(NFS4ERR_REP_TOO_BIG)
-#define nfserr_rep_too_big_to_cache	cpu_to_be32(NFS4ERR_REP_TOO_BIG_TO_CACHE)
-#define nfserr_retry_uncached_rep	cpu_to_be32(NFS4ERR_RETRY_UNCACHED_REP)
-#define nfserr_unsafe_compound		cpu_to_be32(NFS4ERR_UNSAFE_COMPOUND)
-#define nfserr_too_many_ops		cpu_to_be32(NFS4ERR_TOO_MANY_OPS)
-#define nfserr_op_not_in_session	cpu_to_be32(NFS4ERR_OP_NOT_IN_SESSION)
-#define nfserr_hash_alg_unsupp		cpu_to_be32(NFS4ERR_HASH_ALG_UNSUPP)
-#define nfserr_clientid_busy		cpu_to_be32(NFS4ERR_CLIENTID_BUSY)
-#define nfserr_pnfs_io_hole		cpu_to_be32(NFS4ERR_PNFS_IO_HOLE)
-#define nfserr_seq_false_retry		cpu_to_be32(NFS4ERR_SEQ_FALSE_RETRY)
-#define nfserr_bad_high_slot		cpu_to_be32(NFS4ERR_BAD_HIGH_SLOT)
-#define nfserr_deadsession		cpu_to_be32(NFS4ERR_DEADSESSION)
-#define nfserr_encr_alg_unsupp		cpu_to_be32(NFS4ERR_ENCR_ALG_UNSUPP)
-#define nfserr_pnfs_no_layout		cpu_to_be32(NFS4ERR_PNFS_NO_LAYOUT)
-#define nfserr_not_only_op		cpu_to_be32(NFS4ERR_NOT_ONLY_OP)
-#define nfserr_wrong_cred		cpu_to_be32(NFS4ERR_WRONG_CRED)
-#define nfserr_wrong_type		cpu_to_be32(NFS4ERR_WRONG_TYPE)
-#define nfserr_dirdeleg_unavail		cpu_to_be32(NFS4ERR_DIRDELEG_UNAVAIL)
-#define nfserr_reject_deleg		cpu_to_be32(NFS4ERR_REJECT_DELEG)
-#define nfserr_returnconflict		cpu_to_be32(NFS4ERR_RETURNCONFLICT)
-#define nfserr_deleg_revoked		cpu_to_be32(NFS4ERR_DELEG_REVOKED)
-
-/* error codes for internal use */
-/* if a request fails due to kmalloc failure, it gets dropped.
- *  Client should resend eventually
- */
-#define	nfserr_dropit		cpu_to_be32(30000)
-/* end-of-file indicator in readdir */
-#define	nfserr_eof		cpu_to_be32(30001)
-/* replay detected */
-#define	nfserr_replay_me	cpu_to_be32(11001)
-/* nfs41 replay detected */
-#define	nfserr_replay_cache	cpu_to_be32(11002)
-
-/* Check for dir entries '.' and '..' */
-#define isdotent(n, l)	(l < 3 && n[0] == '.' && (l == 1 || n[1] == '.'))
-
-/*
- * Time of server startup
- */
-extern struct timeval	nfssvc_boot;
-
-#ifdef CONFIG_NFSD_V4
-
-/* before processing a COMPOUND operation, we have to check that there
- * is enough space in the buffer for XDR encode to succeed.  otherwise,
- * we might process an operation with side effects, and be unable to
- * tell the client that the operation succeeded.
- *
- * COMPOUND_SLACK_SPACE - this is the minimum bytes of buffer space
- * needed to encode an "ordinary" _successful_ operation.  (GETATTR,
- * READ, READDIR, and READLINK have their own buffer checks.)  if we
- * fall below this level, we fail the next operation with NFS4ERR_RESOURCE.
- *
- * COMPOUND_ERR_SLACK_SPACE - this is the minimum bytes of buffer space
- * needed to encode an operation which has failed with NFS4ERR_RESOURCE.
- * care is taken to ensure that we never fall below this level for any
- * reason.
- */
-#define	COMPOUND_SLACK_SPACE		140    /* OP_GETFH */
-#define COMPOUND_ERR_SLACK_SPACE	12     /* OP_SETATTR */
-
-#define NFSD_LEASE_TIME                 (nfs4_lease_time())
-#define NFSD_LAUNDROMAT_MINTIMEOUT      10   /* seconds */
-
-/*
- * The following attributes are currently not supported by the NFSv4 server:
- *    ARCHIVE       (deprecated anyway)
- *    HIDDEN        (unlikely to be supported any time soon)
- *    MIMETYPE      (unlikely to be supported any time soon)
- *    QUOTA_*       (will be supported in a forthcoming patch)
- *    SYSTEM        (unlikely to be supported any time soon)
- *    TIME_BACKUP   (unlikely to be supported any time soon)
- *    TIME_CREATE   (unlikely to be supported any time soon)
- */
-#define NFSD4_SUPPORTED_ATTRS_WORD0                                                         \
-(FATTR4_WORD0_SUPPORTED_ATTRS   | FATTR4_WORD0_TYPE         | FATTR4_WORD0_FH_EXPIRE_TYPE   \
- | FATTR4_WORD0_CHANGE          | FATTR4_WORD0_SIZE         | FATTR4_WORD0_LINK_SUPPORT     \
- | FATTR4_WORD0_SYMLINK_SUPPORT | FATTR4_WORD0_NAMED_ATTR   | FATTR4_WORD0_FSID             \
- | FATTR4_WORD0_UNIQUE_HANDLES  | FATTR4_WORD0_LEASE_TIME   | FATTR4_WORD0_RDATTR_ERROR     \
- | FATTR4_WORD0_ACLSUPPORT      | FATTR4_WORD0_CANSETTIME   | FATTR4_WORD0_CASE_INSENSITIVE \
- | FATTR4_WORD0_CASE_PRESERVING | FATTR4_WORD0_CHOWN_RESTRICTED                             \
- | FATTR4_WORD0_FILEHANDLE      | FATTR4_WORD0_FILEID       | FATTR4_WORD0_FILES_AVAIL      \
- | FATTR4_WORD0_FILES_FREE      | FATTR4_WORD0_FILES_TOTAL  | FATTR4_WORD0_FS_LOCATIONS | FATTR4_WORD0_HOMOGENEOUS      \
- | FATTR4_WORD0_MAXFILESIZE     | FATTR4_WORD0_MAXLINK      | FATTR4_WORD0_MAXNAME          \
- | FATTR4_WORD0_MAXREAD         | FATTR4_WORD0_MAXWRITE     | FATTR4_WORD0_ACL)
-
-#define NFSD4_SUPPORTED_ATTRS_WORD1                                                         \
-(FATTR4_WORD1_MODE              | FATTR4_WORD1_NO_TRUNC     | FATTR4_WORD1_NUMLINKS         \
- | FATTR4_WORD1_OWNER	        | FATTR4_WORD1_OWNER_GROUP  | FATTR4_WORD1_RAWDEV           \
- | FATTR4_WORD1_SPACE_AVAIL     | FATTR4_WORD1_SPACE_FREE   | FATTR4_WORD1_SPACE_TOTAL      \
- | FATTR4_WORD1_SPACE_USED      | FATTR4_WORD1_TIME_ACCESS  | FATTR4_WORD1_TIME_ACCESS_SET  \
- | FATTR4_WORD1_TIME_DELTA   | FATTR4_WORD1_TIME_METADATA    \
- | FATTR4_WORD1_TIME_MODIFY     | FATTR4_WORD1_TIME_MODIFY_SET | FATTR4_WORD1_MOUNTED_ON_FILEID)
-
-#define NFSD4_SUPPORTED_ATTRS_WORD2 0
-
-#define NFSD4_1_SUPPORTED_ATTRS_WORD0 \
-	NFSD4_SUPPORTED_ATTRS_WORD0
-
-#define NFSD4_1_SUPPORTED_ATTRS_WORD1 \
-	NFSD4_SUPPORTED_ATTRS_WORD1
-
-#define NFSD4_1_SUPPORTED_ATTRS_WORD2 \
-	(NFSD4_SUPPORTED_ATTRS_WORD2 | FATTR4_WORD2_SUPPATTR_EXCLCREAT)
-
-static inline u32 nfsd_suppattrs0(u32 minorversion)
-{
-	return minorversion ? NFSD4_1_SUPPORTED_ATTRS_WORD0
-			    : NFSD4_SUPPORTED_ATTRS_WORD0;
-}
-
-static inline u32 nfsd_suppattrs1(u32 minorversion)
-{
-	return minorversion ? NFSD4_1_SUPPORTED_ATTRS_WORD1
-			    : NFSD4_SUPPORTED_ATTRS_WORD1;
-}
-
-static inline u32 nfsd_suppattrs2(u32 minorversion)
-{
-	return minorversion ? NFSD4_1_SUPPORTED_ATTRS_WORD2
-			    : NFSD4_SUPPORTED_ATTRS_WORD2;
-}
-
-/* These will return ERR_INVAL if specified in GETATTR or READDIR. */
-#define NFSD_WRITEONLY_ATTRS_WORD1							    \
-(FATTR4_WORD1_TIME_ACCESS_SET   | FATTR4_WORD1_TIME_MODIFY_SET)
-
-/* These are the only attrs allowed in CREATE/OPEN/SETATTR. */
-#define NFSD_WRITEABLE_ATTRS_WORD0                                                          \
-(FATTR4_WORD0_SIZE              | FATTR4_WORD0_ACL                                         )
-#define NFSD_WRITEABLE_ATTRS_WORD1                                                          \
-(FATTR4_WORD1_MODE              | FATTR4_WORD1_OWNER         | FATTR4_WORD1_OWNER_GROUP     \
- | FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET)
-#define NFSD_WRITEABLE_ATTRS_WORD2 0
-
-#define NFSD_SUPPATTR_EXCLCREAT_WORD0 \
-	NFSD_WRITEABLE_ATTRS_WORD0
-/*
- * we currently store the exclusive create verifier in the v_{a,m}time
- * attributes so the client can't set these at create time using EXCLUSIVE4_1
- */
-#define NFSD_SUPPATTR_EXCLCREAT_WORD1 \
-	(NFSD_WRITEABLE_ATTRS_WORD1 & \
-	 ~(FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET))
-#define NFSD_SUPPATTR_EXCLCREAT_WORD2 \
-	NFSD_WRITEABLE_ATTRS_WORD2
-
-#endif /* CONFIG_NFSD_V4 */
-
-#endif /* LINUX_NFSD_NFSD_H */
diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h
deleted file mode 100644
index 2af75686e0d3..000000000000
--- a/include/linux/nfsd/state.h
+++ /dev/null
@@ -1,409 +0,0 @@
-/*
- *  linux/include/nfsd/state.h
- *
- *  Copyright (c) 2001 The Regents of the University of Michigan.
- *  All rights reserved.
- *
- *  Kendrick Smith <kmsmith@umich.edu>
- *  Andy Adamson <andros@umich.edu>
- *  
- *  Redistribution and use in source and binary forms, with or without
- *  modification, are permitted provided that the following conditions
- *  are met:
- *  
- *  1. Redistributions of source code must retain the above copyright
- *     notice, this list of conditions and the following disclaimer.
- *  2. Redistributions in binary form must reproduce the above copyright
- *     notice, this list of conditions and the following disclaimer in the
- *     documentation and/or other materials provided with the distribution.
- *  3. Neither the name of the University nor the names of its
- *     contributors may be used to endorse or promote products derived
- *     from this software without specific prior written permission.
- *
- *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
- *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- */
-
-#ifndef _NFSD4_STATE_H
-#define _NFSD4_STATE_H
-
-#include <linux/nfsd/nfsfh.h>
-
-typedef struct {
-	u32             cl_boot;
-	u32             cl_id;
-} clientid_t;
-
-typedef struct {
-	u32             so_boot;
-	u32             so_stateownerid;
-	u32             so_fileid;
-} stateid_opaque_t;
-
-typedef struct {
-	u32                     si_generation;
-	stateid_opaque_t        si_opaque;
-} stateid_t;
-#define si_boot           si_opaque.so_boot
-#define si_stateownerid   si_opaque.so_stateownerid
-#define si_fileid         si_opaque.so_fileid
-
-#define STATEID_FMT	"(%08x/%08x/%08x/%08x)"
-#define STATEID_VAL(s) \
-	(s)->si_boot, \
-	(s)->si_stateownerid, \
-	(s)->si_fileid, \
-	(s)->si_generation
-
-struct nfsd4_cb_sequence {
-	/* args/res */
-	u32			cbs_minorversion;
-	struct nfs4_client	*cbs_clp;
-};
-
-struct nfs4_delegation {
-	struct list_head	dl_perfile;
-	struct list_head	dl_perclnt;
-	struct list_head	dl_recall_lru;  /* delegation recalled */
-	atomic_t		dl_count;       /* ref count */
-	struct nfs4_client	*dl_client;
-	struct nfs4_file	*dl_file;
-	struct file_lock	*dl_flock;
-	struct file		*dl_vfs_file;
-	u32			dl_type;
-	time_t			dl_time;
-/* For recall: */
-	u32			dl_ident;
-	stateid_t		dl_stateid;
-	struct knfsd_fh		dl_fh;
-	int			dl_retries;
-};
-
-/* client delegation callback info */
-struct nfs4_cb_conn {
-	/* SETCLIENTID info */
-	struct sockaddr_storage	cb_addr;
-	size_t			cb_addrlen;
-	u32                     cb_prog;
-	u32			cb_minorversion;
-	u32                     cb_ident;	/* minorversion 0 only */
-	/* RPC client info */
-	atomic_t		cb_set;     /* successful CB_NULL call */
-	struct rpc_clnt *       cb_client;
-};
-
-/* Maximum number of slots per session. 160 is useful for long haul TCP */
-#define NFSD_MAX_SLOTS_PER_SESSION     160
-/* Maximum number of operations per session compound */
-#define NFSD_MAX_OPS_PER_COMPOUND	16
-/* Maximum  session per slot cache size */
-#define NFSD_SLOT_CACHE_SIZE		1024
-/* Maximum number of NFSD_SLOT_CACHE_SIZE slots per session */
-#define NFSD_CACHE_SIZE_SLOTS_PER_SESSION	32
-#define NFSD_MAX_MEM_PER_SESSION  \
-		(NFSD_CACHE_SIZE_SLOTS_PER_SESSION * NFSD_SLOT_CACHE_SIZE)
-
-struct nfsd4_slot {
-	bool	sl_inuse;
-	bool	sl_cachethis;
-	u16	sl_opcnt;
-	u32	sl_seqid;
-	__be32	sl_status;
-	u32	sl_datalen;
-	char	sl_data[];
-};
-
-struct nfsd4_channel_attrs {
-	u32		headerpadsz;
-	u32		maxreq_sz;
-	u32		maxresp_sz;
-	u32		maxresp_cached;
-	u32		maxops;
-	u32		maxreqs;
-	u32		nr_rdma_attrs;
-	u32		rdma_attrs;
-};
-
-struct nfsd4_create_session {
-	clientid_t			clientid;
-	struct nfs4_sessionid		sessionid;
-	u32				seqid;
-	u32				flags;
-	struct nfsd4_channel_attrs	fore_channel;
-	struct nfsd4_channel_attrs	back_channel;
-	u32				callback_prog;
-	u32				uid;
-	u32				gid;
-};
-
-/* The single slot clientid cache structure */
-struct nfsd4_clid_slot {
-	u32				sl_seqid;
-	__be32				sl_status;
-	struct nfsd4_create_session	sl_cr_ses;
-};
-
-struct nfsd4_session {
-	struct kref		se_ref;
-	struct list_head	se_hash;	/* hash by sessionid */
-	struct list_head	se_perclnt;
-	u32			se_flags;
-	struct nfs4_client	*se_client;	/* for expire_client */
-	struct nfs4_sessionid	se_sessionid;
-	struct nfsd4_channel_attrs se_fchannel;
-	struct nfsd4_channel_attrs se_bchannel;
-	struct nfsd4_slot	*se_slots[];	/* forward channel slots */
-};
-
-static inline void
-nfsd4_put_session(struct nfsd4_session *ses)
-{
-	extern void free_session(struct kref *kref);
-	kref_put(&ses->se_ref, free_session);
-}
-
-static inline void
-nfsd4_get_session(struct nfsd4_session *ses)
-{
-	kref_get(&ses->se_ref);
-}
-
-/* formatted contents of nfs4_sessionid */
-struct nfsd4_sessionid {
-	clientid_t	clientid;
-	u32		sequence;
-	u32		reserved;
-};
-
-#define HEXDIR_LEN     33 /* hex version of 16 byte md5 of cl_name plus '\0' */
-
-/*
- * struct nfs4_client - one per client.  Clientids live here.
- * 	o Each nfs4_client is hashed by clientid.
- *
- * 	o Each nfs4_clients is also hashed by name 
- * 	  (the opaque quantity initially sent by the client to identify itself).
- * 	  
- *	o cl_perclient list is used to ensure no dangling stateowner references
- *	  when we expire the nfs4_client
- */
-struct nfs4_client {
-	struct list_head	cl_idhash; 	/* hash by cl_clientid.id */
-	struct list_head	cl_strhash; 	/* hash by cl_name */
-	struct list_head	cl_openowners;
-	struct list_head	cl_delegations;
-	struct list_head        cl_lru;         /* tail queue */
-	struct xdr_netobj	cl_name; 	/* id generated by client */
-	char                    cl_recdir[HEXDIR_LEN]; /* recovery dir */
-	nfs4_verifier		cl_verifier; 	/* generated by client */
-	time_t                  cl_time;        /* time of last lease renewal */
-	struct sockaddr_storage	cl_addr; 	/* client ipaddress */
-	u32			cl_flavor;	/* setclientid pseudoflavor */
-	char			*cl_principal;	/* setclientid principal name */
-	struct svc_cred		cl_cred; 	/* setclientid principal */
-	clientid_t		cl_clientid;	/* generated by server */
-	nfs4_verifier		cl_confirm;	/* generated by server */
-	struct nfs4_cb_conn	cl_cb_conn;     /* callback info */
-	atomic_t		cl_count;	/* ref count */
-	u32			cl_firststate;	/* recovery dir creation */
-
-	/* for nfs41 */
-	struct list_head	cl_sessions;
-	struct nfsd4_clid_slot	cl_cs_slot;	/* create_session slot */
-	u32			cl_exchange_flags;
-	struct nfs4_sessionid	cl_sessionid;
-
-	/* for nfs41 callbacks */
-	/* We currently support a single back channel with a single slot */
-	unsigned long		cl_cb_slot_busy;
-	u32			cl_cb_seq_nr;
-	struct svc_xprt		*cl_cb_xprt;	/* 4.1 callback transport */
-	struct rpc_wait_queue	cl_cb_waitq;	/* backchannel callers may */
-						/* wait here for slots */
-};
-
-/* struct nfs4_client_reset
- * one per old client. Populates reset_str_hashtbl. Filled from conf_id_hashtbl
- * upon lease reset, or from upcall to state_daemon (to read in state
- * from non-volitile storage) upon reboot.
- */
-struct nfs4_client_reclaim {
-	struct list_head	cr_strhash;	/* hash by cr_name */
-	char			cr_recdir[HEXDIR_LEN]; /* recover dir */
-};
-
-static inline void
-update_stateid(stateid_t *stateid)
-{
-	stateid->si_generation++;
-}
-
-/* A reasonable value for REPLAY_ISIZE was estimated as follows:  
- * The OPEN response, typically the largest, requires 
- *   4(status) + 8(stateid) + 20(changeinfo) + 4(rflags) +  8(verifier) + 
- *   4(deleg. type) + 8(deleg. stateid) + 4(deleg. recall flag) + 
- *   20(deleg. space limit) + ~32(deleg. ace) = 112 bytes 
- */
-
-#define NFSD4_REPLAY_ISIZE       112 
-
-/*
- * Replay buffer, where the result of the last seqid-mutating operation 
- * is cached. 
- */
-struct nfs4_replay {
-	__be32			rp_status;
-	unsigned int		rp_buflen;
-	char			*rp_buf;
-	unsigned		intrp_allocated;
-	struct knfsd_fh		rp_openfh;
-	char			rp_ibuf[NFSD4_REPLAY_ISIZE];
-};
-
-/*
-* nfs4_stateowner can either be an open_owner, or a lock_owner
-*
-*    so_idhash:  stateid_hashtbl[] for open owner, lockstateid_hashtbl[]
-*         for lock_owner
-*    so_strhash: ownerstr_hashtbl[] for open_owner, lock_ownerstr_hashtbl[]
-*         for lock_owner
-*    so_perclient: nfs4_client->cl_perclient entry - used when nfs4_client
-*         struct is reaped.
-*    so_perfilestate: heads the list of nfs4_stateid (either open or lock) 
-*         and is used to ensure no dangling nfs4_stateid references when we 
-*         release a stateowner.
-*    so_perlockowner: (open) nfs4_stateid->st_perlockowner entry - used when
-*         close is called to reap associated byte-range locks
-*    so_close_lru: (open) stateowner is placed on this list instead of being
-*         reaped (when so_perfilestate is empty) to hold the last close replay.
-*         reaped by laundramat thread after lease period.
-*/
-struct nfs4_stateowner {
-	struct kref		so_ref;
-	struct list_head        so_idhash;   /* hash by so_id */
-	struct list_head        so_strhash;   /* hash by op_name */
-	struct list_head        so_perclient;
-	struct list_head        so_stateids;
-	struct list_head        so_perstateid; /* for lockowners only */
-	struct list_head	so_close_lru; /* tail queue */
-	time_t			so_time; /* time of placement on so_close_lru */
-	int			so_is_open_owner; /* 1=openowner,0=lockowner */
-	u32                     so_id;
-	struct nfs4_client *    so_client;
-	/* after increment in ENCODE_SEQID_OP_TAIL, represents the next
-	 * sequence id expected from the client: */
-	u32                     so_seqid;
-	struct xdr_netobj       so_owner;     /* open owner name */
-	int                     so_confirmed; /* successful OPEN_CONFIRM? */
-	struct nfs4_replay	so_replay;
-};
-
-/*
-*  nfs4_file: a file opened by some number of (open) nfs4_stateowners.
-*    o fi_perfile list is used to search for conflicting 
-*      share_acces, share_deny on the file.
-*/
-struct nfs4_file {
-	atomic_t		fi_ref;
-	struct list_head        fi_hash;    /* hash by "struct inode *" */
-	struct list_head        fi_stateids;
-	struct list_head	fi_delegations;
-	struct inode		*fi_inode;
-	u32                     fi_id;      /* used with stateowner->so_id 
-					     * for stateid_hashtbl hash */
-	bool			fi_had_conflict;
-};
-
-/*
-* nfs4_stateid can either be an open stateid or (eventually) a lock stateid
-*
-* (open)nfs4_stateid: one per (open)nfs4_stateowner, nfs4_file
-*
-* 	st_hash: stateid_hashtbl[] entry or lockstateid_hashtbl entry
-* 	st_perfile: file_hashtbl[] entry.
-* 	st_perfile_state: nfs4_stateowner->so_perfilestate
-*       st_perlockowner: (open stateid) list of lock nfs4_stateowners
-* 	st_access_bmap: used only for open stateid
-* 	st_deny_bmap: used only for open stateid
-*	st_openstp: open stateid lock stateid was derived from
-*
-* XXX: open stateids and lock stateids have diverged sufficiently that
-* we should consider defining separate structs for the two cases.
-*/
-
-struct nfs4_stateid {
-	struct list_head              st_hash; 
-	struct list_head              st_perfile;
-	struct list_head              st_perstateowner;
-	struct list_head              st_lockowners;
-	struct nfs4_stateowner      * st_stateowner;
-	struct nfs4_file            * st_file;
-	stateid_t                     st_stateid;
-	struct file                 * st_vfs_file;
-	unsigned long                 st_access_bmap;
-	unsigned long                 st_deny_bmap;
-	struct nfs4_stateid         * st_openstp;
-};
-
-/* flags for preprocess_seqid_op() */
-#define HAS_SESSION             0x00000001
-#define CONFIRM                 0x00000002
-#define OPEN_STATE              0x00000004
-#define LOCK_STATE              0x00000008
-#define RD_STATE	        0x00000010
-#define WR_STATE	        0x00000020
-#define CLOSE_STATE             0x00000040
-
-#define seqid_mutating_err(err)                       \
-	(((err) != nfserr_stale_clientid) &&    \
-	((err) != nfserr_bad_seqid) &&          \
-	((err) != nfserr_stale_stateid) &&      \
-	((err) != nfserr_bad_stateid))
-
-struct nfsd4_compound_state;
-
-extern __be32 nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate,
-		stateid_t *stateid, int flags, struct file **filp);
-extern void nfs4_lock_state(void);
-extern void nfs4_unlock_state(void);
-extern int nfs4_in_grace(void);
-extern __be32 nfs4_check_open_reclaim(clientid_t *clid);
-extern void put_nfs4_client(struct nfs4_client *clp);
-extern void nfs4_free_stateowner(struct kref *kref);
-extern int set_callback_cred(void);
-extern void nfsd4_probe_callback(struct nfs4_client *clp);
-extern void nfsd4_cb_recall(struct nfs4_delegation *dp);
-extern void nfs4_put_delegation(struct nfs4_delegation *dp);
-extern __be32 nfs4_make_rec_clidname(char *clidname, struct xdr_netobj *clname);
-extern void nfsd4_init_recdir(char *recdir_name);
-extern int nfsd4_recdir_load(void);
-extern void nfsd4_shutdown_recdir(void);
-extern int nfs4_client_to_reclaim(const char *name);
-extern int nfs4_has_reclaimed_state(const char *name, bool use_exchange_id);
-extern void nfsd4_recdir_purge_old(void);
-extern int nfsd4_create_clid_dir(struct nfs4_client *clp);
-extern void nfsd4_remove_clid_dir(struct nfs4_client *clp);
-
-static inline void
-nfs4_put_stateowner(struct nfs4_stateowner *so)
-{
-	kref_put(&so->so_ref, nfs4_free_stateowner);
-}
-
-static inline void
-nfs4_get_stateowner(struct nfs4_stateowner *so)
-{
-	kref_get(&so->so_ref);
-}
-
-#endif   /* NFSD4_STATE_H */
diff --git a/include/linux/nfsd/xdr.h b/include/linux/nfsd/xdr.h
deleted file mode 100644
index 58f824d854c2..000000000000
--- a/include/linux/nfsd/xdr.h
+++ /dev/null
@@ -1,176 +0,0 @@
-/*
- * linux/include/linux/nfsd/xdr.h
- *
- * XDR types for nfsd. This is mainly a typing exercise.
- */
-
-#ifndef LINUX_NFSD_H
-#define LINUX_NFSD_H
-
-#include <linux/vfs.h>
-#include <linux/nfsd/nfsd.h>
-
-struct nfsd_fhandle {
-	struct svc_fh		fh;
-};
-
-struct nfsd_sattrargs {
-	struct svc_fh		fh;
-	struct iattr		attrs;
-};
-
-struct nfsd_diropargs {
-	struct svc_fh		fh;
-	char *			name;
-	unsigned int		len;
-};
-
-struct nfsd_readargs {
-	struct svc_fh		fh;
-	__u32			offset;
-	__u32			count;
-	int			vlen;
-};
-
-struct nfsd_writeargs {
-	svc_fh			fh;
-	__u32			offset;
-	int			len;
-	int			vlen;
-};
-
-struct nfsd_createargs {
-	struct svc_fh		fh;
-	char *			name;
-	unsigned int		len;
-	struct iattr		attrs;
-};
-
-struct nfsd_renameargs {
-	struct svc_fh		ffh;
-	char *			fname;
-	unsigned int		flen;
-	struct svc_fh		tfh;
-	char *			tname;
-	unsigned int		tlen;
-};
-
-struct nfsd_readlinkargs {
-	struct svc_fh		fh;
-	char *			buffer;
-};
-	
-struct nfsd_linkargs {
-	struct svc_fh		ffh;
-	struct svc_fh		tfh;
-	char *			tname;
-	unsigned int		tlen;
-};
-
-struct nfsd_symlinkargs {
-	struct svc_fh		ffh;
-	char *			fname;
-	unsigned int		flen;
-	char *			tname;
-	unsigned int		tlen;
-	struct iattr		attrs;
-};
-
-struct nfsd_readdirargs {
-	struct svc_fh		fh;
-	__u32			cookie;
-	__u32			count;
-	__be32 *		buffer;
-};
-
-struct nfsd_attrstat {
-	struct svc_fh		fh;
-	struct kstat		stat;
-};
-
-struct nfsd_diropres  {
-	struct svc_fh		fh;
-	struct kstat		stat;
-};
-
-struct nfsd_readlinkres {
-	int			len;
-};
-
-struct nfsd_readres {
-	struct svc_fh		fh;
-	unsigned long		count;
-	struct kstat		stat;
-};
-
-struct nfsd_readdirres {
-	int			count;
-
-	struct readdir_cd	common;
-	__be32 *		buffer;
-	int			buflen;
-	__be32 *		offset;
-};
-
-struct nfsd_statfsres {
-	struct kstatfs		stats;
-};
-
-/*
- * Storage requirements for XDR arguments and results.
- */
-union nfsd_xdrstore {
-	struct nfsd_sattrargs	sattr;
-	struct nfsd_diropargs	dirop;
-	struct nfsd_readargs	read;
-	struct nfsd_writeargs	write;
-	struct nfsd_createargs	create;
-	struct nfsd_renameargs	rename;
-	struct nfsd_linkargs	link;
-	struct nfsd_symlinkargs	symlink;
-	struct nfsd_readdirargs	readdir;
-};
-
-#define NFS2_SVC_XDRSIZE	sizeof(union nfsd_xdrstore)
-
-
-int nfssvc_decode_void(struct svc_rqst *, __be32 *, void *);
-int nfssvc_decode_fhandle(struct svc_rqst *, __be32 *, struct nfsd_fhandle *);
-int nfssvc_decode_sattrargs(struct svc_rqst *, __be32 *,
-				struct nfsd_sattrargs *);
-int nfssvc_decode_diropargs(struct svc_rqst *, __be32 *,
-				struct nfsd_diropargs *);
-int nfssvc_decode_readargs(struct svc_rqst *, __be32 *,
-				struct nfsd_readargs *);
-int nfssvc_decode_writeargs(struct svc_rqst *, __be32 *,
-				struct nfsd_writeargs *);
-int nfssvc_decode_createargs(struct svc_rqst *, __be32 *,
-				struct nfsd_createargs *);
-int nfssvc_decode_renameargs(struct svc_rqst *, __be32 *,
-				struct nfsd_renameargs *);
-int nfssvc_decode_readlinkargs(struct svc_rqst *, __be32 *,
-				struct nfsd_readlinkargs *);
-int nfssvc_decode_linkargs(struct svc_rqst *, __be32 *,
-				struct nfsd_linkargs *);
-int nfssvc_decode_symlinkargs(struct svc_rqst *, __be32 *,
-				struct nfsd_symlinkargs *);
-int nfssvc_decode_readdirargs(struct svc_rqst *, __be32 *,
-				struct nfsd_readdirargs *);
-int nfssvc_encode_void(struct svc_rqst *, __be32 *, void *);
-int nfssvc_encode_attrstat(struct svc_rqst *, __be32 *, struct nfsd_attrstat *);
-int nfssvc_encode_diropres(struct svc_rqst *, __be32 *, struct nfsd_diropres *);
-int nfssvc_encode_readlinkres(struct svc_rqst *, __be32 *, struct nfsd_readlinkres *);
-int nfssvc_encode_readres(struct svc_rqst *, __be32 *, struct nfsd_readres *);
-int nfssvc_encode_statfsres(struct svc_rqst *, __be32 *, struct nfsd_statfsres *);
-int nfssvc_encode_readdirres(struct svc_rqst *, __be32 *, struct nfsd_readdirres *);
-
-int nfssvc_encode_entry(void *, const char *name,
-			int namlen, loff_t offset, u64 ino, unsigned int);
-
-int nfssvc_release_fhandle(struct svc_rqst *, __be32 *, struct nfsd_fhandle *);
-
-/* Helper functions for NFSv2 ACL code */
-__be32 *nfs2svc_encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp);
-__be32 *nfs2svc_decode_fh(__be32 *p, struct svc_fh *fhp);
-
-#endif /* LINUX_NFSD_H */
diff --git a/include/linux/nfsd/xdr3.h b/include/linux/nfsd/xdr3.h
deleted file mode 100644
index 421eddd65a25..000000000000
--- a/include/linux/nfsd/xdr3.h
+++ /dev/null
@@ -1,346 +0,0 @@
-/*
- * linux/include/linux/nfsd/xdr3.h
- *
- * XDR types for NFSv3 in nfsd.
- *
- * Copyright (C) 1996-1998, Olaf Kirch <okir@monad.swb.de>
- */
-
-#ifndef _LINUX_NFSD_XDR3_H
-#define _LINUX_NFSD_XDR3_H
-
-#include <linux/nfsd/xdr.h>
-
-struct nfsd3_sattrargs {
-	struct svc_fh		fh;
-	struct iattr		attrs;
-	int			check_guard;
-	time_t			guardtime;
-};
-
-struct nfsd3_diropargs {
-	struct svc_fh		fh;
-	char *			name;
-	unsigned int		len;
-};
-
-struct nfsd3_accessargs {
-	struct svc_fh		fh;
-	unsigned int		access;
-};
-
-struct nfsd3_readargs {
-	struct svc_fh		fh;
-	__u64			offset;
-	__u32			count;
-	int			vlen;
-};
-
-struct nfsd3_writeargs {
-	svc_fh			fh;
-	__u64			offset;
-	__u32			count;
-	int			stable;
-	__u32			len;
-	int			vlen;
-};
-
-struct nfsd3_createargs {
-	struct svc_fh		fh;
-	char *			name;
-	unsigned int		len;
-	int			createmode;
-	struct iattr		attrs;
-	__be32 *		verf;
-};
-
-struct nfsd3_mknodargs {
-	struct svc_fh		fh;
-	char *			name;
-	unsigned int		len;
-	__u32			ftype;
-	__u32			major, minor;
-	struct iattr		attrs;
-};
-
-struct nfsd3_renameargs {
-	struct svc_fh		ffh;
-	char *			fname;
-	unsigned int		flen;
-	struct svc_fh		tfh;
-	char *			tname;
-	unsigned int		tlen;
-};
-
-struct nfsd3_readlinkargs {
-	struct svc_fh		fh;
-	char *			buffer;
-};
-
-struct nfsd3_linkargs {
-	struct svc_fh		ffh;
-	struct svc_fh		tfh;
-	char *			tname;
-	unsigned int		tlen;
-};
-
-struct nfsd3_symlinkargs {
-	struct svc_fh		ffh;
-	char *			fname;
-	unsigned int		flen;
-	char *			tname;
-	unsigned int		tlen;
-	struct iattr		attrs;
-};
-
-struct nfsd3_readdirargs {
-	struct svc_fh		fh;
-	__u64			cookie;
-	__u32			dircount;
-	__u32			count;
-	__be32 *		verf;
-	__be32 *		buffer;
-};
-
-struct nfsd3_commitargs {
-	struct svc_fh		fh;
-	__u64			offset;
-	__u32			count;
-};
-
-struct nfsd3_getaclargs {
-	struct svc_fh		fh;
-	int			mask;
-};
-
-struct posix_acl;
-struct nfsd3_setaclargs {
-	struct svc_fh		fh;
-	int			mask;
-	struct posix_acl	*acl_access;
-	struct posix_acl	*acl_default;
-};
-
-struct nfsd3_attrstat {
-	__be32			status;
-	struct svc_fh		fh;
-	struct kstat            stat;
-};
-
-/* LOOKUP, CREATE, MKDIR, SYMLINK, MKNOD */
-struct nfsd3_diropres  {
-	__be32			status;
-	struct svc_fh		dirfh;
-	struct svc_fh		fh;
-};
-
-struct nfsd3_accessres {
-	__be32			status;
-	struct svc_fh		fh;
-	__u32			access;
-};
-
-struct nfsd3_readlinkres {
-	__be32			status;
-	struct svc_fh		fh;
-	__u32			len;
-};
-
-struct nfsd3_readres {
-	__be32			status;
-	struct svc_fh		fh;
-	unsigned long		count;
-	int			eof;
-};
-
-struct nfsd3_writeres {
-	__be32			status;
-	struct svc_fh		fh;
-	unsigned long		count;
-	int			committed;
-};
-
-struct nfsd3_renameres {
-	__be32			status;
-	struct svc_fh		ffh;
-	struct svc_fh		tfh;
-};
-
-struct nfsd3_linkres {
-	__be32			status;
-	struct svc_fh		tfh;
-	struct svc_fh		fh;
-};
-
-struct nfsd3_readdirres {
-	__be32			status;
-	struct svc_fh		fh;
-	int			count;
-	__be32			verf[2];
-
-	struct readdir_cd	common;
-	__be32 *		buffer;
-	int			buflen;
-	__be32 *		offset;
-	__be32 *		offset1;
-	struct svc_rqst *	rqstp;
-
-};
-
-struct nfsd3_fsstatres {
-	__be32			status;
-	struct kstatfs		stats;
-	__u32			invarsec;
-};
-
-struct nfsd3_fsinfores {
-	__be32			status;
-	__u32			f_rtmax;
-	__u32			f_rtpref;
-	__u32			f_rtmult;
-	__u32			f_wtmax;
-	__u32			f_wtpref;
-	__u32			f_wtmult;
-	__u32			f_dtpref;
-	__u64			f_maxfilesize;
-	__u32			f_properties;
-};
-
-struct nfsd3_pathconfres {
-	__be32			status;
-	__u32			p_link_max;
-	__u32			p_name_max;
-	__u32			p_no_trunc;
-	__u32			p_chown_restricted;
-	__u32			p_case_insensitive;
-	__u32			p_case_preserving;
-};
-
-struct nfsd3_commitres {
-	__be32			status;
-	struct svc_fh		fh;
-};
-
-struct nfsd3_getaclres {
-	__be32			status;
-	struct svc_fh		fh;
-	int			mask;
-	struct posix_acl	*acl_access;
-	struct posix_acl	*acl_default;
-};
-
-/* dummy type for release */
-struct nfsd3_fhandle_pair {
-	__u32			dummy;
-	struct svc_fh		fh1;
-	struct svc_fh		fh2;
-};
-
-/*
- * Storage requirements for XDR arguments and results.
- */
-union nfsd3_xdrstore {
-	struct nfsd3_sattrargs		sattrargs;
-	struct nfsd3_diropargs		diropargs;
-	struct nfsd3_readargs		readargs;
-	struct nfsd3_writeargs		writeargs;
-	struct nfsd3_createargs		createargs;
-	struct nfsd3_renameargs		renameargs;
-	struct nfsd3_linkargs		linkargs;
-	struct nfsd3_symlinkargs	symlinkargs;
-	struct nfsd3_readdirargs	readdirargs;
-	struct nfsd3_diropres 		diropres;
-	struct nfsd3_accessres		accessres;
-	struct nfsd3_readlinkres	readlinkres;
-	struct nfsd3_readres		readres;
-	struct nfsd3_writeres		writeres;
-	struct nfsd3_renameres		renameres;
-	struct nfsd3_linkres		linkres;
-	struct nfsd3_readdirres		readdirres;
-	struct nfsd3_fsstatres		fsstatres;
-	struct nfsd3_fsinfores		fsinfores;
-	struct nfsd3_pathconfres	pathconfres;
-	struct nfsd3_commitres		commitres;
-	struct nfsd3_getaclres		getaclres;
-};
-
-#define NFS3_SVC_XDRSIZE		sizeof(union nfsd3_xdrstore)
-
-int nfs3svc_decode_fhandle(struct svc_rqst *, __be32 *, struct nfsd_fhandle *);
-int nfs3svc_decode_sattrargs(struct svc_rqst *, __be32 *,
-				struct nfsd3_sattrargs *);
-int nfs3svc_decode_diropargs(struct svc_rqst *, __be32 *,
-				struct nfsd3_diropargs *);
-int nfs3svc_decode_accessargs(struct svc_rqst *, __be32 *,
-				struct nfsd3_accessargs *);
-int nfs3svc_decode_readargs(struct svc_rqst *, __be32 *,
-				struct nfsd3_readargs *);
-int nfs3svc_decode_writeargs(struct svc_rqst *, __be32 *,
-				struct nfsd3_writeargs *);
-int nfs3svc_decode_createargs(struct svc_rqst *, __be32 *,
-				struct nfsd3_createargs *);
-int nfs3svc_decode_mkdirargs(struct svc_rqst *, __be32 *,
-				struct nfsd3_createargs *);
-int nfs3svc_decode_mknodargs(struct svc_rqst *, __be32 *,
-				struct nfsd3_mknodargs *);
-int nfs3svc_decode_renameargs(struct svc_rqst *, __be32 *,
-				struct nfsd3_renameargs *);
-int nfs3svc_decode_readlinkargs(struct svc_rqst *, __be32 *,
-				struct nfsd3_readlinkargs *);
-int nfs3svc_decode_linkargs(struct svc_rqst *, __be32 *,
-				struct nfsd3_linkargs *);
-int nfs3svc_decode_symlinkargs(struct svc_rqst *, __be32 *,
-				struct nfsd3_symlinkargs *);
-int nfs3svc_decode_readdirargs(struct svc_rqst *, __be32 *,
-				struct nfsd3_readdirargs *);
-int nfs3svc_decode_readdirplusargs(struct svc_rqst *, __be32 *,
-				struct nfsd3_readdirargs *);
-int nfs3svc_decode_commitargs(struct svc_rqst *, __be32 *,
-				struct nfsd3_commitargs *);
-int nfs3svc_encode_voidres(struct svc_rqst *, __be32 *, void *);
-int nfs3svc_encode_attrstat(struct svc_rqst *, __be32 *,
-				struct nfsd3_attrstat *);
-int nfs3svc_encode_wccstat(struct svc_rqst *, __be32 *,
-				struct nfsd3_attrstat *);
-int nfs3svc_encode_diropres(struct svc_rqst *, __be32 *,
-				struct nfsd3_diropres *);
-int nfs3svc_encode_accessres(struct svc_rqst *, __be32 *,
-				struct nfsd3_accessres *);
-int nfs3svc_encode_readlinkres(struct svc_rqst *, __be32 *,
-				struct nfsd3_readlinkres *);
-int nfs3svc_encode_readres(struct svc_rqst *, __be32 *, struct nfsd3_readres *);
-int nfs3svc_encode_writeres(struct svc_rqst *, __be32 *, struct nfsd3_writeres *);
-int nfs3svc_encode_createres(struct svc_rqst *, __be32 *,
-				struct nfsd3_diropres *);
-int nfs3svc_encode_renameres(struct svc_rqst *, __be32 *,
-				struct nfsd3_renameres *);
-int nfs3svc_encode_linkres(struct svc_rqst *, __be32 *,
-				struct nfsd3_linkres *);
-int nfs3svc_encode_readdirres(struct svc_rqst *, __be32 *,
-				struct nfsd3_readdirres *);
-int nfs3svc_encode_fsstatres(struct svc_rqst *, __be32 *,
-				struct nfsd3_fsstatres *);
-int nfs3svc_encode_fsinfores(struct svc_rqst *, __be32 *,
-				struct nfsd3_fsinfores *);
-int nfs3svc_encode_pathconfres(struct svc_rqst *, __be32 *,
-				struct nfsd3_pathconfres *);
-int nfs3svc_encode_commitres(struct svc_rqst *, __be32 *,
-				struct nfsd3_commitres *);
-
-int nfs3svc_release_fhandle(struct svc_rqst *, __be32 *,
-				struct nfsd3_attrstat *);
-int nfs3svc_release_fhandle2(struct svc_rqst *, __be32 *,
-				struct nfsd3_fhandle_pair *);
-int nfs3svc_encode_entry(void *, const char *name,
-				int namlen, loff_t offset, u64 ino,
-				unsigned int);
-int nfs3svc_encode_entry_plus(void *, const char *name,
-				int namlen, loff_t offset, u64 ino,
-				unsigned int);
-/* Helper functions for NFSv3 ACL code */
-__be32 *nfs3svc_encode_post_op_attr(struct svc_rqst *rqstp, __be32 *p,
-				struct svc_fh *fhp);
-__be32 *nfs3svc_decode_fh(__be32 *p, struct svc_fh *fhp);
-
-
-#endif /* _LINUX_NFSD_XDR3_H */
diff --git a/include/linux/nfsd/xdr4.h b/include/linux/nfsd/xdr4.h
deleted file mode 100644
index 1bf266239c7e..000000000000
--- a/include/linux/nfsd/xdr4.h
+++ /dev/null
@@ -1,564 +0,0 @@
-/*
- *  include/linux/nfsd/xdr4.h
- *
- *  Server-side types for NFSv4.
- *
- *  Copyright (c) 2002 The Regents of the University of Michigan.
- *  All rights reserved.
- *
- *  Kendrick Smith <kmsmith@umich.edu>
- *  Andy Adamson   <andros@umich.edu>
- *
- *  Redistribution and use in source and binary forms, with or without
- *  modification, are permitted provided that the following conditions
- *  are met:
- *
- *  1. Redistributions of source code must retain the above copyright
- *     notice, this list of conditions and the following disclaimer.
- *  2. Redistributions in binary form must reproduce the above copyright
- *     notice, this list of conditions and the following disclaimer in the
- *     documentation and/or other materials provided with the distribution.
- *  3. Neither the name of the University nor the names of its
- *     contributors may be used to endorse or promote products derived
- *     from this software without specific prior written permission.
- *
- *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
- *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- */
-
-#ifndef _LINUX_NFSD_XDR4_H
-#define _LINUX_NFSD_XDR4_H
-
-#include <linux/nfsd/state.h>
-#include <linux/nfsd/nfsd.h>
-
-#define NFSD4_MAX_TAGLEN	128
-#define XDR_LEN(n)                     (((n) + 3) & ~3)
-
-struct nfsd4_compound_state {
-	struct svc_fh		current_fh;
-	struct svc_fh		save_fh;
-	struct nfs4_stateowner	*replay_owner;
-	/* For sessions DRC */
-	struct nfsd4_session	*session;
-	struct nfsd4_slot	*slot;
-	__be32			*datap;
-	size_t			iovlen;
-	u32			minorversion;
-	u32			status;
-};
-
-static inline bool nfsd4_has_session(struct nfsd4_compound_state *cs)
-{
-	return cs->slot != NULL;
-}
-
-struct nfsd4_change_info {
-	u32		atomic;
-	bool		change_supported;
-	u32		before_ctime_sec;
-	u32		before_ctime_nsec;
-	u64		before_change;
-	u32		after_ctime_sec;
-	u32		after_ctime_nsec;
-	u64		after_change;
-};
-
-struct nfsd4_access {
-	u32		ac_req_access;      /* request */
-	u32		ac_supported;       /* response */
-	u32		ac_resp_access;     /* response */
-};
-
-struct nfsd4_close {
-	u32		cl_seqid;           /* request */
-	stateid_t	cl_stateid;         /* request+response */
-	struct nfs4_stateowner * cl_stateowner;	/* response */
-};
-
-struct nfsd4_commit {
-	u64		co_offset;          /* request */
-	u32		co_count;           /* request */
-	nfs4_verifier	co_verf;            /* response */
-};
-
-struct nfsd4_create {
-	u32		cr_namelen;         /* request */
-	char *		cr_name;            /* request */
-	u32		cr_type;            /* request */
-	union {                             /* request */
-		struct {
-			u32 namelen;
-			char *name;
-		} link;   /* NF4LNK */
-		struct {
-			u32 specdata1;
-			u32 specdata2;
-		} dev;    /* NF4BLK, NF4CHR */
-	} u;
-	u32		cr_bmval[3];        /* request */
-	struct iattr	cr_iattr;           /* request */
-	struct nfsd4_change_info  cr_cinfo; /* response */
-	struct nfs4_acl *cr_acl;
-};
-#define cr_linklen	u.link.namelen
-#define cr_linkname	u.link.name
-#define cr_specdata1	u.dev.specdata1
-#define cr_specdata2	u.dev.specdata2
-
-struct nfsd4_delegreturn {
-	stateid_t	dr_stateid;
-};
-
-struct nfsd4_getattr {
-	u32		ga_bmval[3];        /* request */
-	struct svc_fh	*ga_fhp;            /* response */
-};
-
-struct nfsd4_link {
-	u32		li_namelen;         /* request */
-	char *		li_name;            /* request */
-	struct nfsd4_change_info  li_cinfo; /* response */
-};
-
-struct nfsd4_lock_denied {
-	clientid_t	ld_clientid;
-	struct nfs4_stateowner   *ld_sop;
-	u64             ld_start;
-	u64             ld_length;
-	u32             ld_type;
-};
-
-struct nfsd4_lock {
-	/* request */
-	u32             lk_type;
-	u32             lk_reclaim;         /* boolean */
-	u64             lk_offset;
-	u64             lk_length;
-	u32             lk_is_new;
-	union {
-		struct {
-			u32             open_seqid;
-			stateid_t       open_stateid;
-			u32             lock_seqid;
-			clientid_t      clientid;
-			struct xdr_netobj owner;
-		} new;
-		struct {
-			stateid_t       lock_stateid;
-			u32             lock_seqid;
-		} old;
-	} v;
-
-	/* response */
-	union {
-		struct {
-			stateid_t               stateid;
-		} ok;
-		struct nfsd4_lock_denied        denied;
-	} u;
-	/* The lk_replay_owner is the open owner in the open_to_lock_owner
-	 * case and the lock owner otherwise: */
-	struct nfs4_stateowner *lk_replay_owner;
-};
-#define lk_new_open_seqid       v.new.open_seqid
-#define lk_new_open_stateid     v.new.open_stateid
-#define lk_new_lock_seqid       v.new.lock_seqid
-#define lk_new_clientid         v.new.clientid
-#define lk_new_owner            v.new.owner
-#define lk_old_lock_stateid     v.old.lock_stateid
-#define lk_old_lock_seqid       v.old.lock_seqid
-
-#define lk_rflags       u.ok.rflags
-#define lk_resp_stateid u.ok.stateid
-#define lk_denied       u.denied
-
-
-struct nfsd4_lockt {
-	u32				lt_type;
-	clientid_t			lt_clientid;
-	struct xdr_netobj		lt_owner;
-	u64				lt_offset;
-	u64				lt_length;
-	struct nfs4_stateowner * 	lt_stateowner;
-	struct nfsd4_lock_denied  	lt_denied;
-};
-
- 
-struct nfsd4_locku {
-	u32             lu_type;
-	u32             lu_seqid;
-	stateid_t       lu_stateid;
-	u64             lu_offset;
-	u64             lu_length;
-	struct nfs4_stateowner  *lu_stateowner;
-};
-
-
-struct nfsd4_lookup {
-	u32		lo_len;             /* request */
-	char *		lo_name;            /* request */
-};
-
-struct nfsd4_putfh {
-	u32		pf_fhlen;           /* request */
-	char		*pf_fhval;          /* request */
-};
-
-struct nfsd4_open {
-	u32		op_claim_type;      /* request */
-	struct xdr_netobj op_fname;	    /* request - everything but CLAIM_PREV */
-	u32		op_delegate_type;   /* request - CLAIM_PREV only */
-	stateid_t       op_delegate_stateid; /* request - response */
-	u32		op_create;     	    /* request */
-	u32		op_createmode;      /* request */
-	u32		op_bmval[3];        /* request */
-	struct iattr	iattr;              /* UNCHECKED4, GUARDED4, EXCLUSIVE4_1 */
-	nfs4_verifier	verf;               /* EXCLUSIVE4 */
-	clientid_t	op_clientid;        /* request */
-	struct xdr_netobj op_owner;           /* request */
-	u32		op_seqid;           /* request */
-	u32		op_share_access;    /* request */
-	u32		op_share_deny;      /* request */
-	stateid_t	op_stateid;         /* response */
-	u32		op_recall;          /* recall */
-	struct nfsd4_change_info  op_cinfo; /* response */
-	u32		op_rflags;          /* response */
-	int		op_truncate;        /* used during processing */
-	struct nfs4_stateowner *op_stateowner; /* used during processing */
-	struct nfs4_acl *op_acl;
-};
-#define op_iattr	iattr
-#define op_verf		verf
-
-struct nfsd4_open_confirm {
-	stateid_t	oc_req_stateid		/* request */;
-	u32		oc_seqid    		/* request */;
-	stateid_t	oc_resp_stateid		/* response */;
-	struct nfs4_stateowner * oc_stateowner;	/* response */
-};
-
-struct nfsd4_open_downgrade {
-	stateid_t       od_stateid;
-	u32             od_seqid;
-	u32             od_share_access;
-	u32             od_share_deny;
-	struct nfs4_stateowner *od_stateowner;
-};
-
-
-struct nfsd4_read {
-	stateid_t	rd_stateid;         /* request */
-	u64		rd_offset;          /* request */
-	u32		rd_length;          /* request */
-	int		rd_vlen;
-	struct file     *rd_filp;
-	
-	struct svc_rqst *rd_rqstp;          /* response */
-	struct svc_fh * rd_fhp;             /* response */
-};
-
-struct nfsd4_readdir {
-	u64		rd_cookie;          /* request */
-	nfs4_verifier	rd_verf;            /* request */
-	u32		rd_dircount;        /* request */
-	u32		rd_maxcount;        /* request */
-	u32		rd_bmval[3];        /* request */
-	struct svc_rqst *rd_rqstp;          /* response */
-	struct svc_fh * rd_fhp;             /* response */
-
-	struct readdir_cd	common;
-	__be32 *		buffer;
-	int			buflen;
-	__be32 *		offset;
-};
-
-struct nfsd4_release_lockowner {
-	clientid_t        rl_clientid;
-	struct xdr_netobj rl_owner;
-};
-struct nfsd4_readlink {
-	struct svc_rqst *rl_rqstp;          /* request */
-	struct svc_fh *	rl_fhp;             /* request */
-};
-
-struct nfsd4_remove {
-	u32		rm_namelen;         /* request */
-	char *		rm_name;            /* request */
-	struct nfsd4_change_info  rm_cinfo; /* response */
-};
-
-struct nfsd4_rename {
-	u32		rn_snamelen;        /* request */
-	char *		rn_sname;           /* request */
-	u32		rn_tnamelen;        /* request */
-	char *		rn_tname;           /* request */
-	struct nfsd4_change_info  rn_sinfo; /* response */
-	struct nfsd4_change_info  rn_tinfo; /* response */
-};
-
-struct nfsd4_secinfo {
-	u32 si_namelen;					/* request */
-	char *si_name;					/* request */
-	struct svc_export *si_exp;			/* response */
-};
-
-struct nfsd4_setattr {
-	stateid_t	sa_stateid;         /* request */
-	u32		sa_bmval[3];        /* request */
-	struct iattr	sa_iattr;           /* request */
-	struct nfs4_acl *sa_acl;
-};
-
-struct nfsd4_setclientid {
-	nfs4_verifier	se_verf;            /* request */
-	u32		se_namelen;         /* request */
-	char *		se_name;            /* request */
-	u32		se_callback_prog;   /* request */
-	u32		se_callback_netid_len;  /* request */
-	char *		se_callback_netid_val;  /* request */
-	u32		se_callback_addr_len;   /* request */
-	char *		se_callback_addr_val;   /* request */
-	u32		se_callback_ident;  /* request */
-	clientid_t	se_clientid;        /* response */
-	nfs4_verifier	se_confirm;         /* response */
-};
-
-struct nfsd4_setclientid_confirm {
-	clientid_t	sc_clientid;
-	nfs4_verifier	sc_confirm;
-};
-
-/* also used for NVERIFY */
-struct nfsd4_verify {
-	u32		ve_bmval[3];        /* request */
-	u32		ve_attrlen;         /* request */
-	char *		ve_attrval;         /* request */
-};
-
-struct nfsd4_write {
-	stateid_t	wr_stateid;         /* request */
-	u64		wr_offset;          /* request */
-	u32		wr_stable_how;      /* request */
-	u32		wr_buflen;          /* request */
-	int		wr_vlen;
-
-	u32		wr_bytes_written;   /* response */
-	u32		wr_how_written;     /* response */
-	nfs4_verifier	wr_verifier;        /* response */
-};
-
-struct nfsd4_exchange_id {
-	nfs4_verifier	verifier;
-	struct xdr_netobj clname;
-	u32		flags;
-	clientid_t	clientid;
-	u32		seqid;
-	int		spa_how;
-};
-
-struct nfsd4_sequence {
-	struct nfs4_sessionid	sessionid;		/* request/response */
-	u32			seqid;			/* request/response */
-	u32			slotid;			/* request/response */
-	u32			maxslots;		/* request/response */
-	u32			cachethis;		/* request */
-#if 0
-	u32			target_maxslots;	/* response */
-	u32			status_flags;		/* response */
-#endif /* not yet */
-};
-
-struct nfsd4_destroy_session {
-	struct nfs4_sessionid	sessionid;
-};
-
-struct nfsd4_op {
-	int					opnum;
-	__be32					status;
-	union {
-		struct nfsd4_access		access;
-		struct nfsd4_close		close;
-		struct nfsd4_commit		commit;
-		struct nfsd4_create		create;
-		struct nfsd4_delegreturn	delegreturn;
-		struct nfsd4_getattr		getattr;
-		struct svc_fh *			getfh;
-		struct nfsd4_link		link;
-		struct nfsd4_lock		lock;
-		struct nfsd4_lockt		lockt;
-		struct nfsd4_locku		locku;
-		struct nfsd4_lookup		lookup;
-		struct nfsd4_verify		nverify;
-		struct nfsd4_open		open;
-		struct nfsd4_open_confirm	open_confirm;
-		struct nfsd4_open_downgrade	open_downgrade;
-		struct nfsd4_putfh		putfh;
-		struct nfsd4_read		read;
-		struct nfsd4_readdir		readdir;
-		struct nfsd4_readlink		readlink;
-		struct nfsd4_remove		remove;
-		struct nfsd4_rename		rename;
-		clientid_t			renew;
-		struct nfsd4_secinfo		secinfo;
-		struct nfsd4_setattr		setattr;
-		struct nfsd4_setclientid	setclientid;
-		struct nfsd4_setclientid_confirm setclientid_confirm;
-		struct nfsd4_verify		verify;
-		struct nfsd4_write		write;
-		struct nfsd4_release_lockowner	release_lockowner;
-
-		/* NFSv4.1 */
-		struct nfsd4_exchange_id	exchange_id;
-		struct nfsd4_create_session	create_session;
-		struct nfsd4_destroy_session	destroy_session;
-		struct nfsd4_sequence		sequence;
-	} u;
-	struct nfs4_replay *			replay;
-};
-
-struct nfsd4_compoundargs {
-	/* scratch variables for XDR decode */
-	__be32 *			p;
-	__be32 *			end;
-	struct page **			pagelist;
-	int				pagelen;
-	__be32				tmp[8];
-	__be32 *			tmpp;
-	struct tmpbuf {
-		struct tmpbuf *next;
-		void (*release)(const void *);
-		void *buf;
-	}				*to_free;
-
-	struct svc_rqst			*rqstp;
-
-	u32				taglen;
-	char *				tag;
-	u32				minorversion;
-	u32				opcnt;
-	struct nfsd4_op			*ops;
-	struct nfsd4_op			iops[8];
-};
-
-struct nfsd4_compoundres {
-	/* scratch variables for XDR encode */
-	__be32 *			p;
-	__be32 *			end;
-	struct xdr_buf *		xbuf;
-	struct svc_rqst *		rqstp;
-
-	u32				taglen;
-	char *				tag;
-	u32				opcnt;
-	__be32 *			tagp; /* tag, opcount encode location */
-	struct nfsd4_compound_state	cstate;
-};
-
-static inline bool nfsd4_is_solo_sequence(struct nfsd4_compoundres *resp)
-{
-	struct nfsd4_compoundargs *args = resp->rqstp->rq_argp;
-	return resp->opcnt == 1 && args->ops[0].opnum == OP_SEQUENCE;
-}
-
-static inline bool nfsd4_not_cached(struct nfsd4_compoundres *resp)
-{
-	return !resp->cstate.slot->sl_cachethis || nfsd4_is_solo_sequence(resp);
-}
-
-#define NFS4_SVC_XDRSIZE		sizeof(struct nfsd4_compoundargs)
-
-static inline void
-set_change_info(struct nfsd4_change_info *cinfo, struct svc_fh *fhp)
-{
-	BUG_ON(!fhp->fh_pre_saved || !fhp->fh_post_saved);
-	cinfo->atomic = 1;
-	cinfo->change_supported = IS_I_VERSION(fhp->fh_dentry->d_inode);
-	if (cinfo->change_supported) {
-		cinfo->before_change = fhp->fh_pre_change;
-		cinfo->after_change = fhp->fh_post_change;
-	} else {
-		cinfo->before_ctime_sec = fhp->fh_pre_ctime.tv_sec;
-		cinfo->before_ctime_nsec = fhp->fh_pre_ctime.tv_nsec;
-		cinfo->after_ctime_sec = fhp->fh_post_attr.ctime.tv_sec;
-		cinfo->after_ctime_nsec = fhp->fh_post_attr.ctime.tv_nsec;
-	}
-}
-
-int nfs4svc_encode_voidres(struct svc_rqst *, __be32 *, void *);
-int nfs4svc_decode_compoundargs(struct svc_rqst *, __be32 *,
-		struct nfsd4_compoundargs *);
-int nfs4svc_encode_compoundres(struct svc_rqst *, __be32 *,
-		struct nfsd4_compoundres *);
-void nfsd4_encode_operation(struct nfsd4_compoundres *, struct nfsd4_op *);
-void nfsd4_encode_replay(struct nfsd4_compoundres *resp, struct nfsd4_op *op);
-__be32 nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
-		       struct dentry *dentry, __be32 *buffer, int *countp,
-		       u32 *bmval, struct svc_rqst *, int ignore_crossmnt);
-extern __be32 nfsd4_setclientid(struct svc_rqst *rqstp,
-		struct nfsd4_compound_state *,
-		struct nfsd4_setclientid *setclid);
-extern __be32 nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
-		struct nfsd4_compound_state *,
-		struct nfsd4_setclientid_confirm *setclientid_confirm);
-extern void nfsd4_store_cache_entry(struct nfsd4_compoundres *resp);
-extern __be32 nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp,
-		struct nfsd4_sequence *seq);
-extern __be32 nfsd4_exchange_id(struct svc_rqst *rqstp,
-		struct nfsd4_compound_state *,
-struct nfsd4_exchange_id *);
-		extern __be32 nfsd4_create_session(struct svc_rqst *,
-		struct nfsd4_compound_state *,
-		struct nfsd4_create_session *);
-extern __be32 nfsd4_sequence(struct svc_rqst *,
-		struct nfsd4_compound_state *,
-		struct nfsd4_sequence *);
-extern __be32 nfsd4_destroy_session(struct svc_rqst *,
-		struct nfsd4_compound_state *,
-		struct nfsd4_destroy_session *);
-extern __be32 nfsd4_process_open1(struct nfsd4_compound_state *,
-		struct nfsd4_open *open);
-extern __be32 nfsd4_process_open2(struct svc_rqst *rqstp,
-		struct svc_fh *current_fh, struct nfsd4_open *open);
-extern __be32 nfsd4_open_confirm(struct svc_rqst *rqstp,
-		struct nfsd4_compound_state *, struct nfsd4_open_confirm *oc);
-extern __be32 nfsd4_close(struct svc_rqst *rqstp,
-		struct nfsd4_compound_state *,
-		struct nfsd4_close *close);
-extern __be32 nfsd4_open_downgrade(struct svc_rqst *rqstp,
-		struct nfsd4_compound_state *,
-		struct nfsd4_open_downgrade *od);
-extern __be32 nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *,
-		struct nfsd4_lock *lock);
-extern __be32 nfsd4_lockt(struct svc_rqst *rqstp,
-		struct nfsd4_compound_state *,
-		struct nfsd4_lockt *lockt);
-extern __be32 nfsd4_locku(struct svc_rqst *rqstp,
-		struct nfsd4_compound_state *,
-		struct nfsd4_locku *locku);
-extern __be32
-nfsd4_release_lockowner(struct svc_rqst *rqstp,
-		struct nfsd4_compound_state *,
-		struct nfsd4_release_lockowner *rlockowner);
-extern void nfsd4_release_compoundargs(struct nfsd4_compoundargs *);
-extern __be32 nfsd4_delegreturn(struct svc_rqst *rqstp,
-		struct nfsd4_compound_state *, struct nfsd4_delegreturn *dr);
-extern __be32 nfsd4_renew(struct svc_rqst *rqstp,
-			  struct nfsd4_compound_state *, clientid_t *clid);
-#endif
-
-/*
- * Local variables:
- *  c-basic-offset: 8
- * End:
- */
-- 
cgit v1.2.3


From e8e8753f7a32ce4f636771126fc8eba0dc4ad817 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Mon, 14 Dec 2009 12:53:32 -0500
Subject: nfsd: new interface to advertise export features

Soon we will add the new V4ROOT flag, and allow the INSECURE flag to
vary by pseudoflavor.  It would be useful for nfs-utils (for example,
for improved exportfs error reporting) to be able to know when this
happens.  Use this new interface for that purpose.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 fs/nfsd/nfsctl.c            | 21 +++++++++++++++++++++
 include/linux/nfsd/export.h |  3 ++-
 2 files changed, 23 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 0415680d3f58..e7051ac4dc73 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -31,6 +31,7 @@ enum {
 	NFSD_Getfd,
 	NFSD_Getfs,
 	NFSD_List,
+	NFSD_Export_features,
 	NFSD_Fh,
 	NFSD_FO_UnlockIP,
 	NFSD_FO_UnlockFS,
@@ -149,6 +150,24 @@ static const struct file_operations exports_operations = {
 	.owner		= THIS_MODULE,
 };
 
+static int export_features_show(struct seq_file *m, void *v)
+{
+	seq_printf(m, "0x%x 0x%x\n", NFSEXP_ALLFLAGS, NFSEXP_SECINFO_FLAGS);
+	return 0;
+}
+
+static int export_features_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, export_features_show, NULL);
+}
+
+static struct file_operations export_features_operations = {
+	.open		= export_features_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
 extern int nfsd_pool_stats_open(struct inode *inode, struct file *file);
 extern int nfsd_pool_stats_release(struct inode *inode, struct file *file);
 
@@ -1306,6 +1325,8 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent)
 		[NFSD_Getfd] = {".getfd", &transaction_ops, S_IWUSR|S_IRUSR},
 		[NFSD_Getfs] = {".getfs", &transaction_ops, S_IWUSR|S_IRUSR},
 		[NFSD_List] = {"exports", &exports_operations, S_IRUGO},
+		[NFSD_Export_features] = {"export_features",
+					&export_features_operations, S_IRUGO},
 		[NFSD_FO_UnlockIP] = {"unlock_ip",
 					&transaction_ops, S_IWUSR|S_IRUSR},
 		[NFSD_FO_UnlockFS] = {"unlock_filesystem",
diff --git a/include/linux/nfsd/export.h b/include/linux/nfsd/export.h
index ef3d416fcf67..4f1df1d7312c 100644
--- a/include/linux/nfsd/export.h
+++ b/include/linux/nfsd/export.h
@@ -39,7 +39,8 @@
 #define NFSEXP_FSID		0x2000
 #define	NFSEXP_CROSSMOUNT	0x4000
 #define	NFSEXP_NOACL		0x8000	/* reserved for possible ACL related use */
-#define NFSEXP_ALLFLAGS		0xFE3F
+/* All flags that we claim to support.  (Note we don't support NOACL.) */
+#define NFSEXP_ALLFLAGS		0x7E3F
 
 /* The flags that may vary depending on security flavor: */
 #define NFSEXP_SECINFO_FLAGS	(NFSEXP_READONLY | NFSEXP_ROOTSQUASH \
-- 
cgit v1.2.3


From 12045a6ee9908b38b6d286530c7d816e39071346 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Tue, 8 Dec 2009 18:15:52 -0500
Subject: nfsd: let "insecure" flag vary by pseudoflavor

This was an oversight; it should be among the export flags that can be
allowed to vary by pseudoflavor.  This allows an administrator to (for
example) allow auth_sys mounts only from low ports, but allow auth_krb5
mounts to use any port.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 fs/nfsd/nfsfh.c             | 4 +++-
 include/linux/nfsd/export.h | 4 ++--
 2 files changed, 5 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index 0eb1c59f5ab8..951938d6c495 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -88,8 +88,10 @@ nfsd_mode_check(struct svc_rqst *rqstp, umode_t mode, int type)
 static __be32 nfsd_setuser_and_check_port(struct svc_rqst *rqstp,
 					  struct svc_export *exp)
 {
+	int flags = nfsexp_flags(rqstp, exp);
+
 	/* Check if the request originated from a secure port. */
-	if (!rqstp->rq_secure && EX_SECURE(exp)) {
+	if (!rqstp->rq_secure && (flags & NFSEXP_INSECURE_PORT)) {
 		RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]);
 		dprintk(KERN_WARNING
 		       "nfsd: request from insecure port %s!\n",
diff --git a/include/linux/nfsd/export.h b/include/linux/nfsd/export.h
index 4f1df1d7312c..4cafbe1255f0 100644
--- a/include/linux/nfsd/export.h
+++ b/include/linux/nfsd/export.h
@@ -44,7 +44,8 @@
 
 /* The flags that may vary depending on security flavor: */
 #define NFSEXP_SECINFO_FLAGS	(NFSEXP_READONLY | NFSEXP_ROOTSQUASH \
-					| NFSEXP_ALLSQUASH)
+					| NFSEXP_ALLSQUASH \
+					| NFSEXP_INSECURE_PORT)
 
 #ifdef __KERNEL__
 
@@ -109,7 +110,6 @@ struct svc_expkey {
 	struct path		ek_path;
 };
 
-#define EX_SECURE(exp)		(!((exp)->ex_flags & NFSEXP_INSECURE_PORT))
 #define EX_ISSYNC(exp)		(!((exp)->ex_flags & NFSEXP_ASYNC))
 #define EX_NOHIDE(exp)		((exp)->ex_flags & NFSEXP_NOHIDE)
 #define EX_WGATHER(exp)		((exp)->ex_flags & NFSEXP_GATHERED_WRITES)
-- 
cgit v1.2.3


From 9e1b9b80721661bd63b3662453767b22cd614fe7 Mon Sep 17 00:00:00 2001
From: Alan Jenkins <alan-jenkins@tuffmail.co.uk>
Date: Sat, 7 Nov 2009 21:03:54 +0000
Subject: module: make MODULE_SYMBOL_PREFIX into a CONFIG option

The next commit will require the use of MODULE_SYMBOL_PREFIX in
.tmp_exports-asm.S.  Currently it is mixed in with C structure
definitions in "asm/module.h".  Move the definition of this arch option
into Kconfig, so it can be easily accessed by any code.

This also lets modpost.c use the same definition.  Previously modpost
relied on a hardcoded list of architectures in mk_elfconfig.c.

A build test for blackfin, one of the two MODULE_SYMBOL_PREFIX archs,
showed the generated code was unchanged.  vmlinux was identical save
for build ids, and an apparently randomized suffix on a single "__key"
symbol in the kallsyms data).

Signed-off-by: Alan Jenkins <alan-jenkins@tuffmail.co.uk>
Acked-by: Mike Frysinger <vapier@gentoo.org> (blackfin)
CC: Sam Ravnborg <sam@ravnborg.org>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 arch/blackfin/Kconfig              | 4 ++++
 arch/blackfin/include/asm/module.h | 2 --
 arch/blackfin/kernel/vmlinux.lds.S | 2 --
 arch/h8300/Kconfig                 | 4 ++++
 arch/h8300/include/asm/module.h    | 2 --
 arch/h8300/kernel/vmlinux.lds.S    | 1 -
 include/asm-generic/vmlinux.lds.h  | 8 ++++++--
 include/linux/module.h             | 6 ++++--
 scripts/Makefile.lib               | 5 +++++
 scripts/mod/Makefile               | 2 +-
 scripts/mod/mk_elfconfig.c         | 9 ---------
 scripts/mod/modpost.c              | 9 +++++++++
 12 files changed, 33 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/arch/blackfin/Kconfig b/arch/blackfin/Kconfig
index ae6a60f10120..2180433213b7 100644
--- a/arch/blackfin/Kconfig
+++ b/arch/blackfin/Kconfig
@@ -5,6 +5,10 @@
 
 mainmenu "Blackfin Kernel Configuration"
 
+config SYMBOL_PREFIX
+	string
+	default "_"
+
 config MMU
 	def_bool n
 
diff --git a/arch/blackfin/include/asm/module.h b/arch/blackfin/include/asm/module.h
index 9c1cfffddd9b..4282b169ead9 100644
--- a/arch/blackfin/include/asm/module.h
+++ b/arch/blackfin/include/asm/module.h
@@ -7,8 +7,6 @@
 #ifndef _ASM_BFIN_MODULE_H
 #define _ASM_BFIN_MODULE_H
 
-#define MODULE_SYMBOL_PREFIX "_"
-
 #define Elf_Shdr        Elf32_Shdr
 #define Elf_Sym         Elf32_Sym
 #define Elf_Ehdr        Elf32_Ehdr
diff --git a/arch/blackfin/kernel/vmlinux.lds.S b/arch/blackfin/kernel/vmlinux.lds.S
index 10e12539000e..f39707c6590d 100644
--- a/arch/blackfin/kernel/vmlinux.lds.S
+++ b/arch/blackfin/kernel/vmlinux.lds.S
@@ -4,8 +4,6 @@
  * Licensed under the GPL-2 or later
  */
 
-#define VMLINUX_SYMBOL(_sym_) _##_sym_
-
 #include <asm-generic/vmlinux.lds.h>
 #include <asm/mem_map.h>
 #include <asm/page.h>
diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig
index 9420648352b8..53cc669e6d59 100644
--- a/arch/h8300/Kconfig
+++ b/arch/h8300/Kconfig
@@ -10,6 +10,10 @@ config H8300
 	default y
 	select HAVE_IDE
 
+config SYMBOL_PREFIX
+	string
+	default "_"
+
 config MMU
 	bool
 	default n
diff --git a/arch/h8300/include/asm/module.h b/arch/h8300/include/asm/module.h
index de23231f3196..8e46724b7c09 100644
--- a/arch/h8300/include/asm/module.h
+++ b/arch/h8300/include/asm/module.h
@@ -8,6 +8,4 @@ struct mod_arch_specific { };
 #define Elf_Sym Elf32_Sym
 #define Elf_Ehdr Elf32_Ehdr
 
-#define MODULE_SYMBOL_PREFIX "_"
-
 #endif /* _ASM_H8/300_MODULE_H */
diff --git a/arch/h8300/kernel/vmlinux.lds.S b/arch/h8300/kernel/vmlinux.lds.S
index b9e24907e6ea..03d356d96e5d 100644
--- a/arch/h8300/kernel/vmlinux.lds.S
+++ b/arch/h8300/kernel/vmlinux.lds.S
@@ -1,4 +1,3 @@
-#define VMLINUX_SYMBOL(_sym_) _##_sym_
 #include <asm-generic/vmlinux.lds.h>
 #include <asm/page.h>
 
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index b6e818f4b247..67e652068e0e 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -52,8 +52,12 @@
 #define LOAD_OFFSET 0
 #endif
 
-#ifndef VMLINUX_SYMBOL
-#define VMLINUX_SYMBOL(_sym_) _sym_
+#ifndef SYMBOL_PREFIX
+#define VMLINUX_SYMBOL(sym) sym
+#else
+#define PASTE2(x,y) x##y
+#define PASTE(x,y) PASTE2(x,y)
+#define VMLINUX_SYMBOL(sym) PASTE(SYMBOL_PREFIX, sym)
 #endif
 
 /* Align . to a 8 byte boundary equals to maximum function alignment. */
diff --git a/include/linux/module.h b/include/linux/module.h
index 482efc865acf..6cb1a3cab5d3 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -25,8 +25,10 @@
 /* Not Yet Implemented */
 #define MODULE_SUPPORTED_DEVICE(name)
 
-/* some toolchains uses a `_' prefix for all user symbols */
-#ifndef MODULE_SYMBOL_PREFIX
+/* Some toolchains use a `_' prefix for all user symbols. */
+#ifdef CONFIG_SYMBOL_PREFIX
+#define MODULE_SYMBOL_PREFIX CONFIG_SYMBOL_PREFIX
+#else
 #define MODULE_SYMBOL_PREFIX ""
 #endif
 
diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib
index ffdafb26f539..224d85e72ef1 100644
--- a/scripts/Makefile.lib
+++ b/scripts/Makefile.lib
@@ -127,6 +127,11 @@ _c_flags += $(if $(patsubst n%,, \
 		$(CFLAGS_GCOV))
 endif
 
+ifdef CONFIG_SYMBOL_PREFIX
+_cpp_flags += -DSYMBOL_PREFIX=$(patsubst "%",%,$(CONFIG_SYMBOL_PREFIX))
+endif
+
+
 # If building the kernel in a separate objtree expand all occurrences
 # of -Idir to -I$(srctree)/dir except for absolute paths (starting with '/').
 
diff --git a/scripts/mod/Makefile b/scripts/mod/Makefile
index 11d69c35e5b4..ff954f8168c1 100644
--- a/scripts/mod/Makefile
+++ b/scripts/mod/Makefile
@@ -8,7 +8,7 @@ modpost-objs	:= modpost.o file2alias.o sumversion.o
 $(obj)/modpost.o $(obj)/file2alias.o $(obj)/sumversion.o: $(obj)/elfconfig.h
 
 quiet_cmd_elfconfig = MKELF   $@
-      cmd_elfconfig = $(obj)/mk_elfconfig $(ARCH) < $< > $@
+      cmd_elfconfig = $(obj)/mk_elfconfig < $< > $@
 
 $(obj)/elfconfig.h: $(obj)/empty.o $(obj)/mk_elfconfig FORCE
 	$(call if_changed,elfconfig)
diff --git a/scripts/mod/mk_elfconfig.c b/scripts/mod/mk_elfconfig.c
index 6a96d47bd1e6..639bca7ba559 100644
--- a/scripts/mod/mk_elfconfig.c
+++ b/scripts/mod/mk_elfconfig.c
@@ -9,9 +9,6 @@ main(int argc, char **argv)
 	unsigned char ei[EI_NIDENT];
 	union { short s; char c[2]; } endian_test;
 
-	if (argc != 2) {
-		fprintf(stderr, "Error: no arch\n");
-	}
 	if (fread(ei, 1, EI_NIDENT, stdin) != EI_NIDENT) {
 		fprintf(stderr, "Error: input truncated\n");
 		return 1;
@@ -55,12 +52,6 @@ main(int argc, char **argv)
 	else
 		exit(1);
 
-	if ((strcmp(argv[1], "h8300") == 0)
-	    || (strcmp(argv[1], "blackfin") == 0))
-		printf("#define MODULE_SYMBOL_PREFIX \"_\"\n");
-	else
-		printf("#define MODULE_SYMBOL_PREFIX \"\"\n");
-
 	return 0;
 }
 
diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
index 801a16a17545..fb0f9b711af3 100644
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c
@@ -15,8 +15,17 @@
 #include <stdio.h>
 #include <ctype.h>
 #include "modpost.h"
+#include "../../include/linux/autoconf.h"
 #include "../../include/linux/license.h"
 
+/* Some toolchains use a `_' prefix for all user symbols. */
+#ifdef CONFIG_SYMBOL_PREFIX
+#define MODULE_SYMBOL_PREFIX CONFIG_SYMBOL_PREFIX
+#else
+#define MODULE_SYMBOL_PREFIX ""
+#endif
+
+
 /* Are we using CONFIG_MODVERSIONS? */
 int modversions = 0;
 /* Warn about undefined symbols? (do so if we have vmlinux) */
-- 
cgit v1.2.3


From e36c54582c6f14adc9e10473e2aec2cc4f0acc03 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Mon, 14 Dec 2009 15:58:33 -0500
Subject: tracing: Fix return of trace_dump_stack()

The trace_dump_stack() returned a value for a void function.

Also, added the missing stub for trace_dump_stack() when tracing is
not configured.

Reported-by: Ingo Molnar <mingo@elte.hu>
LKML-Reference: <20091214162713.GA31060@elte.hu>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/kernel.h | 1 +
 kernel/trace/trace.c   | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 5ad4199fb073..f1dc752da0d2 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -527,6 +527,7 @@ trace_printk(const char *fmt, ...) __attribute__ ((format (printf, 1, 2)));
 static inline void tracing_start(void) { }
 static inline void tracing_stop(void) { }
 static inline void ftrace_off_permanent(void) { }
+static inline void trace_dump_stack(void) { }
 static inline int
 trace_printk(const char *fmt, ...)
 {
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index bd7b969a729a..ee61915935d5 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1158,7 +1158,7 @@ void trace_dump_stack(void)
 	unsigned long flags;
 
 	if (tracing_disabled || tracing_selftest_running)
-		return 0;
+		return;
 
 	local_save_flags(flags);
 
-- 
cgit v1.2.3


From 48f186124220794fce85ed1439fc32f16f69d3e2 Mon Sep 17 00:00:00 2001
From: Alexandros Batsakis <batsakis@netapp.com>
Date: Mon, 14 Dec 2009 21:27:53 -0800
Subject: rpc: add rpc_queue_empty function

Signed-off-by: Alexandros Batsakis <batsakis@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/sched.h |  1 +
 net/sunrpc/sched.c           | 14 ++++++++++++++
 2 files changed, 15 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index 1906782ec86b..9157405f9320 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -229,6 +229,7 @@ void		rpc_wake_up_queued_task(struct rpc_wait_queue *,
 void		rpc_wake_up(struct rpc_wait_queue *);
 struct rpc_task *rpc_wake_up_next(struct rpc_wait_queue *);
 void		rpc_wake_up_status(struct rpc_wait_queue *, int);
+int		rpc_queue_empty(struct rpc_wait_queue *);
 void		rpc_delay(struct rpc_task *, unsigned long);
 void *		rpc_malloc(struct rpc_task *, size_t);
 void		rpc_free(void *);
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index cef74ba0666c..89ea8e69ec78 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -384,6 +384,20 @@ static void rpc_wake_up_task_queue_locked(struct rpc_wait_queue *queue, struct r
 		__rpc_do_wake_up_task(queue, task);
 }
 
+/*
+ * Tests whether rpc queue is empty
+ */
+int rpc_queue_empty(struct rpc_wait_queue *queue)
+{
+	int res;
+
+	spin_lock_bh(&queue->lock);
+	res = queue->qlen;
+	spin_unlock_bh(&queue->lock);
+	return (res == 0);
+}
+EXPORT_SYMBOL_GPL(rpc_queue_empty);
+
 /*
  * Wake up a task on a specific queue
  */
-- 
cgit v1.2.3


From cf3b01b54880debb01ea7d471123da5887a7c2cb Mon Sep 17 00:00:00 2001
From: Alexandros Batsakis <batsakis@netapp.com>
Date: Mon, 14 Dec 2009 21:27:55 -0800
Subject: rpc: add a new priority in RPC task

Signed-off-by: Alexandros Batsakis <batsakis@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/sched.h | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index 9157405f9320..7bc7fd5291ce 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -173,7 +173,8 @@ struct rpc_task_setup {
 #define RPC_PRIORITY_LOW	(-1)
 #define RPC_PRIORITY_NORMAL	(0)
 #define RPC_PRIORITY_HIGH	(1)
-#define RPC_NR_PRIORITY		(1 + RPC_PRIORITY_HIGH - RPC_PRIORITY_LOW)
+#define RPC_PRIORITY_PRIVILEGED	(2)
+#define RPC_NR_PRIORITY		(1 + RPC_PRIORITY_PRIVILEGED - RPC_PRIORITY_LOW)
 
 struct rpc_timer {
 	struct timer_list timer;
@@ -255,6 +256,16 @@ static inline int rpc_wait_for_completion_task(struct rpc_task *task)
 	return __rpc_wait_for_completion_task(task, NULL);
 }
 
+static inline void rpc_task_set_priority(struct rpc_task *task, unsigned char prio)
+{
+	task->tk_priority = prio - RPC_PRIORITY_LOW;
+}
+
+static inline int rpc_task_has_priority(struct rpc_task *task, unsigned char prio)
+{
+	return (task->tk_priority + RPC_PRIORITY_LOW == prio);
+}
+
 #ifdef RPC_DEBUG
 static inline const char * rpc_qname(struct rpc_wait_queue *q)
 {
-- 
cgit v1.2.3


From eb4c86c6a5adec423c9e615d4937fdddd06a16c5 Mon Sep 17 00:00:00 2001
From: Steve Dickson <SteveD@redhat.com>
Date: Wed, 9 Sep 2009 14:58:22 -0400
Subject: nfsd: introduce export flag for v4 pseudoroot

NFSv4 differs from v2 and v3 in that it presents a single unified
filesystem tree, whereas v2 and v3 exported multiple filesystem (whose
roots could be found using a separate mount protocol).

Our original NFSv4 server implementation asked the administrator to
designate a single filesystem as the NFSv4 root, then to mount
filesystems they wished to export underneath.  (Often using bind mounts
of already-existing filesystems.)

This was conceptually simple, and allowed easy implementation, but
created a serious obstacle to upgrading between v2/v3: since the paths
to v4 filesystems were different, administrators would have to adjust
all the paths in client-side mount commands when switching to v4.

Various workarounds are possible.  For example, the administrator could
export "/" and designate it as the v4 root.  However, the security risks
of that approach are obvious, and in any case we shouldn't be requiring
the administrator to take extra steps to fix this problem; instead, the
server should present consistent paths across different versions by
default.

These patches take a modified version of that approach: we provide a new
export option which exports only a subset of a filesystem.  With this
flag, it becomes safe for mountd to export "/" by default, with no need
for additional configuration.

We begin just by defining the new flag.

Signed-off-by: Steve Dickson <steved@redhat.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 fs/nfsd/export.c            |  1 +
 include/linux/nfsd/export.h | 10 ++++++++++
 2 files changed, 11 insertions(+)

(limited to 'include/linux')

diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index cb3dae2fcd86..c64d55f319bd 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -1425,6 +1425,7 @@ static struct flags {
 	{ NFSEXP_CROSSMOUNT, {"crossmnt", ""}},
 	{ NFSEXP_NOSUBTREECHECK, {"no_subtree_check", ""}},
 	{ NFSEXP_NOAUTHNLM, {"insecure_locks", ""}},
+	{ NFSEXP_V4ROOT, {"v4root", ""}},
 #ifdef MSNFS
 	{ NFSEXP_MSNFS, {"msnfs", ""}},
 #endif
diff --git a/include/linux/nfsd/export.h b/include/linux/nfsd/export.h
index 4cafbe1255f0..41f0d4e25374 100644
--- a/include/linux/nfsd/export.h
+++ b/include/linux/nfsd/export.h
@@ -39,6 +39,16 @@
 #define NFSEXP_FSID		0x2000
 #define	NFSEXP_CROSSMOUNT	0x4000
 #define	NFSEXP_NOACL		0x8000	/* reserved for possible ACL related use */
+/*
+ * The NFSEXP_V4ROOT flag causes the kernel to give access only to NFSv4
+ * clients, and only to the single directory that is the root of the
+ * export; further lookup and readdir operations are treated as if every
+ * subdirectory was a mountpoint, and ignored if they are not themselves
+ * exported.  This is used by nfsd and mountd to construct the NFSv4
+ * pseudofilesystem, which provides access only to paths leading to each
+ * exported filesystem.
+ */
+#define	NFSEXP_V4ROOT		0x10000
 /* All flags that we claim to support.  (Note we don't support NOACL.) */
 #define NFSEXP_ALLFLAGS		0x7E3F
 
-- 
cgit v1.2.3


From f13c12c634e124d5d31f912b969d542a016d6105 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 15 Dec 2009 19:43:11 +0100
Subject: perf_events: Fix perf_event_attr layout

The miss-alignment of bp_addr created a 32bit hole, causing
different structure packings on 32 and 64 bit machines.

Fix that by moving __reserve_2 into that hole.

Further, remove the useless struct and redundant __bp_reserve
muck.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <1260902591.8023.781.camel@laptop>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_event.h | 12 +++---------
 kernel/perf_event.c        |  2 +-
 2 files changed, 4 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 64a53f74c9a9..5fcbf7d2712a 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -211,17 +211,11 @@ struct perf_event_attr {
 		__u32		wakeup_watermark; /* bytes before wakeup   */
 	};
 
-	struct { /* Hardware breakpoint info */
-		__u64		bp_addr;
-		__u32		bp_type;
-		__u32		bp_len;
-		__u64		__bp_reserved_1;
-		__u64		__bp_reserved_2;
-	};
-
 	__u32			__reserved_2;
 
-	__u64			__reserved_3;
+	__u64			bp_addr;
+	__u32			bp_type;
+	__u32			bp_len;
 };
 
 /*
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 8823b0885183..0dd8e5d02c66 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -4564,7 +4564,7 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr,
 	if (attr->type >= PERF_TYPE_MAX)
 		return -EINVAL;
 
-	if (attr->__reserved_1 || attr->__reserved_2 || attr->__reserved_3)
+	if (attr->__reserved_1 || attr->__reserved_2)
 		return -EINVAL;
 
 	if (attr->sample_type & ~(PERF_SAMPLE_MAX-1))
-- 
cgit v1.2.3


From f2511774863487e61b56a97da07ebf8dd61d7836 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@infradead.org>
Date: Sun, 13 Dec 2009 20:29:01 +0100
Subject: PM: Add initcall_debug style timing for suspend/resume

In order to diagnose overall suspend/resume times, we need
basic instrumentation to break down the total time into per
device timing, similar to initcall_debug.

This patch adds the basic timing instrumentation, needed
for a scritps/bootgraph.pl equivalent or humans.
The bootgraph.pl program is still a work in progress, but
is far enough along to know that this patch is sufficient.

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
---
 drivers/base/power/main.c | 31 +++++++++++++++++++++++++++++++
 include/linux/init.h      |  2 ++
 2 files changed, 33 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index 8aa2443182d5..30f0ceebd36c 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -25,6 +25,7 @@
 #include <linux/resume-trace.h>
 #include <linux/rwsem.h>
 #include <linux/interrupt.h>
+#include <linux/sched.h>
 
 #include "../base.h"
 #include "power.h"
@@ -172,6 +173,13 @@ static int pm_op(struct device *dev,
 		 pm_message_t state)
 {
 	int error = 0;
+	ktime_t calltime, delta, rettime;
+
+	if (initcall_debug) {
+		pr_info("calling  %s+ @ %i\n",
+				dev_name(dev), task_pid_nr(current));
+		calltime = ktime_get();
+	}
 
 	switch (state.event) {
 #ifdef CONFIG_SUSPEND
@@ -219,6 +227,14 @@ static int pm_op(struct device *dev,
 	default:
 		error = -EINVAL;
 	}
+
+	if (initcall_debug) {
+		rettime = ktime_get();
+		delta = ktime_sub(rettime, calltime);
+		pr_info("call %s+ returned %d after %Ld usecs\n", dev_name(dev),
+			error, (unsigned long long)ktime_to_ns(delta) >> 10);
+	}
+
 	return error;
 }
 
@@ -236,6 +252,13 @@ static int pm_noirq_op(struct device *dev,
 			pm_message_t state)
 {
 	int error = 0;
+	ktime_t calltime, delta, rettime;
+
+	if (initcall_debug) {
+		pr_info("calling  %s_i+ @ %i\n",
+				dev_name(dev), task_pid_nr(current));
+		calltime = ktime_get();
+	}
 
 	switch (state.event) {
 #ifdef CONFIG_SUSPEND
@@ -283,6 +306,14 @@ static int pm_noirq_op(struct device *dev,
 	default:
 		error = -EINVAL;
 	}
+
+	if (initcall_debug) {
+		rettime = ktime_get();
+		delta = ktime_sub(rettime, calltime);
+		printk("initcall %s_i+ returned %d after %Ld usecs\n", dev_name(dev),
+			error, (unsigned long long)ktime_to_ns(delta) >> 10);
+	}
+
 	return error;
 }
 
diff --git a/include/linux/init.h b/include/linux/init.h
index ff8bde520d03..ab1d31f9352b 100644
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -149,6 +149,8 @@ void prepare_namespace(void);
 
 extern void (*late_time_init)(void);
 
+extern int initcall_debug;
+
 #endif
   
 #ifndef MODULE
-- 
cgit v1.2.3


From 3d8986c7585457c45fd349b2c542c7c1ecd20843 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Tue, 15 Dec 2009 14:09:03 -0500
Subject: nfsd: enable V4ROOT exports

With the v4root option now enforced everywhere it should be, it is safe
to advertise support for it to mountd.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 include/linux/nfsd/export.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/nfsd/export.h b/include/linux/nfsd/export.h
index 41f0d4e25374..8ae78a61eea4 100644
--- a/include/linux/nfsd/export.h
+++ b/include/linux/nfsd/export.h
@@ -50,7 +50,7 @@
  */
 #define	NFSEXP_V4ROOT		0x10000
 /* All flags that we claim to support.  (Note we don't support NOACL.) */
-#define NFSEXP_ALLFLAGS		0x7E3F
+#define NFSEXP_ALLFLAGS		0x17E3F
 
 /* The flags that may vary depending on security flavor: */
 #define NFSEXP_SECINFO_FLAGS	(NFSEXP_READONLY | NFSEXP_ROOTSQUASH \
-- 
cgit v1.2.3


From c7af6b0895229bd080b86afc91302b66f6df0378 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Fri, 4 Dec 2009 18:29:33 -0500
Subject: nfsd: remove unused field rq_reffh

This field is never referenced anywhere else.  I don't know what it was
intended for.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 include/linux/sunrpc/svc.h | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index d1567d627557..5a3085b9b394 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -273,10 +273,6 @@ struct svc_rqst {
 	struct auth_domain *	rq_client;	/* RPC peer info */
 	struct auth_domain *	rq_gssclient;	/* "gss/"-style peer info */
 	struct svc_cacherep *	rq_cacherep;	/* cache info */
-	struct knfsd_fh *	rq_reffh;	/* Referrence filehandle, used to
-						 * determine what device number
-						 * to report (real or virtual)
-						 */
 	int			rq_splice_ok;   /* turned off in gss privacy
 						 * to prevent encrypting page
 						 * cache pages */
-- 
cgit v1.2.3


From 1557aca7904ed6fadd22cdc3364754070bb3d3c3 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Fri, 4 Dec 2009 19:36:06 -0500
Subject: nfsd: move most of nfsfh.h to fs/nfsd

Most of this can be trivially moved to a private header as well.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 fs/nfsd/export.c           |   1 +
 fs/nfsd/nfsfh.h            | 208 +++++++++++++++++++++++++++++++++++++++++++++
 fs/nfsd/state.h            |   1 +
 fs/nfsd/vfs.h              |   2 +
 fs/nfsd/xdr.h              |   1 +
 include/linux/nfsd/nfsfh.h | 199 -------------------------------------------
 6 files changed, 213 insertions(+), 199 deletions(-)
 create mode 100644 fs/nfsd/nfsfh.h

(limited to 'include/linux')

diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 7d5ba1b0ffcf..b26a3644fbb9 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -22,6 +22,7 @@
 #include <net/ipv6.h>
 
 #include "nfsd.h"
+#include "nfsfh.h"
 
 #define NFSDDBG_FACILITY	NFSDDBG_EXPORT
 
diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h
new file mode 100644
index 000000000000..cdfb8c6a4206
--- /dev/null
+++ b/fs/nfsd/nfsfh.h
@@ -0,0 +1,208 @@
+/* Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de> */
+
+#ifndef _LINUX_NFSD_FH_INT_H
+#define _LINUX_NFSD_FH_INT_H
+
+#include <linux/nfsd/nfsfh.h>
+
+enum nfsd_fsid {
+	FSID_DEV = 0,
+	FSID_NUM,
+	FSID_MAJOR_MINOR,
+	FSID_ENCODE_DEV,
+	FSID_UUID4_INUM,
+	FSID_UUID8,
+	FSID_UUID16,
+	FSID_UUID16_INUM,
+};
+
+enum fsid_source {
+	FSIDSOURCE_DEV,
+	FSIDSOURCE_FSID,
+	FSIDSOURCE_UUID,
+};
+extern enum fsid_source fsid_source(struct svc_fh *fhp);
+
+
+/* This might look a little large to "inline" but in all calls except
+ * one, 'vers' is constant so moste of the function disappears.
+ */
+static inline void mk_fsid(int vers, u32 *fsidv, dev_t dev, ino_t ino,
+			   u32 fsid, unsigned char *uuid)
+{
+	u32 *up;
+	switch(vers) {
+	case FSID_DEV:
+		fsidv[0] = htonl((MAJOR(dev)<<16) |
+				 MINOR(dev));
+		fsidv[1] = ino_t_to_u32(ino);
+		break;
+	case FSID_NUM:
+		fsidv[0] = fsid;
+		break;
+	case FSID_MAJOR_MINOR:
+		fsidv[0] = htonl(MAJOR(dev));
+		fsidv[1] = htonl(MINOR(dev));
+		fsidv[2] = ino_t_to_u32(ino);
+		break;
+
+	case FSID_ENCODE_DEV:
+		fsidv[0] = new_encode_dev(dev);
+		fsidv[1] = ino_t_to_u32(ino);
+		break;
+
+	case FSID_UUID4_INUM:
+		/* 4 byte fsid and inode number */
+		up = (u32*)uuid;
+		fsidv[0] = ino_t_to_u32(ino);
+		fsidv[1] = up[0] ^ up[1] ^ up[2] ^ up[3];
+		break;
+
+	case FSID_UUID8:
+		/* 8 byte fsid  */
+		up = (u32*)uuid;
+		fsidv[0] = up[0] ^ up[2];
+		fsidv[1] = up[1] ^ up[3];
+		break;
+
+	case FSID_UUID16:
+		/* 16 byte fsid - NFSv3+ only */
+		memcpy(fsidv, uuid, 16);
+		break;
+
+	case FSID_UUID16_INUM:
+		/* 8 byte inode and 16 byte fsid */
+		*(u64*)fsidv = (u64)ino;
+		memcpy(fsidv+2, uuid, 16);
+		break;
+	default: BUG();
+	}
+}
+
+static inline int key_len(int type)
+{
+	switch(type) {
+	case FSID_DEV:		return 8;
+	case FSID_NUM: 		return 4;
+	case FSID_MAJOR_MINOR:	return 12;
+	case FSID_ENCODE_DEV:	return 8;
+	case FSID_UUID4_INUM:	return 8;
+	case FSID_UUID8:	return 8;
+	case FSID_UUID16:	return 16;
+	case FSID_UUID16_INUM:	return 24;
+	default: return 0;
+	}
+}
+
+/*
+ * Shorthand for dprintk()'s
+ */
+extern char * SVCFH_fmt(struct svc_fh *fhp);
+
+/*
+ * Function prototypes
+ */
+__be32	fh_verify(struct svc_rqst *, struct svc_fh *, int, int);
+__be32	fh_compose(struct svc_fh *, struct svc_export *, struct dentry *, struct svc_fh *);
+__be32	fh_update(struct svc_fh *);
+void	fh_put(struct svc_fh *);
+
+static __inline__ struct svc_fh *
+fh_copy(struct svc_fh *dst, struct svc_fh *src)
+{
+	WARN_ON(src->fh_dentry || src->fh_locked);
+			
+	*dst = *src;
+	return dst;
+}
+
+static inline void
+fh_copy_shallow(struct knfsd_fh *dst, struct knfsd_fh *src)
+{
+	dst->fh_size = src->fh_size;
+	memcpy(&dst->fh_base, &src->fh_base, src->fh_size);
+}
+
+static __inline__ struct svc_fh *
+fh_init(struct svc_fh *fhp, int maxsize)
+{
+	memset(fhp, 0, sizeof(*fhp));
+	fhp->fh_maxsize = maxsize;
+	return fhp;
+}
+
+#ifdef CONFIG_NFSD_V3
+/*
+ * Fill in the pre_op attr for the wcc data
+ */
+static inline void
+fill_pre_wcc(struct svc_fh *fhp)
+{
+	struct inode    *inode;
+
+	inode = fhp->fh_dentry->d_inode;
+	if (!fhp->fh_pre_saved) {
+		fhp->fh_pre_mtime = inode->i_mtime;
+		fhp->fh_pre_ctime = inode->i_ctime;
+		fhp->fh_pre_size  = inode->i_size;
+		fhp->fh_pre_change = inode->i_version;
+		fhp->fh_pre_saved = 1;
+	}
+}
+
+extern void fill_post_wcc(struct svc_fh *);
+#else
+#define	fill_pre_wcc(ignored)
+#define fill_post_wcc(notused)
+#endif /* CONFIG_NFSD_V3 */
+
+
+/*
+ * Lock a file handle/inode
+ * NOTE: both fh_lock and fh_unlock are done "by hand" in
+ * vfs.c:nfsd_rename as it needs to grab 2 i_mutex's at once
+ * so, any changes here should be reflected there.
+ */
+
+static inline void
+fh_lock_nested(struct svc_fh *fhp, unsigned int subclass)
+{
+	struct dentry	*dentry = fhp->fh_dentry;
+	struct inode	*inode;
+
+	BUG_ON(!dentry);
+
+	if (fhp->fh_locked) {
+		printk(KERN_WARNING "fh_lock: %s/%s already locked!\n",
+			dentry->d_parent->d_name.name, dentry->d_name.name);
+		return;
+	}
+
+	inode = dentry->d_inode;
+	mutex_lock_nested(&inode->i_mutex, subclass);
+	fill_pre_wcc(fhp);
+	fhp->fh_locked = 1;
+}
+
+static inline void
+fh_lock(struct svc_fh *fhp)
+{
+	fh_lock_nested(fhp, I_MUTEX_NORMAL);
+}
+
+/*
+ * Unlock a file handle/inode
+ */
+static inline void
+fh_unlock(struct svc_fh *fhp)
+{
+	BUG_ON(!fhp->fh_dentry);
+
+	if (fhp->fh_locked) {
+		fill_post_wcc(fhp);
+		mutex_unlock(&fhp->fh_dentry->d_inode->i_mutex);
+		fhp->fh_locked = 0;
+	}
+}
+
+#endif /* _LINUX_NFSD_FH_INT_H */
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 2af75686e0d3..775b8d281d6a 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -38,6 +38,7 @@
 #define _NFSD4_STATE_H
 
 #include <linux/nfsd/nfsfh.h>
+#include "nfsfh.h"
 
 typedef struct {
 	u32             cl_boot;
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
index f4fa6d351bbd..4b1de0a9ea75 100644
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h
@@ -5,6 +5,8 @@
 #ifndef LINUX_NFSD_VFS_H
 #define LINUX_NFSD_VFS_H
 
+#include "nfsfh.h"
+
 /*
  * Flags for nfsd_permission
  */
diff --git a/fs/nfsd/xdr.h b/fs/nfsd/xdr.h
index 235ee5c3be54..87fe6f64b8f7 100644
--- a/fs/nfsd/xdr.h
+++ b/fs/nfsd/xdr.h
@@ -9,6 +9,7 @@
 
 #include <linux/vfs.h>
 #include "nfsd.h"
+#include "nfsfh.h"
 
 struct nfsd_fhandle {
 	struct svc_fh		fh;
diff --git a/include/linux/nfsd/nfsfh.h b/include/linux/nfsd/nfsfh.h
index 49523edbc510..65e333afaee4 100644
--- a/include/linux/nfsd/nfsfh.h
+++ b/include/linux/nfsd/nfsfh.h
@@ -162,205 +162,6 @@ typedef struct svc_fh {
 
 } svc_fh;
 
-enum nfsd_fsid {
-	FSID_DEV = 0,
-	FSID_NUM,
-	FSID_MAJOR_MINOR,
-	FSID_ENCODE_DEV,
-	FSID_UUID4_INUM,
-	FSID_UUID8,
-	FSID_UUID16,
-	FSID_UUID16_INUM,
-};
-
-enum fsid_source {
-	FSIDSOURCE_DEV,
-	FSIDSOURCE_FSID,
-	FSIDSOURCE_UUID,
-};
-extern enum fsid_source fsid_source(struct svc_fh *fhp);
-
-
-/* This might look a little large to "inline" but in all calls except
- * one, 'vers' is constant so moste of the function disappears.
- */
-static inline void mk_fsid(int vers, u32 *fsidv, dev_t dev, ino_t ino,
-			   u32 fsid, unsigned char *uuid)
-{
-	u32 *up;
-	switch(vers) {
-	case FSID_DEV:
-		fsidv[0] = htonl((MAJOR(dev)<<16) |
-				 MINOR(dev));
-		fsidv[1] = ino_t_to_u32(ino);
-		break;
-	case FSID_NUM:
-		fsidv[0] = fsid;
-		break;
-	case FSID_MAJOR_MINOR:
-		fsidv[0] = htonl(MAJOR(dev));
-		fsidv[1] = htonl(MINOR(dev));
-		fsidv[2] = ino_t_to_u32(ino);
-		break;
-
-	case FSID_ENCODE_DEV:
-		fsidv[0] = new_encode_dev(dev);
-		fsidv[1] = ino_t_to_u32(ino);
-		break;
-
-	case FSID_UUID4_INUM:
-		/* 4 byte fsid and inode number */
-		up = (u32*)uuid;
-		fsidv[0] = ino_t_to_u32(ino);
-		fsidv[1] = up[0] ^ up[1] ^ up[2] ^ up[3];
-		break;
-
-	case FSID_UUID8:
-		/* 8 byte fsid  */
-		up = (u32*)uuid;
-		fsidv[0] = up[0] ^ up[2];
-		fsidv[1] = up[1] ^ up[3];
-		break;
-
-	case FSID_UUID16:
-		/* 16 byte fsid - NFSv3+ only */
-		memcpy(fsidv, uuid, 16);
-		break;
-
-	case FSID_UUID16_INUM:
-		/* 8 byte inode and 16 byte fsid */
-		*(u64*)fsidv = (u64)ino;
-		memcpy(fsidv+2, uuid, 16);
-		break;
-	default: BUG();
-	}
-}
-
-static inline int key_len(int type)
-{
-	switch(type) {
-	case FSID_DEV:		return 8;
-	case FSID_NUM: 		return 4;
-	case FSID_MAJOR_MINOR:	return 12;
-	case FSID_ENCODE_DEV:	return 8;
-	case FSID_UUID4_INUM:	return 8;
-	case FSID_UUID8:	return 8;
-	case FSID_UUID16:	return 16;
-	case FSID_UUID16_INUM:	return 24;
-	default: return 0;
-	}
-}
-
-/*
- * Shorthand for dprintk()'s
- */
-extern char * SVCFH_fmt(struct svc_fh *fhp);
-
-/*
- * Function prototypes
- */
-__be32	fh_verify(struct svc_rqst *, struct svc_fh *, int, int);
-__be32	fh_compose(struct svc_fh *, struct svc_export *, struct dentry *, struct svc_fh *);
-__be32	fh_update(struct svc_fh *);
-void	fh_put(struct svc_fh *);
-
-static __inline__ struct svc_fh *
-fh_copy(struct svc_fh *dst, struct svc_fh *src)
-{
-	WARN_ON(src->fh_dentry || src->fh_locked);
-			
-	*dst = *src;
-	return dst;
-}
-
-static inline void
-fh_copy_shallow(struct knfsd_fh *dst, struct knfsd_fh *src)
-{
-	dst->fh_size = src->fh_size;
-	memcpy(&dst->fh_base, &src->fh_base, src->fh_size);
-}
-
-static __inline__ struct svc_fh *
-fh_init(struct svc_fh *fhp, int maxsize)
-{
-	memset(fhp, 0, sizeof(*fhp));
-	fhp->fh_maxsize = maxsize;
-	return fhp;
-}
-
-#ifdef CONFIG_NFSD_V3
-/*
- * Fill in the pre_op attr for the wcc data
- */
-static inline void
-fill_pre_wcc(struct svc_fh *fhp)
-{
-	struct inode    *inode;
-
-	inode = fhp->fh_dentry->d_inode;
-	if (!fhp->fh_pre_saved) {
-		fhp->fh_pre_mtime = inode->i_mtime;
-		fhp->fh_pre_ctime = inode->i_ctime;
-		fhp->fh_pre_size  = inode->i_size;
-		fhp->fh_pre_change = inode->i_version;
-		fhp->fh_pre_saved = 1;
-	}
-}
-
-extern void fill_post_wcc(struct svc_fh *);
-#else
-#define	fill_pre_wcc(ignored)
-#define fill_post_wcc(notused)
-#endif /* CONFIG_NFSD_V3 */
-
-
-/*
- * Lock a file handle/inode
- * NOTE: both fh_lock and fh_unlock are done "by hand" in
- * vfs.c:nfsd_rename as it needs to grab 2 i_mutex's at once
- * so, any changes here should be reflected there.
- */
-
-static inline void
-fh_lock_nested(struct svc_fh *fhp, unsigned int subclass)
-{
-	struct dentry	*dentry = fhp->fh_dentry;
-	struct inode	*inode;
-
-	BUG_ON(!dentry);
-
-	if (fhp->fh_locked) {
-		printk(KERN_WARNING "fh_lock: %s/%s already locked!\n",
-			dentry->d_parent->d_name.name, dentry->d_name.name);
-		return;
-	}
-
-	inode = dentry->d_inode;
-	mutex_lock_nested(&inode->i_mutex, subclass);
-	fill_pre_wcc(fhp);
-	fhp->fh_locked = 1;
-}
-
-static inline void
-fh_lock(struct svc_fh *fhp)
-{
-	fh_lock_nested(fhp, I_MUTEX_NORMAL);
-}
-
-/*
- * Unlock a file handle/inode
- */
-static inline void
-fh_unlock(struct svc_fh *fhp)
-{
-	BUG_ON(!fhp->fh_dentry);
-
-	if (fhp->fh_locked) {
-		fill_post_wcc(fhp);
-		mutex_unlock(&fhp->fh_dentry->d_inode->i_mutex);
-		fhp->fh_locked = 0;
-	}
-}
 #endif /* __KERNEL__ */
 
 
-- 
cgit v1.2.3


From c1e7c3ae59b065bf7ff24a05cb609b2f9e314db6 Mon Sep 17 00:00:00 2001
From: Phillip Lougher <phillip@lougher.demon.co.uk>
Date: Mon, 14 Dec 2009 21:45:19 +0000
Subject: bzip2/lzma/gzip: pre-boot malloc doesn't return NULL on failure

The trivial malloc implementation used in the pre-boot environment by the
decompressors returns a bad pointer on failure (falling through after
calling error).  This is doubly wrong - the callers expect malloc to
return NULL on failure, second the error function is intended to be
used by the decompressors to propagate errors to *their* callers.  The
decompressors have no access to any state set by the error function.

Signed-off-by: Phillip Lougher <phillip@lougher.demon.co.uk>
LKML-Reference: <4b26b1ef.hIInb2AYPMtImAJO%phillip@lougher.demon.co.uk>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 include/linux/decompress/mm.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/decompress/mm.h b/include/linux/decompress/mm.h
index 12ff8c3f1d05..5032b9a31ae7 100644
--- a/include/linux/decompress/mm.h
+++ b/include/linux/decompress/mm.h
@@ -25,7 +25,7 @@ static void *malloc(int size)
 	void *p;
 
 	if (size < 0)
-		error("Malloc error");
+		return NULL;
 	if (!malloc_ptr)
 		malloc_ptr = free_mem_ptr;
 
@@ -35,7 +35,7 @@ static void *malloc(int size)
 	malloc_ptr += size;
 
 	if (free_mem_end_ptr && malloc_ptr >= free_mem_end_ptr)
-		error("Out of memory");
+		return NULL;
 
 	malloc_count++;
 	return p;
-- 
cgit v1.2.3


From 9065ce4500085b9ca66b19d3c4d21a73cb410173 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bjorn.helgaas@hp.com>
Date: Tue, 17 Nov 2009 17:05:19 -0700
Subject: PNP: add interface to retrieve ACPI device from a PNPACPI device

Add pnp_acpi_device(pnp_dev), which takes a PNP device and returns the
associated ACPI device (or NULL, if the device is not a PNPACPI device).

This allows us to write a PNP driver that can manage both traditional
PNPBIOS and ACPI devices, treating ACPI-only functionality as an optional
extension.

Signed-off-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 drivers/pnp/pnpacpi/core.c |  3 ++-
 include/linux/pnp.h        | 13 +++++++++++++
 2 files changed, 15 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/pnp/pnpacpi/core.c b/drivers/pnp/pnpacpi/core.c
index b2348fc2378e..5314bf630bc4 100644
--- a/drivers/pnp/pnpacpi/core.c
+++ b/drivers/pnp/pnpacpi/core.c
@@ -144,7 +144,7 @@ static int pnpacpi_resume(struct pnp_dev *dev)
 }
 #endif
 
-static struct pnp_protocol pnpacpi_protocol = {
+struct pnp_protocol pnpacpi_protocol = {
 	.name	 = "Plug and Play ACPI",
 	.get	 = pnpacpi_get_resources,
 	.set	 = pnpacpi_set_resources,
@@ -154,6 +154,7 @@ static struct pnp_protocol pnpacpi_protocol = {
 	.resume = pnpacpi_resume,
 #endif
 };
+EXPORT_SYMBOL(pnpacpi_protocol);
 
 static int __init pnpacpi_add_device(struct acpi_device *device)
 {
diff --git a/include/linux/pnp.h b/include/linux/pnp.h
index fddfafaed024..7c4193eb0072 100644
--- a/include/linux/pnp.h
+++ b/include/linux/pnp.h
@@ -334,6 +334,19 @@ extern struct pnp_protocol pnpbios_protocol;
 #define pnp_device_is_pnpbios(dev) 0
 #endif
 
+#ifdef CONFIG_PNPACPI
+extern struct pnp_protocol pnpacpi_protocol;
+
+static inline struct acpi_device *pnp_acpi_device(struct pnp_dev *dev)
+{
+	if (dev->protocol == &pnpacpi_protocol)
+		return dev->data;
+	return NULL;
+}
+#else
+#define pnp_acpi_device(dev) 0
+#endif
+
 /* status */
 #define PNP_READY		0x0000
 #define PNP_ATTACHED		0x0001
-- 
cgit v1.2.3


From bb5b7c11263dbbe78253cd05945a6bf8f55add8e Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 15 Dec 2009 20:56:42 -0800
Subject: tcp: Revert per-route SACK/DSACK/TIMESTAMP changes.

It creates a regression, triggering badness for SYN_RECV
sockets, for example:

[19148.022102] Badness at net/ipv4/inet_connection_sock.c:293
[19148.022570] NIP: c02a0914 LR: c02a0904 CTR: 00000000
[19148.023035] REGS: eeecbd30 TRAP: 0700   Not tainted  (2.6.32)
[19148.023496] MSR: 00029032 <EE,ME,CE,IR,DR>  CR: 24002442  XER: 00000000
[19148.024012] TASK = eee9a820[1756] 'privoxy' THREAD: eeeca000

This is likely caused by the change in the 'estab' parameter
passed to tcp_parse_options() when invoked by the functions
in net/ipv4/tcp_minisocks.c

But even if that is fixed, the ->conn_request() changes made in
this patch series is fundamentally wrong.  They try to use the
listening socket's 'dst' to probe the route settings.  The
listening socket doesn't even have a route, and you can't
get the right route (the child request one) until much later
after we setup all of the state, and it must be done by hand.

This stuff really isn't ready, so the best thing to do is a
full revert.  This reverts the following commits:

f55017a93f1a74d50244b1254b9a2bd7ac9bbf7d
022c3f7d82f0f1c68018696f2f027b87b9bb45c2
1aba721eba1d84a2defce45b950272cee1e6c72a
cda42ebd67ee5fdf09d7057b5a4584d36fe8a335
345cda2fd695534be5a4494f1b59da9daed33663
dc343475ed062e13fc260acccaab91d7d80fd5b2
05eaade2782fb0c90d3034fd7a7d5a16266182bb
6a2a2d6bf8581216e08be15fcb563cfd6c430e1e

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/rtnetlink.h |  6 ++----
 include/net/dst.h         |  2 +-
 include/net/tcp.h         |  3 +--
 net/ipv4/syncookies.c     | 27 +++++++++++++--------------
 net/ipv4/tcp_input.c      | 24 ++++++++----------------
 net/ipv4/tcp_ipv4.c       | 21 +++++++++------------
 net/ipv4/tcp_minisocks.c  | 10 +++++-----
 net/ipv4/tcp_output.c     | 18 +++++-------------
 net/ipv6/syncookies.c     | 28 +++++++++++++---------------
 net/ipv6/tcp_ipv6.c       |  3 +--
 10 files changed, 58 insertions(+), 84 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index 14fc906ed602..05330fc5b436 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -368,11 +368,9 @@ enum {
 #define RTAX_MAX (__RTAX_MAX - 1)
 
 #define RTAX_FEATURE_ECN	0x00000001
-#define RTAX_FEATURE_NO_SACK	0x00000002
-#define RTAX_FEATURE_NO_TSTAMP	0x00000004
+#define RTAX_FEATURE_SACK	0x00000002
+#define RTAX_FEATURE_TIMESTAMP	0x00000004
 #define RTAX_FEATURE_ALLFRAG	0x00000008
-#define RTAX_FEATURE_NO_WSCALE	0x00000010
-#define RTAX_FEATURE_NO_DSACK	0x00000020
 
 struct rta_session {
 	__u8	proto;
diff --git a/include/net/dst.h b/include/net/dst.h
index 387cb3cfde7e..39c4a5963e12 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -113,7 +113,7 @@ dst_metric(const struct dst_entry *dst, int metric)
 static inline u32
 dst_feature(const struct dst_entry *dst, u32 feature)
 {
-	return (dst ? dst_metric(dst, RTAX_FEATURES) & feature : 0);
+	return dst_metric(dst, RTAX_FEATURES) & feature;
 }
 
 static inline u32 dst_mtu(const struct dst_entry *dst)
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 1b6f7d348cee..34f5cc24d903 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -408,8 +408,7 @@ extern int			tcp_recvmsg(struct kiocb *iocb, struct sock *sk,
 extern void			tcp_parse_options(struct sk_buff *skb,
 						  struct tcp_options_received *opt_rx,
 						  u8 **hvpp,
-						  int estab,
-						  struct dst_entry *dst);
+						  int estab);
 
 extern u8			*tcp_parse_md5sig_option(struct tcphdr *th);
 
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 26399ad2a289..66fd80ef2473 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -277,6 +277,13 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 
 	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESRECV);
 
+	/* check for timestamp cookie support */
+	memset(&tcp_opt, 0, sizeof(tcp_opt));
+	tcp_parse_options(skb, &tcp_opt, &hash_location, 0);
+
+	if (tcp_opt.saw_tstamp)
+		cookie_check_timestamp(&tcp_opt);
+
 	ret = NULL;
 	req = inet_reqsk_alloc(&tcp_request_sock_ops); /* for safety */
 	if (!req)
@@ -292,6 +299,12 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 	ireq->loc_addr		= ip_hdr(skb)->daddr;
 	ireq->rmt_addr		= ip_hdr(skb)->saddr;
 	ireq->ecn_ok		= 0;
+	ireq->snd_wscale	= tcp_opt.snd_wscale;
+	ireq->rcv_wscale	= tcp_opt.rcv_wscale;
+	ireq->sack_ok		= tcp_opt.sack_ok;
+	ireq->wscale_ok		= tcp_opt.wscale_ok;
+	ireq->tstamp_ok		= tcp_opt.saw_tstamp;
+	req->ts_recent		= tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0;
 
 	/* We throwed the options of the initial SYN away, so we hope
 	 * the ACK carries the same options again (see RFC1122 4.2.3.8)
@@ -340,20 +353,6 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 		}
 	}
 
-	/* check for timestamp cookie support */
-	memset(&tcp_opt, 0, sizeof(tcp_opt));
-	tcp_parse_options(skb, &tcp_opt, &hash_location, 0, &rt->u.dst);
-
-	if (tcp_opt.saw_tstamp)
-		cookie_check_timestamp(&tcp_opt);
-
-	ireq->snd_wscale        = tcp_opt.snd_wscale;
-	ireq->rcv_wscale        = tcp_opt.rcv_wscale;
-	ireq->sack_ok           = tcp_opt.sack_ok;
-	ireq->wscale_ok         = tcp_opt.wscale_ok;
-	ireq->tstamp_ok         = tcp_opt.saw_tstamp;
-	req->ts_recent          = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0;
-
 	/* Try to redo what tcp_v4_send_synack did. */
 	req->window_clamp = tp->window_clamp ? :dst_metric(&rt->u.dst, RTAX_WINDOW);
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 12cab7d74dba..28e029632493 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3727,7 +3727,7 @@ old_ack:
  * the fast version below fails.
  */
 void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
-		       u8 **hvpp, int estab,  struct dst_entry *dst)
+		       u8 **hvpp, int estab)
 {
 	unsigned char *ptr;
 	struct tcphdr *th = tcp_hdr(skb);
@@ -3766,8 +3766,7 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
 				break;
 			case TCPOPT_WINDOW:
 				if (opsize == TCPOLEN_WINDOW && th->syn &&
-				    !estab && sysctl_tcp_window_scaling &&
-				    !dst_feature(dst, RTAX_FEATURE_NO_WSCALE)) {
+				    !estab && sysctl_tcp_window_scaling) {
 					__u8 snd_wscale = *(__u8 *)ptr;
 					opt_rx->wscale_ok = 1;
 					if (snd_wscale > 14) {
@@ -3783,8 +3782,7 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
 			case TCPOPT_TIMESTAMP:
 				if ((opsize == TCPOLEN_TIMESTAMP) &&
 				    ((estab && opt_rx->tstamp_ok) ||
-				     (!estab && sysctl_tcp_timestamps &&
-				      !dst_feature(dst, RTAX_FEATURE_NO_TSTAMP)))) {
+				     (!estab && sysctl_tcp_timestamps))) {
 					opt_rx->saw_tstamp = 1;
 					opt_rx->rcv_tsval = get_unaligned_be32(ptr);
 					opt_rx->rcv_tsecr = get_unaligned_be32(ptr + 4);
@@ -3792,8 +3790,7 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
 				break;
 			case TCPOPT_SACK_PERM:
 				if (opsize == TCPOLEN_SACK_PERM && th->syn &&
-				    !estab && sysctl_tcp_sack &&
-				    !dst_feature(dst, RTAX_FEATURE_NO_SACK)) {
+				    !estab && sysctl_tcp_sack) {
 					opt_rx->sack_ok = 1;
 					tcp_sack_reset(opt_rx);
 				}
@@ -3878,7 +3875,7 @@ static int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th,
 		if (tcp_parse_aligned_timestamp(tp, th))
 			return 1;
 	}
-	tcp_parse_options(skb, &tp->rx_opt, hvpp, 1, NULL);
+	tcp_parse_options(skb, &tp->rx_opt, hvpp, 1);
 	return 1;
 }
 
@@ -4133,10 +4130,8 @@ static inline int tcp_sack_extend(struct tcp_sack_block *sp, u32 seq,
 static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct dst_entry *dst = __sk_dst_get(sk);
 
-	if (tcp_is_sack(tp) && sysctl_tcp_dsack &&
-	    !dst_feature(dst, RTAX_FEATURE_NO_DSACK)) {
+	if (tcp_is_sack(tp) && sysctl_tcp_dsack) {
 		int mib_idx;
 
 		if (before(seq, tp->rcv_nxt))
@@ -4165,15 +4160,13 @@ static void tcp_dsack_extend(struct sock *sk, u32 seq, u32 end_seq)
 static void tcp_send_dupack(struct sock *sk, struct sk_buff *skb)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct dst_entry *dst = __sk_dst_get(sk);
 
 	if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
 	    before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
 		tcp_enter_quickack_mode(sk);
 
-		if (tcp_is_sack(tp) && sysctl_tcp_dsack &&
-		    !dst_feature(dst, RTAX_FEATURE_NO_DSACK)) {
+		if (tcp_is_sack(tp) && sysctl_tcp_dsack) {
 			u32 end_seq = TCP_SKB_CB(skb)->end_seq;
 
 			if (after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))
@@ -5428,11 +5421,10 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
 	u8 *hash_location;
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct dst_entry *dst = __sk_dst_get(sk);
 	struct tcp_cookie_values *cvp = tp->cookie_values;
 	int saved_clamp = tp->rx_opt.mss_clamp;
 
-	tcp_parse_options(skb, &tp->rx_opt, &hash_location, 0, dst);
+	tcp_parse_options(skb, &tp->rx_opt, &hash_location, 0);
 
 	if (th->ack) {
 		/* rfc793:
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 15e96030ce47..65b8ebfd078a 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1262,20 +1262,10 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 	tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops;
 #endif
 
-	ireq = inet_rsk(req);
-	ireq->loc_addr = daddr;
-	ireq->rmt_addr = saddr;
-	ireq->no_srccheck = inet_sk(sk)->transparent;
-	ireq->opt = tcp_v4_save_options(sk, skb);
-
-	dst = inet_csk_route_req(sk, req);
-	if(!dst)
-		goto drop_and_free;
-
 	tcp_clear_options(&tmp_opt);
 	tmp_opt.mss_clamp = TCP_MSS_DEFAULT;
 	tmp_opt.user_mss  = tp->rx_opt.user_mss;
-	tcp_parse_options(skb, &tmp_opt, &hash_location, 0, dst);
+	tcp_parse_options(skb, &tmp_opt, &hash_location, 0);
 
 	if (tmp_opt.cookie_plus > 0 &&
 	    tmp_opt.saw_tstamp &&
@@ -1319,8 +1309,14 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
 	tcp_openreq_init(req, &tmp_opt, skb);
 
+	ireq = inet_rsk(req);
+	ireq->loc_addr = daddr;
+	ireq->rmt_addr = saddr;
+	ireq->no_srccheck = inet_sk(sk)->transparent;
+	ireq->opt = tcp_v4_save_options(sk, skb);
+
 	if (security_inet_conn_request(sk, skb, req))
-		goto drop_and_release;
+		goto drop_and_free;
 
 	if (!want_cookie)
 		TCP_ECN_create_request(req, tcp_hdr(skb));
@@ -1345,6 +1341,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 		 */
 		if (tmp_opt.saw_tstamp &&
 		    tcp_death_row.sysctl_tw_recycle &&
+		    (dst = inet_csk_route_req(sk, req)) != NULL &&
 		    (peer = rt_get_peer((struct rtable *)dst)) != NULL &&
 		    peer->v4daddr == saddr) {
 			if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL &&
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 87accec8d097..f206ee5dda80 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -95,9 +95,9 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
 	struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
 	int paws_reject = 0;
 
+	tmp_opt.saw_tstamp = 0;
 	if (th->doff > (sizeof(*th) >> 2) && tcptw->tw_ts_recent_stamp) {
-		tmp_opt.tstamp_ok = 1;
-		tcp_parse_options(skb, &tmp_opt, &hash_location, 1, NULL);
+		tcp_parse_options(skb, &tmp_opt, &hash_location, 0);
 
 		if (tmp_opt.saw_tstamp) {
 			tmp_opt.ts_recent	= tcptw->tw_ts_recent;
@@ -526,9 +526,9 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
 	__be32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK);
 	int paws_reject = 0;
 
-	if ((th->doff > (sizeof(*th) >> 2)) && (req->ts_recent)) {
-		tmp_opt.tstamp_ok = 1;
-		tcp_parse_options(skb, &tmp_opt, &hash_location, 1, NULL);
+	tmp_opt.saw_tstamp = 0;
+	if (th->doff > (sizeof(struct tcphdr)>>2)) {
+		tcp_parse_options(skb, &tmp_opt, &hash_location, 0);
 
 		if (tmp_opt.saw_tstamp) {
 			tmp_opt.ts_recent = req->ts_recent;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 93316a96d820..383ce237640f 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -553,7 +553,6 @@ static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb,
 				struct tcp_md5sig_key **md5) {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct tcp_cookie_values *cvp = tp->cookie_values;
-	struct dst_entry *dst = __sk_dst_get(sk);
 	unsigned remaining = MAX_TCP_OPTION_SPACE;
 	u8 cookie_size = (!tp->rx_opt.cookie_out_never && cvp != NULL) ?
 			 tcp_cookie_size_check(cvp->cookie_desired) :
@@ -581,22 +580,18 @@ static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb,
 	opts->mss = tcp_advertise_mss(sk);
 	remaining -= TCPOLEN_MSS_ALIGNED;
 
-	if (likely(sysctl_tcp_timestamps &&
-		   !dst_feature(dst, RTAX_FEATURE_NO_TSTAMP) &&
-		   *md5 == NULL)) {
+	if (likely(sysctl_tcp_timestamps && *md5 == NULL)) {
 		opts->options |= OPTION_TS;
 		opts->tsval = TCP_SKB_CB(skb)->when;
 		opts->tsecr = tp->rx_opt.ts_recent;
 		remaining -= TCPOLEN_TSTAMP_ALIGNED;
 	}
-	if (likely(sysctl_tcp_window_scaling &&
-		   !dst_feature(dst, RTAX_FEATURE_NO_WSCALE))) {
+	if (likely(sysctl_tcp_window_scaling)) {
 		opts->ws = tp->rx_opt.rcv_wscale;
 		opts->options |= OPTION_WSCALE;
 		remaining -= TCPOLEN_WSCALE_ALIGNED;
 	}
-	if (likely(sysctl_tcp_sack &&
-		   !dst_feature(dst, RTAX_FEATURE_NO_SACK))) {
+	if (likely(sysctl_tcp_sack)) {
 		opts->options |= OPTION_SACK_ADVERTISE;
 		if (unlikely(!(OPTION_TS & opts->options)))
 			remaining -= TCPOLEN_SACKPERM_ALIGNED;
@@ -2527,9 +2522,7 @@ static void tcp_connect_init(struct sock *sk)
 	 * See tcp_input.c:tcp_rcv_state_process case TCP_SYN_SENT.
 	 */
 	tp->tcp_header_len = sizeof(struct tcphdr) +
-		(sysctl_tcp_timestamps &&
-		(!dst_feature(dst, RTAX_FEATURE_NO_TSTAMP) ?
-		  TCPOLEN_TSTAMP_ALIGNED : 0));
+		(sysctl_tcp_timestamps ? TCPOLEN_TSTAMP_ALIGNED : 0);
 
 #ifdef CONFIG_TCP_MD5SIG
 	if (tp->af_specific->md5_lookup(sk, sk) != NULL)
@@ -2555,8 +2548,7 @@ static void tcp_connect_init(struct sock *sk)
 				  tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0),
 				  &tp->rcv_wnd,
 				  &tp->window_clamp,
-				  (sysctl_tcp_window_scaling &&
-				   !dst_feature(dst, RTAX_FEATURE_NO_WSCALE)),
+				  sysctl_tcp_window_scaling,
 				  &rcv_wscale);
 
 	tp->rx_opt.rcv_wscale = rcv_wscale;
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 5b9af508b8f2..7208a06576c6 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -185,6 +185,13 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 
 	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESRECV);
 
+	/* check for timestamp cookie support */
+	memset(&tcp_opt, 0, sizeof(tcp_opt));
+	tcp_parse_options(skb, &tcp_opt, &hash_location, 0);
+
+	if (tcp_opt.saw_tstamp)
+		cookie_check_timestamp(&tcp_opt);
+
 	ret = NULL;
 	req = inet6_reqsk_alloc(&tcp6_request_sock_ops);
 	if (!req)
@@ -218,6 +225,12 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 	req->expires = 0UL;
 	req->retrans = 0;
 	ireq->ecn_ok		= 0;
+	ireq->snd_wscale	= tcp_opt.snd_wscale;
+	ireq->rcv_wscale	= tcp_opt.rcv_wscale;
+	ireq->sack_ok		= tcp_opt.sack_ok;
+	ireq->wscale_ok		= tcp_opt.wscale_ok;
+	ireq->tstamp_ok		= tcp_opt.saw_tstamp;
+	req->ts_recent		= tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0;
 	treq->rcv_isn = ntohl(th->seq) - 1;
 	treq->snt_isn = cookie;
 
@@ -253,21 +266,6 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 			goto out_free;
 	}
 
-	/* check for timestamp cookie support */
-	memset(&tcp_opt, 0, sizeof(tcp_opt));
-	tcp_parse_options(skb, &tcp_opt, &hash_location, 0, dst);
-
-	if (tcp_opt.saw_tstamp)
-		cookie_check_timestamp(&tcp_opt);
-
-	req->ts_recent          = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0;
-
-	ireq->snd_wscale        = tcp_opt.snd_wscale;
-	ireq->rcv_wscale        = tcp_opt.rcv_wscale;
-	ireq->sack_ok           = tcp_opt.sack_ok;
-	ireq->wscale_ok         = tcp_opt.wscale_ok;
-	ireq->tstamp_ok         = tcp_opt.saw_tstamp;
-
 	req->window_clamp = tp->window_clamp ? :dst_metric(dst, RTAX_WINDOW);
 	tcp_select_initial_window(tcp_full_space(sk), req->mss,
 				  &req->rcv_wnd, &req->window_clamp,
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index ee9cf62458d4..febfd595a40d 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1169,7 +1169,6 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 	struct inet6_request_sock *treq;
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct dst_entry *dst = __sk_dst_get(sk);
 	__u32 isn = TCP_SKB_CB(skb)->when;
 #ifdef CONFIG_SYN_COOKIES
 	int want_cookie = 0;
@@ -1208,7 +1207,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 	tcp_clear_options(&tmp_opt);
 	tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
 	tmp_opt.user_mss = tp->rx_opt.user_mss;
-	tcp_parse_options(skb, &tmp_opt, &hash_location, 0, dst);
+	tcp_parse_options(skb, &tmp_opt, &hash_location, 0);
 
 	if (tmp_opt.cookie_plus > 0 &&
 	    tmp_opt.saw_tstamp &&
-- 
cgit v1.2.3


From 588f9ce6ca61ecb4663ee6ef2f75d2d96c73151e Mon Sep 17 00:00:00 2001
From: Andi Kleen <andi@firstfloor.org>
Date: Wed, 16 Dec 2009 12:19:57 +0100
Subject: HWPOISON: Be more aggressive at freeing non LRU caches

shake_page handles more types of page caches than lru_drain_all()

- per cpu page allocator pages
- per CPU LRU

Stops early when the page became free.

Used in followon patches.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 include/linux/mm.h  |  1 +
 mm/memory-failure.c | 22 ++++++++++++++++++++++
 2 files changed, 23 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 9d65ae4ba0e0..68c84bb2ad3f 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1335,6 +1335,7 @@ extern void memory_failure(unsigned long pfn, int trapno);
 extern int __memory_failure(unsigned long pfn, int trapno, int ref);
 extern int sysctl_memory_failure_early_kill;
 extern int sysctl_memory_failure_recovery;
+extern void shake_page(struct page *p);
 extern atomic_long_t mce_bad_pages;
 
 #endif /* __KERNEL__ */
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 50d4f8d7024a..38fcbb22eab9 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -82,6 +82,28 @@ static int kill_proc_ao(struct task_struct *t, unsigned long addr, int trapno,
 	return ret;
 }
 
+/*
+ * When a unknown page type is encountered drain as many buffers as possible
+ * in the hope to turn the page into a LRU or free page, which we can handle.
+ */
+void shake_page(struct page *p)
+{
+	if (!PageSlab(p)) {
+		lru_add_drain_all();
+		if (PageLRU(p))
+			return;
+		drain_all_pages();
+		if (PageLRU(p) || is_free_buddy_page(p))
+			return;
+	}
+	/*
+	 * Could call shrink_slab here (which would also
+	 * shrink other caches). Unfortunately that might
+	 * also access the corrupted page, which could be fatal.
+	 */
+}
+EXPORT_SYMBOL_GPL(shake_page);
+
 /*
  * Kill all processes that have a poisoned page mapped and then isolate
  * the page.
-- 
cgit v1.2.3


From 82ba011b9041dd31c15e4f63797b08aa0a288e61 Mon Sep 17 00:00:00 2001
From: Andi Kleen <andi@firstfloor.org>
Date: Wed, 16 Dec 2009 12:19:57 +0100
Subject: HWPOISON: Turn ref argument into flags argument

Now that "ref" is just a boolean turn it into
a flags argument. First step is only a single flag
that makes the code's intention more clear, but more
may follow.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 include/linux/mm.h  | 5 ++++-
 mm/madvise.c        | 2 +-
 mm/memory-failure.c | 5 +++--
 3 files changed, 8 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 68c84bb2ad3f..135e19198cd3 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1331,8 +1331,11 @@ extern int account_locked_memory(struct mm_struct *mm, struct rlimit *rlim,
 				 size_t size);
 extern void refund_locked_memory(struct mm_struct *mm, size_t size);
 
+enum mf_flags {
+	MF_COUNT_INCREASED = 1 << 0,
+};
 extern void memory_failure(unsigned long pfn, int trapno);
-extern int __memory_failure(unsigned long pfn, int trapno, int ref);
+extern int __memory_failure(unsigned long pfn, int trapno, int flags);
 extern int sysctl_memory_failure_early_kill;
 extern int sysctl_memory_failure_recovery;
 extern void shake_page(struct page *p);
diff --git a/mm/madvise.c b/mm/madvise.c
index 18970aec0d2f..6ca34f0cd4aa 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -237,7 +237,7 @@ static int madvise_hwpoison(unsigned long start, unsigned long end)
 		printk(KERN_INFO "Injecting memory failure for page %lx at %lx\n",
 		       page_to_pfn(p), start);
 		/* Ignore return value for now */
-		__memory_failure(page_to_pfn(p), 0, 1);
+		__memory_failure(page_to_pfn(p), 0, MF_COUNT_INCREASED);
 	}
 	return ret;
 }
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 4253e14fa709..3338c443272c 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -737,7 +737,7 @@ static void hwpoison_user_mappings(struct page *p, unsigned long pfn,
 		      ret != SWAP_SUCCESS, pfn);
 }
 
-int __memory_failure(unsigned long pfn, int trapno, int ref)
+int __memory_failure(unsigned long pfn, int trapno, int flags)
 {
 	unsigned long lru_flag;
 	struct page_state *ps;
@@ -773,7 +773,8 @@ int __memory_failure(unsigned long pfn, int trapno, int ref)
 	 * In fact it's dangerous to directly bump up page count from 0,
 	 * that may make page_freeze_refs()/page_unfreeze_refs() mismatch.
 	 */
-	if (!ref && !get_page_unless_zero(compound_head(p))) {
+	if (!(flags & MF_COUNT_INCREASED) &&
+		!get_page_unless_zero(compound_head(p))) {
 		action_result(pfn, "free or high order kernel", IGNORED);
 		return PageBuddy(compound_head(p)) ? 0 : -EBUSY;
 	}
-- 
cgit v1.2.3


From 847ce401df392b0704369fd3f75df614ac1414b4 Mon Sep 17 00:00:00 2001
From: Wu Fengguang <fengguang.wu@intel.com>
Date: Wed, 16 Dec 2009 12:19:58 +0100
Subject: HWPOISON: Add unpoisoning support

The unpoisoning interface is useful for stress testing tools to
reclaim poisoned pages (to prevent OOM)

There is no hardware level unpoisioning, so this
cannot be used for real memory errors, only for software injected errors.

Note that it may leak pages silently - those who have been removed from
LRU cache, but not isolated from page cache/swap cache at hwpoison time.
Especially the stress test of dirty swap cache pages shall reboot system
before exhausting memory.

AK: Fix comments, add documentation, add printks, rename symbol

Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 Documentation/vm/hwpoison.txt | 16 ++++++++--
 include/linux/mm.h            |  1 +
 include/linux/page-flags.h    |  2 +-
 mm/hwpoison-inject.c          | 36 +++++++++++++++++++----
 mm/memory-failure.c           | 68 +++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 114 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/vm/hwpoison.txt b/Documentation/vm/hwpoison.txt
index 3ffadf8da61f..f047e75acb23 100644
--- a/Documentation/vm/hwpoison.txt
+++ b/Documentation/vm/hwpoison.txt
@@ -98,10 +98,22 @@ madvise(MADV_POISON, ....)
 
 
 hwpoison-inject module through debugfs
-	/sys/debug/hwpoison/corrupt-pfn
 
-Inject hwpoison fault at PFN echoed into this file
+/sys/debug/hwpoison/
 
+corrupt-pfn
+
+Inject hwpoison fault at PFN echoed into this file.
+
+unpoison-pfn
+
+Software-unpoison page at PFN echoed into this file. This
+way a page can be reused again.
+This only works for Linux injected failures, not for real
+memory failures.
+
+Note these injection interfaces are not stable and might change between
+kernel versions
 
 Architecture specific MCE injector
 
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 135e19198cd3..8cdb941fc7b5 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1336,6 +1336,7 @@ enum mf_flags {
 };
 extern void memory_failure(unsigned long pfn, int trapno);
 extern int __memory_failure(unsigned long pfn, int trapno, int flags);
+extern int unpoison_memory(unsigned long pfn);
 extern int sysctl_memory_failure_early_kill;
 extern int sysctl_memory_failure_recovery;
 extern void shake_page(struct page *p);
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 49e907bd067f..f9df6308af95 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -275,7 +275,7 @@ PAGEFLAG_FALSE(Uncached)
 
 #ifdef CONFIG_MEMORY_FAILURE
 PAGEFLAG(HWPoison, hwpoison)
-TESTSETFLAG(HWPoison, hwpoison)
+TESTSCFLAG(HWPoison, hwpoison)
 #define __PG_HWPOISON (1UL << PG_hwpoison)
 #else
 PAGEFLAG_FALSE(HWPoison)
diff --git a/mm/hwpoison-inject.c b/mm/hwpoison-inject.c
index e1d85137f086..6e35e563bf50 100644
--- a/mm/hwpoison-inject.c
+++ b/mm/hwpoison-inject.c
@@ -4,7 +4,7 @@
 #include <linux/kernel.h>
 #include <linux/mm.h>
 
-static struct dentry *hwpoison_dir, *corrupt_pfn;
+static struct dentry *hwpoison_dir;
 
 static int hwpoison_inject(void *data, u64 val)
 {
@@ -14,7 +14,16 @@ static int hwpoison_inject(void *data, u64 val)
 	return __memory_failure(val, 18, 0);
 }
 
+static int hwpoison_unpoison(void *data, u64 val)
+{
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	return unpoison_memory(val);
+}
+
 DEFINE_SIMPLE_ATTRIBUTE(hwpoison_fops, NULL, hwpoison_inject, "%lli\n");
+DEFINE_SIMPLE_ATTRIBUTE(unpoison_fops, NULL, hwpoison_unpoison, "%lli\n");
 
 static void pfn_inject_exit(void)
 {
@@ -24,16 +33,31 @@ static void pfn_inject_exit(void)
 
 static int pfn_inject_init(void)
 {
+	struct dentry *dentry;
+
 	hwpoison_dir = debugfs_create_dir("hwpoison", NULL);
 	if (hwpoison_dir == NULL)
 		return -ENOMEM;
-	corrupt_pfn = debugfs_create_file("corrupt-pfn", 0600, hwpoison_dir,
+
+	/*
+	 * Note that the below poison/unpoison interfaces do not involve
+	 * hardware status change, hence do not require hardware support.
+	 * They are mainly for testing hwpoison in software level.
+	 */
+	dentry = debugfs_create_file("corrupt-pfn", 0600, hwpoison_dir,
 					  NULL, &hwpoison_fops);
-	if (corrupt_pfn == NULL) {
-		pfn_inject_exit();
-		return -ENOMEM;
-	}
+	if (!dentry)
+		goto fail;
+
+	dentry = debugfs_create_file("unpoison-pfn", 0600, hwpoison_dir,
+				     NULL, &unpoison_fops);
+	if (!dentry)
+		goto fail;
+
 	return 0;
+fail:
+	pfn_inject_exit();
+	return -ENOMEM;
 }
 
 module_init(pfn_inject_init);
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 5055b940df5f..ed6e91c87a54 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -838,6 +838,16 @@ int __memory_failure(unsigned long pfn, int trapno, int flags)
 	 * and in many cases impossible, so we just avoid it here.
 	 */
 	lock_page_nosync(p);
+
+	/*
+	 * unpoison always clear PG_hwpoison inside page lock
+	 */
+	if (!PageHWPoison(p)) {
+		action_result(pfn, "unpoisoned", IGNORED);
+		res = 0;
+		goto out;
+	}
+
 	wait_on_page_writeback(p);
 
 	/*
@@ -893,3 +903,61 @@ void memory_failure(unsigned long pfn, int trapno)
 {
 	__memory_failure(pfn, trapno, 0);
 }
+
+/**
+ * unpoison_memory - Unpoison a previously poisoned page
+ * @pfn: Page number of the to be unpoisoned page
+ *
+ * Software-unpoison a page that has been poisoned by
+ * memory_failure() earlier.
+ *
+ * This is only done on the software-level, so it only works
+ * for linux injected failures, not real hardware failures
+ *
+ * Returns 0 for success, otherwise -errno.
+ */
+int unpoison_memory(unsigned long pfn)
+{
+	struct page *page;
+	struct page *p;
+	int freeit = 0;
+
+	if (!pfn_valid(pfn))
+		return -ENXIO;
+
+	p = pfn_to_page(pfn);
+	page = compound_head(p);
+
+	if (!PageHWPoison(p)) {
+		pr_debug("MCE: Page was already unpoisoned %#lx\n", pfn);
+		return 0;
+	}
+
+	if (!get_page_unless_zero(page)) {
+		if (TestClearPageHWPoison(p))
+			atomic_long_dec(&mce_bad_pages);
+		pr_debug("MCE: Software-unpoisoned free page %#lx\n", pfn);
+		return 0;
+	}
+
+	lock_page_nosync(page);
+	/*
+	 * This test is racy because PG_hwpoison is set outside of page lock.
+	 * That's acceptable because that won't trigger kernel panic. Instead,
+	 * the PG_hwpoison page will be caught and isolated on the entrance to
+	 * the free buddy page pool.
+	 */
+	if (TestClearPageHWPoison(p)) {
+		pr_debug("MCE: Software-unpoisoned page %#lx\n", pfn);
+		atomic_long_dec(&mce_bad_pages);
+		freeit = 1;
+	}
+	unlock_page(page);
+
+	put_page(page);
+	if (freeit)
+		put_page(page);
+
+	return 0;
+}
+EXPORT_SYMBOL(unpoison_memory);
-- 
cgit v1.2.3


From 1a9b5b7fe0c5dad8a635288882d36785dea742f9 Mon Sep 17 00:00:00 2001
From: Wu Fengguang <fengguang.wu@intel.com>
Date: Wed, 16 Dec 2009 12:19:59 +0100
Subject: mm: export stable page flags

Rename get_uflags() to stable_page_flags() and make it a global function
for use in the hwpoison page flags filter, which need to compare user
page flags with the value provided by user space.

Also move KPF_* to kernel-page-flags.h for use by user space tools.

Acked-by: Matt Mackall <mpm@selenic.com>
Signed-off-by: Andi Kleen <ak@linux.intel.com>
CC: Nick Piggin <npiggin@suse.de>
CC: Christoph Lameter <cl@linux-foundation.org>
Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 fs/proc/page.c                    | 45 +++-----------------------------------
 include/linux/kernel-page-flags.h | 46 +++++++++++++++++++++++++++++++++++++++
 include/linux/page-flags.h        |  2 ++
 3 files changed, 51 insertions(+), 42 deletions(-)
 create mode 100644 include/linux/kernel-page-flags.h

(limited to 'include/linux')

diff --git a/fs/proc/page.c b/fs/proc/page.c
index 5033ce0d254b..180cf5a0bd67 100644
--- a/fs/proc/page.c
+++ b/fs/proc/page.c
@@ -8,6 +8,7 @@
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 #include <linux/hugetlb.h>
+#include <linux/kernel-page-flags.h>
 #include <asm/uaccess.h>
 #include "internal.h"
 
@@ -71,52 +72,12 @@ static const struct file_operations proc_kpagecount_operations = {
  * physical page flags.
  */
 
-/* These macros are used to decouple internal flags from exported ones */
-
-#define KPF_LOCKED		0
-#define KPF_ERROR		1
-#define KPF_REFERENCED		2
-#define KPF_UPTODATE		3
-#define KPF_DIRTY		4
-#define KPF_LRU			5
-#define KPF_ACTIVE		6
-#define KPF_SLAB		7
-#define KPF_WRITEBACK		8
-#define KPF_RECLAIM		9
-#define KPF_BUDDY		10
-
-/* 11-20: new additions in 2.6.31 */
-#define KPF_MMAP		11
-#define KPF_ANON		12
-#define KPF_SWAPCACHE		13
-#define KPF_SWAPBACKED		14
-#define KPF_COMPOUND_HEAD	15
-#define KPF_COMPOUND_TAIL	16
-#define KPF_HUGE		17
-#define KPF_UNEVICTABLE		18
-#define KPF_HWPOISON		19
-#define KPF_NOPAGE		20
-
-#define KPF_KSM			21
-
-/* kernel hacking assistances
- * WARNING: subject to change, never rely on them!
- */
-#define KPF_RESERVED		32
-#define KPF_MLOCKED		33
-#define KPF_MAPPEDTODISK	34
-#define KPF_PRIVATE		35
-#define KPF_PRIVATE_2		36
-#define KPF_OWNER_PRIVATE	37
-#define KPF_ARCH		38
-#define KPF_UNCACHED		39
-
 static inline u64 kpf_copy_bit(u64 kflags, int ubit, int kbit)
 {
 	return ((kflags >> kbit) & 1) << ubit;
 }
 
-static u64 get_uflags(struct page *page)
+u64 stable_page_flags(struct page *page)
 {
 	u64 k;
 	u64 u;
@@ -219,7 +180,7 @@ static ssize_t kpageflags_read(struct file *file, char __user *buf,
 		else
 			ppage = NULL;
 
-		if (put_user(get_uflags(ppage), out)) {
+		if (put_user(stable_page_flags(ppage), out)) {
 			ret = -EFAULT;
 			break;
 		}
diff --git a/include/linux/kernel-page-flags.h b/include/linux/kernel-page-flags.h
new file mode 100644
index 000000000000..bd92a89f4b0a
--- /dev/null
+++ b/include/linux/kernel-page-flags.h
@@ -0,0 +1,46 @@
+#ifndef LINUX_KERNEL_PAGE_FLAGS_H
+#define LINUX_KERNEL_PAGE_FLAGS_H
+
+/*
+ * Stable page flag bits exported to user space
+ */
+
+#define KPF_LOCKED		0
+#define KPF_ERROR		1
+#define KPF_REFERENCED		2
+#define KPF_UPTODATE		3
+#define KPF_DIRTY		4
+#define KPF_LRU			5
+#define KPF_ACTIVE		6
+#define KPF_SLAB		7
+#define KPF_WRITEBACK		8
+#define KPF_RECLAIM		9
+#define KPF_BUDDY		10
+
+/* 11-20: new additions in 2.6.31 */
+#define KPF_MMAP		11
+#define KPF_ANON		12
+#define KPF_SWAPCACHE		13
+#define KPF_SWAPBACKED		14
+#define KPF_COMPOUND_HEAD	15
+#define KPF_COMPOUND_TAIL	16
+#define KPF_HUGE		17
+#define KPF_UNEVICTABLE		18
+#define KPF_HWPOISON		19
+#define KPF_NOPAGE		20
+
+#define KPF_KSM			21
+
+/* kernel hacking assistances
+ * WARNING: subject to change, never rely on them!
+ */
+#define KPF_RESERVED		32
+#define KPF_MLOCKED		33
+#define KPF_MAPPEDTODISK	34
+#define KPF_PRIVATE		35
+#define KPF_PRIVATE_2		36
+#define KPF_OWNER_PRIVATE	37
+#define KPF_ARCH		38
+#define KPF_UNCACHED		39
+
+#endif /* LINUX_KERNEL_PAGE_FLAGS_H */
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index f9df6308af95..feee2ba8d06a 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -282,6 +282,8 @@ PAGEFLAG_FALSE(HWPoison)
 #define __PG_HWPOISON 0
 #endif
 
+u64 stable_page_flags(struct page *page);
+
 static inline int PageUptodate(struct page *page)
 {
 	int ret = test_bit(PG_uptodate, &(page)->flags);
-- 
cgit v1.2.3


From e42d9d5d47961fb5db0be65b56dd52fe7b2421f1 Mon Sep 17 00:00:00 2001
From: Wu Fengguang <fengguang.wu@intel.com>
Date: Wed, 16 Dec 2009 12:19:59 +0100
Subject: memcg: rename and export try_get_mem_cgroup_from_page()

So that the hwpoison injector can get mem_cgroup for arbitrary page
and thus know whether it is owned by some mem_cgroup task(s).

[AK: Merged with latest git tree]

CC: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
CC: Hugh Dickins <hugh.dickins@tiscali.co.uk>
CC: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
CC: Balbir Singh <balbir@linux.vnet.ibm.com>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 include/linux/memcontrol.h |  6 ++++++
 mm/memcontrol.c            | 11 ++++-------
 2 files changed, 10 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index bf9213b2db8f..fc9bae82ac42 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -68,6 +68,7 @@ extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
 extern void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask);
 int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem);
 
+extern struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page);
 extern struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p);
 
 static inline
@@ -189,6 +190,11 @@ mem_cgroup_move_lists(struct page *page, enum lru_list from, enum lru_list to)
 {
 }
 
+static inline struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page)
+{
+	return NULL;
+}
+
 static inline int mm_match_cgroup(struct mm_struct *mm, struct mem_cgroup *mem)
 {
 	return 1;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index e0c2066495e3..b5ac61ce7346 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1379,25 +1379,22 @@ static struct mem_cgroup *mem_cgroup_lookup(unsigned short id)
 	return container_of(css, struct mem_cgroup, css);
 }
 
-static struct mem_cgroup *try_get_mem_cgroup_from_swapcache(struct page *page)
+struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page)
 {
-	struct mem_cgroup *mem;
+	struct mem_cgroup *mem = NULL;
 	struct page_cgroup *pc;
 	unsigned short id;
 	swp_entry_t ent;
 
 	VM_BUG_ON(!PageLocked(page));
 
-	if (!PageSwapCache(page))
-		return NULL;
-
 	pc = lookup_page_cgroup(page);
 	lock_page_cgroup(pc);
 	if (PageCgroupUsed(pc)) {
 		mem = pc->mem_cgroup;
 		if (mem && !css_tryget(&mem->css))
 			mem = NULL;
-	} else {
+	} else if (PageSwapCache(page)) {
 		ent.val = page_private(page);
 		id = lookup_swap_cgroup(ent);
 		rcu_read_lock();
@@ -1743,7 +1740,7 @@ int mem_cgroup_try_charge_swapin(struct mm_struct *mm,
 	 */
 	if (!PageSwapCache(page))
 		goto charge_cur_mm;
-	mem = try_get_mem_cgroup_from_swapcache(page);
+	mem = try_get_mem_cgroup_from_page(page);
 	if (!mem)
 		goto charge_cur_mm;
 	*ptr = mem;
-- 
cgit v1.2.3


From d324236b3333e87c8825b35f2104184734020d35 Mon Sep 17 00:00:00 2001
From: Wu Fengguang <fengguang.wu@intel.com>
Date: Wed, 16 Dec 2009 12:19:59 +0100
Subject: memcg: add accessor to mem_cgroup.css

So that an outside user can free the reference count grabbed by
try_get_mem_cgroup_from_page().

CC: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
CC: Hugh Dickins <hugh.dickins@tiscali.co.uk>
CC: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
CC: Balbir Singh <balbir@linux.vnet.ibm.com>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 include/linux/memcontrol.h | 7 +++++++
 mm/memcontrol.c            | 5 +++++
 2 files changed, 12 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index fc9bae82ac42..2c30a1116d84 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -81,6 +81,8 @@ int mm_match_cgroup(const struct mm_struct *mm, const struct mem_cgroup *cgroup)
 	return cgroup == mem;
 }
 
+extern struct cgroup_subsys_state *mem_cgroup_css(struct mem_cgroup *mem);
+
 extern int
 mem_cgroup_prepare_migration(struct page *page, struct mem_cgroup **ptr);
 extern void mem_cgroup_end_migration(struct mem_cgroup *mem,
@@ -206,6 +208,11 @@ static inline int task_in_mem_cgroup(struct task_struct *task,
 	return 1;
 }
 
+static inline struct cgroup_subsys_state *mem_cgroup_css(struct mem_cgroup *mem)
+{
+	return NULL;
+}
+
 static inline int
 mem_cgroup_prepare_migration(struct page *page, struct mem_cgroup **ptr)
 {
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index b5ac61ce7346..9eee80d6d490 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -282,6 +282,11 @@ mem_cgroup_zoneinfo(struct mem_cgroup *mem, int nid, int zid)
 	return &mem->info.nodeinfo[nid]->zoneinfo[zid];
 }
 
+struct cgroup_subsys_state *mem_cgroup_css(struct mem_cgroup *mem)
+{
+	return &mem->css;
+}
+
 static struct mem_cgroup_per_zone *
 page_cgroup_zoneinfo(struct page_cgroup *pc)
 {
-- 
cgit v1.2.3


From facb6011f3993947283fa15d039dacb4ad140230 Mon Sep 17 00:00:00 2001
From: Andi Kleen <andi@firstfloor.org>
Date: Wed, 16 Dec 2009 12:20:00 +0100
Subject: HWPOISON: Add soft page offline support

This is a simpler, gentler variant of memory_failure() for soft page
offlining controlled from user space.  It doesn't kill anything, just
tries to invalidate and if that doesn't work migrate the
page away.

This is useful for predictive failure analysis, where a page has
a high rate of corrected errors, but hasn't gone bad yet. Instead
it can be offlined early and avoided.

The offlining is controlled from sysfs, including a new generic
entry point for hard page offlining for symmetry too.

We use the page isolate facility to prevent re-allocation
race. Normally this is only used by memory hotplug. To avoid
races with memory allocation I am using lock_system_sleep().
This avoids the situation where memory hotplug is about
to isolate a page range and then hwpoison undoes that work.
This is a big hammer currently, but the simplest solution
currently.

When the page is not free or LRU we try to free pages
from slab and other caches. The slab freeing is currently
quite dumb and does not try to focus on the specific slab
cache which might own the page. This could be potentially
improved later.

Thanks to Fengguang Wu and Haicheng Li for some fixes.

[Added fix from Andrew Morton to adapt to new migrate_pages prototype]
Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 .../ABI/testing/sysfs-memory-page-offline          |  44 +++++
 drivers/base/memory.c                              |  61 +++++++
 include/linux/mm.h                                 |   3 +-
 mm/hwpoison-inject.c                               |   2 +-
 mm/memory-failure.c                                | 194 ++++++++++++++++++++-
 5 files changed, 297 insertions(+), 7 deletions(-)
 create mode 100644 Documentation/ABI/testing/sysfs-memory-page-offline

(limited to 'include/linux')

diff --git a/Documentation/ABI/testing/sysfs-memory-page-offline b/Documentation/ABI/testing/sysfs-memory-page-offline
new file mode 100644
index 000000000000..e14703f12fdf
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-memory-page-offline
@@ -0,0 +1,44 @@
+What:		/sys/devices/system/memory/soft_offline_page
+Date:		Sep 2009
+KernelVersion:	2.6.33
+Contact:	andi@firstfloor.org
+Description:
+		Soft-offline the memory page containing the physical address
+		written into this file. Input is a hex number specifying the
+		physical address of the page. The kernel will then attempt
+		to soft-offline it, by moving the contents elsewhere or
+		dropping it if possible. The kernel will then be placed
+		on the bad page list and never be reused.
+
+		The offlining is done in kernel specific granuality.
+		Normally it's the base page size of the kernel, but
+		this might change.
+
+		The page must be still accessible, not poisoned. The
+		kernel will never kill anything for this, but rather
+		fail the offline.  Return value is the size of the
+		number, or a error when the offlining failed.  Reading
+		the file is not allowed.
+
+What:		/sys/devices/system/memory/hard_offline_page
+Date:		Sep 2009
+KernelVersion:	2.6.33
+Contact:	andi@firstfloor.org
+Description:
+		Hard-offline the memory page containing the physical
+		address written into this file. Input is a hex number
+		specifying the physical address of the page. The
+		kernel will then attempt to hard-offline the page, by
+		trying to drop the page or killing any owner or
+		triggering IO errors if needed.  Note this may kill
+		any processes owning the page. The kernel will avoid
+		to access this page assuming it's poisoned by the
+		hardware.
+
+		The offlining is done in kernel specific granuality.
+		Normally it's the base page size of the kernel, but
+		this might change.
+
+		Return value is the size of the number, or a error when
+		the offlining failed.
+		Reading the file is not allowed.
diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index 989429cfed88..c4c8f2e1dd15 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -341,6 +341,64 @@ static inline int memory_probe_init(void)
 }
 #endif
 
+#ifdef CONFIG_MEMORY_FAILURE
+/*
+ * Support for offlining pages of memory
+ */
+
+/* Soft offline a page */
+static ssize_t
+store_soft_offline_page(struct class *class, const char *buf, size_t count)
+{
+	int ret;
+	u64 pfn;
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+	if (strict_strtoull(buf, 0, &pfn) < 0)
+		return -EINVAL;
+	pfn >>= PAGE_SHIFT;
+	if (!pfn_valid(pfn))
+		return -ENXIO;
+	ret = soft_offline_page(pfn_to_page(pfn), 0);
+	return ret == 0 ? count : ret;
+}
+
+/* Forcibly offline a page, including killing processes. */
+static ssize_t
+store_hard_offline_page(struct class *class, const char *buf, size_t count)
+{
+	int ret;
+	u64 pfn;
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+	if (strict_strtoull(buf, 0, &pfn) < 0)
+		return -EINVAL;
+	pfn >>= PAGE_SHIFT;
+	ret = __memory_failure(pfn, 0, 0);
+	return ret ? ret : count;
+}
+
+static CLASS_ATTR(soft_offline_page, 0644, NULL, store_soft_offline_page);
+static CLASS_ATTR(hard_offline_page, 0644, NULL, store_hard_offline_page);
+
+static __init int memory_fail_init(void)
+{
+	int err;
+
+	err = sysfs_create_file(&memory_sysdev_class.kset.kobj,
+				&class_attr_soft_offline_page.attr);
+	if (!err)
+		err = sysfs_create_file(&memory_sysdev_class.kset.kobj,
+				&class_attr_hard_offline_page.attr);
+	return err;
+}
+#else
+static inline int memory_fail_init(void)
+{
+	return 0;
+}
+#endif
+
 /*
  * Note that phys_device is optional.  It is here to allow for
  * differentiation between which *physical* devices each
@@ -471,6 +529,9 @@ int __init memory_dev_init(void)
 	}
 
 	err = memory_probe_init();
+	if (!ret)
+		ret = err;
+	err = memory_fail_init();
 	if (!ret)
 		ret = err;
 	err = block_size_init();
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 8cdb941fc7b5..849b4a61bd8f 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1339,8 +1339,9 @@ extern int __memory_failure(unsigned long pfn, int trapno, int flags);
 extern int unpoison_memory(unsigned long pfn);
 extern int sysctl_memory_failure_early_kill;
 extern int sysctl_memory_failure_recovery;
-extern void shake_page(struct page *p);
+extern void shake_page(struct page *p, int access);
 extern atomic_long_t mce_bad_pages;
+extern int soft_offline_page(struct page *page, int flags);
 
 #endif /* __KERNEL__ */
 #endif /* _LINUX_MM_H */
diff --git a/mm/hwpoison-inject.c b/mm/hwpoison-inject.c
index c597f46ac18a..a77fe3f9e211 100644
--- a/mm/hwpoison-inject.c
+++ b/mm/hwpoison-inject.c
@@ -29,7 +29,7 @@ static int hwpoison_inject(void *data, u64 val)
 		return 0;
 
 	if (!PageLRU(p))
-		shake_page(p);
+		shake_page(p, 0);
 	/*
 	 * This implies unable to support non-LRU pages.
 	 */
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index b5c3b6bd511f..bcce28755832 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -41,6 +41,9 @@
 #include <linux/pagemap.h>
 #include <linux/swap.h>
 #include <linux/backing-dev.h>
+#include <linux/migrate.h>
+#include <linux/page-isolation.h>
+#include <linux/suspend.h>
 #include "internal.h"
 
 int sysctl_memory_failure_early_kill __read_mostly = 0;
@@ -201,7 +204,7 @@ static int kill_proc_ao(struct task_struct *t, unsigned long addr, int trapno,
  * When a unknown page type is encountered drain as many buffers as possible
  * in the hope to turn the page into a LRU or free page, which we can handle.
  */
-void shake_page(struct page *p)
+void shake_page(struct page *p, int access)
 {
 	if (!PageSlab(p)) {
 		lru_add_drain_all();
@@ -211,11 +214,19 @@ void shake_page(struct page *p)
 		if (PageLRU(p) || is_free_buddy_page(p))
 			return;
 	}
+
 	/*
-	 * Could call shrink_slab here (which would also
-	 * shrink other caches). Unfortunately that might
-	 * also access the corrupted page, which could be fatal.
+	 * Only all shrink_slab here (which would also
+	 * shrink other caches) if access is not potentially fatal.
 	 */
+	if (access) {
+		int nr;
+		do {
+			nr = shrink_slab(1000, GFP_KERNEL, 1000);
+			if (page_count(p) == 0)
+				break;
+		} while (nr > 10);
+	}
 }
 EXPORT_SYMBOL_GPL(shake_page);
 
@@ -949,7 +960,7 @@ int __memory_failure(unsigned long pfn, int trapno, int flags)
 	 * walked by the page reclaim code, however that's not a big loss.
 	 */
 	if (!PageLRU(p))
-		shake_page(p);
+		shake_page(p, 0);
 	if (!PageLRU(p)) {
 		/*
 		 * shake_page could have turned it free.
@@ -1099,3 +1110,176 @@ int unpoison_memory(unsigned long pfn)
 	return 0;
 }
 EXPORT_SYMBOL(unpoison_memory);
+
+static struct page *new_page(struct page *p, unsigned long private, int **x)
+{
+	return alloc_pages(GFP_HIGHUSER_MOVABLE, 0);
+}
+
+/*
+ * Safely get reference count of an arbitrary page.
+ * Returns 0 for a free page, -EIO for a zero refcount page
+ * that is not free, and 1 for any other page type.
+ * For 1 the page is returned with increased page count, otherwise not.
+ */
+static int get_any_page(struct page *p, unsigned long pfn, int flags)
+{
+	int ret;
+
+	if (flags & MF_COUNT_INCREASED)
+		return 1;
+
+	/*
+	 * The lock_system_sleep prevents a race with memory hotplug,
+	 * because the isolation assumes there's only a single user.
+	 * This is a big hammer, a better would be nicer.
+	 */
+	lock_system_sleep();
+
+	/*
+	 * Isolate the page, so that it doesn't get reallocated if it
+	 * was free.
+	 */
+	set_migratetype_isolate(p);
+	if (!get_page_unless_zero(compound_head(p))) {
+		if (is_free_buddy_page(p)) {
+			pr_debug("get_any_page: %#lx free buddy page\n", pfn);
+			/* Set hwpoison bit while page is still isolated */
+			SetPageHWPoison(p);
+			ret = 0;
+		} else {
+			pr_debug("get_any_page: %#lx: unknown zero refcount page type %lx\n",
+				pfn, p->flags);
+			ret = -EIO;
+		}
+	} else {
+		/* Not a free page */
+		ret = 1;
+	}
+	unset_migratetype_isolate(p);
+	unlock_system_sleep();
+	return ret;
+}
+
+/**
+ * soft_offline_page - Soft offline a page.
+ * @page: page to offline
+ * @flags: flags. Same as memory_failure().
+ *
+ * Returns 0 on success, otherwise negated errno.
+ *
+ * Soft offline a page, by migration or invalidation,
+ * without killing anything. This is for the case when
+ * a page is not corrupted yet (so it's still valid to access),
+ * but has had a number of corrected errors and is better taken
+ * out.
+ *
+ * The actual policy on when to do that is maintained by
+ * user space.
+ *
+ * This should never impact any application or cause data loss,
+ * however it might take some time.
+ *
+ * This is not a 100% solution for all memory, but tries to be
+ * ``good enough'' for the majority of memory.
+ */
+int soft_offline_page(struct page *page, int flags)
+{
+	int ret;
+	unsigned long pfn = page_to_pfn(page);
+
+	ret = get_any_page(page, pfn, flags);
+	if (ret < 0)
+		return ret;
+	if (ret == 0)
+		goto done;
+
+	/*
+	 * Page cache page we can handle?
+	 */
+	if (!PageLRU(page)) {
+		/*
+		 * Try to free it.
+		 */
+		put_page(page);
+		shake_page(page, 1);
+
+		/*
+		 * Did it turn free?
+		 */
+		ret = get_any_page(page, pfn, 0);
+		if (ret < 0)
+			return ret;
+		if (ret == 0)
+			goto done;
+	}
+	if (!PageLRU(page)) {
+		pr_debug("soft_offline: %#lx: unknown non LRU page type %lx\n",
+				pfn, page->flags);
+		return -EIO;
+	}
+
+	lock_page(page);
+	wait_on_page_writeback(page);
+
+	/*
+	 * Synchronized using the page lock with memory_failure()
+	 */
+	if (PageHWPoison(page)) {
+		unlock_page(page);
+		put_page(page);
+		pr_debug("soft offline: %#lx page already poisoned\n", pfn);
+		return -EBUSY;
+	}
+
+	/*
+	 * Try to invalidate first. This should work for
+	 * non dirty unmapped page cache pages.
+	 */
+	ret = invalidate_inode_page(page);
+	unlock_page(page);
+
+	/*
+	 * Drop count because page migration doesn't like raised
+	 * counts. The page could get re-allocated, but if it becomes
+	 * LRU the isolation will just fail.
+	 * RED-PEN would be better to keep it isolated here, but we
+	 * would need to fix isolation locking first.
+	 */
+	put_page(page);
+	if (ret == 1) {
+		ret = 0;
+		pr_debug("soft_offline: %#lx: invalidated\n", pfn);
+		goto done;
+	}
+
+	/*
+	 * Simple invalidation didn't work.
+	 * Try to migrate to a new page instead. migrate.c
+	 * handles a large number of cases for us.
+	 */
+	ret = isolate_lru_page(page);
+	if (!ret) {
+		LIST_HEAD(pagelist);
+
+		list_add(&page->lru, &pagelist);
+		ret = migrate_pages(&pagelist, new_page, MPOL_MF_MOVE_ALL, 0);
+		if (ret) {
+			pr_debug("soft offline: %#lx: migration failed %d, type %lx\n",
+				pfn, ret, page->flags);
+			if (ret > 0)
+				ret = -EIO;
+		}
+	} else {
+		pr_debug("soft offline: %#lx: isolation failed: %d, page count %d, type %lx\n",
+				pfn, ret, page_count(page), page->flags);
+	}
+	if (ret)
+		return ret;
+
+done:
+	atomic_long_add(1, &mce_bad_pages);
+	SetPageHWPoison(page);
+	/* keep elevated page count for bad page */
+	return ret;
+}
-- 
cgit v1.2.3


From 9905a43b2d563e6f89e4c63c4278ada03f2ebb14 Mon Sep 17 00:00:00 2001
From: Emese Revfy <re.emese@gmail.com>
Date: Mon, 14 Dec 2009 00:58:57 +0100
Subject: backlight: Constify struct backlight_ops

Signed-off-by: Emese Revfy <re.emese@gmail.com>
Signed-off-by: Richard Purdie <rpurdie@linux.intel.com>
---
 drivers/video/backlight/adp5520_bl.c    |  2 +-
 drivers/video/backlight/adx_bl.c        |  2 +-
 drivers/video/backlight/atmel-pwm-bl.c  |  2 +-
 drivers/video/backlight/backlight.c     |  2 +-
 drivers/video/backlight/corgi_lcd.c     |  2 +-
 drivers/video/backlight/cr_bllcd.c      |  2 +-
 drivers/video/backlight/da903x_bl.c     |  2 +-
 drivers/video/backlight/generic_bl.c    |  2 +-
 drivers/video/backlight/hp680_bl.c      |  2 +-
 drivers/video/backlight/jornada720_bl.c |  2 +-
 drivers/video/backlight/kb3886_bl.c     |  2 +-
 drivers/video/backlight/locomolcd.c     |  2 +-
 drivers/video/backlight/mbp_nvidia_bl.c |  2 +-
 drivers/video/backlight/omap1_bl.c      |  2 +-
 drivers/video/backlight/progear_bl.c    |  2 +-
 drivers/video/backlight/pwm_bl.c        |  2 +-
 drivers/video/backlight/tosa_bl.c       |  2 +-
 drivers/video/backlight/wm831x_bl.c     |  2 +-
 include/linux/backlight.h               | 12 ++++++------
 19 files changed, 24 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/video/backlight/adp5520_bl.c b/drivers/video/backlight/adp5520_bl.c
index 4c10edecfb66..86d95c228adb 100644
--- a/drivers/video/backlight/adp5520_bl.c
+++ b/drivers/video/backlight/adp5520_bl.c
@@ -85,7 +85,7 @@ static int adp5520_bl_get_brightness(struct backlight_device *bl)
 	return error ? data->current_brightness : reg_val;
 }
 
-static struct backlight_ops adp5520_bl_ops = {
+static const struct backlight_ops adp5520_bl_ops = {
 	.update_status	= adp5520_bl_update_status,
 	.get_brightness	= adp5520_bl_get_brightness,
 };
diff --git a/drivers/video/backlight/adx_bl.c b/drivers/video/backlight/adx_bl.c
index 2c3bdfc620b7..d769b0bab21a 100644
--- a/drivers/video/backlight/adx_bl.c
+++ b/drivers/video/backlight/adx_bl.c
@@ -61,7 +61,7 @@ static int adx_backlight_check_fb(struct fb_info *fb)
 	return 1;
 }
 
-static struct backlight_ops adx_backlight_ops = {
+static const struct backlight_ops adx_backlight_ops = {
 	.options = 0,
 	.update_status = adx_backlight_update_status,
 	.get_brightness = adx_backlight_get_brightness,
diff --git a/drivers/video/backlight/atmel-pwm-bl.c b/drivers/video/backlight/atmel-pwm-bl.c
index 2cf7ba52f67c..f625ffc69ad3 100644
--- a/drivers/video/backlight/atmel-pwm-bl.c
+++ b/drivers/video/backlight/atmel-pwm-bl.c
@@ -113,7 +113,7 @@ static int atmel_pwm_bl_init_pwm(struct atmel_pwm_bl *pwmbl)
 	return pwm_channel_enable(&pwmbl->pwmc);
 }
 
-static struct backlight_ops atmel_pwm_bl_ops = {
+static const struct backlight_ops atmel_pwm_bl_ops = {
 	.get_brightness = atmel_pwm_bl_get_intensity,
 	.update_status  = atmel_pwm_bl_set_intensity,
 };
diff --git a/drivers/video/backlight/backlight.c b/drivers/video/backlight/backlight.c
index 6615ac7fa60a..18829cf68b1b 100644
--- a/drivers/video/backlight/backlight.c
+++ b/drivers/video/backlight/backlight.c
@@ -269,7 +269,7 @@ EXPORT_SYMBOL(backlight_force_update);
  * ERR_PTR() or a pointer to the newly allocated device.
  */
 struct backlight_device *backlight_device_register(const char *name,
-		struct device *parent, void *devdata, struct backlight_ops *ops)
+		struct device *parent, void *devdata, const struct backlight_ops *ops)
 {
 	struct backlight_device *new_bd;
 	int rc;
diff --git a/drivers/video/backlight/corgi_lcd.c b/drivers/video/backlight/corgi_lcd.c
index 96774949cd30..b4bcf8043797 100644
--- a/drivers/video/backlight/corgi_lcd.c
+++ b/drivers/video/backlight/corgi_lcd.c
@@ -451,7 +451,7 @@ void corgi_lcd_limit_intensity(int limit)
 }
 EXPORT_SYMBOL(corgi_lcd_limit_intensity);
 
-static struct backlight_ops corgi_bl_ops = {
+static const struct backlight_ops corgi_bl_ops = {
 	.get_brightness	= corgi_bl_get_intensity,
 	.update_status  = corgi_bl_update_status,
 };
diff --git a/drivers/video/backlight/cr_bllcd.c b/drivers/video/backlight/cr_bllcd.c
index b9fe62b475c6..2914bf104adf 100644
--- a/drivers/video/backlight/cr_bllcd.c
+++ b/drivers/video/backlight/cr_bllcd.c
@@ -108,7 +108,7 @@ static int cr_backlight_get_intensity(struct backlight_device *bd)
 	return intensity;
 }
 
-static struct backlight_ops cr_backlight_ops = {
+static const struct backlight_ops cr_backlight_ops = {
 	.get_brightness = cr_backlight_get_intensity,
 	.update_status = cr_backlight_set_intensity,
 };
diff --git a/drivers/video/backlight/da903x_bl.c b/drivers/video/backlight/da903x_bl.c
index f2d76dae1eb3..74cdc640173d 100644
--- a/drivers/video/backlight/da903x_bl.c
+++ b/drivers/video/backlight/da903x_bl.c
@@ -95,7 +95,7 @@ static int da903x_backlight_get_brightness(struct backlight_device *bl)
 	return data->current_brightness;
 }
 
-static struct backlight_ops da903x_backlight_ops = {
+static const struct backlight_ops da903x_backlight_ops = {
 	.update_status	= da903x_backlight_update_status,
 	.get_brightness	= da903x_backlight_get_brightness,
 };
diff --git a/drivers/video/backlight/generic_bl.c b/drivers/video/backlight/generic_bl.c
index 6d27f62fdcd0..e6d348e63596 100644
--- a/drivers/video/backlight/generic_bl.c
+++ b/drivers/video/backlight/generic_bl.c
@@ -70,7 +70,7 @@ void corgibl_limit_intensity(int limit)
 }
 EXPORT_SYMBOL(corgibl_limit_intensity);
 
-static struct backlight_ops genericbl_ops = {
+static const struct backlight_ops genericbl_ops = {
 	.options = BL_CORE_SUSPENDRESUME,
 	.get_brightness = genericbl_get_intensity,
 	.update_status  = genericbl_send_intensity,
diff --git a/drivers/video/backlight/hp680_bl.c b/drivers/video/backlight/hp680_bl.c
index 7fb4eefff80d..f7cc528d5be7 100644
--- a/drivers/video/backlight/hp680_bl.c
+++ b/drivers/video/backlight/hp680_bl.c
@@ -98,7 +98,7 @@ static int hp680bl_get_intensity(struct backlight_device *bd)
 	return current_intensity;
 }
 
-static struct backlight_ops hp680bl_ops = {
+static const struct backlight_ops hp680bl_ops = {
 	.get_brightness = hp680bl_get_intensity,
 	.update_status  = hp680bl_set_intensity,
 };
diff --git a/drivers/video/backlight/jornada720_bl.c b/drivers/video/backlight/jornada720_bl.c
index 7aed2565c1bd..db9071fc5665 100644
--- a/drivers/video/backlight/jornada720_bl.c
+++ b/drivers/video/backlight/jornada720_bl.c
@@ -93,7 +93,7 @@ out:
 	return ret;
 }
 
-static struct backlight_ops jornada_bl_ops = {
+static const struct backlight_ops jornada_bl_ops = {
 	.get_brightness = jornada_bl_get_brightness,
 	.update_status = jornada_bl_update_status,
 	.options = BL_CORE_SUSPENDRESUME,
diff --git a/drivers/video/backlight/kb3886_bl.c b/drivers/video/backlight/kb3886_bl.c
index a38fda1742dd..939e7b830cf3 100644
--- a/drivers/video/backlight/kb3886_bl.c
+++ b/drivers/video/backlight/kb3886_bl.c
@@ -134,7 +134,7 @@ static int kb3886bl_get_intensity(struct backlight_device *bd)
 	return kb3886bl_intensity;
 }
 
-static struct backlight_ops kb3886bl_ops = {
+static const struct backlight_ops kb3886bl_ops = {
 	.get_brightness = kb3886bl_get_intensity,
 	.update_status  = kb3886bl_send_intensity,
 };
diff --git a/drivers/video/backlight/locomolcd.c b/drivers/video/backlight/locomolcd.c
index 6b488b8a7eee..00a9591b0003 100644
--- a/drivers/video/backlight/locomolcd.c
+++ b/drivers/video/backlight/locomolcd.c
@@ -141,7 +141,7 @@ static int locomolcd_get_intensity(struct backlight_device *bd)
 	return current_intensity;
 }
 
-static struct backlight_ops locomobl_data = {
+static const struct backlight_ops locomobl_data = {
 	.get_brightness = locomolcd_get_intensity,
 	.update_status  = locomolcd_set_intensity,
 };
diff --git a/drivers/video/backlight/mbp_nvidia_bl.c b/drivers/video/backlight/mbp_nvidia_bl.c
index 9edb8d7c295f..581246894733 100644
--- a/drivers/video/backlight/mbp_nvidia_bl.c
+++ b/drivers/video/backlight/mbp_nvidia_bl.c
@@ -33,7 +33,7 @@ struct dmi_match_data {
 	unsigned long iostart;
 	unsigned long iolen;
 	/* Backlight operations structure. */
-	struct backlight_ops backlight_ops;
+	const struct backlight_ops backlight_ops;
 };
 
 /* Module parameters. */
diff --git a/drivers/video/backlight/omap1_bl.c b/drivers/video/backlight/omap1_bl.c
index 8693e5fcd2eb..409ca9643528 100644
--- a/drivers/video/backlight/omap1_bl.c
+++ b/drivers/video/backlight/omap1_bl.c
@@ -125,7 +125,7 @@ static int omapbl_get_intensity(struct backlight_device *dev)
 	return bl->current_intensity;
 }
 
-static struct backlight_ops omapbl_ops = {
+static const struct backlight_ops omapbl_ops = {
 	.get_brightness = omapbl_get_intensity,
 	.update_status  = omapbl_update_status,
 };
diff --git a/drivers/video/backlight/progear_bl.c b/drivers/video/backlight/progear_bl.c
index 9edaf24fd82d..075786e05034 100644
--- a/drivers/video/backlight/progear_bl.c
+++ b/drivers/video/backlight/progear_bl.c
@@ -54,7 +54,7 @@ static int progearbl_get_intensity(struct backlight_device *bd)
 	return intensity - HW_LEVEL_MIN;
 }
 
-static struct backlight_ops progearbl_ops = {
+static const struct backlight_ops progearbl_ops = {
 	.get_brightness = progearbl_get_intensity,
 	.update_status = progearbl_set_intensity,
 };
diff --git a/drivers/video/backlight/pwm_bl.c b/drivers/video/backlight/pwm_bl.c
index 887166267443..df9e0b32cf39 100644
--- a/drivers/video/backlight/pwm_bl.c
+++ b/drivers/video/backlight/pwm_bl.c
@@ -56,7 +56,7 @@ static int pwm_backlight_get_brightness(struct backlight_device *bl)
 	return bl->props.brightness;
 }
 
-static struct backlight_ops pwm_backlight_ops = {
+static const struct backlight_ops pwm_backlight_ops = {
 	.update_status	= pwm_backlight_update_status,
 	.get_brightness	= pwm_backlight_get_brightness,
 };
diff --git a/drivers/video/backlight/tosa_bl.c b/drivers/video/backlight/tosa_bl.c
index 43edbada12d1..e14ce4d469f5 100644
--- a/drivers/video/backlight/tosa_bl.c
+++ b/drivers/video/backlight/tosa_bl.c
@@ -72,7 +72,7 @@ static int tosa_bl_get_brightness(struct backlight_device *dev)
 	return props->brightness;
 }
 
-static struct backlight_ops bl_ops = {
+static const struct backlight_ops bl_ops = {
 	.get_brightness		= tosa_bl_get_brightness,
 	.update_status		= tosa_bl_update_status,
 };
diff --git a/drivers/video/backlight/wm831x_bl.c b/drivers/video/backlight/wm831x_bl.c
index 467bdb7efb23..e32add37a203 100644
--- a/drivers/video/backlight/wm831x_bl.c
+++ b/drivers/video/backlight/wm831x_bl.c
@@ -112,7 +112,7 @@ static int wm831x_backlight_get_brightness(struct backlight_device *bl)
 	return data->current_brightness;
 }
 
-static struct backlight_ops wm831x_backlight_ops = {
+static const struct backlight_ops wm831x_backlight_ops = {
 	.options = BL_CORE_SUSPENDRESUME,
 	.update_status	= wm831x_backlight_update_status,
 	.get_brightness	= wm831x_backlight_get_brightness,
diff --git a/include/linux/backlight.h b/include/linux/backlight.h
index 0f5f57858a23..8c4f884db6b4 100644
--- a/include/linux/backlight.h
+++ b/include/linux/backlight.h
@@ -36,18 +36,18 @@ struct backlight_device;
 struct fb_info;
 
 struct backlight_ops {
-	unsigned int options;
+	const unsigned int options;
 
 #define BL_CORE_SUSPENDRESUME	(1 << 0)
 
 	/* Notify the backlight driver some property has changed */
-	int (*update_status)(struct backlight_device *);
+	int (* const update_status)(struct backlight_device *);
 	/* Return the current backlight brightness (accounting for power,
 	   fb_blank etc.) */
-	int (*get_brightness)(struct backlight_device *);
+	int (* const get_brightness)(struct backlight_device *);
 	/* Check if given framebuffer device is the one bound to this backlight;
 	   return 0 if not, !=0 if it is. If NULL, backlight always matches the fb. */
-	int (*check_fb)(struct fb_info *);
+	int (* const check_fb)(struct fb_info *);
 };
 
 /* This structure defines all the properties of a backlight */
@@ -86,7 +86,7 @@ struct backlight_device {
 	   registered this device has been unloaded, and if class_get_devdata()
 	   points to something in the body of that driver, it is also invalid. */
 	struct mutex ops_lock;
-	struct backlight_ops *ops;
+	const struct backlight_ops *ops;
 
 	/* The framebuffer notifier block */
 	struct notifier_block fb_notif;
@@ -103,7 +103,7 @@ static inline void backlight_update_status(struct backlight_device *bd)
 }
 
 extern struct backlight_device *backlight_device_register(const char *name,
-	struct device *dev, void *devdata, struct backlight_ops *ops);
+	struct device *dev, void *devdata, const struct backlight_ops *ops);
 extern void backlight_device_unregister(struct backlight_device *bd);
 extern void backlight_force_update(struct backlight_device *bd,
 				   enum backlight_update_reason reason);
-- 
cgit v1.2.3


From 3d1e463158febf6e047897597722f768b15350cd Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 8 Aug 2009 23:56:29 +0400
Subject: get rid of init_file()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/file_table.c      | 30 ++----------------------------
 include/linux/file.h |  3 ---
 2 files changed, 2 insertions(+), 31 deletions(-)

(limited to 'include/linux')

diff --git a/fs/file_table.c b/fs/file_table.c
index f906ac8c9a9f..602a9ee3023a 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -171,32 +171,6 @@ struct file *alloc_file(struct vfsmount *mnt, struct dentry *dentry,
 	if (!file)
 		return NULL;
 
-	init_file(file, mnt, dentry, mode, fop);
-	return file;
-}
-EXPORT_SYMBOL(alloc_file);
-
-/**
- * init_file - initialize a 'struct file'
- * @file: the already allocated 'struct file' to initialized
- * @mnt: the vfsmount on which the file resides
- * @dentry: the dentry representing this file
- * @mode: the mode the file is opened with
- * @fop: the 'struct file_operations' for this file
- *
- * Use this instead of setting the members directly.  Doing so
- * avoids making mistakes like forgetting the mntget() or
- * forgetting to take a write on the mnt.
- *
- * Note: This is a crappy interface.  It is here to make
- * merging with the existing users of get_empty_filp()
- * who have complex failure logic easier.  All users
- * of this should be moving to alloc_file().
- */
-int init_file(struct file *file, struct vfsmount *mnt, struct dentry *dentry,
-	   fmode_t mode, const struct file_operations *fop)
-{
-	int error = 0;
 	file->f_path.dentry = dentry;
 	file->f_path.mnt = mntget(mnt);
 	file->f_mapping = dentry->d_inode->i_mapping;
@@ -210,13 +184,13 @@ int init_file(struct file *file, struct vfsmount *mnt, struct dentry *dentry,
 	 * that we can do debugging checks at __fput()
 	 */
 	if ((mode & FMODE_WRITE) && !special_file(dentry->d_inode->i_mode)) {
+		int error = 0;
 		file_take_write(file);
 		error = mnt_clone_write(mnt);
 		WARN_ON(error);
 	}
-	return error;
+	return file;
 }
-EXPORT_SYMBOL(init_file);
 
 void fput(struct file *file)
 {
diff --git a/include/linux/file.h b/include/linux/file.h
index 335a0a5c316e..6a8d3612eb2a 100644
--- a/include/linux/file.h
+++ b/include/linux/file.h
@@ -18,9 +18,6 @@ extern void drop_file_write_access(struct file *file);
 struct file_operations;
 struct vfsmount;
 struct dentry;
-extern int init_file(struct file *, struct vfsmount *mnt,
-		struct dentry *dentry, fmode_t mode,
-		const struct file_operations *fop);
 extern struct file *alloc_file(struct vfsmount *, struct dentry *dentry,
 		fmode_t mode, const struct file_operations *fop);
 
-- 
cgit v1.2.3


From 2c48b9c45579a9b5e3e74694eebf3d2451f3dbd3 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 9 Aug 2009 00:52:35 +0400
Subject: switch alloc_file() to passing struct path

... and have the caller grab both mnt and dentry; kill
leak in infiniband, while we are at it.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/ia64/kernel/perfmon.c            | 15 ++++++++-------
 drivers/infiniband/core/uverbs_main.c |  9 +++++++--
 fs/anon_inodes.c                      | 18 +++++++++---------
 fs/file_table.c                       | 13 ++++++-------
 fs/hugetlbfs/inode.c                  | 15 ++++++++-------
 fs/notify/inotify/inotify_user.c      |  8 ++++++--
 fs/pipe.c                             | 17 +++++++++--------
 include/linux/file.h                  |  5 +++--
 ipc/shm.c                             | 10 +++++-----
 mm/shmem.c                            | 14 ++++++++------
 net/socket.c                          | 17 +++++++++--------
 11 files changed, 78 insertions(+), 63 deletions(-)

(limited to 'include/linux')

diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index 599b233bef75..5246285a95fb 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -2200,7 +2200,7 @@ pfm_alloc_file(pfm_context_t *ctx)
 {
 	struct file *file;
 	struct inode *inode;
-	struct dentry *dentry;
+	struct path path;
 	char name[32];
 	struct qstr this;
 
@@ -2225,18 +2225,19 @@ pfm_alloc_file(pfm_context_t *ctx)
 	/*
 	 * allocate a new dcache entry
 	 */
-	dentry = d_alloc(pfmfs_mnt->mnt_sb->s_root, &this);
-	if (!dentry) {
+	path.dentry = d_alloc(pfmfs_mnt->mnt_sb->s_root, &this);
+	if (!path.dentry) {
 		iput(inode);
 		return ERR_PTR(-ENOMEM);
 	}
+	path.mnt = mntget(pfmfs_mnt);
 
-	dentry->d_op = &pfmfs_dentry_operations;
-	d_add(dentry, inode);
+	path.dentry->d_op = &pfmfs_dentry_operations;
+	d_add(path.dentry, inode);
 
-	file = alloc_file(pfmfs_mnt, dentry, FMODE_READ, &pfm_file_ops);
+	file = alloc_file(&path, FMODE_READ, &pfm_file_ops);
 	if (!file) {
-		dput(dentry);
+		path_put(&path);
 		return ERR_PTR(-ENFILE);
 	}
 
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index aec0fbdfe7f0..5f284ffd430e 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -492,6 +492,7 @@ struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file,
 					int is_async, int *fd)
 {
 	struct ib_uverbs_event_file *ev_file;
+	struct path path;
 	struct file *filp;
 	int ret;
 
@@ -519,8 +520,10 @@ struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file,
 	 * system call on a uverbs file, which will already have a
 	 * module reference.
 	 */
-	filp = alloc_file(uverbs_event_mnt, dget(uverbs_event_mnt->mnt_root),
-			  FMODE_READ, fops_get(&uverbs_event_fops));
+	path.mnt = uverbs_event_mnt;
+	path.dentry = uverbs_event_mnt->mnt_root;
+	path_get(&path);
+	filp = alloc_file(&path, FMODE_READ, fops_get(&uverbs_event_fops));
 	if (!filp) {
 		ret = -ENFILE;
 		goto err_fd;
@@ -531,6 +534,8 @@ struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file,
 	return filp;
 
 err_fd:
+	fops_put(&uverbs_event_fops);
+	path_put(&path);
 	put_unused_fd(*fd);
 
 err:
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index 2ca7a7cafdbf..94f5110c4655 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -88,7 +88,7 @@ struct file *anon_inode_getfile(const char *name,
 				void *priv, int flags)
 {
 	struct qstr this;
-	struct dentry *dentry;
+	struct path path;
 	struct file *file;
 	int error;
 
@@ -106,10 +106,11 @@ struct file *anon_inode_getfile(const char *name,
 	this.name = name;
 	this.len = strlen(name);
 	this.hash = 0;
-	dentry = d_alloc(anon_inode_mnt->mnt_sb->s_root, &this);
-	if (!dentry)
+	path.dentry = d_alloc(anon_inode_mnt->mnt_sb->s_root, &this);
+	if (!path.dentry)
 		goto err_module;
 
+	path.mnt = mntget(anon_inode_mnt);
 	/*
 	 * We know the anon_inode inode count is always greater than zero,
 	 * so we can avoid doing an igrab() and we can use an open-coded
@@ -117,14 +118,13 @@ struct file *anon_inode_getfile(const char *name,
 	 */
 	atomic_inc(&anon_inode_inode->i_count);
 
-	dentry->d_op = &anon_inodefs_dentry_operations;
+	path.dentry->d_op = &anon_inodefs_dentry_operations;
 	/* Do not publish this dentry inside the global dentry hash table */
-	dentry->d_flags &= ~DCACHE_UNHASHED;
-	d_instantiate(dentry, anon_inode_inode);
+	path.dentry->d_flags &= ~DCACHE_UNHASHED;
+	d_instantiate(path.dentry, anon_inode_inode);
 
 	error = -ENFILE;
-	file = alloc_file(anon_inode_mnt, dentry,
-			  FMODE_READ | FMODE_WRITE, fops);
+	file = alloc_file(&path, FMODE_READ | FMODE_WRITE, fops);
 	if (!file)
 		goto err_dput;
 	file->f_mapping = anon_inode_inode->i_mapping;
@@ -137,7 +137,7 @@ struct file *anon_inode_getfile(const char *name,
 	return file;
 
 err_dput:
-	dput(dentry);
+	path_put(&path);
 err_module:
 	module_put(fops->owner);
 	return ERR_PTR(error);
diff --git a/fs/file_table.c b/fs/file_table.c
index 602a9ee3023a..163cd28314e0 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -162,8 +162,8 @@ fail:
  * If all the callers of init_file() are eliminated, its
  * code should be moved into this function.
  */
-struct file *alloc_file(struct vfsmount *mnt, struct dentry *dentry,
-		fmode_t mode, const struct file_operations *fop)
+struct file *alloc_file(struct path *path, fmode_t mode,
+		const struct file_operations *fop)
 {
 	struct file *file;
 
@@ -171,9 +171,8 @@ struct file *alloc_file(struct vfsmount *mnt, struct dentry *dentry,
 	if (!file)
 		return NULL;
 
-	file->f_path.dentry = dentry;
-	file->f_path.mnt = mntget(mnt);
-	file->f_mapping = dentry->d_inode->i_mapping;
+	file->f_path = *path;
+	file->f_mapping = path->dentry->d_inode->i_mapping;
 	file->f_mode = mode;
 	file->f_op = fop;
 
@@ -183,10 +182,10 @@ struct file *alloc_file(struct vfsmount *mnt, struct dentry *dentry,
 	 * visible.  We do this for consistency, and so
 	 * that we can do debugging checks at __fput()
 	 */
-	if ((mode & FMODE_WRITE) && !special_file(dentry->d_inode->i_mode)) {
+	if ((mode & FMODE_WRITE) && !special_file(path->dentry->d_inode->i_mode)) {
 		int error = 0;
 		file_take_write(file);
-		error = mnt_clone_write(mnt);
+		error = mnt_clone_write(path->mnt);
 		WARN_ON(error);
 	}
 	return file;
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 87a1258953b8..6bd41525cd71 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -922,7 +922,8 @@ struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag,
 	int error = -ENOMEM;
 	struct file *file;
 	struct inode *inode;
-	struct dentry *dentry, *root;
+	struct path path;
+	struct dentry *root;
 	struct qstr quick_string;
 
 	*user = NULL;
@@ -944,10 +945,11 @@ struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag,
 	quick_string.name = name;
 	quick_string.len = strlen(quick_string.name);
 	quick_string.hash = 0;
-	dentry = d_alloc(root, &quick_string);
-	if (!dentry)
+	path.dentry = d_alloc(root, &quick_string);
+	if (!path.dentry)
 		goto out_shm_unlock;
 
+	path.mnt = mntget(hugetlbfs_vfsmount);
 	error = -ENOSPC;
 	inode = hugetlbfs_get_inode(root->d_sb, current_fsuid(),
 				current_fsgid(), S_IFREG | S_IRWXUGO, 0);
@@ -960,13 +962,12 @@ struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag,
 			acctflag))
 		goto out_inode;
 
-	d_instantiate(dentry, inode);
+	d_instantiate(path.dentry, inode);
 	inode->i_size = size;
 	inode->i_nlink = 0;
 
 	error = -ENFILE;
-	file = alloc_file(hugetlbfs_vfsmount, dentry,
-			FMODE_WRITE | FMODE_READ,
+	file = alloc_file(&path, FMODE_WRITE | FMODE_READ,
 			&hugetlbfs_file_operations);
 	if (!file)
 		goto out_dentry; /* inode is already attached */
@@ -977,7 +978,7 @@ struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag,
 out_inode:
 	iput(inode);
 out_dentry:
-	dput(dentry);
+	path_put(&path);
 out_shm_unlock:
 	if (*user) {
 		user_shm_unlock(size, *user);
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 9e4f90042eaf..8271cf05c957 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -646,6 +646,7 @@ SYSCALL_DEFINE1(inotify_init1, int, flags)
 	struct fsnotify_group *group;
 	struct user_struct *user;
 	struct file *filp;
+	struct path path;
 	int fd, ret;
 
 	/* Check the IN_* constants for consistency.  */
@@ -675,8 +676,10 @@ SYSCALL_DEFINE1(inotify_init1, int, flags)
 
 	atomic_inc(&user->inotify_devs);
 
-	filp = alloc_file(inotify_mnt, dget(inotify_mnt->mnt_root),
-			  FMODE_READ, &inotify_fops);
+	path.mnt = inotify_mnt;
+	path.dentry = inotify_mnt->mnt_root;
+	path_get(&path);
+	filp = alloc_file(&path, FMODE_READ, &inotify_fops);
 	if (!filp)
 		goto Enfile;
 
@@ -689,6 +692,7 @@ SYSCALL_DEFINE1(inotify_init1, int, flags)
 
 Enfile:
 	ret = -ENFILE;
+	path_put(&path);
 	atomic_dec(&user->inotify_devs);
 out_free_uid:
 	free_uid(user);
diff --git a/fs/pipe.c b/fs/pipe.c
index ae17d026aaa3..81288bc2bcbb 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -974,7 +974,7 @@ struct file *create_write_pipe(int flags)
 	int err;
 	struct inode *inode;
 	struct file *f;
-	struct dentry *dentry;
+	struct path path;
 	struct qstr name = { .name = "" };
 
 	err = -ENFILE;
@@ -983,21 +983,22 @@ struct file *create_write_pipe(int flags)
 		goto err;
 
 	err = -ENOMEM;
-	dentry = d_alloc(pipe_mnt->mnt_sb->s_root, &name);
-	if (!dentry)
+	path.dentry = d_alloc(pipe_mnt->mnt_sb->s_root, &name);
+	if (!path.dentry)
 		goto err_inode;
+	path.mnt = mntget(pipe_mnt);
 
-	dentry->d_op = &pipefs_dentry_operations;
+	path.dentry->d_op = &pipefs_dentry_operations;
 	/*
 	 * We dont want to publish this dentry into global dentry hash table.
 	 * We pretend dentry is already hashed, by unsetting DCACHE_UNHASHED
 	 * This permits a working /proc/$pid/fd/XXX on pipes
 	 */
-	dentry->d_flags &= ~DCACHE_UNHASHED;
-	d_instantiate(dentry, inode);
+	path.dentry->d_flags &= ~DCACHE_UNHASHED;
+	d_instantiate(path.dentry, inode);
 
 	err = -ENFILE;
-	f = alloc_file(pipe_mnt, dentry, FMODE_WRITE, &write_pipefifo_fops);
+	f = alloc_file(&path, FMODE_WRITE, &write_pipefifo_fops);
 	if (!f)
 		goto err_dentry;
 	f->f_mapping = inode->i_mapping;
@@ -1009,7 +1010,7 @@ struct file *create_write_pipe(int flags)
 
  err_dentry:
 	free_pipe_info(inode);
-	dput(dentry);
+	path_put(&path);
 	return ERR_PTR(err);
 
  err_inode:
diff --git a/include/linux/file.h b/include/linux/file.h
index 6a8d3612eb2a..5555508fd517 100644
--- a/include/linux/file.h
+++ b/include/linux/file.h
@@ -18,8 +18,9 @@ extern void drop_file_write_access(struct file *file);
 struct file_operations;
 struct vfsmount;
 struct dentry;
-extern struct file *alloc_file(struct vfsmount *, struct dentry *dentry,
-		fmode_t mode, const struct file_operations *fop);
+struct path;
+extern struct file *alloc_file(struct path *, fmode_t mode,
+	const struct file_operations *fop);
 
 static inline void fput_light(struct file *file, int fput_needed)
 {
diff --git a/ipc/shm.c b/ipc/shm.c
index 11bec626c228..16e39230aa0d 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -878,8 +878,8 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr)
 	if (err)
 		goto out_unlock;
 
-	path.dentry = dget(shp->shm_file->f_path.dentry);
-	path.mnt    = shp->shm_file->f_path.mnt;
+	path = shp->shm_file->f_path;
+	path_get(&path);
 	shp->shm_nattch++;
 	size = i_size_read(path.dentry->d_inode);
 	shm_unlock(shp);
@@ -889,8 +889,8 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr)
 	if (!sfd)
 		goto out_put_dentry;
 
-	file = alloc_file(path.mnt, path.dentry, f_mode,
-			is_file_hugepages(shp->shm_file) ?
+	file = alloc_file(&path, f_mode,
+			  is_file_hugepages(shp->shm_file) ?
 				&shm_file_operations_huge :
 				&shm_file_operations);
 	if (!file)
@@ -950,7 +950,7 @@ out_unlock:
 out_free:
 	kfree(sfd);
 out_put_dentry:
-	dput(path.dentry);
+	path_put(&path);
 	goto out_nattch;
 }
 
diff --git a/mm/shmem.c b/mm/shmem.c
index ef8f47473c5a..d2ec7f029ff4 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2626,7 +2626,8 @@ struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags
 	int error;
 	struct file *file;
 	struct inode *inode;
-	struct dentry *dentry, *root;
+	struct path path;
+	struct dentry *root;
 	struct qstr this;
 
 	if (IS_ERR(shm_mnt))
@@ -2643,16 +2644,17 @@ struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags
 	this.len = strlen(name);
 	this.hash = 0; /* will go */
 	root = shm_mnt->mnt_root;
-	dentry = d_alloc(root, &this);
-	if (!dentry)
+	path.dentry = d_alloc(root, &this);
+	if (!path.dentry)
 		goto put_memory;
+	path.mnt = mntget(shm_mnt);
 
 	error = -ENOSPC;
 	inode = shmem_get_inode(root->d_sb, S_IFREG | S_IRWXUGO, 0, flags);
 	if (!inode)
 		goto put_dentry;
 
-	d_instantiate(dentry, inode);
+	d_instantiate(path.dentry, inode);
 	inode->i_size = size;
 	inode->i_nlink = 0;	/* It is unlinked */
 #ifndef CONFIG_MMU
@@ -2662,7 +2664,7 @@ struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags
 #endif
 
 	error = -ENFILE;
-	file = alloc_file(shm_mnt, dentry, FMODE_WRITE | FMODE_READ,
+	file = alloc_file(&path, FMODE_WRITE | FMODE_READ,
 		  &shmem_file_operations);
 	if (!file)
 		goto put_dentry;
@@ -2671,7 +2673,7 @@ struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags
 	return file;
 
 put_dentry:
-	dput(dentry);
+	path_put(&path);
 put_memory:
 	shmem_unacct_size(flags, size);
 	return ERR_PTR(error);
diff --git a/net/socket.c b/net/socket.c
index eaaba3510e81..dbfdfa96d29b 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -358,7 +358,7 @@ static const struct dentry_operations sockfs_dentry_operations = {
 static int sock_alloc_file(struct socket *sock, struct file **f, int flags)
 {
 	struct qstr name = { .name = "" };
-	struct dentry *dentry;
+	struct path path;
 	struct file *file;
 	int fd;
 
@@ -366,28 +366,29 @@ static int sock_alloc_file(struct socket *sock, struct file **f, int flags)
 	if (unlikely(fd < 0))
 		return fd;
 
-	dentry = d_alloc(sock_mnt->mnt_sb->s_root, &name);
-	if (unlikely(!dentry)) {
+	path.dentry = d_alloc(sock_mnt->mnt_sb->s_root, &name);
+	if (unlikely(!path.dentry)) {
 		put_unused_fd(fd);
 		return -ENOMEM;
 	}
+	path.mnt = mntget(sock_mnt);
 
-	dentry->d_op = &sockfs_dentry_operations;
+	path.dentry->d_op = &sockfs_dentry_operations;
 	/*
 	 * We dont want to push this dentry into global dentry hash table.
 	 * We pretend dentry is already hashed, by unsetting DCACHE_UNHASHED
 	 * This permits a working /proc/$pid/fd/XXX on sockets
 	 */
-	dentry->d_flags &= ~DCACHE_UNHASHED;
-	d_instantiate(dentry, SOCK_INODE(sock));
+	path.dentry->d_flags &= ~DCACHE_UNHASHED;
+	d_instantiate(path.dentry, SOCK_INODE(sock));
 	SOCK_INODE(sock)->i_fop = &socket_file_ops;
 
-	file = alloc_file(sock_mnt, dentry, FMODE_READ | FMODE_WRITE,
+	file = alloc_file(&path, FMODE_READ | FMODE_WRITE,
 		  &socket_file_ops);
 	if (unlikely(!file)) {
 		/* drop dentry, keep inode */
 		atomic_inc(&path.dentry->d_inode->i_count);
-		dput(dentry);
+		path_put(&path);
 		put_unused_fd(fd);
 		return -ENFILE;
 	}
-- 
cgit v1.2.3


From e9496ff46a20a8592fdc7bdaaf41b45eb808d310 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 9 Aug 2009 18:44:32 +0400
Subject: fix mismerge with Trond's stuff (create_mnt_ns() export is gone now)

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namespace.c                | 3 +--
 fs/nfs/super.c                | 8 --------
 include/linux/mnt_namespace.h | 1 -
 3 files changed, 1 insertion(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/fs/namespace.c b/fs/namespace.c
index 7d70d63ceb29..faab1273281e 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2068,7 +2068,7 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
  * create_mnt_ns - creates a private namespace and adds a root filesystem
  * @mnt: pointer to the new root filesystem mountpoint
  */
-struct mnt_namespace *create_mnt_ns(struct vfsmount *mnt)
+static struct mnt_namespace *create_mnt_ns(struct vfsmount *mnt)
 {
 	struct mnt_namespace *new_ns;
 
@@ -2080,7 +2080,6 @@ struct mnt_namespace *create_mnt_ns(struct vfsmount *mnt)
 	}
 	return new_ns;
 }
-EXPORT_SYMBOL(create_mnt_ns);
 
 SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name,
 		char __user *, type, unsigned long, flags, void __user *, data)
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index ce907efc5508..d5b112bcf3de 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -2648,21 +2648,13 @@ out_freepage:
 static int nfs_follow_remote_path(struct vfsmount *root_mnt,
 		const char *export_path, struct vfsmount *mnt_target)
 {
-	struct mnt_namespace *ns_private;
 	struct nameidata nd;
 	struct super_block *s;
 	int ret;
 
-	ns_private = create_mnt_ns(root_mnt);
-	ret = PTR_ERR(ns_private);
-	if (IS_ERR(ns_private))
-		goto out_mntput;
-
 	ret = vfs_path_lookup(root_mnt->mnt_root, root_mnt,
 			export_path, LOOKUP_FOLLOW, &nd);
 
-	put_mnt_ns(ns_private);
-
 	if (ret != 0)
 		goto out_err;
 
diff --git a/include/linux/mnt_namespace.h b/include/linux/mnt_namespace.h
index d74785c2393a..d9ebf1037dfa 100644
--- a/include/linux/mnt_namespace.h
+++ b/include/linux/mnt_namespace.h
@@ -23,7 +23,6 @@ struct proc_mounts {
 
 struct fs_struct;
 
-extern struct mnt_namespace *create_mnt_ns(struct vfsmount *mnt);
 extern struct mnt_namespace *copy_mnt_ns(unsigned long, struct mnt_namespace *,
 		struct fs_struct *);
 extern void put_mnt_ns(struct mnt_namespace *ns);
-- 
cgit v1.2.3


From e81e3f4dca6c54116a24aec217d2c15c6f58ada5 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Fri, 4 Dec 2009 15:47:36 -0500
Subject: fs: move get_empty_filp() deffinition to internal.h

All users outside of fs/ of get_empty_filp() have been removed.  This patch
moves the definition from the include/ directory to internal.h so no new
users crop up and removes the EXPORT_SYMBOL.  I'd love to see open intents
stop using it too, but that's a problem for another day and a smarter
developer!

Signed-off-by: Eric Paris <eparis@redhat.com>
Acked-by: Miklos Szeredi <miklos@szeredi.hu>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/file_table.c    | 2 ++
 fs/internal.h      | 1 +
 fs/namei.c         | 2 ++
 fs/open.c          | 2 ++
 include/linux/fs.h | 1 -
 5 files changed, 7 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/fs/file_table.c b/fs/file_table.c
index 163cd28314e0..361d76be8295 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -24,6 +24,8 @@
 
 #include <asm/atomic.h>
 
+#include "internal.h"
+
 /* sysctl tunables... */
 struct files_stat_struct files_stat = {
 	.max_files = NR_FILE
diff --git a/fs/internal.h b/fs/internal.h
index 515175b8b72e..f67cd141d9a8 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -79,6 +79,7 @@ extern void chroot_fs_refs(struct path *, struct path *);
  * file_table.c
  */
 extern void mark_files_ro(struct super_block *);
+extern struct file *get_empty_filp(void);
 
 /*
  * super.c
diff --git a/fs/namei.c b/fs/namei.c
index 8c8b379b94a4..1fc038b117be 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -35,6 +35,8 @@
 #include <linux/fs_struct.h>
 #include <asm/uaccess.h>
 
+#include "internal.h"
+
 #define ACC_MODE(x) ("\000\004\002\006"[(x)&O_ACCMODE])
 
 /* [Feb-1997 T. Schoebel-Theuer]
diff --git a/fs/open.c b/fs/open.c
index b4b31d277f3a..d95651e8be9e 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -31,6 +31,8 @@
 #include <linux/falloc.h>
 #include <linux/fs_struct.h>
 
+#include "internal.h"
+
 int vfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 {
 	int retval = -ENODEV;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index a057f48eb156..cdc23be4edde 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2189,7 +2189,6 @@ static inline void insert_inode_hash(struct inode *inode) {
 	__insert_inode_hash(inode, inode->i_ino);
 }
 
-extern struct file * get_empty_filp(void);
 extern void file_move(struct file *f, struct list_head *list);
 extern void file_kill(struct file *f);
 #ifdef CONFIG_BLOCK
-- 
cgit v1.2.3


From 1429b3eca23818f87f9fa569a15d9816de81f698 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 16 Dec 2009 06:38:01 -0500
Subject: Untangling ima mess, part 3: kill dead code in ima

Kill the 'update' argument of ima_path_check(), kill
dead code in ima.

Current rules: ima counters are bumped at the same time
when the file switches from put_filp() fodder to fput()
one.  Which happens exactly in two places - alloc_file()
and __dentry_open().  Nothing else needs to do that at
all.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namei.c                        |  4 +--
 fs/nfsd/vfs.c                     |  3 +--
 include/linux/ima.h               | 12 ++-------
 security/integrity/ima/ima_main.c | 52 +++------------------------------------
 4 files changed, 9 insertions(+), 62 deletions(-)

(limited to 'include/linux')

diff --git a/fs/namei.c b/fs/namei.c
index c530e5d32f12..a765e7a741f4 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1686,7 +1686,7 @@ do_last:
 			path_put(&nd.root);
 		if (!IS_ERR(filp)) {
 			error = ima_path_check(&filp->f_path, filp->f_mode &
-				       (MAY_READ | MAY_WRITE | MAY_EXEC), 0);
+				       (MAY_READ | MAY_WRITE | MAY_EXEC));
 			if (error) {
 				fput(filp);
 				filp = ERR_PTR(error);
@@ -1747,7 +1747,7 @@ ok:
 	filp = nameidata_to_filp(&nd, open_flag);
 	if (!IS_ERR(filp)) {
 		error = ima_path_check(&filp->f_path, filp->f_mode &
-			       (MAY_READ | MAY_WRITE | MAY_EXEC), 0);
+			       (MAY_READ | MAY_WRITE | MAY_EXEC));
 		if (error) {
 			fput(filp);
 			filp = ERR_PTR(error);
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index c9942b39654e..936f08400db6 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -2122,8 +2122,7 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp,
 	 */
 	path.mnt = exp->ex_path.mnt;
 	path.dentry = dentry;
-	err = ima_path_check(&path, acc & (MAY_READ | MAY_WRITE | MAY_EXEC),
-			     IMA_COUNT_LEAVE);
+	err = ima_path_check(&path, acc & (MAY_READ | MAY_WRITE | MAY_EXEC));
 nfsd_out:
 	return err? nfserrno(err) : 0;
 }
diff --git a/include/linux/ima.h b/include/linux/ima.h
index 0e3f2a4c25f6..99dc6d5cf7e5 100644
--- a/include/linux/ima.h
+++ b/include/linux/ima.h
@@ -13,18 +13,14 @@
 #include <linux/fs.h>
 struct linux_binprm;
 
-#define IMA_COUNT_UPDATE 1
-#define IMA_COUNT_LEAVE 0
-
 #ifdef CONFIG_IMA
 extern int ima_bprm_check(struct linux_binprm *bprm);
 extern int ima_inode_alloc(struct inode *inode);
 extern void ima_inode_free(struct inode *inode);
-extern int ima_path_check(struct path *path, int mask, int update_counts);
+extern int ima_path_check(struct path *path, int mask);
 extern void ima_file_free(struct file *file);
 extern int ima_file_mmap(struct file *file, unsigned long prot);
 extern void ima_counts_get(struct file *file);
-extern void ima_counts_put(struct path *path, int mask);
 
 #else
 static inline int ima_bprm_check(struct linux_binprm *bprm)
@@ -42,7 +38,7 @@ static inline void ima_inode_free(struct inode *inode)
 	return;
 }
 
-static inline int ima_path_check(struct path *path, int mask, int update_counts)
+static inline int ima_path_check(struct path *path, int mask)
 {
 	return 0;
 }
@@ -62,9 +58,5 @@ static inline void ima_counts_get(struct file *file)
 	return;
 }
 
-static inline void ima_counts_put(struct path *path, int mask)
-{
-	return;
-}
 #endif /* CONFIG_IMA_H */
 #endif /* _LINUX_IMA_H */
diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c
index e041233b4d2a..16dc57d247d0 100644
--- a/security/integrity/ima/ima_main.c
+++ b/security/integrity/ima/ima_main.c
@@ -49,20 +49,13 @@ static void ima_inc_counts(struct ima_iint_cache *iint, fmode_t mode)
 		iint->writecount++;
 }
 
-/*
- * Update the counts given open flags instead of fmode
- */
-static void ima_inc_counts_flags(struct ima_iint_cache *iint, int flags)
-{
-	ima_inc_counts(iint, (__force fmode_t)((flags+1) & O_ACCMODE));
-}
-
 /*
  * Decrement ima counts
  */
 static void ima_dec_counts(struct ima_iint_cache *iint, struct inode *inode,
-			   fmode_t mode)
+			   struct file *file)
 {
+	mode_t mode = file->f_mode;
 	BUG_ON(!mutex_is_locked(&iint->mutex));
 
 	iint->opencount--;
@@ -92,12 +85,6 @@ static void ima_dec_counts(struct ima_iint_cache *iint, struct inode *inode,
 	}
 }
 
-static void ima_dec_counts_flags(struct ima_iint_cache *iint,
-				 struct inode *inode, int flags)
-{
-	ima_dec_counts(iint, inode, (__force fmode_t)((flags+1) & O_ACCMODE));
-}
-
 /**
  * ima_file_free - called on __fput()
  * @file: pointer to file structure being freed
@@ -117,7 +104,7 @@ void ima_file_free(struct file *file)
 		return;
 
 	mutex_lock(&iint->mutex);
-	ima_dec_counts(iint, inode, file->f_mode);
+	ima_dec_counts(iint, inode, file);
 	mutex_unlock(&iint->mutex);
 	kref_put(&iint->refcount, iint_free);
 }
@@ -183,7 +170,7 @@ static int get_path_measurement(struct ima_iint_cache *iint, struct file *file,
  * Always return 0 and audit dentry_open failures.
  * (Return code will be based upon measurement appraisal.)
  */
-int ima_path_check(struct path *path, int mask, int update_counts)
+int ima_path_check(struct path *path, int mask)
 {
 	struct inode *inode = path->dentry->d_inode;
 	struct ima_iint_cache *iint;
@@ -197,8 +184,6 @@ int ima_path_check(struct path *path, int mask, int update_counts)
 		return 0;
 
 	mutex_lock(&iint->mutex);
-	if (update_counts)
-		ima_inc_counts_flags(iint, mask);
 
 	rc = ima_must_measure(iint, inode, MAY_READ, PATH_CHECK);
 	if (rc < 0)
@@ -268,35 +253,6 @@ out:
 	return rc;
 }
 
-/*
- * ima_counts_put - decrement file counts
- *
- * File counts are incremented in ima_path_check. On file open
- * error, such as ETXTBSY, decrement the counts to prevent
- * unnecessary imbalance messages.
- */
-void ima_counts_put(struct path *path, int mask)
-{
-	struct inode *inode = path->dentry->d_inode;
-	struct ima_iint_cache *iint;
-
-	/* The inode may already have been freed, freeing the iint
-	 * with it. Verify the inode is not NULL before dereferencing
-	 * it.
-	 */
-	if (!ima_initialized || !inode || !S_ISREG(inode->i_mode))
-		return;
-	iint = ima_iint_find_get(inode);
-	if (!iint)
-		return;
-
-	mutex_lock(&iint->mutex);
-	ima_dec_counts_flags(iint, inode, mask);
-	mutex_unlock(&iint->mutex);
-
-	kref_put(&iint->refcount, iint_free);
-}
-
 /*
  * ima_counts_get - increment file counts
  *
-- 
cgit v1.2.3


From 431547b3c4533b8c7fd150ab36980b9a3147797b Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 13 Nov 2009 09:52:56 +0000
Subject: sanitize xattr handler prototypes

Add a flags argument to struct xattr_handler and pass it to all xattr
handler methods.  This allows using the same methods for multiple
handlers, e.g. for the ACL methods which perform exactly the same action
for the access and default ACLs, just using a different underlying
attribute.  With a little more groundwork it'll also allow sharing the
methods for the regular user/trusted/secure handlers in extN, ocfs2 and
jffs2 like it's already done for xfs in this patch.

Also change the inode argument to the handlers to a dentry to allow
using the handlers mechnism for filesystems that require it later,
e.g. cifs.

[with GFS2 bits updated by Steven Whitehouse <swhiteho@redhat.com>]

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: James Morris <jmorris@namei.org>
Acked-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/btrfs/acl.c               | 47 ++++++------------------
 fs/ext2/acl.c                | 79 +++++++++++++---------------------------
 fs/ext2/xattr.c              | 11 ++++--
 fs/ext2/xattr_security.c     | 16 ++++----
 fs/ext2/xattr_trusted.c      | 16 ++++----
 fs/ext2/xattr_user.c         | 25 +++++++------
 fs/ext3/acl.c                | 74 ++++++++++++-------------------------
 fs/ext3/xattr.c              | 31 +++++++++-------
 fs/ext3/xattr_security.c     | 20 +++++-----
 fs/ext3/xattr_trusted.c      | 18 ++++-----
 fs/ext3/xattr_user.c         | 25 +++++++------
 fs/ext4/acl.c                | 74 ++++++++++++-------------------------
 fs/ext4/xattr.c              | 31 +++++++++-------
 fs/ext4/xattr_security.c     | 20 +++++-----
 fs/ext4/xattr_trusted.c      | 20 +++++-----
 fs/ext4/xattr_user.c         | 25 +++++++------
 fs/gfs2/acl.c                | 16 +++++---
 fs/gfs2/inode.c              |  3 +-
 fs/gfs2/xattr.c              | 69 +++++++++++++----------------------
 fs/gfs2/xattr.h              |  7 ++--
 fs/jffs2/acl.c               | 65 +++++++++++----------------------
 fs/jffs2/security.c          | 18 +++++----
 fs/jffs2/xattr.c             |  6 ++-
 fs/jffs2/xattr_trusted.c     | 18 +++++----
 fs/jffs2/xattr_user.c        | 18 +++++----
 fs/ocfs2/acl.c               | 87 +++++++++++++-------------------------------
 fs/ocfs2/xattr.c             | 72 ++++++++++++++++++------------------
 fs/reiserfs/xattr.c          | 36 +++++++++---------
 fs/reiserfs/xattr_acl.c      | 69 ++++++++++-------------------------
 fs/reiserfs/xattr_security.c | 21 ++++++-----
 fs/reiserfs/xattr_trusted.c  | 21 ++++++-----
 fs/reiserfs/xattr_user.c     | 21 ++++++-----
 fs/xattr.c                   | 28 +++++++-------
 fs/xfs/linux-2.6/xfs_acl.c   | 57 ++++++++++-------------------
 fs/xfs/linux-2.6/xfs_xattr.c | 71 ++++++++----------------------------
 fs/xfs/xfs_acl.h             |  3 +-
 include/linux/xattr.h        | 13 ++++---
 mm/shmem.c                   | 19 +++++-----
 mm/shmem_acl.c               | 78 ++++++++++-----------------------------
 39 files changed, 533 insertions(+), 815 deletions(-)

(limited to 'include/linux')

diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 361604244271..52cbe47022bf 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -73,13 +73,13 @@ static struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
 	return acl;
 }
 
-static int btrfs_xattr_get_acl(struct inode *inode, int type,
-			       void *value, size_t size)
+static int btrfs_xattr_acl_get(struct dentry *dentry, const char *name,
+		void *value, size_t size, int type)
 {
 	struct posix_acl *acl;
 	int ret = 0;
 
-	acl = btrfs_get_acl(inode, type);
+	acl = btrfs_get_acl(dentry->d_inode, type);
 
 	if (IS_ERR(acl))
 		return PTR_ERR(acl);
@@ -151,8 +151,8 @@ out:
 	return ret;
 }
 
-static int btrfs_xattr_set_acl(struct inode *inode, int type,
-			       const void *value, size_t size)
+static int btrfs_xattr_acl_set(struct dentry *dentry, const char *name,
+		const void *value, size_t size, int flags, int type)
 {
 	int ret = 0;
 	struct posix_acl *acl = NULL;
@@ -167,38 +167,13 @@ static int btrfs_xattr_set_acl(struct inode *inode, int type,
 		}
 	}
 
-	ret = btrfs_set_acl(inode, acl, type);
+	ret = btrfs_set_acl(dentry->d_inode, acl, type);
 
 	posix_acl_release(acl);
 
 	return ret;
 }
 
-
-static int btrfs_xattr_acl_access_get(struct inode *inode, const char *name,
-				      void *value, size_t size)
-{
-	return btrfs_xattr_get_acl(inode, ACL_TYPE_ACCESS, value, size);
-}
-
-static int btrfs_xattr_acl_access_set(struct inode *inode, const char *name,
-				      const void *value, size_t size, int flags)
-{
-	return btrfs_xattr_set_acl(inode, ACL_TYPE_ACCESS, value, size);
-}
-
-static int btrfs_xattr_acl_default_get(struct inode *inode, const char *name,
-				       void *value, size_t size)
-{
-	return btrfs_xattr_get_acl(inode, ACL_TYPE_DEFAULT, value, size);
-}
-
-static int btrfs_xattr_acl_default_set(struct inode *inode, const char *name,
-			       const void *value, size_t size, int flags)
-{
-	return btrfs_xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size);
-}
-
 int btrfs_check_acl(struct inode *inode, int mask)
 {
 	struct posix_acl *acl;
@@ -303,14 +278,16 @@ int btrfs_acl_chmod(struct inode *inode)
 
 struct xattr_handler btrfs_xattr_acl_default_handler = {
 	.prefix = POSIX_ACL_XATTR_DEFAULT,
-	.get	= btrfs_xattr_acl_default_get,
-	.set	= btrfs_xattr_acl_default_set,
+	.flags	= ACL_TYPE_DEFAULT,
+	.get	= btrfs_xattr_acl_get,
+	.set	= btrfs_xattr_acl_set,
 };
 
 struct xattr_handler btrfs_xattr_acl_access_handler = {
 	.prefix = POSIX_ACL_XATTR_ACCESS,
-	.get	= btrfs_xattr_acl_access_get,
-	.set	= btrfs_xattr_acl_access_set,
+	.flags	= ACL_TYPE_ACCESS,
+	.get	= btrfs_xattr_acl_get,
+	.set	= btrfs_xattr_acl_set,
 };
 
 #else /* CONFIG_BTRFS_FS_POSIX_ACL */
diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c
index a63d44256a70..a99e54318c3d 100644
--- a/fs/ext2/acl.c
+++ b/fs/ext2/acl.c
@@ -339,12 +339,12 @@ ext2_acl_chmod(struct inode *inode)
  * Extended attribut handlers
  */
 static size_t
-ext2_xattr_list_acl_access(struct inode *inode, char *list, size_t list_size,
-			   const char *name, size_t name_len)
+ext2_xattr_list_acl_access(struct dentry *dentry, char *list, size_t list_size,
+			   const char *name, size_t name_len, int type)
 {
 	const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS);
 
-	if (!test_opt(inode->i_sb, POSIX_ACL))
+	if (!test_opt(dentry->d_sb, POSIX_ACL))
 		return 0;
 	if (list && size <= list_size)
 		memcpy(list, POSIX_ACL_XATTR_ACCESS, size);
@@ -352,12 +352,12 @@ ext2_xattr_list_acl_access(struct inode *inode, char *list, size_t list_size,
 }
 
 static size_t
-ext2_xattr_list_acl_default(struct inode *inode, char *list, size_t list_size,
-			    const char *name, size_t name_len)
+ext2_xattr_list_acl_default(struct dentry *dentry, char *list, size_t list_size,
+			    const char *name, size_t name_len, int type)
 {
 	const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT);
 
-	if (!test_opt(inode->i_sb, POSIX_ACL))
+	if (!test_opt(dentry->d_sb, POSIX_ACL))
 		return 0;
 	if (list && size <= list_size)
 		memcpy(list, POSIX_ACL_XATTR_DEFAULT, size);
@@ -365,15 +365,18 @@ ext2_xattr_list_acl_default(struct inode *inode, char *list, size_t list_size,
 }
 
 static int
-ext2_xattr_get_acl(struct inode *inode, int type, void *buffer, size_t size)
+ext2_xattr_get_acl(struct dentry *dentry, const char *name, void *buffer,
+		   size_t size, int type)
 {
 	struct posix_acl *acl;
 	int error;
 
-	if (!test_opt(inode->i_sb, POSIX_ACL))
+	if (strcmp(name, "") != 0)
+		return -EINVAL;
+	if (!test_opt(dentry->d_sb, POSIX_ACL))
 		return -EOPNOTSUPP;
 
-	acl = ext2_get_acl(inode, type);
+	acl = ext2_get_acl(dentry->d_inode, type);
 	if (IS_ERR(acl))
 		return PTR_ERR(acl);
 	if (acl == NULL)
@@ -385,33 +388,17 @@ ext2_xattr_get_acl(struct inode *inode, int type, void *buffer, size_t size)
 }
 
 static int
-ext2_xattr_get_acl_access(struct inode *inode, const char *name,
-			  void *buffer, size_t size)
-{
-	if (strcmp(name, "") != 0)
-		return -EINVAL;
-	return ext2_xattr_get_acl(inode, ACL_TYPE_ACCESS, buffer, size);
-}
-
-static int
-ext2_xattr_get_acl_default(struct inode *inode, const char *name,
-			   void *buffer, size_t size)
-{
-	if (strcmp(name, "") != 0)
-		return -EINVAL;
-	return ext2_xattr_get_acl(inode, ACL_TYPE_DEFAULT, buffer, size);
-}
-
-static int
-ext2_xattr_set_acl(struct inode *inode, int type, const void *value,
-		   size_t size)
+ext2_xattr_set_acl(struct dentry *dentry, const char *name, const void *value,
+		   size_t size, int flags, int type)
 {
 	struct posix_acl *acl;
 	int error;
 
-	if (!test_opt(inode->i_sb, POSIX_ACL))
+	if (strcmp(name, "") != 0)
+		return -EINVAL;
+	if (!test_opt(dentry->d_sb, POSIX_ACL))
 		return -EOPNOTSUPP;
-	if (!is_owner_or_cap(inode))
+	if (!is_owner_or_cap(dentry->d_inode))
 		return -EPERM;
 
 	if (value) {
@@ -426,41 +413,25 @@ ext2_xattr_set_acl(struct inode *inode, int type, const void *value,
 	} else
 		acl = NULL;
 
-	error = ext2_set_acl(inode, type, acl);
+	error = ext2_set_acl(dentry->d_inode, type, acl);
 
 release_and_out:
 	posix_acl_release(acl);
 	return error;
 }
 
-static int
-ext2_xattr_set_acl_access(struct inode *inode, const char *name,
-			  const void *value, size_t size, int flags)
-{
-	if (strcmp(name, "") != 0)
-		return -EINVAL;
-	return ext2_xattr_set_acl(inode, ACL_TYPE_ACCESS, value, size);
-}
-
-static int
-ext2_xattr_set_acl_default(struct inode *inode, const char *name,
-			   const void *value, size_t size, int flags)
-{
-	if (strcmp(name, "") != 0)
-		return -EINVAL;
-	return ext2_xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size);
-}
-
 struct xattr_handler ext2_xattr_acl_access_handler = {
 	.prefix	= POSIX_ACL_XATTR_ACCESS,
+	.flags	= ACL_TYPE_ACCESS,
 	.list	= ext2_xattr_list_acl_access,
-	.get	= ext2_xattr_get_acl_access,
-	.set	= ext2_xattr_set_acl_access,
+	.get	= ext2_xattr_get_acl,
+	.set	= ext2_xattr_set_acl,
 };
 
 struct xattr_handler ext2_xattr_acl_default_handler = {
 	.prefix	= POSIX_ACL_XATTR_DEFAULT,
+	.flags	= ACL_TYPE_DEFAULT,
 	.list	= ext2_xattr_list_acl_default,
-	.get	= ext2_xattr_get_acl_default,
-	.set	= ext2_xattr_set_acl_default,
+	.get	= ext2_xattr_get_acl,
+	.set	= ext2_xattr_set_acl,
 };
diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c
index 7913531ec6d5..904f00642f84 100644
--- a/fs/ext2/xattr.c
+++ b/fs/ext2/xattr.c
@@ -60,6 +60,7 @@
 #include <linux/mbcache.h>
 #include <linux/quotaops.h>
 #include <linux/rwsem.h>
+#include <linux/security.h>
 #include "ext2.h"
 #include "xattr.h"
 #include "acl.h"
@@ -249,8 +250,9 @@ cleanup:
  * used / required on success.
  */
 static int
-ext2_xattr_list(struct inode *inode, char *buffer, size_t buffer_size)
+ext2_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
 {
+	struct inode *inode = dentry->d_inode;
 	struct buffer_head *bh = NULL;
 	struct ext2_xattr_entry *entry;
 	char *end;
@@ -300,9 +302,10 @@ bad_block:	ext2_error(inode->i_sb, "ext2_xattr_list",
 			ext2_xattr_handler(entry->e_name_index);
 
 		if (handler) {
-			size_t size = handler->list(inode, buffer, rest,
+			size_t size = handler->list(dentry, buffer, rest,
 						    entry->e_name,
-						    entry->e_name_len);
+						    entry->e_name_len,
+						    handler->flags);
 			if (buffer) {
 				if (size > rest) {
 					error = -ERANGE;
@@ -330,7 +333,7 @@ cleanup:
 ssize_t
 ext2_listxattr(struct dentry *dentry, char *buffer, size_t size)
 {
-	return ext2_xattr_list(dentry->d_inode, buffer, size);
+	return ext2_xattr_list(dentry, buffer, size);
 }
 
 /*
diff --git a/fs/ext2/xattr_security.c b/fs/ext2/xattr_security.c
index 70c0dbdcdcb7..c8155845ac05 100644
--- a/fs/ext2/xattr_security.c
+++ b/fs/ext2/xattr_security.c
@@ -11,8 +11,8 @@
 #include "xattr.h"
 
 static size_t
-ext2_xattr_security_list(struct inode *inode, char *list, size_t list_size,
-			 const char *name, size_t name_len)
+ext2_xattr_security_list(struct dentry *dentry, char *list, size_t list_size,
+			 const char *name, size_t name_len, int type)
 {
 	const int prefix_len = XATTR_SECURITY_PREFIX_LEN;
 	const size_t total_len = prefix_len + name_len + 1;
@@ -26,22 +26,22 @@ ext2_xattr_security_list(struct inode *inode, char *list, size_t list_size,
 }
 
 static int
-ext2_xattr_security_get(struct inode *inode, const char *name,
-		       void *buffer, size_t size)
+ext2_xattr_security_get(struct dentry *dentry, const char *name,
+		       void *buffer, size_t size, int type)
 {
 	if (strcmp(name, "") == 0)
 		return -EINVAL;
-	return ext2_xattr_get(inode, EXT2_XATTR_INDEX_SECURITY, name,
+	return ext2_xattr_get(dentry->d_inode, EXT2_XATTR_INDEX_SECURITY, name,
 			      buffer, size);
 }
 
 static int
-ext2_xattr_security_set(struct inode *inode, const char *name,
-		       const void *value, size_t size, int flags)
+ext2_xattr_security_set(struct dentry *dentry, const char *name,
+		const void *value, size_t size, int flags, int type)
 {
 	if (strcmp(name, "") == 0)
 		return -EINVAL;
-	return ext2_xattr_set(inode, EXT2_XATTR_INDEX_SECURITY, name,
+	return ext2_xattr_set(dentry->d_inode, EXT2_XATTR_INDEX_SECURITY, name,
 			      value, size, flags);
 }
 
diff --git a/fs/ext2/xattr_trusted.c b/fs/ext2/xattr_trusted.c
index e8219f8eae9f..2a26d71f4771 100644
--- a/fs/ext2/xattr_trusted.c
+++ b/fs/ext2/xattr_trusted.c
@@ -13,8 +13,8 @@
 #include "xattr.h"
 
 static size_t
-ext2_xattr_trusted_list(struct inode *inode, char *list, size_t list_size,
-			const char *name, size_t name_len)
+ext2_xattr_trusted_list(struct dentry *dentry, char *list, size_t list_size,
+		const char *name, size_t name_len, int type)
 {
 	const int prefix_len = XATTR_TRUSTED_PREFIX_LEN;
 	const size_t total_len = prefix_len + name_len + 1;
@@ -31,22 +31,22 @@ ext2_xattr_trusted_list(struct inode *inode, char *list, size_t list_size,
 }
 
 static int
-ext2_xattr_trusted_get(struct inode *inode, const char *name,
-		       void *buffer, size_t size)
+ext2_xattr_trusted_get(struct dentry *dentry, const char *name,
+		void *buffer, size_t size, int type)
 {
 	if (strcmp(name, "") == 0)
 		return -EINVAL;
-	return ext2_xattr_get(inode, EXT2_XATTR_INDEX_TRUSTED, name,
+	return ext2_xattr_get(dentry->d_inode, EXT2_XATTR_INDEX_TRUSTED, name,
 			      buffer, size);
 }
 
 static int
-ext2_xattr_trusted_set(struct inode *inode, const char *name,
-		       const void *value, size_t size, int flags)
+ext2_xattr_trusted_set(struct dentry *dentry, const char *name,
+		const void *value, size_t size, int flags, int type)
 {
 	if (strcmp(name, "") == 0)
 		return -EINVAL;
-	return ext2_xattr_set(inode, EXT2_XATTR_INDEX_TRUSTED, name,
+	return ext2_xattr_set(dentry->d_inode, EXT2_XATTR_INDEX_TRUSTED, name,
 			      value, size, flags);
 }
 
diff --git a/fs/ext2/xattr_user.c b/fs/ext2/xattr_user.c
index 92495d28c62f..3f6caf3684b4 100644
--- a/fs/ext2/xattr_user.c
+++ b/fs/ext2/xattr_user.c
@@ -12,13 +12,13 @@
 #include "xattr.h"
 
 static size_t
-ext2_xattr_user_list(struct inode *inode, char *list, size_t list_size,
-		     const char *name, size_t name_len)
+ext2_xattr_user_list(struct dentry *dentry, char *list, size_t list_size,
+		const char *name, size_t name_len, int type)
 {
 	const size_t prefix_len = XATTR_USER_PREFIX_LEN;
 	const size_t total_len = prefix_len + name_len + 1;
 
-	if (!test_opt(inode->i_sb, XATTR_USER))
+	if (!test_opt(dentry->d_sb, XATTR_USER))
 		return 0;
 
 	if (list && total_len <= list_size) {
@@ -30,27 +30,28 @@ ext2_xattr_user_list(struct inode *inode, char *list, size_t list_size,
 }
 
 static int
-ext2_xattr_user_get(struct inode *inode, const char *name,
-		    void *buffer, size_t size)
+ext2_xattr_user_get(struct dentry *dentry, const char *name,
+		void *buffer, size_t size, int type)
 {
 	if (strcmp(name, "") == 0)
 		return -EINVAL;
-	if (!test_opt(inode->i_sb, XATTR_USER))
+	if (!test_opt(dentry->d_sb, XATTR_USER))
 		return -EOPNOTSUPP;
-	return ext2_xattr_get(inode, EXT2_XATTR_INDEX_USER, name, buffer, size);
+	return ext2_xattr_get(dentry->d_inode, EXT2_XATTR_INDEX_USER,
+			      name, buffer, size);
 }
 
 static int
-ext2_xattr_user_set(struct inode *inode, const char *name,
-		    const void *value, size_t size, int flags)
+ext2_xattr_user_set(struct dentry *dentry, const char *name,
+		const void *value, size_t size, int flags, int type)
 {
 	if (strcmp(name, "") == 0)
 		return -EINVAL;
-	if (!test_opt(inode->i_sb, XATTR_USER))
+	if (!test_opt(dentry->d_sb, XATTR_USER))
 		return -EOPNOTSUPP;
 
-	return ext2_xattr_set(inode, EXT2_XATTR_INDEX_USER, name,
-			      value, size, flags);
+	return ext2_xattr_set(dentry->d_inode, EXT2_XATTR_INDEX_USER,
+			      name, value, size, flags);
 }
 
 struct xattr_handler ext2_xattr_user_handler = {
diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c
index c9b0df376b5f..82ba34158661 100644
--- a/fs/ext3/acl.c
+++ b/fs/ext3/acl.c
@@ -366,12 +366,12 @@ out:
  * Extended attribute handlers
  */
 static size_t
-ext3_xattr_list_acl_access(struct inode *inode, char *list, size_t list_len,
-			   const char *name, size_t name_len)
+ext3_xattr_list_acl_access(struct dentry *dentry, char *list, size_t list_len,
+			   const char *name, size_t name_len, int type)
 {
 	const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS);
 
-	if (!test_opt(inode->i_sb, POSIX_ACL))
+	if (!test_opt(dentry->d_sb, POSIX_ACL))
 		return 0;
 	if (list && size <= list_len)
 		memcpy(list, POSIX_ACL_XATTR_ACCESS, size);
@@ -379,12 +379,12 @@ ext3_xattr_list_acl_access(struct inode *inode, char *list, size_t list_len,
 }
 
 static size_t
-ext3_xattr_list_acl_default(struct inode *inode, char *list, size_t list_len,
-			    const char *name, size_t name_len)
+ext3_xattr_list_acl_default(struct dentry *dentry, char *list, size_t list_len,
+			    const char *name, size_t name_len, int type)
 {
 	const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT);
 
-	if (!test_opt(inode->i_sb, POSIX_ACL))
+	if (!test_opt(dentry->d_sb, POSIX_ACL))
 		return 0;
 	if (list && size <= list_len)
 		memcpy(list, POSIX_ACL_XATTR_DEFAULT, size);
@@ -392,15 +392,18 @@ ext3_xattr_list_acl_default(struct inode *inode, char *list, size_t list_len,
 }
 
 static int
-ext3_xattr_get_acl(struct inode *inode, int type, void *buffer, size_t size)
+ext3_xattr_get_acl(struct dentry *dentry, const char *name, void *buffer,
+		   size_t size, int type)
 {
 	struct posix_acl *acl;
 	int error;
 
-	if (!test_opt(inode->i_sb, POSIX_ACL))
+	if (strcmp(name, "") != 0)
+		return -EINVAL;
+	if (!test_opt(dentry->d_sb, POSIX_ACL))
 		return -EOPNOTSUPP;
 
-	acl = ext3_get_acl(inode, type);
+	acl = ext3_get_acl(dentry->d_inode, type);
 	if (IS_ERR(acl))
 		return PTR_ERR(acl);
 	if (acl == NULL)
@@ -412,31 +415,16 @@ ext3_xattr_get_acl(struct inode *inode, int type, void *buffer, size_t size)
 }
 
 static int
-ext3_xattr_get_acl_access(struct inode *inode, const char *name,
-			  void *buffer, size_t size)
-{
-	if (strcmp(name, "") != 0)
-		return -EINVAL;
-	return ext3_xattr_get_acl(inode, ACL_TYPE_ACCESS, buffer, size);
-}
-
-static int
-ext3_xattr_get_acl_default(struct inode *inode, const char *name,
-			   void *buffer, size_t size)
-{
-	if (strcmp(name, "") != 0)
-		return -EINVAL;
-	return ext3_xattr_get_acl(inode, ACL_TYPE_DEFAULT, buffer, size);
-}
-
-static int
-ext3_xattr_set_acl(struct inode *inode, int type, const void *value,
-		   size_t size)
+ext3_xattr_set_acl(struct dentry *dentry, const char *name, const void *value,
+		   size_t size, int flags, int type)
 {
+	struct inode *inode = dentry->d_inode;
 	handle_t *handle;
 	struct posix_acl *acl;
 	int error, retries = 0;
 
+	if (strcmp(name, "") != 0)
+		return -EINVAL;
 	if (!test_opt(inode->i_sb, POSIX_ACL))
 		return -EOPNOTSUPP;
 	if (!is_owner_or_cap(inode))
@@ -468,34 +456,18 @@ release_and_out:
 	return error;
 }
 
-static int
-ext3_xattr_set_acl_access(struct inode *inode, const char *name,
-			  const void *value, size_t size, int flags)
-{
-	if (strcmp(name, "") != 0)
-		return -EINVAL;
-	return ext3_xattr_set_acl(inode, ACL_TYPE_ACCESS, value, size);
-}
-
-static int
-ext3_xattr_set_acl_default(struct inode *inode, const char *name,
-			   const void *value, size_t size, int flags)
-{
-	if (strcmp(name, "") != 0)
-		return -EINVAL;
-	return ext3_xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size);
-}
-
 struct xattr_handler ext3_xattr_acl_access_handler = {
 	.prefix	= POSIX_ACL_XATTR_ACCESS,
+	.flags	= ACL_TYPE_ACCESS,
 	.list	= ext3_xattr_list_acl_access,
-	.get	= ext3_xattr_get_acl_access,
-	.set	= ext3_xattr_set_acl_access,
+	.get	= ext3_xattr_get_acl,
+	.set	= ext3_xattr_set_acl,
 };
 
 struct xattr_handler ext3_xattr_acl_default_handler = {
 	.prefix	= POSIX_ACL_XATTR_DEFAULT,
+	.flags	= ACL_TYPE_DEFAULT,
 	.list	= ext3_xattr_list_acl_default,
-	.get	= ext3_xattr_get_acl_default,
-	.set	= ext3_xattr_set_acl_default,
+	.get	= ext3_xattr_get_acl,
+	.set	= ext3_xattr_set_acl,
 };
diff --git a/fs/ext3/xattr.c b/fs/ext3/xattr.c
index 387d92d00b97..66895ccf76c7 100644
--- a/fs/ext3/xattr.c
+++ b/fs/ext3/xattr.c
@@ -99,7 +99,7 @@ static struct buffer_head *ext3_xattr_cache_find(struct inode *,
 						 struct mb_cache_entry **);
 static void ext3_xattr_rehash(struct ext3_xattr_header *,
 			      struct ext3_xattr_entry *);
-static int ext3_xattr_list(struct inode *inode, char *buffer,
+static int ext3_xattr_list(struct dentry *dentry, char *buffer,
 			   size_t buffer_size);
 
 static struct mb_cache *ext3_xattr_cache;
@@ -147,7 +147,7 @@ ext3_xattr_handler(int name_index)
 ssize_t
 ext3_listxattr(struct dentry *dentry, char *buffer, size_t size)
 {
-	return ext3_xattr_list(dentry->d_inode, buffer, size);
+	return ext3_xattr_list(dentry, buffer, size);
 }
 
 static int
@@ -332,7 +332,7 @@ ext3_xattr_get(struct inode *inode, int name_index, const char *name,
 }
 
 static int
-ext3_xattr_list_entries(struct inode *inode, struct ext3_xattr_entry *entry,
+ext3_xattr_list_entries(struct dentry *dentry, struct ext3_xattr_entry *entry,
 			char *buffer, size_t buffer_size)
 {
 	size_t rest = buffer_size;
@@ -342,9 +342,10 @@ ext3_xattr_list_entries(struct inode *inode, struct ext3_xattr_entry *entry,
 			ext3_xattr_handler(entry->e_name_index);
 
 		if (handler) {
-			size_t size = handler->list(inode, buffer, rest,
+			size_t size = handler->list(dentry, buffer, rest,
 						    entry->e_name,
-						    entry->e_name_len);
+						    entry->e_name_len,
+						    handler->flags);
 			if (buffer) {
 				if (size > rest)
 					return -ERANGE;
@@ -357,8 +358,9 @@ ext3_xattr_list_entries(struct inode *inode, struct ext3_xattr_entry *entry,
 }
 
 static int
-ext3_xattr_block_list(struct inode *inode, char *buffer, size_t buffer_size)
+ext3_xattr_block_list(struct dentry *dentry, char *buffer, size_t buffer_size)
 {
+	struct inode *inode = dentry->d_inode;
 	struct buffer_head *bh = NULL;
 	int error;
 
@@ -383,7 +385,7 @@ ext3_xattr_block_list(struct inode *inode, char *buffer, size_t buffer_size)
 		goto cleanup;
 	}
 	ext3_xattr_cache_insert(bh);
-	error = ext3_xattr_list_entries(inode, BFIRST(bh), buffer, buffer_size);
+	error = ext3_xattr_list_entries(dentry, BFIRST(bh), buffer, buffer_size);
 
 cleanup:
 	brelse(bh);
@@ -392,8 +394,9 @@ cleanup:
 }
 
 static int
-ext3_xattr_ibody_list(struct inode *inode, char *buffer, size_t buffer_size)
+ext3_xattr_ibody_list(struct dentry *dentry, char *buffer, size_t buffer_size)
 {
+	struct inode *inode = dentry->d_inode;
 	struct ext3_xattr_ibody_header *header;
 	struct ext3_inode *raw_inode;
 	struct ext3_iloc iloc;
@@ -411,7 +414,7 @@ ext3_xattr_ibody_list(struct inode *inode, char *buffer, size_t buffer_size)
 	error = ext3_xattr_check_names(IFIRST(header), end);
 	if (error)
 		goto cleanup;
-	error = ext3_xattr_list_entries(inode, IFIRST(header),
+	error = ext3_xattr_list_entries(dentry, IFIRST(header),
 					buffer, buffer_size);
 
 cleanup:
@@ -430,12 +433,12 @@ cleanup:
  * used / required on success.
  */
 static int
-ext3_xattr_list(struct inode *inode, char *buffer, size_t buffer_size)
+ext3_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
 {
 	int i_error, b_error;
 
-	down_read(&EXT3_I(inode)->xattr_sem);
-	i_error = ext3_xattr_ibody_list(inode, buffer, buffer_size);
+	down_read(&EXT3_I(dentry->d_inode)->xattr_sem);
+	i_error = ext3_xattr_ibody_list(dentry, buffer, buffer_size);
 	if (i_error < 0) {
 		b_error = 0;
 	} else {
@@ -443,11 +446,11 @@ ext3_xattr_list(struct inode *inode, char *buffer, size_t buffer_size)
 			buffer += i_error;
 			buffer_size -= i_error;
 		}
-		b_error = ext3_xattr_block_list(inode, buffer, buffer_size);
+		b_error = ext3_xattr_block_list(dentry, buffer, buffer_size);
 		if (b_error < 0)
 			i_error = 0;
 	}
-	up_read(&EXT3_I(inode)->xattr_sem);
+	up_read(&EXT3_I(dentry->d_inode)->xattr_sem);
 	return i_error + b_error;
 }
 
diff --git a/fs/ext3/xattr_security.c b/fs/ext3/xattr_security.c
index 37b81097bdf2..474348788dd9 100644
--- a/fs/ext3/xattr_security.c
+++ b/fs/ext3/xattr_security.c
@@ -12,8 +12,8 @@
 #include "xattr.h"
 
 static size_t
-ext3_xattr_security_list(struct inode *inode, char *list, size_t list_size,
-			 const char *name, size_t name_len)
+ext3_xattr_security_list(struct dentry *dentry, char *list, size_t list_size,
+			 const char *name, size_t name_len, int type)
 {
 	const size_t prefix_len = XATTR_SECURITY_PREFIX_LEN;
 	const size_t total_len = prefix_len + name_len + 1;
@@ -28,23 +28,23 @@ ext3_xattr_security_list(struct inode *inode, char *list, size_t list_size,
 }
 
 static int
-ext3_xattr_security_get(struct inode *inode, const char *name,
-		       void *buffer, size_t size)
+ext3_xattr_security_get(struct dentry *dentry, const char *name,
+		void *buffer, size_t size, int type)
 {
 	if (strcmp(name, "") == 0)
 		return -EINVAL;
-	return ext3_xattr_get(inode, EXT3_XATTR_INDEX_SECURITY, name,
-			      buffer, size);
+	return ext3_xattr_get(dentry->d_inode, EXT3_XATTR_INDEX_SECURITY,
+			      name, buffer, size);
 }
 
 static int
-ext3_xattr_security_set(struct inode *inode, const char *name,
-		       const void *value, size_t size, int flags)
+ext3_xattr_security_set(struct dentry *dentry, const char *name,
+		const void *value, size_t size, int flags, int type)
 {
 	if (strcmp(name, "") == 0)
 		return -EINVAL;
-	return ext3_xattr_set(inode, EXT3_XATTR_INDEX_SECURITY, name,
-			      value, size, flags);
+	return ext3_xattr_set(dentry->d_inode, EXT3_XATTR_INDEX_SECURITY,
+			      name, value, size, flags);
 }
 
 int
diff --git a/fs/ext3/xattr_trusted.c b/fs/ext3/xattr_trusted.c
index c7c41a410c4b..e5562845ed96 100644
--- a/fs/ext3/xattr_trusted.c
+++ b/fs/ext3/xattr_trusted.c
@@ -14,8 +14,8 @@
 #include "xattr.h"
 
 static size_t
-ext3_xattr_trusted_list(struct inode *inode, char *list, size_t list_size,
-			const char *name, size_t name_len)
+ext3_xattr_trusted_list(struct dentry *dentry, char *list, size_t list_size,
+		const char *name, size_t name_len, int type)
 {
 	const size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN;
 	const size_t total_len = prefix_len + name_len + 1;
@@ -32,22 +32,22 @@ ext3_xattr_trusted_list(struct inode *inode, char *list, size_t list_size,
 }
 
 static int
-ext3_xattr_trusted_get(struct inode *inode, const char *name,
-		       void *buffer, size_t size)
+ext3_xattr_trusted_get(struct dentry *dentry, const char *name,
+		       void *buffer, size_t size, int type)
 {
 	if (strcmp(name, "") == 0)
 		return -EINVAL;
-	return ext3_xattr_get(inode, EXT3_XATTR_INDEX_TRUSTED, name,
-			      buffer, size);
+	return ext3_xattr_get(dentry->d_inode, EXT3_XATTR_INDEX_TRUSTED,
+			      name, buffer, size);
 }
 
 static int
-ext3_xattr_trusted_set(struct inode *inode, const char *name,
-		       const void *value, size_t size, int flags)
+ext3_xattr_trusted_set(struct dentry *dentry, const char *name,
+		const void *value, size_t size, int flags, int type)
 {
 	if (strcmp(name, "") == 0)
 		return -EINVAL;
-	return ext3_xattr_set(inode, EXT3_XATTR_INDEX_TRUSTED, name,
+	return ext3_xattr_set(dentry->d_inode, EXT3_XATTR_INDEX_TRUSTED, name,
 			      value, size, flags);
 }
 
diff --git a/fs/ext3/xattr_user.c b/fs/ext3/xattr_user.c
index 430fe63b31b3..3bcfe9ee0a68 100644
--- a/fs/ext3/xattr_user.c
+++ b/fs/ext3/xattr_user.c
@@ -13,13 +13,13 @@
 #include "xattr.h"
 
 static size_t
-ext3_xattr_user_list(struct inode *inode, char *list, size_t list_size,
-		     const char *name, size_t name_len)
+ext3_xattr_user_list(struct dentry *dentry, char *list, size_t list_size,
+		const char *name, size_t name_len, int type)
 {
 	const size_t prefix_len = XATTR_USER_PREFIX_LEN;
 	const size_t total_len = prefix_len + name_len + 1;
 
-	if (!test_opt(inode->i_sb, XATTR_USER))
+	if (!test_opt(dentry->d_sb, XATTR_USER))
 		return 0;
 
 	if (list && total_len <= list_size) {
@@ -31,26 +31,27 @@ ext3_xattr_user_list(struct inode *inode, char *list, size_t list_size,
 }
 
 static int
-ext3_xattr_user_get(struct inode *inode, const char *name,
-		    void *buffer, size_t size)
+ext3_xattr_user_get(struct dentry *dentry, const char *name, void *buffer,
+		size_t size, int type)
 {
 	if (strcmp(name, "") == 0)
 		return -EINVAL;
-	if (!test_opt(inode->i_sb, XATTR_USER))
+	if (!test_opt(dentry->d_sb, XATTR_USER))
 		return -EOPNOTSUPP;
-	return ext3_xattr_get(inode, EXT3_XATTR_INDEX_USER, name, buffer, size);
+	return ext3_xattr_get(dentry->d_inode, EXT3_XATTR_INDEX_USER,
+			      name, buffer, size);
 }
 
 static int
-ext3_xattr_user_set(struct inode *inode, const char *name,
-		    const void *value, size_t size, int flags)
+ext3_xattr_user_set(struct dentry *dentry, const char *name,
+		const void *value, size_t size, int flags, int type)
 {
 	if (strcmp(name, "") == 0)
 		return -EINVAL;
-	if (!test_opt(inode->i_sb, XATTR_USER))
+	if (!test_opt(dentry->d_sb, XATTR_USER))
 		return -EOPNOTSUPP;
-	return ext3_xattr_set(inode, EXT3_XATTR_INDEX_USER, name,
-			      value, size, flags);
+	return ext3_xattr_set(dentry->d_inode, EXT3_XATTR_INDEX_USER,
+			      name, value, size, flags);
 }
 
 struct xattr_handler ext3_xattr_user_handler = {
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
index 0df88b2a69b0..8a2a29d35a6f 100644
--- a/fs/ext4/acl.c
+++ b/fs/ext4/acl.c
@@ -364,12 +364,12 @@ out:
  * Extended attribute handlers
  */
 static size_t
-ext4_xattr_list_acl_access(struct inode *inode, char *list, size_t list_len,
-			   const char *name, size_t name_len)
+ext4_xattr_list_acl_access(struct dentry *dentry, char *list, size_t list_len,
+			   const char *name, size_t name_len, int type)
 {
 	const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS);
 
-	if (!test_opt(inode->i_sb, POSIX_ACL))
+	if (!test_opt(dentry->d_sb, POSIX_ACL))
 		return 0;
 	if (list && size <= list_len)
 		memcpy(list, POSIX_ACL_XATTR_ACCESS, size);
@@ -377,12 +377,12 @@ ext4_xattr_list_acl_access(struct inode *inode, char *list, size_t list_len,
 }
 
 static size_t
-ext4_xattr_list_acl_default(struct inode *inode, char *list, size_t list_len,
-			    const char *name, size_t name_len)
+ext4_xattr_list_acl_default(struct dentry *dentry, char *list, size_t list_len,
+			    const char *name, size_t name_len, int type)
 {
 	const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT);
 
-	if (!test_opt(inode->i_sb, POSIX_ACL))
+	if (!test_opt(dentry->d_sb, POSIX_ACL))
 		return 0;
 	if (list && size <= list_len)
 		memcpy(list, POSIX_ACL_XATTR_DEFAULT, size);
@@ -390,15 +390,18 @@ ext4_xattr_list_acl_default(struct inode *inode, char *list, size_t list_len,
 }
 
 static int
-ext4_xattr_get_acl(struct inode *inode, int type, void *buffer, size_t size)
+ext4_xattr_get_acl(struct dentry *dentry, const char *name, void *buffer,
+		   size_t size, int type)
 {
 	struct posix_acl *acl;
 	int error;
 
-	if (!test_opt(inode->i_sb, POSIX_ACL))
+	if (strcmp(name, "") != 0)
+		return -EINVAL;
+	if (!test_opt(dentry->d_sb, POSIX_ACL))
 		return -EOPNOTSUPP;
 
-	acl = ext4_get_acl(inode, type);
+	acl = ext4_get_acl(dentry->d_inode, type);
 	if (IS_ERR(acl))
 		return PTR_ERR(acl);
 	if (acl == NULL)
@@ -410,31 +413,16 @@ ext4_xattr_get_acl(struct inode *inode, int type, void *buffer, size_t size)
 }
 
 static int
-ext4_xattr_get_acl_access(struct inode *inode, const char *name,
-			  void *buffer, size_t size)
-{
-	if (strcmp(name, "") != 0)
-		return -EINVAL;
-	return ext4_xattr_get_acl(inode, ACL_TYPE_ACCESS, buffer, size);
-}
-
-static int
-ext4_xattr_get_acl_default(struct inode *inode, const char *name,
-			   void *buffer, size_t size)
-{
-	if (strcmp(name, "") != 0)
-		return -EINVAL;
-	return ext4_xattr_get_acl(inode, ACL_TYPE_DEFAULT, buffer, size);
-}
-
-static int
-ext4_xattr_set_acl(struct inode *inode, int type, const void *value,
-		   size_t size)
+ext4_xattr_set_acl(struct dentry *dentry, const char *name, const void *value,
+		   size_t size, int flags, int type)
 {
+	struct inode *inode = dentry->d_inode;
 	handle_t *handle;
 	struct posix_acl *acl;
 	int error, retries = 0;
 
+	if (strcmp(name, "") != 0)
+		return -EINVAL;
 	if (!test_opt(inode->i_sb, POSIX_ACL))
 		return -EOPNOTSUPP;
 	if (!is_owner_or_cap(inode))
@@ -466,34 +454,18 @@ release_and_out:
 	return error;
 }
 
-static int
-ext4_xattr_set_acl_access(struct inode *inode, const char *name,
-			  const void *value, size_t size, int flags)
-{
-	if (strcmp(name, "") != 0)
-		return -EINVAL;
-	return ext4_xattr_set_acl(inode, ACL_TYPE_ACCESS, value, size);
-}
-
-static int
-ext4_xattr_set_acl_default(struct inode *inode, const char *name,
-			   const void *value, size_t size, int flags)
-{
-	if (strcmp(name, "") != 0)
-		return -EINVAL;
-	return ext4_xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size);
-}
-
 struct xattr_handler ext4_xattr_acl_access_handler = {
 	.prefix	= POSIX_ACL_XATTR_ACCESS,
+	.flags	= ACL_TYPE_ACCESS,
 	.list	= ext4_xattr_list_acl_access,
-	.get	= ext4_xattr_get_acl_access,
-	.set	= ext4_xattr_set_acl_access,
+	.get	= ext4_xattr_get_acl,
+	.set	= ext4_xattr_set_acl,
 };
 
 struct xattr_handler ext4_xattr_acl_default_handler = {
 	.prefix	= POSIX_ACL_XATTR_DEFAULT,
+	.flags	= ACL_TYPE_DEFAULT,
 	.list	= ext4_xattr_list_acl_default,
-	.get	= ext4_xattr_get_acl_default,
-	.set	= ext4_xattr_set_acl_default,
+	.get	= ext4_xattr_get_acl,
+	.set	= ext4_xattr_set_acl,
 };
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 910bf9a59cb3..83218bebbc7c 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -92,7 +92,7 @@ static struct buffer_head *ext4_xattr_cache_find(struct inode *,
 						 struct mb_cache_entry **);
 static void ext4_xattr_rehash(struct ext4_xattr_header *,
 			      struct ext4_xattr_entry *);
-static int ext4_xattr_list(struct inode *inode, char *buffer,
+static int ext4_xattr_list(struct dentry *dentry, char *buffer,
 			   size_t buffer_size);
 
 static struct mb_cache *ext4_xattr_cache;
@@ -140,7 +140,7 @@ ext4_xattr_handler(int name_index)
 ssize_t
 ext4_listxattr(struct dentry *dentry, char *buffer, size_t size)
 {
-	return ext4_xattr_list(dentry->d_inode, buffer, size);
+	return ext4_xattr_list(dentry, buffer, size);
 }
 
 static int
@@ -325,7 +325,7 @@ ext4_xattr_get(struct inode *inode, int name_index, const char *name,
 }
 
 static int
-ext4_xattr_list_entries(struct inode *inode, struct ext4_xattr_entry *entry,
+ext4_xattr_list_entries(struct dentry *dentry, struct ext4_xattr_entry *entry,
 			char *buffer, size_t buffer_size)
 {
 	size_t rest = buffer_size;
@@ -335,9 +335,10 @@ ext4_xattr_list_entries(struct inode *inode, struct ext4_xattr_entry *entry,
 			ext4_xattr_handler(entry->e_name_index);
 
 		if (handler) {
-			size_t size = handler->list(inode, buffer, rest,
+			size_t size = handler->list(dentry, buffer, rest,
 						    entry->e_name,
-						    entry->e_name_len);
+						    entry->e_name_len,
+						    handler->flags);
 			if (buffer) {
 				if (size > rest)
 					return -ERANGE;
@@ -350,8 +351,9 @@ ext4_xattr_list_entries(struct inode *inode, struct ext4_xattr_entry *entry,
 }
 
 static int
-ext4_xattr_block_list(struct inode *inode, char *buffer, size_t buffer_size)
+ext4_xattr_block_list(struct dentry *dentry, char *buffer, size_t buffer_size)
 {
+	struct inode *inode = dentry->d_inode;
 	struct buffer_head *bh = NULL;
 	int error;
 
@@ -376,7 +378,7 @@ ext4_xattr_block_list(struct inode *inode, char *buffer, size_t buffer_size)
 		goto cleanup;
 	}
 	ext4_xattr_cache_insert(bh);
-	error = ext4_xattr_list_entries(inode, BFIRST(bh), buffer, buffer_size);
+	error = ext4_xattr_list_entries(dentry, BFIRST(bh), buffer, buffer_size);
 
 cleanup:
 	brelse(bh);
@@ -385,8 +387,9 @@ cleanup:
 }
 
 static int
-ext4_xattr_ibody_list(struct inode *inode, char *buffer, size_t buffer_size)
+ext4_xattr_ibody_list(struct dentry *dentry, char *buffer, size_t buffer_size)
 {
+	struct inode *inode = dentry->d_inode;
 	struct ext4_xattr_ibody_header *header;
 	struct ext4_inode *raw_inode;
 	struct ext4_iloc iloc;
@@ -404,7 +407,7 @@ ext4_xattr_ibody_list(struct inode *inode, char *buffer, size_t buffer_size)
 	error = ext4_xattr_check_names(IFIRST(header), end);
 	if (error)
 		goto cleanup;
-	error = ext4_xattr_list_entries(inode, IFIRST(header),
+	error = ext4_xattr_list_entries(dentry, IFIRST(header),
 					buffer, buffer_size);
 
 cleanup:
@@ -423,12 +426,12 @@ cleanup:
  * used / required on success.
  */
 static int
-ext4_xattr_list(struct inode *inode, char *buffer, size_t buffer_size)
+ext4_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
 {
 	int i_error, b_error;
 
-	down_read(&EXT4_I(inode)->xattr_sem);
-	i_error = ext4_xattr_ibody_list(inode, buffer, buffer_size);
+	down_read(&EXT4_I(dentry->d_inode)->xattr_sem);
+	i_error = ext4_xattr_ibody_list(dentry, buffer, buffer_size);
 	if (i_error < 0) {
 		b_error = 0;
 	} else {
@@ -436,11 +439,11 @@ ext4_xattr_list(struct inode *inode, char *buffer, size_t buffer_size)
 			buffer += i_error;
 			buffer_size -= i_error;
 		}
-		b_error = ext4_xattr_block_list(inode, buffer, buffer_size);
+		b_error = ext4_xattr_block_list(dentry, buffer, buffer_size);
 		if (b_error < 0)
 			i_error = 0;
 	}
-	up_read(&EXT4_I(inode)->xattr_sem);
+	up_read(&EXT4_I(dentry->d_inode)->xattr_sem);
 	return i_error + b_error;
 }
 
diff --git a/fs/ext4/xattr_security.c b/fs/ext4/xattr_security.c
index ca5f89fc6cae..983c253999a7 100644
--- a/fs/ext4/xattr_security.c
+++ b/fs/ext4/xattr_security.c
@@ -12,8 +12,8 @@
 #include "xattr.h"
 
 static size_t
-ext4_xattr_security_list(struct inode *inode, char *list, size_t list_size,
-			 const char *name, size_t name_len)
+ext4_xattr_security_list(struct dentry *dentry, char *list, size_t list_size,
+		const char *name, size_t name_len, int type)
 {
 	const size_t prefix_len = sizeof(XATTR_SECURITY_PREFIX)-1;
 	const size_t total_len = prefix_len + name_len + 1;
@@ -28,23 +28,23 @@ ext4_xattr_security_list(struct inode *inode, char *list, size_t list_size,
 }
 
 static int
-ext4_xattr_security_get(struct inode *inode, const char *name,
-		       void *buffer, size_t size)
+ext4_xattr_security_get(struct dentry *dentry, const char *name,
+		       void *buffer, size_t size, int type)
 {
 	if (strcmp(name, "") == 0)
 		return -EINVAL;
-	return ext4_xattr_get(inode, EXT4_XATTR_INDEX_SECURITY, name,
-			      buffer, size);
+	return ext4_xattr_get(dentry->d_inode, EXT4_XATTR_INDEX_SECURITY,
+			      name, buffer, size);
 }
 
 static int
-ext4_xattr_security_set(struct inode *inode, const char *name,
-		       const void *value, size_t size, int flags)
+ext4_xattr_security_set(struct dentry *dentry, const char *name,
+		const void *value, size_t size, int flags, int type)
 {
 	if (strcmp(name, "") == 0)
 		return -EINVAL;
-	return ext4_xattr_set(inode, EXT4_XATTR_INDEX_SECURITY, name,
-			      value, size, flags);
+	return ext4_xattr_set(dentry->d_inode, EXT4_XATTR_INDEX_SECURITY,
+			      name, value, size, flags);
 }
 
 int
diff --git a/fs/ext4/xattr_trusted.c b/fs/ext4/xattr_trusted.c
index ac1a52cf2a37..15b50edc6587 100644
--- a/fs/ext4/xattr_trusted.c
+++ b/fs/ext4/xattr_trusted.c
@@ -14,8 +14,8 @@
 #include "xattr.h"
 
 static size_t
-ext4_xattr_trusted_list(struct inode *inode, char *list, size_t list_size,
-			const char *name, size_t name_len)
+ext4_xattr_trusted_list(struct dentry *dentry, char *list, size_t list_size,
+		const char *name, size_t name_len, int type)
 {
 	const size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN;
 	const size_t total_len = prefix_len + name_len + 1;
@@ -32,23 +32,23 @@ ext4_xattr_trusted_list(struct inode *inode, char *list, size_t list_size,
 }
 
 static int
-ext4_xattr_trusted_get(struct inode *inode, const char *name,
-		       void *buffer, size_t size)
+ext4_xattr_trusted_get(struct dentry *dentry, const char *name, void *buffer,
+		size_t size, int type)
 {
 	if (strcmp(name, "") == 0)
 		return -EINVAL;
-	return ext4_xattr_get(inode, EXT4_XATTR_INDEX_TRUSTED, name,
-			      buffer, size);
+	return ext4_xattr_get(dentry->d_inode, EXT4_XATTR_INDEX_TRUSTED,
+			      name, buffer, size);
 }
 
 static int
-ext4_xattr_trusted_set(struct inode *inode, const char *name,
-		       const void *value, size_t size, int flags)
+ext4_xattr_trusted_set(struct dentry *dentry, const char *name,
+		const void *value, size_t size, int flags, int type)
 {
 	if (strcmp(name, "") == 0)
 		return -EINVAL;
-	return ext4_xattr_set(inode, EXT4_XATTR_INDEX_TRUSTED, name,
-			      value, size, flags);
+	return ext4_xattr_set(dentry->d_inode, EXT4_XATTR_INDEX_TRUSTED,
+			      name, value, size, flags);
 }
 
 struct xattr_handler ext4_xattr_trusted_handler = {
diff --git a/fs/ext4/xattr_user.c b/fs/ext4/xattr_user.c
index d91aa61b42aa..c4ce05746ce1 100644
--- a/fs/ext4/xattr_user.c
+++ b/fs/ext4/xattr_user.c
@@ -13,13 +13,13 @@
 #include "xattr.h"
 
 static size_t
-ext4_xattr_user_list(struct inode *inode, char *list, size_t list_size,
-		     const char *name, size_t name_len)
+ext4_xattr_user_list(struct dentry *dentry, char *list, size_t list_size,
+		     const char *name, size_t name_len, int type)
 {
 	const size_t prefix_len = XATTR_USER_PREFIX_LEN;
 	const size_t total_len = prefix_len + name_len + 1;
 
-	if (!test_opt(inode->i_sb, XATTR_USER))
+	if (!test_opt(dentry->d_sb, XATTR_USER))
 		return 0;
 
 	if (list && total_len <= list_size) {
@@ -31,26 +31,27 @@ ext4_xattr_user_list(struct inode *inode, char *list, size_t list_size,
 }
 
 static int
-ext4_xattr_user_get(struct inode *inode, const char *name,
-		    void *buffer, size_t size)
+ext4_xattr_user_get(struct dentry *dentry, const char *name,
+		    void *buffer, size_t size, int type)
 {
 	if (strcmp(name, "") == 0)
 		return -EINVAL;
-	if (!test_opt(inode->i_sb, XATTR_USER))
+	if (!test_opt(dentry->d_sb, XATTR_USER))
 		return -EOPNOTSUPP;
-	return ext4_xattr_get(inode, EXT4_XATTR_INDEX_USER, name, buffer, size);
+	return ext4_xattr_get(dentry->d_inode, EXT4_XATTR_INDEX_USER,
+			      name, buffer, size);
 }
 
 static int
-ext4_xattr_user_set(struct inode *inode, const char *name,
-		    const void *value, size_t size, int flags)
+ext4_xattr_user_set(struct dentry *dentry, const char *name,
+		    const void *value, size_t size, int flags, int type)
 {
 	if (strcmp(name, "") == 0)
 		return -EINVAL;
-	if (!test_opt(inode->i_sb, XATTR_USER))
+	if (!test_opt(dentry->d_sb, XATTR_USER))
 		return -EOPNOTSUPP;
-	return ext4_xattr_set(inode, EXT4_XATTR_INDEX_USER, name,
-			      value, size, flags);
+	return ext4_xattr_set(dentry->d_inode, EXT4_XATTR_INDEX_USER,
+			      name, value, size, flags);
 }
 
 struct xattr_handler ext4_xattr_user_handler = {
diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c
index 3eb1ea846173..87ee309d4c24 100644
--- a/fs/gfs2/acl.c
+++ b/fs/gfs2/acl.c
@@ -126,7 +126,7 @@ static int gfs2_acl_set(struct inode *inode, int type, struct posix_acl *acl)
 	error = posix_acl_to_xattr(acl, data, len);
 	if (error < 0)
 		goto out;
-	error = gfs2_xattr_set(inode, GFS2_EATYPE_SYS, name, data, len, 0);
+	error = __gfs2_xattr_set(inode, name, data, len, 0, GFS2_EATYPE_SYS);
 	if (!error)
 		set_cached_acl(inode, type, acl);
 out:
@@ -232,9 +232,10 @@ static int gfs2_acl_type(const char *name)
 	return -EINVAL;
 }
 
-static int gfs2_xattr_system_get(struct inode *inode, const char *name,
-				 void *buffer, size_t size)
+static int gfs2_xattr_system_get(struct dentry *dentry, const char *name,
+				 void *buffer, size_t size, int xtype)
 {
+	struct inode *inode = dentry->d_inode;
 	struct posix_acl *acl;
 	int type;
 	int error;
@@ -255,9 +256,11 @@ static int gfs2_xattr_system_get(struct inode *inode, const char *name,
 	return error;
 }
 
-static int gfs2_xattr_system_set(struct inode *inode, const char *name,
-				 const void *value, size_t size, int flags)
+static int gfs2_xattr_system_set(struct dentry *dentry, const char *name,
+				 const void *value, size_t size, int flags,
+				 int xtype)
 {
+	struct inode *inode = dentry->d_inode;
 	struct gfs2_sbd *sdp = GFS2_SB(inode);
 	struct posix_acl *acl = NULL;
 	int error = 0, type;
@@ -319,7 +322,7 @@ static int gfs2_xattr_system_set(struct inode *inode, const char *name,
 	}
 
 set_acl:
-	error = gfs2_xattr_set(inode, GFS2_EATYPE_SYS, name, value, size, 0);
+	error = __gfs2_xattr_set(inode, name, value, size, 0, GFS2_EATYPE_SYS);
 	if (!error) {
 		if (acl)
 			set_cached_acl(inode, type, acl);
@@ -334,6 +337,7 @@ out:
 
 struct xattr_handler gfs2_xattr_system_handler = {
 	.prefix = XATTR_SYSTEM_PREFIX,
+	.flags  = GFS2_EATYPE_SYS,
 	.get    = gfs2_xattr_system_get,
 	.set    = gfs2_xattr_system_set,
 };
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 26ba2a4c4a2d..3ff32fa793da 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -801,7 +801,8 @@ static int gfs2_security_init(struct gfs2_inode *dip, struct gfs2_inode *ip)
 		return err;
 	}
 
-	err = gfs2_xattr_set(&ip->i_inode, GFS2_EATYPE_SECURITY, name, value, len, 0);
+	err = __gfs2_xattr_set(&ip->i_inode, name, value, len, 0,
+			       GFS2_EATYPE_SECURITY);
 	kfree(value);
 	kfree(name);
 
diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c
index 912f5cbc4740..8a04108e0c22 100644
--- a/fs/gfs2/xattr.c
+++ b/fs/gfs2/xattr.c
@@ -567,18 +567,17 @@ out:
 /**
  * gfs2_xattr_get - Get a GFS2 extended attribute
  * @inode: The inode
- * @type: The type of extended attribute
  * @name: The name of the extended attribute
  * @buffer: The buffer to write the result into
  * @size: The size of the buffer
+ * @type: The type of extended attribute
  *
  * Returns: actual size of data on success, -errno on error
  */
-
-int gfs2_xattr_get(struct inode *inode, int type, const char *name,
-		   void *buffer, size_t size)
+static int gfs2_xattr_get(struct dentry *dentry, const char *name,
+		void *buffer, size_t size, int type)
 {
-	struct gfs2_inode *ip = GFS2_I(inode);
+	struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
 	struct gfs2_ea_location el;
 	int error;
 
@@ -1119,7 +1118,7 @@ static int ea_remove_stuffed(struct gfs2_inode *ip, struct gfs2_ea_location *el)
 
 /**
  * gfs2_xattr_remove - Remove a GFS2 extended attribute
- * @inode: The inode
+ * @ip: The inode
  * @type: The type of the extended attribute
  * @name: The name of the extended attribute
  *
@@ -1130,9 +1129,8 @@ static int ea_remove_stuffed(struct gfs2_inode *ip, struct gfs2_ea_location *el)
  * Returns: 0, or errno on failure
  */
 
-static int gfs2_xattr_remove(struct inode *inode, int type, const char *name)
+static int gfs2_xattr_remove(struct gfs2_inode *ip, int type, const char *name)
 {
-	struct gfs2_inode *ip = GFS2_I(inode);
 	struct gfs2_ea_location el;
 	int error;
 
@@ -1156,24 +1154,24 @@ static int gfs2_xattr_remove(struct inode *inode, int type, const char *name)
 }
 
 /**
- * gfs2_xattr_set - Set (or remove) a GFS2 extended attribute
- * @inode: The inode
- * @type: The type of the extended attribute
+ * __gfs2_xattr_set - Set (or remove) a GFS2 extended attribute
+ * @ip: The inode
  * @name: The name of the extended attribute
  * @value: The value of the extended attribute (NULL for remove)
  * @size: The size of the @value argument
  * @flags: Create or Replace
+ * @type: The type of the extended attribute
  *
  * See gfs2_xattr_remove() for details of the removal of xattrs.
  *
  * Returns: 0 or errno on failure
  */
 
-int gfs2_xattr_set(struct inode *inode, int type, const char *name,
-		   const void *value, size_t size, int flags)
+int __gfs2_xattr_set(struct inode *inode, const char *name,
+		   const void *value, size_t size, int flags, int type)
 {
-	struct gfs2_sbd *sdp = GFS2_SB(inode);
 	struct gfs2_inode *ip = GFS2_I(inode);
+	struct gfs2_sbd *sdp = GFS2_SB(inode);
 	struct gfs2_ea_location el;
 	unsigned int namel = strlen(name);
 	int error;
@@ -1184,7 +1182,7 @@ int gfs2_xattr_set(struct inode *inode, int type, const char *name,
 		return -ERANGE;
 
 	if (value == NULL)
-		return gfs2_xattr_remove(inode, type, name);
+		return gfs2_xattr_remove(ip, type, name);
 
 	if (ea_check_size(sdp, namel, size))
 		return -ERANGE;
@@ -1224,6 +1222,13 @@ int gfs2_xattr_set(struct inode *inode, int type, const char *name,
 	return error;
 }
 
+static int gfs2_xattr_set(struct dentry *dentry, const char *name,
+		const void *value, size_t size, int flags, int type)
+{
+	return __gfs2_xattr_set(dentry->d_inode, name, value,
+				size, flags, type);
+}
+
 static int ea_acl_chmod_unstuffed(struct gfs2_inode *ip,
 				  struct gfs2_ea_header *ea, char *data)
 {
@@ -1529,40 +1534,18 @@ out_alloc:
 	return error;
 }
 
-static int gfs2_xattr_user_get(struct inode *inode, const char *name,
-			       void *buffer, size_t size)
-{
-	return gfs2_xattr_get(inode, GFS2_EATYPE_USR, name, buffer, size);
-}
-
-static int gfs2_xattr_user_set(struct inode *inode, const char *name,
-			       const void *value, size_t size, int flags)
-{
-	return gfs2_xattr_set(inode, GFS2_EATYPE_USR, name, value, size, flags);
-}
-
-static int gfs2_xattr_security_get(struct inode *inode, const char *name,
-				   void *buffer, size_t size)
-{
-	return gfs2_xattr_get(inode, GFS2_EATYPE_SECURITY, name, buffer, size);
-}
-
-static int gfs2_xattr_security_set(struct inode *inode, const char *name,
-				   const void *value, size_t size, int flags)
-{
-	return gfs2_xattr_set(inode, GFS2_EATYPE_SECURITY, name, value, size, flags);
-}
-
 static struct xattr_handler gfs2_xattr_user_handler = {
 	.prefix = XATTR_USER_PREFIX,
-	.get    = gfs2_xattr_user_get,
-	.set    = gfs2_xattr_user_set,
+	.flags  = GFS2_EATYPE_USR,
+	.get    = gfs2_xattr_get,
+	.set    = gfs2_xattr_set,
 };
 
 static struct xattr_handler gfs2_xattr_security_handler = {
 	.prefix = XATTR_SECURITY_PREFIX,
-	.get    = gfs2_xattr_security_get,
-	.set    = gfs2_xattr_security_set,
+	.flags  = GFS2_EATYPE_SECURITY,
+	.get    = gfs2_xattr_get,
+	.set    = gfs2_xattr_set,
 };
 
 struct xattr_handler *gfs2_xattr_handlers[] = {
diff --git a/fs/gfs2/xattr.h b/fs/gfs2/xattr.h
index 8d6ae5813c4d..d392f8358f2f 100644
--- a/fs/gfs2/xattr.h
+++ b/fs/gfs2/xattr.h
@@ -53,10 +53,9 @@ struct gfs2_ea_location {
 	struct gfs2_ea_header *el_prev;
 };
 
-extern int gfs2_xattr_get(struct inode *inode, int type, const char *name,
-			  void *buffer, size_t size);
-extern int gfs2_xattr_set(struct inode *inode, int type, const char *name,
-			  const void *value, size_t size, int flags);
+extern int __gfs2_xattr_set(struct inode *inode, const char *name,
+			    const void *value, size_t size,
+			    int flags, int type);
 extern ssize_t gfs2_listxattr(struct dentry *dentry, char *buffer, size_t size);
 extern int gfs2_ea_dealloc(struct gfs2_inode *ip);
 
diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c
index 7edb62e97419..7cdc3196476a 100644
--- a/fs/jffs2/acl.c
+++ b/fs/jffs2/acl.c
@@ -350,8 +350,8 @@ int jffs2_acl_chmod(struct inode *inode)
 	return rc;
 }
 
-static size_t jffs2_acl_access_listxattr(struct inode *inode, char *list, size_t list_size,
-					 const char *name, size_t name_len)
+static size_t jffs2_acl_access_listxattr(struct dentry *dentry, char *list,
+		size_t list_size, const char *name, size_t name_len, int type)
 {
 	const int retlen = sizeof(POSIX_ACL_XATTR_ACCESS);
 
@@ -360,8 +360,8 @@ static size_t jffs2_acl_access_listxattr(struct inode *inode, char *list, size_t
 	return retlen;
 }
 
-static size_t jffs2_acl_default_listxattr(struct inode *inode, char *list, size_t list_size,
-					  const char *name, size_t name_len)
+static size_t jffs2_acl_default_listxattr(struct dentry *dentry, char *list,
+		size_t list_size, const char *name, size_t name_len, int type)
 {
 	const int retlen = sizeof(POSIX_ACL_XATTR_DEFAULT);
 
@@ -370,12 +370,16 @@ static size_t jffs2_acl_default_listxattr(struct inode *inode, char *list, size_
 	return retlen;
 }
 
-static int jffs2_acl_getxattr(struct inode *inode, int type, void *buffer, size_t size)
+static int jffs2_acl_getxattr(struct dentry *dentry, const char *name,
+		void *buffer, size_t size, int type)
 {
 	struct posix_acl *acl;
 	int rc;
 
-	acl = jffs2_get_acl(inode, type);
+	if (name[0] != '\0')
+		return -EINVAL;
+
+	acl = jffs2_get_acl(dentry->d_inode, type);
 	if (IS_ERR(acl))
 		return PTR_ERR(acl);
 	if (!acl)
@@ -386,26 +390,15 @@ static int jffs2_acl_getxattr(struct inode *inode, int type, void *buffer, size_
 	return rc;
 }
 
-static int jffs2_acl_access_getxattr(struct inode *inode, const char *name, void *buffer, size_t size)
-{
-	if (name[0] != '\0')
-		return -EINVAL;
-	return jffs2_acl_getxattr(inode, ACL_TYPE_ACCESS, buffer, size);
-}
-
-static int jffs2_acl_default_getxattr(struct inode *inode, const char *name, void *buffer, size_t size)
-{
-	if (name[0] != '\0')
-		return -EINVAL;
-	return jffs2_acl_getxattr(inode, ACL_TYPE_DEFAULT, buffer, size);
-}
-
-static int jffs2_acl_setxattr(struct inode *inode, int type, const void *value, size_t size)
+static int jffs2_acl_setxattr(struct dentry *dentry, const char *name,
+		const void *value, size_t size, int flags, int type)
 {
 	struct posix_acl *acl;
 	int rc;
 
-	if (!is_owner_or_cap(inode))
+	if (name[0] != '\0')
+		return -EINVAL;
+	if (!is_owner_or_cap(dentry->d_inode))
 		return -EPERM;
 
 	if (value) {
@@ -420,38 +413,24 @@ static int jffs2_acl_setxattr(struct inode *inode, int type, const void *value,
 	} else {
 		acl = NULL;
 	}
-	rc = jffs2_set_acl(inode, type, acl);
+	rc = jffs2_set_acl(dentry->d_inode, type, acl);
  out:
 	posix_acl_release(acl);
 	return rc;
 }
 
-static int jffs2_acl_access_setxattr(struct inode *inode, const char *name,
-				     const void *buffer, size_t size, int flags)
-{
-	if (name[0] != '\0')
-		return -EINVAL;
-	return jffs2_acl_setxattr(inode, ACL_TYPE_ACCESS, buffer, size);
-}
-
-static int jffs2_acl_default_setxattr(struct inode *inode, const char *name,
-				      const void *buffer, size_t size, int flags)
-{
-	if (name[0] != '\0')
-		return -EINVAL;
-	return jffs2_acl_setxattr(inode, ACL_TYPE_DEFAULT, buffer, size);
-}
-
 struct xattr_handler jffs2_acl_access_xattr_handler = {
 	.prefix	= POSIX_ACL_XATTR_ACCESS,
+	.flags	= ACL_TYPE_DEFAULT,
 	.list	= jffs2_acl_access_listxattr,
-	.get	= jffs2_acl_access_getxattr,
-	.set	= jffs2_acl_access_setxattr,
+	.get	= jffs2_acl_getxattr,
+	.set	= jffs2_acl_setxattr,
 };
 
 struct xattr_handler jffs2_acl_default_xattr_handler = {
 	.prefix	= POSIX_ACL_XATTR_DEFAULT,
+	.flags	= ACL_TYPE_DEFAULT,
 	.list	= jffs2_acl_default_listxattr,
-	.get	= jffs2_acl_default_getxattr,
-	.set	= jffs2_acl_default_setxattr,
+	.get	= jffs2_acl_getxattr,
+	.set	= jffs2_acl_setxattr,
 };
diff --git a/fs/jffs2/security.c b/fs/jffs2/security.c
index 02c39c64ecb3..eaccee058583 100644
--- a/fs/jffs2/security.c
+++ b/fs/jffs2/security.c
@@ -44,26 +44,28 @@ int jffs2_init_security(struct inode *inode, struct inode *dir)
 }
 
 /* ---- XATTR Handler for "security.*" ----------------- */
-static int jffs2_security_getxattr(struct inode *inode, const char *name,
-				   void *buffer, size_t size)
+static int jffs2_security_getxattr(struct dentry *dentry, const char *name,
+				   void *buffer, size_t size, int type)
 {
 	if (!strcmp(name, ""))
 		return -EINVAL;
 
-	return do_jffs2_getxattr(inode, JFFS2_XPREFIX_SECURITY, name, buffer, size);
+	return do_jffs2_getxattr(dentry->d_inode, JFFS2_XPREFIX_SECURITY,
+				 name, buffer, size);
 }
 
-static int jffs2_security_setxattr(struct inode *inode, const char *name, const void *buffer,
-				   size_t size, int flags)
+static int jffs2_security_setxattr(struct dentry *dentry, const char *name,
+		const void *buffer, size_t size, int flags, int type)
 {
 	if (!strcmp(name, ""))
 		return -EINVAL;
 
-	return do_jffs2_setxattr(inode, JFFS2_XPREFIX_SECURITY, name, buffer, size, flags);
+	return do_jffs2_setxattr(dentry->d_inode, JFFS2_XPREFIX_SECURITY,
+				 name, buffer, size, flags);
 }
 
-static size_t jffs2_security_listxattr(struct inode *inode, char *list, size_t list_size,
-				       const char *name, size_t name_len)
+static size_t jffs2_security_listxattr(struct dentry *dentry, char *list,
+		size_t list_size, const char *name, size_t name_len, int type)
 {
 	size_t retlen = XATTR_SECURITY_PREFIX_LEN + name_len + 1;
 
diff --git a/fs/jffs2/xattr.c b/fs/jffs2/xattr.c
index 4b107881acd5..9e75c62c85d6 100644
--- a/fs/jffs2/xattr.c
+++ b/fs/jffs2/xattr.c
@@ -990,9 +990,11 @@ ssize_t jffs2_listxattr(struct dentry *dentry, char *buffer, size_t size)
 		if (!xhandle)
 			continue;
 		if (buffer) {
-			rc = xhandle->list(inode, buffer+len, size-len, xd->xname, xd->name_len);
+			rc = xhandle->list(dentry, buffer+len, size-len,
+					   xd->xname, xd->name_len, xd->flags);
 		} else {
-			rc = xhandle->list(inode, NULL, 0, xd->xname, xd->name_len);
+			rc = xhandle->list(dentry, NULL, 0, xd->xname,
+					   xd->name_len, xd->flags);
 		}
 		if (rc < 0)
 			goto out;
diff --git a/fs/jffs2/xattr_trusted.c b/fs/jffs2/xattr_trusted.c
index 8ec5765ef348..3e5a5e356e05 100644
--- a/fs/jffs2/xattr_trusted.c
+++ b/fs/jffs2/xattr_trusted.c
@@ -16,24 +16,26 @@
 #include <linux/mtd/mtd.h>
 #include "nodelist.h"
 
-static int jffs2_trusted_getxattr(struct inode *inode, const char *name,
-				  void *buffer, size_t size)
+static int jffs2_trusted_getxattr(struct dentry *dentry, const char *name,
+		void *buffer, size_t size, int type)
 {
 	if (!strcmp(name, ""))
 		return -EINVAL;
-	return do_jffs2_getxattr(inode, JFFS2_XPREFIX_TRUSTED, name, buffer, size);
+	return do_jffs2_getxattr(dentry->d_inode, JFFS2_XPREFIX_TRUSTED,
+				 name, buffer, size);
 }
 
-static int jffs2_trusted_setxattr(struct inode *inode, const char *name, const void *buffer,
-				  size_t size, int flags)
+static int jffs2_trusted_setxattr(struct dentry *dentry, const char *name,
+		const void *buffer, size_t size, int flags, int type)
 {
 	if (!strcmp(name, ""))
 		return -EINVAL;
-	return do_jffs2_setxattr(inode, JFFS2_XPREFIX_TRUSTED, name, buffer, size, flags);
+	return do_jffs2_setxattr(dentry->d_inode, JFFS2_XPREFIX_TRUSTED,
+				 name, buffer, size, flags);
 }
 
-static size_t jffs2_trusted_listxattr(struct inode *inode, char *list, size_t list_size,
-				      const char *name, size_t name_len)
+static size_t jffs2_trusted_listxattr(struct dentry *dentry, char *list,
+		size_t list_size, const char *name, size_t name_len, int type)
 {
 	size_t retlen = XATTR_TRUSTED_PREFIX_LEN + name_len + 1;
 
diff --git a/fs/jffs2/xattr_user.c b/fs/jffs2/xattr_user.c
index 8bbeab90ada1..8544af67dffe 100644
--- a/fs/jffs2/xattr_user.c
+++ b/fs/jffs2/xattr_user.c
@@ -16,24 +16,26 @@
 #include <linux/mtd/mtd.h>
 #include "nodelist.h"
 
-static int jffs2_user_getxattr(struct inode *inode, const char *name,
-			       void *buffer, size_t size)
+static int jffs2_user_getxattr(struct dentry *dentry, const char *name,
+			       void *buffer, size_t size, int type)
 {
 	if (!strcmp(name, ""))
 		return -EINVAL;
-	return do_jffs2_getxattr(inode, JFFS2_XPREFIX_USER, name, buffer, size);
+	return do_jffs2_getxattr(dentry->d_inode, JFFS2_XPREFIX_USER,
+				 name, buffer, size);
 }
 
-static int jffs2_user_setxattr(struct inode *inode, const char *name, const void *buffer,
-			       size_t size, int flags)
+static int jffs2_user_setxattr(struct dentry *dentry, const char *name,
+		const void *buffer, size_t size, int flags, int type)
 {
 	if (!strcmp(name, ""))
 		return -EINVAL;
-	return do_jffs2_setxattr(inode, JFFS2_XPREFIX_USER, name, buffer, size, flags);
+	return do_jffs2_setxattr(dentry->d_inode, JFFS2_XPREFIX_USER,
+				 name, buffer, size, flags);
 }
 
-static size_t jffs2_user_listxattr(struct inode *inode, char *list, size_t list_size,
-				   const char *name, size_t name_len)
+static size_t jffs2_user_listxattr(struct dentry *dentry, char *list,
+		size_t list_size, const char *name, size_t name_len, int type)
 {
 	size_t retlen = XATTR_USER_PREFIX_LEN + name_len + 1;
 
diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c
index fbeaec762103..e3e47415d851 100644
--- a/fs/ocfs2/acl.c
+++ b/fs/ocfs2/acl.c
@@ -331,13 +331,14 @@ cleanup:
 	return ret;
 }
 
-static size_t ocfs2_xattr_list_acl_access(struct inode *inode,
+static size_t ocfs2_xattr_list_acl_access(struct dentry *dentry,
 					  char *list,
 					  size_t list_len,
 					  const char *name,
-					  size_t name_len)
+					  size_t name_len,
+					  int type)
 {
-	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+	struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
 	const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS);
 
 	if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
@@ -348,13 +349,14 @@ static size_t ocfs2_xattr_list_acl_access(struct inode *inode,
 	return size;
 }
 
-static size_t ocfs2_xattr_list_acl_default(struct inode *inode,
+static size_t ocfs2_xattr_list_acl_default(struct dentry *dentry,
 					   char *list,
 					   size_t list_len,
 					   const char *name,
-					   size_t name_len)
+					   size_t name_len,
+					   int type)
 {
-	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+	struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
 	const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT);
 
 	if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
@@ -365,19 +367,19 @@ static size_t ocfs2_xattr_list_acl_default(struct inode *inode,
 	return size;
 }
 
-static int ocfs2_xattr_get_acl(struct inode *inode,
-			       int type,
-			       void *buffer,
-			       size_t size)
+static int ocfs2_xattr_get_acl(struct dentry *dentry, const char *name,
+		void *buffer, size_t size, int type)
 {
-	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+	struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
 	struct posix_acl *acl;
 	int ret;
 
+	if (strcmp(name, "") != 0)
+		return -EINVAL;
 	if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
 		return -EOPNOTSUPP;
 
-	acl = ocfs2_get_acl(inode, type);
+	acl = ocfs2_get_acl(dentry->d_inode, type);
 	if (IS_ERR(acl))
 		return PTR_ERR(acl);
 	if (acl == NULL)
@@ -388,35 +390,16 @@ static int ocfs2_xattr_get_acl(struct inode *inode,
 	return ret;
 }
 
-static int ocfs2_xattr_get_acl_access(struct inode *inode,
-				      const char *name,
-				      void *buffer,
-				      size_t size)
-{
-	if (strcmp(name, "") != 0)
-		return -EINVAL;
-	return ocfs2_xattr_get_acl(inode, ACL_TYPE_ACCESS, buffer, size);
-}
-
-static int ocfs2_xattr_get_acl_default(struct inode *inode,
-				       const char *name,
-				       void *buffer,
-				       size_t size)
-{
-	if (strcmp(name, "") != 0)
-		return -EINVAL;
-	return ocfs2_xattr_get_acl(inode, ACL_TYPE_DEFAULT, buffer, size);
-}
-
-static int ocfs2_xattr_set_acl(struct inode *inode,
-			       int type,
-			       const void *value,
-			       size_t size)
+static int ocfs2_xattr_set_acl(struct dentry *dentry, const char *name,
+		const void *value, size_t size, int flags, int type)
 {
+	struct inode *inode = dentry->d_inode;
 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
 	struct posix_acl *acl;
 	int ret = 0;
 
+	if (strcmp(name, "") != 0)
+		return -EINVAL;
 	if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
 		return -EOPNOTSUPP;
 
@@ -442,38 +425,18 @@ cleanup:
 	return ret;
 }
 
-static int ocfs2_xattr_set_acl_access(struct inode *inode,
-				      const char *name,
-				      const void *value,
-				      size_t size,
-				      int flags)
-{
-	if (strcmp(name, "") != 0)
-		return -EINVAL;
-	return ocfs2_xattr_set_acl(inode, ACL_TYPE_ACCESS, value, size);
-}
-
-static int ocfs2_xattr_set_acl_default(struct inode *inode,
-				       const char *name,
-				       const void *value,
-				       size_t size,
-				       int flags)
-{
-	if (strcmp(name, "") != 0)
-		return -EINVAL;
-	return ocfs2_xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size);
-}
-
 struct xattr_handler ocfs2_xattr_acl_access_handler = {
 	.prefix	= POSIX_ACL_XATTR_ACCESS,
+	.flags	= ACL_TYPE_ACCESS,
 	.list	= ocfs2_xattr_list_acl_access,
-	.get	= ocfs2_xattr_get_acl_access,
-	.set	= ocfs2_xattr_set_acl_access,
+	.get	= ocfs2_xattr_get_acl,
+	.set	= ocfs2_xattr_set_acl,
 };
 
 struct xattr_handler ocfs2_xattr_acl_default_handler = {
 	.prefix	= POSIX_ACL_XATTR_DEFAULT,
+	.flags	= ACL_TYPE_DEFAULT,
 	.list	= ocfs2_xattr_list_acl_default,
-	.get	= ocfs2_xattr_get_acl_default,
-	.set	= ocfs2_xattr_set_acl_default,
+	.get	= ocfs2_xattr_get_acl,
+	.set	= ocfs2_xattr_set_acl,
 };
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index fe3419068df2..43c114831c0d 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -205,8 +205,6 @@ static int ocfs2_get_xattr_tree_value_root(struct super_block *sb,
 					   int offset,
 					   struct ocfs2_xattr_value_root **xv,
 					   struct buffer_head **bh);
-static int ocfs2_xattr_security_set(struct inode *inode, const char *name,
-				    const void *value, size_t size, int flags);
 
 static inline u16 ocfs2_xattr_buckets_per_cluster(struct ocfs2_super *osb)
 {
@@ -6978,9 +6976,9 @@ int ocfs2_init_security_and_acl(struct inode *dir,
 
 	ret = ocfs2_init_security_get(inode, dir, &si);
 	if (!ret) {
-		ret = ocfs2_xattr_security_set(inode, si.name,
-					       si.value, si.value_len,
-					       XATTR_CREATE);
+		ret = ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_SECURITY,
+				      si.name, si.value, si.value_len,
+				      XATTR_CREATE);
 		if (ret) {
 			mlog_errno(ret);
 			goto leave;
@@ -7008,9 +7006,9 @@ leave:
 /*
  * 'security' attributes support
  */
-static size_t ocfs2_xattr_security_list(struct inode *inode, char *list,
+static size_t ocfs2_xattr_security_list(struct dentry *dentry, char *list,
 					size_t list_size, const char *name,
-					size_t name_len)
+					size_t name_len, int type)
 {
 	const size_t prefix_len = XATTR_SECURITY_PREFIX_LEN;
 	const size_t total_len = prefix_len + name_len + 1;
@@ -7023,23 +7021,23 @@ static size_t ocfs2_xattr_security_list(struct inode *inode, char *list,
 	return total_len;
 }
 
-static int ocfs2_xattr_security_get(struct inode *inode, const char *name,
-				    void *buffer, size_t size)
+static int ocfs2_xattr_security_get(struct dentry *dentry, const char *name,
+				    void *buffer, size_t size, int type)
 {
 	if (strcmp(name, "") == 0)
 		return -EINVAL;
-	return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_SECURITY, name,
-			       buffer, size);
+	return ocfs2_xattr_get(dentry->d_inode, OCFS2_XATTR_INDEX_SECURITY,
+			       name, buffer, size);
 }
 
-static int ocfs2_xattr_security_set(struct inode *inode, const char *name,
-				    const void *value, size_t size, int flags)
+static int ocfs2_xattr_security_set(struct dentry *dentry, const char *name,
+		const void *value, size_t size, int flags, int type)
 {
 	if (strcmp(name, "") == 0)
 		return -EINVAL;
 
-	return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_SECURITY, name, value,
-			       size, flags);
+	return ocfs2_xattr_set(dentry->d_inode, OCFS2_XATTR_INDEX_SECURITY,
+			       name, value, size, flags);
 }
 
 int ocfs2_init_security_get(struct inode *inode,
@@ -7076,9 +7074,9 @@ struct xattr_handler ocfs2_xattr_security_handler = {
 /*
  * 'trusted' attributes support
  */
-static size_t ocfs2_xattr_trusted_list(struct inode *inode, char *list,
+static size_t ocfs2_xattr_trusted_list(struct dentry *dentry, char *list,
 				       size_t list_size, const char *name,
-				       size_t name_len)
+				       size_t name_len, int type)
 {
 	const size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN;
 	const size_t total_len = prefix_len + name_len + 1;
@@ -7091,23 +7089,23 @@ static size_t ocfs2_xattr_trusted_list(struct inode *inode, char *list,
 	return total_len;
 }
 
-static int ocfs2_xattr_trusted_get(struct inode *inode, const char *name,
-				   void *buffer, size_t size)
+static int ocfs2_xattr_trusted_get(struct dentry *dentry, const char *name,
+		void *buffer, size_t size, int type)
 {
 	if (strcmp(name, "") == 0)
 		return -EINVAL;
-	return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_TRUSTED, name,
-			       buffer, size);
+	return ocfs2_xattr_get(dentry->d_inode, OCFS2_XATTR_INDEX_TRUSTED,
+			       name, buffer, size);
 }
 
-static int ocfs2_xattr_trusted_set(struct inode *inode, const char *name,
-				   const void *value, size_t size, int flags)
+static int ocfs2_xattr_trusted_set(struct dentry *dentry, const char *name,
+		const void *value, size_t size, int flags, int type)
 {
 	if (strcmp(name, "") == 0)
 		return -EINVAL;
 
-	return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_TRUSTED, name, value,
-			       size, flags);
+	return ocfs2_xattr_set(dentry->d_inode, OCFS2_XATTR_INDEX_TRUSTED,
+			       name, value, size, flags);
 }
 
 struct xattr_handler ocfs2_xattr_trusted_handler = {
@@ -7120,13 +7118,13 @@ struct xattr_handler ocfs2_xattr_trusted_handler = {
 /*
  * 'user' attributes support
  */
-static size_t ocfs2_xattr_user_list(struct inode *inode, char *list,
+static size_t ocfs2_xattr_user_list(struct dentry *dentry, char *list,
 				    size_t list_size, const char *name,
-				    size_t name_len)
+				    size_t name_len, int type)
 {
 	const size_t prefix_len = XATTR_USER_PREFIX_LEN;
 	const size_t total_len = prefix_len + name_len + 1;
-	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+	struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
 
 	if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
 		return 0;
@@ -7139,31 +7137,31 @@ static size_t ocfs2_xattr_user_list(struct inode *inode, char *list,
 	return total_len;
 }
 
-static int ocfs2_xattr_user_get(struct inode *inode, const char *name,
-				void *buffer, size_t size)
+static int ocfs2_xattr_user_get(struct dentry *dentry, const char *name,
+		void *buffer, size_t size, int type)
 {
-	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+	struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
 
 	if (strcmp(name, "") == 0)
 		return -EINVAL;
 	if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
 		return -EOPNOTSUPP;
-	return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_USER, name,
+	return ocfs2_xattr_get(dentry->d_inode, OCFS2_XATTR_INDEX_USER, name,
 			       buffer, size);
 }
 
-static int ocfs2_xattr_user_set(struct inode *inode, const char *name,
-				const void *value, size_t size, int flags)
+static int ocfs2_xattr_user_set(struct dentry *dentry, const char *name,
+		const void *value, size_t size, int flags, int type)
 {
-	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+	struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
 
 	if (strcmp(name, "") == 0)
 		return -EINVAL;
 	if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
 		return -EOPNOTSUPP;
 
-	return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_USER, name, value,
-			       size, flags);
+	return ocfs2_xattr_set(dentry->d_inode, OCFS2_XATTR_INDEX_USER,
+			       name, value, size, flags);
 }
 
 struct xattr_handler ocfs2_xattr_user_handler = {
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 58aa8e75f7f5..8c7033a8b67e 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -48,6 +48,7 @@
 #include <net/checksum.h>
 #include <linux/stat.h>
 #include <linux/quotaops.h>
+#include <linux/security.h>
 
 #define PRIVROOT_NAME ".reiserfs_priv"
 #define XAROOT_NAME   "xattrs"
@@ -726,15 +727,14 @@ ssize_t
 reiserfs_getxattr(struct dentry * dentry, const char *name, void *buffer,
 		  size_t size)
 {
-	struct inode *inode = dentry->d_inode;
 	struct xattr_handler *handler;
 
-	handler = find_xattr_handler_prefix(inode->i_sb->s_xattr, name);
+	handler = find_xattr_handler_prefix(dentry->d_sb->s_xattr, name);
 
-	if (!handler || get_inode_sd_version(inode) == STAT_DATA_V1)
+	if (!handler || get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1)
 		return -EOPNOTSUPP;
 
-	return handler->get(inode, name, buffer, size);
+	return handler->get(dentry, name, buffer, size, handler->flags);
 }
 
 /*
@@ -746,15 +746,14 @@ int
 reiserfs_setxattr(struct dentry *dentry, const char *name, const void *value,
 		  size_t size, int flags)
 {
-	struct inode *inode = dentry->d_inode;
 	struct xattr_handler *handler;
 
-	handler = find_xattr_handler_prefix(inode->i_sb->s_xattr, name);
+	handler = find_xattr_handler_prefix(dentry->d_sb->s_xattr, name);
 
-	if (!handler || get_inode_sd_version(inode) == STAT_DATA_V1)
+	if (!handler || get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1)
 		return -EOPNOTSUPP;
 
-	return handler->set(inode, name, value, size, flags);
+	return handler->set(dentry, name, value, size, flags, handler->flags);
 }
 
 /*
@@ -764,21 +763,20 @@ reiserfs_setxattr(struct dentry *dentry, const char *name, const void *value,
  */
 int reiserfs_removexattr(struct dentry *dentry, const char *name)
 {
-	struct inode *inode = dentry->d_inode;
 	struct xattr_handler *handler;
-	handler = find_xattr_handler_prefix(inode->i_sb->s_xattr, name);
+	handler = find_xattr_handler_prefix(dentry->d_sb->s_xattr, name);
 
-	if (!handler || get_inode_sd_version(inode) == STAT_DATA_V1)
+	if (!handler || get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1)
 		return -EOPNOTSUPP;
 
-	return handler->set(inode, name, NULL, 0, XATTR_REPLACE);
+	return handler->set(dentry, name, NULL, 0, XATTR_REPLACE, handler->flags);
 }
 
 struct listxattr_buf {
 	size_t size;
 	size_t pos;
 	char *buf;
-	struct inode *inode;
+	struct dentry *dentry;
 };
 
 static int listxattr_filler(void *buf, const char *name, int namelen,
@@ -789,17 +787,19 @@ static int listxattr_filler(void *buf, const char *name, int namelen,
 	if (name[0] != '.' ||
 	    (namelen != 1 && (name[1] != '.' || namelen != 2))) {
 		struct xattr_handler *handler;
-		handler = find_xattr_handler_prefix(b->inode->i_sb->s_xattr,
+		handler = find_xattr_handler_prefix(b->dentry->d_sb->s_xattr,
 						    name);
 		if (!handler)	/* Unsupported xattr name */
 			return 0;
 		if (b->buf) {
-			size = handler->list(b->inode, b->buf + b->pos,
-					 b->size, name, namelen);
+			size = handler->list(b->dentry, b->buf + b->pos,
+					 b->size, name, namelen,
+					 handler->flags);
 			if (size > b->size)
 				return -ERANGE;
 		} else {
-			size = handler->list(b->inode, NULL, 0, name, namelen);
+			size = handler->list(b->dentry, NULL, 0, name,
+					     namelen, handler->flags);
 		}
 
 		b->pos += size;
@@ -820,7 +820,7 @@ ssize_t reiserfs_listxattr(struct dentry * dentry, char *buffer, size_t size)
 	int err = 0;
 	loff_t pos = 0;
 	struct listxattr_buf buf = {
-		.inode = dentry->d_inode,
+		.dentry = dentry,
 		.buf = buffer,
 		.size = buffer ? size : 0,
 	};
diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c
index 35d6e672a279..cc32e6ada67b 100644
--- a/fs/reiserfs/xattr_acl.c
+++ b/fs/reiserfs/xattr_acl.c
@@ -15,8 +15,10 @@ static int reiserfs_set_acl(struct reiserfs_transaction_handle *th,
 			    struct posix_acl *acl);
 
 static int
-xattr_set_acl(struct inode *inode, int type, const void *value, size_t size)
+posix_acl_set(struct dentry *dentry, const char *name, const void *value,
+		size_t size, int flags, int type)
 {
+	struct inode *inode = dentry->d_inode;
 	struct posix_acl *acl;
 	int error, error2;
 	struct reiserfs_transaction_handle th;
@@ -60,15 +62,16 @@ xattr_set_acl(struct inode *inode, int type, const void *value, size_t size)
 }
 
 static int
-xattr_get_acl(struct inode *inode, int type, void *buffer, size_t size)
+posix_acl_get(struct dentry *dentry, const char *name, void *buffer,
+		size_t size, int type)
 {
 	struct posix_acl *acl;
 	int error;
 
-	if (!reiserfs_posixacl(inode->i_sb))
+	if (!reiserfs_posixacl(dentry->d_sb))
 		return -EOPNOTSUPP;
 
-	acl = reiserfs_get_acl(inode, type);
+	acl = reiserfs_get_acl(dentry->d_inode, type);
 	if (IS_ERR(acl))
 		return PTR_ERR(acl);
 	if (acl == NULL)
@@ -482,30 +485,12 @@ int reiserfs_acl_chmod(struct inode *inode)
 	return error;
 }
 
-static int
-posix_acl_access_get(struct inode *inode, const char *name,
-		     void *buffer, size_t size)
-{
-	if (strlen(name) != sizeof(POSIX_ACL_XATTR_ACCESS) - 1)
-		return -EINVAL;
-	return xattr_get_acl(inode, ACL_TYPE_ACCESS, buffer, size);
-}
-
-static int
-posix_acl_access_set(struct inode *inode, const char *name,
-		     const void *value, size_t size, int flags)
-{
-	if (strlen(name) != sizeof(POSIX_ACL_XATTR_ACCESS) - 1)
-		return -EINVAL;
-	return xattr_set_acl(inode, ACL_TYPE_ACCESS, value, size);
-}
-
-static size_t posix_acl_access_list(struct inode *inode, char *list,
+static size_t posix_acl_access_list(struct dentry *dentry, char *list,
 				    size_t list_size, const char *name,
-				    size_t name_len)
+				    size_t name_len, int type)
 {
 	const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS);
-	if (!reiserfs_posixacl(inode->i_sb))
+	if (!reiserfs_posixacl(dentry->d_sb))
 		return 0;
 	if (list && size <= list_size)
 		memcpy(list, POSIX_ACL_XATTR_ACCESS, size);
@@ -514,35 +499,18 @@ static size_t posix_acl_access_list(struct inode *inode, char *list,
 
 struct xattr_handler reiserfs_posix_acl_access_handler = {
 	.prefix = POSIX_ACL_XATTR_ACCESS,
-	.get = posix_acl_access_get,
-	.set = posix_acl_access_set,
+	.flags = ACL_TYPE_ACCESS,
+	.get = posix_acl_get,
+	.set = posix_acl_set,
 	.list = posix_acl_access_list,
 };
 
-static int
-posix_acl_default_get(struct inode *inode, const char *name,
-		      void *buffer, size_t size)
-{
-	if (strlen(name) != sizeof(POSIX_ACL_XATTR_DEFAULT) - 1)
-		return -EINVAL;
-	return xattr_get_acl(inode, ACL_TYPE_DEFAULT, buffer, size);
-}
-
-static int
-posix_acl_default_set(struct inode *inode, const char *name,
-		      const void *value, size_t size, int flags)
-{
-	if (strlen(name) != sizeof(POSIX_ACL_XATTR_DEFAULT) - 1)
-		return -EINVAL;
-	return xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size);
-}
-
-static size_t posix_acl_default_list(struct inode *inode, char *list,
+static size_t posix_acl_default_list(struct dentry *dentry, char *list,
 				     size_t list_size, const char *name,
-				     size_t name_len)
+				     size_t name_len, int type)
 {
 	const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT);
-	if (!reiserfs_posixacl(inode->i_sb))
+	if (!reiserfs_posixacl(dentry->d_sb))
 		return 0;
 	if (list && size <= list_size)
 		memcpy(list, POSIX_ACL_XATTR_DEFAULT, size);
@@ -551,7 +519,8 @@ static size_t posix_acl_default_list(struct inode *inode, char *list,
 
 struct xattr_handler reiserfs_posix_acl_default_handler = {
 	.prefix = POSIX_ACL_XATTR_DEFAULT,
-	.get = posix_acl_default_get,
-	.set = posix_acl_default_set,
+	.flags = ACL_TYPE_DEFAULT,
+	.get = posix_acl_get,
+	.set = posix_acl_set,
 	.list = posix_acl_default_list,
 };
diff --git a/fs/reiserfs/xattr_security.c b/fs/reiserfs/xattr_security.c
index a92c8792c0f6..d8b5bfcbdd30 100644
--- a/fs/reiserfs/xattr_security.c
+++ b/fs/reiserfs/xattr_security.c
@@ -8,36 +8,37 @@
 #include <asm/uaccess.h>
 
 static int
-security_get(struct inode *inode, const char *name, void *buffer, size_t size)
+security_get(struct dentry *dentry, const char *name, void *buffer, size_t size,
+		int handler_flags)
 {
 	if (strlen(name) < sizeof(XATTR_SECURITY_PREFIX))
 		return -EINVAL;
 
-	if (IS_PRIVATE(inode))
+	if (IS_PRIVATE(dentry->d_inode))
 		return -EPERM;
 
-	return reiserfs_xattr_get(inode, name, buffer, size);
+	return reiserfs_xattr_get(dentry->d_inode, name, buffer, size);
 }
 
 static int
-security_set(struct inode *inode, const char *name, const void *buffer,
-	     size_t size, int flags)
+security_set(struct dentry *dentry, const char *name, const void *buffer,
+	     size_t size, int flags, int handler_flags)
 {
 	if (strlen(name) < sizeof(XATTR_SECURITY_PREFIX))
 		return -EINVAL;
 
-	if (IS_PRIVATE(inode))
+	if (IS_PRIVATE(dentry->d_inode))
 		return -EPERM;
 
-	return reiserfs_xattr_set(inode, name, buffer, size, flags);
+	return reiserfs_xattr_set(dentry->d_inode, name, buffer, size, flags);
 }
 
-static size_t security_list(struct inode *inode, char *list, size_t list_len,
-			    const char *name, size_t namelen)
+static size_t security_list(struct dentry *dentry, char *list, size_t list_len,
+			    const char *name, size_t namelen, int handler_flags)
 {
 	const size_t len = namelen + 1;
 
-	if (IS_PRIVATE(inode))
+	if (IS_PRIVATE(dentry->d_inode))
 		return 0;
 
 	if (list && len <= list_len) {
diff --git a/fs/reiserfs/xattr_trusted.c b/fs/reiserfs/xattr_trusted.c
index a865042f75e2..5b08aaca3daf 100644
--- a/fs/reiserfs/xattr_trusted.c
+++ b/fs/reiserfs/xattr_trusted.c
@@ -8,36 +8,37 @@
 #include <asm/uaccess.h>
 
 static int
-trusted_get(struct inode *inode, const char *name, void *buffer, size_t size)
+trusted_get(struct dentry *dentry, const char *name, void *buffer, size_t size,
+	    int handler_flags)
 {
 	if (strlen(name) < sizeof(XATTR_TRUSTED_PREFIX))
 		return -EINVAL;
 
-	if (!capable(CAP_SYS_ADMIN) || IS_PRIVATE(inode))
+	if (!capable(CAP_SYS_ADMIN) || IS_PRIVATE(dentry->d_inode))
 		return -EPERM;
 
-	return reiserfs_xattr_get(inode, name, buffer, size);
+	return reiserfs_xattr_get(dentry->d_inode, name, buffer, size);
 }
 
 static int
-trusted_set(struct inode *inode, const char *name, const void *buffer,
-	    size_t size, int flags)
+trusted_set(struct dentry *dentry, const char *name, const void *buffer,
+	    size_t size, int flags, int handler_flags)
 {
 	if (strlen(name) < sizeof(XATTR_TRUSTED_PREFIX))
 		return -EINVAL;
 
-	if (!capable(CAP_SYS_ADMIN) || IS_PRIVATE(inode))
+	if (!capable(CAP_SYS_ADMIN) || IS_PRIVATE(dentry->d_inode))
 		return -EPERM;
 
-	return reiserfs_xattr_set(inode, name, buffer, size, flags);
+	return reiserfs_xattr_set(dentry->d_inode, name, buffer, size, flags);
 }
 
-static size_t trusted_list(struct inode *inode, char *list, size_t list_size,
-			   const char *name, size_t name_len)
+static size_t trusted_list(struct dentry *dentry, char *list, size_t list_size,
+			   const char *name, size_t name_len, int handler_flags)
 {
 	const size_t len = name_len + 1;
 
-	if (!capable(CAP_SYS_ADMIN) || IS_PRIVATE(inode))
+	if (!capable(CAP_SYS_ADMIN) || IS_PRIVATE(dentry->d_inode))
 		return 0;
 
 	if (list && len <= list_size) {
diff --git a/fs/reiserfs/xattr_user.c b/fs/reiserfs/xattr_user.c
index e3238dc4f3db..75d59c49b911 100644
--- a/fs/reiserfs/xattr_user.c
+++ b/fs/reiserfs/xattr_user.c
@@ -7,34 +7,35 @@
 #include <asm/uaccess.h>
 
 static int
-user_get(struct inode *inode, const char *name, void *buffer, size_t size)
+user_get(struct dentry *dentry, const char *name, void *buffer, size_t size,
+	 int handler_flags)
 {
 
 	if (strlen(name) < sizeof(XATTR_USER_PREFIX))
 		return -EINVAL;
-	if (!reiserfs_xattrs_user(inode->i_sb))
+	if (!reiserfs_xattrs_user(dentry->d_sb))
 		return -EOPNOTSUPP;
-	return reiserfs_xattr_get(inode, name, buffer, size);
+	return reiserfs_xattr_get(dentry->d_inode, name, buffer, size);
 }
 
 static int
-user_set(struct inode *inode, const char *name, const void *buffer,
-	 size_t size, int flags)
+user_set(struct dentry *dentry, const char *name, const void *buffer,
+	 size_t size, int flags, int handler_flags)
 {
 	if (strlen(name) < sizeof(XATTR_USER_PREFIX))
 		return -EINVAL;
 
-	if (!reiserfs_xattrs_user(inode->i_sb))
+	if (!reiserfs_xattrs_user(dentry->d_sb))
 		return -EOPNOTSUPP;
-	return reiserfs_xattr_set(inode, name, buffer, size, flags);
+	return reiserfs_xattr_set(dentry->d_inode, name, buffer, size, flags);
 }
 
-static size_t user_list(struct inode *inode, char *list, size_t list_size,
-			const char *name, size_t name_len)
+static size_t user_list(struct dentry *dentry, char *list, size_t list_size,
+			const char *name, size_t name_len, int handler_flags)
 {
 	const size_t len = name_len + 1;
 
-	if (!reiserfs_xattrs_user(inode->i_sb))
+	if (!reiserfs_xattrs_user(dentry->d_sb))
 		return 0;
 	if (list && len <= list_size) {
 		memcpy(list, name, name_len);
diff --git a/fs/xattr.c b/fs/xattr.c
index 6d4f6d3449fb..46f87e828b48 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -615,12 +615,11 @@ ssize_t
 generic_getxattr(struct dentry *dentry, const char *name, void *buffer, size_t size)
 {
 	struct xattr_handler *handler;
-	struct inode *inode = dentry->d_inode;
 
-	handler = xattr_resolve_name(inode->i_sb->s_xattr, &name);
+	handler = xattr_resolve_name(dentry->d_sb->s_xattr, &name);
 	if (!handler)
 		return -EOPNOTSUPP;
-	return handler->get(inode, name, buffer, size);
+	return handler->get(dentry, name, buffer, size, handler->flags);
 }
 
 /*
@@ -630,18 +629,20 @@ generic_getxattr(struct dentry *dentry, const char *name, void *buffer, size_t s
 ssize_t
 generic_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size)
 {
-	struct inode *inode = dentry->d_inode;
-	struct xattr_handler *handler, **handlers = inode->i_sb->s_xattr;
+	struct xattr_handler *handler, **handlers = dentry->d_sb->s_xattr;
 	unsigned int size = 0;
 
 	if (!buffer) {
-		for_each_xattr_handler(handlers, handler)
-			size += handler->list(inode, NULL, 0, NULL, 0);
+		for_each_xattr_handler(handlers, handler) {
+			size += handler->list(dentry, NULL, 0, NULL, 0,
+					      handler->flags);
+		}
 	} else {
 		char *buf = buffer;
 
 		for_each_xattr_handler(handlers, handler) {
-			size = handler->list(inode, buf, buffer_size, NULL, 0);
+			size = handler->list(dentry, buf, buffer_size,
+					     NULL, 0, handler->flags);
 			if (size > buffer_size)
 				return -ERANGE;
 			buf += size;
@@ -659,14 +660,13 @@ int
 generic_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags)
 {
 	struct xattr_handler *handler;
-	struct inode *inode = dentry->d_inode;
 
 	if (size == 0)
 		value = "";  /* empty EA, do not remove */
-	handler = xattr_resolve_name(inode->i_sb->s_xattr, &name);
+	handler = xattr_resolve_name(dentry->d_sb->s_xattr, &name);
 	if (!handler)
 		return -EOPNOTSUPP;
-	return handler->set(inode, name, value, size, flags);
+	return handler->set(dentry, name, value, size, 0, handler->flags);
 }
 
 /*
@@ -677,12 +677,12 @@ int
 generic_removexattr(struct dentry *dentry, const char *name)
 {
 	struct xattr_handler *handler;
-	struct inode *inode = dentry->d_inode;
 
-	handler = xattr_resolve_name(inode->i_sb->s_xattr, &name);
+	handler = xattr_resolve_name(dentry->d_sb->s_xattr, &name);
 	if (!handler)
 		return -EOPNOTSUPP;
-	return handler->set(inode, name, NULL, 0, XATTR_REPLACE);
+	return handler->set(dentry, name, NULL, 0,
+			    XATTR_REPLACE, handler->flags);
 }
 
 EXPORT_SYMBOL(generic_getxattr);
diff --git a/fs/xfs/linux-2.6/xfs_acl.c b/fs/xfs/linux-2.6/xfs_acl.c
index 69e598b6986f..2512125dfa7c 100644
--- a/fs/xfs/linux-2.6/xfs_acl.c
+++ b/fs/xfs/linux-2.6/xfs_acl.c
@@ -354,37 +354,14 @@ xfs_acl_chmod(struct inode *inode)
 	return error;
 }
 
-/*
- * System xattr handlers.
- *
- * Currently Posix ACLs are the only system namespace extended attribute
- * handlers supported by XFS, so we just implement the handlers here.
- * If we ever support other system extended attributes this will need
- * some refactoring.
- */
-
 static int
-xfs_decode_acl(const char *name)
-{
-	if (strcmp(name, "posix_acl_access") == 0)
-		return ACL_TYPE_ACCESS;
-	else if (strcmp(name, "posix_acl_default") == 0)
-		return ACL_TYPE_DEFAULT;
-	return -EINVAL;
-}
-
-static int
-xfs_xattr_system_get(struct inode *inode, const char *name,
-		void *value, size_t size)
+xfs_xattr_acl_get(struct dentry *dentry, const char *name,
+		void *value, size_t size, int type)
 {
 	struct posix_acl *acl;
-	int type, error;
-
-	type = xfs_decode_acl(name);
-	if (type < 0)
-		return type;
+	int error;
 
-	acl = xfs_get_acl(inode, type);
+	acl = xfs_get_acl(dentry->d_inode, type);
 	if (IS_ERR(acl))
 		return PTR_ERR(acl);
 	if (acl == NULL)
@@ -397,15 +374,13 @@ xfs_xattr_system_get(struct inode *inode, const char *name,
 }
 
 static int
-xfs_xattr_system_set(struct inode *inode, const char *name,
-		const void *value, size_t size, int flags)
+xfs_xattr_acl_set(struct dentry *dentry, const char *name,
+		const void *value, size_t size, int flags, int type)
 {
+	struct inode *inode = dentry->d_inode;
 	struct posix_acl *acl = NULL;
-	int error = 0, type;
+	int error = 0;
 
-	type = xfs_decode_acl(name);
-	if (type < 0)
-		return type;
 	if (flags & XATTR_CREATE)
 		return -EINVAL;
 	if (type == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode))
@@ -462,8 +437,16 @@ xfs_xattr_system_set(struct inode *inode, const char *name,
 	return error;
 }
 
-struct xattr_handler xfs_xattr_system_handler = {
-	.prefix	= XATTR_SYSTEM_PREFIX,
-	.get	= xfs_xattr_system_get,
-	.set	= xfs_xattr_system_set,
+struct xattr_handler xfs_xattr_acl_access_handler = {
+	.prefix	= POSIX_ACL_XATTR_ACCESS,
+	.flags	= ACL_TYPE_ACCESS,
+	.get	= xfs_xattr_acl_get,
+	.set	= xfs_xattr_acl_set,
+};
+
+struct xattr_handler xfs_xattr_acl_default_handler = {
+	.prefix	= POSIX_ACL_XATTR_DEFAULT,
+	.flags	= ACL_TYPE_DEFAULT,
+	.get	= xfs_xattr_acl_get,
+	.set	= xfs_xattr_acl_set,
 };
diff --git a/fs/xfs/linux-2.6/xfs_xattr.c b/fs/xfs/linux-2.6/xfs_xattr.c
index 497c7fb75cc1..0b1878857fc3 100644
--- a/fs/xfs/linux-2.6/xfs_xattr.c
+++ b/fs/xfs/linux-2.6/xfs_xattr.c
@@ -30,10 +30,10 @@
 
 
 static int
-__xfs_xattr_get(struct inode *inode, const char *name,
+xfs_xattr_get(struct dentry *dentry, const char *name,
 		void *value, size_t size, int xflags)
 {
-	struct xfs_inode *ip = XFS_I(inode);
+	struct xfs_inode *ip = XFS_I(dentry->d_inode);
 	int error, asize = size;
 
 	if (strcmp(name, "") == 0)
@@ -52,10 +52,10 @@ __xfs_xattr_get(struct inode *inode, const char *name,
 }
 
 static int
-__xfs_xattr_set(struct inode *inode, const char *name, const void *value,
+xfs_xattr_set(struct dentry *dentry, const char *name, const void *value,
 		size_t size, int flags, int xflags)
 {
-	struct xfs_inode *ip = XFS_I(inode);
+	struct xfs_inode *ip = XFS_I(dentry->d_inode);
 
 	if (strcmp(name, "") == 0)
 		return -EINVAL;
@@ -71,75 +71,34 @@ __xfs_xattr_set(struct inode *inode, const char *name, const void *value,
 	return -xfs_attr_set(ip, name, (void *)value, size, xflags);
 }
 
-static int
-xfs_xattr_user_get(struct inode *inode, const char *name,
-		void *value, size_t size)
-{
-	return __xfs_xattr_get(inode, name, value, size, 0);
-}
-
-static int
-xfs_xattr_user_set(struct inode *inode, const char *name,
-		const void *value, size_t size, int flags)
-{
-	return __xfs_xattr_set(inode, name, value, size, flags, 0);
-}
-
 static struct xattr_handler xfs_xattr_user_handler = {
 	.prefix	= XATTR_USER_PREFIX,
-	.get	= xfs_xattr_user_get,
-	.set	= xfs_xattr_user_set,
+	.flags	= 0, /* no flags implies user namespace */
+	.get	= xfs_xattr_get,
+	.set	= xfs_xattr_set,
 };
 
-
-static int
-xfs_xattr_trusted_get(struct inode *inode, const char *name,
-		void *value, size_t size)
-{
-	return __xfs_xattr_get(inode, name, value, size, ATTR_ROOT);
-}
-
-static int
-xfs_xattr_trusted_set(struct inode *inode, const char *name,
-		const void *value, size_t size, int flags)
-{
-	return __xfs_xattr_set(inode, name, value, size, flags, ATTR_ROOT);
-}
-
 static struct xattr_handler xfs_xattr_trusted_handler = {
 	.prefix	= XATTR_TRUSTED_PREFIX,
-	.get	= xfs_xattr_trusted_get,
-	.set	= xfs_xattr_trusted_set,
+	.flags	= ATTR_ROOT,
+	.get	= xfs_xattr_get,
+	.set	= xfs_xattr_set,
 };
 
-
-static int
-xfs_xattr_secure_get(struct inode *inode, const char *name,
-		void *value, size_t size)
-{
-	return __xfs_xattr_get(inode, name, value, size, ATTR_SECURE);
-}
-
-static int
-xfs_xattr_secure_set(struct inode *inode, const char *name,
-		const void *value, size_t size, int flags)
-{
-	return __xfs_xattr_set(inode, name, value, size, flags, ATTR_SECURE);
-}
-
 static struct xattr_handler xfs_xattr_security_handler = {
 	.prefix	= XATTR_SECURITY_PREFIX,
-	.get	= xfs_xattr_secure_get,
-	.set	= xfs_xattr_secure_set,
+	.flags	= ATTR_SECURE,
+	.get	= xfs_xattr_get,
+	.set	= xfs_xattr_set,
 };
 
-
 struct xattr_handler *xfs_xattr_handlers[] = {
 	&xfs_xattr_user_handler,
 	&xfs_xattr_trusted_handler,
 	&xfs_xattr_security_handler,
 #ifdef CONFIG_XFS_POSIX_ACL
-	&xfs_xattr_system_handler,
+	&xfs_xattr_acl_access_handler,
+	&xfs_xattr_acl_default_handler,
 #endif
 	NULL
 };
diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h
index 947b150df8ed..00fd357c3e46 100644
--- a/fs/xfs/xfs_acl.h
+++ b/fs/xfs/xfs_acl.h
@@ -49,7 +49,8 @@ extern int xfs_acl_chmod(struct inode *inode);
 extern int posix_acl_access_exists(struct inode *inode);
 extern int posix_acl_default_exists(struct inode *inode);
 
-extern struct xattr_handler xfs_xattr_system_handler;
+extern struct xattr_handler xfs_xattr_acl_access_handler;
+extern struct xattr_handler xfs_xattr_acl_default_handler;
 #else
 # define xfs_check_acl					NULL
 # define xfs_get_acl(inode, type)			NULL
diff --git a/include/linux/xattr.h b/include/linux/xattr.h
index 5c84af8c5f6f..fb9b7e6e1e2d 100644
--- a/include/linux/xattr.h
+++ b/include/linux/xattr.h
@@ -38,12 +38,13 @@ struct dentry;
 
 struct xattr_handler {
 	char *prefix;
-	size_t (*list)(struct inode *inode, char *list, size_t list_size,
-		       const char *name, size_t name_len);
-	int (*get)(struct inode *inode, const char *name, void *buffer,
-		   size_t size);
-	int (*set)(struct inode *inode, const char *name, const void *buffer,
-		   size_t size, int flags);
+	int flags;	/* fs private flags passed back to the handlers */
+	size_t (*list)(struct dentry *dentry, char *list, size_t list_size,
+		       const char *name, size_t name_len, int handler_flags);
+	int (*get)(struct dentry *dentry, const char *name, void *buffer,
+		   size_t size, int handler_flags);
+	int (*set)(struct dentry *dentry, const char *name, const void *buffer,
+		   size_t size, int flags, int handler_flags);
 };
 
 ssize_t xattr_getsecurity(struct inode *, const char *, void *, size_t);
diff --git a/mm/shmem.c b/mm/shmem.c
index adf8033afd52..3cd32c2ea0a0 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2042,27 +2042,28 @@ static const struct inode_operations shmem_symlink_inode_operations = {
  * filesystem level, though.
  */
 
-static size_t shmem_xattr_security_list(struct inode *inode, char *list,
+static size_t shmem_xattr_security_list(struct dentry *dentry, char *list,
 					size_t list_len, const char *name,
-					size_t name_len)
+					size_t name_len, int handler_flags)
 {
-	return security_inode_listsecurity(inode, list, list_len);
+	return security_inode_listsecurity(dentry->d_inode, list, list_len);
 }
 
-static int shmem_xattr_security_get(struct inode *inode, const char *name,
-				    void *buffer, size_t size)
+static int shmem_xattr_security_get(struct dentry *dentry, const char *name,
+		void *buffer, size_t size, int handler_flags)
 {
 	if (strcmp(name, "") == 0)
 		return -EINVAL;
-	return xattr_getsecurity(inode, name, buffer, size);
+	return xattr_getsecurity(dentry->d_inode, name, buffer, size);
 }
 
-static int shmem_xattr_security_set(struct inode *inode, const char *name,
-				    const void *value, size_t size, int flags)
+static int shmem_xattr_security_set(struct dentry *dentry, const char *name,
+		const void *value, size_t size, int flags, int handler_flags)
 {
 	if (strcmp(name, "") == 0)
 		return -EINVAL;
-	return security_inode_setsecurity(inode, name, value, size, flags);
+	return security_inode_setsecurity(dentry->d_inode, name, value,
+					  size, flags);
 }
 
 static struct xattr_handler shmem_xattr_security_handler = {
diff --git a/mm/shmem_acl.c b/mm/shmem_acl.c
index df2c87fdae50..f8d5330ec0d7 100644
--- a/mm/shmem_acl.c
+++ b/mm/shmem_acl.c
@@ -63,86 +63,48 @@ struct generic_acl_operations shmem_acl_ops = {
 	.setacl = shmem_set_acl,
 };
 
-/**
- * shmem_list_acl_access, shmem_get_acl_access, shmem_set_acl_access,
- * shmem_xattr_acl_access_handler  -  plumbing code to implement the
- * system.posix_acl_access xattr using the generic acl functions.
- */
-
 static size_t
-shmem_list_acl_access(struct inode *inode, char *list, size_t list_size,
-		      const char *name, size_t name_len)
+shmem_xattr_list_acl(struct dentry *dentry, char *list, size_t list_size,
+		const char *name, size_t name_len, int type)
 {
-	return generic_acl_list(inode, &shmem_acl_ops, ACL_TYPE_ACCESS,
-				list, list_size);
+	return generic_acl_list(dentry->d_inode, &shmem_acl_ops,
+				type, list, list_size);
 }
 
 static int
-shmem_get_acl_access(struct inode *inode, const char *name, void *buffer,
-		     size_t size)
+shmem_xattr_get_acl(struct dentry *dentry, const char *name, void *buffer,
+		     size_t size, int type)
 {
 	if (strcmp(name, "") != 0)
 		return -EINVAL;
-	return generic_acl_get(inode, &shmem_acl_ops, ACL_TYPE_ACCESS, buffer,
-			       size);
+	return generic_acl_get(dentry->d_inode, &shmem_acl_ops, type,
+			       buffer, size);
 }
 
 static int
-shmem_set_acl_access(struct inode *inode, const char *name, const void *value,
-		     size_t size, int flags)
+shmem_xattr_set_acl(struct dentry *dentry, const char *name, const void *value,
+		     size_t size, int flags, int type)
 {
 	if (strcmp(name, "") != 0)
 		return -EINVAL;
-	return generic_acl_set(inode, &shmem_acl_ops, ACL_TYPE_ACCESS, value,
-			       size);
+	return generic_acl_set(dentry->d_inode, &shmem_acl_ops, type,
+			       value, size);
 }
 
 struct xattr_handler shmem_xattr_acl_access_handler = {
 	.prefix = POSIX_ACL_XATTR_ACCESS,
-	.list	= shmem_list_acl_access,
-	.get	= shmem_get_acl_access,
-	.set	= shmem_set_acl_access,
+	.flags	= ACL_TYPE_ACCESS,
+	.list	= shmem_xattr_list_acl,
+	.get	= shmem_xattr_get_acl,
+	.set	= shmem_xattr_set_acl,
 };
 
-/**
- * shmem_list_acl_default, shmem_get_acl_default, shmem_set_acl_default,
- * shmem_xattr_acl_default_handler  -  plumbing code to implement the
- * system.posix_acl_default xattr using the generic acl functions.
- */
-
-static size_t
-shmem_list_acl_default(struct inode *inode, char *list, size_t list_size,
-		       const char *name, size_t name_len)
-{
-	return generic_acl_list(inode, &shmem_acl_ops, ACL_TYPE_DEFAULT,
-				list, list_size);
-}
-
-static int
-shmem_get_acl_default(struct inode *inode, const char *name, void *buffer,
-		      size_t size)
-{
-	if (strcmp(name, "") != 0)
-		return -EINVAL;
-	return generic_acl_get(inode, &shmem_acl_ops, ACL_TYPE_DEFAULT, buffer,
-			       size);
-}
-
-static int
-shmem_set_acl_default(struct inode *inode, const char *name, const void *value,
-		      size_t size, int flags)
-{
-	if (strcmp(name, "") != 0)
-		return -EINVAL;
-	return generic_acl_set(inode, &shmem_acl_ops, ACL_TYPE_DEFAULT, value,
-			       size);
-}
-
 struct xattr_handler shmem_xattr_acl_default_handler = {
 	.prefix = POSIX_ACL_XATTR_DEFAULT,
-	.list	= shmem_list_acl_default,
-	.get	= shmem_get_acl_default,
-	.set	= shmem_set_acl_default,
+	.flags	= ACL_TYPE_DEFAULT,
+	.list	= shmem_xattr_list_acl,
+	.get	= shmem_xattr_get_acl,
+	.set	= shmem_xattr_set_acl,
 };
 
 /**
-- 
cgit v1.2.3


From 1c7c474c31aea6d5cb2fb35f31d9e9e91ae466b1 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 3 Nov 2009 16:44:44 +0100
Subject: make generic_acl slightly more generic

Now that we cache the ACL pointers in the generic inode all the generic_acl
cruft can go away and generic_acl.c can directly implement xattr handlers
dealing with the full Posix ACL semantics for in-memory filesystems.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/generic_acl.c            | 158 +++++++++++++++++++++++++-------------------
 include/linux/generic_acl.h |  41 +++---------
 include/linux/shmem_fs.h    |  16 -----
 mm/Makefile                 |   1 -
 mm/shmem.c                  |  17 +++--
 mm/shmem_acl.c              | 133 -------------------------------------
 6 files changed, 109 insertions(+), 257 deletions(-)
 delete mode 100644 mm/shmem_acl.c

(limited to 'include/linux')

diff --git a/fs/generic_acl.c b/fs/generic_acl.c
index e0b53aa7bbec..55458031e501 100644
--- a/fs/generic_acl.c
+++ b/fs/generic_acl.c
@@ -1,62 +1,58 @@
 /*
- * fs/generic_acl.c
- *
  * (C) 2005 Andreas Gruenbacher <agruen@suse.de>
  *
  * This file is released under the GPL.
+ *
+ * Generic ACL support for in-memory filesystems.
  */
 
 #include <linux/sched.h>
 #include <linux/fs.h>
 #include <linux/generic_acl.h>
+#include <linux/posix_acl.h>
+#include <linux/posix_acl_xattr.h>
 
-/**
- * generic_acl_list  -  Generic xattr_handler->list() operation
- * @ops:	Filesystem specific getacl and setacl callbacks
- */
-size_t
-generic_acl_list(struct inode *inode, struct generic_acl_operations *ops,
-		 int type, char *list, size_t list_size)
+
+static size_t
+generic_acl_list(struct dentry *dentry, char *list, size_t list_size,
+		const char *name, size_t name_len, int type)
 {
 	struct posix_acl *acl;
-	const char *name;
+	const char *xname;
 	size_t size;
 
-	acl = ops->getacl(inode, type);
+	acl = get_cached_acl(dentry->d_inode, type);
 	if (!acl)
 		return 0;
 	posix_acl_release(acl);
 
-	switch(type) {
-		case ACL_TYPE_ACCESS:
-			name = POSIX_ACL_XATTR_ACCESS;
-			break;
-
-		case ACL_TYPE_DEFAULT:
-			name = POSIX_ACL_XATTR_DEFAULT;
-			break;
-
-		default:
-			return 0;
+	switch (type) {
+	case ACL_TYPE_ACCESS:
+		xname = POSIX_ACL_XATTR_ACCESS;
+		break;
+	case ACL_TYPE_DEFAULT:
+		xname = POSIX_ACL_XATTR_DEFAULT;
+		break;
+	default:
+		return 0;
 	}
-	size = strlen(name) + 1;
+	size = strlen(xname) + 1;
 	if (list && size <= list_size)
-		memcpy(list, name, size);
+		memcpy(list, xname, size);
 	return size;
 }
 
-/**
- * generic_acl_get  -  Generic xattr_handler->get() operation
- * @ops:	Filesystem specific getacl and setacl callbacks
- */
-int
-generic_acl_get(struct inode *inode, struct generic_acl_operations *ops,
-		int type, void *buffer, size_t size)
+static int
+generic_acl_get(struct dentry *dentry, const char *name, void *buffer,
+		     size_t size, int type)
 {
 	struct posix_acl *acl;
 	int error;
 
-	acl = ops->getacl(inode, type);
+	if (strcmp(name, "") != 0)
+		return -EINVAL;
+
+	acl = get_cached_acl(dentry->d_inode, type);
 	if (!acl)
 		return -ENODATA;
 	error = posix_acl_to_xattr(acl, buffer, size);
@@ -65,17 +61,16 @@ generic_acl_get(struct inode *inode, struct generic_acl_operations *ops,
 	return error;
 }
 
-/**
- * generic_acl_set  -  Generic xattr_handler->set() operation
- * @ops:	Filesystem specific getacl and setacl callbacks
- */
-int
-generic_acl_set(struct inode *inode, struct generic_acl_operations *ops,
-		int type, const void *value, size_t size)
+static int
+generic_acl_set(struct dentry *dentry, const char *name, const void *value,
+		     size_t size, int flags, int type)
 {
+	struct inode *inode = dentry->d_inode;
 	struct posix_acl *acl = NULL;
 	int error;
 
+	if (strcmp(name, "") != 0)
+		return -EINVAL;
 	if (S_ISLNK(inode->i_mode))
 		return -EOPNOTSUPP;
 	if (!is_owner_or_cap(inode))
@@ -91,28 +86,27 @@ generic_acl_set(struct inode *inode, struct generic_acl_operations *ops,
 		error = posix_acl_valid(acl);
 		if (error)
 			goto failed;
-		switch(type) {
-			case ACL_TYPE_ACCESS:
-				mode = inode->i_mode;
-				error = posix_acl_equiv_mode(acl, &mode);
-				if (error < 0)
-					goto failed;
-				inode->i_mode = mode;
-				if (error == 0) {
-					posix_acl_release(acl);
-					acl = NULL;
-				}
-				break;
-
-			case ACL_TYPE_DEFAULT:
-				if (!S_ISDIR(inode->i_mode)) {
-					error = -EINVAL;
-					goto failed;
-				}
-				break;
+		switch (type) {
+		case ACL_TYPE_ACCESS:
+			mode = inode->i_mode;
+			error = posix_acl_equiv_mode(acl, &mode);
+			if (error < 0)
+				goto failed;
+			inode->i_mode = mode;
+			if (error == 0) {
+				posix_acl_release(acl);
+				acl = NULL;
+			}
+			break;
+		case ACL_TYPE_DEFAULT:
+			if (!S_ISDIR(inode->i_mode)) {
+				error = -EINVAL;
+				goto failed;
+			}
+			break;
 		}
 	}
-	ops->setacl(inode, type, acl);
+	set_cached_acl(inode, type, acl);
 	error = 0;
 failed:
 	posix_acl_release(acl);
@@ -121,14 +115,12 @@ failed:
 
 /**
  * generic_acl_init  -  Take care of acl inheritance at @inode create time
- * @ops:	Filesystem specific getacl and setacl callbacks
  *
  * Files created inside a directory with a default ACL inherit the
  * directory's default ACL.
  */
 int
-generic_acl_init(struct inode *inode, struct inode *dir,
-		 struct generic_acl_operations *ops)
+generic_acl_init(struct inode *inode, struct inode *dir)
 {
 	struct posix_acl *acl = NULL;
 	mode_t mode = inode->i_mode;
@@ -136,7 +128,7 @@ generic_acl_init(struct inode *inode, struct inode *dir,
 
 	inode->i_mode = mode & ~current_umask();
 	if (!S_ISLNK(inode->i_mode))
-		acl = ops->getacl(dir, ACL_TYPE_DEFAULT);
+		acl = get_cached_acl(dir, ACL_TYPE_DEFAULT);
 	if (acl) {
 		struct posix_acl *clone;
 
@@ -145,7 +137,7 @@ generic_acl_init(struct inode *inode, struct inode *dir,
 			error = -ENOMEM;
 			if (!clone)
 				goto cleanup;
-			ops->setacl(inode, ACL_TYPE_DEFAULT, clone);
+			set_cached_acl(inode, ACL_TYPE_DEFAULT, clone);
 			posix_acl_release(clone);
 		}
 		clone = posix_acl_clone(acl, GFP_KERNEL);
@@ -156,7 +148,7 @@ generic_acl_init(struct inode *inode, struct inode *dir,
 		if (error >= 0) {
 			inode->i_mode = mode;
 			if (error > 0)
-				ops->setacl(inode, ACL_TYPE_ACCESS, clone);
+				set_cached_acl(inode, ACL_TYPE_ACCESS, clone);
 		}
 		posix_acl_release(clone);
 	}
@@ -169,20 +161,19 @@ cleanup:
 
 /**
  * generic_acl_chmod  -  change the access acl of @inode upon chmod()
- * @ops:	FIlesystem specific getacl and setacl callbacks
  *
  * A chmod also changes the permissions of the owner, group/mask, and
  * other ACL entries.
  */
 int
-generic_acl_chmod(struct inode *inode, struct generic_acl_operations *ops)
+generic_acl_chmod(struct inode *inode)
 {
 	struct posix_acl *acl, *clone;
 	int error = 0;
 
 	if (S_ISLNK(inode->i_mode))
 		return -EOPNOTSUPP;
-	acl = ops->getacl(inode, ACL_TYPE_ACCESS);
+	acl = get_cached_acl(inode, ACL_TYPE_ACCESS);
 	if (acl) {
 		clone = posix_acl_clone(acl, GFP_KERNEL);
 		posix_acl_release(acl);
@@ -190,8 +181,37 @@ generic_acl_chmod(struct inode *inode, struct generic_acl_operations *ops)
 			return -ENOMEM;
 		error = posix_acl_chmod_masq(clone, inode->i_mode);
 		if (!error)
-			ops->setacl(inode, ACL_TYPE_ACCESS, clone);
+			set_cached_acl(inode, ACL_TYPE_ACCESS, clone);
 		posix_acl_release(clone);
 	}
 	return error;
 }
+
+int
+generic_check_acl(struct inode *inode, int mask)
+{
+	struct posix_acl *acl = get_cached_acl(inode, ACL_TYPE_ACCESS);
+
+	if (acl) {
+		int error = posix_acl_permission(inode, acl, mask);
+		posix_acl_release(acl);
+		return error;
+	}
+	return -EAGAIN;
+}
+
+struct xattr_handler generic_acl_access_handler = {
+	.prefix = POSIX_ACL_XATTR_ACCESS,
+	.flags	= ACL_TYPE_ACCESS,
+	.list	= generic_acl_list,
+	.get	= generic_acl_get,
+	.set	= generic_acl_set,
+};
+
+struct xattr_handler generic_acl_default_handler = {
+	.prefix = POSIX_ACL_XATTR_DEFAULT,
+	.flags	= ACL_TYPE_DEFAULT,
+	.list	= generic_acl_list,
+	.get	= generic_acl_get,
+	.set	= generic_acl_set,
+};
diff --git a/include/linux/generic_acl.h b/include/linux/generic_acl.h
index 886f5faa08cb..ca666d18ed67 100644
--- a/include/linux/generic_acl.h
+++ b/include/linux/generic_acl.h
@@ -1,36 +1,15 @@
-/*
- * include/linux/generic_acl.h
- *
- * (C) 2005 Andreas Gruenbacher <agruen@suse.de>
- *
- * This file is released under the GPL.
- */
+#ifndef LINUX_GENERIC_ACL_H
+#define LINUX_GENERIC_ACL_H
 
-#ifndef GENERIC_ACL_H
-#define GENERIC_ACL_H
+#include <linux/xattr.h>
 
-#include <linux/posix_acl.h>
-#include <linux/posix_acl_xattr.h>
+struct inode;
 
-/**
- * struct generic_acl_operations  -  filesystem operations
- *
- * Filesystems must make these operations available to the generic
- * operations.
- */
-struct generic_acl_operations {
-	struct posix_acl *(*getacl)(struct inode *, int);
-	void (*setacl)(struct inode *, int, struct posix_acl *);
-};
+extern struct xattr_handler generic_acl_access_handler;
+extern struct xattr_handler generic_acl_default_handler;
 
-size_t generic_acl_list(struct inode *, struct generic_acl_operations *, int,
-			char *, size_t);
-int generic_acl_get(struct inode *, struct generic_acl_operations *, int,
-		    void *, size_t);
-int generic_acl_set(struct inode *, struct generic_acl_operations *, int,
-		    const void *, size_t);
-int generic_acl_init(struct inode *, struct inode *,
-		     struct generic_acl_operations *);
-int generic_acl_chmod(struct inode *, struct generic_acl_operations *);
+int generic_acl_init(struct inode *, struct inode *);
+int generic_acl_chmod(struct inode *);
+int generic_check_acl(struct inode *inode, int mask);
 
-#endif
+#endif /* LINUX_GENERIC_ACL_H */
diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index deee7afd8d66..e164291fb3e7 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -41,20 +41,4 @@ static inline struct shmem_inode_info *SHMEM_I(struct inode *inode)
 extern int init_tmpfs(void);
 extern int shmem_fill_super(struct super_block *sb, void *data, int silent);
 
-#ifdef CONFIG_TMPFS_POSIX_ACL
-int shmem_check_acl(struct inode *, int);
-int shmem_acl_init(struct inode *, struct inode *);
-
-extern struct xattr_handler shmem_xattr_acl_access_handler;
-extern struct xattr_handler shmem_xattr_acl_default_handler;
-
-extern struct generic_acl_operations shmem_acl_ops;
-
-#else
-static inline int shmem_acl_init(struct inode *inode, struct inode *dir)
-{
-	return 0;
-}
-#endif  /* CONFIG_TMPFS_POSIX_ACL */
-
 #endif
diff --git a/mm/Makefile b/mm/Makefile
index 82131d0f8d85..7a68d2ab5560 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -22,7 +22,6 @@ obj-$(CONFIG_HUGETLBFS)	+= hugetlb.o
 obj-$(CONFIG_NUMA) 	+= mempolicy.o
 obj-$(CONFIG_SPARSEMEM)	+= sparse.o
 obj-$(CONFIG_SPARSEMEM_VMEMMAP) += sparse-vmemmap.o
-obj-$(CONFIG_TMPFS_POSIX_ACL) += shmem_acl.o
 obj-$(CONFIG_SLOB) += slob.o
 obj-$(CONFIG_MMU_NOTIFIER) += mmu_notifier.o
 obj-$(CONFIG_KSM) += ksm.o
diff --git a/mm/shmem.c b/mm/shmem.c
index 3cd32c2ea0a0..f8485062f3ba 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -41,6 +41,7 @@ static struct vfsmount *shm_mnt;
 
 #include <linux/xattr.h>
 #include <linux/exportfs.h>
+#include <linux/posix_acl.h>
 #include <linux/generic_acl.h>
 #include <linux/mman.h>
 #include <linux/string.h>
@@ -809,7 +810,7 @@ static int shmem_notify_change(struct dentry *dentry, struct iattr *attr)
 		error = inode_setattr(inode, attr);
 #ifdef CONFIG_TMPFS_POSIX_ACL
 	if (!error && (attr->ia_valid & ATTR_MODE))
-		error = generic_acl_chmod(inode, &shmem_acl_ops);
+		error = generic_acl_chmod(inode);
 #endif
 	if (page)
 		page_cache_release(page);
@@ -1823,11 +1824,13 @@ shmem_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
 				return error;
 			}
 		}
-		error = shmem_acl_init(inode, dir);
+#ifdef CONFIG_TMPFS_POSIX_ACL
+		error = generic_acl_init(inode, dir);
 		if (error) {
 			iput(inode);
 			return error;
 		}
+#endif
 		if (dir->i_mode & S_ISGID) {
 			inode->i_gid = dir->i_gid;
 			if (S_ISDIR(mode))
@@ -2074,8 +2077,8 @@ static struct xattr_handler shmem_xattr_security_handler = {
 };
 
 static struct xattr_handler *shmem_xattr_handlers[] = {
-	&shmem_xattr_acl_access_handler,
-	&shmem_xattr_acl_default_handler,
+	&generic_acl_access_handler,
+	&generic_acl_default_handler,
 	&shmem_xattr_security_handler,
 	NULL
 };
@@ -2454,7 +2457,7 @@ static const struct inode_operations shmem_inode_operations = {
 	.getxattr	= generic_getxattr,
 	.listxattr	= generic_listxattr,
 	.removexattr	= generic_removexattr,
-	.check_acl	= shmem_check_acl,
+	.check_acl	= generic_check_acl,
 #endif
 
 };
@@ -2477,7 +2480,7 @@ static const struct inode_operations shmem_dir_inode_operations = {
 	.getxattr	= generic_getxattr,
 	.listxattr	= generic_listxattr,
 	.removexattr	= generic_removexattr,
-	.check_acl	= shmem_check_acl,
+	.check_acl	= generic_check_acl,
 #endif
 };
 
@@ -2488,7 +2491,7 @@ static const struct inode_operations shmem_special_inode_operations = {
 	.getxattr	= generic_getxattr,
 	.listxattr	= generic_listxattr,
 	.removexattr	= generic_removexattr,
-	.check_acl	= shmem_check_acl,
+	.check_acl	= generic_check_acl,
 #endif
 };
 
diff --git a/mm/shmem_acl.c b/mm/shmem_acl.c
deleted file mode 100644
index f8d5330ec0d7..000000000000
--- a/mm/shmem_acl.c
+++ /dev/null
@@ -1,133 +0,0 @@
-/*
- * mm/shmem_acl.c
- *
- * (C) 2005 Andreas Gruenbacher <agruen@suse.de>
- *
- * This file is released under the GPL.
- */
-
-#include <linux/fs.h>
-#include <linux/shmem_fs.h>
-#include <linux/xattr.h>
-#include <linux/generic_acl.h>
-
-/**
- * shmem_get_acl  -   generic_acl_operations->getacl() operation
- */
-static struct posix_acl *
-shmem_get_acl(struct inode *inode, int type)
-{
-	struct posix_acl *acl = NULL;
-
-	spin_lock(&inode->i_lock);
-	switch(type) {
-		case ACL_TYPE_ACCESS:
-			acl = posix_acl_dup(inode->i_acl);
-			break;
-
-		case ACL_TYPE_DEFAULT:
-			acl = posix_acl_dup(inode->i_default_acl);
-			break;
-	}
-	spin_unlock(&inode->i_lock);
-
-	return acl;
-}
-
-/**
- * shmem_set_acl  -   generic_acl_operations->setacl() operation
- */
-static void
-shmem_set_acl(struct inode *inode, int type, struct posix_acl *acl)
-{
-	struct posix_acl *free = NULL;
-
-	spin_lock(&inode->i_lock);
-	switch(type) {
-		case ACL_TYPE_ACCESS:
-			free = inode->i_acl;
-			inode->i_acl = posix_acl_dup(acl);
-			break;
-
-		case ACL_TYPE_DEFAULT:
-			free = inode->i_default_acl;
-			inode->i_default_acl = posix_acl_dup(acl);
-			break;
-	}
-	spin_unlock(&inode->i_lock);
-	posix_acl_release(free);
-}
-
-struct generic_acl_operations shmem_acl_ops = {
-	.getacl = shmem_get_acl,
-	.setacl = shmem_set_acl,
-};
-
-static size_t
-shmem_xattr_list_acl(struct dentry *dentry, char *list, size_t list_size,
-		const char *name, size_t name_len, int type)
-{
-	return generic_acl_list(dentry->d_inode, &shmem_acl_ops,
-				type, list, list_size);
-}
-
-static int
-shmem_xattr_get_acl(struct dentry *dentry, const char *name, void *buffer,
-		     size_t size, int type)
-{
-	if (strcmp(name, "") != 0)
-		return -EINVAL;
-	return generic_acl_get(dentry->d_inode, &shmem_acl_ops, type,
-			       buffer, size);
-}
-
-static int
-shmem_xattr_set_acl(struct dentry *dentry, const char *name, const void *value,
-		     size_t size, int flags, int type)
-{
-	if (strcmp(name, "") != 0)
-		return -EINVAL;
-	return generic_acl_set(dentry->d_inode, &shmem_acl_ops, type,
-			       value, size);
-}
-
-struct xattr_handler shmem_xattr_acl_access_handler = {
-	.prefix = POSIX_ACL_XATTR_ACCESS,
-	.flags	= ACL_TYPE_ACCESS,
-	.list	= shmem_xattr_list_acl,
-	.get	= shmem_xattr_get_acl,
-	.set	= shmem_xattr_set_acl,
-};
-
-struct xattr_handler shmem_xattr_acl_default_handler = {
-	.prefix = POSIX_ACL_XATTR_DEFAULT,
-	.flags	= ACL_TYPE_DEFAULT,
-	.list	= shmem_xattr_list_acl,
-	.get	= shmem_xattr_get_acl,
-	.set	= shmem_xattr_set_acl,
-};
-
-/**
- * shmem_acl_init  -  Inizialize the acl(s) of a new inode
- */
-int
-shmem_acl_init(struct inode *inode, struct inode *dir)
-{
-	return generic_acl_init(inode, dir, &shmem_acl_ops);
-}
-
-/**
- * shmem_check_acl  -  check_acl() callback for generic_permission()
- */
-int
-shmem_check_acl(struct inode *inode, int mask)
-{
-	struct posix_acl *acl = shmem_get_acl(inode, ACL_TYPE_ACCESS);
-
-	if (acl) {
-		int error = posix_acl_permission(inode, acl, mask);
-		posix_acl_release(acl);
-		return error;
-	}
-	return -EAGAIN;
-}
-- 
cgit v1.2.3


From 1e431f5ce78f3ae8254d725060288b78ff74f086 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 3 Nov 2009 16:44:53 +0100
Subject: cleanup blockdev_direct_IO locking

Currently the locking in blockdev_direct_IO is a mess, we have three different
locking types and very confusing checks for some of them.  The most
complicated one is DIO_OWN_LOCKING for reads, which happens to not actually be
used.

This patch gets rid of the DIO_OWN_LOCKING - as mentioned above the read case
is unused anyway, and the write side is almost identical to DIO_NO_LOCKING.
The difference is that DIO_NO_LOCKING always sets the create argument for
the get_blocks callback to zero, but we can easily move that to the actual
get_blocks callbacks.  There are four users of the DIO_NO_LOCKING mode:
gfs already ignores the create argument and thus is fine with the new
version, ocfs2 only errors out if create were ever set, and we can remove
this dead code now, the block device code only ever uses create for an
error message if we are fully beyond the device which can never happen,
and last but not least XFS will need the new behavour for writes.

Now we can replace the lock_type variable with a flags one, where no flag
means the DIO_NO_LOCKING behaviour and DIO_LOCKING is kept as the first
flag.  Separate out the check for not allowing to fill holes into a separate
flag, although for now both flags always get set at the same time.

Also revamp the documentation of the locking scheme to actually make sense.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/direct-io.c              | 129 ++++++++++++++++++--------------------------
 fs/ocfs2/aops.c             |  34 ++----------
 fs/xfs/linux-2.6/xfs_aops.c |  20 +++----
 include/linux/fs.h          |  22 +++-----
 4 files changed, 71 insertions(+), 134 deletions(-)

(limited to 'include/linux')

diff --git a/fs/direct-io.c b/fs/direct-io.c
index b912270942fa..7dde0df8e8b6 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -53,13 +53,6 @@
  *
  * If blkfactor is zero then the user's request was aligned to the filesystem's
  * blocksize.
- *
- * lock_type is DIO_LOCKING for regular files on direct-IO-naive filesystems.
- * This determines whether we need to do the fancy locking which prevents
- * direct-IO from being able to read uninitialised disk blocks.  If its zero
- * (blockdev) this locking is not done, and if it is DIO_OWN_LOCKING i_mutex is
- * not held for the entire direct write (taken briefly, initially, during a
- * direct read though, but its never held for the duration of a direct-IO).
  */
 
 struct dio {
@@ -68,7 +61,7 @@ struct dio {
 	struct inode *inode;
 	int rw;
 	loff_t i_size;			/* i_size when submitted */
-	int lock_type;			/* doesn't change */
+	int flags;			/* doesn't change */
 	unsigned blkbits;		/* doesn't change */
 	unsigned blkfactor;		/* When we're using an alignment which
 					   is finer than the filesystem's soft
@@ -240,7 +233,8 @@ static int dio_complete(struct dio *dio, loff_t offset, int ret)
 	if (dio->end_io && dio->result)
 		dio->end_io(dio->iocb, offset, transferred,
 			    dio->map_bh.b_private);
-	if (dio->lock_type == DIO_LOCKING)
+
+	if (dio->flags & DIO_LOCKING)
 		/* lockdep: non-owner release */
 		up_read_non_owner(&dio->inode->i_alloc_sem);
 
@@ -515,21 +509,24 @@ static int get_more_blocks(struct dio *dio)
 		map_bh->b_state = 0;
 		map_bh->b_size = fs_count << dio->inode->i_blkbits;
 
+		/*
+		 * For writes inside i_size on a DIO_SKIP_HOLES filesystem we
+		 * forbid block creations: only overwrites are permitted.
+		 * We will return early to the caller once we see an
+		 * unmapped buffer head returned, and the caller will fall
+		 * back to buffered I/O.
+		 *
+		 * Otherwise the decision is left to the get_blocks method,
+		 * which may decide to handle it or also return an unmapped
+		 * buffer head.
+		 */
 		create = dio->rw & WRITE;
-		if (dio->lock_type == DIO_LOCKING) {
+		if (dio->flags & DIO_SKIP_HOLES) {
 			if (dio->block_in_file < (i_size_read(dio->inode) >>
 							dio->blkbits))
 				create = 0;
-		} else if (dio->lock_type == DIO_NO_LOCKING) {
-			create = 0;
 		}
 
-		/*
-		 * For writes inside i_size we forbid block creations: only
-		 * overwrites are permitted.  We fall back to buffered writes
-		 * at a higher level for inside-i_size block-instantiating
-		 * writes.
-		 */
 		ret = (*dio->get_block)(dio->inode, fs_startblk,
 						map_bh, create);
 	}
@@ -1039,7 +1036,7 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
 	 * we can let i_mutex go now that its achieved its purpose
 	 * of protecting us from looking up uninitialized blocks.
 	 */
-	if ((rw == READ) && (dio->lock_type == DIO_LOCKING))
+	if (rw == READ && (dio->flags & DIO_LOCKING))
 		mutex_unlock(&dio->inode->i_mutex);
 
 	/*
@@ -1086,30 +1083,28 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
 
 /*
  * This is a library function for use by filesystem drivers.
- * The locking rules are governed by the dio_lock_type parameter.
  *
- * DIO_NO_LOCKING (no locking, for raw block device access)
- * For writes, i_mutex is not held on entry; it is never taken.
+ * The locking rules are governed by the flags parameter:
+ *  - if the flags value contains DIO_LOCKING we use a fancy locking
+ *    scheme for dumb filesystems.
+ *    For writes this function is called under i_mutex and returns with
+ *    i_mutex held, for reads, i_mutex is not held on entry, but it is
+ *    taken and dropped again before returning.
+ *    For reads and writes i_alloc_sem is taken in shared mode and released
+ *    on I/O completion (which may happen asynchronously after returning to
+ *    the caller).
  *
- * DIO_LOCKING (simple locking for regular files)
- * For writes we are called under i_mutex and return with i_mutex held, even
- * though it is internally dropped.
- * For reads, i_mutex is not held on entry, but it is taken and dropped before
- * returning.
- *
- * DIO_OWN_LOCKING (filesystem provides synchronisation and handling of
- *	uninitialised data, allowing parallel direct readers and writers)
- * For writes we are called without i_mutex, return without it, never touch it.
- * For reads we are called under i_mutex and return with i_mutex held, even
- * though it may be internally dropped.
- *
- * Additional i_alloc_sem locking requirements described inline below.
+ *  - if the flags value does NOT contain DIO_LOCKING we don't use any
+ *    internal locking but rather rely on the filesystem to synchronize
+ *    direct I/O reads/writes versus each other and truncate.
+ *    For reads and writes both i_mutex and i_alloc_sem are not held on
+ *    entry and are never taken.
  */
 ssize_t
 __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 	struct block_device *bdev, const struct iovec *iov, loff_t offset, 
 	unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io,
-	int dio_lock_type)
+	int flags)
 {
 	int seg;
 	size_t size;
@@ -1120,8 +1115,6 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 	ssize_t retval = -EINVAL;
 	loff_t end = offset;
 	struct dio *dio;
-	int release_i_mutex = 0;
-	int acquire_i_mutex = 0;
 
 	if (rw & WRITE)
 		rw = WRITE_ODIRECT_PLUG;
@@ -1156,43 +1149,30 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 	if (!dio)
 		goto out;
 
-	/*
-	 * For block device access DIO_NO_LOCKING is used,
-	 *	neither readers nor writers do any locking at all
-	 * For regular files using DIO_LOCKING,
-	 *	readers need to grab i_mutex and i_alloc_sem
-	 *	writers need to grab i_alloc_sem only (i_mutex is already held)
-	 * For regular files using DIO_OWN_LOCKING,
-	 *	neither readers nor writers take any locks here
-	 */
-	dio->lock_type = dio_lock_type;
-	if (dio_lock_type != DIO_NO_LOCKING) {
+	dio->flags = flags;
+	if (dio->flags & DIO_LOCKING) {
 		/* watch out for a 0 len io from a tricksy fs */
 		if (rw == READ && end > offset) {
-			struct address_space *mapping;
+			struct address_space *mapping =
+					iocb->ki_filp->f_mapping;
 
-			mapping = iocb->ki_filp->f_mapping;
-			if (dio_lock_type != DIO_OWN_LOCKING) {
-				mutex_lock(&inode->i_mutex);
-				release_i_mutex = 1;
-			}
+			/* will be released by direct_io_worker */
+			mutex_lock(&inode->i_mutex);
 
 			retval = filemap_write_and_wait_range(mapping, offset,
 							      end - 1);
 			if (retval) {
+				mutex_unlock(&inode->i_mutex);
 				kfree(dio);
 				goto out;
 			}
-
-			if (dio_lock_type == DIO_OWN_LOCKING) {
-				mutex_unlock(&inode->i_mutex);
-				acquire_i_mutex = 1;
-			}
 		}
 
-		if (dio_lock_type == DIO_LOCKING)
-			/* lockdep: not the owner will release it */
-			down_read_non_owner(&inode->i_alloc_sem);
+		/*
+		 * Will be released at I/O completion, possibly in a
+		 * different thread.
+		 */
+		down_read_non_owner(&inode->i_alloc_sem);
 	}
 
 	/*
@@ -1210,24 +1190,19 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 	/*
 	 * In case of error extending write may have instantiated a few
 	 * blocks outside i_size. Trim these off again for DIO_LOCKING.
-	 * NOTE: DIO_NO_LOCK/DIO_OWN_LOCK callers have to handle this by
-	 * it's own meaner.
+	 *
+	 * NOTE: filesystems with their own locking have to handle this
+	 * on their own.
 	 */
-	if (unlikely(retval < 0 && (rw & WRITE))) {
-		loff_t isize = i_size_read(inode);
-
-		if (end > isize && dio_lock_type == DIO_LOCKING)
-			vmtruncate(inode, isize);
+	if (dio->flags & DIO_LOCKING) {
+		if (unlikely((rw & WRITE) && retval < 0)) {
+			loff_t isize = i_size_read(inode);
+			if (end > isize )
+				vmtruncate(inode, isize);
+		}
 	}
 
-	if (rw == READ && dio_lock_type == DIO_LOCKING)
-		release_i_mutex = 0;
-
 out:
-	if (release_i_mutex)
-		mutex_unlock(&inode->i_mutex);
-	else if (acquire_i_mutex)
-		mutex_lock(&inode->i_mutex);
 	return retval;
 }
 EXPORT_SYMBOL(__blockdev_direct_IO);
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index deb2b132ae5e..3dae4a13f6e4 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -547,6 +547,9 @@ bail:
  *
  * called like this: dio->get_blocks(dio->inode, fs_startblk,
  * 					fs_count, map_bh, dio->rw == WRITE);
+ *
+ * Note that we never bother to allocate blocks here, and thus ignore the
+ * create argument.
  */
 static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock,
 				     struct buffer_head *bh_result, int create)
@@ -563,14 +566,6 @@ static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock,
 
 	inode_blocks = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode));
 
-	/*
-	 * Any write past EOF is not allowed because we'd be extending.
-	 */
-	if (create && (iblock + max_blocks) > inode_blocks) {
-		ret = -EIO;
-		goto bail;
-	}
-
 	/* This figures out the size of the next contiguous block, and
 	 * our logical offset */
 	ret = ocfs2_extent_map_get_blocks(inode, iblock, &p_blkno,
@@ -582,15 +577,6 @@ static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock,
 		goto bail;
 	}
 
-	if (!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)) && !p_blkno && create) {
-		ocfs2_error(inode->i_sb,
-			    "Inode %llu has a hole at block %llu\n",
-			    (unsigned long long)OCFS2_I(inode)->ip_blkno,
-			    (unsigned long long)iblock);
-		ret = -EROFS;
-		goto bail;
-	}
-
 	/* We should already CoW the refcounted extent. */
 	BUG_ON(ext_flags & OCFS2_EXT_REFCOUNTED);
 	/*
@@ -601,20 +587,8 @@ static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock,
 	 */
 	if (p_blkno && !(ext_flags & OCFS2_EXT_UNWRITTEN))
 		map_bh(bh_result, inode->i_sb, p_blkno);
-	else {
-		/*
-		 * ocfs2_prepare_inode_for_write() should have caught
-		 * the case where we'd be filling a hole and triggered
-		 * a buffered write instead.
-		 */
-		if (create) {
-			ret = -EIO;
-			mlog_errno(ret);
-			goto bail;
-		}
-
+	else
 		clear_buffer_mapped(bh_result);
-	}
 
 	/* make sure we don't map more than max_blocks blocks here as
 	   that's all the kernel will handle at this point. */
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index d798c54296eb..66abe36c1213 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -1474,19 +1474,13 @@ xfs_vm_direct_IO(
 
 	bdev = xfs_find_bdev_for_inode(XFS_I(inode));
 
-	if (rw == WRITE) {
-		iocb->private = xfs_alloc_ioend(inode, IOMAP_UNWRITTEN);
-		ret = blockdev_direct_IO_own_locking(rw, iocb, inode,
-			bdev, iov, offset, nr_segs,
-			xfs_get_blocks_direct,
-			xfs_end_io_direct);
-	} else {
-		iocb->private = xfs_alloc_ioend(inode, IOMAP_READ);
-		ret = blockdev_direct_IO_no_locking(rw, iocb, inode,
-			bdev, iov, offset, nr_segs,
-			xfs_get_blocks_direct,
-			xfs_end_io_direct);
-	}
+	iocb->private = xfs_alloc_ioend(inode, rw == WRITE ?
+					IOMAP_UNWRITTEN : IOMAP_READ);
+
+	ret = blockdev_direct_IO_no_locking(rw, iocb, inode, bdev, iov,
+					    offset, nr_segs,
+					    xfs_get_blocks_direct,
+					    xfs_end_io_direct);
 
 	if (unlikely(ret != -EIOCBQUEUED && iocb->private))
 		xfs_destroy_ioend(iocb->private);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index cdc23be4edde..7c8ff12d1995 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2263,9 +2263,11 @@ ssize_t __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 	int lock_type);
 
 enum {
-	DIO_LOCKING = 1, /* need locking between buffered and direct access */
-	DIO_NO_LOCKING,  /* bdev; no locking at all between buffered/direct */
-	DIO_OWN_LOCKING, /* filesystem locks buffered and direct internally */
+	/* need locking between buffered and direct access */
+	DIO_LOCKING	= 0x01,
+
+	/* filesystem does not support filling holes */
+	DIO_SKIP_HOLES	= 0x02,
 };
 
 static inline ssize_t blockdev_direct_IO(int rw, struct kiocb *iocb,
@@ -2274,7 +2276,8 @@ static inline ssize_t blockdev_direct_IO(int rw, struct kiocb *iocb,
 	dio_iodone_t end_io)
 {
 	return __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset,
-				nr_segs, get_block, end_io, DIO_LOCKING);
+				    nr_segs, get_block, end_io,
+				    DIO_LOCKING | DIO_SKIP_HOLES);
 }
 
 static inline ssize_t blockdev_direct_IO_no_locking(int rw, struct kiocb *iocb,
@@ -2283,16 +2286,7 @@ static inline ssize_t blockdev_direct_IO_no_locking(int rw, struct kiocb *iocb,
 	dio_iodone_t end_io)
 {
 	return __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset,
-				nr_segs, get_block, end_io, DIO_NO_LOCKING);
-}
-
-static inline ssize_t blockdev_direct_IO_own_locking(int rw, struct kiocb *iocb,
-	struct inode *inode, struct block_device *bdev, const struct iovec *iov,
-	loff_t offset, unsigned long nr_segs, get_block_t get_block,
-	dio_iodone_t end_io)
-{
-	return __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset,
-				nr_segs, get_block, end_io, DIO_OWN_LOCKING);
+				nr_segs, get_block, end_io, 0);
 }
 #endif
 
-- 
cgit v1.2.3


From efbbd05a595343a413964ad85a2ad359b7b7efbd Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed, 16 Dec 2009 18:04:40 +0100
Subject: sched: Add pre and post wakeup hooks

As will be apparent in the next patch, we need a pre wakeup hook
for sched_fair task migration, hence rename the post wakeup hook
and one pre wakeup.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
LKML-Reference: <20091216170518.114746117@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h |  3 ++-
 kernel/sched.c        | 12 ++++++++----
 kernel/sched_rt.c     |  4 ++--
 3 files changed, 12 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 5c858f38e81a..2c9fa1ccebff 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1091,7 +1091,8 @@ struct sched_class {
 			      enum cpu_idle_type idle);
 	void (*pre_schedule) (struct rq *this_rq, struct task_struct *task);
 	void (*post_schedule) (struct rq *this_rq);
-	void (*task_wake_up) (struct rq *this_rq, struct task_struct *task);
+	void (*task_waking) (struct rq *this_rq, struct task_struct *task);
+	void (*task_woken) (struct rq *this_rq, struct task_struct *task);
 
 	void (*set_cpus_allowed)(struct task_struct *p,
 				 const struct cpumask *newmask);
diff --git a/kernel/sched.c b/kernel/sched.c
index 297dc441ff96..6c571bdd5658 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2412,6 +2412,10 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state,
 	if (task_contributes_to_load(p))
 		rq->nr_uninterruptible--;
 	p->state = TASK_WAKING;
+
+	if (p->sched_class->task_waking)
+		p->sched_class->task_waking(rq, p);
+
 	__task_rq_unlock(rq);
 
 	cpu = select_task_rq(p, SD_BALANCE_WAKE, wake_flags);
@@ -2475,8 +2479,8 @@ out_running:
 
 	p->state = TASK_RUNNING;
 #ifdef CONFIG_SMP
-	if (p->sched_class->task_wake_up)
-		p->sched_class->task_wake_up(rq, p);
+	if (p->sched_class->task_woken)
+		p->sched_class->task_woken(rq, p);
 
 	if (unlikely(rq->idle_stamp)) {
 		u64 delta = rq->clock - rq->idle_stamp;
@@ -2666,8 +2670,8 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
 	trace_sched_wakeup_new(rq, p, 1);
 	check_preempt_curr(rq, p, WF_FORK);
 #ifdef CONFIG_SMP
-	if (p->sched_class->task_wake_up)
-		p->sched_class->task_wake_up(rq, p);
+	if (p->sched_class->task_woken)
+		p->sched_class->task_woken(rq, p);
 #endif
 	task_rq_unlock(rq, &flags);
 }
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index d2ea2828164e..f48328ac216f 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -1472,7 +1472,7 @@ static void post_schedule_rt(struct rq *rq)
  * If we are not running and we are not going to reschedule soon, we should
  * try to push tasks away now
  */
-static void task_wake_up_rt(struct rq *rq, struct task_struct *p)
+static void task_woken_rt(struct rq *rq, struct task_struct *p)
 {
 	if (!task_running(rq, p) &&
 	    !test_tsk_need_resched(rq->curr) &&
@@ -1753,7 +1753,7 @@ static const struct sched_class rt_sched_class = {
 	.rq_offline             = rq_offline_rt,
 	.pre_schedule		= pre_schedule_rt,
 	.post_schedule		= post_schedule_rt,
-	.task_wake_up		= task_wake_up_rt,
+	.task_woken		= task_woken_rt,
 	.switched_from		= switched_from_rt,
 #endif
 
-- 
cgit v1.2.3


From 88ec22d3edb72b261f8628226cd543589a6d5e1b Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed, 16 Dec 2009 18:04:41 +0100
Subject: sched: Remove the cfs_rq dependency from set_task_cpu()

In order to remove the cfs_rq dependency from set_task_cpu() we
need to ensure the task is cfs_rq invariant for all callsites.

The simple approach is to substract cfs_rq->min_vruntime from
se->vruntime on dequeue, and add cfs_rq->min_vruntime on
enqueue.

However, this has the downside of breaking FAIR_SLEEPERS since
we loose the old vruntime as we only maintain the relative
position.

To solve this, we observe that we only migrate runnable tasks,
we do this using deactivate_task(.sleep=0) and
activate_task(.wakeup=0), therefore we can restrain the
min_vruntime invariance to that state.

The only other case is wakeup balancing, since we want to
maintain the old vruntime we cannot make it relative on dequeue,
but since we don't migrate inactive tasks, we can do so right
before we activate it again.

This is where we need the new pre-wakeup hook, we need to call
this while still holding the old rq->lock. We could fold it into
->select_task_rq(), but since that has multiple callsites and
would obfuscate the locking requirements, that seems like a
fudge.

This leaves the fork() case, simply make sure that ->task_fork()
leaves the ->vruntime in a relative state.

This covers all cases where set_task_cpu() gets called, and
ensures it sees a relative vruntime.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
LKML-Reference: <20091216170518.191697025@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h |  2 +-
 kernel/sched.c        |  6 +-----
 kernel/sched_fair.c   | 50 ++++++++++++++++++++++++++++++++++++++++++++------
 3 files changed, 46 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 2c9fa1ccebff..973b2b89f86d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1116,7 +1116,7 @@ struct sched_class {
 					 struct task_struct *task);
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
-	void (*moved_group) (struct task_struct *p);
+	void (*moved_group) (struct task_struct *p, int on_rq);
 #endif
 };
 
diff --git a/kernel/sched.c b/kernel/sched.c
index 6c571bdd5658..f92ce63edfff 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2038,8 +2038,6 @@ task_hot(struct task_struct *p, u64 now, struct sched_domain *sd)
 void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
 {
 	int old_cpu = task_cpu(p);
-	struct cfs_rq *old_cfsrq = task_cfs_rq(p),
-		      *new_cfsrq = cpu_cfs_rq(old_cfsrq, new_cpu);
 
 #ifdef CONFIG_SCHED_DEBUG
 	/*
@@ -2056,8 +2054,6 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
 		perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS,
 				     1, 1, NULL, 0);
 	}
-	p->se.vruntime -= old_cfsrq->min_vruntime -
-					 new_cfsrq->min_vruntime;
 
 	__set_task_cpu(p, new_cpu);
 }
@@ -10102,7 +10098,7 @@ void sched_move_task(struct task_struct *tsk)
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
 	if (tsk->sched_class->moved_group)
-		tsk->sched_class->moved_group(tsk);
+		tsk->sched_class->moved_group(tsk, on_rq);
 #endif
 
 	if (unlikely(running))
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index ec1d2715620c..42ac3c9f66f6 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -510,6 +510,7 @@ __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr,
 	curr->sum_exec_runtime += delta_exec;
 	schedstat_add(cfs_rq, exec_clock, delta_exec);
 	delta_exec_weighted = calc_delta_fair(delta_exec, curr);
+
 	curr->vruntime += delta_exec_weighted;
 	update_min_vruntime(cfs_rq);
 }
@@ -765,16 +766,26 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
 	se->vruntime = vruntime;
 }
 
+#define ENQUEUE_WAKEUP	1
+#define ENQUEUE_MIGRATE 2
+
 static void
-enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int wakeup)
+enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 {
+	/*
+	 * Update the normalized vruntime before updating min_vruntime
+	 * through callig update_curr().
+	 */
+	if (!(flags & ENQUEUE_WAKEUP) || (flags & ENQUEUE_MIGRATE))
+		se->vruntime += cfs_rq->min_vruntime;
+
 	/*
 	 * Update run-time statistics of the 'current'.
 	 */
 	update_curr(cfs_rq);
 	account_entity_enqueue(cfs_rq, se);
 
-	if (wakeup) {
+	if (flags & ENQUEUE_WAKEUP) {
 		place_entity(cfs_rq, se, 0);
 		enqueue_sleeper(cfs_rq, se);
 	}
@@ -828,6 +839,14 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep)
 		__dequeue_entity(cfs_rq, se);
 	account_entity_dequeue(cfs_rq, se);
 	update_min_vruntime(cfs_rq);
+
+	/*
+	 * Normalize the entity after updating the min_vruntime because the
+	 * update can refer to the ->curr item and we need to reflect this
+	 * movement in our normalized position.
+	 */
+	if (!sleep)
+		se->vruntime -= cfs_rq->min_vruntime;
 }
 
 /*
@@ -1038,13 +1057,19 @@ static void enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup)
 {
 	struct cfs_rq *cfs_rq;
 	struct sched_entity *se = &p->se;
+	int flags = 0;
+
+	if (wakeup)
+		flags |= ENQUEUE_WAKEUP;
+	if (p->state == TASK_WAKING)
+		flags |= ENQUEUE_MIGRATE;
 
 	for_each_sched_entity(se) {
 		if (se->on_rq)
 			break;
 		cfs_rq = cfs_rq_of(se);
-		enqueue_entity(cfs_rq, se, wakeup);
-		wakeup = 1;
+		enqueue_entity(cfs_rq, se, flags);
+		flags = ENQUEUE_WAKEUP;
 	}
 
 	hrtick_update(rq);
@@ -1120,6 +1145,14 @@ static void yield_task_fair(struct rq *rq)
 
 #ifdef CONFIG_SMP
 
+static void task_waking_fair(struct rq *rq, struct task_struct *p)
+{
+	struct sched_entity *se = &p->se;
+	struct cfs_rq *cfs_rq = cfs_rq_of(se);
+
+	se->vruntime -= cfs_rq->min_vruntime;
+}
+
 #ifdef CONFIG_FAIR_GROUP_SCHED
 /*
  * effective_load() calculates the load change as seen from the root_task_group
@@ -1978,6 +2011,8 @@ static void task_fork_fair(struct task_struct *p)
 		resched_task(rq->curr);
 	}
 
+	se->vruntime -= cfs_rq->min_vruntime;
+
 	raw_spin_unlock_irqrestore(&rq->lock, flags);
 }
 
@@ -2031,12 +2066,13 @@ static void set_curr_task_fair(struct rq *rq)
 }
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
-static void moved_group_fair(struct task_struct *p)
+static void moved_group_fair(struct task_struct *p, int on_rq)
 {
 	struct cfs_rq *cfs_rq = task_cfs_rq(p);
 
 	update_curr(cfs_rq);
-	place_entity(cfs_rq, &p->se, 1);
+	if (!on_rq)
+		place_entity(cfs_rq, &p->se, 1);
 }
 #endif
 
@@ -2076,6 +2112,8 @@ static const struct sched_class fair_sched_class = {
 	.move_one_task		= move_one_task_fair,
 	.rq_online		= rq_online_fair,
 	.rq_offline		= rq_offline_fair,
+
+	.task_waking		= task_waking_fair,
 #endif
 
 	.set_curr_task          = set_curr_task_fair,
-- 
cgit v1.2.3


From 70023de88c58a81a730ab4d13c51a30e537ec76e Mon Sep 17 00:00:00 2001
From: Shaohua Li <shaohua.li@intel.com>
Date: Thu, 29 Oct 2009 11:04:28 +0800
Subject: ACPI: Add a generic API for _OSC -v2

v2->v1:
.improve debug info as suggedted by Bjorn,Kenji
.API is using uuid string as suggested by Alexey

Add an API to execute _OSC. A lot of devices can have this method, so add a
generic API.

Signed-off-by: Shaohua Li <shaohua.li@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 drivers/acpi/bus.c   | 122 +++++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/acpi.h |   9 ++++
 2 files changed, 131 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
index 741191524353..12240be58f27 100644
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c
@@ -344,6 +344,128 @@ bool acpi_bus_can_wakeup(acpi_handle handle)
 
 EXPORT_SYMBOL(acpi_bus_can_wakeup);
 
+static void acpi_print_osc_error(acpi_handle handle,
+	struct acpi_osc_context *context, char *error)
+{
+	struct acpi_buffer buffer = {ACPI_ALLOCATE_BUFFER};
+	int i;
+
+	if (ACPI_FAILURE(acpi_get_name(handle, ACPI_FULL_PATHNAME, &buffer)))
+		printk(KERN_DEBUG "%s\n", error);
+	else {
+		printk(KERN_DEBUG "%s:%s\n", (char *)buffer.pointer, error);
+		kfree(buffer.pointer);
+	}
+	printk(KERN_DEBUG"_OSC request data:");
+	for (i = 0; i < context->cap.length; i += sizeof(u32))
+		printk("%x ", *((u32 *)(context->cap.pointer + i)));
+	printk("\n");
+}
+
+static u8 hex_val(unsigned char c)
+{
+	return isdigit(c) ? c - '0' : toupper(c) - 'A' + 10;
+}
+
+static acpi_status acpi_str_to_uuid(char *str, u8 *uuid)
+{
+	int i;
+	static int opc_map_to_uuid[16] = {6, 4, 2, 0, 11, 9, 16, 14, 19, 21,
+		24, 26, 28, 30, 32, 34};
+
+	if (strlen(str) != 36)
+		return AE_BAD_PARAMETER;
+	for (i = 0; i < 36; i++) {
+		if (i == 8 || i == 13 || i == 18 || i == 23) {
+			if (str[i] != '-')
+				return AE_BAD_PARAMETER;
+		} else if (!isxdigit(str[i]))
+			return AE_BAD_PARAMETER;
+	}
+	for (i = 0; i < 16; i++) {
+		uuid[i] = hex_val(str[opc_map_to_uuid[i]]) << 4;
+		uuid[i] |= hex_val(str[opc_map_to_uuid[i] + 1]);
+	}
+	return AE_OK;
+}
+
+acpi_status acpi_run_osc(acpi_handle handle, struct acpi_osc_context *context)
+{
+	acpi_status status;
+	struct acpi_object_list input;
+	union acpi_object in_params[4];
+	union acpi_object *out_obj;
+	u8 uuid[16];
+	u32 errors;
+
+	if (!context)
+		return AE_ERROR;
+	if (ACPI_FAILURE(acpi_str_to_uuid(context->uuid_str, uuid)))
+		return AE_ERROR;
+	context->ret.length = ACPI_ALLOCATE_BUFFER;
+	context->ret.pointer = NULL;
+
+	/* Setting up input parameters */
+	input.count = 4;
+	input.pointer = in_params;
+	in_params[0].type 		= ACPI_TYPE_BUFFER;
+	in_params[0].buffer.length 	= 16;
+	in_params[0].buffer.pointer	= uuid;
+	in_params[1].type 		= ACPI_TYPE_INTEGER;
+	in_params[1].integer.value 	= context->rev;
+	in_params[2].type 		= ACPI_TYPE_INTEGER;
+	in_params[2].integer.value	= context->cap.length/sizeof(u32);
+	in_params[3].type		= ACPI_TYPE_BUFFER;
+	in_params[3].buffer.length 	= context->cap.length;
+	in_params[3].buffer.pointer 	= context->cap.pointer;
+
+	status = acpi_evaluate_object(handle, "_OSC", &input, &context->ret);
+	if (ACPI_FAILURE(status))
+		return status;
+
+	/* return buffer should have the same length as cap buffer */
+	if (context->ret.length != context->cap.length)
+		return AE_NULL_OBJECT;
+
+	out_obj = context->ret.pointer;
+	if (out_obj->type != ACPI_TYPE_BUFFER) {
+		acpi_print_osc_error(handle, context,
+			"_OSC evaluation returned wrong type");
+		status = AE_TYPE;
+		goto out_kfree;
+	}
+	/* Need to ignore the bit0 in result code */
+	errors = *((u32 *)out_obj->buffer.pointer) & ~(1 << 0);
+	if (errors) {
+		if (errors & OSC_REQUEST_ERROR)
+			acpi_print_osc_error(handle, context,
+				"_OSC request failed");
+		if (errors & OSC_INVALID_UUID_ERROR)
+			acpi_print_osc_error(handle, context,
+				"_OSC invalid UUID");
+		if (errors & OSC_INVALID_REVISION_ERROR)
+			acpi_print_osc_error(handle, context,
+				"_OSC invalid revision");
+		if (errors & OSC_CAPABILITIES_MASK_ERROR) {
+			if (((u32 *)context->cap.pointer)[OSC_QUERY_TYPE]
+			    & OSC_QUERY_ENABLE)
+				goto out_success;
+			status = AE_SUPPORT;
+			goto out_kfree;
+		}
+		status = AE_ERROR;
+		goto out_kfree;
+	}
+out_success:
+	return AE_OK;
+
+out_kfree:
+	kfree(context->ret.pointer);
+	context->ret.pointer = NULL;
+	return status;
+}
+EXPORT_SYMBOL(acpi_run_osc);
+
 /* --------------------------------------------------------------------------
                                 Event Management
    -------------------------------------------------------------------------- */
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index dfcd920c3e54..3247e09db20d 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -253,6 +253,13 @@ void __init acpi_old_suspend_ordering(void);
 void __init acpi_s4_no_nvs(void);
 #endif /* CONFIG_PM_SLEEP */
 
+struct acpi_osc_context {
+	char *uuid_str; /* uuid string */
+	int rev;
+	struct acpi_buffer cap; /* arg2/arg3 */
+	struct acpi_buffer ret; /* free by caller if success */
+};
+
 #define OSC_QUERY_TYPE			0
 #define OSC_SUPPORT_TYPE 		1
 #define OSC_CONTROL_TYPE		2
@@ -265,6 +272,8 @@ void __init acpi_s4_no_nvs(void);
 #define OSC_INVALID_REVISION_ERROR	8
 #define OSC_CAPABILITIES_MASK_ERROR	16
 
+acpi_status acpi_run_osc(acpi_handle handle, struct acpi_osc_context *context);
+
 /* _OSC DW1 Definition (OS Support Fields) */
 #define OSC_EXT_PCI_CONFIG_SUPPORT		1
 #define OSC_ACTIVE_STATE_PWR_SUPPORT 		2
-- 
cgit v1.2.3


From 3a9622dc4659af44a8098a233f65c51e495ff0a5 Mon Sep 17 00:00:00 2001
From: Shaohua Li <shaohua.li@intel.com>
Date: Thu, 29 Oct 2009 11:04:50 +0800
Subject: ACPI: cleanup pci_root _OSC code.

Signed-off-by: Shaohua Li <shaohua.li@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 drivers/acpi/pci_root.c | 76 +++++++++----------------------------------------
 include/linux/acpi.h    |  5 ++--
 2 files changed, 17 insertions(+), 64 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c
index 1af808171d46..101cce3681d1 100644
--- a/drivers/acpi/pci_root.c
+++ b/drivers/acpi/pci_root.c
@@ -202,72 +202,24 @@ static void acpi_pci_bridge_scan(struct acpi_device *device)
 		}
 }
 
-static u8 OSC_UUID[16] = {0x5B, 0x4D, 0xDB, 0x33, 0xF7, 0x1F, 0x1C, 0x40,
-			  0x96, 0x57, 0x74, 0x41, 0xC0, 0x3D, 0xD7, 0x66};
+static u8 pci_osc_uuid_str[] = "33DB4D5B-1FF7-401C-9657-7441C03DD766";
 
 static acpi_status acpi_pci_run_osc(acpi_handle handle,
 				    const u32 *capbuf, u32 *retval)
 {
+	struct acpi_osc_context context = {
+		.uuid_str = pci_osc_uuid_str,
+		.rev = 1,
+		.cap.length = 12,
+		.cap.pointer = (void *)capbuf,
+	};
 	acpi_status status;
-	struct acpi_object_list input;
-	union acpi_object in_params[4];
-	struct acpi_buffer output = {ACPI_ALLOCATE_BUFFER, NULL};
-	union acpi_object *out_obj;
-	u32 errors;
-
-	/* Setting up input parameters */
-	input.count = 4;
-	input.pointer = in_params;
-	in_params[0].type 		= ACPI_TYPE_BUFFER;
-	in_params[0].buffer.length 	= 16;
-	in_params[0].buffer.pointer	= OSC_UUID;
-	in_params[1].type 		= ACPI_TYPE_INTEGER;
-	in_params[1].integer.value 	= 1;
-	in_params[2].type 		= ACPI_TYPE_INTEGER;
-	in_params[2].integer.value	= 3;
-	in_params[3].type		= ACPI_TYPE_BUFFER;
-	in_params[3].buffer.length 	= 12;
-	in_params[3].buffer.pointer 	= (u8 *)capbuf;
-
-	status = acpi_evaluate_object(handle, "_OSC", &input, &output);
-	if (ACPI_FAILURE(status))
-		return status;
 
-	if (!output.length)
-		return AE_NULL_OBJECT;
-
-	out_obj = output.pointer;
-	if (out_obj->type != ACPI_TYPE_BUFFER) {
-		printk(KERN_DEBUG "_OSC evaluation returned wrong type\n");
-		status = AE_TYPE;
-		goto out_kfree;
-	}
-	/* Need to ignore the bit0 in result code */
-	errors = *((u32 *)out_obj->buffer.pointer) & ~(1 << 0);
-	if (errors) {
-		if (errors & OSC_REQUEST_ERROR)
-			printk(KERN_DEBUG "_OSC request failed\n");
-		if (errors & OSC_INVALID_UUID_ERROR)
-			printk(KERN_DEBUG "_OSC invalid UUID\n");
-		if (errors & OSC_INVALID_REVISION_ERROR)
-			printk(KERN_DEBUG "_OSC invalid revision\n");
-		if (errors & OSC_CAPABILITIES_MASK_ERROR) {
-			if (capbuf[OSC_QUERY_TYPE] & OSC_QUERY_ENABLE)
-				goto out_success;
-			printk(KERN_DEBUG
-			       "Firmware did not grant requested _OSC control\n");
-			status = AE_SUPPORT;
-			goto out_kfree;
-		}
-		status = AE_ERROR;
-		goto out_kfree;
+	status = acpi_run_osc(handle, &context);
+	if (ACPI_SUCCESS(status)) {
+		*retval = *((u32 *)(context.ret.pointer + 8));
+		kfree(context.ret.pointer);
 	}
-out_success:
-	*retval = *((u32 *)(out_obj->buffer.pointer + 8));
-	status = AE_OK;
-
-out_kfree:
-	kfree(output.pointer);
 	return status;
 }
 
@@ -277,10 +229,10 @@ static acpi_status acpi_pci_query_osc(struct acpi_pci_root *root, u32 flags)
 	u32 support_set, result, capbuf[3];
 
 	/* do _OSC query for all possible controls */
-	support_set = root->osc_support_set | (flags & OSC_SUPPORT_MASKS);
+	support_set = root->osc_support_set | (flags & OSC_PCI_SUPPORT_MASKS);
 	capbuf[OSC_QUERY_TYPE] = OSC_QUERY_ENABLE;
 	capbuf[OSC_SUPPORT_TYPE] = support_set;
-	capbuf[OSC_CONTROL_TYPE] = OSC_CONTROL_MASKS;
+	capbuf[OSC_CONTROL_TYPE] = OSC_PCI_CONTROL_MASKS;
 
 	status = acpi_pci_run_osc(root->device->handle, capbuf, &result);
 	if (ACPI_SUCCESS(status)) {
@@ -427,7 +379,7 @@ acpi_status acpi_pci_osc_control_set(acpi_handle handle, u32 flags)
 	if (ACPI_FAILURE(status))
 		return status;
 
-	control_req = (flags & OSC_CONTROL_MASKS);
+	control_req = (flags & OSC_PCI_CONTROL_MASKS);
 	if (!control_req)
 		return AE_TYPE;
 
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 3247e09db20d..535beecc37cf 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -263,7 +263,6 @@ struct acpi_osc_context {
 #define OSC_QUERY_TYPE			0
 #define OSC_SUPPORT_TYPE 		1
 #define OSC_CONTROL_TYPE		2
-#define OSC_SUPPORT_MASKS		0x1f
 
 /* _OSC DW0 Definition */
 #define OSC_QUERY_ENABLE		1
@@ -274,12 +273,14 @@ struct acpi_osc_context {
 
 acpi_status acpi_run_osc(acpi_handle handle, struct acpi_osc_context *context);
 
+/* PCI defined _OSC bits */
 /* _OSC DW1 Definition (OS Support Fields) */
 #define OSC_EXT_PCI_CONFIG_SUPPORT		1
 #define OSC_ACTIVE_STATE_PWR_SUPPORT 		2
 #define OSC_CLOCK_PWR_CAPABILITY_SUPPORT	4
 #define OSC_PCI_SEGMENT_GROUPS_SUPPORT		8
 #define OSC_MSI_SUPPORT				16
+#define OSC_PCI_SUPPORT_MASKS			0x1f
 
 /* _OSC DW1 Definition (OS Control Fields) */
 #define OSC_PCI_EXPRESS_NATIVE_HP_CONTROL	1
@@ -288,7 +289,7 @@ acpi_status acpi_run_osc(acpi_handle handle, struct acpi_osc_context *context);
 #define OSC_PCI_EXPRESS_AER_CONTROL		8
 #define OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL	16
 
-#define OSC_CONTROL_MASKS 	(OSC_PCI_EXPRESS_NATIVE_HP_CONTROL | 	\
+#define OSC_PCI_CONTROL_MASKS 	(OSC_PCI_EXPRESS_NATIVE_HP_CONTROL | 	\
 				OSC_SHPC_NATIVE_HP_CONTROL | 		\
 				OSC_PCI_EXPRESS_PME_CONTROL |		\
 				OSC_PCI_EXPRESS_AER_CONTROL |		\
-- 
cgit v1.2.3


From 3563ff964fdc36358cef0330936fdac28e65142a Mon Sep 17 00:00:00 2001
From: Shaohua Li <shaohua.li@intel.com>
Date: Thu, 29 Oct 2009 11:05:05 +0800
Subject: ACPI: Add platform-wide _OSC support.

Signed-off-by: Shaohua Li <shaohua.li@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 drivers/acpi/bus.c   | 26 ++++++++++++++++++++++++++
 include/linux/acpi.h |  7 +++++++
 2 files changed, 33 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
index 12240be58f27..65f7e335f122 100644
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c
@@ -466,6 +466,30 @@ out_kfree:
 }
 EXPORT_SYMBOL(acpi_run_osc);
 
+static u8 sb_uuid_str[] = "0811B06E-4A27-44F9-8D60-3CBBC22E7B48";
+static void acpi_bus_osc_support(void)
+{
+	u32 capbuf[2];
+	struct acpi_osc_context context = {
+		.uuid_str = sb_uuid_str,
+		.rev = 1,
+		.cap.length = 8,
+		.cap.pointer = capbuf,
+	};
+	acpi_handle handle;
+
+	capbuf[OSC_QUERY_TYPE] = OSC_QUERY_ENABLE;
+	capbuf[OSC_SUPPORT_TYPE] = OSC_SB_PR3_SUPPORT; /* _PR3 is in use */
+#ifdef CONFIG_ACPI_PROCESSOR_AGGREGATOR
+	capbuf[OSC_SUPPORT_TYPE] |= OSC_SB_PAD_SUPPORT;
+#endif
+	if (ACPI_FAILURE(acpi_get_handle(NULL, "\\_SB", &handle)))
+		return;
+	if (ACPI_SUCCESS(acpi_run_osc(handle, &context)))
+		kfree(context.ret.pointer);
+	/* do we need to check the returned cap? Sounds no */
+}
+
 /* --------------------------------------------------------------------------
                                 Event Management
    -------------------------------------------------------------------------- */
@@ -856,6 +880,8 @@ static int __init acpi_bus_init(void)
 	status = acpi_ec_ecdt_probe();
 	/* Ignore result. Not having an ECDT is not fatal. */
 
+	acpi_bus_osc_support();
+
 	status = acpi_initialize_objects(ACPI_FULL_INITIALIZATION);
 	if (ACPI_FAILURE(status)) {
 		printk(KERN_ERR PREFIX "Unable to initialize ACPI objects\n");
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 535beecc37cf..e11090d462d2 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -273,6 +273,13 @@ struct acpi_osc_context {
 
 acpi_status acpi_run_osc(acpi_handle handle, struct acpi_osc_context *context);
 
+/* platform-wide _OSC bits */
+#define OSC_SB_PAD_SUPPORT		1
+#define OSC_SB_PPC_OST_SUPPORT		2
+#define OSC_SB_PR3_SUPPORT		4
+#define OSC_SB_CPUHP_OST_SUPPORT	8
+#define OSC_SB_APEI_SUPPORT		16
+
 /* PCI defined _OSC bits */
 /* _OSC DW1 Definition (OS Support Fields) */
 #define OSC_EXT_PCI_CONFIG_SUPPORT		1
-- 
cgit v1.2.3


From 2ee1abad73a12df5521cd3f017f081f1f684a361 Mon Sep 17 00:00:00 2001
From: Dave Chinner <dgc@sgi.com>
Date: Tue, 24 Nov 2009 18:03:15 +0000
Subject: xfs: improve metadata I/O merging in the elevator

Change all async metadata buffers to use [READ|WRITE]_META I/O types
so that the I/O doesn't get issued immediately. This allows merging of
adjacent metadata requests but still prioritises them over bulk data.
This shows a 10-15% improvement in sequential create speed of small
files.

Don't include the log buffers in this classification - leave them as
sync types so they are issued immediately.

Signed-off-by: Dave Chinner <dgc@sgi.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Alex Elder <aelder@sgi.com>
---
 fs/xfs/linux-2.6/xfs_buf.c | 6 +++++-
 fs/xfs/linux-2.6/xfs_buf.h | 1 +
 fs/xfs/xfs_log.c           | 2 ++
 include/linux/fs.h         | 1 +
 4 files changed, 9 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index b4c7d4248aac..162359b664ca 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -1149,10 +1149,14 @@ _xfs_buf_ioapply(
 	if (bp->b_flags & XBF_ORDERED) {
 		ASSERT(!(bp->b_flags & XBF_READ));
 		rw = WRITE_BARRIER;
-	} else if (bp->b_flags & _XBF_RUN_QUEUES) {
+	} else if (bp->b_flags & XBF_LOG_BUFFER) {
 		ASSERT(!(bp->b_flags & XBF_READ_AHEAD));
 		bp->b_flags &= ~_XBF_RUN_QUEUES;
 		rw = (bp->b_flags & XBF_WRITE) ? WRITE_SYNC : READ_SYNC;
+	} else if (bp->b_flags & _XBF_RUN_QUEUES) {
+		ASSERT(!(bp->b_flags & XBF_READ_AHEAD));
+		bp->b_flags &= ~_XBF_RUN_QUEUES;
+		rw = (bp->b_flags & XBF_WRITE) ? WRITE_META : READ_META;
 	} else {
 		rw = (bp->b_flags & XBF_WRITE) ? WRITE :
 		     (bp->b_flags & XBF_READ_AHEAD) ? READA : READ;
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index a509f4addc2a..a34c7b54822d 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -55,6 +55,7 @@ typedef enum {
 	XBF_FS_MANAGED = (1 << 8),  /* filesystem controls freeing memory  */
  	XBF_ORDERED = (1 << 11),    /* use ordered writes		   */
 	XBF_READ_AHEAD = (1 << 12), /* asynchronous read-ahead		   */
+	XBF_LOG_BUFFER = (1 << 13), /* this is a buffer used for the log   */
 
 	/* flags used only as arguments to access routines */
 	XBF_LOCK = (1 << 14),       /* lock requested			   */
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 4cb1792040e3..600b5b06aaeb 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -1441,6 +1441,7 @@ xlog_sync(xlog_t		*log,
 	XFS_BUF_ZEROFLAGS(bp);
 	XFS_BUF_BUSY(bp);
 	XFS_BUF_ASYNC(bp);
+	bp->b_flags |= XBF_LOG_BUFFER;
 	/*
 	 * Do an ordered write for the log block.
 	 * Its unnecessary to flush the first split block in the log wrap case.
@@ -1478,6 +1479,7 @@ xlog_sync(xlog_t		*log,
 		XFS_BUF_ZEROFLAGS(bp);
 		XFS_BUF_BUSY(bp);
 		XFS_BUF_ASYNC(bp);
+		bp->b_flags |= XBF_LOG_BUFFER;
 		if (log->l_mp->m_flags & XFS_MOUNT_BARRIER)
 			XFS_BUF_ORDERED(bp);
 		dptr = XFS_BUF_PTR(bp);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index b23a7018eb90..cf7fc8a7fe6a 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -152,6 +152,7 @@ struct inodes_stat_t {
 #define WRITE_SYNC_PLUG	(WRITE | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_NOIDLE))
 #define WRITE_SYNC	(WRITE_SYNC_PLUG | (1 << BIO_RW_UNPLUG))
 #define WRITE_ODIRECT_PLUG	(WRITE | (1 << BIO_RW_SYNCIO))
+#define WRITE_META	(WRITE | (1 << BIO_RW_META))
 #define SWRITE_SYNC_PLUG	\
 			(SWRITE | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_NOIDLE))
 #define SWRITE_SYNC	(SWRITE_SYNC_PLUG | (1 << BIO_RW_UNPLUG))
-- 
cgit v1.2.3


From 6e1415467614e854fee660ff6648bd10fa976e95 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 15 Dec 2009 19:27:45 +0000
Subject: NOMMU: Optimise away the {dac_,}mmap_min_addr tests

In NOMMU mode clamp dac_mmap_min_addr to zero to cause the tests on it to be
skipped by the compiler.  We do this as the minimum mmap address doesn't make
any sense in NOMMU mode.

mmap_min_addr and round_hint_to_min() can be discarded entirely in NOMMU mode.

Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Eric Paris <eparis@redhat.com>
Signed-off-by: James Morris <jmorris@namei.org>
---
 include/linux/security.h | 7 +++++++
 kernel/sysctl.c          | 2 ++
 mm/Kconfig               | 1 +
 security/Makefile        | 3 ++-
 4 files changed, 12 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/security.h b/include/linux/security.h
index 466cbadbd1ef..2c627d361c02 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -95,8 +95,13 @@ struct seq_file;
 extern int cap_netlink_send(struct sock *sk, struct sk_buff *skb);
 extern int cap_netlink_recv(struct sk_buff *skb, int cap);
 
+#ifdef CONFIG_MMU
 extern unsigned long mmap_min_addr;
 extern unsigned long dac_mmap_min_addr;
+#else
+#define dac_mmap_min_addr	0UL
+#endif
+
 /*
  * Values used in the task_security_ops calls
  */
@@ -121,6 +126,7 @@ struct request_sock;
 #define LSM_UNSAFE_PTRACE	2
 #define LSM_UNSAFE_PTRACE_CAP	4
 
+#ifdef CONFIG_MMU
 /*
  * If a hint addr is less than mmap_min_addr change hint to be as
  * low as possible but still greater than mmap_min_addr
@@ -135,6 +141,7 @@ static inline unsigned long round_hint_to_min(unsigned long hint)
 }
 extern int mmap_min_addr_handler(struct ctl_table *table, int write,
 				 void __user *buffer, size_t *lenp, loff_t *ppos);
+#endif
 
 #ifdef CONFIG_SECURITY
 
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 45e4bef0012a..856a24eadf7e 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1214,6 +1214,7 @@ static struct ctl_table vm_table[] = {
 		.proc_handler	= proc_dointvec_jiffies,
 	},
 #endif
+#ifdef CONFIG_MMU
 	{
 		.procname	= "mmap_min_addr",
 		.data		= &dac_mmap_min_addr,
@@ -1221,6 +1222,7 @@ static struct ctl_table vm_table[] = {
 		.mode		= 0644,
 		.proc_handler	= mmap_min_addr_handler,
 	},
+#endif
 #ifdef CONFIG_NUMA
 	{
 		.procname	= "numa_zonelist_order",
diff --git a/mm/Kconfig b/mm/Kconfig
index 43ea8c3a2bbf..ee9f3e0f2b69 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -221,6 +221,7 @@ config KSM
 
 config DEFAULT_MMAP_MIN_ADDR
         int "Low address space to protect from user allocation"
+	depends on MMU
         default 4096
         help
 	  This is the portion of low virtual memory which should be protected
diff --git a/security/Makefile b/security/Makefile
index bb44e350c618..da20a193c8dd 100644
--- a/security/Makefile
+++ b/security/Makefile
@@ -8,7 +8,8 @@ subdir-$(CONFIG_SECURITY_SMACK)		+= smack
 subdir-$(CONFIG_SECURITY_TOMOYO)        += tomoyo
 
 # always enable default capabilities
-obj-y		+= commoncap.o min_addr.o
+obj-y					+= commoncap.o
+obj-$(CONFIG_MMU)			+= min_addr.o
 
 # Object file lists
 obj-$(CONFIG_SECURITY)			+= security.o capability.o
-- 
cgit v1.2.3


From 47376ceba54600cec4dd9e7c4fe8b98e4269633a Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Wed, 16 Dec 2009 23:25:50 +0100
Subject: reiserfs: Fix reiserfs lock <-> inode mutex dependency inversion

The reiserfs lock -> inode mutex dependency gets inverted when we
relax the lock while walking to the tree.

To fix this, use a specialized version of reiserfs_mutex_lock_safe
that takes care of mutex subclasses. Then we can grab the inode
mutex with I_MUTEX_XATTR subclass without any reiserfs lock
dependency.

This fixes the following report:

[ INFO: possible circular locking dependency detected ]
2.6.32-06793-gf405425-dirty #2
-------------------------------------------------------
mv/18566 is trying to acquire lock:
 (&REISERFS_SB(s)->lock){+.+.+.}, at: [<c1110708>] reiserfs_write_lock+0x28=
/0x40

but task is already holding lock:
 (&sb->s_type->i_mutex_key#5/3){+.+.+.}, at: [<c111033c>]
reiserfs_for_each_xattr+0x10c/0x380

which lock already depends on the new lock.

the existing dependency chain (in reverse order) is:

-> #1 (&sb->s_type->i_mutex_key#5/3){+.+.+.}:
       [<c104f723>] validate_chain+0xa23/0xf70
       [<c1050155>] __lock_acquire+0x4e5/0xa70
       [<c105075a>] lock_acquire+0x7a/0xa0
       [<c134c76f>] mutex_lock_nested+0x5f/0x2b0
       [<c11102b4>] reiserfs_for_each_xattr+0x84/0x380
       [<c1110615>] reiserfs_delete_xattrs+0x15/0x50
       [<c10ef57f>] reiserfs_delete_inode+0x8f/0x140
       [<c10a565c>] generic_delete_inode+0x9c/0x150
       [<c10a574d>] generic_drop_inode+0x3d/0x60
       [<c10a4667>] iput+0x47/0x50
       [<c109cc0b>] do_unlinkat+0xdb/0x160
       [<c109cca0>] sys_unlink+0x10/0x20
       [<c1002c50>] sysenter_do_call+0x12/0x36

-> #0 (&REISERFS_SB(s)->lock){+.+.+.}:
       [<c104fc68>] validate_chain+0xf68/0xf70
       [<c1050155>] __lock_acquire+0x4e5/0xa70
       [<c105075a>] lock_acquire+0x7a/0xa0
       [<c134c76f>] mutex_lock_nested+0x5f/0x2b0
       [<c1110708>] reiserfs_write_lock+0x28/0x40
       [<c1103d6b>] search_by_key+0x1f7b/0x21b0
       [<c10e73ef>] search_by_entry_key+0x1f/0x3b0
       [<c10e77f7>] reiserfs_find_entry+0x77/0x400
       [<c10e81e5>] reiserfs_lookup+0x85/0x130
       [<c109a144>] __lookup_hash+0xb4/0x110
       [<c109b763>] lookup_one_len+0xb3/0x100
       [<c1110350>] reiserfs_for_each_xattr+0x120/0x380
       [<c1110615>] reiserfs_delete_xattrs+0x15/0x50
       [<c10ef57f>] reiserfs_delete_inode+0x8f/0x140
       [<c10a565c>] generic_delete_inode+0x9c/0x150
       [<c10a574d>] generic_drop_inode+0x3d/0x60
       [<c10a4667>] iput+0x47/0x50
       [<c10a1c4f>] dentry_iput+0x6f/0xf0
       [<c10a1d74>] d_kill+0x24/0x50
       [<c10a396b>] dput+0x5b/0x120
       [<c109ca89>] sys_renameat+0x1b9/0x230
       [<c109cb28>] sys_rename+0x28/0x30
       [<c1002c50>] sysenter_do_call+0x12/0x36

other info that might help us debug this:

2 locks held by mv/18566:
 #0:  (&sb->s_type->i_mutex_key#5/1){+.+.+.}, at: [<c109b6ac>]
lock_rename+0xcc/0xd0
 #1:  (&sb->s_type->i_mutex_key#5/3){+.+.+.}, at: [<c111033c>]
reiserfs_for_each_xattr+0x10c/0x380

stack backtrace:
Pid: 18566, comm: mv Tainted: G         C 2.6.32-06793-gf405425-dirty #2
Call Trace:
 [<c134b252>] ? printk+0x18/0x1e
 [<c104e790>] print_circular_bug+0xc0/0xd0
 [<c104fc68>] validate_chain+0xf68/0xf70
 [<c104c8cb>] ? trace_hardirqs_off+0xb/0x10
 [<c1050155>] __lock_acquire+0x4e5/0xa70
 [<c105075a>] lock_acquire+0x7a/0xa0
 [<c1110708>] ? reiserfs_write_lock+0x28/0x40
 [<c134c76f>] mutex_lock_nested+0x5f/0x2b0
 [<c1110708>] ? reiserfs_write_lock+0x28/0x40
 [<c1110708>] ? reiserfs_write_lock+0x28/0x40
 [<c134b60a>] ? schedule+0x27a/0x440
 [<c1110708>] reiserfs_write_lock+0x28/0x40
 [<c1103d6b>] search_by_key+0x1f7b/0x21b0
 [<c1050176>] ? __lock_acquire+0x506/0xa70
 [<c1051267>] ? lock_release_non_nested+0x1e7/0x340
 [<c1110708>] ? reiserfs_write_lock+0x28/0x40
 [<c104e354>] ? trace_hardirqs_on_caller+0x124/0x170
 [<c104e3ab>] ? trace_hardirqs_on+0xb/0x10
 [<c1042a55>] ? T.316+0x15/0x1a0
 [<c1042d2d>] ? sched_clock_cpu+0x9d/0x100
 [<c10e73ef>] search_by_entry_key+0x1f/0x3b0
 [<c134bf2a>] ? __mutex_unlock_slowpath+0x9a/0x120
 [<c104e354>] ? trace_hardirqs_on_caller+0x124/0x170
 [<c10e77f7>] reiserfs_find_entry+0x77/0x400
 [<c10e81e5>] reiserfs_lookup+0x85/0x130
 [<c1042d2d>] ? sched_clock_cpu+0x9d/0x100
 [<c109a144>] __lookup_hash+0xb4/0x110
 [<c109b763>] lookup_one_len+0xb3/0x100
 [<c1110350>] reiserfs_for_each_xattr+0x120/0x380
 [<c110ffe0>] ? delete_one_xattr+0x0/0x1c0
 [<c1003342>] ? math_error+0x22/0x150
 [<c1110708>] ? reiserfs_write_lock+0x28/0x40
 [<c1110615>] reiserfs_delete_xattrs+0x15/0x50
 [<c1110708>] ? reiserfs_write_lock+0x28/0x40
 [<c10ef57f>] reiserfs_delete_inode+0x8f/0x140
 [<c10a561f>] ? generic_delete_inode+0x5f/0x150
 [<c10ef4f0>] ? reiserfs_delete_inode+0x0/0x140
 [<c10a565c>] generic_delete_inode+0x9c/0x150
 [<c10a574d>] generic_drop_inode+0x3d/0x60
 [<c10a4667>] iput+0x47/0x50
 [<c10a1c4f>] dentry_iput+0x6f/0xf0
 [<c10a1d74>] d_kill+0x24/0x50
 [<c10a396b>] dput+0x5b/0x120
 [<c109ca89>] sys_renameat+0x1b9/0x230
 [<c1042d2d>] ? sched_clock_cpu+0x9d/0x100
 [<c104c8cb>] ? trace_hardirqs_off+0xb/0x10
 [<c1042dde>] ? cpu_clock+0x4e/0x60
 [<c1350825>] ? do_page_fault+0x155/0x370
 [<c1041816>] ? up_read+0x16/0x30
 [<c1350825>] ? do_page_fault+0x155/0x370
 [<c109cb28>] sys_rename+0x28/0x30
 [<c1002c50>] sysenter_do_call+0x12/0x36

Reported-by: Alexander Beregalov <a.beregalov@gmail.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Chris Mason <chris.mason@oracle.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
---
 fs/reiserfs/xattr.c         | 3 ++-
 include/linux/reiserfs_fs.h | 9 +++++++++
 2 files changed, 11 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 58aa8e75f7f5..8891cd88a3f4 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -243,7 +243,8 @@ static int reiserfs_for_each_xattr(struct inode *inode,
 		goto out_dir;
 	}
 
-	mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_XATTR);
+	reiserfs_mutex_lock_nested_safe(&dir->d_inode->i_mutex, I_MUTEX_XATTR,
+					inode->i_sb);
 	buf.xadir = dir;
 	err = reiserfs_readdir_dentry(dir, &buf, fill_with_dentries, &pos);
 	while ((err == 0 || err == -ENOSPC) && buf.count) {
diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h
index a05b4a20768d..4351b49e2b1e 100644
--- a/include/linux/reiserfs_fs.h
+++ b/include/linux/reiserfs_fs.h
@@ -97,6 +97,15 @@ static inline void reiserfs_mutex_lock_safe(struct mutex *m,
 	reiserfs_write_lock(s);
 }
 
+static inline void
+reiserfs_mutex_lock_nested_safe(struct mutex *m, unsigned int subclass,
+			       struct super_block *s)
+{
+	reiserfs_write_unlock(s);
+	mutex_lock_nested(m, subclass);
+	reiserfs_write_lock(s);
+}
+
 /*
  * When we schedule, we usually want to also release the write lock,
  * according to the previous bkl based locking scheme of reiserfs.
-- 
cgit v1.2.3


From 329962503692b42d8088f31584e42d52db179d52 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Tue, 15 Dec 2009 17:59:02 -0800
Subject: x86: Fix checking of SRAT when node 0 ram is not from 0

Found one system that boot from socket1 instead of socket0, SRAT get rejected...

[    0.000000] SRAT: Node 1 PXM 0 0-a0000
[    0.000000] SRAT: Node 1 PXM 0 100000-80000000
[    0.000000] SRAT: Node 1 PXM 0 100000000-2080000000
[    0.000000] SRAT: Node 0 PXM 1 2080000000-4080000000
[    0.000000] SRAT: Node 2 PXM 2 4080000000-6080000000
[    0.000000] SRAT: Node 3 PXM 3 6080000000-8080000000
[    0.000000] SRAT: Node 4 PXM 4 8080000000-a080000000
[    0.000000] SRAT: Node 5 PXM 5 a080000000-c080000000
[    0.000000] SRAT: Node 6 PXM 6 c080000000-e080000000
[    0.000000] SRAT: Node 7 PXM 7 e080000000-10080000000
...
[    0.000000] NUMA: Allocated memnodemap from 500000 - 701040
[    0.000000] NUMA: Using 20 for the hash shift.
[    0.000000] Adding active range (0, 0x2080000, 0x4080000) 0 entries of 3200 used
[    0.000000] Adding active range (1, 0x0, 0x96) 1 entries of 3200 used
[    0.000000] Adding active range (1, 0x100, 0x7f750) 2 entries of 3200 used
[    0.000000] Adding active range (1, 0x100000, 0x2080000) 3 entries of 3200 used
[    0.000000] Adding active range (2, 0x4080000, 0x6080000) 4 entries of 3200 used
[    0.000000] Adding active range (3, 0x6080000, 0x8080000) 5 entries of 3200 used
[    0.000000] Adding active range (4, 0x8080000, 0xa080000) 6 entries of 3200 used
[    0.000000] Adding active range (5, 0xa080000, 0xc080000) 7 entries of 3200 used
[    0.000000] Adding active range (6, 0xc080000, 0xe080000) 8 entries of 3200 used
[    0.000000] Adding active range (7, 0xe080000, 0x10080000) 9 entries of 3200 used
[    0.000000] SRAT: PXMs only cover 917504MB of your 1048566MB e820 RAM. Not used.
[    0.000000] SRAT: SRAT not used.

the early_node_map is not sorted because node0 with non zero start come first.

so try to sort it right away after all regions are registered.

also fixs refression by 8716273c (x86: Export srat physical topology)

-v2: make it more solid to handle cross node case like node0 [0,4g), [8,12g) and node1 [4g, 8g), [12g, 16g)
-v3: update comments.

Reported-and-tested-by: Jens Axboe <jens.axboe@oracle.com>
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
LKML-Reference: <4B2579D2.3010201@kernel.org>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/mm/srat_32.c | 2 ++
 arch/x86/mm/srat_64.c | 4 +++-
 include/linux/mm.h    | 3 +++
 mm/page_alloc.c       | 4 ++--
 4 files changed, 10 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/mm/srat_32.c b/arch/x86/mm/srat_32.c
index 6f8aa33031c7..9324f13492d5 100644
--- a/arch/x86/mm/srat_32.c
+++ b/arch/x86/mm/srat_32.c
@@ -267,6 +267,8 @@ int __init get_memcfg_from_srat(void)
 		e820_register_active_regions(chunk->nid, chunk->start_pfn,
 					     min(chunk->end_pfn, max_pfn));
 	}
+	/* for out of order entries in SRAT */
+	sort_node_map();
 
 	for_each_online_node(nid) {
 		unsigned long start = node_start_pfn[nid];
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index d89075489664..a27124185fc1 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -317,7 +317,7 @@ static int __init nodes_cover_memory(const struct bootnode *nodes)
 		unsigned long s = nodes[i].start >> PAGE_SHIFT;
 		unsigned long e = nodes[i].end >> PAGE_SHIFT;
 		pxmram += e - s;
-		pxmram -= absent_pages_in_range(s, e);
+		pxmram -= __absent_pages_in_range(i, s, e);
 		if ((long)pxmram < 0)
 			pxmram = 0;
 	}
@@ -373,6 +373,8 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end)
 	for_each_node_mask(i, nodes_parsed)
 		e820_register_active_regions(i, nodes[i].start >> PAGE_SHIFT,
 						nodes[i].end >> PAGE_SHIFT);
+	/* for out of order entries in SRAT */
+	sort_node_map();
 	if (!nodes_cover_memory(nodes)) {
 		bad_srat();
 		return -1;
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 24c395694f4d..20a2036f3a94 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1022,6 +1022,9 @@ extern void add_active_range(unsigned int nid, unsigned long start_pfn,
 extern void remove_active_range(unsigned int nid, unsigned long start_pfn,
 					unsigned long end_pfn);
 extern void remove_all_active_ranges(void);
+void sort_node_map(void);
+unsigned long __absent_pages_in_range(int nid, unsigned long start_pfn,
+						unsigned long end_pfn);
 extern unsigned long absent_pages_in_range(unsigned long start_pfn,
 						unsigned long end_pfn);
 extern void get_pfn_range_for_nid(unsigned int nid,
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 2bc2ac63f41e..873c86308b4e 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3573,7 +3573,7 @@ static unsigned long __meminit zone_spanned_pages_in_node(int nid,
  * Return the number of holes in a range on a node. If nid is MAX_NUMNODES,
  * then all holes in the requested range will be accounted for.
  */
-static unsigned long __meminit __absent_pages_in_range(int nid,
+unsigned long __meminit __absent_pages_in_range(int nid,
 				unsigned long range_start_pfn,
 				unsigned long range_end_pfn)
 {
@@ -4102,7 +4102,7 @@ static int __init cmp_node_active_region(const void *a, const void *b)
 }
 
 /* sort the node_map by start_pfn */
-static void __init sort_node_map(void)
+void __init sort_node_map(void)
 {
 	sort(early_node_map, (size_t)nr_nodemap_entries,
 			sizeof(struct node_active_region),
-- 
cgit v1.2.3


From a4636818f8e0991f32d9528f39cf4f3d6a7d30a3 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Thu, 17 Dec 2009 11:43:29 -0600
Subject: cpumask: rename tsk_cpumask to tsk_cpus_allowed

Noone uses this wrapper yet, and Ingo asked that it be kept consistent
with current task_struct usage.

(One user crept in via linux-next: fixed)

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au.
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Tejun Heo <tj@kernel.org>
---
 arch/x86/kernel/cpu/cpufreq/powernow-k8.c | 2 +-
 include/linux/sched.h                     | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index a9df9441a9a2..f125e5c551c0 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -1136,7 +1136,7 @@ static int powernowk8_target(struct cpufreq_policy *pol,
 	if (!alloc_cpumask_var(&oldmask, GFP_KERNEL))
 		return -ENOMEM;
 
-	cpumask_copy(oldmask, tsk_cpumask(current));
+	cpumask_copy(oldmask, tsk_cpus_allowed(current));
 	set_cpus_allowed_ptr(current, cpumask_of(pol->cpu));
 
 	if (smp_processor_id() != pol->cpu) {
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 244c287a5ac1..4d7adb282bdd 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1555,7 +1555,7 @@ struct task_struct {
 };
 
 /* Future-safe accessor for struct task_struct's cpus_allowed. */
-#define tsk_cpumask(tsk) (&(tsk)->cpus_allowed)
+#define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed)
 
 /*
  * Priority of a process goes from 0..MAX_PRIO-1, valid RT
-- 
cgit v1.2.3


From 2d1c861871d767153538a77c498752b36d4bb4b8 Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Wed, 9 Dec 2009 17:52:13 +1100
Subject: PCI/cardbus: Add a fixup hook and fix powerpc

The cardbus code creates PCI devices without ever going through the
necessary fixup bits and pieces that normal PCI devices go through.

There's in fact a commented out call to pcibios_fixup_bus() in there,
it's commented because ... it doesn't work.

I could make pcibios_fixup_bus() do the right thing on powerpc easily
but I felt it cleaner instead to provide a specific hook pci_fixup_cardbus
for which a weak empty implementation is provided by the PCI core.

This fixes cardbus on powerbooks and probably all other PowerPC
platforms which was broken completely for ever on some platforms and
since 2.6.31 on others such as PowerBooks when we made the DMA ops
mandatory (since those are setup by the fixups).

Acked-by: Dominik Brodowski <linux@dominikbrodowski.net>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 arch/powerpc/kernel/pci-common.c | 13 +++++++++++++
 drivers/pci/pci.c                |  5 +++++
 drivers/pcmcia/cardbus.c         |  2 +-
 include/linux/pci.h              |  3 +++
 4 files changed, 22 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index e8dfdbd9327a..cadbed679fbb 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -1107,6 +1107,12 @@ void __devinit pcibios_setup_bus_devices(struct pci_bus *bus)
 	list_for_each_entry(dev, &bus->devices, bus_list) {
 		struct dev_archdata *sd = &dev->dev.archdata;
 
+		/* Cardbus can call us to add new devices to a bus, so ignore
+		 * those who are already fully discovered
+		 */
+		if (dev->is_added)
+			continue;
+
 		/* Setup OF node pointer in archdata */
 		sd->of_node = pci_device_to_OF_node(dev);
 
@@ -1147,6 +1153,13 @@ void __devinit pcibios_fixup_bus(struct pci_bus *bus)
 }
 EXPORT_SYMBOL(pcibios_fixup_bus);
 
+void __devinit pci_fixup_cardbus(struct pci_bus *bus)
+{
+	/* Now fixup devices on that bus */
+	pcibios_setup_bus_devices(bus);
+}
+
+
 static int skip_isa_ioresource_align(struct pci_dev *dev)
 {
 	if ((ppc_pci_flags & PPC_PCI_CAN_SKIP_ISA_ALIGN) &&
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index d50522bf16b1..864e703cf737 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -2798,6 +2798,11 @@ int __attribute__ ((weak)) pci_ext_cfg_avail(struct pci_dev *dev)
 	return 1;
 }
 
+void __weak pci_fixup_cardbus(struct pci_bus *bus)
+{
+}
+EXPORT_SYMBOL(pci_fixup_cardbus);
+
 static int __init pci_setup(char *str)
 {
 	while (str) {
diff --git a/drivers/pcmcia/cardbus.c b/drivers/pcmcia/cardbus.c
index cdf50f3bc2df..d99f846451a3 100644
--- a/drivers/pcmcia/cardbus.c
+++ b/drivers/pcmcia/cardbus.c
@@ -222,7 +222,7 @@ int __ref cb_alloc(struct pcmcia_socket *s)
 	unsigned int max, pass;
 
 	s->functions = pci_scan_slot(bus, PCI_DEVFN(0, 0));
-/*	pcibios_fixup_bus(bus); */
+	pci_fixup_cardbus(bus); 
 
 	max = bus->secondary;
 	for (pass = 0; pass < 2; pass++)
diff --git a/include/linux/pci.h b/include/linux/pci.h
index bf1e67080849..5da0690d9cee 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -566,6 +566,9 @@ void pcibios_align_resource(void *, struct resource *, resource_size_t,
 				resource_size_t);
 void pcibios_update_irq(struct pci_dev *, int irq);
 
+/* Weak but can be overriden by arch */
+void pci_fixup_cardbus(struct pci_bus *);
+
 /* Generic PCI functions used internally */
 
 extern struct pci_bus *pci_find_bus(int domain, int busnr);
-- 
cgit v1.2.3


From 4f924ba5b5aaf1477daafeae779495d39549186d Mon Sep 17 00:00:00 2001
From: Mattia Dongili <malattia@linux.it>
Date: Thu, 17 Dec 2009 00:08:33 +0900
Subject: sony-laptop: add AVMode key mapping

Some models are equipped with an "AVMode" function key that sends
  sony-laptop: Unknown event: 0x100 0xa1
  sony-laptop: Unknown event: 0x100 0x21
for press and release respectively.

Cc: "Matthew W. S. Bell" <matthew@bells23.org.uk>
Cc: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Mattia Dongili <malattia@linux.it>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 drivers/platform/x86/sony-laptop.c | 4 ++++
 include/linux/sonypi.h             | 1 +
 2 files changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/platform/x86/sony-laptop.c b/drivers/platform/x86/sony-laptop.c
index a2a742c8ff7e..9710f70040ba 100644
--- a/drivers/platform/x86/sony-laptop.c
+++ b/drivers/platform/x86/sony-laptop.c
@@ -232,6 +232,7 @@ static int sony_laptop_input_index[] = {
 	56,	/* 69 SONYPI_EVENT_VOLUME_INC_PRESSED */
 	57,	/* 70 SONYPI_EVENT_VOLUME_DEC_PRESSED */
 	-1,	/* 71 SONYPI_EVENT_BRIGHTNESS_PRESSED */
+	58,	/* 72 SONYPI_EVENT_MEDIA_PRESSED */
 };
 
 static int sony_laptop_input_keycode_map[] = {
@@ -293,6 +294,7 @@ static int sony_laptop_input_keycode_map[] = {
 	KEY_F15,	/* 55 SONYPI_EVENT_SETTINGKEY_PRESSED */
 	KEY_VOLUMEUP,	/* 56 SONYPI_EVENT_VOLUME_INC_PRESSED */
 	KEY_VOLUMEDOWN,	/* 57 SONYPI_EVENT_VOLUME_DEC_PRESSED */
+	KEY_MEDIA,	/* 58 SONYPI_EVENT_MEDIA_PRESSED */
 };
 
 /* release buttons after a short delay if pressed */
@@ -890,6 +892,8 @@ static struct sony_nc_event sony_100_events[] = {
 	{ 0x0C, SONYPI_EVENT_FNKEY_RELEASED },
 	{ 0x9f, SONYPI_EVENT_CD_EJECT_PRESSED },
 	{ 0x1f, SONYPI_EVENT_ANYBUTTON_RELEASED },
+	{ 0xa1, SONYPI_EVENT_MEDIA_PRESSED },
+	{ 0x21, SONYPI_EVENT_ANYBUTTON_RELEASED },
 	{ 0, 0 },
 };
 
diff --git a/include/linux/sonypi.h b/include/linux/sonypi.h
index 34c4475ac4a2..4f95c1aac2fd 100644
--- a/include/linux/sonypi.h
+++ b/include/linux/sonypi.h
@@ -111,6 +111,7 @@
 #define SONYPI_EVENT_VOLUME_INC_PRESSED		69
 #define SONYPI_EVENT_VOLUME_DEC_PRESSED		70
 #define SONYPI_EVENT_BRIGHTNESS_PRESSED		71
+#define SONYPI_EVENT_MEDIA_PRESSED		72
 
 /* get/set brightness */
 #define SONYPI_IOCGBRT		_IOR('v', 0, __u8)
-- 
cgit v1.2.3


From 234da7bcdc7aaa935846534c3b726dbc79a9cdd5 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Wed, 16 Dec 2009 20:21:05 +0100
Subject: sched: Teach might_sleep() about preemptible RCU

In practice, it is harmless to voluntarily sleep in a
rcu_read_lock() section if we are running under preempt rcu, but
it is illegal if we build a kernel running non-preemptable rcu.

Currently, might_sleep() doesn't notice sleepable operations
under rcu_read_lock() sections if we are running under
preemptable rcu because preempt_count() is left untouched after
rcu_read_lock() in this case. But we want developers who test
their changes under such config to notice the "sleeping while
atomic" issues.

So we add rcu_read_lock_nesting to prempt_count() in
might_sleep() checks.

[ v2: Handle rcu-tiny ]
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Reviewed-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
LKML-Reference: <1260991265-8451-1-git-send-regression-fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/rcutiny.h |  5 +++++
 include/linux/rcutree.h | 11 +++++++++++
 kernel/sched.c          |  2 +-
 3 files changed, 17 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index c4ba9a78721e..96cc307ed9f4 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -101,4 +101,9 @@ static inline void exit_rcu(void)
 {
 }
 
+static inline int rcu_preempt_depth(void)
+{
+	return 0;
+}
+
 #endif /* __LINUX_RCUTINY_H */
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index c93eee5911b0..8044b1b94333 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -45,6 +45,12 @@ extern void __rcu_read_unlock(void);
 extern void synchronize_rcu(void);
 extern void exit_rcu(void);
 
+/*
+ * Defined as macro as it is a very low level header
+ * included from areas that don't even know about current
+ */
+#define rcu_preempt_depth() (current->rcu_read_lock_nesting)
+
 #else /* #ifdef CONFIG_TREE_PREEMPT_RCU */
 
 static inline void __rcu_read_lock(void)
@@ -63,6 +69,11 @@ static inline void exit_rcu(void)
 {
 }
 
+static inline int rcu_preempt_depth(void)
+{
+	return 0;
+}
+
 #endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */
 
 static inline void __rcu_read_lock_bh(void)
diff --git a/kernel/sched.c b/kernel/sched.c
index af7dfa74e6bb..7be88a7be047 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -9682,7 +9682,7 @@ void __init sched_init(void)
 #ifdef CONFIG_DEBUG_SPINLOCK_SLEEP
 static inline int preempt_count_equals(int preempt_offset)
 {
-	int nested = preempt_count() & ~PREEMPT_ACTIVE;
+	int nested = (preempt_count() & ~PREEMPT_ACTIVE) + rcu_preempt_depth();
 
 	return (nested == PREEMPT_INATOMIC_BASE + preempt_offset);
 }
-- 
cgit v1.2.3


From 5b74ed4729ad2b2017453add68104a83206caefb Mon Sep 17 00:00:00 2001
From: "Robert P. J. Day" <rpjday@crashcourse.ca>
Date: Wed, 16 Dec 2009 10:09:45 -0500
Subject: perf events: Remove unused perf_counter.h header file

Since nothing includes the <linux/perf_counter.h> file and it's
also not exported to user space, remove it.

Signed-off-by: Robert P. J. Day <rpjday@crashcourse.ca>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <alpine.LFD.2.00.0912161007430.8198@localhost>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_counter.h | 444 -------------------------------------------
 1 file changed, 444 deletions(-)
 delete mode 100644 include/linux/perf_counter.h

(limited to 'include/linux')

diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
deleted file mode 100644
index e3fb25606706..000000000000
--- a/include/linux/perf_counter.h
+++ /dev/null
@@ -1,444 +0,0 @@
-/*
- *  NOTE: this file will be removed in a future kernel release, it is
- *  provided as a courtesy copy of user-space code that relies on the
- *  old (pre-rename) symbols and constants.
- *
- *  Performance events:
- *
- *    Copyright (C) 2008-2009, Thomas Gleixner <tglx@linutronix.de>
- *    Copyright (C) 2008-2009, Red Hat, Inc., Ingo Molnar
- *    Copyright (C) 2008-2009, Red Hat, Inc., Peter Zijlstra
- *
- *  Data type definitions, declarations, prototypes.
- *
- *    Started by: Thomas Gleixner and Ingo Molnar
- *
- *  For licencing details see kernel-base/COPYING
- */
-#ifndef _LINUX_PERF_COUNTER_H
-#define _LINUX_PERF_COUNTER_H
-
-#include <linux/types.h>
-#include <linux/ioctl.h>
-#include <asm/byteorder.h>
-
-/*
- * User-space ABI bits:
- */
-
-/*
- * attr.type
- */
-enum perf_type_id {
-	PERF_TYPE_HARDWARE			= 0,
-	PERF_TYPE_SOFTWARE			= 1,
-	PERF_TYPE_TRACEPOINT			= 2,
-	PERF_TYPE_HW_CACHE			= 3,
-	PERF_TYPE_RAW				= 4,
-
-	PERF_TYPE_MAX,				/* non-ABI */
-};
-
-/*
- * Generalized performance counter event types, used by the
- * attr.event_id parameter of the sys_perf_counter_open()
- * syscall:
- */
-enum perf_hw_id {
-	/*
-	 * Common hardware events, generalized by the kernel:
-	 */
-	PERF_COUNT_HW_CPU_CYCLES		= 0,
-	PERF_COUNT_HW_INSTRUCTIONS		= 1,
-	PERF_COUNT_HW_CACHE_REFERENCES		= 2,
-	PERF_COUNT_HW_CACHE_MISSES		= 3,
-	PERF_COUNT_HW_BRANCH_INSTRUCTIONS	= 4,
-	PERF_COUNT_HW_BRANCH_MISSES		= 5,
-	PERF_COUNT_HW_BUS_CYCLES		= 6,
-
-	PERF_COUNT_HW_MAX,			/* non-ABI */
-};
-
-/*
- * Generalized hardware cache counters:
- *
- *       { L1-D, L1-I, LLC, ITLB, DTLB, BPU } x
- *       { read, write, prefetch } x
- *       { accesses, misses }
- */
-enum perf_hw_cache_id {
-	PERF_COUNT_HW_CACHE_L1D			= 0,
-	PERF_COUNT_HW_CACHE_L1I			= 1,
-	PERF_COUNT_HW_CACHE_LL			= 2,
-	PERF_COUNT_HW_CACHE_DTLB		= 3,
-	PERF_COUNT_HW_CACHE_ITLB		= 4,
-	PERF_COUNT_HW_CACHE_BPU			= 5,
-
-	PERF_COUNT_HW_CACHE_MAX,		/* non-ABI */
-};
-
-enum perf_hw_cache_op_id {
-	PERF_COUNT_HW_CACHE_OP_READ		= 0,
-	PERF_COUNT_HW_CACHE_OP_WRITE		= 1,
-	PERF_COUNT_HW_CACHE_OP_PREFETCH		= 2,
-
-	PERF_COUNT_HW_CACHE_OP_MAX,		/* non-ABI */
-};
-
-enum perf_hw_cache_op_result_id {
-	PERF_COUNT_HW_CACHE_RESULT_ACCESS	= 0,
-	PERF_COUNT_HW_CACHE_RESULT_MISS		= 1,
-
-	PERF_COUNT_HW_CACHE_RESULT_MAX,		/* non-ABI */
-};
-
-/*
- * Special "software" counters provided by the kernel, even if the hardware
- * does not support performance counters. These counters measure various
- * physical and sw events of the kernel (and allow the profiling of them as
- * well):
- */
-enum perf_sw_ids {
-	PERF_COUNT_SW_CPU_CLOCK			= 0,
-	PERF_COUNT_SW_TASK_CLOCK		= 1,
-	PERF_COUNT_SW_PAGE_FAULTS		= 2,
-	PERF_COUNT_SW_CONTEXT_SWITCHES		= 3,
-	PERF_COUNT_SW_CPU_MIGRATIONS		= 4,
-	PERF_COUNT_SW_PAGE_FAULTS_MIN		= 5,
-	PERF_COUNT_SW_PAGE_FAULTS_MAJ		= 6,
-	PERF_COUNT_SW_ALIGNMENT_FAULTS		= 7,
-	PERF_COUNT_SW_EMULATION_FAULTS		= 8,
-
-	PERF_COUNT_SW_MAX,			/* non-ABI */
-};
-
-/*
- * Bits that can be set in attr.sample_type to request information
- * in the overflow packets.
- */
-enum perf_counter_sample_format {
-	PERF_SAMPLE_IP				= 1U << 0,
-	PERF_SAMPLE_TID				= 1U << 1,
-	PERF_SAMPLE_TIME			= 1U << 2,
-	PERF_SAMPLE_ADDR			= 1U << 3,
-	PERF_SAMPLE_READ			= 1U << 4,
-	PERF_SAMPLE_CALLCHAIN			= 1U << 5,
-	PERF_SAMPLE_ID				= 1U << 6,
-	PERF_SAMPLE_CPU				= 1U << 7,
-	PERF_SAMPLE_PERIOD			= 1U << 8,
-	PERF_SAMPLE_STREAM_ID			= 1U << 9,
-	PERF_SAMPLE_RAW				= 1U << 10,
-
-	PERF_SAMPLE_MAX = 1U << 11,		/* non-ABI */
-};
-
-/*
- * The format of the data returned by read() on a perf counter fd,
- * as specified by attr.read_format:
- *
- * struct read_format {
- *	{ u64		value;
- *	  { u64		time_enabled; } && PERF_FORMAT_ENABLED
- *	  { u64		time_running; } && PERF_FORMAT_RUNNING
- *	  { u64		id;           } && PERF_FORMAT_ID
- *	} && !PERF_FORMAT_GROUP
- *
- *	{ u64		nr;
- *	  { u64		time_enabled; } && PERF_FORMAT_ENABLED
- *	  { u64		time_running; } && PERF_FORMAT_RUNNING
- *	  { u64		value;
- *	    { u64	id;           } && PERF_FORMAT_ID
- *	  }		cntr[nr];
- *	} && PERF_FORMAT_GROUP
- * };
- */
-enum perf_counter_read_format {
-	PERF_FORMAT_TOTAL_TIME_ENABLED		= 1U << 0,
-	PERF_FORMAT_TOTAL_TIME_RUNNING		= 1U << 1,
-	PERF_FORMAT_ID				= 1U << 2,
-	PERF_FORMAT_GROUP			= 1U << 3,
-
-	PERF_FORMAT_MAX = 1U << 4, 		/* non-ABI */
-};
-
-#define PERF_ATTR_SIZE_VER0	64	/* sizeof first published struct */
-
-/*
- * Hardware event to monitor via a performance monitoring counter:
- */
-struct perf_counter_attr {
-
-	/*
-	 * Major type: hardware/software/tracepoint/etc.
-	 */
-	__u32			type;
-
-	/*
-	 * Size of the attr structure, for fwd/bwd compat.
-	 */
-	__u32			size;
-
-	/*
-	 * Type specific configuration information.
-	 */
-	__u64			config;
-
-	union {
-		__u64		sample_period;
-		__u64		sample_freq;
-	};
-
-	__u64			sample_type;
-	__u64			read_format;
-
-	__u64			disabled       :  1, /* off by default        */
-				inherit	       :  1, /* children inherit it   */
-				pinned	       :  1, /* must always be on PMU */
-				exclusive      :  1, /* only group on PMU     */
-				exclude_user   :  1, /* don't count user      */
-				exclude_kernel :  1, /* ditto kernel          */
-				exclude_hv     :  1, /* ditto hypervisor      */
-				exclude_idle   :  1, /* don't count when idle */
-				mmap           :  1, /* include mmap data     */
-				comm	       :  1, /* include comm data     */
-				freq           :  1, /* use freq, not period  */
-				inherit_stat   :  1, /* per task counts       */
-				enable_on_exec :  1, /* next exec enables     */
-				task           :  1, /* trace fork/exit       */
-				watermark      :  1, /* wakeup_watermark      */
-
-				__reserved_1   : 49;
-
-	union {
-		__u32		wakeup_events;	  /* wakeup every n events */
-		__u32		wakeup_watermark; /* bytes before wakeup   */
-	};
-	__u32			__reserved_2;
-
-	__u64			__reserved_3;
-};
-
-/*
- * Ioctls that can be done on a perf counter fd:
- */
-#define PERF_COUNTER_IOC_ENABLE		_IO ('$', 0)
-#define PERF_COUNTER_IOC_DISABLE	_IO ('$', 1)
-#define PERF_COUNTER_IOC_REFRESH	_IO ('$', 2)
-#define PERF_COUNTER_IOC_RESET		_IO ('$', 3)
-#define PERF_COUNTER_IOC_PERIOD		_IOW('$', 4, u64)
-#define PERF_COUNTER_IOC_SET_OUTPUT	_IO ('$', 5)
-#define PERF_COUNTER_IOC_SET_FILTER	_IOW('$', 6, char *)
-
-enum perf_counter_ioc_flags {
-	PERF_IOC_FLAG_GROUP		= 1U << 0,
-};
-
-/*
- * Structure of the page that can be mapped via mmap
- */
-struct perf_counter_mmap_page {
-	__u32	version;		/* version number of this structure */
-	__u32	compat_version;		/* lowest version this is compat with */
-
-	/*
-	 * Bits needed to read the hw counters in user-space.
-	 *
-	 *   u32 seq;
-	 *   s64 count;
-	 *
-	 *   do {
-	 *     seq = pc->lock;
-	 *
-	 *     barrier()
-	 *     if (pc->index) {
-	 *       count = pmc_read(pc->index - 1);
-	 *       count += pc->offset;
-	 *     } else
-	 *       goto regular_read;
-	 *
-	 *     barrier();
-	 *   } while (pc->lock != seq);
-	 *
-	 * NOTE: for obvious reason this only works on self-monitoring
-	 *       processes.
-	 */
-	__u32	lock;			/* seqlock for synchronization */
-	__u32	index;			/* hardware counter identifier */
-	__s64	offset;			/* add to hardware counter value */
-	__u64	time_enabled;		/* time counter active */
-	__u64	time_running;		/* time counter on cpu */
-
-		/*
-		 * Hole for extension of the self monitor capabilities
-		 */
-
-	__u64	__reserved[123];	/* align to 1k */
-
-	/*
-	 * Control data for the mmap() data buffer.
-	 *
-	 * User-space reading the @data_head value should issue an rmb(), on
-	 * SMP capable platforms, after reading this value -- see
-	 * perf_counter_wakeup().
-	 *
-	 * When the mapping is PROT_WRITE the @data_tail value should be
-	 * written by userspace to reflect the last read data. In this case
-	 * the kernel will not over-write unread data.
-	 */
-	__u64   data_head;		/* head in the data section */
-	__u64	data_tail;		/* user-space written tail */
-};
-
-#define PERF_EVENT_MISC_CPUMODE_MASK		(3 << 0)
-#define PERF_EVENT_MISC_CPUMODE_UNKNOWN		(0 << 0)
-#define PERF_EVENT_MISC_KERNEL			(1 << 0)
-#define PERF_EVENT_MISC_USER			(2 << 0)
-#define PERF_EVENT_MISC_HYPERVISOR		(3 << 0)
-
-struct perf_event_header {
-	__u32	type;
-	__u16	misc;
-	__u16	size;
-};
-
-enum perf_event_type {
-
-	/*
-	 * The MMAP events record the PROT_EXEC mappings so that we can
-	 * correlate userspace IPs to code. They have the following structure:
-	 *
-	 * struct {
-	 *	struct perf_event_header	header;
-	 *
-	 *	u32				pid, tid;
-	 *	u64				addr;
-	 *	u64				len;
-	 *	u64				pgoff;
-	 *	char				filename[];
-	 * };
-	 */
-	PERF_EVENT_MMAP			= 1,
-
-	/*
-	 * struct {
-	 *	struct perf_event_header	header;
-	 *	u64				id;
-	 *	u64				lost;
-	 * };
-	 */
-	PERF_EVENT_LOST			= 2,
-
-	/*
-	 * struct {
-	 *	struct perf_event_header	header;
-	 *
-	 *	u32				pid, tid;
-	 *	char				comm[];
-	 * };
-	 */
-	PERF_EVENT_COMM			= 3,
-
-	/*
-	 * struct {
-	 *	struct perf_event_header	header;
-	 *	u32				pid, ppid;
-	 *	u32				tid, ptid;
-	 *	u64				time;
-	 * };
-	 */
-	PERF_EVENT_EXIT			= 4,
-
-	/*
-	 * struct {
-	 *	struct perf_event_header	header;
-	 *	u64				time;
-	 *	u64				id;
-	 *	u64				stream_id;
-	 * };
-	 */
-	PERF_EVENT_THROTTLE		= 5,
-	PERF_EVENT_UNTHROTTLE		= 6,
-
-	/*
-	 * struct {
-	 *	struct perf_event_header	header;
-	 *	u32				pid, ppid;
-	 *	u32				tid, ptid;
-	 *	u64				time;
-	 * };
-	 */
-	PERF_EVENT_FORK			= 7,
-
-	/*
-	 * struct {
-	 *	struct perf_event_header	header;
-	 *	u32				pid, tid;
-	 *
-	 *	struct read_format		values;
-	 * };
-	 */
-	PERF_EVENT_READ			= 8,
-
-	/*
-	 * struct {
-	 *	struct perf_event_header	header;
-	 *
-	 *	{ u64			ip;	  } && PERF_SAMPLE_IP
-	 *	{ u32			pid, tid; } && PERF_SAMPLE_TID
-	 *	{ u64			time;     } && PERF_SAMPLE_TIME
-	 *	{ u64			addr;     } && PERF_SAMPLE_ADDR
-	 *	{ u64			id;	  } && PERF_SAMPLE_ID
-	 *	{ u64			stream_id;} && PERF_SAMPLE_STREAM_ID
-	 *	{ u32			cpu, res; } && PERF_SAMPLE_CPU
-	 *	{ u64			period;   } && PERF_SAMPLE_PERIOD
-	 *
-	 *	{ struct read_format	values;	  } && PERF_SAMPLE_READ
-	 *
-	 *	{ u64			nr,
-	 *	  u64			ips[nr];  } && PERF_SAMPLE_CALLCHAIN
-	 *
-	 *	#
-	 *	# The RAW record below is opaque data wrt the ABI
-	 *	#
-	 *	# That is, the ABI doesn't make any promises wrt to
-	 *	# the stability of its content, it may vary depending
-	 *	# on event, hardware, kernel version and phase of
-	 *	# the moon.
-	 *	#
-	 *	# In other words, PERF_SAMPLE_RAW contents are not an ABI.
-	 *	#
-	 *
-	 *	{ u32			size;
-	 *	  char                  data[size];}&& PERF_SAMPLE_RAW
-	 * };
-	 */
-	PERF_EVENT_SAMPLE		= 9,
-
-	PERF_EVENT_MAX,			/* non-ABI */
-};
-
-enum perf_callchain_context {
-	PERF_CONTEXT_HV			= (__u64)-32,
-	PERF_CONTEXT_KERNEL		= (__u64)-128,
-	PERF_CONTEXT_USER		= (__u64)-512,
-
-	PERF_CONTEXT_GUEST		= (__u64)-2048,
-	PERF_CONTEXT_GUEST_KERNEL	= (__u64)-2176,
-	PERF_CONTEXT_GUEST_USER		= (__u64)-2560,
-
-	PERF_CONTEXT_MAX		= (__u64)-4095,
-};
-
-#define PERF_FLAG_FD_NO_GROUP		(1U << 0)
-#define PERF_FLAG_FD_OUTPUT		(1U << 1)
-
-/*
- * In case some app still references the old symbols:
- */
-
-#define __NR_perf_counter_open		__NR_perf_event_open
-
-#define PR_TASK_PERF_COUNTERS_DISABLE	PR_TASK_PERF_EVENTS_DISABLE
-#define PR_TASK_PERF_COUNTERS_ENABLE	PR_TASK_PERF_EVENTS_ENABLE
-
-#endif /* _LINUX_PERF_COUNTER_H */
-- 
cgit v1.2.3


From 27f37e4bfed803be338dcc78845d4a67eefb40a0 Mon Sep 17 00:00:00 2001
From: Wolfram Sang <w.sang@pengutronix.de>
Date: Fri, 25 Sep 2009 09:39:26 +0200
Subject: regulator: add driver for MAX8660/8661

Tested with a MX25-based custom board.

Signed-off-by: Wolfram Sang <w.sang@pengutronix.de>
Acked-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Liam Girdwood <lrg@slimlogic.co.uk>
---
 drivers/regulator/Kconfig         |   7 +
 drivers/regulator/Makefile        |   1 +
 drivers/regulator/max8660.c       | 510 ++++++++++++++++++++++++++++++++++++++
 include/linux/regulator/max8660.h |  57 +++++
 4 files changed, 575 insertions(+)
 create mode 100644 drivers/regulator/max8660.c
 create mode 100644 include/linux/regulator/max8660.h

(limited to 'include/linux')

diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig
index 7cfdd65bebb4..9e0aa14dc6af 100644
--- a/drivers/regulator/Kconfig
+++ b/drivers/regulator/Kconfig
@@ -69,6 +69,13 @@ config REGULATOR_MAX1586
 	  regulator via I2C bus. The provided regulator is suitable
 	  for PXA27x chips to control VCC_CORE and VCC_USIM voltages.
 
+config REGULATOR_MAX8660
+	tristate "Maxim 8660/8661 voltage regulator"
+	depends on I2C
+	help
+	  This driver controls a Maxim 8660/8661 voltage output
+	  regulator via I2C bus.
+
 config REGULATOR_TWL4030
 	bool "TI TWL4030/TWL5030/TWL6030/TPS695x0 PMIC"
 	depends on TWL4030_CORE
diff --git a/drivers/regulator/Makefile b/drivers/regulator/Makefile
index 9ae3cc44e668..12285e41beec 100644
--- a/drivers/regulator/Makefile
+++ b/drivers/regulator/Makefile
@@ -12,6 +12,7 @@ obj-$(CONFIG_REGULATOR_BQ24022) += bq24022.o
 obj-$(CONFIG_REGULATOR_LP3971) += lp3971.o
 obj-$(CONFIG_REGULATOR_MAX1586) += max1586.o
 obj-$(CONFIG_REGULATOR_TWL4030) += twl-regulator.o
+obj-$(CONFIG_REGULATOR_MAX8660) += max8660.o
 obj-$(CONFIG_REGULATOR_WM831X) += wm831x-dcdc.o
 obj-$(CONFIG_REGULATOR_WM831X) += wm831x-isink.o
 obj-$(CONFIG_REGULATOR_WM831X) += wm831x-ldo.o
diff --git a/drivers/regulator/max8660.c b/drivers/regulator/max8660.c
new file mode 100644
index 000000000000..acc2fb7b6087
--- /dev/null
+++ b/drivers/regulator/max8660.c
@@ -0,0 +1,510 @@
+/*
+ * max8660.c  --  Voltage regulation for the Maxim 8660/8661
+ *
+ * based on max1586.c and wm8400-regulator.c
+ *
+ * Copyright (C) 2009 Wolfram Sang, Pengutronix e.K.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ * Some info:
+ *
+ * Datasheet: http://datasheets.maxim-ic.com/en/ds/MAX8660-MAX8661.pdf
+ *
+ * This chip is a bit nasty because it is a write-only device. Thus, the driver
+ * uses shadow registers to keep track of its values. The main problem appears
+ * to be the initialization: When Linux boots up, we cannot know if the chip is
+ * in the default state or not, so we would have to pass such information in
+ * platform_data. As this adds a bit of complexity to the driver, this is left
+ * out for now until it is really needed.
+ *
+ * [A|S|M]DTV1 registers are currently not used, but [A|S|M]DTV2.
+ *
+ * If the driver is feature complete, it might be worth to check if one set of
+ * functions for V3-V7 is sufficient. For maximum flexibility during
+ * development, they are separated for now.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/err.h>
+#include <linux/i2c.h>
+#include <linux/platform_device.h>
+#include <linux/regulator/driver.h>
+#include <linux/regulator/max8660.h>
+
+#define MAX8660_DCDC_MIN_UV	 725000
+#define MAX8660_DCDC_MAX_UV	1800000
+#define MAX8660_DCDC_STEP	  25000
+#define MAX8660_DCDC_MAX_SEL	0x2b
+
+#define MAX8660_LDO5_MIN_UV	1700000
+#define MAX8660_LDO5_MAX_UV	2000000
+#define MAX8660_LDO5_STEP	  25000
+#define MAX8660_LDO5_MAX_SEL	0x0c
+
+#define MAX8660_LDO67_MIN_UV	1800000
+#define MAX8660_LDO67_MAX_UV	3300000
+#define MAX8660_LDO67_STEP	 100000
+#define MAX8660_LDO67_MAX_SEL	0x0f
+
+enum {
+	MAX8660_OVER1,
+	MAX8660_OVER2,
+	MAX8660_VCC1,
+	MAX8660_ADTV1,
+	MAX8660_ADTV2,
+	MAX8660_SDTV1,
+	MAX8660_SDTV2,
+	MAX8660_MDTV1,
+	MAX8660_MDTV2,
+	MAX8660_L12VCR,
+	MAX8660_FPWM,
+	MAX8660_N_REGS,	/* not a real register */
+};
+
+struct max8660 {
+	struct i2c_client *client;
+	u8 shadow_regs[MAX8660_N_REGS];		/* as chip is write only */
+	struct regulator_dev *rdev[];
+};
+
+static int max8660_write(struct max8660 *max8660, u8 reg, u8 mask, u8 val)
+{
+	static const u8 max8660_addresses[MAX8660_N_REGS] =
+	  { 0x10, 0x12, 0x20, 0x23, 0x24, 0x29, 0x2a, 0x32, 0x33, 0x39, 0x80 };
+
+	int ret;
+	u8 reg_val = (max8660->shadow_regs[reg] & mask) | val;
+	dev_vdbg(&max8660->client->dev, "Writing reg %02x with %02x\n",
+			max8660_addresses[reg], reg_val);
+
+	ret = i2c_smbus_write_byte_data(max8660->client,
+			max8660_addresses[reg], reg_val);
+	if (ret == 0)
+		max8660->shadow_regs[reg] = reg_val;
+
+	return ret;
+}
+
+
+/*
+ * DCDC functions
+ */
+
+static int max8660_dcdc_is_enabled(struct regulator_dev *rdev)
+{
+	struct max8660 *max8660 = rdev_get_drvdata(rdev);
+	u8 val = max8660->shadow_regs[MAX8660_OVER1];
+	u8 mask = (rdev_get_id(rdev) == MAX8660_V3) ? 1 : 4;
+	return !!(val & mask);
+}
+
+static int max8660_dcdc_enable(struct regulator_dev *rdev)
+{
+	struct max8660 *max8660 = rdev_get_drvdata(rdev);
+	u8 bit = (rdev_get_id(rdev) == MAX8660_V3) ? 1 : 4;
+	return max8660_write(max8660, MAX8660_OVER1, 0xff, bit);
+}
+
+static int max8660_dcdc_disable(struct regulator_dev *rdev)
+{
+	struct max8660 *max8660 = rdev_get_drvdata(rdev);
+	u8 mask = (rdev_get_id(rdev) == MAX8660_V3) ? ~1 : ~4;
+	return max8660_write(max8660, MAX8660_OVER1, mask, 0);
+}
+
+static int max8660_dcdc_list(struct regulator_dev *rdev, unsigned selector)
+{
+	if (selector > MAX8660_DCDC_MAX_SEL)
+		return -EINVAL;
+	return MAX8660_DCDC_MIN_UV + selector * MAX8660_DCDC_STEP;
+}
+
+static int max8660_dcdc_get(struct regulator_dev *rdev)
+{
+	struct max8660 *max8660 = rdev_get_drvdata(rdev);
+	u8 reg = (rdev_get_id(rdev) == MAX8660_V3) ? MAX8660_ADTV2 : MAX8660_SDTV2;
+	u8 selector = max8660->shadow_regs[reg];
+	return MAX8660_DCDC_MIN_UV + selector * MAX8660_DCDC_STEP;
+}
+
+static int max8660_dcdc_set(struct regulator_dev *rdev, int min_uV, int max_uV)
+{
+	struct max8660 *max8660 = rdev_get_drvdata(rdev);
+	u8 reg, selector, bits;
+	int ret;
+
+	if (min_uV < MAX8660_DCDC_MIN_UV || min_uV > MAX8660_DCDC_MAX_UV)
+		return -EINVAL;
+	if (max_uV < MAX8660_DCDC_MIN_UV || max_uV > MAX8660_DCDC_MAX_UV)
+		return -EINVAL;
+
+	selector = (min_uV - (MAX8660_DCDC_MIN_UV - MAX8660_DCDC_STEP + 1))
+			/ MAX8660_DCDC_STEP;
+
+	ret = max8660_dcdc_list(rdev, selector);
+	if (ret < 0 || ret > max_uV)
+		return -EINVAL;
+
+	reg = (rdev_get_id(rdev) == MAX8660_V3) ? MAX8660_ADTV2 : MAX8660_SDTV2;
+	ret = max8660_write(max8660, reg, 0, selector);
+	if (ret)
+		return ret;
+
+	/* Select target voltage register and activate regulation */
+	bits = (rdev_get_id(rdev) == MAX8660_V3) ? 0x03 : 0x30;
+	return max8660_write(max8660, MAX8660_VCC1, 0xff, bits);
+}
+
+static struct regulator_ops max8660_dcdc_ops = {
+	.is_enabled = max8660_dcdc_is_enabled,
+	.list_voltage = max8660_dcdc_list,
+	.set_voltage = max8660_dcdc_set,
+	.get_voltage = max8660_dcdc_get,
+};
+
+
+/*
+ * LDO5 functions
+ */
+
+static int max8660_ldo5_list(struct regulator_dev *rdev, unsigned selector)
+{
+	if (selector > MAX8660_LDO5_MAX_SEL)
+		return -EINVAL;
+	return MAX8660_LDO5_MIN_UV + selector * MAX8660_LDO5_STEP;
+}
+
+static int max8660_ldo5_get(struct regulator_dev *rdev)
+{
+	struct max8660 *max8660 = rdev_get_drvdata(rdev);
+	u8 selector = max8660->shadow_regs[MAX8660_MDTV2];
+
+	return MAX8660_LDO5_MIN_UV + selector * MAX8660_LDO5_STEP;
+}
+
+static int max8660_ldo5_set(struct regulator_dev *rdev, int min_uV, int max_uV)
+{
+	struct max8660 *max8660 = rdev_get_drvdata(rdev);
+	u8 selector;
+	int ret;
+
+	if (min_uV < MAX8660_LDO5_MIN_UV || min_uV > MAX8660_LDO5_MAX_UV)
+		return -EINVAL;
+	if (max_uV < MAX8660_LDO5_MIN_UV || max_uV > MAX8660_LDO5_MAX_UV)
+		return -EINVAL;
+
+	selector = (min_uV - (MAX8660_LDO5_MIN_UV - MAX8660_LDO5_STEP + 1))
+			/ MAX8660_LDO5_STEP;
+	ret = max8660_ldo5_list(rdev, selector);
+	if (ret < 0 || ret > max_uV)
+		return -EINVAL;
+
+	ret = max8660_write(max8660, MAX8660_MDTV2, 0, selector);
+	if (ret)
+		return ret;
+
+	/* Select target voltage register and activate regulation */
+	return max8660_write(max8660, MAX8660_VCC1, 0xff, 0xc0);
+}
+
+static struct regulator_ops max8660_ldo5_ops = {
+	.list_voltage = max8660_ldo5_list,
+	.set_voltage = max8660_ldo5_set,
+	.get_voltage = max8660_ldo5_get,
+};
+
+
+/*
+ * LDO67 functions
+ */
+
+static int max8660_ldo67_is_enabled(struct regulator_dev *rdev)
+{
+	struct max8660 *max8660 = rdev_get_drvdata(rdev);
+	u8 val = max8660->shadow_regs[MAX8660_OVER2];
+	u8 mask = (rdev_get_id(rdev) == MAX8660_V6) ? 2 : 4;
+	return !!(val & mask);
+}
+
+static int max8660_ldo67_enable(struct regulator_dev *rdev)
+{
+	struct max8660 *max8660 = rdev_get_drvdata(rdev);
+	u8 bit = (rdev_get_id(rdev) == MAX8660_V6) ? 2 : 4;
+	return max8660_write(max8660, MAX8660_OVER2, 0xff, bit);
+}
+
+static int max8660_ldo67_disable(struct regulator_dev *rdev)
+{
+	struct max8660 *max8660 = rdev_get_drvdata(rdev);
+	u8 mask = (rdev_get_id(rdev) == MAX8660_V6) ? ~2 : ~4;
+	return max8660_write(max8660, MAX8660_OVER2, mask, 0);
+}
+
+static int max8660_ldo67_list(struct regulator_dev *rdev, unsigned selector)
+{
+	if (selector > MAX8660_LDO67_MAX_SEL)
+		return -EINVAL;
+	return MAX8660_LDO67_MIN_UV + selector * MAX8660_LDO67_STEP;
+}
+
+static int max8660_ldo67_get(struct regulator_dev *rdev)
+{
+	struct max8660 *max8660 = rdev_get_drvdata(rdev);
+	u8 shift = (rdev_get_id(rdev) == MAX8660_V6) ? 0 : 4;
+	u8 selector = (max8660->shadow_regs[MAX8660_L12VCR] >> shift) & 0xf;
+
+	return MAX8660_LDO67_MIN_UV + selector * MAX8660_LDO67_STEP;
+}
+
+static int max8660_ldo67_set(struct regulator_dev *rdev, int min_uV, int max_uV)
+{
+	struct max8660 *max8660 = rdev_get_drvdata(rdev);
+	u8 selector;
+	int ret;
+
+	if (min_uV < MAX8660_LDO67_MIN_UV || min_uV > MAX8660_LDO67_MAX_UV)
+		return -EINVAL;
+	if (max_uV < MAX8660_LDO67_MIN_UV || max_uV > MAX8660_LDO67_MAX_UV)
+		return -EINVAL;
+
+	selector = (min_uV - (MAX8660_LDO67_MIN_UV - MAX8660_LDO67_STEP + 1))
+			/ MAX8660_LDO67_STEP;
+
+	ret = max8660_ldo67_list(rdev, selector);
+	if (ret < 0 || ret > max_uV)
+		return -EINVAL;
+
+	if (rdev_get_id(rdev) == MAX8660_V6)
+		return max8660_write(max8660, MAX8660_L12VCR, 0xf0, selector);
+	else
+		return max8660_write(max8660, MAX8660_L12VCR, 0x0f, selector << 4);
+}
+
+static struct regulator_ops max8660_ldo67_ops = {
+	.is_enabled = max8660_ldo67_is_enabled,
+	.enable = max8660_ldo67_enable,
+	.disable = max8660_ldo67_disable,
+	.list_voltage = max8660_ldo67_list,
+	.get_voltage = max8660_ldo67_get,
+	.set_voltage = max8660_ldo67_set,
+};
+
+static struct regulator_desc max8660_reg[] = {
+	{
+		.name = "V3(DCDC)",
+		.id = MAX8660_V3,
+		.ops = &max8660_dcdc_ops,
+		.type = REGULATOR_VOLTAGE,
+		.n_voltages = MAX8660_DCDC_MAX_SEL + 1,
+		.owner = THIS_MODULE,
+	},
+	{
+		.name = "V4(DCDC)",
+		.id = MAX8660_V4,
+		.ops = &max8660_dcdc_ops,
+		.type = REGULATOR_VOLTAGE,
+		.n_voltages = MAX8660_DCDC_MAX_SEL + 1,
+		.owner = THIS_MODULE,
+	},
+	{
+		.name = "V5(LDO)",
+		.id = MAX8660_V5,
+		.ops = &max8660_ldo5_ops,
+		.type = REGULATOR_VOLTAGE,
+		.n_voltages = MAX8660_LDO5_MAX_SEL + 1,
+		.owner = THIS_MODULE,
+	},
+	{
+		.name = "V6(LDO)",
+		.id = MAX8660_V6,
+		.ops = &max8660_ldo67_ops,
+		.type = REGULATOR_VOLTAGE,
+		.n_voltages = MAX8660_LDO67_MAX_SEL + 1,
+		.owner = THIS_MODULE,
+	},
+	{
+		.name = "V7(LDO)",
+		.id = MAX8660_V7,
+		.ops = &max8660_ldo67_ops,
+		.type = REGULATOR_VOLTAGE,
+		.n_voltages = MAX8660_LDO67_MAX_SEL + 1,
+		.owner = THIS_MODULE,
+	},
+};
+
+static int max8660_probe(struct i2c_client *client,
+			      const struct i2c_device_id *i2c_id)
+{
+	struct regulator_dev **rdev;
+	struct max8660_platform_data *pdata = client->dev.platform_data;
+	struct max8660 *max8660;
+	int boot_on, i, id, ret = -EINVAL;
+
+	if (pdata->num_subdevs > MAX8660_V_END) {
+		dev_err(&client->dev, "Too much regulators found!\n");
+		goto out;
+	}
+
+	max8660 = kzalloc(sizeof(struct max8660) +
+			sizeof(struct regulator_dev *) * MAX8660_V_END,
+			GFP_KERNEL);
+	if (!max8660) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	max8660->client = client;
+	rdev = max8660->rdev;
+
+	if (pdata->en34_is_high) {
+		/* Simulate always on */
+		max8660->shadow_regs[MAX8660_OVER1] = 5;
+	} else {
+		/* Otherwise devices can be toggled via software */
+		max8660_dcdc_ops.enable = max8660_dcdc_enable;
+		max8660_dcdc_ops.disable = max8660_dcdc_disable;
+	}
+
+	/*
+	 * First, set up shadow registers to prevent glitches. As some
+	 * registers are shared between regulators, everything must be properly
+	 * set up for all regulators in advance.
+	 */
+	max8660->shadow_regs[MAX8660_ADTV1] =
+		max8660->shadow_regs[MAX8660_ADTV2] =
+		max8660->shadow_regs[MAX8660_SDTV1] =
+		max8660->shadow_regs[MAX8660_SDTV2] = 0x1b;
+	max8660->shadow_regs[MAX8660_MDTV1] =
+		max8660->shadow_regs[MAX8660_MDTV2] = 0x04;
+
+	for (i = 0; i < pdata->num_subdevs; i++) {
+
+		if (!pdata->subdevs[i].platform_data)
+			goto err_free;
+
+		boot_on = pdata->subdevs[i].platform_data->constraints.boot_on;
+
+		switch (pdata->subdevs[i].id) {
+		case MAX8660_V3:
+			if (boot_on)
+				max8660->shadow_regs[MAX8660_OVER1] |= 1;
+			break;
+
+		case MAX8660_V4:
+			if (boot_on)
+				max8660->shadow_regs[MAX8660_OVER1] |= 4;
+			break;
+
+		case MAX8660_V5:
+			break;
+
+		case MAX8660_V6:
+			if (boot_on)
+				max8660->shadow_regs[MAX8660_OVER2] |= 2;
+			break;
+
+		case MAX8660_V7:
+			if (!strcmp(i2c_id->name, "max8661")) {
+				dev_err(&client->dev, "Regulator not on this chip!\n");
+				goto err_free;
+			}
+
+			if (boot_on)
+				max8660->shadow_regs[MAX8660_OVER2] |= 4;
+			break;
+
+		default:
+			dev_err(&client->dev, "invalid regulator %s\n",
+				 pdata->subdevs[i].name);
+			goto err_free;
+		}
+	}
+
+	/* Finally register devices */
+	for (i = 0; i < pdata->num_subdevs; i++) {
+
+		id = pdata->subdevs[i].id;
+
+		rdev[i] = regulator_register(&max8660_reg[id], &client->dev,
+					     pdata->subdevs[i].platform_data,
+					     max8660);
+		if (IS_ERR(rdev[i])) {
+			ret = PTR_ERR(rdev[i]);
+			dev_err(&client->dev, "failed to register %s\n",
+				max8660_reg[id].name);
+			goto err_unregister;
+		}
+	}
+
+	i2c_set_clientdata(client, rdev);
+	dev_info(&client->dev, "Maxim 8660/8661 regulator driver loaded\n");
+	return 0;
+
+err_unregister:
+	while (--i >= 0)
+		regulator_unregister(rdev[i]);
+err_free:
+	kfree(max8660);
+out:
+	return ret;
+}
+
+static int max8660_remove(struct i2c_client *client)
+{
+	struct regulator_dev **rdev = i2c_get_clientdata(client);
+	int i;
+
+	for (i = 0; i < MAX8660_V_END; i++)
+		if (rdev[i])
+			regulator_unregister(rdev[i]);
+	kfree(rdev);
+	i2c_set_clientdata(client, NULL);
+
+	return 0;
+}
+
+static const struct i2c_device_id max8660_id[] = {
+	{ "max8660", 0 },
+	{ "max8661", 0 },
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, max8660_id);
+
+static struct i2c_driver max8660_driver = {
+	.probe = max8660_probe,
+	.remove = max8660_remove,
+	.driver		= {
+		.name	= "max8660",
+	},
+	.id_table	= max8660_id,
+};
+
+static int __init max8660_init(void)
+{
+	return i2c_add_driver(&max8660_driver);
+}
+subsys_initcall(max8660_init);
+
+static void __exit max8660_exit(void)
+{
+	i2c_del_driver(&max8660_driver);
+}
+module_exit(max8660_exit);
+
+/* Module information */
+MODULE_DESCRIPTION("MAXIM 8660/8661 voltage regulator driver");
+MODULE_AUTHOR("Wolfram Sang");
+MODULE_LICENSE("GPL v2");
diff --git a/include/linux/regulator/max8660.h b/include/linux/regulator/max8660.h
new file mode 100644
index 000000000000..9936763621c7
--- /dev/null
+++ b/include/linux/regulator/max8660.h
@@ -0,0 +1,57 @@
+/*
+ * max8660.h  --  Voltage regulation for the Maxim 8660/8661
+ *
+ * Copyright (C) 2009 Wolfram Sang, Pengutronix e.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#ifndef __LINUX_REGULATOR_MAX8660_H
+#define __LINUX_REGULATOR_MAX8660_H
+
+#include <linux/regulator/machine.h>
+
+enum {
+	MAX8660_V3,
+	MAX8660_V4,
+	MAX8660_V5,
+	MAX8660_V6,
+	MAX8660_V7,
+	MAX8660_V_END,
+};
+
+/**
+ * max8660_subdev_data - regulator subdev data
+ * @id: regulator id
+ * @name: regulator name
+ * @platform_data: regulator init data
+ */
+struct max8660_subdev_data {
+	int				id;
+	char				*name;
+	struct regulator_init_data	*platform_data;
+};
+
+/**
+ * max8660_platform_data - platform data for max8660
+ * @num_subdevs: number of regulators used
+ * @subdevs: pointer to regulators used
+ * @en34_is_high: if EN34 is driven high, regulators cannot be en-/disabled.
+ */
+struct max8660_platform_data {
+	int num_subdevs;
+	struct max8660_subdev_data *subdevs;
+	unsigned en34_is_high:1;
+};
+#endif
-- 
cgit v1.2.3


From e24a04c44cf312e88b50006a91ad7ffc1c0d97a5 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Tue, 22 Sep 2009 08:47:11 -0700
Subject: regulator: Implement WM831x BuckWise DC-DC convertor DVS support

The BuckWise DC-DC convertors in WM831x devices support switching to
a second output voltage using the logic level on one of the device
pins. This is intended to allow rapid voltage switching for uses like
cpufreq, replacing the I2C or SPI write used to configure the voltage
of the regulator with a much faster GPIO status change.

This is implemented by keeping the DVS voltage configured as the
maximum voltage permitted for the regulator. If a request is made
for the maximum voltage then the GPIO is used to switch to the DVS
voltage, otherwise the normal ON voltage is updated and used. This
follows the idiom used by most cpufreq drivers, which drop the
minimum voltage as the core frequency is dropped but use a constant
maximum - raising the voltage should normally be fast, but lowering
it may be slower.

Configuration of the DVS MFP on the device should be done externally,
for example via OTP.

Support is present in the hardware for monitoring the status of the
transition using a second GPIO. This is not currently implemented
but platform data is provided for it - the driver currently assumes
that the device will be configured to transition immediately - but
platform data is provided to reduce merge issues once it is.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Acked-by: Samuel Ortiz <sameo@linux.intel.com>
Signed-off-by: Liam Girdwood <lrg@slimlogic.co.uk>
---
 drivers/regulator/wm831x-dcdc.c  | 207 +++++++++++++++++++++++++++++++++++----
 include/linux/mfd/wm831x/pdata.h |  17 ++++
 2 files changed, 206 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/regulator/wm831x-dcdc.c b/drivers/regulator/wm831x-dcdc.c
index 2eefc1a0cf08..0a6577577e8d 100644
--- a/drivers/regulator/wm831x-dcdc.c
+++ b/drivers/regulator/wm831x-dcdc.c
@@ -19,6 +19,8 @@
 #include <linux/i2c.h>
 #include <linux/platform_device.h>
 #include <linux/regulator/driver.h>
+#include <linux/regulator/machine.h>
+#include <linux/gpio.h>
 
 #include <linux/mfd/wm831x/core.h>
 #include <linux/mfd/wm831x/regulator.h>
@@ -39,6 +41,7 @@
 #define WM831X_DCDC_CONTROL_2     1
 #define WM831X_DCDC_ON_CONFIG     2
 #define WM831X_DCDC_SLEEP_CONTROL 3
+#define WM831X_DCDC_DVS_CONTROL   4
 
 /*
  * Shared
@@ -50,6 +53,10 @@ struct wm831x_dcdc {
 	int base;
 	struct wm831x *wm831x;
 	struct regulator_dev *regulator;
+	int dvs_gpio;
+	int dvs_gpio_state;
+	int on_vsel;
+	int dvs_vsel;
 };
 
 static int wm831x_dcdc_is_enabled(struct regulator_dev *rdev)
@@ -240,11 +247,9 @@ static int wm831x_buckv_list_voltage(struct regulator_dev *rdev,
 	return -EINVAL;
 }
 
-static int wm831x_buckv_set_voltage_int(struct regulator_dev *rdev, int reg,
-					 int min_uV, int max_uV)
+static int wm831x_buckv_select_min_voltage(struct regulator_dev *rdev,
+					   int min_uV, int max_uV)
 {
-	struct wm831x_dcdc *dcdc = rdev_get_drvdata(rdev);
-	struct wm831x *wm831x = dcdc->wm831x;
 	u16 vsel;
 
 	if (min_uV < 600000)
@@ -257,39 +262,126 @@ static int wm831x_buckv_set_voltage_int(struct regulator_dev *rdev, int reg,
 	if (wm831x_buckv_list_voltage(rdev, vsel) > max_uV)
 		return -EINVAL;
 
-	return wm831x_set_bits(wm831x, reg, WM831X_DC1_ON_VSEL_MASK, vsel);
+	return vsel;
+}
+
+static int wm831x_buckv_select_max_voltage(struct regulator_dev *rdev,
+					   int min_uV, int max_uV)
+{
+	u16 vsel;
+
+	if (max_uV < 600000 || max_uV > 1800000)
+		return -EINVAL;
+
+	vsel = ((max_uV - 600000) / 12500) + 8;
+
+	if (wm831x_buckv_list_voltage(rdev, vsel) < min_uV ||
+	    wm831x_buckv_list_voltage(rdev, vsel) < max_uV)
+		return -EINVAL;
+
+	return vsel;
+}
+
+static int wm831x_buckv_set_dvs(struct regulator_dev *rdev, int state)
+{
+	struct wm831x_dcdc *dcdc = rdev_get_drvdata(rdev);
+
+	if (state == dcdc->dvs_gpio_state)
+		return 0;
+
+	dcdc->dvs_gpio_state = state;
+	gpio_set_value(dcdc->dvs_gpio, state);
+
+	/* Should wait for DVS state change to be asserted if we have
+	 * a GPIO for it, for now assume the device is configured
+	 * for the fastest possible transition.
+	 */
+
+	return 0;
 }
 
 static int wm831x_buckv_set_voltage(struct regulator_dev *rdev,
-				     int min_uV, int max_uV)
+				    int min_uV, int max_uV)
 {
 	struct wm831x_dcdc *dcdc = rdev_get_drvdata(rdev);
-	u16 reg = dcdc->base + WM831X_DCDC_ON_CONFIG;
+	struct wm831x *wm831x = dcdc->wm831x;
+	int on_reg = dcdc->base + WM831X_DCDC_ON_CONFIG;
+	int dvs_reg = dcdc->base + WM831X_DCDC_DVS_CONTROL;
+	int vsel, ret;
+
+	vsel = wm831x_buckv_select_min_voltage(rdev, min_uV, max_uV);
+	if (vsel < 0)
+		return vsel;
+
+	/* If this value is already set then do a GPIO update if we can */
+	if (dcdc->dvs_gpio && dcdc->on_vsel == vsel)
+		return wm831x_buckv_set_dvs(rdev, 0);
+
+	if (dcdc->dvs_gpio && dcdc->dvs_vsel == vsel)
+		return wm831x_buckv_set_dvs(rdev, 1);
+
+	/* Always set the ON status to the minimum voltage */
+	ret = wm831x_set_bits(wm831x, on_reg, WM831X_DC1_ON_VSEL_MASK, vsel);
+	if (ret < 0)
+		return ret;
+	dcdc->on_vsel = vsel;
+
+	if (!dcdc->dvs_gpio)
+		return ret;
+
+	/* Kick the voltage transition now */
+	ret = wm831x_buckv_set_dvs(rdev, 0);
+	if (ret < 0)
+		return ret;
+
+	/* Set the high voltage as the DVS voltage.  This is optimised
+	 * for CPUfreq usage, most processors will keep the maximum
+	 * voltage constant and lower the minimum with the frequency. */
+	vsel = wm831x_buckv_select_max_voltage(rdev, min_uV, max_uV);
+	if (vsel < 0) {
+		/* This should never happen - at worst the same vsel
+		 * should be chosen */
+		WARN_ON(vsel < 0);
+		return 0;
+	}
+
+	/* Don't bother if it's the same VSEL we're already using */
+	if (vsel == dcdc->on_vsel)
+		return 0;
 
-	return wm831x_buckv_set_voltage_int(rdev, reg, min_uV, max_uV);
+	ret = wm831x_set_bits(wm831x, dvs_reg, WM831X_DC1_DVS_VSEL_MASK, vsel);
+	if (ret == 0)
+		dcdc->dvs_vsel = vsel;
+	else
+		dev_warn(wm831x->dev, "Failed to set DCDC DVS VSEL: %d\n",
+			 ret);
+
+	return 0;
 }
 
 static int wm831x_buckv_set_suspend_voltage(struct regulator_dev *rdev,
-					     int uV)
+					    int uV)
 {
 	struct wm831x_dcdc *dcdc = rdev_get_drvdata(rdev);
+	struct wm831x *wm831x = dcdc->wm831x;
 	u16 reg = dcdc->base + WM831X_DCDC_SLEEP_CONTROL;
+	int vsel;
+
+	vsel = wm831x_buckv_select_min_voltage(rdev, uV, uV);
+	if (vsel < 0)
+		return vsel;
 
-	return wm831x_buckv_set_voltage_int(rdev, reg, uV, uV);
+	return wm831x_set_bits(wm831x, reg, WM831X_DC1_SLP_VSEL_MASK, vsel);
 }
 
 static int wm831x_buckv_get_voltage(struct regulator_dev *rdev)
 {
 	struct wm831x_dcdc *dcdc = rdev_get_drvdata(rdev);
-	struct wm831x *wm831x = dcdc->wm831x;
-	u16 reg = dcdc->base + WM831X_DCDC_ON_CONFIG;
-	int val;
 
-	val = wm831x_reg_read(wm831x, reg);
-	if (val < 0)
-		return val;
-
-	return wm831x_buckv_list_voltage(rdev, val & WM831X_DC1_ON_VSEL_MASK);
+	if (dcdc->dvs_gpio && dcdc->dvs_gpio_state)
+		return wm831x_buckv_list_voltage(rdev, dcdc->dvs_vsel);
+	else
+		return wm831x_buckv_list_voltage(rdev, dcdc->on_vsel);
 }
 
 /* Current limit options */
@@ -346,6 +438,64 @@ static struct regulator_ops wm831x_buckv_ops = {
 	.set_suspend_mode = wm831x_dcdc_set_suspend_mode,
 };
 
+/*
+ * Set up DVS control.  We just log errors since we can still run
+ * (with reduced performance) if we fail.
+ */
+static __devinit void wm831x_buckv_dvs_init(struct wm831x_dcdc *dcdc,
+					    struct wm831x_buckv_pdata *pdata)
+{
+	struct wm831x *wm831x = dcdc->wm831x;
+	int ret;
+	u16 ctrl;
+
+	if (!pdata || !pdata->dvs_gpio)
+		return;
+
+	switch (pdata->dvs_control_src) {
+	case 1:
+		ctrl = 2 << WM831X_DC1_DVS_SRC_SHIFT;
+		break;
+	case 2:
+		ctrl = 3 << WM831X_DC1_DVS_SRC_SHIFT;
+		break;
+	default:
+		dev_err(wm831x->dev, "Invalid DVS control source %d for %s\n",
+			pdata->dvs_control_src, dcdc->name);
+		return;
+	}
+
+	ret = wm831x_set_bits(wm831x, dcdc->base + WM831X_DCDC_DVS_CONTROL,
+			      WM831X_DC1_DVS_SRC_MASK, ctrl);
+	if (ret < 0) {
+		dev_err(wm831x->dev, "Failed to set %s DVS source: %d\n",
+			dcdc->name, ret);
+		return;
+	}
+
+	ret = gpio_request(pdata->dvs_gpio, "DCDC DVS");
+	if (ret < 0) {
+		dev_err(wm831x->dev, "Failed to get %s DVS GPIO: %d\n",
+			dcdc->name, ret);
+		return;
+	}
+
+	/* gpiolib won't let us read the GPIO status so pick the higher
+	 * of the two existing voltages so we take it as platform data.
+	 */
+	dcdc->dvs_gpio_state = pdata->dvs_init_state;
+
+	ret = gpio_direction_output(pdata->dvs_gpio, dcdc->dvs_gpio_state);
+	if (ret < 0) {
+		dev_err(wm831x->dev, "Failed to enable %s DVS GPIO: %d\n",
+			dcdc->name, ret);
+		gpio_free(pdata->dvs_gpio);
+		return;
+	}
+
+	dcdc->dvs_gpio = pdata->dvs_gpio;
+}
+
 static __devinit int wm831x_buckv_probe(struct platform_device *pdev)
 {
 	struct wm831x *wm831x = dev_get_drvdata(pdev->dev.parent);
@@ -384,6 +534,23 @@ static __devinit int wm831x_buckv_probe(struct platform_device *pdev)
 	dcdc->desc.ops = &wm831x_buckv_ops;
 	dcdc->desc.owner = THIS_MODULE;
 
+	ret = wm831x_reg_read(wm831x, dcdc->base + WM831X_DCDC_ON_CONFIG);
+	if (ret < 0) {
+		dev_err(wm831x->dev, "Failed to read ON VSEL: %d\n", ret);
+		goto err;
+	}
+	dcdc->on_vsel = ret & WM831X_DC1_ON_VSEL_MASK;
+
+	ret = wm831x_reg_read(wm831x, dcdc->base + WM831X_DCDC_ON_CONFIG);
+	if (ret < 0) {
+		dev_err(wm831x->dev, "Failed to read DVS VSEL: %d\n", ret);
+		goto err;
+	}
+	dcdc->dvs_vsel = ret & WM831X_DC1_DVS_VSEL_MASK;
+
+	if (pdata->dcdc[id])
+		wm831x_buckv_dvs_init(dcdc, pdata->dcdc[id]->driver_data);
+
 	dcdc->regulator = regulator_register(&dcdc->desc, &pdev->dev,
 					     pdata->dcdc[id], dcdc);
 	if (IS_ERR(dcdc->regulator)) {
@@ -422,6 +589,8 @@ err_uv:
 err_regulator:
 	regulator_unregister(dcdc->regulator);
 err:
+	if (dcdc->dvs_gpio)
+		gpio_free(dcdc->dvs_gpio);
 	kfree(dcdc);
 	return ret;
 }
@@ -434,6 +603,8 @@ static __devexit int wm831x_buckv_remove(struct platform_device *pdev)
 	wm831x_free_irq(wm831x, platform_get_irq_byname(pdev, "HC"), dcdc);
 	wm831x_free_irq(wm831x, platform_get_irq_byname(pdev, "UV"), dcdc);
 	regulator_unregister(dcdc->regulator);
+	if (dcdc->dvs_gpio)
+		gpio_free(dcdc->dvs_gpio);
 	kfree(dcdc);
 
 	return 0;
diff --git a/include/linux/mfd/wm831x/pdata.h b/include/linux/mfd/wm831x/pdata.h
index 415c228743d5..fd322aca33ba 100644
--- a/include/linux/mfd/wm831x/pdata.h
+++ b/include/linux/mfd/wm831x/pdata.h
@@ -41,6 +41,23 @@ struct wm831x_battery_pdata {
 	int timeout;        /** Charge cycle timeout, in minutes */
 };
 
+/**
+ * Configuration for the WM831x DC-DC BuckWise convertors.  This
+ * should be passed as driver_data in the regulator_init_data.
+ *
+ * Currently all the configuration is for the fast DVS switching
+ * support of the devices.  This allows MFPs on the device to be
+ * configured as an input to switch between two output voltages,
+ * allowing voltage transitions without the expense of an access over
+ * I2C or SPI buses.
+ */
+struct wm831x_buckv_pdata {
+	int dvs_gpio;        /** CPU GPIO to use for DVS switching */
+	int dvs_control_src; /** Hardware DVS source to use (1 or 2) */
+	int dvs_init_state;  /** DVS state to expect on startup */
+	int dvs_state_gpio;  /** CPU GPIO to use for monitoring status */
+};
+
 /* Sources for status LED configuration.  Values are register values
  * plus 1 to allow for a zero default for preserve.
  */
-- 
cgit v1.2.3


From 638f85c54f4fed0f8f1fbc23745a8f334112e892 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Thu, 22 Oct 2009 16:31:33 +0100
Subject: regulator: Handle regulators without suspend mode configuration

Since some regulators in the system may not support suspend mode
configuration we need to allow some regulators to have a missing
suspend mode configuration. Do this by requiring that disabled
regulators are explicitly flagged and then skip over regulators
that have no state specified.

Try to avoid surprises by warning the if we could set the state
but no configuration is provided.  This also ensures that an all
zeros configuration generates a warning rather than silently
disabling the regulator.

Reported-by: Joonyoung Shim <jy0922.shim@samsung.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Liam Girdwood <lrg@slimlogic.co.uk>
---
 drivers/regulator/core.c          | 25 ++++++++++++++++++++++---
 include/linux/regulator/machine.h |  6 +++++-
 2 files changed, 27 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c
index 7d0c0d7d90ca..2dab0d9e11f5 100644
--- a/drivers/regulator/core.c
+++ b/drivers/regulator/core.c
@@ -581,10 +581,29 @@ static int suspend_set_state(struct regulator_dev *rdev,
 	struct regulator_state *rstate)
 {
 	int ret = 0;
+	bool can_set_state;
 
-	/* enable & disable are mandatory for suspend control */
-	if (!rdev->desc->ops->set_suspend_enable ||
-		!rdev->desc->ops->set_suspend_disable) {
+	can_set_state = rdev->desc->ops->set_suspend_enable &&
+		rdev->desc->ops->set_suspend_disable;
+
+	/* If we have no suspend mode configration don't set anything;
+	 * only warn if the driver actually makes the suspend mode
+	 * configurable.
+	 */
+	if (!rstate->enabled && !rstate->disabled) {
+		if (can_set_state)
+			printk(KERN_WARNING "%s: No configuration for %s\n",
+			       __func__, rdev_get_name(rdev));
+		return 0;
+	}
+
+	if (rstate->enabled && rstate->disabled) {
+		printk(KERN_ERR "%s: invalid configuration for %s\n",
+		       __func__, rdev_get_name(rdev));
+		return -EINVAL;
+	}
+
+	if (!can_set_state) {
 		printk(KERN_ERR "%s: no way to set suspend state\n",
 			__func__);
 		return -EINVAL;
diff --git a/include/linux/regulator/machine.h b/include/linux/regulator/machine.h
index 87f5f176d4ef..234a8476cba8 100644
--- a/include/linux/regulator/machine.h
+++ b/include/linux/regulator/machine.h
@@ -43,16 +43,20 @@ struct regulator;
 /**
  * struct regulator_state - regulator state during low power system states
  *
- * This describes a regulators state during a system wide low power state.
+ * This describes a regulators state during a system wide low power
+ * state.  One of enabled or disabled must be set for the
+ * configuration to be applied.
  *
  * @uV: Operating voltage during suspend.
  * @mode: Operating mode during suspend.
  * @enabled: Enabled during suspend.
+ * @disabled: Disabled during suspend.
  */
 struct regulator_state {
 	int uV;	/* suspend voltage */
 	unsigned int mode; /* suspend regulator operating mode */
 	int enabled; /* is regulator enabled in this suspend state */
+	int disabled; /* is the regulator disbled in this suspend state */
 };
 
 /**
-- 
cgit v1.2.3


From b56daf13eb77ee24f48f0bb34c2492f46a432ec4 Mon Sep 17 00:00:00 2001
From: Liam Girdwood <lrg@slimlogic.co.uk>
Date: Wed, 11 Nov 2009 14:16:10 +0000
Subject: regulator: consumer.h - fix build when consumer.h is #included first.

consumer.h requires device.h for stand alone build.

Signed-off-by: Liam Girdwood <lrg@slimlogic.co.uk>
---
 include/linux/regulator/consumer.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h
index 490c5b37b6d7..030d92255c7a 100644
--- a/include/linux/regulator/consumer.h
+++ b/include/linux/regulator/consumer.h
@@ -35,6 +35,8 @@
 #ifndef __LINUX_REGULATOR_CONSUMER_H_
 #define __LINUX_REGULATOR_CONSUMER_H_
 
+#include <linux/device.h>
+
 /*
  * Regulator operating modes.
  *
-- 
cgit v1.2.3


From d4cc6a2eee98faebf2c7d3ebc4b35541c1d47d21 Mon Sep 17 00:00:00 2001
From: Antonio Ospite <ospite@studenti.unina.it>
Date: Mon, 7 Dec 2009 15:08:13 +0100
Subject: leds: Add LED class driver for regulator driven LEDs.

This driver provides an interface for controlling LEDs (or vibrators)
connected to PMICs for which there is a regulator framework driver.

This driver can be used, for instance, to control vibrator on all Motorola EZX
phones using the pcap-regulator driver services.

Signed-off-by: Antonio Ospite <ospite@studenti.unina.it>
Reviewed-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Richard Purdie <rpurdie@linux.intel.com>
---
 drivers/leds/Kconfig           |   6 +
 drivers/leds/Makefile          |   1 +
 drivers/leds/leds-regulator.c  | 242 +++++++++++++++++++++++++++++++++++++++++
 include/linux/leds-regulator.h |  46 ++++++++
 4 files changed, 295 insertions(+)
 create mode 100644 drivers/leds/leds-regulator.c
 create mode 100644 include/linux/leds-regulator.h

(limited to 'include/linux')

diff --git a/drivers/leds/Kconfig b/drivers/leds/Kconfig
index f12a99676628..8a0e1ec95e4a 100644
--- a/drivers/leds/Kconfig
+++ b/drivers/leds/Kconfig
@@ -229,6 +229,12 @@ config LEDS_PWM
 	help
 	  This option enables support for pwm driven LEDs
 
+config LEDS_REGULATOR
+	tristate "REGULATOR driven LED support"
+	depends on LEDS_CLASS && REGULATOR
+	help
+	  This option enables support for regulator driven LEDs.
+
 config LEDS_BD2802
 	tristate "LED driver for BD2802 RGB LED"
 	depends on LEDS_CLASS && I2C
diff --git a/drivers/leds/Makefile b/drivers/leds/Makefile
index 176f0c674751..9e63869d7c0d 100644
--- a/drivers/leds/Makefile
+++ b/drivers/leds/Makefile
@@ -29,6 +29,7 @@ obj-$(CONFIG_LEDS_DA903X)		+= leds-da903x.o
 obj-$(CONFIG_LEDS_WM831X_STATUS)	+= leds-wm831x-status.o
 obj-$(CONFIG_LEDS_WM8350)		+= leds-wm8350.o
 obj-$(CONFIG_LEDS_PWM)			+= leds-pwm.o
+obj-$(CONFIG_LEDS_REGULATOR)		+= leds-regulator.o
 obj-$(CONFIG_LEDS_INTEL_SS4200)		+= leds-ss4200.o
 obj-$(CONFIG_LEDS_LT3593)		+= leds-lt3593.o
 obj-$(CONFIG_LEDS_ADP5520)		+= leds-adp5520.o
diff --git a/drivers/leds/leds-regulator.c b/drivers/leds/leds-regulator.c
new file mode 100644
index 000000000000..7f00de3ef922
--- /dev/null
+++ b/drivers/leds/leds-regulator.c
@@ -0,0 +1,242 @@
+/*
+ * leds-regulator.c - LED class driver for regulator driven LEDs.
+ *
+ * Copyright (C) 2009 Antonio Ospite <ospite@studenti.unina.it>
+ *
+ * Inspired by leds-wm8350 driver.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/err.h>
+#include <linux/workqueue.h>
+#include <linux/leds.h>
+#include <linux/leds-regulator.h>
+#include <linux/platform_device.h>
+#include <linux/regulator/consumer.h>
+
+#define to_regulator_led(led_cdev) \
+	container_of(led_cdev, struct regulator_led, cdev)
+
+struct regulator_led {
+	struct led_classdev cdev;
+	enum led_brightness value;
+	int enabled;
+	struct mutex mutex;
+	struct work_struct work;
+
+	struct regulator *vcc;
+};
+
+static inline int led_regulator_get_max_brightness(struct regulator *supply)
+{
+	int ret;
+	int voltage = regulator_list_voltage(supply, 0);
+
+	if (voltage <= 0)
+		return 1;
+
+	/* even if regulator can't change voltages,
+	 * we still assume it can change status
+	 * and the LED can be turned on and off.
+	 */
+	ret = regulator_set_voltage(supply, voltage, voltage);
+	if (ret < 0)
+		return 1;
+
+	return regulator_count_voltages(supply);
+}
+
+static int led_regulator_get_voltage(struct regulator *supply,
+		enum led_brightness brightness)
+{
+	if (brightness == 0)
+		return -EINVAL;
+
+	return regulator_list_voltage(supply, brightness - 1);
+}
+
+
+static void regulator_led_enable(struct regulator_led *led)
+{
+	int ret;
+
+	if (led->enabled)
+		return;
+
+	ret = regulator_enable(led->vcc);
+	if (ret != 0) {
+		dev_err(led->cdev.dev, "Failed to enable vcc: %d\n", ret);
+		return;
+	}
+
+	led->enabled = 1;
+}
+
+static void regulator_led_disable(struct regulator_led *led)
+{
+	int ret;
+
+	if (!led->enabled)
+		return;
+
+	ret = regulator_disable(led->vcc);
+	if (ret != 0) {
+		dev_err(led->cdev.dev, "Failed to disable vcc: %d\n", ret);
+		return;
+	}
+
+	led->enabled = 0;
+}
+
+static void regulator_led_set_value(struct regulator_led *led)
+{
+	int voltage;
+	int ret;
+
+	mutex_lock(&led->mutex);
+
+	if (led->value == LED_OFF) {
+		regulator_led_disable(led);
+		goto out;
+	}
+
+	if (led->cdev.max_brightness > 1) {
+		voltage = led_regulator_get_voltage(led->vcc, led->value);
+		dev_dbg(led->cdev.dev, "brightness: %d voltage: %d\n",
+				led->value, voltage);
+
+		ret = regulator_set_voltage(led->vcc, voltage, voltage);
+		if (ret != 0)
+			dev_err(led->cdev.dev, "Failed to set voltage %d: %d\n",
+				voltage, ret);
+	}
+
+	regulator_led_enable(led);
+
+out:
+	mutex_unlock(&led->mutex);
+}
+
+static void led_work(struct work_struct *work)
+{
+	struct regulator_led *led;
+
+	led = container_of(work, struct regulator_led, work);
+	regulator_led_set_value(led);
+}
+
+static void regulator_led_brightness_set(struct led_classdev *led_cdev,
+			   enum led_brightness value)
+{
+	struct regulator_led *led = to_regulator_led(led_cdev);
+
+	led->value = value;
+	schedule_work(&led->work);
+}
+
+static int __devinit regulator_led_probe(struct platform_device *pdev)
+{
+	struct led_regulator_platform_data *pdata = pdev->dev.platform_data;
+	struct regulator_led *led;
+	struct regulator *vcc;
+	int ret = 0;
+
+	if (pdata == NULL) {
+		dev_err(&pdev->dev, "no platform data\n");
+		return -ENODEV;
+	}
+
+	vcc = regulator_get_exclusive(&pdev->dev, "vled");
+	if (IS_ERR(vcc)) {
+		dev_err(&pdev->dev, "Cannot get vcc for %s\n", pdata->name);
+		return PTR_ERR(vcc);
+	}
+
+	led = kzalloc(sizeof(*led), GFP_KERNEL);
+	if (led == NULL) {
+		ret = -ENOMEM;
+		goto err_vcc;
+	}
+
+	led->cdev.max_brightness = led_regulator_get_max_brightness(vcc);
+	if (pdata->brightness > led->cdev.max_brightness) {
+		dev_err(&pdev->dev, "Invalid default brightness %d\n",
+				pdata->brightness);
+		ret = -EINVAL;
+		goto err_led;
+	}
+	led->value = pdata->brightness;
+
+	led->cdev.brightness_set = regulator_led_brightness_set;
+	led->cdev.name = pdata->name;
+	led->cdev.flags |= LED_CORE_SUSPENDRESUME;
+	led->vcc = vcc;
+
+	mutex_init(&led->mutex);
+	INIT_WORK(&led->work, led_work);
+
+	platform_set_drvdata(pdev, led);
+
+	ret = led_classdev_register(&pdev->dev, &led->cdev);
+	if (ret < 0) {
+		cancel_work_sync(&led->work);
+		goto err_led;
+	}
+
+	/* to expose the default value to userspace */
+	led->cdev.brightness = led->value;
+
+	/* Set the default led status */
+	regulator_led_set_value(led);
+
+	return 0;
+
+err_led:
+	kfree(led);
+err_vcc:
+	regulator_put(vcc);
+	return ret;
+}
+
+static int __devexit regulator_led_remove(struct platform_device *pdev)
+{
+	struct regulator_led *led = platform_get_drvdata(pdev);
+
+	led_classdev_unregister(&led->cdev);
+	cancel_work_sync(&led->work);
+	regulator_led_disable(led);
+	regulator_put(led->vcc);
+	kfree(led);
+	return 0;
+}
+
+static struct platform_driver regulator_led_driver = {
+	.driver = {
+		   .name  = "leds-regulator",
+		   .owner = THIS_MODULE,
+		   },
+	.probe  = regulator_led_probe,
+	.remove = __devexit_p(regulator_led_remove),
+};
+
+static int __init regulator_led_init(void)
+{
+	return platform_driver_register(&regulator_led_driver);
+}
+module_init(regulator_led_init);
+
+static void __exit regulator_led_exit(void)
+{
+	platform_driver_unregister(&regulator_led_driver);
+}
+module_exit(regulator_led_exit);
+
+MODULE_AUTHOR("Antonio Ospite <ospite@studenti.unina.it>");
+MODULE_DESCRIPTION("Regulator driven LED driver");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:leds-regulator");
diff --git a/include/linux/leds-regulator.h b/include/linux/leds-regulator.h
new file mode 100644
index 000000000000..5a8eb389aab8
--- /dev/null
+++ b/include/linux/leds-regulator.h
@@ -0,0 +1,46 @@
+/*
+ * leds-regulator.h - platform data structure for regulator driven LEDs.
+ *
+ * Copyright (C) 2009 Antonio Ospite <ospite@studenti.unina.it>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#ifndef __LINUX_LEDS_REGULATOR_H
+#define __LINUX_LEDS_REGULATOR_H
+
+/*
+ * Use "vled" as supply id when declaring the regulator consumer:
+ *
+ * static struct regulator_consumer_supply pcap_regulator_VVIB_consumers [] = {
+ * 	{ .dev_name = "leds-regulator.0", supply = "vled" },
+ * };
+ *
+ * If you have several regulator driven LEDs, you can append a numerical id to
+ * .dev_name as done above, and use the same id when declaring the platform
+ * device:
+ *
+ * static struct led_regulator_platform_data a780_vibrator_data = {
+ * 	.name   = "a780::vibrator",
+ * };
+ *
+ * static struct platform_device a780_vibrator = {
+ * 	.name = "leds-regulator",
+ * 	.id   = 0,
+ * 	.dev  = {
+ * 		.platform_data = &a780_vibrator_data,
+ * 	},
+ * };
+ */
+
+#include <linux/leds.h>
+
+struct led_regulator_platform_data {
+	char *name;                     /* LED name as expected by LED class */
+	enum led_brightness brightness; /* initial brightness value */
+};
+
+#endif /* __LINUX_LEDS_REGULATOR_H */
-- 
cgit v1.2.3


From 9695fff8f84d7ab849139750036e443b85804edd Mon Sep 17 00:00:00 2001
From: Antonio Ospite <ospite@studenti.unina.it>
Date: Sat, 28 Nov 2009 12:55:51 +0100
Subject: leds: leds-pca9532.h- indent with tabs, not spaces

Signed-off-by: Antonio Ospite <ospite@studenti.unina.it>
Signed-off-by: Richard Purdie <rpurdie@linux.intel.com>
---
 include/linux/leds-pca9532.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/leds-pca9532.h b/include/linux/leds-pca9532.h
index 96eea90f01a8..f158eb1149aa 100644
--- a/include/linux/leds-pca9532.h
+++ b/include/linux/leds-pca9532.h
@@ -32,7 +32,7 @@ struct pca9532_led {
 	struct i2c_client *client;
 	char *name;
 	struct led_classdev ldev;
-       struct work_struct work;
+	struct work_struct work;
 	enum pca9532_type type;
 	enum pca9532_state state;
 };
-- 
cgit v1.2.3


From 1998111582f5d726ca4dbf9d68935d9e7c994374 Mon Sep 17 00:00:00 2001
From: Antonio Ospite <ospite@studenti.unina.it>
Date: Sun, 29 Nov 2009 13:12:21 +0100
Subject: leds: leds-lp3944.h - remove unneeded includes

These were needed in the first version of the driver because we used to expose
workqueue and led class details in the header file, now we don't.

Signed-off-by: Antonio Ospite <ospite@studenti.unina.it>
Signed-off-by: Richard Purdie <rpurdie@linux.intel.com>
---
 include/linux/leds-lp3944.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/leds-lp3944.h b/include/linux/leds-lp3944.h
index afc9f9fd70f5..2618aa9063bc 100644
--- a/include/linux/leds-lp3944.h
+++ b/include/linux/leds-lp3944.h
@@ -12,9 +12,6 @@
 #ifndef __LINUX_LEDS_LP3944_H
 #define __LINUX_LEDS_LP3944_H
 
-#include <linux/leds.h>
-#include <linux/workqueue.h>
-
 #define LP3944_LED0 0
 #define LP3944_LED1 1
 #define LP3944_LED2 2
-- 
cgit v1.2.3


From cfc3899fcd0b3b990b29d3d33f75f4edf715e7d1 Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben@simtec.co.uk>
Date: Tue, 10 Nov 2009 17:20:40 +0000
Subject: backlight: Pass device through notify callback in the pwm driver

Add the device to the notify callback's arguments in the PWM backlight
driver. This brings the notify callback into line with the other
callbacks defined by this driver.

Signed-off-by: Ben Dooks <ben@simtec.co.uk>
Signed-off-by: Simtec Linux Team <linux@simtec.co.uk>
Signed-off-by: Richard Purdie <rpurdie@linux.intel.com>
---
 drivers/video/backlight/pwm_bl.c | 9 ++++++---
 include/linux/pwm_backlight.h    | 2 +-
 2 files changed, 7 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/video/backlight/pwm_bl.c b/drivers/video/backlight/pwm_bl.c
index df9e0b32cf39..9d2ec2a1cce8 100644
--- a/drivers/video/backlight/pwm_bl.c
+++ b/drivers/video/backlight/pwm_bl.c
@@ -22,8 +22,10 @@
 
 struct pwm_bl_data {
 	struct pwm_device	*pwm;
+	struct device		*dev;
 	unsigned int		period;
-	int			(*notify)(int brightness);
+	int			(*notify)(struct device *,
+					  int brightness);
 };
 
 static int pwm_backlight_update_status(struct backlight_device *bl)
@@ -39,7 +41,7 @@ static int pwm_backlight_update_status(struct backlight_device *bl)
 		brightness = 0;
 
 	if (pb->notify)
-		brightness = pb->notify(brightness);
+		brightness = pb->notify(pb->dev, brightness);
 
 	if (brightness == 0) {
 		pwm_config(pb->pwm, 0, pb->period);
@@ -88,6 +90,7 @@ static int pwm_backlight_probe(struct platform_device *pdev)
 
 	pb->period = data->pwm_period_ns;
 	pb->notify = data->notify;
+	pb->dev = &pdev->dev;
 
 	pb->pwm = pwm_request(data->pwm_id, "backlight");
 	if (IS_ERR(pb->pwm)) {
@@ -146,7 +149,7 @@ static int pwm_backlight_suspend(struct platform_device *pdev,
 	struct pwm_bl_data *pb = dev_get_drvdata(&bl->dev);
 
 	if (pb->notify)
-		pb->notify(0);
+		pb->notify(pb->dev, 0);
 	pwm_config(pb->pwm, 0, pb->period);
 	pwm_disable(pb->pwm);
 	return 0;
diff --git a/include/linux/pwm_backlight.h b/include/linux/pwm_backlight.h
index 7a9754c96775..01b3d759f1fc 100644
--- a/include/linux/pwm_backlight.h
+++ b/include/linux/pwm_backlight.h
@@ -10,7 +10,7 @@ struct platform_pwm_backlight_data {
 	unsigned int dft_brightness;
 	unsigned int pwm_period_ns;
 	int (*init)(struct device *dev);
-	int (*notify)(int brightness);
+	int (*notify)(struct device *dev, int brightness);
 	void (*exit)(struct device *dev);
 };
 
-- 
cgit v1.2.3


From 733421516b42c44b9e21f1793c430cc801ef8324 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Thu, 17 Dec 2009 13:16:27 +0100
Subject: sched: Move TASK_STATE_TO_CHAR_STR near the TASK_state bits

So that we don't keep forgetting about it.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <20091217121829.815779372@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 973b2b89f86d..c28ed1b1d7c2 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -193,6 +193,8 @@ print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
 #define TASK_WAKEKILL		128
 #define TASK_WAKING		256
 
+#define TASK_STATE_TO_CHAR_STR "RSDTtZX"
+
 /* Convenience macros for the sake of set_task_state */
 #define TASK_KILLABLE		(TASK_WAKEKILL | TASK_UNINTERRUPTIBLE)
 #define TASK_STOPPED		(TASK_WAKEKILL | __TASK_STOPPED)
@@ -2595,8 +2597,6 @@ static inline void mm_init_owner(struct mm_struct *mm, struct task_struct *p)
 }
 #endif /* CONFIG_MM_OWNER */
 
-#define TASK_STATE_TO_CHAR_STR "RSDTtZX"
-
 #endif /* __KERNEL__ */
 
 #endif
-- 
cgit v1.2.3


From 44d90df6b757c59651ddd55f1a84f28132b50d29 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Thu, 17 Dec 2009 13:16:28 +0100
Subject: sched: Add missing state chars to TASK_STATE_TO_CHAR_STR

We grew 3 new task states since the last time someone touched
it.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <20091217121829.892737686@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index c28ed1b1d7c2..94858df38a87 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -193,7 +193,7 @@ print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
 #define TASK_WAKEKILL		128
 #define TASK_WAKING		256
 
-#define TASK_STATE_TO_CHAR_STR "RSDTtZX"
+#define TASK_STATE_TO_CHAR_STR "RSDTtZXxKW"
 
 /* Convenience macros for the sake of set_task_state */
 #define TASK_KILLABLE		(TASK_WAKEKILL | TASK_UNINTERRUPTIBLE)
-- 
cgit v1.2.3


From e1781538cf5c870ab696e9b8f0a5c498d3900f2f Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Thu, 17 Dec 2009 13:16:30 +0100
Subject: sched: Assert task state bits at build time

Since everybody is lazy and prone to forgetting things, make the
compiler help us a bit.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <20091217121830.060186433@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 fs/proc/array.c       | 18 ++++++++++--------
 include/linux/sched.h |  4 ++++
 2 files changed, 14 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/fs/proc/array.c b/fs/proc/array.c
index 96361e8fa3a8..f560325c444f 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -134,14 +134,14 @@ static inline void task_name(struct seq_file *m, struct task_struct *p)
  * simple bit tests.
  */
 static const char *task_state_array[] = {
-	"R (running)",		/*  0 */
-	"S (sleeping)",		/*  1 */
-	"D (disk sleep)",	/*  2 */
-	"T (stopped)",		/*  4 */
-	"t (tracing stop)",	/*  8 */
-	"Z (zombie)",		/* 16 */
-	"X (dead)",		/* 32 */
-	"x (dead)",		/* 64 */
+	"R (running)",		/*   0 */
+	"S (sleeping)",		/*   1 */
+	"D (disk sleep)",	/*   2 */
+	"T (stopped)",		/*   4 */
+	"t (tracing stop)",	/*   8 */
+	"Z (zombie)",		/*  16 */
+	"X (dead)",		/*  32 */
+	"x (dead)",		/*  64 */
 	"K (wakekill)",		/* 128 */
 	"W (waking)",		/* 256 */
 };
@@ -151,6 +151,8 @@ static inline const char *get_task_state(struct task_struct *tsk)
 	unsigned int state = (tsk->state & TASK_REPORT) | tsk->exit_state;
 	const char **p = &task_state_array[0];
 
+	BUILD_BUG_ON(1 + ilog2(TASK_STATE_MAX) != ARRAY_SIZE(task_state_array));
+
 	while (state) {
 		p++;
 		state >>= 1;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 94858df38a87..37543876ddf5 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -192,9 +192,13 @@ print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
 #define TASK_DEAD		64
 #define TASK_WAKEKILL		128
 #define TASK_WAKING		256
+#define TASK_STATE_MAX		512
 
 #define TASK_STATE_TO_CHAR_STR "RSDTtZXxKW"
 
+extern char ___assert_task_state[1 - 2*!!(
+		sizeof(TASK_STATE_TO_CHAR_STR)-1 != ilog2(TASK_STATE_MAX)+1)];
+
 /* Convenience macros for the sake of set_task_state */
 #define TASK_KILLABLE		(TASK_WAKEKILL | TASK_UNINTERRUPTIBLE)
 #define TASK_STOPPED		(TASK_WAKEKILL | __TASK_STOPPED)
-- 
cgit v1.2.3


From e24c745272072fd2abe55209f1949b7b7ee602a7 Mon Sep 17 00:00:00 2001
From: Feng Tang <feng.tang@intel.com>
Date: Mon, 14 Dec 2009 14:20:22 -0800
Subject: spi: controller driver for Designware SPI core

Driver for the Designware SPI core, it supports multipul interfaces like
PCI/APB etc.  User can use "dw_apb_ssi_db.pdf" from Synopsys as HW
datasheet.

[randy.dunlap@oracle.com: fix build]
[akpm@linux-foundation.org: build fix]
Signed-off-by: Feng Tang <feng.tang@intel.com>
Cc: David Brownell <david-b@pacbell.net>
Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
---
 drivers/spi/Kconfig        |  10 +
 drivers/spi/Makefile       |   2 +
 drivers/spi/dw_spi.c       | 944 +++++++++++++++++++++++++++++++++++++++++++++
 drivers/spi/dw_spi_pci.c   | 169 ++++++++
 include/linux/spi/dw_spi.h | 212 ++++++++++
 5 files changed, 1337 insertions(+)
 create mode 100644 drivers/spi/dw_spi.c
 create mode 100644 drivers/spi/dw_spi_pci.c
 create mode 100644 include/linux/spi/dw_spi.h

(limited to 'include/linux')

diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig
index 07e5453f7b18..d7c1741c4c5b 100644
--- a/drivers/spi/Kconfig
+++ b/drivers/spi/Kconfig
@@ -292,6 +292,16 @@ config SPI_NUC900
 # Add new SPI master controllers in alphabetical order above this line
 #
 
+config SPI_DESIGNWARE
+	bool "DesignWare SPI controller core support"
+	depends on SPI_MASTER
+	help
+	  general driver for SPI controller core from DesignWare
+
+config SPI_DW_PCI
+	tristate "PCI interface driver for DW SPI core"
+	depends on SPI_DESIGNWARE && PCI
+
 #
 # There are lots of SPI device types, with sensors and memory
 # being probably the most widely used ones.
diff --git a/drivers/spi/Makefile b/drivers/spi/Makefile
index ed8c1675b52f..a909e39f7e7c 100644
--- a/drivers/spi/Makefile
+++ b/drivers/spi/Makefile
@@ -16,6 +16,8 @@ obj-$(CONFIG_SPI_BFIN)			+= spi_bfin5xx.o
 obj-$(CONFIG_SPI_BITBANG)		+= spi_bitbang.o
 obj-$(CONFIG_SPI_AU1550)		+= au1550_spi.o
 obj-$(CONFIG_SPI_BUTTERFLY)		+= spi_butterfly.o
+obj-$(CONFIG_SPI_DESIGNWARE)		+= dw_spi.o
+obj-$(CONFIG_SPI_DW_PCI)		+= dw_spi_pci.o
 obj-$(CONFIG_SPI_GPIO)			+= spi_gpio.o
 obj-$(CONFIG_SPI_IMX)			+= spi_imx.o
 obj-$(CONFIG_SPI_LM70_LLP)		+= spi_lm70llp.o
diff --git a/drivers/spi/dw_spi.c b/drivers/spi/dw_spi.c
new file mode 100644
index 000000000000..31620fae77be
--- /dev/null
+++ b/drivers/spi/dw_spi.c
@@ -0,0 +1,944 @@
+/*
+ * dw_spi.c - Designware SPI core controller driver (refer pxa2xx_spi.c)
+ *
+ * Copyright (c) 2009, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <linux/dma-mapping.h>
+#include <linux/interrupt.h>
+#include <linux/highmem.h>
+#include <linux/delay.h>
+
+#include <linux/spi/dw_spi.h>
+#include <linux/spi/spi.h>
+
+#ifdef CONFIG_DEBUG_FS
+#include <linux/debugfs.h>
+#endif
+
+#define START_STATE	((void *)0)
+#define RUNNING_STATE	((void *)1)
+#define DONE_STATE	((void *)2)
+#define ERROR_STATE	((void *)-1)
+
+#define QUEUE_RUNNING	0
+#define QUEUE_STOPPED	1
+
+#define MRST_SPI_DEASSERT	0
+#define MRST_SPI_ASSERT		1
+
+/* Slave spi_dev related */
+struct chip_data {
+	u16 cr0;
+	u8 cs;			/* chip select pin */
+	u8 n_bytes;		/* current is a 1/2/4 byte op */
+	u8 tmode;		/* TR/TO/RO/EEPROM */
+	u8 type;		/* SPI/SSP/MicroWire */
+
+	u8 poll_mode;		/* 1 means use poll mode */
+
+	u32 dma_width;
+	u32 rx_threshold;
+	u32 tx_threshold;
+	u8 enable_dma;
+	u8 bits_per_word;
+	u16 clk_div;		/* baud rate divider */
+	u32 speed_hz;		/* baud rate */
+	int (*write)(struct dw_spi *dws);
+	int (*read)(struct dw_spi *dws);
+	void (*cs_control)(u32 command);
+};
+
+#ifdef CONFIG_DEBUG_FS
+static int spi_show_regs_open(struct inode *inode, struct file *file)
+{
+	file->private_data = inode->i_private;
+	return 0;
+}
+
+#define SPI_REGS_BUFSIZE	1024
+static ssize_t  spi_show_regs(struct file *file, char __user *user_buf,
+				size_t count, loff_t *ppos)
+{
+	struct dw_spi *dws;
+	char *buf;
+	u32 len = 0;
+	ssize_t ret;
+
+	dws = file->private_data;
+
+	buf = kzalloc(SPI_REGS_BUFSIZE, GFP_KERNEL);
+	if (!buf)
+		return 0;
+
+	len += snprintf(buf + len, SPI_REGS_BUFSIZE - len,
+			"MRST SPI0 registers:\n");
+	len += snprintf(buf + len, SPI_REGS_BUFSIZE - len,
+			"=================================\n");
+	len += snprintf(buf + len, SPI_REGS_BUFSIZE - len,
+			"CTRL0: \t\t0x%08x\n", dw_readl(dws, ctrl0));
+	len += snprintf(buf + len, SPI_REGS_BUFSIZE - len,
+			"CTRL1: \t\t0x%08x\n", dw_readl(dws, ctrl1));
+	len += snprintf(buf + len, SPI_REGS_BUFSIZE - len,
+			"SSIENR: \t0x%08x\n", dw_readl(dws, ssienr));
+	len += snprintf(buf + len, SPI_REGS_BUFSIZE - len,
+			"SER: \t\t0x%08x\n", dw_readl(dws, ser));
+	len += snprintf(buf + len, SPI_REGS_BUFSIZE - len,
+			"BAUDR: \t\t0x%08x\n", dw_readl(dws, baudr));
+	len += snprintf(buf + len, SPI_REGS_BUFSIZE - len,
+			"TXFTLR: \t0x%08x\n", dw_readl(dws, txfltr));
+	len += snprintf(buf + len, SPI_REGS_BUFSIZE - len,
+			"RXFTLR: \t0x%08x\n", dw_readl(dws, rxfltr));
+	len += snprintf(buf + len, SPI_REGS_BUFSIZE - len,
+			"TXFLR: \t\t0x%08x\n", dw_readl(dws, txflr));
+	len += snprintf(buf + len, SPI_REGS_BUFSIZE - len,
+			"RXFLR: \t\t0x%08x\n", dw_readl(dws, rxflr));
+	len += snprintf(buf + len, SPI_REGS_BUFSIZE - len,
+			"SR: \t\t0x%08x\n", dw_readl(dws, sr));
+	len += snprintf(buf + len, SPI_REGS_BUFSIZE - len,
+			"IMR: \t\t0x%08x\n", dw_readl(dws, imr));
+	len += snprintf(buf + len, SPI_REGS_BUFSIZE - len,
+			"ISR: \t\t0x%08x\n", dw_readl(dws, isr));
+	len += snprintf(buf + len, SPI_REGS_BUFSIZE - len,
+			"DMACR: \t\t0x%08x\n", dw_readl(dws, dmacr));
+	len += snprintf(buf + len, SPI_REGS_BUFSIZE - len,
+			"DMATDLR: \t0x%08x\n", dw_readl(dws, dmatdlr));
+	len += snprintf(buf + len, SPI_REGS_BUFSIZE - len,
+			"DMARDLR: \t0x%08x\n", dw_readl(dws, dmardlr));
+	len += snprintf(buf + len, SPI_REGS_BUFSIZE - len,
+			"=================================\n");
+
+	ret =  simple_read_from_buffer(user_buf, count, ppos, buf, len);
+	kfree(buf);
+	return ret;
+}
+
+static const struct file_operations mrst_spi_regs_ops = {
+	.owner		= THIS_MODULE,
+	.open		= spi_show_regs_open,
+	.read		= spi_show_regs,
+};
+
+static int mrst_spi_debugfs_init(struct dw_spi *dws)
+{
+	dws->debugfs = debugfs_create_dir("mrst_spi", NULL);
+	if (!dws->debugfs)
+		return -ENOMEM;
+
+	debugfs_create_file("registers", S_IFREG | S_IRUGO,
+		dws->debugfs, (void *)dws, &mrst_spi_regs_ops);
+	return 0;
+}
+
+static void mrst_spi_debugfs_remove(struct dw_spi *dws)
+{
+	if (dws->debugfs)
+		debugfs_remove_recursive(dws->debugfs);
+}
+
+#else
+static inline int mrst_spi_debugfs_init(struct dw_spi *dws)
+{
+}
+
+static inline void mrst_spi_debugfs_remove(struct dw_spi *dws)
+{
+}
+#endif /* CONFIG_DEBUG_FS */
+
+static void wait_till_not_busy(struct dw_spi *dws)
+{
+	unsigned long end = jiffies + usecs_to_jiffies(1000);
+
+	while (time_before(jiffies, end)) {
+		if (!(dw_readw(dws, sr) & SR_BUSY))
+			return;
+	}
+	dev_err(&dws->master->dev,
+		"DW SPI: Stutus keeps busy for 1000us after a read/write!\n");
+}
+
+static void flush(struct dw_spi *dws)
+{
+	while (dw_readw(dws, sr) & SR_RF_NOT_EMPT)
+		dw_readw(dws, dr);
+
+	wait_till_not_busy(dws);
+}
+
+static void null_cs_control(u32 command)
+{
+}
+
+static int null_writer(struct dw_spi *dws)
+{
+	u8 n_bytes = dws->n_bytes;
+
+	if (!(dw_readw(dws, sr) & SR_TF_NOT_FULL)
+		|| (dws->tx == dws->tx_end))
+		return 0;
+	dw_writew(dws, dr, 0);
+	dws->tx += n_bytes;
+
+	wait_till_not_busy(dws);
+	return 1;
+}
+
+static int null_reader(struct dw_spi *dws)
+{
+	u8 n_bytes = dws->n_bytes;
+
+	while ((dw_readw(dws, sr) & SR_RF_NOT_EMPT)
+		&& (dws->rx < dws->rx_end)) {
+		dw_readw(dws, dr);
+		dws->rx += n_bytes;
+	}
+	wait_till_not_busy(dws);
+	return dws->rx == dws->rx_end;
+}
+
+static int u8_writer(struct dw_spi *dws)
+{
+	if (!(dw_readw(dws, sr) & SR_TF_NOT_FULL)
+		|| (dws->tx == dws->tx_end))
+		return 0;
+
+	dw_writew(dws, dr, *(u8 *)(dws->tx));
+	++dws->tx;
+
+	wait_till_not_busy(dws);
+	return 1;
+}
+
+static int u8_reader(struct dw_spi *dws)
+{
+	while ((dw_readw(dws, sr) & SR_RF_NOT_EMPT)
+		&& (dws->rx < dws->rx_end)) {
+		*(u8 *)(dws->rx) = dw_readw(dws, dr);
+		++dws->rx;
+	}
+
+	wait_till_not_busy(dws);
+	return dws->rx == dws->rx_end;
+}
+
+static int u16_writer(struct dw_spi *dws)
+{
+	if (!(dw_readw(dws, sr) & SR_TF_NOT_FULL)
+		|| (dws->tx == dws->tx_end))
+		return 0;
+
+	dw_writew(dws, dr, *(u16 *)(dws->tx));
+	dws->tx += 2;
+
+	wait_till_not_busy(dws);
+	return 1;
+}
+
+static int u16_reader(struct dw_spi *dws)
+{
+	u16 temp;
+
+	while ((dw_readw(dws, sr) & SR_RF_NOT_EMPT)
+		&& (dws->rx < dws->rx_end)) {
+		temp = dw_readw(dws, dr);
+		*(u16 *)(dws->rx) = temp;
+		dws->rx += 2;
+	}
+
+	wait_till_not_busy(dws);
+	return dws->rx == dws->rx_end;
+}
+
+static void *next_transfer(struct dw_spi *dws)
+{
+	struct spi_message *msg = dws->cur_msg;
+	struct spi_transfer *trans = dws->cur_transfer;
+
+	/* Move to next transfer */
+	if (trans->transfer_list.next != &msg->transfers) {
+		dws->cur_transfer =
+			list_entry(trans->transfer_list.next,
+					struct spi_transfer,
+					transfer_list);
+		return RUNNING_STATE;
+	} else
+		return DONE_STATE;
+}
+
+/*
+ * Note: first step is the protocol driver prepares
+ * a dma-capable memory, and this func just need translate
+ * the virt addr to physical
+ */
+static int map_dma_buffers(struct dw_spi *dws)
+{
+	if (!dws->cur_msg->is_dma_mapped || !dws->dma_inited
+		|| !dws->cur_chip->enable_dma)
+		return 0;
+
+	if (dws->cur_transfer->tx_dma)
+		dws->tx_dma = dws->cur_transfer->tx_dma;
+
+	if (dws->cur_transfer->rx_dma)
+		dws->rx_dma = dws->cur_transfer->rx_dma;
+
+	return 1;
+}
+
+/* Caller already set message->status; dma and pio irqs are blocked */
+static void giveback(struct dw_spi *dws)
+{
+	struct spi_transfer *last_transfer;
+	unsigned long flags;
+	struct spi_message *msg;
+
+	spin_lock_irqsave(&dws->lock, flags);
+	msg = dws->cur_msg;
+	dws->cur_msg = NULL;
+	dws->cur_transfer = NULL;
+	dws->prev_chip = dws->cur_chip;
+	dws->cur_chip = NULL;
+	dws->dma_mapped = 0;
+	queue_work(dws->workqueue, &dws->pump_messages);
+	spin_unlock_irqrestore(&dws->lock, flags);
+
+	last_transfer = list_entry(msg->transfers.prev,
+					struct spi_transfer,
+					transfer_list);
+
+	if (!last_transfer->cs_change)
+		dws->cs_control(MRST_SPI_DEASSERT);
+
+	msg->state = NULL;
+	if (msg->complete)
+		msg->complete(msg->context);
+}
+
+static void int_error_stop(struct dw_spi *dws, const char *msg)
+{
+	/* Stop and reset hw */
+	flush(dws);
+	spi_enable_chip(dws, 0);
+
+	dev_err(&dws->master->dev, "%s\n", msg);
+	dws->cur_msg->state = ERROR_STATE;
+	tasklet_schedule(&dws->pump_transfers);
+}
+
+static void transfer_complete(struct dw_spi *dws)
+{
+	/* Update total byte transfered return count actual bytes read */
+	dws->cur_msg->actual_length += dws->len;
+
+	/* Move to next transfer */
+	dws->cur_msg->state = next_transfer(dws);
+
+	/* Handle end of message */
+	if (dws->cur_msg->state == DONE_STATE) {
+		dws->cur_msg->status = 0;
+		giveback(dws);
+	} else
+		tasklet_schedule(&dws->pump_transfers);
+}
+
+static irqreturn_t interrupt_transfer(struct dw_spi *dws)
+{
+	u16 irq_status, irq_mask = 0x3f;
+
+	irq_status = dw_readw(dws, isr) & irq_mask;
+	/* Error handling */
+	if (irq_status & (SPI_INT_TXOI | SPI_INT_RXOI | SPI_INT_RXUI)) {
+		dw_readw(dws, txoicr);
+		dw_readw(dws, rxoicr);
+		dw_readw(dws, rxuicr);
+		int_error_stop(dws, "interrupt_transfer: fifo overrun");
+		return IRQ_HANDLED;
+	}
+
+	/* INT comes from tx */
+	if (dws->tx && (irq_status & SPI_INT_TXEI)) {
+		while (dws->tx < dws->tx_end)
+			dws->write(dws);
+
+		if (dws->tx == dws->tx_end) {
+			spi_mask_intr(dws, SPI_INT_TXEI);
+			transfer_complete(dws);
+		}
+	}
+
+	/* INT comes from rx */
+	if (dws->rx && (irq_status & SPI_INT_RXFI)) {
+		if (dws->read(dws))
+			transfer_complete(dws);
+	}
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t dw_spi_irq(int irq, void *dev_id)
+{
+	struct dw_spi *dws = dev_id;
+
+	if (!dws->cur_msg) {
+		spi_mask_intr(dws, SPI_INT_TXEI);
+		/* Never fail */
+		return IRQ_HANDLED;
+	}
+
+	return dws->transfer_handler(dws);
+}
+
+/* Must be called inside pump_transfers() */
+static void poll_transfer(struct dw_spi *dws)
+{
+	if (dws->tx) {
+		while (dws->write(dws))
+			dws->read(dws);
+	}
+
+	dws->read(dws);
+	transfer_complete(dws);
+}
+
+static void dma_transfer(struct dw_spi *dws, int cs_change)
+{
+}
+
+static void pump_transfers(unsigned long data)
+{
+	struct dw_spi *dws = (struct dw_spi *)data;
+	struct spi_message *message = NULL;
+	struct spi_transfer *transfer = NULL;
+	struct spi_transfer *previous = NULL;
+	struct spi_device *spi = NULL;
+	struct chip_data *chip = NULL;
+	u8 bits = 0;
+	u8 imask = 0;
+	u8 cs_change = 0;
+	u16 clk_div = 0;
+	u32 speed = 0;
+	u32 cr0 = 0;
+
+	/* Get current state information */
+	message = dws->cur_msg;
+	transfer = dws->cur_transfer;
+	chip = dws->cur_chip;
+	spi = message->spi;
+
+	if (message->state == ERROR_STATE) {
+		message->status = -EIO;
+		goto early_exit;
+	}
+
+	/* Handle end of message */
+	if (message->state == DONE_STATE) {
+		message->status = 0;
+		goto early_exit;
+	}
+
+	/* Delay if requested at end of transfer*/
+	if (message->state == RUNNING_STATE) {
+		previous = list_entry(transfer->transfer_list.prev,
+					struct spi_transfer,
+					transfer_list);
+		if (previous->delay_usecs)
+			udelay(previous->delay_usecs);
+	}
+
+	dws->n_bytes = chip->n_bytes;
+	dws->dma_width = chip->dma_width;
+	dws->cs_control = chip->cs_control;
+
+	dws->rx_dma = transfer->rx_dma;
+	dws->tx_dma = transfer->tx_dma;
+	dws->tx = (void *)transfer->tx_buf;
+	dws->tx_end = dws->tx + transfer->len;
+	dws->rx = transfer->rx_buf;
+	dws->rx_end = dws->rx + transfer->len;
+	dws->write = dws->tx ? chip->write : null_writer;
+	dws->read = dws->rx ? chip->read : null_reader;
+	dws->cs_change = transfer->cs_change;
+	dws->len = dws->cur_transfer->len;
+	if (chip != dws->prev_chip)
+		cs_change = 1;
+
+	cr0 = chip->cr0;
+
+	/* Handle per transfer options for bpw and speed */
+	if (transfer->speed_hz) {
+		speed = chip->speed_hz;
+
+		if (transfer->speed_hz != speed) {
+			speed = transfer->speed_hz;
+			if (speed > dws->max_freq) {
+				printk(KERN_ERR "MRST SPI0: unsupported"
+					"freq: %dHz\n", speed);
+				message->status = -EIO;
+				goto early_exit;
+			}
+
+			/* clk_div doesn't support odd number */
+			clk_div = dws->max_freq / speed;
+			clk_div = (clk_div >> 1) << 1;
+
+			chip->speed_hz = speed;
+			chip->clk_div = clk_div;
+		}
+	}
+	if (transfer->bits_per_word) {
+		bits = transfer->bits_per_word;
+
+		switch (bits) {
+		case 8:
+			dws->n_bytes = 1;
+			dws->dma_width = 1;
+			dws->read = (dws->read != null_reader) ?
+					u8_reader : null_reader;
+			dws->write = (dws->write != null_writer) ?
+					u8_writer : null_writer;
+			break;
+		case 16:
+			dws->n_bytes = 2;
+			dws->dma_width = 2;
+			dws->read = (dws->read != null_reader) ?
+					u16_reader : null_reader;
+			dws->write = (dws->write != null_writer) ?
+					u16_writer : null_writer;
+			break;
+		default:
+			printk(KERN_ERR "MRST SPI0: unsupported bits:"
+				"%db\n", bits);
+			message->status = -EIO;
+			goto early_exit;
+		}
+
+		cr0 = (bits - 1)
+			| (chip->type << SPI_FRF_OFFSET)
+			| (spi->mode << SPI_MODE_OFFSET)
+			| (chip->tmode << SPI_TMOD_OFFSET);
+	}
+	message->state = RUNNING_STATE;
+
+	/* Check if current transfer is a DMA transaction */
+	dws->dma_mapped = map_dma_buffers(dws);
+
+	if (!dws->dma_mapped && !chip->poll_mode) {
+		if (dws->rx)
+			imask |= SPI_INT_RXFI;
+		if (dws->tx)
+			imask |= SPI_INT_TXEI;
+		dws->transfer_handler = interrupt_transfer;
+	}
+
+	/*
+	 * Reprogram registers only if
+	 *	1. chip select changes
+	 *	2. clk_div is changed
+	 *	3. control value changes
+	 */
+	if (dw_readw(dws, ctrl0) != cr0 || cs_change || clk_div) {
+		spi_enable_chip(dws, 0);
+
+		if (dw_readw(dws, ctrl0) != cr0)
+			dw_writew(dws, ctrl0, cr0);
+
+		/* Set the interrupt mask, for poll mode just diable all int */
+		spi_mask_intr(dws, 0xff);
+		if (!chip->poll_mode)
+			spi_umask_intr(dws, imask);
+
+		spi_set_clk(dws, clk_div ? clk_div : chip->clk_div);
+		spi_chip_sel(dws, spi->chip_select);
+		spi_enable_chip(dws, 1);
+
+		if (cs_change)
+			dws->prev_chip = chip;
+	}
+
+	if (dws->dma_mapped)
+		dma_transfer(dws, cs_change);
+
+	if (chip->poll_mode)
+		poll_transfer(dws);
+
+	return;
+
+early_exit:
+	giveback(dws);
+	return;
+}
+
+static void pump_messages(struct work_struct *work)
+{
+	struct dw_spi *dws =
+		container_of(work, struct dw_spi, pump_messages);
+	unsigned long flags;
+
+	/* Lock queue and check for queue work */
+	spin_lock_irqsave(&dws->lock, flags);
+	if (list_empty(&dws->queue) || dws->run == QUEUE_STOPPED) {
+		dws->busy = 0;
+		spin_unlock_irqrestore(&dws->lock, flags);
+		return;
+	}
+
+	/* Make sure we are not already running a message */
+	if (dws->cur_msg) {
+		spin_unlock_irqrestore(&dws->lock, flags);
+		return;
+	}
+
+	/* Extract head of queue */
+	dws->cur_msg = list_entry(dws->queue.next, struct spi_message, queue);
+	list_del_init(&dws->cur_msg->queue);
+
+	/* Initial message state*/
+	dws->cur_msg->state = START_STATE;
+	dws->cur_transfer = list_entry(dws->cur_msg->transfers.next,
+						struct spi_transfer,
+						transfer_list);
+	dws->cur_chip = spi_get_ctldata(dws->cur_msg->spi);
+
+	/* Mark as busy and launch transfers */
+	tasklet_schedule(&dws->pump_transfers);
+
+	dws->busy = 1;
+	spin_unlock_irqrestore(&dws->lock, flags);
+}
+
+/* spi_device use this to queue in their spi_msg */
+static int dw_spi_transfer(struct spi_device *spi, struct spi_message *msg)
+{
+	struct dw_spi *dws = spi_master_get_devdata(spi->master);
+	unsigned long flags;
+
+	spin_lock_irqsave(&dws->lock, flags);
+
+	if (dws->run == QUEUE_STOPPED) {
+		spin_unlock_irqrestore(&dws->lock, flags);
+		return -ESHUTDOWN;
+	}
+
+	msg->actual_length = 0;
+	msg->status = -EINPROGRESS;
+	msg->state = START_STATE;
+
+	list_add_tail(&msg->queue, &dws->queue);
+
+	if (dws->run == QUEUE_RUNNING && !dws->busy) {
+
+		if (dws->cur_transfer || dws->cur_msg)
+			queue_work(dws->workqueue,
+					&dws->pump_messages);
+		else {
+			/* If no other data transaction in air, just go */
+			spin_unlock_irqrestore(&dws->lock, flags);
+			pump_messages(&dws->pump_messages);
+			return 0;
+		}
+	}
+
+	spin_unlock_irqrestore(&dws->lock, flags);
+	return 0;
+}
+
+/* This may be called twice for each spi dev */
+static int dw_spi_setup(struct spi_device *spi)
+{
+	struct dw_spi_chip *chip_info = NULL;
+	struct chip_data *chip;
+
+	if (spi->bits_per_word != 8 && spi->bits_per_word != 16)
+		return -EINVAL;
+
+	/* Only alloc on first setup */
+	chip = spi_get_ctldata(spi);
+	if (!chip) {
+		chip = kzalloc(sizeof(struct chip_data), GFP_KERNEL);
+		if (!chip)
+			return -ENOMEM;
+
+		chip->cs_control = null_cs_control;
+		chip->enable_dma = 0;
+	}
+
+	/*
+	 * Protocol drivers may change the chip settings, so...
+	 * if chip_info exists, use it
+	 */
+	chip_info = spi->controller_data;
+
+	/* chip_info doesn't always exist */
+	if (chip_info) {
+		if (chip_info->cs_control)
+			chip->cs_control = chip_info->cs_control;
+
+		chip->poll_mode = chip_info->poll_mode;
+		chip->type = chip_info->type;
+
+		chip->rx_threshold = 0;
+		chip->tx_threshold = 0;
+
+		chip->enable_dma = chip_info->enable_dma;
+	}
+
+	if (spi->bits_per_word <= 8) {
+		chip->n_bytes = 1;
+		chip->dma_width = 1;
+		chip->read = u8_reader;
+		chip->write = u8_writer;
+	} else if (spi->bits_per_word <= 16) {
+		chip->n_bytes = 2;
+		chip->dma_width = 2;
+		chip->read = u16_reader;
+		chip->write = u16_writer;
+	} else {
+		/* Never take >16b case for MRST SPIC */
+		dev_err(&spi->dev, "invalid wordsize\n");
+		return -EINVAL;
+	}
+	chip->bits_per_word = spi->bits_per_word;
+
+	chip->speed_hz = spi->max_speed_hz;
+	if (chip->speed_hz)
+		chip->clk_div = 25000000 / chip->speed_hz;
+	else
+		chip->clk_div = 8;	/* default value */
+
+	chip->tmode = 0; /* Tx & Rx */
+	/* Default SPI mode is SCPOL = 0, SCPH = 0 */
+	chip->cr0 = (chip->bits_per_word - 1)
+			| (chip->type << SPI_FRF_OFFSET)
+			| (spi->mode  << SPI_MODE_OFFSET)
+			| (chip->tmode << SPI_TMOD_OFFSET);
+
+	spi_set_ctldata(spi, chip);
+	return 0;
+}
+
+static void dw_spi_cleanup(struct spi_device *spi)
+{
+	struct chip_data *chip = spi_get_ctldata(spi);
+	kfree(chip);
+}
+
+static int __init init_queue(struct dw_spi *dws)
+{
+	INIT_LIST_HEAD(&dws->queue);
+	spin_lock_init(&dws->lock);
+
+	dws->run = QUEUE_STOPPED;
+	dws->busy = 0;
+
+	tasklet_init(&dws->pump_transfers,
+			pump_transfers,	(unsigned long)dws);
+
+	INIT_WORK(&dws->pump_messages, pump_messages);
+	dws->workqueue = create_singlethread_workqueue(
+					dev_name(dws->master->dev.parent));
+	if (dws->workqueue == NULL)
+		return -EBUSY;
+
+	return 0;
+}
+
+static int start_queue(struct dw_spi *dws)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&dws->lock, flags);
+
+	if (dws->run == QUEUE_RUNNING || dws->busy) {
+		spin_unlock_irqrestore(&dws->lock, flags);
+		return -EBUSY;
+	}
+
+	dws->run = QUEUE_RUNNING;
+	dws->cur_msg = NULL;
+	dws->cur_transfer = NULL;
+	dws->cur_chip = NULL;
+	dws->prev_chip = NULL;
+	spin_unlock_irqrestore(&dws->lock, flags);
+
+	queue_work(dws->workqueue, &dws->pump_messages);
+
+	return 0;
+}
+
+static int stop_queue(struct dw_spi *dws)
+{
+	unsigned long flags;
+	unsigned limit = 50;
+	int status = 0;
+
+	spin_lock_irqsave(&dws->lock, flags);
+	dws->run = QUEUE_STOPPED;
+	while (!list_empty(&dws->queue) && dws->busy && limit--) {
+		spin_unlock_irqrestore(&dws->lock, flags);
+		msleep(10);
+		spin_lock_irqsave(&dws->lock, flags);
+	}
+
+	if (!list_empty(&dws->queue) || dws->busy)
+		status = -EBUSY;
+	spin_unlock_irqrestore(&dws->lock, flags);
+
+	return status;
+}
+
+static int destroy_queue(struct dw_spi *dws)
+{
+	int status;
+
+	status = stop_queue(dws);
+	if (status != 0)
+		return status;
+	destroy_workqueue(dws->workqueue);
+	return 0;
+}
+
+/* Restart the controller, disable all interrupts, clean rx fifo */
+static void spi_hw_init(struct dw_spi *dws)
+{
+	spi_enable_chip(dws, 0);
+	spi_mask_intr(dws, 0xff);
+	spi_enable_chip(dws, 1);
+	flush(dws);
+}
+
+int __devinit dw_spi_add_host(struct dw_spi *dws)
+{
+	struct spi_master *master;
+	int ret;
+
+	BUG_ON(dws == NULL);
+
+	master = spi_alloc_master(dws->parent_dev, 0);
+	if (!master) {
+		ret = -ENOMEM;
+		goto exit;
+	}
+
+	dws->master = master;
+	dws->type = SSI_MOTO_SPI;
+	dws->prev_chip = NULL;
+	dws->dma_inited = 0;
+	dws->dma_addr = (dma_addr_t)(dws->paddr + 0x60);
+
+	ret = request_irq(dws->irq, dw_spi_irq, 0,
+			"dw_spi", dws);
+	if (ret < 0) {
+		dev_err(&master->dev, "can not get IRQ\n");
+		goto err_free_master;
+	}
+
+	master->mode_bits = SPI_CPOL | SPI_CPHA;
+	master->bus_num = dws->bus_num;
+	master->num_chipselect = dws->num_cs;
+	master->cleanup = dw_spi_cleanup;
+	master->setup = dw_spi_setup;
+	master->transfer = dw_spi_transfer;
+
+	dws->dma_inited = 0;
+
+	/* Basic HW init */
+	spi_hw_init(dws);
+
+	/* Initial and start queue */
+	ret = init_queue(dws);
+	if (ret) {
+		dev_err(&master->dev, "problem initializing queue\n");
+		goto err_diable_hw;
+	}
+	ret = start_queue(dws);
+	if (ret) {
+		dev_err(&master->dev, "problem starting queue\n");
+		goto err_diable_hw;
+	}
+
+	spi_master_set_devdata(master, dws);
+	ret = spi_register_master(master);
+	if (ret) {
+		dev_err(&master->dev, "problem registering spi master\n");
+		goto err_queue_alloc;
+	}
+
+	mrst_spi_debugfs_init(dws);
+	return 0;
+
+err_queue_alloc:
+	destroy_queue(dws);
+err_diable_hw:
+	spi_enable_chip(dws, 0);
+	free_irq(dws->irq, dws);
+err_free_master:
+	spi_master_put(master);
+exit:
+	return ret;
+}
+EXPORT_SYMBOL(dw_spi_add_host);
+
+void __devexit dw_spi_remove_host(struct dw_spi *dws)
+{
+	int status = 0;
+
+	if (!dws)
+		return;
+	mrst_spi_debugfs_remove(dws);
+
+	/* Remove the queue */
+	status = destroy_queue(dws);
+	if (status != 0)
+		dev_err(&dws->master->dev, "dw_spi_remove: workqueue will not "
+			"complete, message memory not freed\n");
+
+	spi_enable_chip(dws, 0);
+	/* Disable clk */
+	spi_set_clk(dws, 0);
+	free_irq(dws->irq, dws);
+
+	/* Disconnect from the SPI framework */
+	spi_unregister_master(dws->master);
+}
+
+int dw_spi_suspend_host(struct dw_spi *dws)
+{
+	int ret = 0;
+
+	ret = stop_queue(dws);
+	if (ret)
+		return ret;
+	spi_enable_chip(dws, 0);
+	spi_set_clk(dws, 0);
+	return ret;
+}
+EXPORT_SYMBOL(dw_spi_suspend_host);
+
+int dw_spi_resume_host(struct dw_spi *dws)
+{
+	int ret;
+
+	spi_hw_init(dws);
+	ret = start_queue(dws);
+	if (ret)
+		dev_err(&dws->master->dev, "fail to start queue (%d)\n", ret);
+	return ret;
+}
+EXPORT_SYMBOL(dw_spi_resume_host);
+
+MODULE_AUTHOR("Feng Tang <feng.tang@intel.com>");
+MODULE_DESCRIPTION("Driver for DesignWare SPI controller core");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/spi/dw_spi_pci.c b/drivers/spi/dw_spi_pci.c
new file mode 100644
index 000000000000..34ba69161734
--- /dev/null
+++ b/drivers/spi/dw_spi_pci.c
@@ -0,0 +1,169 @@
+/*
+ * mrst_spi_pci.c - PCI interface driver for DW SPI Core
+ *
+ * Copyright (c) 2009, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/pci.h>
+#include <linux/spi/dw_spi.h>
+#include <linux/spi/spi.h>
+
+#define DRIVER_NAME "dw_spi_pci"
+
+struct dw_spi_pci {
+	struct pci_dev		*pdev;
+	struct dw_spi		dws;
+};
+
+static int __devinit spi_pci_probe(struct pci_dev *pdev,
+	const struct pci_device_id *ent)
+{
+	struct dw_spi_pci *dwpci;
+	struct dw_spi *dws;
+	int pci_bar = 0;
+	int ret;
+
+	printk(KERN_INFO "DW: found PCI SPI controller(ID: %04x:%04x)\n",
+		pdev->vendor, pdev->device);
+
+	ret = pci_enable_device(pdev);
+	if (ret)
+		return ret;
+
+	dwpci = kzalloc(sizeof(struct dw_spi_pci), GFP_KERNEL);
+	if (!dwpci) {
+		ret = -ENOMEM;
+		goto err_disable;
+	}
+
+	dwpci->pdev = pdev;
+	dws = &dwpci->dws;
+
+	/* Get basic io resource and map it */
+	dws->paddr = pci_resource_start(pdev, pci_bar);
+	dws->iolen = pci_resource_len(pdev, pci_bar);
+
+	ret = pci_request_region(pdev, pci_bar, dev_name(&pdev->dev));
+	if (ret)
+		goto err_kfree;
+
+	dws->regs = ioremap_nocache((unsigned long)dws->paddr,
+				pci_resource_len(pdev, pci_bar));
+	if (!dws->regs) {
+		ret = -ENOMEM;
+		goto err_release_reg;
+	}
+
+	dws->parent_dev = &pdev->dev;
+	dws->bus_num = 0;
+	dws->num_cs = 4;
+	dws->max_freq = 25000000;	/* for Moorestwon */
+	dws->irq = pdev->irq;
+
+	ret = dw_spi_add_host(dws);
+	if (ret)
+		goto err_unmap;
+
+	/* PCI hook and SPI hook use the same drv data */
+	pci_set_drvdata(pdev, dwpci);
+	return 0;
+
+err_unmap:
+	iounmap(dws->regs);
+err_release_reg:
+	pci_release_region(pdev, pci_bar);
+err_kfree:
+	kfree(dwpci);
+err_disable:
+	pci_disable_device(pdev);
+	return ret;
+}
+
+static void __devexit spi_pci_remove(struct pci_dev *pdev)
+{
+	struct dw_spi_pci *dwpci = pci_get_drvdata(pdev);
+
+	pci_set_drvdata(pdev, NULL);
+	iounmap(dwpci->dws.regs);
+	pci_release_region(pdev, 0);
+	kfree(dwpci);
+	pci_disable_device(pdev);
+}
+
+#ifdef CONFIG_PM
+static int spi_suspend(struct pci_dev *pdev, pm_message_t state)
+{
+	struct dw_spi_pci *dwpci = pci_get_drvdata(pdev);
+	int ret;
+
+	ret = dw_spi_suspend_host(&dwpci->dws);
+	if (ret)
+		return ret;
+	pci_save_state(pdev);
+	pci_disable_device(pdev);
+	pci_set_power_state(pdev, pci_choose_state(pdev, state));
+	return ret;
+}
+
+static int spi_resume(struct pci_dev *pdev)
+{
+	struct dw_spi_pci *dwpci = pci_get_drvdata(pdev);
+	int ret;
+
+	pci_set_power_state(pdev, PCI_D0);
+	pci_restore_state(pdev);
+	ret = pci_enable_device(pdev);
+	if (ret)
+		return ret;
+	return dw_spi_resume_host(&dwpci->dws);
+}
+#else
+#define spi_suspend	NULL
+#define spi_resume	NULL
+#endif
+
+static const struct pci_device_id pci_ids[] __devinitdata = {
+	/* Intel Moorestown platform SPI controller 0 */
+	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x0800) },
+	{},
+};
+
+static struct pci_driver dw_spi_driver = {
+	.name =		DRIVER_NAME,
+	.id_table =	pci_ids,
+	.probe =	spi_pci_probe,
+	.remove =	__devexit_p(spi_pci_remove),
+	.suspend =	spi_suspend,
+	.resume	=	spi_resume,
+};
+
+static int __init mrst_spi_init(void)
+{
+	return pci_register_driver(&dw_spi_driver);
+}
+
+static void __exit mrst_spi_exit(void)
+{
+	pci_unregister_driver(&dw_spi_driver);
+}
+
+module_init(mrst_spi_init);
+module_exit(mrst_spi_exit);
+
+MODULE_AUTHOR("Feng Tang <feng.tang@intel.com>");
+MODULE_DESCRIPTION("PCI interface driver for DW SPI Core");
+MODULE_LICENSE("GPL v2");
diff --git a/include/linux/spi/dw_spi.h b/include/linux/spi/dw_spi.h
new file mode 100644
index 000000000000..51b3e771a9a3
--- /dev/null
+++ b/include/linux/spi/dw_spi.h
@@ -0,0 +1,212 @@
+#ifndef DW_SPI_HEADER_H
+#define DW_SPI_HEADER_H
+#include <linux/io.h>
+
+/* Bit fields in CTRLR0 */
+#define SPI_DFS_OFFSET			0
+
+#define SPI_FRF_OFFSET			4
+#define SPI_FRF_SPI			0x0
+#define SPI_FRF_SSP			0x1
+#define SPI_FRF_MICROWIRE		0x2
+#define SPI_FRF_RESV			0x3
+
+#define SPI_MODE_OFFSET			6
+#define SPI_SCPH_OFFSET			6
+#define SPI_SCOL_OFFSET			7
+#define SPI_TMOD_OFFSET			8
+#define	SPI_TMOD_TR			0x0		/* xmit & recv */
+#define SPI_TMOD_TO			0x1		/* xmit only */
+#define SPI_TMOD_RO			0x2		/* recv only */
+#define SPI_TMOD_EPROMREAD		0x3		/* eeprom read mode */
+
+#define SPI_SLVOE_OFFSET		10
+#define SPI_SRL_OFFSET			11
+#define SPI_CFS_OFFSET			12
+
+/* Bit fields in SR, 7 bits */
+#define SR_MASK				0x7f		/* cover 7 bits */
+#define SR_BUSY				(1 << 0)
+#define SR_TF_NOT_FULL			(1 << 1)
+#define SR_TF_EMPT			(1 << 2)
+#define SR_RF_NOT_EMPT			(1 << 3)
+#define SR_RF_FULL			(1 << 4)
+#define SR_TX_ERR			(1 << 5)
+#define SR_DCOL				(1 << 6)
+
+/* Bit fields in ISR, IMR, RISR, 7 bits */
+#define SPI_INT_TXEI			(1 << 0)
+#define SPI_INT_TXOI			(1 << 1)
+#define SPI_INT_RXUI			(1 << 2)
+#define SPI_INT_RXOI			(1 << 3)
+#define SPI_INT_RXFI			(1 << 4)
+#define SPI_INT_MSTI			(1 << 5)
+
+/* TX RX interrupt level threshhold, max can be 256 */
+#define SPI_INT_THRESHOLD		32
+
+enum dw_ssi_type {
+	SSI_MOTO_SPI = 0,
+	SSI_TI_SSP,
+	SSI_NS_MICROWIRE,
+};
+
+struct dw_spi_reg {
+	u32	ctrl0;
+	u32	ctrl1;
+	u32	ssienr;
+	u32	mwcr;
+	u32	ser;
+	u32	baudr;
+	u32	txfltr;
+	u32	rxfltr;
+	u32	txflr;
+	u32	rxflr;
+	u32	sr;
+	u32	imr;
+	u32	isr;
+	u32	risr;
+	u32	txoicr;
+	u32	rxoicr;
+	u32	rxuicr;
+	u32	msticr;
+	u32	icr;
+	u32	dmacr;
+	u32	dmatdlr;
+	u32	dmardlr;
+	u32	idr;
+	u32	version;
+	u32	dr;		/* Currently oper as 32 bits,
+				though only low 16 bits matters */
+} __packed;
+
+struct dw_spi {
+	struct spi_master	*master;
+	struct spi_device	*cur_dev;
+	struct device		*parent_dev;
+	enum dw_ssi_type	type;
+
+	void __iomem		*regs;
+	unsigned long		paddr;
+	u32			iolen;
+	int			irq;
+	u32			max_freq;	/* max bus freq supported */
+
+	u16			bus_num;
+	u16			num_cs;		/* supported slave numbers */
+
+	/* Driver message queue */
+	struct workqueue_struct	*workqueue;
+	struct work_struct	pump_messages;
+	spinlock_t		lock;
+	struct list_head	queue;
+	int			busy;
+	int			run;
+
+	/* Message Transfer pump */
+	struct tasklet_struct	pump_transfers;
+
+	/* Current message transfer state info */
+	struct spi_message	*cur_msg;
+	struct spi_transfer	*cur_transfer;
+	struct chip_data	*cur_chip;
+	struct chip_data	*prev_chip;
+	size_t			len;
+	void			*tx;
+	void			*tx_end;
+	void			*rx;
+	void			*rx_end;
+	int			dma_mapped;
+	dma_addr_t		rx_dma;
+	dma_addr_t		tx_dma;
+	size_t			rx_map_len;
+	size_t			tx_map_len;
+	u8			n_bytes;	/* current is a 1/2 bytes op */
+	u8			max_bits_per_word;	/* maxim is 16b */
+	u32			dma_width;
+	int			cs_change;
+	int			(*write)(struct dw_spi *dws);
+	int			(*read)(struct dw_spi *dws);
+	irqreturn_t		(*transfer_handler)(struct dw_spi *dws);
+	void			(*cs_control)(u32 command);
+
+	/* Dma info */
+	int			dma_inited;
+	struct dma_chan		*txchan;
+	struct dma_chan		*rxchan;
+	int			txdma_done;
+	int			rxdma_done;
+	u64			tx_param;
+	u64			rx_param;
+	struct device		*dma_dev;
+	dma_addr_t		dma_addr;
+
+	/* Bus interface info */
+	void			*priv;
+#ifdef CONFIG_DEBUG_FS
+	struct dentry *debugfs;
+#endif
+};
+
+#define dw_readl(dw, name) \
+	__raw_readl(&(((struct dw_spi_reg *)dw->regs)->name))
+#define dw_writel(dw, name, val) \
+	__raw_writel((val), &(((struct dw_spi_reg *)dw->regs)->name))
+#define dw_readw(dw, name) \
+	__raw_readw(&(((struct dw_spi_reg *)dw->regs)->name))
+#define dw_writew(dw, name, val) \
+	__raw_writew((val), &(((struct dw_spi_reg *)dw->regs)->name))
+
+static inline void spi_enable_chip(struct dw_spi *dws, int enable)
+{
+	dw_writel(dws, ssienr, (enable ? 1 : 0));
+}
+
+static inline void spi_set_clk(struct dw_spi *dws, u16 div)
+{
+	dw_writel(dws, baudr, div);
+}
+
+static inline void spi_chip_sel(struct dw_spi *dws, u16 cs)
+{
+	if (cs > dws->num_cs)
+		return;
+	dw_writel(dws, ser, 1 << cs);
+}
+
+/* Disable IRQ bits */
+static inline void spi_mask_intr(struct dw_spi *dws, u32 mask)
+{
+	u32 new_mask;
+
+	new_mask = dw_readl(dws, imr) & ~mask;
+	dw_writel(dws, imr, new_mask);
+}
+
+/* Enable IRQ bits */
+static inline void spi_umask_intr(struct dw_spi *dws, u32 mask)
+{
+	u32 new_mask;
+
+	new_mask = dw_readl(dws, imr) | mask;
+	dw_writel(dws, imr, new_mask);
+}
+
+/*
+ * Each SPI slave device to work with dw_api controller should
+ * has such a structure claiming its working mode (PIO/DMA etc),
+ * which can be save in the "controller_data" member of the
+ * struct spi_device
+ */
+struct dw_spi_chip {
+	u8 poll_mode;	/* 0 for contoller polling mode */
+	u8 type;	/* SPI/SSP/Micrwire */
+	u8 enable_dma;
+	void (*cs_control)(u32 command);
+};
+
+extern int dw_spi_add_host(struct dw_spi *dws);
+extern void dw_spi_remove_host(struct dw_spi *dws);
+extern int dw_spi_suspend_host(struct dw_spi *dws);
+extern int dw_spi_resume_host(struct dw_spi *dws);
+#endif /* DW_SPI_HEADER_H */
-- 
cgit v1.2.3


From 9afa2fb6c13501e5b3536d15344fce4e5442c469 Mon Sep 17 00:00:00 2001
From: Erez Zadok <ezk@cs.sunysb.edu>
Date: Wed, 2 Dec 2009 19:51:54 -0500
Subject: fsstack/ecryptfs: remove unused get_nlinks param to
 fsstack_copy_attr_all

This get_nlinks parameter was never used by the only mainline user,
ecryptfs; and it has never been used by unionfs or wrapfs either.

Acked-by: Dustin Kirkland <kirkland@canonical.com>
Acked-by: Tyler Hicks <tyhicks@linux.vnet.ibm.com>
Signed-off-by: Erez Zadok <ezk@cs.sunysb.edu>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/ecryptfs/dentry.c     |  2 +-
 fs/ecryptfs/inode.c      |  6 +++---
 fs/ecryptfs/main.c       |  2 +-
 fs/stack.c               | 17 +++--------------
 include/linux/fs_stack.h |  4 +---
 5 files changed, 9 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/fs/ecryptfs/dentry.c b/fs/ecryptfs/dentry.c
index 2dda5ade75bc..8f006a0d6076 100644
--- a/fs/ecryptfs/dentry.c
+++ b/fs/ecryptfs/dentry.c
@@ -62,7 +62,7 @@ static int ecryptfs_d_revalidate(struct dentry *dentry, struct nameidata *nd)
 		struct inode *lower_inode =
 			ecryptfs_inode_to_lower(dentry->d_inode);
 
-		fsstack_copy_attr_all(dentry->d_inode, lower_inode, NULL);
+		fsstack_copy_attr_all(dentry->d_inode, lower_inode);
 	}
 out:
 	return rc;
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 056fed62d0de..429ca0b3ba08 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -626,9 +626,9 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 			lower_new_dir_dentry->d_inode, lower_new_dentry);
 	if (rc)
 		goto out_lock;
-	fsstack_copy_attr_all(new_dir, lower_new_dir_dentry->d_inode, NULL);
+	fsstack_copy_attr_all(new_dir, lower_new_dir_dentry->d_inode);
 	if (new_dir != old_dir)
-		fsstack_copy_attr_all(old_dir, lower_old_dir_dentry->d_inode, NULL);
+		fsstack_copy_attr_all(old_dir, lower_old_dir_dentry->d_inode);
 out_lock:
 	unlock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
 	dput(lower_new_dentry->d_parent);
@@ -967,7 +967,7 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia)
 	rc = notify_change(lower_dentry, ia);
 	mutex_unlock(&lower_dentry->d_inode->i_mutex);
 out:
-	fsstack_copy_attr_all(inode, lower_inode, NULL);
+	fsstack_copy_attr_all(inode, lower_inode);
 	return rc;
 }
 
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index 101fe4c7b1ee..567bc4b9f70a 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -189,7 +189,7 @@ int ecryptfs_interpose(struct dentry *lower_dentry, struct dentry *dentry,
 		init_special_inode(inode, lower_inode->i_mode,
 				   lower_inode->i_rdev);
 	dentry->d_op = &ecryptfs_dops;
-	fsstack_copy_attr_all(inode, lower_inode, NULL);
+	fsstack_copy_attr_all(inode, lower_inode);
 	/* This size will be overwritten for real files w/ headers and
 	 * other metadata */
 	fsstack_copy_inode_size(inode, lower_inode);
diff --git a/fs/stack.c b/fs/stack.c
index 67716f6a1a4a..0e20e43ad740 100644
--- a/fs/stack.c
+++ b/fs/stack.c
@@ -14,11 +14,8 @@ void fsstack_copy_inode_size(struct inode *dst, const struct inode *src)
 }
 EXPORT_SYMBOL_GPL(fsstack_copy_inode_size);
 
-/* copy all attributes; get_nlinks is optional way to override the i_nlink
- * copying
- */
-void fsstack_copy_attr_all(struct inode *dest, const struct inode *src,
-				int (*get_nlinks)(struct inode *))
+/* copy all attributes */
+void fsstack_copy_attr_all(struct inode *dest, const struct inode *src)
 {
 	dest->i_mode = src->i_mode;
 	dest->i_uid = src->i_uid;
@@ -29,14 +26,6 @@ void fsstack_copy_attr_all(struct inode *dest, const struct inode *src,
 	dest->i_ctime = src->i_ctime;
 	dest->i_blkbits = src->i_blkbits;
 	dest->i_flags = src->i_flags;
-
-	/*
-	 * Update the nlinks AFTER updating the above fields, because the
-	 * get_links callback may depend on them.
-	 */
-	if (!get_nlinks)
-		dest->i_nlink = src->i_nlink;
-	else
-		dest->i_nlink = (*get_nlinks)(dest);
+	dest->i_nlink = src->i_nlink;
 }
 EXPORT_SYMBOL_GPL(fsstack_copy_attr_all);
diff --git a/include/linux/fs_stack.h b/include/linux/fs_stack.h
index bb516ceeefc9..aa60311900dd 100644
--- a/include/linux/fs_stack.h
+++ b/include/linux/fs_stack.h
@@ -8,9 +8,7 @@
 #include <linux/fs.h>
 
 /* externs for fs/stack.c */
-extern void fsstack_copy_attr_all(struct inode *dest, const struct inode *src,
-				int (*get_nlinks)(struct inode *));
-
+extern void fsstack_copy_attr_all(struct inode *dest, const struct inode *src);
 extern void fsstack_copy_inode_size(struct inode *dst, const struct inode *src);
 
 /* inlines */
-- 
cgit v1.2.3


From 1b8ab8159ef8f818f870a1d2e3b6953d80eefd3f Mon Sep 17 00:00:00 2001
From: Erez Zadok <ezk@cs.sunysb.edu>
Date: Thu, 3 Dec 2009 21:56:09 -0500
Subject: VFS/fsstack: handle 32-bit smp + preempt + large files in
 fsstack_copy_inode_size

Copy the inode size and blocks from one inode to another correctly on 32-bit
systems with CONFIG_SMP, CONFIG_PREEMPT, or CONFIG_LBDAF.  Use proper inode
spinlocks only when i_size/i_blocks cannot fit in one 32-bit word.

Signed-off-by: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Signed-off-by: Erez Zadok <ezk@cs.sunysb.edu>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/stack.c               | 54 +++++++++++++++++++++++++++++++++++++++++++++---
 include/linux/fs_stack.h |  2 +-
 2 files changed, 52 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/fs/stack.c b/fs/stack.c
index 0e20e43ad740..4a6f7f440658 100644
--- a/fs/stack.c
+++ b/fs/stack.c
@@ -7,10 +7,58 @@
  * This function cannot be inlined since i_size_{read,write} is rather
  * heavy-weight on 32-bit systems
  */
-void fsstack_copy_inode_size(struct inode *dst, const struct inode *src)
+void fsstack_copy_inode_size(struct inode *dst, struct inode *src)
 {
-	i_size_write(dst, i_size_read((struct inode *)src));
-	dst->i_blocks = src->i_blocks;
+	loff_t i_size;
+	blkcnt_t i_blocks;
+
+	/*
+	 * i_size_read() includes its own seqlocking and protection from
+	 * preemption (see include/linux/fs.h): we need nothing extra for
+	 * that here, and prefer to avoid nesting locks than attempt to keep
+	 * i_size and i_blocks in sync together.
+	 */
+	i_size = i_size_read(src);
+
+	/*
+	 * But if CONFIG_LBDAF (on 32-bit), we ought to make an effort to
+	 * keep the two halves of i_blocks in sync despite SMP or PREEMPT -
+	 * though stat's generic_fillattr() doesn't bother, and we won't be
+	 * applying quotas (where i_blocks does become important) at the
+	 * upper level.
+	 *
+	 * We don't actually know what locking is used at the lower level;
+	 * but if it's a filesystem that supports quotas, it will be using
+	 * i_lock as in inode_add_bytes().  tmpfs uses other locking, and
+	 * its 32-bit is (just) able to exceed 2TB i_size with the aid of
+	 * holes; but its i_blocks cannot carry into the upper long without
+	 * almost 2TB swap - let's ignore that case.
+	 */
+	if (sizeof(i_blocks) > sizeof(long))
+		spin_lock(&src->i_lock);
+	i_blocks = src->i_blocks;
+	if (sizeof(i_blocks) > sizeof(long))
+		spin_unlock(&src->i_lock);
+
+	/*
+	 * If CONFIG_SMP or CONFIG_PREEMPT on 32-bit, it's vital for
+	 * fsstack_copy_inode_size() to hold some lock around
+	 * i_size_write(), otherwise i_size_read() may spin forever (see
+	 * include/linux/fs.h).  We don't necessarily hold i_mutex when this
+	 * is called, so take i_lock for that case.
+	 *
+	 * And if CONFIG_LBADF (on 32-bit), continue our effort to keep the
+	 * two halves of i_blocks in sync despite SMP or PREEMPT: use i_lock
+	 * for that case too, and do both at once by combining the tests.
+	 *
+	 * There is none of this locking overhead in the 64-bit case.
+	 */
+	if (sizeof(i_size) > sizeof(long) || sizeof(i_blocks) > sizeof(long))
+		spin_lock(&dst->i_lock);
+	i_size_write(dst, i_size);
+	dst->i_blocks = i_blocks;
+	if (sizeof(i_size) > sizeof(long) || sizeof(i_blocks) > sizeof(long))
+		spin_unlock(&dst->i_lock);
 }
 EXPORT_SYMBOL_GPL(fsstack_copy_inode_size);
 
diff --git a/include/linux/fs_stack.h b/include/linux/fs_stack.h
index aa60311900dd..da317c7163ab 100644
--- a/include/linux/fs_stack.h
+++ b/include/linux/fs_stack.h
@@ -9,7 +9,7 @@
 
 /* externs for fs/stack.c */
 extern void fsstack_copy_attr_all(struct inode *dest, const struct inode *src);
-extern void fsstack_copy_inode_size(struct inode *dst, const struct inode *src);
+extern void fsstack_copy_inode_size(struct inode *dst, struct inode *src);
 
 /* inlines */
 static inline void fsstack_copy_attr_atime(struct inode *dest,
-- 
cgit v1.2.3


From 7a0ad10c367ab57c899d340372f37880cbe6ab52 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 17 Dec 2009 14:24:40 +0100
Subject: fold do_sync_file_range into sys_sync_file_range

We recently go rid of all callers of do_sync_file_range as they're better
served with vfs_fsync or the filemap_write_and_wait.  Now that
do_sync_file_range is down to a single caller fold it into it so that people
don't start using it again accidentally.  While at it also switch it from
using __filemap_fdatawrite_range(..., WB_SYNC_ALL) to the more clear
filemap_fdatawrite_range().

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/sync.c          | 59 +++++++++++++++++++++---------------------------------
 include/linux/fs.h |  4 ----
 2 files changed, 23 insertions(+), 40 deletions(-)

(limited to 'include/linux')

diff --git a/fs/sync.c b/fs/sync.c
index 36752a683481..418727a2a239 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -355,6 +355,7 @@ SYSCALL_DEFINE(sync_file_range)(int fd, loff_t offset, loff_t nbytes,
 {
 	int ret;
 	struct file *file;
+	struct address_space *mapping;
 	loff_t endbyte;			/* inclusive */
 	int fput_needed;
 	umode_t i_mode;
@@ -405,7 +406,28 @@ SYSCALL_DEFINE(sync_file_range)(int fd, loff_t offset, loff_t nbytes,
 			!S_ISLNK(i_mode))
 		goto out_put;
 
-	ret = do_sync_mapping_range(file->f_mapping, offset, endbyte, flags);
+	mapping = file->f_mapping;
+	if (!mapping) {
+		ret = -EINVAL;
+		goto out_put;
+	}
+
+	ret = 0;
+	if (flags & SYNC_FILE_RANGE_WAIT_BEFORE) {
+		ret = filemap_fdatawait_range(mapping, offset, endbyte);
+		if (ret < 0)
+			goto out_put;
+	}
+
+	if (flags & SYNC_FILE_RANGE_WRITE) {
+		ret = filemap_fdatawrite_range(mapping, offset, endbyte);
+		if (ret < 0)
+			goto out_put;
+	}
+
+	if (flags & SYNC_FILE_RANGE_WAIT_AFTER)
+		ret = filemap_fdatawait_range(mapping, offset, endbyte);
+
 out_put:
 	fput_light(file, fput_needed);
 out:
@@ -437,38 +459,3 @@ asmlinkage long SyS_sync_file_range2(long fd, long flags,
 }
 SYSCALL_ALIAS(sys_sync_file_range2, SyS_sync_file_range2);
 #endif
-
-/*
- * `endbyte' is inclusive
- */
-int do_sync_mapping_range(struct address_space *mapping, loff_t offset,
-			  loff_t endbyte, unsigned int flags)
-{
-	int ret;
-
-	if (!mapping) {
-		ret = -EINVAL;
-		goto out;
-	}
-
-	ret = 0;
-	if (flags & SYNC_FILE_RANGE_WAIT_BEFORE) {
-		ret = filemap_fdatawait_range(mapping, offset, endbyte);
-		if (ret < 0)
-			goto out;
-	}
-
-	if (flags & SYNC_FILE_RANGE_WRITE) {
-		ret = __filemap_fdatawrite_range(mapping, offset, endbyte,
-						WB_SYNC_ALL);
-		if (ret < 0)
-			goto out;
-	}
-
-	if (flags & SYNC_FILE_RANGE_WAIT_AFTER) {
-		ret = filemap_fdatawait_range(mapping, offset, endbyte);
-	}
-out:
-	return ret;
-}
-EXPORT_SYMBOL_GPL(do_sync_mapping_range);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 66bc0a54b284..77a975089d9a 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1095,10 +1095,6 @@ struct file_lock {
 
 extern void send_sigio(struct fown_struct *fown, int fd, int band);
 
-/* fs/sync.c */
-extern int do_sync_mapping_range(struct address_space *mapping, loff_t offset,
-			loff_t endbyte, unsigned int flags);
-
 #ifdef CONFIG_FILE_LOCKING
 extern int fcntl_getlk(struct file *, struct flock __user *);
 extern int fcntl_setlk(unsigned int, struct file *, unsigned int,
-- 
cgit v1.2.3


From eaff8079d4f1016a12e34ab323737314f24127dd Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 17 Dec 2009 14:25:01 +0100
Subject: kill I_LOCK

After I_SYNC was split from I_LOCK the leftover is always used together with
I_NEW and thus superflous.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/gfs2/inode.c             |  2 +-
 fs/inode.c                  | 26 +++++++++++++-------------
 fs/jfs/jfs_txnmgr.c         |  2 +-
 fs/ntfs/inode.c             |  6 +++---
 fs/ubifs/file.c             |  2 +-
 fs/xfs/linux-2.6/xfs_iops.c |  2 +-
 fs/xfs/xfs_iget.c           |  4 ++--
 include/linux/fs.h          | 36 ++++++++++++++++--------------------
 include/linux/writeback.h   |  3 +--
 9 files changed, 39 insertions(+), 44 deletions(-)

(limited to 'include/linux')

diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 3ff32fa793da..6e220f4eee7d 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -125,7 +125,7 @@ static struct inode *gfs2_iget_skip(struct super_block *sb,
  * directory entry when gfs2_inode_lookup() is invoked. Part of the code
  * segment inside gfs2_inode_lookup code needs to get moved around.
  *
- * Clean up I_LOCK and I_NEW as well.
+ * Clears I_NEW as well.
  **/
 
 void gfs2_set_iop(struct inode *inode)
diff --git a/fs/inode.c b/fs/inode.c
index 06c1f02de611..03dfeb2e3928 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -113,7 +113,7 @@ static void wake_up_inode(struct inode *inode)
 	 * Prevent speculative execution through spin_unlock(&inode_lock);
 	 */
 	smp_mb();
-	wake_up_bit(&inode->i_state, __I_LOCK);
+	wake_up_bit(&inode->i_state, __I_NEW);
 }
 
 /**
@@ -690,17 +690,17 @@ void unlock_new_inode(struct inode *inode)
 	}
 #endif
 	/*
-	 * This is special!  We do not need the spinlock when clearing I_LOCK,
+	 * This is special!  We do not need the spinlock when clearing I_NEW,
 	 * because we're guaranteed that nobody else tries to do anything about
 	 * the state of the inode when it is locked, as we just created it (so
-	 * there can be no old holders that haven't tested I_LOCK).
+	 * there can be no old holders that haven't tested I_NEW).
 	 * However we must emit the memory barrier so that other CPUs reliably
-	 * see the clearing of I_LOCK after the other inode initialisation has
+	 * see the clearing of I_NEW after the other inode initialisation has
 	 * completed.
 	 */
 	smp_mb();
-	WARN_ON((inode->i_state & (I_LOCK|I_NEW)) != (I_LOCK|I_NEW));
-	inode->i_state &= ~(I_LOCK|I_NEW);
+	WARN_ON(!(inode->i_state & I_NEW));
+	inode->i_state &= ~I_NEW;
 	wake_up_inode(inode);
 }
 EXPORT_SYMBOL(unlock_new_inode);
@@ -731,7 +731,7 @@ static struct inode *get_new_inode(struct super_block *sb,
 				goto set_failed;
 
 			__inode_add_to_lists(sb, head, inode);
-			inode->i_state = I_LOCK|I_NEW;
+			inode->i_state = I_NEW;
 			spin_unlock(&inode_lock);
 
 			/* Return the locked inode with I_NEW set, the
@@ -778,7 +778,7 @@ static struct inode *get_new_inode_fast(struct super_block *sb,
 		if (!old) {
 			inode->i_ino = ino;
 			__inode_add_to_lists(sb, head, inode);
-			inode->i_state = I_LOCK|I_NEW;
+			inode->i_state = I_NEW;
 			spin_unlock(&inode_lock);
 
 			/* Return the locked inode with I_NEW set, the
@@ -1083,7 +1083,7 @@ int insert_inode_locked(struct inode *inode)
 	ino_t ino = inode->i_ino;
 	struct hlist_head *head = inode_hashtable + hash(sb, ino);
 
-	inode->i_state |= I_LOCK|I_NEW;
+	inode->i_state |= I_NEW;
 	while (1) {
 		struct hlist_node *node;
 		struct inode *old = NULL;
@@ -1120,7 +1120,7 @@ int insert_inode_locked4(struct inode *inode, unsigned long hashval,
 	struct super_block *sb = inode->i_sb;
 	struct hlist_head *head = inode_hashtable + hash(sb, hashval);
 
-	inode->i_state |= I_LOCK|I_NEW;
+	inode->i_state |= I_NEW;
 
 	while (1) {
 		struct hlist_node *node;
@@ -1510,7 +1510,7 @@ EXPORT_SYMBOL(inode_wait);
  * until the deletion _might_ have completed.  Callers are responsible
  * to recheck inode state.
  *
- * It doesn't matter if I_LOCK is not set initially, a call to
+ * It doesn't matter if I_NEW is not set initially, a call to
  * wake_up_inode() after removing from the hash list will DTRT.
  *
  * This is called with inode_lock held.
@@ -1518,8 +1518,8 @@ EXPORT_SYMBOL(inode_wait);
 static void __wait_on_freeing_inode(struct inode *inode)
 {
 	wait_queue_head_t *wq;
-	DEFINE_WAIT_BIT(wait, &inode->i_state, __I_LOCK);
-	wq = bit_waitqueue(&inode->i_state, __I_LOCK);
+	DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW);
+	wq = bit_waitqueue(&inode->i_state, __I_NEW);
 	prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
 	spin_unlock(&inode_lock);
 	schedule();
diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c
index f26e4d03ada5..d945ea76b445 100644
--- a/fs/jfs/jfs_txnmgr.c
+++ b/fs/jfs/jfs_txnmgr.c
@@ -1292,7 +1292,7 @@ int txCommit(tid_t tid,		/* transaction identifier */
 		 */
 		/*
 		 * I believe this code is no longer needed.  Splitting I_LOCK
-		 * into two bits, I_LOCK and I_SYNC should prevent this
+		 * into two bits, I_NEW and I_SYNC should prevent this
 		 * deadlock as well.  But since I don't have a JFS testload
 		 * to verify this, only a trivial s/I_LOCK/I_SYNC/ was done.
 		 * Joern
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index 9938034762cc..dc2505abb6d7 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -530,7 +530,7 @@ err_corrupt_attr:
  * the ntfs inode.
  *
  * Q: What locks are held when the function is called?
- * A: i_state has I_LOCK set, hence the inode is locked, also
+ * A: i_state has I_NEW set, hence the inode is locked, also
  *    i_count is set to 1, so it is not going to go away
  *    i_flags is set to 0 and we have no business touching it.  Only an ioctl()
  *    is allowed to write to them. We should of course be honouring them but
@@ -1207,7 +1207,7 @@ err_out:
  * necessary fields in @vi as well as initializing the ntfs inode.
  *
  * Q: What locks are held when the function is called?
- * A: i_state has I_LOCK set, hence the inode is locked, also
+ * A: i_state has I_NEW set, hence the inode is locked, also
  *    i_count is set to 1, so it is not going to go away
  *
  * Return 0 on success and -errno on error.  In the error case, the inode will
@@ -1474,7 +1474,7 @@ err_out:
  * normal directory inodes.
  *
  * Q: What locks are held when the function is called?
- * A: i_state has I_LOCK set, hence the inode is locked, also
+ * A: i_state has I_NEW set, hence the inode is locked, also
  *    i_count is set to 1, so it is not going to go away
  *
  * Return 0 on success and -errno on error.  In the error case, the inode will
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 39849f887e72..16a6444330ec 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -45,7 +45,7 @@
  *
  * Similarly, @i_mutex is not always locked in 'ubifs_readpage()', e.g., the
  * read-ahead path does not lock it ("sys_read -> generic_file_aio_read ->
- * ondemand_readahead -> readpage"). In case of readahead, @I_LOCK flag is not
+ * ondemand_readahead -> readpage"). In case of readahead, @I_SYNC flag is not
  * set as well. However, UBIFS disables readahead.
  */
 
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 1d5b298ba8b2..225946012d0b 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -794,7 +794,7 @@ xfs_setup_inode(
 	struct inode		*inode = &ip->i_vnode;
 
 	inode->i_ino = ip->i_ino;
-	inode->i_state = I_NEW|I_LOCK;
+	inode->i_state = I_NEW;
 	inode_add_to_lists(ip->i_mount->m_super, inode);
 
 	inode->i_mode	= ip->i_d.di_mode;
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index 0de36c2a46f1..fa402a6bbbcf 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -91,7 +91,7 @@ xfs_inode_alloc(
 	ip->i_new_size = 0;
 
 	/* prevent anyone from using this yet */
-	VFS_I(ip)->i_state = I_NEW|I_LOCK;
+	VFS_I(ip)->i_state = I_NEW;
 
 	return ip;
 }
@@ -217,7 +217,7 @@ xfs_iget_cache_hit(
 			trace_xfs_iget_reclaim(ip);
 			goto out_error;
 		}
-		inode->i_state = I_LOCK|I_NEW;
+		inode->i_state = I_NEW;
 	} else {
 		/* If the VFS inode is being torn down, pause and try again. */
 		if (!igrab(inode)) {
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 77a975089d9a..cca191933ff6 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1587,7 +1587,7 @@ struct super_operations {
  * until that flag is cleared.  I_WILL_FREE, I_FREEING and I_CLEAR are set at
  * various stages of removing an inode.
  *
- * Two bits are used for locking and completion notification, I_LOCK and I_SYNC.
+ * Two bits are used for locking and completion notification, I_NEW and I_SYNC.
  *
  * I_DIRTY_SYNC		Inode is dirty, but doesn't have to be written on
  *			fdatasync().  i_atime is the usual cause.
@@ -1596,8 +1596,14 @@ struct super_operations {
  *			don't have to write inode on fdatasync() when only
  *			mtime has changed in it.
  * I_DIRTY_PAGES	Inode has dirty pages.  Inode itself may be clean.
- * I_NEW		get_new_inode() sets i_state to I_LOCK|I_NEW.  Both
- *			are cleared by unlock_new_inode(), called from iget().
+ * I_NEW		Serves as both a mutex and completion notification.
+ *			New inodes set I_NEW.  If two processes both create
+ *			the same inode, one of them will release its inode and
+ *			wait for I_NEW to be released before returning.
+ *			Inodes in I_WILL_FREE, I_FREEING or I_CLEAR state can
+ *			also cause waiting on I_NEW, without I_NEW actually
+ *			being set.  find_inode() uses this to prevent returning
+ *			nearly-dead inodes.
  * I_WILL_FREE		Must be set when calling write_inode_now() if i_count
  *			is zero.  I_FREEING must be set when I_WILL_FREE is
  *			cleared.
@@ -1611,20 +1617,11 @@ struct super_operations {
  *			prohibited for many purposes.  iget() must wait for
  *			the inode to be completely released, then create it
  *			anew.  Other functions will just ignore such inodes,
- *			if appropriate.  I_LOCK is used for waiting.
+ *			if appropriate.  I_NEW is used for waiting.
  *
- * I_LOCK		Serves as both a mutex and completion notification.
- *			New inodes set I_LOCK.  If two processes both create
- *			the same inode, one of them will release its inode and
- *			wait for I_LOCK to be released before returning.
- *			Inodes in I_WILL_FREE, I_FREEING or I_CLEAR state can
- *			also cause waiting on I_LOCK, without I_LOCK actually
- *			being set.  find_inode() uses this to prevent returning
- *			nearly-dead inodes.
- * I_SYNC		Similar to I_LOCK, but limited in scope to writeback
- *			of inode dirty data.  Having a separate lock for this
- *			purpose reduces latency and prevents some filesystem-
- *			specific deadlocks.
+ * I_SYNC		Synchonized write of dirty inode data.  The bits is
+ *			set during data writeback, and cleared with a wakeup
+ *			on the bit address once it is done.
  *
  * Q: What is the difference between I_WILL_FREE and I_FREEING?
  * Q: igrab() only checks on (I_FREEING|I_WILL_FREE).  Should it also check on
@@ -1633,13 +1630,12 @@ struct super_operations {
 #define I_DIRTY_SYNC		1
 #define I_DIRTY_DATASYNC	2
 #define I_DIRTY_PAGES		4
-#define I_NEW			8
+#define __I_NEW			3
+#define I_NEW			(1 << __I_NEW)
 #define I_WILL_FREE		16
 #define I_FREEING		32
 #define I_CLEAR			64
-#define __I_LOCK		7
-#define I_LOCK			(1 << __I_LOCK)
-#define __I_SYNC		8
+#define __I_SYNC		7
 #define I_SYNC			(1 << __I_SYNC)
 
 #define I_DIRTY (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES)
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 705f01fe413a..c18c008f4bbf 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -79,8 +79,7 @@ void wakeup_flusher_threads(long nr_pages);
 static inline void wait_on_inode(struct inode *inode)
 {
 	might_sleep();
-	wait_on_bit(&inode->i_state, __I_LOCK, inode_wait,
-							TASK_UNINTERRUPTIBLE);
+	wait_on_bit(&inode->i_state, __I_NEW, inode_wait, TASK_UNINTERRUPTIBLE);
 }
 static inline void inode_sync_wait(struct inode *inode)
 {
-- 
cgit v1.2.3


From a2770d86b33024f71df269fde2de096df89d6a48 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Thu, 17 Dec 2009 12:51:05 -0800
Subject: Revert "fix mismerge with Trond's stuff (create_mnt_ns() export is
 gone now)"

This reverts commit e9496ff46a20a8592fdc7bdaaf41b45eb808d310. Quoth Al:

 "it's dependent on a lot of other stuff not currently in mainline
  and badly broken with current fs/namespace.c.  Sorry, badly
  out-of-order cherry-pick from old queue.

  PS: there's a large pending series reworking the refcounting and
  lifetime rules for vfsmounts that will, among other things, allow to
  rip a subtree away _without_ dissolving connections in it, to be
  garbage-collected when all active references are gone.  It's
  considerably saner wrt "is the subtree busy" logics, but it's nowhere
  near being ready for merge at the moment; this changeset is one of the
  things becoming possible with that sucker, but it certainly shouldn't
  have been picked during this cycle.  My apologies..."

Noticed-by: Eric Paris <eparis@redhat.com>
Requested-by: Al Viro <viro@ZenIV.linux.org.uk>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/namespace.c                | 3 ++-
 fs/nfs/super.c                | 8 ++++++++
 include/linux/mnt_namespace.h | 1 +
 3 files changed, 11 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/fs/namespace.c b/fs/namespace.c
index faab1273281e..7d70d63ceb29 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2068,7 +2068,7 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
  * create_mnt_ns - creates a private namespace and adds a root filesystem
  * @mnt: pointer to the new root filesystem mountpoint
  */
-static struct mnt_namespace *create_mnt_ns(struct vfsmount *mnt)
+struct mnt_namespace *create_mnt_ns(struct vfsmount *mnt)
 {
 	struct mnt_namespace *new_ns;
 
@@ -2080,6 +2080,7 @@ static struct mnt_namespace *create_mnt_ns(struct vfsmount *mnt)
 	}
 	return new_ns;
 }
+EXPORT_SYMBOL(create_mnt_ns);
 
 SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name,
 		char __user *, type, unsigned long, flags, void __user *, data)
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index d5b112bcf3de..ce907efc5508 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -2648,13 +2648,21 @@ out_freepage:
 static int nfs_follow_remote_path(struct vfsmount *root_mnt,
 		const char *export_path, struct vfsmount *mnt_target)
 {
+	struct mnt_namespace *ns_private;
 	struct nameidata nd;
 	struct super_block *s;
 	int ret;
 
+	ns_private = create_mnt_ns(root_mnt);
+	ret = PTR_ERR(ns_private);
+	if (IS_ERR(ns_private))
+		goto out_mntput;
+
 	ret = vfs_path_lookup(root_mnt->mnt_root, root_mnt,
 			export_path, LOOKUP_FOLLOW, &nd);
 
+	put_mnt_ns(ns_private);
+
 	if (ret != 0)
 		goto out_err;
 
diff --git a/include/linux/mnt_namespace.h b/include/linux/mnt_namespace.h
index d9ebf1037dfa..d74785c2393a 100644
--- a/include/linux/mnt_namespace.h
+++ b/include/linux/mnt_namespace.h
@@ -23,6 +23,7 @@ struct proc_mounts {
 
 struct fs_struct;
 
+extern struct mnt_namespace *create_mnt_ns(struct vfsmount *mnt);
 extern struct mnt_namespace *copy_mnt_ns(unsigned long, struct mnt_namespace *,
 		struct fs_struct *);
 extern void put_mnt_ns(struct mnt_namespace *ns);
-- 
cgit v1.2.3


From b6e3224fb20954f155e41ec5709b2ab70b50ae2d Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Thu, 17 Dec 2009 13:23:24 -0800
Subject: Revert "task_struct: make journal_info conditional"

This reverts commit e4c570c4cb7a95dbfafa3d016d2739bf3fdfe319, as
requested by Alexey:

 "I think I gave a good enough arguments to not merge it.
  To iterate:
   * patch makes impossible to start using ext3 on EXT3_FS=n kernels
     without reboot.
   * this is done only for one pointer on task_struct"

  None of config options which define task_struct are tristate directly
  or effectively."

Requested-by: Alexey Dobriyan <adobriyan@gmail.com>
Acked-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/Kconfig                | 4 ----
 fs/btrfs/Kconfig          | 1 -
 fs/ext4/Kconfig           | 1 -
 fs/gfs2/Kconfig           | 1 -
 fs/jbd/Kconfig            | 1 -
 fs/jbd2/Kconfig           | 1 -
 fs/nilfs2/Kconfig         | 1 -
 fs/reiserfs/Kconfig       | 1 -
 include/linux/init_task.h | 8 +-------
 include/linux/sched.h     | 2 --
 10 files changed, 1 insertion(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/fs/Kconfig b/fs/Kconfig
index f8fccaaad628..64d44efad7a5 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -6,10 +6,6 @@ menu "File systems"
 
 if BLOCK
 
-config FS_JOURNAL_INFO
-	bool
-	default n
-
 source "fs/ext2/Kconfig"
 source "fs/ext3/Kconfig"
 source "fs/ext4/Kconfig"
diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig
index 402afe0a0bfb..7bb3c020e570 100644
--- a/fs/btrfs/Kconfig
+++ b/fs/btrfs/Kconfig
@@ -4,7 +4,6 @@ config BTRFS_FS
 	select LIBCRC32C
 	select ZLIB_INFLATE
 	select ZLIB_DEFLATE
-	select FS_JOURNAL_INFO
 	help
 	  Btrfs is a new filesystem with extents, writable snapshotting,
 	  support for multiple devices and many more features.
diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig
index e5f6774846e4..9acf7e808139 100644
--- a/fs/ext4/Kconfig
+++ b/fs/ext4/Kconfig
@@ -2,7 +2,6 @@ config EXT4_FS
 	tristate "The Extended 4 (ext4) filesystem"
 	select JBD2
 	select CRC16
-	select FS_JOURNAL_INFO
 	help
 	  This is the next generation of the ext3 filesystem.
 
diff --git a/fs/gfs2/Kconfig b/fs/gfs2/Kconfig
index b192c661caa6..4dcddf83326f 100644
--- a/fs/gfs2/Kconfig
+++ b/fs/gfs2/Kconfig
@@ -10,7 +10,6 @@ config GFS2_FS
 	select SLOW_WORK
 	select QUOTA
 	select QUOTACTL
-	select FS_JOURNAL_INFO
 	help
 	  A cluster filesystem.
 
diff --git a/fs/jbd/Kconfig b/fs/jbd/Kconfig
index a8408983abd4..4e28beeed157 100644
--- a/fs/jbd/Kconfig
+++ b/fs/jbd/Kconfig
@@ -1,6 +1,5 @@
 config JBD
 	tristate
-	select FS_JOURNAL_INFO
 	help
 	  This is a generic journalling layer for block devices.  It is
 	  currently used by the ext3 file system, but it could also be
diff --git a/fs/jbd2/Kconfig b/fs/jbd2/Kconfig
index 0f7d1ceafdfd..f32f346f4b0a 100644
--- a/fs/jbd2/Kconfig
+++ b/fs/jbd2/Kconfig
@@ -1,7 +1,6 @@
 config JBD2
 	tristate
 	select CRC32
-	select FS_JOURNAL_INFO
 	help
 	  This is a generic journaling layer for block devices that support
 	  both 32-bit and 64-bit block numbers.  It is currently used by
diff --git a/fs/nilfs2/Kconfig b/fs/nilfs2/Kconfig
index 1225af7b2166..251da07b2a1d 100644
--- a/fs/nilfs2/Kconfig
+++ b/fs/nilfs2/Kconfig
@@ -2,7 +2,6 @@ config NILFS2_FS
 	tristate "NILFS2 file system support (EXPERIMENTAL)"
 	depends on EXPERIMENTAL
 	select CRC32
-	select FS_JOURNAL_INFO
 	help
 	  NILFS2 is a log-structured file system (LFS) supporting continuous
 	  snapshotting.  In addition to versioning capability of the entire
diff --git a/fs/reiserfs/Kconfig b/fs/reiserfs/Kconfig
index ac7cd75c86f8..513f431038f9 100644
--- a/fs/reiserfs/Kconfig
+++ b/fs/reiserfs/Kconfig
@@ -1,7 +1,6 @@
 config REISERFS_FS
 	tristate "Reiserfs support"
 	select CRC32
-	select FS_JOURNAL_INFO
 	help
 	  Stores not just filenames but the files themselves in a balanced
 	  tree.  Uses journalling.
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 5ed8b9c50355..abec69b63d7e 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -111,12 +111,6 @@ extern struct cred init_cred;
 # define INIT_PERF_EVENTS(tsk)
 #endif
 
-#ifdef CONFIG_FS_JOURNAL_INFO
-#define INIT_JOURNAL_INFO	.journal_info = NULL,
-#else
-#define INIT_JOURNAL_INFO
-#endif
-
 /*
  *  INIT_TASK is used to set up the first task table, touch at
  * your own risk!. Base=0, limit=0x1fffff (=2MB)
@@ -168,6 +162,7 @@ extern struct cred init_cred;
 		.signal = {{0}}},					\
 	.blocked	= {{0}},					\
 	.alloc_lock	= __SPIN_LOCK_UNLOCKED(tsk.alloc_lock),		\
+	.journal_info	= NULL,						\
 	.cpu_timers	= INIT_CPU_TIMERS(tsk.cpu_timers),		\
 	.fs_excl	= ATOMIC_INIT(0),				\
 	.pi_lock	= __RAW_SPIN_LOCK_UNLOCKED(tsk.pi_lock),	\
@@ -178,7 +173,6 @@ extern struct cred init_cred;
 		[PIDTYPE_SID]  = INIT_PID_LINK(PIDTYPE_SID),		\
 	},								\
 	.dirties = INIT_PROP_LOCAL_SINGLE(dirties),			\
-	INIT_JOURNAL_INFO						\
 	INIT_IDS							\
 	INIT_PERF_EVENTS(tsk)						\
 	INIT_TRACE_IRQFLAGS						\
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 244c287a5ac1..211ed32befbd 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1446,10 +1446,8 @@ struct task_struct {
 	gfp_t lockdep_reclaim_gfp;
 #endif
 
-#ifdef CONFIG_FS_JOURNAL_INFO
 /* journalling filesystem info */
 	void *journal_info;
-#endif
 
 /* stacked block device info */
 	struct bio *bio_list, **bio_tail;
-- 
cgit v1.2.3


From 5d0bb2c4238e333ae18c5cd23f75e02a3dac3519 Mon Sep 17 00:00:00 2001
From: Bernhard Walle <bernhard@bwalle.de>
Date: Thu, 17 Dec 2009 15:27:11 -0800
Subject: vt: don't export vt_kmsg_redirect() to userspace

Fix following warning in linux-next by guarding the function definition
(both the "extern" and the inline) with #ifdef __KERNEL__.

usr/include/linux/vt.h:89: userspace cannot call function or variable defined in
the kernel

Introduced by commit 5ada918b82399eef3afd6a71e3637697d6bd719f ("vt:
introduce and use vt_kmsg_redirect() function").

Signed-off-by: Bernhard Walle <bernhard@bwalle.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/vt.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/vt.h b/include/linux/vt.h
index 3fb9944e50a6..d5dd0bc408fd 100644
--- a/include/linux/vt.h
+++ b/include/linux/vt.h
@@ -84,6 +84,8 @@ struct vt_setactivate {
 
 #define VT_SETACTIVATE	0x560F	/* Activate and set the mode of a console */
 
+#ifdef __KERNEL__
+
 #ifdef CONFIG_VT_CONSOLE
 
 extern int vt_kmsg_redirect(int new);
@@ -97,6 +99,8 @@ static inline int vt_kmsg_redirect(int new)
 
 #endif
 
+#endif /* __KERNEL__ */
+
 #define vt_get_kmsg_redirect() vt_kmsg_redirect(-1)
 
 #endif /* _LINUX_VT_H */
-- 
cgit v1.2.3


From f6151dfea21496d43dbaba32cfcd9c9f404769bc Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@redhat.com>
Date: Thu, 17 Dec 2009 15:27:16 -0800
Subject: mm: introduce coredump parameter structure

Introduce coredump parameter data structure (struct coredump_params) to
simplify binfmt->core_dump() arguments.

Signed-off-by: Masami Hiramatsu <mhiramat@redhat.com>
Suggested-by: Ingo Molnar <mingo@elte.hu>
Cc: Hidehiro Kawai <hidehiro.kawai.ez@hitachi.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Roland McGrath <roland@redhat.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/binfmt_aout.c        | 13 +++++++------
 fs/binfmt_elf.c         | 24 +++++++++++++-----------
 fs/binfmt_elf_fdpic.c   | 29 +++++++++++++++--------------
 fs/binfmt_flat.c        |  6 +++---
 fs/binfmt_som.c         |  2 +-
 fs/exec.c               | 38 +++++++++++++++++++++-----------------
 include/linux/binfmts.h | 10 +++++++++-
 7 files changed, 69 insertions(+), 53 deletions(-)

(limited to 'include/linux')

diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
index b639dcf7c778..346b69405363 100644
--- a/fs/binfmt_aout.c
+++ b/fs/binfmt_aout.c
@@ -32,7 +32,7 @@
 
 static int load_aout_binary(struct linux_binprm *, struct pt_regs * regs);
 static int load_aout_library(struct file*);
-static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit);
+static int aout_core_dump(struct coredump_params *cprm);
 
 static struct linux_binfmt aout_format = {
 	.module		= THIS_MODULE,
@@ -89,8 +89,9 @@ if (file->f_op->llseek) { \
  * dumping of the process results in another error..
  */
 
-static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit)
+static int aout_core_dump(struct coredump_params *cprm)
 {
+	struct file *file = cprm->file;
 	mm_segment_t fs;
 	int has_dumped = 0;
 	unsigned long dump_start, dump_size;
@@ -108,16 +109,16 @@ static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file, u
 	current->flags |= PF_DUMPCORE;
        	strncpy(dump.u_comm, current->comm, sizeof(dump.u_comm));
 	dump.u_ar0 = offsetof(struct user, regs);
-	dump.signal = signr;
-	aout_dump_thread(regs, &dump);
+	dump.signal = cprm->signr;
+	aout_dump_thread(cprm->regs, &dump);
 
 /* If the size of the dump file exceeds the rlimit, then see what would happen
    if we wrote the stack, but not the data area.  */
-	if ((dump.u_dsize + dump.u_ssize+1) * PAGE_SIZE > limit)
+	if ((dump.u_dsize + dump.u_ssize+1) * PAGE_SIZE > cprm->limit)
 		dump.u_dsize = 0;
 
 /* Make sure we have enough room to write the stack and data areas. */
-	if ((dump.u_ssize + 1) * PAGE_SIZE > limit)
+	if ((dump.u_ssize + 1) * PAGE_SIZE > cprm->limit)
 		dump.u_ssize = 0;
 
 /* make sure we actually have a data and stack area to dump */
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 97b6e9efeb7f..edd90c49003c 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -45,7 +45,7 @@ static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
  * don't even try.
  */
 #ifdef CONFIG_ELF_CORE
-static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit);
+static int elf_core_dump(struct coredump_params *cprm);
 #else
 #define elf_core_dump	NULL
 #endif
@@ -1272,8 +1272,9 @@ static int writenote(struct memelfnote *men, struct file *file,
 }
 #undef DUMP_WRITE
 
-#define DUMP_WRITE(addr, nr)	\
-	if ((size += (nr)) > limit || !dump_write(file, (addr), (nr))) \
+#define DUMP_WRITE(addr, nr)				\
+	if ((size += (nr)) > cprm->limit ||		\
+	    !dump_write(cprm->file, (addr), (nr)))	\
 		goto end_coredump;
 
 static void fill_elf_header(struct elfhdr *elf, int segs,
@@ -1901,7 +1902,7 @@ static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
  * and then they are actually written out.  If we run out of core limit
  * we just truncate.
  */
-static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit)
+static int elf_core_dump(struct coredump_params *cprm)
 {
 	int has_dumped = 0;
 	mm_segment_t fs;
@@ -1947,7 +1948,7 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, un
 	 * notes.  This also sets up the file header.
 	 */
 	if (!fill_note_info(elf, segs + 1, /* including notes section */
-			    &info, signr, regs))
+			    &info, cprm->signr, cprm->regs))
 		goto cleanup;
 
 	has_dumped = 1;
@@ -2009,14 +2010,14 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, un
 #endif
 
  	/* write out the notes section */
-	if (!write_note_info(&info, file, &foffset))
+	if (!write_note_info(&info, cprm->file, &foffset))
 		goto end_coredump;
 
-	if (elf_coredump_extra_notes_write(file, &foffset))
+	if (elf_coredump_extra_notes_write(cprm->file, &foffset))
 		goto end_coredump;
 
 	/* Align to page */
-	if (!dump_seek(file, dataoff - foffset))
+	if (!dump_seek(cprm->file, dataoff - foffset))
 		goto end_coredump;
 
 	for (vma = first_vma(current, gate_vma); vma != NULL;
@@ -2033,12 +2034,13 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, un
 			page = get_dump_page(addr);
 			if (page) {
 				void *kaddr = kmap(page);
-				stop = ((size += PAGE_SIZE) > limit) ||
-					!dump_write(file, kaddr, PAGE_SIZE);
+				stop = ((size += PAGE_SIZE) > cprm->limit) ||
+					!dump_write(cprm->file, kaddr,
+						    PAGE_SIZE);
 				kunmap(page);
 				page_cache_release(page);
 			} else
-				stop = !dump_seek(file, PAGE_SIZE);
+				stop = !dump_seek(cprm->file, PAGE_SIZE);
 			if (stop)
 				goto end_coredump;
 		}
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 7b055385db8e..c25256a5c5b0 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -76,7 +76,7 @@ static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *,
 					     struct file *, struct mm_struct *);
 
 #ifdef CONFIG_ELF_CORE
-static int elf_fdpic_core_dump(long, struct pt_regs *, struct file *, unsigned long limit);
+static int elf_fdpic_core_dump(struct coredump_params *cprm);
 #endif
 
 static struct linux_binfmt elf_fdpic_format = {
@@ -1326,8 +1326,9 @@ static int writenote(struct memelfnote *men, struct file *file)
 #undef DUMP_WRITE
 #undef DUMP_SEEK
 
-#define DUMP_WRITE(addr, nr)	\
-	if ((size += (nr)) > limit || !dump_write(file, (addr), (nr))) \
+#define DUMP_WRITE(addr, nr)				\
+	if ((size += (nr)) > cprm->limit ||		\
+	    !dump_write(cprm->file, (addr), (nr)))	\
 		goto end_coredump;
 
 static inline void fill_elf_fdpic_header(struct elfhdr *elf, int segs)
@@ -1582,8 +1583,7 @@ static int elf_fdpic_dump_segments(struct file *file, size_t *size,
  * and then they are actually written out.  If we run out of core limit
  * we just truncate.
  */
-static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,
-			       struct file *file, unsigned long limit)
+static int elf_fdpic_core_dump(struct coredump_params *cprm)
 {
 #define	NUM_NOTES	6
 	int has_dumped = 0;
@@ -1642,7 +1642,7 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,
 		goto cleanup;
 #endif
 
-	if (signr) {
+	if (cprm->signr) {
 		struct core_thread *ct;
 		struct elf_thread_status *tmp;
 
@@ -1661,14 +1661,14 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,
 			int sz;
 
 			tmp = list_entry(t, struct elf_thread_status, list);
-			sz = elf_dump_thread_status(signr, tmp);
+			sz = elf_dump_thread_status(cprm->signr, tmp);
 			thread_status_size += sz;
 		}
 	}
 
 	/* now collect the dump for the current */
-	fill_prstatus(prstatus, current, signr);
-	elf_core_copy_regs(&prstatus->pr_reg, regs);
+	fill_prstatus(prstatus, current, cprm->signr);
+	elf_core_copy_regs(&prstatus->pr_reg, cprm->regs);
 
 	segs = current->mm->map_count;
 #ifdef ELF_CORE_EXTRA_PHDRS
@@ -1703,7 +1703,7 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,
 
   	/* Try to dump the FPU. */
 	if ((prstatus->pr_fpvalid =
-	     elf_core_copy_task_fpregs(current, regs, fpu)))
+	     elf_core_copy_task_fpregs(current, cprm->regs, fpu)))
 		fill_note(notes + numnote++,
 			  "CORE", NT_PRFPREG, sizeof(*fpu), fpu);
 #ifdef ELF_CORE_COPY_XFPREGS
@@ -1774,7 +1774,7 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,
 
  	/* write out the notes section */
 	for (i = 0; i < numnote; i++)
-		if (!writenote(notes + i, file))
+		if (!writenote(notes + i, cprm->file))
 			goto end_coredump;
 
 	/* write out the thread status notes section */
@@ -1783,14 +1783,15 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,
 				list_entry(t, struct elf_thread_status, list);
 
 		for (i = 0; i < tmp->num_notes; i++)
-			if (!writenote(&tmp->notes[i], file))
+			if (!writenote(&tmp->notes[i], cprm->file))
 				goto end_coredump;
 	}
 
-	if (!dump_seek(file, dataoff))
+	if (!dump_seek(cprm->file, dataoff))
 		goto end_coredump;
 
-	if (elf_fdpic_dump_segments(file, &size, &limit, mm_flags) < 0)
+	if (elf_fdpic_dump_segments(cprm->file, &size, &cprm->limit,
+				    mm_flags) < 0)
 		goto end_coredump;
 
 #ifdef ELF_CORE_WRITE_EXTRA_DATA
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index a2796651e756..d4a00ea1054c 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -87,7 +87,7 @@ static int load_flat_shared_library(int id, struct lib_info *p);
 #endif
 
 static int load_flat_binary(struct linux_binprm *, struct pt_regs * regs);
-static int flat_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit);
+static int flat_core_dump(struct coredump_params *cprm);
 
 static struct linux_binfmt flat_format = {
 	.module		= THIS_MODULE,
@@ -102,10 +102,10 @@ static struct linux_binfmt flat_format = {
  * Currently only a stub-function.
  */
 
-static int flat_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit)
+static int flat_core_dump(struct coredump_params *cprm)
 {
 	printk("Process %s:%d received signr %d and should have core dumped\n",
-			current->comm, current->pid, (int) signr);
+			current->comm, current->pid, (int) cprm->signr);
 	return(1);
 }
 
diff --git a/fs/binfmt_som.c b/fs/binfmt_som.c
index eff74b9c9e77..2a9b5330cc5e 100644
--- a/fs/binfmt_som.c
+++ b/fs/binfmt_som.c
@@ -43,7 +43,7 @@ static int load_som_library(struct file *);
  * don't even try.
  */
 #if 0
-static int som_core_dump(long signr, struct pt_regs *regs, unsigned long limit);
+static int som_core_dump(struct coredump_params *cprm);
 #else
 #define som_core_dump	NULL
 #endif
diff --git a/fs/exec.c b/fs/exec.c
index 77db9a97a773..632b02e34ec7 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1763,17 +1763,20 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
 	struct mm_struct *mm = current->mm;
 	struct linux_binfmt * binfmt;
 	struct inode * inode;
-	struct file * file;
 	const struct cred *old_cred;
 	struct cred *cred;
 	int retval = 0;
 	int flag = 0;
 	int ispipe = 0;
-	unsigned long core_limit = current->signal->rlim[RLIMIT_CORE].rlim_cur;
 	char **helper_argv = NULL;
 	int helper_argc = 0;
 	int dump_count = 0;
 	static atomic_t core_dump_count = ATOMIC_INIT(0);
+	struct coredump_params cprm = {
+		.signr = signr,
+		.regs = regs,
+		.limit = current->signal->rlim[RLIMIT_CORE].rlim_cur,
+	};
 
 	audit_core_dumps(signr);
 
@@ -1829,15 +1832,15 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
 	ispipe = format_corename(corename, signr);
 	unlock_kernel();
 
-	if ((!ispipe) && (core_limit < binfmt->min_coredump))
+	if ((!ispipe) && (cprm.limit < binfmt->min_coredump))
 		goto fail_unlock;
 
  	if (ispipe) {
-		if (core_limit == 0) {
+		if (cprm.limit == 0) {
 			/*
 			 * Normally core limits are irrelevant to pipes, since
 			 * we're not writing to the file system, but we use
-			 * core_limit of 0 here as a speacial value. Any
+			 * cprm.limit of 0 here as a speacial value. Any
 			 * non-zero limit gets set to RLIM_INFINITY below, but
 			 * a limit of 0 skips the dump.  This is a consistent
 			 * way to catch recursive crashes.  We can still crash
@@ -1870,25 +1873,25 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
 			goto fail_dropcount;
 		}
 
-		core_limit = RLIM_INFINITY;
+		cprm.limit = RLIM_INFINITY;
 
 		/* SIGPIPE can happen, but it's just never processed */
 		if (call_usermodehelper_pipe(helper_argv[0], helper_argv, NULL,
-				&file)) {
+				&cprm.file)) {
  			printk(KERN_INFO "Core dump to %s pipe failed\n",
 			       corename);
 			goto fail_dropcount;
  		}
  	} else
- 		file = filp_open(corename,
+		cprm.file = filp_open(corename,
 				 O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag,
 				 0600);
-	if (IS_ERR(file))
+	if (IS_ERR(cprm.file))
 		goto fail_dropcount;
-	inode = file->f_path.dentry->d_inode;
+	inode = cprm.file->f_path.dentry->d_inode;
 	if (inode->i_nlink > 1)
 		goto close_fail;	/* multiple links - don't dump */
-	if (!ispipe && d_unhashed(file->f_path.dentry))
+	if (!ispipe && d_unhashed(cprm.file->f_path.dentry))
 		goto close_fail;
 
 	/* AK: actually i see no reason to not allow this for named pipes etc.,
@@ -1901,21 +1904,22 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
 	 */
 	if (inode->i_uid != current_fsuid())
 		goto close_fail;
-	if (!file->f_op)
+	if (!cprm.file->f_op)
 		goto close_fail;
-	if (!file->f_op->write)
+	if (!cprm.file->f_op->write)
 		goto close_fail;
-	if (!ispipe && do_truncate(file->f_path.dentry, 0, 0, file) != 0)
+	if (!ispipe &&
+	    do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file) != 0)
 		goto close_fail;
 
-	retval = binfmt->core_dump(signr, regs, file, core_limit);
+	retval = binfmt->core_dump(&cprm);
 
 	if (retval)
 		current->signal->group_exit_code |= 0x80;
 close_fail:
 	if (ispipe && core_pipe_limit)
-		wait_for_dump_helpers(file);
-	filp_close(file, NULL);
+		wait_for_dump_helpers(cprm.file);
+	filp_close(cprm.file, NULL);
 fail_dropcount:
 	if (dump_count)
 		atomic_dec(&core_dump_count);
diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index aece486ac734..cd4349bdc34e 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -68,6 +68,14 @@ struct linux_binprm{
 
 #define BINPRM_MAX_RECURSION 4
 
+/* Function parameter for binfmt->coredump */
+struct coredump_params {
+	long signr;
+	struct pt_regs *regs;
+	struct file *file;
+	unsigned long limit;
+};
+
 /*
  * This structure defines the functions that are used to load the binary formats that
  * linux accepts.
@@ -77,7 +85,7 @@ struct linux_binfmt {
 	struct module *module;
 	int (*load_binary)(struct linux_binprm *, struct  pt_regs * regs);
 	int (*load_shlib)(struct file *);
-	int (*core_dump)(long signr, struct pt_regs *regs, struct file *file, unsigned long limit);
+	int (*core_dump)(struct coredump_params *cprm);
 	unsigned long min_coredump;	/* minimal dump size */
 	int hasvdso;
 };
-- 
cgit v1.2.3


From 925cc71e512a29e2594bcc17dc58d0a0e9c4d524 Mon Sep 17 00:00:00 2001
From: Robert Jennings <rcj@linux.vnet.ibm.com>
Date: Thu, 17 Dec 2009 14:44:38 +0000
Subject: mm: Add notifier in pageblock isolation for balloon drivers

Memory balloon drivers can allocate a large amount of memory which is not
movable but could be freed to accomodate memory hotplug remove.

Prior to calling the memory hotplug notifier chain the memory in the
pageblock is isolated.  Currently, if the migrate type is not
MIGRATE_MOVABLE the isolation will not proceed, causing the memory removal
for that page range to fail.

Rather than failing pageblock isolation if the migrateteype is not
MIGRATE_MOVABLE, this patch checks if all of the pages in the pageblock,
and not on the LRU, are owned by a registered balloon driver (or other
entity) using a notifier chain.  If all of the non-movable pages are owned
by a balloon, they can be freed later through the memory notifier chain
and the range can still be isolated in set_migratetype_isolate().

Signed-off-by: Robert Jennings <rcj@linux.vnet.ibm.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Brian King <brking@linux.vnet.ibm.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Gerald Schaefer <geralds@linux.vnet.ibm.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 drivers/base/memory.c  | 19 +++++++++++++++++
 include/linux/memory.h | 27 ++++++++++++++++++++++++
 mm/page_alloc.c        | 57 +++++++++++++++++++++++++++++++++++++++++++-------
 3 files changed, 96 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index c4c8f2e1dd15..d7d77d4a402c 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -63,6 +63,20 @@ void unregister_memory_notifier(struct notifier_block *nb)
 }
 EXPORT_SYMBOL(unregister_memory_notifier);
 
+static ATOMIC_NOTIFIER_HEAD(memory_isolate_chain);
+
+int register_memory_isolate_notifier(struct notifier_block *nb)
+{
+	return atomic_notifier_chain_register(&memory_isolate_chain, nb);
+}
+EXPORT_SYMBOL(register_memory_isolate_notifier);
+
+void unregister_memory_isolate_notifier(struct notifier_block *nb)
+{
+	atomic_notifier_chain_unregister(&memory_isolate_chain, nb);
+}
+EXPORT_SYMBOL(unregister_memory_isolate_notifier);
+
 /*
  * register_memory - Setup a sysfs device for a memory block
  */
@@ -157,6 +171,11 @@ int memory_notify(unsigned long val, void *v)
 	return blocking_notifier_call_chain(&memory_chain, val, v);
 }
 
+int memory_isolate_notify(unsigned long val, void *v)
+{
+	return atomic_notifier_call_chain(&memory_isolate_chain, val, v);
+}
+
 /*
  * MEMORY_HOTPLUG depends on SPARSEMEM in mm/Kconfig, so it is
  * OK to have direct references to sparsemem variables in here.
diff --git a/include/linux/memory.h b/include/linux/memory.h
index 37fa19b34ef5..1adfe779eb99 100644
--- a/include/linux/memory.h
+++ b/include/linux/memory.h
@@ -50,6 +50,19 @@ struct memory_notify {
 	int status_change_nid;
 };
 
+/*
+ * During pageblock isolation, count the number of pages within the
+ * range [start_pfn, start_pfn + nr_pages) which are owned by code
+ * in the notifier chain.
+ */
+#define MEM_ISOLATE_COUNT	(1<<0)
+
+struct memory_isolate_notify {
+	unsigned long start_pfn;	/* Start of range to check */
+	unsigned int nr_pages;		/* # pages in range to check */
+	unsigned int pages_found;	/* # pages owned found by callbacks */
+};
+
 struct notifier_block;
 struct mem_section;
 
@@ -76,14 +89,28 @@ static inline int memory_notify(unsigned long val, void *v)
 {
 	return 0;
 }
+static inline int register_memory_isolate_notifier(struct notifier_block *nb)
+{
+	return 0;
+}
+static inline void unregister_memory_isolate_notifier(struct notifier_block *nb)
+{
+}
+static inline int memory_isolate_notify(unsigned long val, void *v)
+{
+	return 0;
+}
 #else
 extern int register_memory_notifier(struct notifier_block *nb);
 extern void unregister_memory_notifier(struct notifier_block *nb);
+extern int register_memory_isolate_notifier(struct notifier_block *nb);
+extern void unregister_memory_isolate_notifier(struct notifier_block *nb);
 extern int register_new_memory(int, struct mem_section *);
 extern int unregister_memory_section(struct mem_section *);
 extern int memory_dev_init(void);
 extern int remove_memory_block(unsigned long, struct mem_section *, int);
 extern int memory_notify(unsigned long val, void *v);
+extern int memory_isolate_notify(unsigned long val, void *v);
 extern struct memory_block *find_memory_block(struct mem_section *);
 #define CONFIG_MEM_BLOCK_SIZE	(PAGES_PER_SECTION<<PAGE_SHIFT)
 enum mem_add_context { BOOT, HOTPLUG };
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 74af449b1f1d..998eacc1e4c3 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -48,6 +48,7 @@
 #include <linux/page_cgroup.h>
 #include <linux/debugobjects.h>
 #include <linux/kmemleak.h>
+#include <linux/memory.h>
 #include <trace/events/kmem.h>
 
 #include <asm/tlbflush.h>
@@ -5008,23 +5009,65 @@ void set_pageblock_flags_group(struct page *page, unsigned long flags,
 int set_migratetype_isolate(struct page *page)
 {
 	struct zone *zone;
-	unsigned long flags;
+	struct page *curr_page;
+	unsigned long flags, pfn, iter;
+	unsigned long immobile = 0;
+	struct memory_isolate_notify arg;
+	int notifier_ret;
 	int ret = -EBUSY;
 	int zone_idx;
 
 	zone = page_zone(page);
 	zone_idx = zone_idx(zone);
+
 	spin_lock_irqsave(&zone->lock, flags);
+	if (get_pageblock_migratetype(page) == MIGRATE_MOVABLE ||
+	    zone_idx == ZONE_MOVABLE) {
+		ret = 0;
+		goto out;
+	}
+
+	pfn = page_to_pfn(page);
+	arg.start_pfn = pfn;
+	arg.nr_pages = pageblock_nr_pages;
+	arg.pages_found = 0;
+
 	/*
-	 * In future, more migrate types will be able to be isolation target.
+	 * It may be possible to isolate a pageblock even if the
+	 * migratetype is not MIGRATE_MOVABLE. The memory isolation
+	 * notifier chain is used by balloon drivers to return the
+	 * number of pages in a range that are held by the balloon
+	 * driver to shrink memory. If all the pages are accounted for
+	 * by balloons, are free, or on the LRU, isolation can continue.
+	 * Later, for example, when memory hotplug notifier runs, these
+	 * pages reported as "can be isolated" should be isolated(freed)
+	 * by the balloon driver through the memory notifier chain.
 	 */
-	if (get_pageblock_migratetype(page) != MIGRATE_MOVABLE &&
-	    zone_idx != ZONE_MOVABLE)
+	notifier_ret = memory_isolate_notify(MEM_ISOLATE_COUNT, &arg);
+	notifier_ret = notifier_to_errno(notifier_ret);
+	if (notifier_ret || !arg.pages_found)
 		goto out;
-	set_pageblock_migratetype(page, MIGRATE_ISOLATE);
-	move_freepages_block(zone, page, MIGRATE_ISOLATE);
-	ret = 0;
+
+	for (iter = pfn; iter < (pfn + pageblock_nr_pages); iter++) {
+		if (!pfn_valid_within(pfn))
+			continue;
+
+		curr_page = pfn_to_page(iter);
+		if (!page_count(curr_page) || PageLRU(curr_page))
+			continue;
+
+		immobile++;
+	}
+
+	if (arg.pages_found == immobile)
+		ret = 0;
+
 out:
+	if (!ret) {
+		set_pageblock_migratetype(page, MIGRATE_ISOLATE);
+		move_freepages_block(zone, page, MIGRATE_ISOLATE);
+	}
+
 	spin_unlock_irqrestore(&zone->lock, flags);
 	if (!ret)
 		drain_all_pages();
-- 
cgit v1.2.3


From 8c0414cd524e9f1c483ffb3ff1c2d860f5c567c8 Mon Sep 17 00:00:00 2001
From: Sunil Mushran <sunil.mushran@oracle.com>
Date: Thu, 3 Dec 2009 12:46:51 -0800
Subject: fiemap: Add new extent flag FIEMAP_EXTENT_SHARED

Some filesystems may allow multiple files to point to a particular
extent.  This patch adds flag FIEMAP_EXTENT_SHARED to denote extents
that are shared with other inodes.

Signed-off-by: Sunil Mushran <sunil.mushran@oracle.com>
Acked-by: Mark Fasheh <mfasheh@suse.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 include/linux/fiemap.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/fiemap.h b/include/linux/fiemap.h
index 934e22d65801..d830747f5c0b 100644
--- a/include/linux/fiemap.h
+++ b/include/linux/fiemap.h
@@ -62,5 +62,7 @@ struct fiemap {
 #define FIEMAP_EXTENT_MERGED		0x00001000 /* File does not natively
 						    * support extents. Result
 						    * merged for efficiency. */
+#define FIEMAP_EXTENT_SHARED		0x00002000 /* Space shared with other
+						    * files. */
 
 #endif /* _LINUX_FIEMAP_H */
-- 
cgit v1.2.3


From 622e99bf0d54c4517cb0524540cd77257db8621a Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Fri, 18 Dec 2009 17:43:20 +0100
Subject: [S390] rename NT_PRXSTATUS to NT_S390_HIGHREGS

The elf notes number for the upper register halves is s390 specific.
Change the name of the elf notes to include S390.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/kernel/ptrace.c | 2 +-
 include/linux/elf.h       | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index 653c6a178740..13815d39f7dd 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -959,7 +959,7 @@ static const struct user_regset s390_compat_regsets[] = {
 		.set = s390_fpregs_set,
 	},
 	[REGSET_GENERAL_EXTENDED] = {
-		.core_note_type = NT_PRXSTATUS,
+		.core_note_type = NT_S390_HIGH_GPRS,
 		.n = sizeof(s390_compat_regs_high) / sizeof(compat_long_t),
 		.size = sizeof(compat_long_t),
 		.align = sizeof(compat_long_t),
diff --git a/include/linux/elf.h b/include/linux/elf.h
index 90a4ed0ea0e5..0cc4d55151b7 100644
--- a/include/linux/elf.h
+++ b/include/linux/elf.h
@@ -361,7 +361,7 @@ typedef struct elf64_shdr {
 #define NT_PPC_VSX	0x102		/* PowerPC VSX registers */
 #define NT_386_TLS	0x200		/* i386 TLS slots (struct user_desc) */
 #define NT_386_IOPERM	0x201		/* x86 io permission bitmap (1=deny) */
-#define NT_PRXSTATUS	0x300		/* s390 upper register halves */
+#define NT_S390_HIGH_GPRS	0x300	/* s390 upper register halves */
 
 
 /* Note header in a PT_NOTE section */
-- 
cgit v1.2.3


From 9dfc6e68bfe6ee452efb1a4e9ca26a9007f2b864 Mon Sep 17 00:00:00 2001
From: Christoph Lameter <cl@linux-foundation.org>
Date: Fri, 18 Dec 2009 16:26:20 -0600
Subject: SLUB: Use this_cpu operations in slub

Using per cpu allocations removes the needs for the per cpu arrays in the
kmem_cache struct. These could get quite big if we have to support systems
with thousands of cpus. The use of this_cpu_xx operations results in:

1. The size of kmem_cache for SMP configuration shrinks since we will only
   need 1 pointer instead of NR_CPUS. The same pointer can be used by all
   processors. Reduces cache footprint of the allocator.

2. We can dynamically size kmem_cache according to the actual nodes in the
   system meaning less memory overhead for configurations that may potentially
   support up to 1k NUMA nodes / 4k cpus.

3. We can remove the diddle widdle with allocating and releasing of
   kmem_cache_cpu structures when bringing up and shutting down cpus. The cpu
   alloc logic will do it all for us. Removes some portions of the cpu hotplug
   functionality.

4. Fastpath performance increases since per cpu pointer lookups and
   address calculations are avoided.

V7-V8
- Convert missed get_cpu_slab() under CONFIG_SLUB_STATS

Signed-off-by: Christoph Lameter <cl@linux-foundation.org>
Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>
---
 include/linux/slub_def.h |   6 +-
 mm/slub.c                | 202 +++++++++++------------------------------------
 2 files changed, 49 insertions(+), 159 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index 1e14beb23f9b..17ebe0f89bf3 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -69,6 +69,7 @@ struct kmem_cache_order_objects {
  * Slab cache management.
  */
 struct kmem_cache {
+	struct kmem_cache_cpu *cpu_slab;
 	/* Used for retriving partial slabs etc */
 	unsigned long flags;
 	int size;		/* The size of an object including meta data */
@@ -104,11 +105,6 @@ struct kmem_cache {
 	int remote_node_defrag_ratio;
 	struct kmem_cache_node *node[MAX_NUMNODES];
 #endif
-#ifdef CONFIG_SMP
-	struct kmem_cache_cpu *cpu_slab[NR_CPUS];
-#else
-	struct kmem_cache_cpu cpu_slab;
-#endif
 };
 
 /*
diff --git a/mm/slub.c b/mm/slub.c
index 8d71aaf888d7..d6c9ecf629d5 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -242,15 +242,6 @@ static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node)
 #endif
 }
 
-static inline struct kmem_cache_cpu *get_cpu_slab(struct kmem_cache *s, int cpu)
-{
-#ifdef CONFIG_SMP
-	return s->cpu_slab[cpu];
-#else
-	return &s->cpu_slab;
-#endif
-}
-
 /* Verify that a pointer has an address that is valid within a slab page */
 static inline int check_valid_pointer(struct kmem_cache *s,
 				struct page *page, const void *object)
@@ -1124,7 +1115,7 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
 		if (!page)
 			return NULL;
 
-		stat(get_cpu_slab(s, raw_smp_processor_id()), ORDER_FALLBACK);
+		stat(this_cpu_ptr(s->cpu_slab), ORDER_FALLBACK);
 	}
 
 	if (kmemcheck_enabled
@@ -1422,7 +1413,7 @@ static struct page *get_partial(struct kmem_cache *s, gfp_t flags, int node)
 static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail)
 {
 	struct kmem_cache_node *n = get_node(s, page_to_nid(page));
-	struct kmem_cache_cpu *c = get_cpu_slab(s, smp_processor_id());
+	struct kmem_cache_cpu *c = this_cpu_ptr(s->cpu_slab);
 
 	__ClearPageSlubFrozen(page);
 	if (page->inuse) {
@@ -1454,7 +1445,7 @@ static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail)
 			slab_unlock(page);
 		} else {
 			slab_unlock(page);
-			stat(get_cpu_slab(s, raw_smp_processor_id()), FREE_SLAB);
+			stat(__this_cpu_ptr(s->cpu_slab), FREE_SLAB);
 			discard_slab(s, page);
 		}
 	}
@@ -1507,7 +1498,7 @@ static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
  */
 static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu)
 {
-	struct kmem_cache_cpu *c = get_cpu_slab(s, cpu);
+	struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
 
 	if (likely(c && c->page))
 		flush_slab(s, c);
@@ -1673,7 +1664,7 @@ new_slab:
 		local_irq_disable();
 
 	if (new) {
-		c = get_cpu_slab(s, smp_processor_id());
+		c = __this_cpu_ptr(s->cpu_slab);
 		stat(c, ALLOC_SLAB);
 		if (c->page)
 			flush_slab(s, c);
@@ -1711,7 +1702,7 @@ static __always_inline void *slab_alloc(struct kmem_cache *s,
 	void **object;
 	struct kmem_cache_cpu *c;
 	unsigned long flags;
-	unsigned int objsize;
+	unsigned long objsize;
 
 	gfpflags &= gfp_allowed_mask;
 
@@ -1722,14 +1713,14 @@ static __always_inline void *slab_alloc(struct kmem_cache *s,
 		return NULL;
 
 	local_irq_save(flags);
-	c = get_cpu_slab(s, smp_processor_id());
+	c = __this_cpu_ptr(s->cpu_slab);
+	object = c->freelist;
 	objsize = c->objsize;
-	if (unlikely(!c->freelist || !node_match(c, node)))
+	if (unlikely(!object || !node_match(c, node)))
 
 		object = __slab_alloc(s, gfpflags, node, addr, c);
 
 	else {
-		object = c->freelist;
 		c->freelist = object[c->offset];
 		stat(c, ALLOC_FASTPATH);
 	}
@@ -1800,7 +1791,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
 	void **object = (void *)x;
 	struct kmem_cache_cpu *c;
 
-	c = get_cpu_slab(s, raw_smp_processor_id());
+	c = __this_cpu_ptr(s->cpu_slab);
 	stat(c, FREE_SLOWPATH);
 	slab_lock(page);
 
@@ -1872,7 +1863,7 @@ static __always_inline void slab_free(struct kmem_cache *s,
 
 	kmemleak_free_recursive(x, s->flags);
 	local_irq_save(flags);
-	c = get_cpu_slab(s, smp_processor_id());
+	c = __this_cpu_ptr(s->cpu_slab);
 	kmemcheck_slab_free(s, object, c->objsize);
 	debug_check_no_locks_freed(object, c->objsize);
 	if (!(s->flags & SLAB_DEBUG_OBJECTS))
@@ -2095,130 +2086,28 @@ init_kmem_cache_node(struct kmem_cache_node *n, struct kmem_cache *s)
 #endif
 }
 
-#ifdef CONFIG_SMP
-/*
- * Per cpu array for per cpu structures.
- *
- * The per cpu array places all kmem_cache_cpu structures from one processor
- * close together meaning that it becomes possible that multiple per cpu
- * structures are contained in one cacheline. This may be particularly
- * beneficial for the kmalloc caches.
- *
- * A desktop system typically has around 60-80 slabs. With 100 here we are
- * likely able to get per cpu structures for all caches from the array defined
- * here. We must be able to cover all kmalloc caches during bootstrap.
- *
- * If the per cpu array is exhausted then fall back to kmalloc
- * of individual cachelines. No sharing is possible then.
- */
-#define NR_KMEM_CACHE_CPU 100
-
-static DEFINE_PER_CPU(struct kmem_cache_cpu [NR_KMEM_CACHE_CPU],
-		      kmem_cache_cpu);
-
-static DEFINE_PER_CPU(struct kmem_cache_cpu *, kmem_cache_cpu_free);
-static DECLARE_BITMAP(kmem_cach_cpu_free_init_once, CONFIG_NR_CPUS);
-
-static struct kmem_cache_cpu *alloc_kmem_cache_cpu(struct kmem_cache *s,
-							int cpu, gfp_t flags)
-{
-	struct kmem_cache_cpu *c = per_cpu(kmem_cache_cpu_free, cpu);
-
-	if (c)
-		per_cpu(kmem_cache_cpu_free, cpu) =
-				(void *)c->freelist;
-	else {
-		/* Table overflow: So allocate ourselves */
-		c = kmalloc_node(
-			ALIGN(sizeof(struct kmem_cache_cpu), cache_line_size()),
-			flags, cpu_to_node(cpu));
-		if (!c)
-			return NULL;
-	}
-
-	init_kmem_cache_cpu(s, c);
-	return c;
-}
-
-static void free_kmem_cache_cpu(struct kmem_cache_cpu *c, int cpu)
-{
-	if (c < per_cpu(kmem_cache_cpu, cpu) ||
-			c >= per_cpu(kmem_cache_cpu, cpu) + NR_KMEM_CACHE_CPU) {
-		kfree(c);
-		return;
-	}
-	c->freelist = (void *)per_cpu(kmem_cache_cpu_free, cpu);
-	per_cpu(kmem_cache_cpu_free, cpu) = c;
-}
-
-static void free_kmem_cache_cpus(struct kmem_cache *s)
-{
-	int cpu;
-
-	for_each_online_cpu(cpu) {
-		struct kmem_cache_cpu *c = get_cpu_slab(s, cpu);
-
-		if (c) {
-			s->cpu_slab[cpu] = NULL;
-			free_kmem_cache_cpu(c, cpu);
-		}
-	}
-}
-
-static int alloc_kmem_cache_cpus(struct kmem_cache *s, gfp_t flags)
-{
-	int cpu;
-
-	for_each_online_cpu(cpu) {
-		struct kmem_cache_cpu *c = get_cpu_slab(s, cpu);
+static DEFINE_PER_CPU(struct kmem_cache_cpu, kmalloc_percpu[SLUB_PAGE_SHIFT]);
 
-		if (c)
-			continue;
-
-		c = alloc_kmem_cache_cpu(s, cpu, flags);
-		if (!c) {
-			free_kmem_cache_cpus(s);
-			return 0;
-		}
-		s->cpu_slab[cpu] = c;
-	}
-	return 1;
-}
-
-/*
- * Initialize the per cpu array.
- */
-static void init_alloc_cpu_cpu(int cpu)
-{
-	int i;
-
-	if (cpumask_test_cpu(cpu, to_cpumask(kmem_cach_cpu_free_init_once)))
-		return;
-
-	for (i = NR_KMEM_CACHE_CPU - 1; i >= 0; i--)
-		free_kmem_cache_cpu(&per_cpu(kmem_cache_cpu, cpu)[i], cpu);
-
-	cpumask_set_cpu(cpu, to_cpumask(kmem_cach_cpu_free_init_once));
-}
-
-static void __init init_alloc_cpu(void)
+static inline int alloc_kmem_cache_cpus(struct kmem_cache *s, gfp_t flags)
 {
 	int cpu;
 
-	for_each_online_cpu(cpu)
-		init_alloc_cpu_cpu(cpu);
-  }
+	if (s < kmalloc_caches + SLUB_PAGE_SHIFT && s >= kmalloc_caches)
+		/*
+		 * Boot time creation of the kmalloc array. Use static per cpu data
+		 * since the per cpu allocator is not available yet.
+		 */
+		s->cpu_slab = per_cpu_var(kmalloc_percpu) + (s - kmalloc_caches);
+	else
+		s->cpu_slab =  alloc_percpu(struct kmem_cache_cpu);
 
-#else
-static inline void free_kmem_cache_cpus(struct kmem_cache *s) {}
-static inline void init_alloc_cpu(void) {}
+	if (!s->cpu_slab)
+		return 0;
 
-static inline int alloc_kmem_cache_cpus(struct kmem_cache *s, gfp_t flags)
-{
-	init_kmem_cache_cpu(s, &s->cpu_slab);
+	for_each_possible_cpu(cpu)
+		init_kmem_cache_cpu(s, per_cpu_ptr(s->cpu_slab, cpu));
 	return 1;
 }
-#endif
 
 #ifdef CONFIG_NUMA
 /*
@@ -2609,9 +2498,8 @@ static inline int kmem_cache_close(struct kmem_cache *s)
 	int node;
 
 	flush_all(s);
-
+	free_percpu(s->cpu_slab);
 	/* Attempt to free all objects */
-	free_kmem_cache_cpus(s);
 	for_each_node_state(node, N_NORMAL_MEMORY) {
 		struct kmem_cache_node *n = get_node(s, node);
 
@@ -2760,7 +2648,19 @@ static noinline struct kmem_cache *dma_kmalloc_cache(int index, gfp_t flags)
 	realsize = kmalloc_caches[index].objsize;
 	text = kasprintf(flags & ~SLUB_DMA, "kmalloc_dma-%d",
 			 (unsigned int)realsize);
-	s = kmalloc(kmem_size, flags & ~SLUB_DMA);
+
+	if (flags & __GFP_WAIT)
+		s = kmalloc(kmem_size, flags & ~SLUB_DMA);
+	else {
+		int i;
+
+		s = NULL;
+		for (i = 0; i < SLUB_PAGE_SHIFT; i++)
+			if (kmalloc_caches[i].size) {
+				s = kmalloc_caches + i;
+				break;
+			}
+	}
 
 	/*
 	 * Must defer sysfs creation to a workqueue because we don't know
@@ -3176,8 +3076,6 @@ void __init kmem_cache_init(void)
 	int i;
 	int caches = 0;
 
-	init_alloc_cpu();
-
 #ifdef CONFIG_NUMA
 	/*
 	 * Must first have the slab cache available for the allocations of the
@@ -3261,8 +3159,10 @@ void __init kmem_cache_init(void)
 
 #ifdef CONFIG_SMP
 	register_cpu_notifier(&slab_notifier);
-	kmem_size = offsetof(struct kmem_cache, cpu_slab) +
-				nr_cpu_ids * sizeof(struct kmem_cache_cpu *);
+#endif
+#ifdef CONFIG_NUMA
+	kmem_size = offsetof(struct kmem_cache, node) +
+				nr_node_ids * sizeof(struct kmem_cache_node *);
 #else
 	kmem_size = sizeof(struct kmem_cache);
 #endif
@@ -3365,7 +3265,7 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size,
 		 * per cpu structures
 		 */
 		for_each_online_cpu(cpu)
-			get_cpu_slab(s, cpu)->objsize = s->objsize;
+			per_cpu_ptr(s->cpu_slab, cpu)->objsize = s->objsize;
 
 		s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *)));
 		up_write(&slub_lock);
@@ -3422,11 +3322,9 @@ static int __cpuinit slab_cpuup_callback(struct notifier_block *nfb,
 	switch (action) {
 	case CPU_UP_PREPARE:
 	case CPU_UP_PREPARE_FROZEN:
-		init_alloc_cpu_cpu(cpu);
 		down_read(&slub_lock);
 		list_for_each_entry(s, &slab_caches, list)
-			s->cpu_slab[cpu] = alloc_kmem_cache_cpu(s, cpu,
-							GFP_KERNEL);
+			init_kmem_cache_cpu(s, per_cpu_ptr(s->cpu_slab, cpu));
 		up_read(&slub_lock);
 		break;
 
@@ -3436,13 +3334,9 @@ static int __cpuinit slab_cpuup_callback(struct notifier_block *nfb,
 	case CPU_DEAD_FROZEN:
 		down_read(&slub_lock);
 		list_for_each_entry(s, &slab_caches, list) {
-			struct kmem_cache_cpu *c = get_cpu_slab(s, cpu);
-
 			local_irq_save(flags);
 			__flush_cpu_slab(s, cpu);
 			local_irq_restore(flags);
-			free_kmem_cache_cpu(c, cpu);
-			s->cpu_slab[cpu] = NULL;
 		}
 		up_read(&slub_lock);
 		break;
@@ -3928,7 +3822,7 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
 		int cpu;
 
 		for_each_possible_cpu(cpu) {
-			struct kmem_cache_cpu *c = get_cpu_slab(s, cpu);
+			struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
 
 			if (!c || c->node < 0)
 				continue;
@@ -4353,7 +4247,7 @@ static int show_stat(struct kmem_cache *s, char *buf, enum stat_item si)
 		return -ENOMEM;
 
 	for_each_online_cpu(cpu) {
-		unsigned x = get_cpu_slab(s, cpu)->stat[si];
+		unsigned x = per_cpu_ptr(s->cpu_slab, cpu)->stat[si];
 
 		data[cpu] = x;
 		sum += x;
@@ -4376,7 +4270,7 @@ static void clear_stat(struct kmem_cache *s, enum stat_item si)
 	int cpu;
 
 	for_each_online_cpu(cpu)
-		get_cpu_slab(s, cpu)->stat[si] = 0;
+		per_cpu_ptr(s->cpu_slab, cpu)->stat[si] = 0;
 }
 
 #define STAT_ATTR(si, text) 					\
-- 
cgit v1.2.3


From 756dee75872a2a764b478e18076360b8a4ec9045 Mon Sep 17 00:00:00 2001
From: Christoph Lameter <cl@linux-foundation.org>
Date: Fri, 18 Dec 2009 16:26:21 -0600
Subject: SLUB: Get rid of dynamic DMA kmalloc cache allocation

Dynamic DMA kmalloc cache allocation is troublesome since the
new percpu allocator does not support allocations in atomic contexts.
Reserve some statically allocated kmalloc_cpu structures instead.

Signed-off-by: Christoph Lameter <cl@linux-foundation.org>
Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>
---
 include/linux/slub_def.h | 19 +++++++++++--------
 mm/slub.c                | 24 ++++++++++--------------
 2 files changed, 21 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index 17ebe0f89bf3..a78fb4ac2015 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -131,11 +131,21 @@ struct kmem_cache {
 
 #define SLUB_PAGE_SHIFT (PAGE_SHIFT + 2)
 
+#ifdef CONFIG_ZONE_DMA
+#define SLUB_DMA __GFP_DMA
+/* Reserve extra caches for potential DMA use */
+#define KMALLOC_CACHES (2 * SLUB_PAGE_SHIFT - 6)
+#else
+/* Disable DMA functionality */
+#define SLUB_DMA (__force gfp_t)0
+#define KMALLOC_CACHES SLUB_PAGE_SHIFT
+#endif
+
 /*
  * We keep the general caches in an array of slab caches that are used for
  * 2^x bytes of allocations.
  */
-extern struct kmem_cache kmalloc_caches[SLUB_PAGE_SHIFT];
+extern struct kmem_cache kmalloc_caches[KMALLOC_CACHES];
 
 /*
  * Sorry that the following has to be that ugly but some versions of GCC
@@ -203,13 +213,6 @@ static __always_inline struct kmem_cache *kmalloc_slab(size_t size)
 	return &kmalloc_caches[index];
 }
 
-#ifdef CONFIG_ZONE_DMA
-#define SLUB_DMA __GFP_DMA
-#else
-/* Disable DMA functionality */
-#define SLUB_DMA (__force gfp_t)0
-#endif
-
 void *kmem_cache_alloc(struct kmem_cache *, gfp_t);
 void *__kmalloc(size_t size, gfp_t flags);
 
diff --git a/mm/slub.c b/mm/slub.c
index d6c9ecf629d5..cdb7f0214af0 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2092,7 +2092,7 @@ static inline int alloc_kmem_cache_cpus(struct kmem_cache *s, gfp_t flags)
 {
 	int cpu;
 
-	if (s < kmalloc_caches + SLUB_PAGE_SHIFT && s >= kmalloc_caches)
+	if (s < kmalloc_caches + KMALLOC_CACHES && s >= kmalloc_caches)
 		/*
 		 * Boot time creation of the kmalloc array. Use static per cpu data
 		 * since the per cpu allocator is not available yet.
@@ -2539,7 +2539,7 @@ EXPORT_SYMBOL(kmem_cache_destroy);
  *		Kmalloc subsystem
  *******************************************************************/
 
-struct kmem_cache kmalloc_caches[SLUB_PAGE_SHIFT] __cacheline_aligned;
+struct kmem_cache kmalloc_caches[KMALLOC_CACHES] __cacheline_aligned;
 EXPORT_SYMBOL(kmalloc_caches);
 
 static int __init setup_slub_min_order(char *str)
@@ -2629,6 +2629,7 @@ static noinline struct kmem_cache *dma_kmalloc_cache(int index, gfp_t flags)
 	char *text;
 	size_t realsize;
 	unsigned long slabflags;
+	int i;
 
 	s = kmalloc_caches_dma[index];
 	if (s)
@@ -2649,18 +2650,13 @@ static noinline struct kmem_cache *dma_kmalloc_cache(int index, gfp_t flags)
 	text = kasprintf(flags & ~SLUB_DMA, "kmalloc_dma-%d",
 			 (unsigned int)realsize);
 
-	if (flags & __GFP_WAIT)
-		s = kmalloc(kmem_size, flags & ~SLUB_DMA);
-	else {
-		int i;
+	s = NULL;
+	for (i = 0; i < KMALLOC_CACHES; i++)
+		if (!kmalloc_caches[i].size)
+			break;
 
-		s = NULL;
-		for (i = 0; i < SLUB_PAGE_SHIFT; i++)
-			if (kmalloc_caches[i].size) {
-				s = kmalloc_caches + i;
-				break;
-			}
-	}
+	BUG_ON(i >= KMALLOC_CACHES);
+	s = kmalloc_caches + i;
 
 	/*
 	 * Must defer sysfs creation to a workqueue because we don't know
@@ -2674,7 +2670,7 @@ static noinline struct kmem_cache *dma_kmalloc_cache(int index, gfp_t flags)
 
 	if (!s || !text || !kmem_cache_open(s, flags, text,
 			realsize, ARCH_KMALLOC_MINALIGN, slabflags, NULL)) {
-		kfree(s);
+		s->size = 0;
 		kfree(text);
 		goto unlock_out;
 	}
-- 
cgit v1.2.3


From ff12059ed14b0773d7bbef86f98218ada6c20770 Mon Sep 17 00:00:00 2001
From: Christoph Lameter <cl@linux-foundation.org>
Date: Fri, 18 Dec 2009 16:26:22 -0600
Subject: SLUB: this_cpu: Remove slub kmem_cache fields

Remove the fields in struct kmem_cache_cpu that were used to cache data from
struct kmem_cache when they were in different cachelines. The cacheline that
holds the per cpu array pointer now also holds these values. We can cut down
the struct kmem_cache_cpu size to almost half.

The get_freepointer() and set_freepointer() functions that used to be only
intended for the slow path now are also useful for the hot path since access
to the size field does not require accessing an additional cacheline anymore.
This results in consistent use of functions for setting the freepointer of
objects throughout SLUB.

Also we initialize all possible kmem_cache_cpu structures when a slab is
created. No need to initialize them when a processor or node comes online.

Signed-off-by: Christoph Lameter <cl@linux-foundation.org>
Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>
---
 include/linux/slub_def.h |  2 --
 mm/slub.c                | 76 +++++++++++-------------------------------------
 2 files changed, 17 insertions(+), 61 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index a78fb4ac2015..0249d4175bac 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -38,8 +38,6 @@ struct kmem_cache_cpu {
 	void **freelist;	/* Pointer to first free per cpu object */
 	struct page *page;	/* The slab from which we are allocating */
 	int node;		/* The node of the page (or -1 for debug) */
-	unsigned int offset;	/* Freepointer offset (in word units) */
-	unsigned int objsize;	/* Size of an object (from kmem_cache) */
 #ifdef CONFIG_SLUB_STATS
 	unsigned stat[NR_SLUB_STAT_ITEMS];
 #endif
diff --git a/mm/slub.c b/mm/slub.c
index cdb7f0214af0..30d2dde27563 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -260,13 +260,6 @@ static inline int check_valid_pointer(struct kmem_cache *s,
 	return 1;
 }
 
-/*
- * Slow version of get and set free pointer.
- *
- * This version requires touching the cache lines of kmem_cache which
- * we avoid to do in the fast alloc free paths. There we obtain the offset
- * from the page struct.
- */
 static inline void *get_freepointer(struct kmem_cache *s, void *object)
 {
 	return *(void **)(object + s->offset);
@@ -1473,10 +1466,10 @@ static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
 
 		/* Retrieve object from cpu_freelist */
 		object = c->freelist;
-		c->freelist = c->freelist[c->offset];
+		c->freelist = get_freepointer(s, c->freelist);
 
 		/* And put onto the regular freelist */
-		object[c->offset] = page->freelist;
+		set_freepointer(s, object, page->freelist);
 		page->freelist = object;
 		page->inuse--;
 	}
@@ -1635,7 +1628,7 @@ load_freelist:
 	if (unlikely(SLABDEBUG && PageSlubDebug(c->page)))
 		goto debug;
 
-	c->freelist = object[c->offset];
+	c->freelist = get_freepointer(s, object);
 	c->page->inuse = c->page->objects;
 	c->page->freelist = NULL;
 	c->node = page_to_nid(c->page);
@@ -1681,7 +1674,7 @@ debug:
 		goto another_slab;
 
 	c->page->inuse++;
-	c->page->freelist = object[c->offset];
+	c->page->freelist = get_freepointer(s, object);
 	c->node = -1;
 	goto unlock_out;
 }
@@ -1702,7 +1695,6 @@ static __always_inline void *slab_alloc(struct kmem_cache *s,
 	void **object;
 	struct kmem_cache_cpu *c;
 	unsigned long flags;
-	unsigned long objsize;
 
 	gfpflags &= gfp_allowed_mask;
 
@@ -1715,22 +1707,21 @@ static __always_inline void *slab_alloc(struct kmem_cache *s,
 	local_irq_save(flags);
 	c = __this_cpu_ptr(s->cpu_slab);
 	object = c->freelist;
-	objsize = c->objsize;
 	if (unlikely(!object || !node_match(c, node)))
 
 		object = __slab_alloc(s, gfpflags, node, addr, c);
 
 	else {
-		c->freelist = object[c->offset];
+		c->freelist = get_freepointer(s, object);
 		stat(c, ALLOC_FASTPATH);
 	}
 	local_irq_restore(flags);
 
 	if (unlikely(gfpflags & __GFP_ZERO) && object)
-		memset(object, 0, objsize);
+		memset(object, 0, s->objsize);
 
-	kmemcheck_slab_alloc(s, gfpflags, object, c->objsize);
-	kmemleak_alloc_recursive(object, objsize, 1, s->flags, gfpflags);
+	kmemcheck_slab_alloc(s, gfpflags, object, s->objsize);
+	kmemleak_alloc_recursive(object, s->objsize, 1, s->flags, gfpflags);
 
 	return object;
 }
@@ -1785,7 +1776,7 @@ EXPORT_SYMBOL(kmem_cache_alloc_node_notrace);
  * handling required then we can return immediately.
  */
 static void __slab_free(struct kmem_cache *s, struct page *page,
-			void *x, unsigned long addr, unsigned int offset)
+			void *x, unsigned long addr)
 {
 	void *prior;
 	void **object = (void *)x;
@@ -1799,7 +1790,8 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
 		goto debug;
 
 checks_ok:
-	prior = object[offset] = page->freelist;
+	prior = page->freelist;
+	set_freepointer(s, object, prior);
 	page->freelist = object;
 	page->inuse--;
 
@@ -1864,16 +1856,16 @@ static __always_inline void slab_free(struct kmem_cache *s,
 	kmemleak_free_recursive(x, s->flags);
 	local_irq_save(flags);
 	c = __this_cpu_ptr(s->cpu_slab);
-	kmemcheck_slab_free(s, object, c->objsize);
-	debug_check_no_locks_freed(object, c->objsize);
+	kmemcheck_slab_free(s, object, s->objsize);
+	debug_check_no_locks_freed(object, s->objsize);
 	if (!(s->flags & SLAB_DEBUG_OBJECTS))
-		debug_check_no_obj_freed(object, c->objsize);
+		debug_check_no_obj_freed(object, s->objsize);
 	if (likely(page == c->page && c->node >= 0)) {
-		object[c->offset] = c->freelist;
+		set_freepointer(s, object, c->freelist);
 		c->freelist = object;
 		stat(c, FREE_FASTPATH);
 	} else
-		__slab_free(s, page, x, addr, c->offset);
+		__slab_free(s, page, x, addr);
 
 	local_irq_restore(flags);
 }
@@ -2060,19 +2052,6 @@ static unsigned long calculate_alignment(unsigned long flags,
 	return ALIGN(align, sizeof(void *));
 }
 
-static void init_kmem_cache_cpu(struct kmem_cache *s,
-			struct kmem_cache_cpu *c)
-{
-	c->page = NULL;
-	c->freelist = NULL;
-	c->node = 0;
-	c->offset = s->offset / sizeof(void *);
-	c->objsize = s->objsize;
-#ifdef CONFIG_SLUB_STATS
-	memset(c->stat, 0, NR_SLUB_STAT_ITEMS * sizeof(unsigned));
-#endif
-}
-
 static void
 init_kmem_cache_node(struct kmem_cache_node *n, struct kmem_cache *s)
 {
@@ -2090,8 +2069,6 @@ static DEFINE_PER_CPU(struct kmem_cache_cpu, kmalloc_percpu[SLUB_PAGE_SHIFT]);
 
 static inline int alloc_kmem_cache_cpus(struct kmem_cache *s, gfp_t flags)
 {
-	int cpu;
-
 	if (s < kmalloc_caches + KMALLOC_CACHES && s >= kmalloc_caches)
 		/*
 		 * Boot time creation of the kmalloc array. Use static per cpu data
@@ -2104,8 +2081,6 @@ static inline int alloc_kmem_cache_cpus(struct kmem_cache *s, gfp_t flags)
 	if (!s->cpu_slab)
 		return 0;
 
-	for_each_possible_cpu(cpu)
-		init_kmem_cache_cpu(s, per_cpu_ptr(s->cpu_slab, cpu));
 	return 1;
 }
 
@@ -2391,6 +2366,7 @@ static int kmem_cache_open(struct kmem_cache *s, gfp_t gfpflags,
 
 	if (alloc_kmem_cache_cpus(s, gfpflags & ~SLUB_DMA))
 		return 1;
+
 	free_kmem_cache_nodes(s);
 error:
 	if (flags & SLAB_PANIC)
@@ -3247,22 +3223,12 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size,
 	down_write(&slub_lock);
 	s = find_mergeable(size, align, flags, name, ctor);
 	if (s) {
-		int cpu;
-
 		s->refcount++;
 		/*
 		 * Adjust the object sizes so that we clear
 		 * the complete object on kzalloc.
 		 */
 		s->objsize = max(s->objsize, (int)size);
-
-		/*
-		 * And then we need to update the object size in the
-		 * per cpu structures
-		 */
-		for_each_online_cpu(cpu)
-			per_cpu_ptr(s->cpu_slab, cpu)->objsize = s->objsize;
-
 		s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *)));
 		up_write(&slub_lock);
 
@@ -3316,14 +3282,6 @@ static int __cpuinit slab_cpuup_callback(struct notifier_block *nfb,
 	unsigned long flags;
 
 	switch (action) {
-	case CPU_UP_PREPARE:
-	case CPU_UP_PREPARE_FROZEN:
-		down_read(&slub_lock);
-		list_for_each_entry(s, &slab_caches, list)
-			init_kmem_cache_cpu(s, per_cpu_ptr(s->cpu_slab, cpu));
-		up_read(&slub_lock);
-		break;
-
 	case CPU_UP_CANCELED:
 	case CPU_UP_CANCELED_FROZEN:
 	case CPU_DEAD:
-- 
cgit v1.2.3


From 9a418af5df03ad133cd8c8f6742b75e542db6392 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Thu, 17 Dec 2009 13:55:48 +0100
Subject: mac80211: fix peer HT capabilities

I noticed yesterday, because Jeff had noticed
a speed regression, cf. bug
http://bugzilla.intellinuxwireless.org/show_bug.cgi?id=2138
that the SM PS settings for peers were wrong.
Instead of overwriting the SM PS settings with
the local bits, we need to keep the remote bits.

The bug was part of the original HT code from
over two years ago, but unfortunately nobody
noticed that it makes no sense -- we shouldn't
be overwriting the peer's setting with our own
but rather keep it intact when masking the peer
capabilities with our own.

While fixing that, I noticed that the masking of
capabilities is completely useless for most of
the bits, so also fix those other bits.

Finally, I also noticed that PSMP_SUPPORT no
longer exists in the final 802.11n version, so
also remove that.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/rt2x00/rt2800lib.c |  3 +--
 include/linux/ieee80211.h               |  2 +-
 net/mac80211/ht.c                       | 25 ++++++++++++++++++++++---
 3 files changed, 24 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/wireless/rt2x00/rt2800lib.c b/drivers/net/wireless/rt2x00/rt2800lib.c
index 6bf6c0f12f35..27bf887f1453 100644
--- a/drivers/net/wireless/rt2x00/rt2800lib.c
+++ b/drivers/net/wireless/rt2x00/rt2800lib.c
@@ -2080,8 +2080,7 @@ int rt2800_probe_hw_mode(struct rt2x00_dev *rt2x00dev)
 	    IEEE80211_HT_CAP_SGI_20 |
 	    IEEE80211_HT_CAP_SGI_40 |
 	    IEEE80211_HT_CAP_TX_STBC |
-	    IEEE80211_HT_CAP_RX_STBC |
-	    IEEE80211_HT_CAP_PSMP_SUPPORT;
+	    IEEE80211_HT_CAP_RX_STBC;
 	spec->ht.ampdu_factor = 3;
 	spec->ht.ampdu_density = 4;
 	spec->ht.mcs.tx_params =
diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index d9724a28c0c2..163c840437d6 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -832,7 +832,7 @@ struct ieee80211_ht_cap {
 #define IEEE80211_HT_CAP_DELAY_BA		0x0400
 #define IEEE80211_HT_CAP_MAX_AMSDU		0x0800
 #define IEEE80211_HT_CAP_DSSSCCK40		0x1000
-#define IEEE80211_HT_CAP_PSMP_SUPPORT		0x2000
+#define IEEE80211_HT_CAP_RESERVED		0x2000
 #define IEEE80211_HT_CAP_40MHZ_INTOLERANT	0x4000
 #define IEEE80211_HT_CAP_LSIG_TXOP_PROT		0x8000
 
diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c
index 3787455fb696..d7dcee680728 100644
--- a/net/mac80211/ht.c
+++ b/net/mac80211/ht.c
@@ -34,9 +34,28 @@ void ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_supported_band *sband,
 
 	ht_cap->ht_supported = true;
 
-	ht_cap->cap = le16_to_cpu(ht_cap_ie->cap_info) & sband->ht_cap.cap;
-	ht_cap->cap &= ~IEEE80211_HT_CAP_SM_PS;
-	ht_cap->cap |= sband->ht_cap.cap & IEEE80211_HT_CAP_SM_PS;
+	/*
+	 * The bits listed in this expression should be
+	 * the same for the peer and us, if the station
+	 * advertises more then we can't use those thus
+	 * we mask them out.
+	 */
+	ht_cap->cap = le16_to_cpu(ht_cap_ie->cap_info) &
+		(sband->ht_cap.cap |
+		 ~(IEEE80211_HT_CAP_LDPC_CODING |
+		   IEEE80211_HT_CAP_SUP_WIDTH_20_40 |
+		   IEEE80211_HT_CAP_GRN_FLD |
+		   IEEE80211_HT_CAP_SGI_20 |
+		   IEEE80211_HT_CAP_SGI_40 |
+		   IEEE80211_HT_CAP_DSSSCCK40));
+	/*
+	 * The STBC bits are asymmetric -- if we don't have
+	 * TX then mask out the peer's RX and vice versa.
+	 */
+	if (!(sband->ht_cap.cap & IEEE80211_HT_CAP_TX_STBC))
+		ht_cap->cap &= ~IEEE80211_HT_CAP_RX_STBC;
+	if (!(sband->ht_cap.cap & IEEE80211_HT_CAP_RX_STBC))
+		ht_cap->cap &= ~IEEE80211_HT_CAP_TX_STBC;
 
 	ampdu_info = ht_cap_ie->ampdu_params_info;
 	ht_cap->ampdu_factor =
-- 
cgit v1.2.3


From 482928d59db668b8d82a48717f78986d8cea72e9 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 19 Dec 2009 10:10:39 -0500
Subject: Fix f_flags/f_mode in case of lookup_instantiate_filp() from
 open(pathname, 3)

Just set f_flags when shoving struct file into nameidata; don't
postpone that until __dentry_open().  do_filp_open() has correct
value; lookup_instantiate_filp() doesn't - we lose the difference
between O_RDWR and 3 by that point.

We still set .intent.open.flags, so no fs code needs to be changed.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/internal.h         |  7 +++++++
 fs/namei.c            |  6 ++++--
 fs/open.c             | 13 ++++++-------
 include/linux/namei.h |  2 --
 4 files changed, 17 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/fs/internal.h b/fs/internal.h
index f67cd141d9a8..e96a1667d749 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -85,3 +85,10 @@ extern struct file *get_empty_filp(void);
  * super.c
  */
 extern int do_remount_sb(struct super_block *, int, void *, int);
+
+/*
+ * open.c
+ */
+struct nameidata;
+extern struct file *nameidata_to_filp(struct nameidata *);
+extern void release_open_intent(struct nameidata *);
diff --git a/fs/namei.c b/fs/namei.c
index dad4b80257db..d517f73aa36b 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1640,6 +1640,7 @@ struct file *do_filp_open(int dfd, const char *pathname,
 		if (filp == NULL)
 			return ERR_PTR(-ENFILE);
 		nd.intent.open.file = filp;
+		filp->f_flags = open_flag;
 		nd.intent.open.flags = flag;
 		nd.intent.open.create_mode = 0;
 		error = do_path_lookup(dfd, pathname,
@@ -1685,6 +1686,7 @@ struct file *do_filp_open(int dfd, const char *pathname,
 	if (filp == NULL)
 		goto exit_parent;
 	nd.intent.open.file = filp;
+	filp->f_flags = open_flag;
 	nd.intent.open.flags = flag;
 	nd.intent.open.create_mode = mode;
 	dir = nd.path.dentry;
@@ -1725,7 +1727,7 @@ do_last:
 			mnt_drop_write(nd.path.mnt);
 			goto exit;
 		}
-		filp = nameidata_to_filp(&nd, open_flag);
+		filp = nameidata_to_filp(&nd);
 		mnt_drop_write(nd.path.mnt);
 		if (nd.root.mnt)
 			path_put(&nd.root);
@@ -1789,7 +1791,7 @@ ok:
 			mnt_drop_write(nd.path.mnt);
 		goto exit;
 	}
-	filp = nameidata_to_filp(&nd, open_flag);
+	filp = nameidata_to_filp(&nd);
 	if (!IS_ERR(filp)) {
 		error = ima_path_check(&filp->f_path, filp->f_mode &
 			       (MAY_READ | MAY_WRITE | MAY_EXEC));
diff --git a/fs/open.c b/fs/open.c
index ca69241796bd..6daee28f6e8f 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -821,15 +821,14 @@ static inline int __get_file_write_access(struct inode *inode,
 }
 
 static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
-					int flags, struct file *f,
+					struct file *f,
 					int (*open)(struct inode *, struct file *),
 					const struct cred *cred)
 {
 	struct inode *inode;
 	int error;
 
-	f->f_flags = flags;
-	f->f_mode = (__force fmode_t)((flags+1) & O_ACCMODE) | FMODE_LSEEK |
+	f->f_mode = (__force fmode_t)((f->f_flags+1) & O_ACCMODE) | FMODE_LSEEK |
 				FMODE_PREAD | FMODE_PWRITE;
 	inode = dentry->d_inode;
 	if (f->f_mode & FMODE_WRITE) {
@@ -930,7 +929,6 @@ struct file *lookup_instantiate_filp(struct nameidata *nd, struct dentry *dentry
 	if (IS_ERR(dentry))
 		goto out_err;
 	nd->intent.open.file = __dentry_open(dget(dentry), mntget(nd->path.mnt),
-					     nd->intent.open.flags - 1,
 					     nd->intent.open.file,
 					     open, cred);
 out:
@@ -949,7 +947,7 @@ EXPORT_SYMBOL_GPL(lookup_instantiate_filp);
  *
  * Note that this function destroys the original nameidata
  */
-struct file *nameidata_to_filp(struct nameidata *nd, int flags)
+struct file *nameidata_to_filp(struct nameidata *nd)
 {
 	const struct cred *cred = current_cred();
 	struct file *filp;
@@ -958,7 +956,7 @@ struct file *nameidata_to_filp(struct nameidata *nd, int flags)
 	filp = nd->intent.open.file;
 	/* Has the filesystem initialised the file for us? */
 	if (filp->f_path.dentry == NULL)
-		filp = __dentry_open(nd->path.dentry, nd->path.mnt, flags, filp,
+		filp = __dentry_open(nd->path.dentry, nd->path.mnt, filp,
 				     NULL, cred);
 	else
 		path_put(&nd->path);
@@ -997,7 +995,8 @@ struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags,
 		return ERR_PTR(error);
 	}
 
-	return __dentry_open(dentry, mnt, flags, f, NULL, cred);
+	f->f_flags = flags;
+	return __dentry_open(dentry, mnt, f, NULL, cred);
 }
 EXPORT_SYMBOL(dentry_open);
 
diff --git a/include/linux/namei.h b/include/linux/namei.h
index 028946750289..05b441d93642 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -72,8 +72,6 @@ extern int vfs_path_lookup(struct dentry *, struct vfsmount *,
 
 extern struct file *lookup_instantiate_filp(struct nameidata *nd, struct dentry *dentry,
 		int (*open)(struct inode *, struct file *));
-extern struct file *nameidata_to_filp(struct nameidata *nd, int flags);
-extern void release_open_intent(struct nameidata *);
 
 extern struct dentry *lookup_one_len(const char *, struct dentry *, int);
 
-- 
cgit v1.2.3


From 5300990c0370e804e49d9a59d928c5d53fb73487 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 19 Dec 2009 10:15:07 -0500
Subject: Sanitize f_flags helpers

* pull ACC_MODE to fs.h; we have several copies all over the place
* nightmarish expression calculating f_mode by f_flags deserves a helper
too (OPEN_FMODE(flags))

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/anon_inodes.c       | 10 +---------
 fs/namei.c             |  2 --
 fs/open.c              |  2 +-
 include/linux/fs.h     |  3 +++
 kernel/auditsc.c       |  1 -
 security/tomoyo/file.c |  1 -
 6 files changed, 5 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index 598237e97221..9f0bf13291e5 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -89,19 +89,11 @@ struct file *anon_inode_getfile(const char *name,
 	struct qstr this;
 	struct path path;
 	struct file *file;
-	fmode_t mode;
 	int error;
 
 	if (IS_ERR(anon_inode_inode))
 		return ERR_PTR(-ENODEV);
 
-	switch (flags & O_ACCMODE) {
-	case O_RDONLY: mode = FMODE_READ;		break;
-	case O_WRONLY: mode = FMODE_WRITE;		break;
-	case O_RDWR:   mode = FMODE_READ | FMODE_WRITE;	break;
-	default:       return ERR_PTR(-EINVAL);
-	}
-
 	if (fops->owner && !try_module_get(fops->owner))
 		return ERR_PTR(-ENOENT);
 
@@ -129,7 +121,7 @@ struct file *anon_inode_getfile(const char *name,
 	d_instantiate(path.dentry, anon_inode_inode);
 
 	error = -ENFILE;
-	file = alloc_file(&path, mode, fops);
+	file = alloc_file(&path, OPEN_FMODE(flags), fops);
 	if (!file)
 		goto err_dput;
 	file->f_mapping = anon_inode_inode->i_mapping;
diff --git a/fs/namei.c b/fs/namei.c
index d517f73aa36b..68921d9b5302 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -37,8 +37,6 @@
 
 #include "internal.h"
 
-#define ACC_MODE(x) ("\000\004\002\006"[(x)&O_ACCMODE])
-
 /* [Feb-1997 T. Schoebel-Theuer]
  * Fundamental changes in the pathname lookup mechanisms (namei)
  * were necessary because of omirr.  The reason is that omirr needs
diff --git a/fs/open.c b/fs/open.c
index 6daee28f6e8f..040cef72bc00 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -828,7 +828,7 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
 	struct inode *inode;
 	int error;
 
-	f->f_mode = (__force fmode_t)((f->f_flags+1) & O_ACCMODE) | FMODE_LSEEK |
+	f->f_mode = OPEN_FMODE(f->f_flags) | FMODE_LSEEK |
 				FMODE_PREAD | FMODE_PWRITE;
 	inode = dentry->d_inode;
 	if (f->f_mode & FMODE_WRITE) {
diff --git a/include/linux/fs.h b/include/linux/fs.h
index cca191933ff6..9e13b533aaef 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2464,5 +2464,8 @@ int proc_nr_files(struct ctl_table *table, int write,
 
 int __init get_filesystem_list(char *buf);
 
+#define ACC_MODE(x) ("\000\004\002\006"[(x)&O_ACCMODE])
+#define OPEN_FMODE(flag) ((__force fmode_t)((flag + 1) & O_ACCMODE))
+
 #endif /* __KERNEL__ */
 #endif /* _LINUX_FS_H */
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 267e484f0198..fc0f928167e7 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -250,7 +250,6 @@ struct audit_context {
 #endif
 };
 
-#define ACC_MODE(x) ("\004\002\006\006"[(x)&O_ACCMODE])
 static inline int open_arg(int flags, int mask)
 {
 	int n = ACC_MODE(flags);
diff --git a/security/tomoyo/file.c b/security/tomoyo/file.c
index 8346938809b1..9a6c58881c0a 100644
--- a/security/tomoyo/file.c
+++ b/security/tomoyo/file.c
@@ -12,7 +12,6 @@
 #include "common.h"
 #include "tomoyo.h"
 #include "realpath.h"
-#define ACC_MODE(x) ("\000\004\002\006"[(x)&O_ACCMODE])
 
 /*
  * tomoyo_globally_readable_file_entry is a structure which is used for holding
-- 
cgit v1.2.3


From 95ebc3a7930d5965b00bbedbf36bfd3eb9124d65 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruen@suse.de>
Date: Wed, 28 Oct 2009 01:46:33 +0100
Subject: Remove obsolete comment in fs.h

This question was determined to be a bug which was fixed in
commit 4a3b0a49.

Signed-off-by: Andreas Gruenbacher <agruen@suse.de>
Cc: Jan Blunck <jblunck@suse.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/fs.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 9e13b533aaef..7e3012e0ac06 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1624,8 +1624,6 @@ struct super_operations {
  *			on the bit address once it is done.
  *
  * Q: What is the difference between I_WILL_FREE and I_FREEING?
- * Q: igrab() only checks on (I_FREEING|I_WILL_FREE).  Should it also check on
- *    I_CLEAR?  If not, why?
  */
 #define I_DIRTY_SYNC		1
 #define I_DIRTY_DATASYNC	2
-- 
cgit v1.2.3


From 0f78231bffb868a30e8533aace142213266bb811 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Tue, 1 Dec 2009 13:37:02 +0100
Subject: mac80211: enable spatial multiplexing powersave

Enable spatial multiplexing in mac80211 by telling the
driver what to do and, where necessary, sending action
frames to the AP to update the requested SMPS mode.

Also includes a trivial implementation for hwsim that
just logs the requested mode.

For now, the userspace interface is in debugfs only,
and let you toggle the requested mode at any time.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/mac80211_hwsim.c |  24 ++++++--
 include/linux/ieee80211.h             |  25 +++++++-
 include/net/mac80211.h                |  59 ++++++++++++++++++
 net/mac80211/cfg.c                    |  49 +++++++++++++++
 net/mac80211/debugfs_netdev.c         | 111 +++++++++++++++++++++++++++++++++-
 net/mac80211/driver-trace.h           |   2 +
 net/mac80211/ht.c                     |  47 ++++++++++++++
 net/mac80211/ieee80211_i.h            |  14 +++++
 net/mac80211/main.c                   |  24 ++++++++
 net/mac80211/mlme.c                   |  63 +++++++++++++++++--
 net/mac80211/status.c                 |  38 ++++++++++++
 net/mac80211/util.c                   |  74 +++++++++++++++++++++++
 12 files changed, 518 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
index 88e41176e7fd..92c669ebb358 100644
--- a/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c
@@ -618,12 +618,26 @@ static int mac80211_hwsim_config(struct ieee80211_hw *hw, u32 changed)
 {
 	struct mac80211_hwsim_data *data = hw->priv;
 	struct ieee80211_conf *conf = &hw->conf;
-
-	printk(KERN_DEBUG "%s:%s (freq=%d idle=%d ps=%d)\n",
+	static const char *chantypes[4] = {
+		[NL80211_CHAN_NO_HT] = "noht",
+		[NL80211_CHAN_HT20] = "ht20",
+		[NL80211_CHAN_HT40MINUS] = "ht40-",
+		[NL80211_CHAN_HT40PLUS] = "ht40+",
+	};
+	static const char *smps_modes[IEEE80211_SMPS_NUM_MODES] = {
+		[IEEE80211_SMPS_AUTOMATIC] = "auto",
+		[IEEE80211_SMPS_OFF] = "off",
+		[IEEE80211_SMPS_STATIC] = "static",
+		[IEEE80211_SMPS_DYNAMIC] = "dynamic",
+	};
+
+	printk(KERN_DEBUG "%s:%s (freq=%d/%s idle=%d ps=%d smps=%s)\n",
 	       wiphy_name(hw->wiphy), __func__,
 	       conf->channel->center_freq,
+	       chantypes[conf->channel_type],
 	       !!(conf->flags & IEEE80211_CONF_IDLE),
-	       !!(conf->flags & IEEE80211_CONF_PS));
+	       !!(conf->flags & IEEE80211_CONF_PS),
+	       smps_modes[conf->smps_mode]);
 
 	data->idle = !!(conf->flags & IEEE80211_CONF_IDLE);
 
@@ -1082,7 +1096,9 @@ static int __init init_mac80211_hwsim(void)
 			BIT(NL80211_IFTYPE_MESH_POINT);
 
 		hw->flags = IEEE80211_HW_MFP_CAPABLE |
-			    IEEE80211_HW_SIGNAL_DBM;
+			    IEEE80211_HW_SIGNAL_DBM |
+			    IEEE80211_HW_SUPPORTS_STATIC_SMPS |
+			    IEEE80211_HW_SUPPORTS_DYNAMIC_SMPS;
 
 		/* ask mac80211 to reserve space for magic */
 		hw->vif_data_size = sizeof(struct hwsim_vif_priv);
diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index d9724a28c0c2..e8d43d0ff2c3 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -707,6 +707,10 @@ struct ieee80211_mgmt {
 					u8 action;
 					u8 trans_id[WLAN_SA_QUERY_TR_ID_LEN];
 				} __attribute__ ((packed)) sa_query;
+				struct {
+					u8 action;
+					u8 smps_control;
+				} __attribute__ ((packed)) ht_smps;
 			} u;
 		} __attribute__ ((packed)) action;
 	} u;
@@ -824,6 +828,7 @@ struct ieee80211_ht_cap {
 #define IEEE80211_HT_CAP_LDPC_CODING		0x0001
 #define IEEE80211_HT_CAP_SUP_WIDTH_20_40	0x0002
 #define IEEE80211_HT_CAP_SM_PS			0x000C
+#define		IEEE80211_HT_CAP_SM_PS_SHIFT	2
 #define IEEE80211_HT_CAP_GRN_FLD		0x0010
 #define IEEE80211_HT_CAP_SGI_20			0x0020
 #define IEEE80211_HT_CAP_SGI_40			0x0040
@@ -839,6 +844,7 @@ struct ieee80211_ht_cap {
 /* 802.11n HT capability AMPDU settings (for ampdu_params_info) */
 #define IEEE80211_HT_AMPDU_PARM_FACTOR		0x03
 #define IEEE80211_HT_AMPDU_PARM_DENSITY		0x1C
+#define		IEEE80211_HT_AMPDU_PARM_DENSITY_SHIFT	2
 
 /*
  * Maximum length of AMPDU that the STA can receive.
@@ -922,12 +928,17 @@ struct ieee80211_ht_info {
 #define IEEE80211_MAX_AMPDU_BUF 0x40
 
 
-/* Spatial Multiplexing Power Save Modes */
+/* Spatial Multiplexing Power Save Modes (for capability) */
 #define WLAN_HT_CAP_SM_PS_STATIC	0
 #define WLAN_HT_CAP_SM_PS_DYNAMIC	1
 #define WLAN_HT_CAP_SM_PS_INVALID	2
 #define WLAN_HT_CAP_SM_PS_DISABLED	3
 
+/* for SM power control field lower two bits */
+#define WLAN_HT_SMPS_CONTROL_DISABLED	0
+#define WLAN_HT_SMPS_CONTROL_STATIC	1
+#define WLAN_HT_SMPS_CONTROL_DYNAMIC	3
+
 /* Authentication algorithms */
 #define WLAN_AUTH_OPEN 0
 #define WLAN_AUTH_SHARED_KEY 1
@@ -1150,6 +1161,18 @@ enum ieee80211_spectrum_mgmt_actioncode {
 	WLAN_ACTION_SPCT_CHL_SWITCH = 4,
 };
 
+/* HT action codes */
+enum ieee80211_ht_actioncode {
+	WLAN_HT_ACTION_NOTIFY_CHANWIDTH = 0,
+	WLAN_HT_ACTION_SMPS = 1,
+	WLAN_HT_ACTION_PSMP = 2,
+	WLAN_HT_ACTION_PCO_PHASE = 3,
+	WLAN_HT_ACTION_CSI = 4,
+	WLAN_HT_ACTION_NONCOMPRESSED_BF = 5,
+	WLAN_HT_ACTION_COMPRESSED_BF = 6,
+	WLAN_HT_ACTION_ASEL_IDX_FEEDBACK = 7,
+};
+
 /* Security key length */
 enum ieee80211_key_len {
 	WLAN_KEY_LEN_WEP40 = 5,
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index e94cc526b0f6..e6b6bf81d5b9 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -597,8 +597,10 @@ enum ieee80211_conf_flags {
  * @IEEE80211_CONF_CHANGE_CHANNEL: the channel/channel_type changed
  * @IEEE80211_CONF_CHANGE_RETRY_LIMITS: retry limits changed
  * @IEEE80211_CONF_CHANGE_IDLE: Idle flag changed
+ * @IEEE80211_CONF_CHANGE_SMPS: Spatial multiplexing powersave mode changed
  */
 enum ieee80211_conf_changed {
+	IEEE80211_CONF_CHANGE_SMPS		= BIT(1),
 	IEEE80211_CONF_CHANGE_LISTEN_INTERVAL	= BIT(2),
 	IEEE80211_CONF_CHANGE_MONITOR		= BIT(3),
 	IEEE80211_CONF_CHANGE_PS		= BIT(4),
@@ -608,6 +610,21 @@ enum ieee80211_conf_changed {
 	IEEE80211_CONF_CHANGE_IDLE		= BIT(8),
 };
 
+/**
+ * enum ieee80211_smps_mode - spatial multiplexing power save mode
+ *
+ * @
+ */
+enum ieee80211_smps_mode {
+	IEEE80211_SMPS_AUTOMATIC,
+	IEEE80211_SMPS_OFF,
+	IEEE80211_SMPS_STATIC,
+	IEEE80211_SMPS_DYNAMIC,
+
+	/* keep last */
+	IEEE80211_SMPS_NUM_MODES,
+};
+
 /**
  * struct ieee80211_conf - configuration of the device
  *
@@ -636,6 +653,10 @@ enum ieee80211_conf_changed {
  * @short_frame_max_tx_count: Maximum number of transmissions for a "short"
  *    frame, called "dot11ShortRetryLimit" in 802.11, but actually means the
  *    number of transmissions not the number of retries
+ *
+ * @smps_mode: spatial multiplexing powersave mode; note that
+ *	%IEEE80211_SMPS_STATIC is used when the device is not
+ *	configured for an HT channel
  */
 struct ieee80211_conf {
 	u32 flags;
@@ -648,6 +669,7 @@ struct ieee80211_conf {
 
 	struct ieee80211_channel *channel;
 	enum nl80211_channel_type channel_type;
+	enum ieee80211_smps_mode smps_mode;
 };
 
 /**
@@ -930,6 +952,16 @@ enum ieee80211_tkip_key_type {
  * @IEEE80211_HW_BEACON_FILTER:
  *	Hardware supports dropping of irrelevant beacon frames to
  *	avoid waking up cpu.
+ *
+ * @IEEE80211_HW_SUPPORTS_STATIC_SMPS:
+ *	Hardware supports static spatial multiplexing powersave,
+ *	ie. can turn off all but one chain even on HT connections
+ *	that should be using more chains.
+ *
+ * @IEEE80211_HW_SUPPORTS_DYNAMIC_SMPS:
+ *	Hardware supports dynamic spatial multiplexing powersave,
+ *	ie. can turn off all but one chain and then wake the rest
+ *	up as required after, for example, rts/cts handshake.
  */
 enum ieee80211_hw_flags {
 	IEEE80211_HW_HAS_RATE_CONTROL			= 1<<0,
@@ -947,6 +979,8 @@ enum ieee80211_hw_flags {
 	IEEE80211_HW_SUPPORTS_DYNAMIC_PS		= 1<<12,
 	IEEE80211_HW_MFP_CAPABLE			= 1<<13,
 	IEEE80211_HW_BEACON_FILTER			= 1<<14,
+	IEEE80211_HW_SUPPORTS_STATIC_SMPS		= 1<<15,
+	IEEE80211_HW_SUPPORTS_DYNAMIC_SMPS		= 1<<16,
 };
 
 /**
@@ -1214,6 +1248,31 @@ ieee80211_get_alt_retry_rate(const struct ieee80211_hw *hw,
  * signal strength threshold checking.
  */
 
+/**
+ * DOC: Spatial multiplexing power save
+ *
+ * SMPS (Spatial multiplexing power save) is a mechanism to conserve
+ * power in an 802.11n implementation. For details on the mechanism
+ * and rationale, please refer to 802.11 (as amended by 802.11n-2009)
+ * "11.2.3 SM power save".
+ *
+ * The mac80211 implementation is capable of sending action frames
+ * to update the AP about the station's SMPS mode, and will instruct
+ * the driver to enter the specific mode. It will also announce the
+ * requested SMPS mode during the association handshake. Hardware
+ * support for this feature is required, and can be indicated by
+ * hardware flags.
+ *
+ * The default mode will be "automatic", which nl80211/cfg80211
+ * defines to be dynamic SMPS in (regular) powersave, and SMPS
+ * turned off otherwise.
+ *
+ * To support this feature, the driver must set the appropriate
+ * hardware support flags, and handle the SMPS flag to the config()
+ * operation. It will then with this mechanism be instructed to
+ * enter the requested SMPS mode while associated to an HT AP.
+ */
+
 /**
  * DOC: Frame filtering
  *
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index fcfa1bf776a7..8c35418d1c96 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -1318,6 +1318,50 @@ static int ieee80211_testmode_cmd(struct wiphy *wiphy, void *data, int len)
 }
 #endif
 
+int __ieee80211_request_smps(struct ieee80211_sub_if_data *sdata,
+			     enum ieee80211_smps_mode smps_mode)
+{
+	const u8 *ap;
+	enum ieee80211_smps_mode old_req;
+	int err;
+
+	old_req = sdata->u.mgd.req_smps;
+	sdata->u.mgd.req_smps = smps_mode;
+
+	if (old_req == smps_mode &&
+	    smps_mode != IEEE80211_SMPS_AUTOMATIC)
+		return 0;
+
+	/*
+	 * If not associated, or current association is not an HT
+	 * association, there's no need to send an action frame.
+	 */
+	if (!sdata->u.mgd.associated ||
+	    sdata->local->oper_channel_type == NL80211_CHAN_NO_HT) {
+		mutex_lock(&sdata->local->iflist_mtx);
+		ieee80211_recalc_smps(sdata->local, sdata);
+		mutex_unlock(&sdata->local->iflist_mtx);
+		return 0;
+	}
+
+	ap = sdata->u.mgd.associated->cbss.bssid;
+
+	if (smps_mode == IEEE80211_SMPS_AUTOMATIC) {
+		if (sdata->u.mgd.powersave)
+			smps_mode = IEEE80211_SMPS_DYNAMIC;
+		else
+			smps_mode = IEEE80211_SMPS_OFF;
+	}
+
+	/* send SM PS frame to AP */
+	err = ieee80211_send_smps_action(sdata, smps_mode,
+					 ap, ap);
+	if (err)
+		sdata->u.mgd.req_smps = old_req;
+
+	return err;
+}
+
 static int ieee80211_set_power_mgmt(struct wiphy *wiphy, struct net_device *dev,
 				    bool enabled, int timeout)
 {
@@ -1335,6 +1379,11 @@ static int ieee80211_set_power_mgmt(struct wiphy *wiphy, struct net_device *dev,
 	sdata->u.mgd.powersave = enabled;
 	conf->dynamic_ps_timeout = timeout;
 
+	/* no change, but if automatic follow powersave */
+	mutex_lock(&sdata->u.mgd.mtx);
+	__ieee80211_request_smps(sdata, sdata->u.mgd.req_smps);
+	mutex_unlock(&sdata->u.mgd.mtx);
+
 	if (local->hw.flags & IEEE80211_HW_SUPPORTS_DYNAMIC_PS)
 		ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS);
 
diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c
index 5d9c797635a9..355983503885 100644
--- a/net/mac80211/debugfs_netdev.c
+++ b/net/mac80211/debugfs_netdev.c
@@ -41,6 +41,30 @@ static ssize_t ieee80211_if_read(
 	return ret;
 }
 
+static ssize_t ieee80211_if_write(
+	struct ieee80211_sub_if_data *sdata,
+	const char __user *userbuf,
+	size_t count, loff_t *ppos,
+	ssize_t (*write)(struct ieee80211_sub_if_data *, const char *, int))
+{
+	u8 *buf;
+	ssize_t ret = -ENODEV;
+
+	buf = kzalloc(count, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	if (copy_from_user(buf, userbuf, count))
+		return -EFAULT;
+
+	rtnl_lock();
+	if (sdata->dev->reg_state == NETREG_REGISTERED)
+		ret = (*write)(sdata, buf, count);
+	rtnl_unlock();
+
+	return ret;
+}
+
 #define IEEE80211_IF_FMT(name, field, format_string)			\
 static ssize_t ieee80211_if_fmt_##name(					\
 	const struct ieee80211_sub_if_data *sdata, char *buf,		\
@@ -71,7 +95,7 @@ static ssize_t ieee80211_if_fmt_##name(					\
 	return scnprintf(buf, buflen, "%pM\n", sdata->field);		\
 }
 
-#define __IEEE80211_IF_FILE(name)					\
+#define __IEEE80211_IF_FILE(name, _write)				\
 static ssize_t ieee80211_if_read_##name(struct file *file,		\
 					char __user *userbuf,		\
 					size_t count, loff_t *ppos)	\
@@ -82,12 +106,24 @@ static ssize_t ieee80211_if_read_##name(struct file *file,		\
 }									\
 static const struct file_operations name##_ops = {			\
 	.read = ieee80211_if_read_##name,				\
+	.write = (_write),						\
 	.open = mac80211_open_file_generic,				\
 }
 
+#define __IEEE80211_IF_FILE_W(name)					\
+static ssize_t ieee80211_if_write_##name(struct file *file,		\
+					 const char __user *userbuf,	\
+					 size_t count, loff_t *ppos)	\
+{									\
+	return ieee80211_if_write(file->private_data, userbuf, count,	\
+				  ppos, ieee80211_if_parse_##name);	\
+}									\
+__IEEE80211_IF_FILE(name, ieee80211_if_write_##name)
+
+
 #define IEEE80211_IF_FILE(name, field, format)				\
 		IEEE80211_IF_FMT_##format(name, field)			\
-		__IEEE80211_IF_FILE(name)
+		__IEEE80211_IF_FILE(name, NULL)
 
 /* common attributes */
 IEEE80211_IF_FILE(drop_unencrypted, drop_unencrypted, DEC);
@@ -99,6 +135,70 @@ IEEE80211_IF_FILE(bssid, u.mgd.bssid, MAC);
 IEEE80211_IF_FILE(aid, u.mgd.aid, DEC);
 IEEE80211_IF_FILE(capab, u.mgd.capab, HEX);
 
+static int ieee80211_set_smps(struct ieee80211_sub_if_data *sdata,
+			      enum ieee80211_smps_mode smps_mode)
+{
+	struct ieee80211_local *local = sdata->local;
+	int err;
+
+	if (!(local->hw.flags & IEEE80211_HW_SUPPORTS_STATIC_SMPS) &&
+	    smps_mode == IEEE80211_SMPS_STATIC)
+		return -EINVAL;
+
+	/* auto should be dynamic if in PS mode */
+	if (!(local->hw.flags & IEEE80211_HW_SUPPORTS_DYNAMIC_SMPS) &&
+	    (smps_mode == IEEE80211_SMPS_DYNAMIC ||
+	     smps_mode == IEEE80211_SMPS_AUTOMATIC))
+		return -EINVAL;
+
+	/* supported only on managed interfaces for now */
+	if (sdata->vif.type != NL80211_IFTYPE_STATION)
+		return -EOPNOTSUPP;
+
+	mutex_lock(&local->iflist_mtx);
+	err = __ieee80211_request_smps(sdata, smps_mode);
+	mutex_unlock(&local->iflist_mtx);
+
+	return err;
+}
+
+static const char *smps_modes[IEEE80211_SMPS_NUM_MODES] = {
+	[IEEE80211_SMPS_AUTOMATIC] = "auto",
+	[IEEE80211_SMPS_OFF] = "off",
+	[IEEE80211_SMPS_STATIC] = "static",
+	[IEEE80211_SMPS_DYNAMIC] = "dynamic",
+};
+
+static ssize_t ieee80211_if_fmt_smps(const struct ieee80211_sub_if_data *sdata,
+				     char *buf, int buflen)
+{
+	if (sdata->vif.type != NL80211_IFTYPE_STATION)
+		return -EOPNOTSUPP;
+
+	return snprintf(buf, buflen, "request: %s\nused: %s\n",
+			smps_modes[sdata->u.mgd.req_smps],
+			smps_modes[sdata->u.mgd.ap_smps]);
+}
+
+static ssize_t ieee80211_if_parse_smps(struct ieee80211_sub_if_data *sdata,
+				       const char *buf, int buflen)
+{
+	enum ieee80211_smps_mode mode;
+
+	for (mode = 0; mode < IEEE80211_SMPS_NUM_MODES; mode++) {
+		if (strncmp(buf, smps_modes[mode], buflen) == 0) {
+			int err = ieee80211_set_smps(sdata, mode);
+			if (!err)
+				return buflen;
+			return err;
+		}
+	}
+
+	return -EINVAL;
+}
+
+__IEEE80211_IF_FILE_W(smps);
+
 /* AP attributes */
 IEEE80211_IF_FILE(num_sta_ps, u.ap.num_sta_ps, ATOMIC);
 IEEE80211_IF_FILE(dtim_count, u.ap.dtim_count, DEC);
@@ -109,7 +209,7 @@ static ssize_t ieee80211_if_fmt_num_buffered_multicast(
 	return scnprintf(buf, buflen, "%u\n",
 			 skb_queue_len(&sdata->u.ap.ps_bc_buf));
 }
-__IEEE80211_IF_FILE(num_buffered_multicast);
+__IEEE80211_IF_FILE(num_buffered_multicast, NULL);
 
 /* WDS attributes */
 IEEE80211_IF_FILE(peer, u.wds.remote_addr, MAC);
@@ -158,6 +258,10 @@ IEEE80211_IF_FILE(dot11MeshHWMPRootMode,
 	debugfs_create_file(#name, 0400, sdata->debugfs.dir, \
 			    sdata, &name##_ops);
 
+#define DEBUGFS_ADD_MODE(name, mode) \
+	debugfs_create_file(#name, mode, sdata->debugfs.dir, \
+			    sdata, &name##_ops);
+
 static void add_sta_files(struct ieee80211_sub_if_data *sdata)
 {
 	DEBUGFS_ADD(drop_unencrypted, sta);
@@ -167,6 +271,7 @@ static void add_sta_files(struct ieee80211_sub_if_data *sdata)
 	DEBUGFS_ADD(bssid, sta);
 	DEBUGFS_ADD(aid, sta);
 	DEBUGFS_ADD(capab, sta);
+	DEBUGFS_ADD_MODE(smps, 0600);
 }
 
 static void add_ap_files(struct ieee80211_sub_if_data *sdata)
diff --git a/net/mac80211/driver-trace.h b/net/mac80211/driver-trace.h
index ee2d19a25ce1..7a849b920165 100644
--- a/net/mac80211/driver-trace.h
+++ b/net/mac80211/driver-trace.h
@@ -140,6 +140,7 @@ TRACE_EVENT(drv_config,
 		__field(u8, short_frame_max_tx_count)
 		__field(int, center_freq)
 		__field(int, channel_type)
+		__field(int, smps)
 	),
 
 	TP_fast_assign(
@@ -155,6 +156,7 @@ TRACE_EVENT(drv_config,
 		__entry->short_frame_max_tx_count = local->hw.conf.short_frame_max_tx_count;
 		__entry->center_freq = local->hw.conf.channel->center_freq;
 		__entry->channel_type = local->hw.conf.channel_type;
+		__entry->smps = local->hw.conf.smps_mode;
 	),
 
 	TP_printk(
diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c
index 45ebd062a2fb..63b8f86b7f16 100644
--- a/net/mac80211/ht.c
+++ b/net/mac80211/ht.c
@@ -166,3 +166,50 @@ void ieee80211_process_delba(struct ieee80211_sub_if_data *sdata,
 		spin_unlock_bh(&sta->lock);
 	}
 }
+
+int ieee80211_send_smps_action(struct ieee80211_sub_if_data *sdata,
+			       enum ieee80211_smps_mode smps, const u8 *da,
+			       const u8 *bssid)
+{
+	struct ieee80211_local *local = sdata->local;
+	struct sk_buff *skb;
+	struct ieee80211_mgmt *action_frame;
+
+	/* 27 = header + category + action + smps mode */
+	skb = dev_alloc_skb(27 + local->hw.extra_tx_headroom);
+	if (!skb)
+		return -ENOMEM;
+
+	skb_reserve(skb, local->hw.extra_tx_headroom);
+	action_frame = (void *)skb_put(skb, 27);
+	memcpy(action_frame->da, da, ETH_ALEN);
+	memcpy(action_frame->sa, sdata->dev->dev_addr, ETH_ALEN);
+	memcpy(action_frame->bssid, bssid, ETH_ALEN);
+	action_frame->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
+						  IEEE80211_STYPE_ACTION);
+	action_frame->u.action.category = WLAN_CATEGORY_HT;
+	action_frame->u.action.u.ht_smps.action = WLAN_HT_ACTION_SMPS;
+	switch (smps) {
+	case IEEE80211_SMPS_AUTOMATIC:
+	case IEEE80211_SMPS_NUM_MODES:
+		WARN_ON(1);
+	case IEEE80211_SMPS_OFF:
+		action_frame->u.action.u.ht_smps.smps_control =
+				WLAN_HT_SMPS_CONTROL_DISABLED;
+		break;
+	case IEEE80211_SMPS_STATIC:
+		action_frame->u.action.u.ht_smps.smps_control =
+				WLAN_HT_SMPS_CONTROL_STATIC;
+		break;
+	case IEEE80211_SMPS_DYNAMIC:
+		action_frame->u.action.u.ht_smps.smps_control =
+				WLAN_HT_SMPS_CONTROL_DYNAMIC;
+		break;
+	}
+
+	/* we'll do more on status of this frame */
+	IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_CTL_REQ_TX_STATUS;
+	ieee80211_tx_skb(sdata, skb);
+
+	return 0;
+}
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 178e329f9257..e63aecbddfbe 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -297,6 +297,8 @@ struct ieee80211_if_managed {
 
 	unsigned long timers_running; /* used for quiesce/restart */
 	bool powersave; /* powersave requested for this iface */
+	enum ieee80211_smps_mode req_smps, /* requested smps mode */
+				 ap_smps; /* smps mode AP thinks we're in */
 
 	unsigned long request;
 
@@ -587,6 +589,9 @@ struct ieee80211_local {
 	/* used for uploading changed mc list */
 	struct work_struct reconfig_filter;
 
+	/* used to reconfigure hardware SM PS */
+	struct work_struct recalc_smps;
+
 	/* aggregated multicast list */
 	struct dev_addr_list *mc_list;
 	int mc_count;
@@ -760,6 +765,8 @@ struct ieee80211_local {
 	int user_power_level; /* in dBm */
 	int power_constr_level; /* in dBm */
 
+	enum ieee80211_smps_mode smps_mode;
+
 	struct work_struct restart_work;
 
 #ifdef CONFIG_MAC80211_DEBUGFS
@@ -978,6 +985,9 @@ void ieee80211_send_bar(struct ieee80211_sub_if_data *sdata, u8 *ra, u16 tid, u1
 void ieee80211_send_delba(struct ieee80211_sub_if_data *sdata,
 			  const u8 *da, u16 tid,
 			  u16 initiator, u16 reason_code);
+int ieee80211_send_smps_action(struct ieee80211_sub_if_data *sdata,
+			       enum ieee80211_smps_mode smps, const u8 *da,
+			       const u8 *bssid);
 
 void ieee80211_sta_stop_rx_ba_session(struct ieee80211_sub_if_data *sdata, u8 *da,
 				u16 tid, u16 initiator, u16 reason);
@@ -1088,6 +1098,10 @@ void ieee80211_sta_def_wmm_params(struct ieee80211_sub_if_data *sdata,
 u32 ieee80211_sta_get_rates(struct ieee80211_local *local,
 			    struct ieee802_11_elems *elems,
 			    enum ieee80211_band band);
+int __ieee80211_request_smps(struct ieee80211_sub_if_data *sdata,
+			     enum ieee80211_smps_mode smps_mode);
+void ieee80211_recalc_smps(struct ieee80211_local *local,
+			   struct ieee80211_sub_if_data *forsdata);
 
 #ifdef CONFIG_MAC80211_NOINLINE
 #define debug_noinline noinline
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 98320a94c270..e1293e8ed83a 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -113,6 +113,18 @@ int ieee80211_hw_config(struct ieee80211_local *local, u32 changed)
 		changed |= IEEE80211_CONF_CHANGE_CHANNEL;
 	}
 
+	if (!conf_is_ht(&local->hw.conf)) {
+		/*
+		 * mac80211.h documents that this is only valid
+		 * when the channel is set to an HT type, and
+		 * that otherwise STATIC is used.
+		 */
+		local->hw.conf.smps_mode = IEEE80211_SMPS_STATIC;
+	} else if (local->hw.conf.smps_mode != local->smps_mode) {
+		local->hw.conf.smps_mode = local->smps_mode;
+		changed |= IEEE80211_CONF_CHANGE_SMPS;
+	}
+
 	if (scan_chan)
 		power = chan->max_power;
 	else
@@ -297,6 +309,16 @@ void ieee80211_restart_hw(struct ieee80211_hw *hw)
 }
 EXPORT_SYMBOL(ieee80211_restart_hw);
 
+static void ieee80211_recalc_smps_work(struct work_struct *work)
+{
+	struct ieee80211_local *local =
+		container_of(work, struct ieee80211_local, recalc_smps);
+
+	mutex_lock(&local->iflist_mtx);
+	ieee80211_recalc_smps(local, NULL);
+	mutex_unlock(&local->iflist_mtx);
+}
+
 struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len,
 					const struct ieee80211_ops *ops)
 {
@@ -370,6 +392,8 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len,
 	INIT_WORK(&local->restart_work, ieee80211_restart_work);
 
 	INIT_WORK(&local->reconfig_filter, ieee80211_reconfig_filter);
+	INIT_WORK(&local->recalc_smps, ieee80211_recalc_smps_work);
+	local->smps_mode = IEEE80211_SMPS_OFF;
 
 	INIT_WORK(&local->dynamic_ps_enable_work,
 		  ieee80211_dynamic_ps_enable_work);
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index cd5dcc3d8c2b..0a762a9ba4df 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -398,6 +398,8 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata,
 		__le16 tmp;
 		u32 flags = local->hw.conf.channel->flags;
 
+		/* determine capability flags */
+
 		switch (ht_info->ht_param & IEEE80211_HT_PARAM_CHA_SEC_OFFSET) {
 		case IEEE80211_HT_PARAM_CHA_SEC_ABOVE:
 			if (flags & IEEE80211_CHAN_NO_HT40PLUS) {
@@ -413,17 +415,64 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata,
 			break;
 		}
 
-		tmp = cpu_to_le16(cap);
-		pos = skb_put(skb, sizeof(struct ieee80211_ht_cap)+2);
+		/* set SM PS mode properly */
+		cap &= ~IEEE80211_HT_CAP_SM_PS;
+		/* new association always uses requested smps mode */
+		if (ifmgd->req_smps == IEEE80211_SMPS_AUTOMATIC) {
+			if (ifmgd->powersave)
+				ifmgd->ap_smps = IEEE80211_SMPS_DYNAMIC;
+			else
+				ifmgd->ap_smps = IEEE80211_SMPS_OFF;
+		} else
+			ifmgd->ap_smps = ifmgd->req_smps;
+
+		switch (ifmgd->ap_smps) {
+		case IEEE80211_SMPS_AUTOMATIC:
+		case IEEE80211_SMPS_NUM_MODES:
+			WARN_ON(1);
+		case IEEE80211_SMPS_OFF:
+			cap |= WLAN_HT_CAP_SM_PS_DISABLED <<
+				IEEE80211_HT_CAP_SM_PS_SHIFT;
+			break;
+		case IEEE80211_SMPS_STATIC:
+			cap |= WLAN_HT_CAP_SM_PS_STATIC <<
+				IEEE80211_HT_CAP_SM_PS_SHIFT;
+			break;
+		case IEEE80211_SMPS_DYNAMIC:
+			cap |= WLAN_HT_CAP_SM_PS_DYNAMIC <<
+				IEEE80211_HT_CAP_SM_PS_SHIFT;
+			break;
+		}
+
+		/* reserve and fill IE */
+
+		pos = skb_put(skb, sizeof(struct ieee80211_ht_cap) + 2);
 		*pos++ = WLAN_EID_HT_CAPABILITY;
 		*pos++ = sizeof(struct ieee80211_ht_cap);
 		memset(pos, 0, sizeof(struct ieee80211_ht_cap));
+
+		/* capability flags */
+		tmp = cpu_to_le16(cap);
 		memcpy(pos, &tmp, sizeof(u16));
 		pos += sizeof(u16);
-		/* TODO: needs a define here for << 2 */
+
+		/* AMPDU parameters */
 		*pos++ = sband->ht_cap.ampdu_factor |
-			 (sband->ht_cap.ampdu_density << 2);
+			 (sband->ht_cap.ampdu_density <<
+				IEEE80211_HT_AMPDU_PARM_DENSITY_SHIFT);
+
+		/* MCS set */
 		memcpy(pos, &sband->ht_cap.mcs, sizeof(sband->ht_cap.mcs));
+		pos += sizeof(sband->ht_cap.mcs);
+
+		/* extended capabilities */
+		pos += sizeof(__le16);
+
+		/* BF capabilities */
+		pos += sizeof(__le32);
+
+		/* antenna selection */
+		pos += sizeof(u8);
 	}
 
 	IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT;
@@ -932,6 +981,7 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata,
 
 	mutex_lock(&local->iflist_mtx);
 	ieee80211_recalc_ps(local, -1);
+	ieee80211_recalc_smps(local, sdata);
 	mutex_unlock(&local->iflist_mtx);
 
 	netif_start_queue(sdata->dev);
@@ -2327,6 +2377,11 @@ void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata)
 		ifmgd->flags |= IEEE80211_STA_WMM_ENABLED;
 
 	mutex_init(&ifmgd->mtx);
+
+	if (sdata->local->hw.flags & IEEE80211_HW_SUPPORTS_DYNAMIC_SMPS)
+		ifmgd->req_smps = IEEE80211_SMPS_AUTOMATIC;
+	else
+		ifmgd->req_smps = IEEE80211_SMPS_OFF;
 }
 
 /* scan finished notification */
diff --git a/net/mac80211/status.c b/net/mac80211/status.c
index b4608f11a40f..0c0850d37dda 100644
--- a/net/mac80211/status.c
+++ b/net/mac80211/status.c
@@ -134,6 +134,40 @@ static void ieee80211_handle_filtered_frame(struct ieee80211_local *local,
 	dev_kfree_skb(skb);
 }
 
+static void ieee80211_frame_acked(struct sta_info *sta, struct sk_buff *skb)
+{
+	struct ieee80211_mgmt *mgmt = (void *) skb->data;
+	struct ieee80211_local *local = sta->local;
+	struct ieee80211_sub_if_data *sdata = sta->sdata;
+
+	if (ieee80211_is_action(mgmt->frame_control) &&
+	    sdata->vif.type == NL80211_IFTYPE_STATION &&
+	    mgmt->u.action.category == WLAN_CATEGORY_HT &&
+	    mgmt->u.action.u.ht_smps.action == WLAN_HT_ACTION_SMPS) {
+		/*
+		 * This update looks racy, but isn't -- if we come
+		 * here we've definitely got a station that we're
+		 * talking to, and on a managed interface that can
+		 * only be the AP. And the only other place updating
+		 * this variable is before we're associated.
+		 */
+		switch (mgmt->u.action.u.ht_smps.smps_control) {
+		case WLAN_HT_SMPS_CONTROL_DYNAMIC:
+			sta->sdata->u.mgd.ap_smps = IEEE80211_SMPS_DYNAMIC;
+			break;
+		case WLAN_HT_SMPS_CONTROL_STATIC:
+			sta->sdata->u.mgd.ap_smps = IEEE80211_SMPS_STATIC;
+			break;
+		case WLAN_HT_SMPS_CONTROL_DISABLED:
+		default: /* shouldn't happen since we don't send that */
+			sta->sdata->u.mgd.ap_smps = IEEE80211_SMPS_OFF;
+			break;
+		}
+
+		ieee80211_queue_work(&local->hw, &local->recalc_smps);
+	}
+}
+
 void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
 {
 	struct sk_buff *skb2;
@@ -210,6 +244,10 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
 		rate_control_tx_status(local, sband, sta, skb);
 		if (ieee80211_vif_is_mesh(&sta->sdata->vif))
 			ieee80211s_update_metric(local, sta, skb);
+
+		if (!(info->flags & IEEE80211_TX_CTL_INJECTED) &&
+		    (info->flags & IEEE80211_TX_STAT_ACK))
+			ieee80211_frame_acked(sta, skb);
 	}
 
 	rcu_read_unlock();
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index d54dbe8e09ba..086ef6257b4b 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -1170,3 +1170,77 @@ int ieee80211_reconfig(struct ieee80211_local *local)
 	return 0;
 }
 
+static int check_mgd_smps(struct ieee80211_if_managed *ifmgd,
+			  enum ieee80211_smps_mode *smps_mode)
+{
+	if (ifmgd->associated) {
+		*smps_mode = ifmgd->ap_smps;
+
+		if (*smps_mode == IEEE80211_SMPS_AUTOMATIC) {
+			if (ifmgd->powersave)
+				*smps_mode = IEEE80211_SMPS_DYNAMIC;
+			else
+				*smps_mode = IEEE80211_SMPS_OFF;
+		}
+
+		return 1;
+	}
+
+	return 0;
+}
+
+/* must hold iflist_mtx */
+void ieee80211_recalc_smps(struct ieee80211_local *local,
+			   struct ieee80211_sub_if_data *forsdata)
+{
+	struct ieee80211_sub_if_data *sdata;
+	enum ieee80211_smps_mode smps_mode = IEEE80211_SMPS_OFF;
+	int count = 0;
+
+	if (forsdata)
+		WARN_ON(!mutex_is_locked(&forsdata->u.mgd.mtx));
+
+	WARN_ON(!mutex_is_locked(&local->iflist_mtx));
+
+	/*
+	 * This function could be improved to handle multiple
+	 * interfaces better, but right now it makes any
+	 * non-station interfaces force SM PS to be turned
+	 * off. If there are multiple station interfaces it
+	 * could also use the best possible mode, e.g. if
+	 * one is in static and the other in dynamic then
+	 * dynamic is ok.
+	 */
+
+	list_for_each_entry(sdata, &local->interfaces, list) {
+		if (!netif_running(sdata->dev))
+			continue;
+		if (sdata->vif.type != NL80211_IFTYPE_STATION)
+			goto set;
+		if (sdata != forsdata) {
+			/*
+			 * This nested is ok -- we are holding the iflist_mtx
+			 * so can't get here twice or so. But it's required
+			 * since normally we acquire it first and then the
+			 * iflist_mtx.
+			 */
+			mutex_lock_nested(&sdata->u.mgd.mtx, SINGLE_DEPTH_NESTING);
+			count += check_mgd_smps(&sdata->u.mgd, &smps_mode);
+			mutex_unlock(&sdata->u.mgd.mtx);
+		} else
+			count += check_mgd_smps(&sdata->u.mgd, &smps_mode);
+
+		if (count > 1) {
+			smps_mode = IEEE80211_SMPS_OFF;
+			break;
+		}
+	}
+
+	if (smps_mode == local->smps_mode)
+		return;
+
+ set:
+	local->smps_mode = smps_mode;
+	/* changed flag is auto-detected for this */
+	ieee80211_hw_config(local, 0);
+}
-- 
cgit v1.2.3


From 9da3e068142ec7856b2f13261dcf0660fad32b61 Mon Sep 17 00:00:00 2001
From: "Luis R. Rodriguez" <lrodriguez@atheros.com>
Date: Mon, 7 Dec 2009 15:57:50 -0500
Subject: mac80211: only bother printing highest data rate on debugfs if its
 set

IEEE-802.11n spec says the RX highest data rate field does
not specify the highest supported RX data rate if its not set.
Ignore it if not set then. Refer to section 7.3.56.4

Cc: johannes@sipsolutions.net
Signed-off-by: Luis R. Rodriguez <lrodriguez@atheros.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/ieee80211.h  |  5 ++++-
 net/mac80211/debugfs_sta.c | 12 ++++++++++--
 2 files changed, 14 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index e8d43d0ff2c3..098bedcde9bb 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -775,7 +775,10 @@ struct ieee80211_bar {
 /**
  * struct ieee80211_mcs_info - MCS information
  * @rx_mask: RX mask
- * @rx_highest: highest supported RX rate
+ * @rx_highest: highest supported RX rate. If set represents
+ *	the highest supported RX data rate in units of 1 Mbps.
+ *	If this field is 0 this value should not be used to
+ *	consider the highest RX data rate supported.
  * @tx_params: TX parameters
  */
 struct ieee80211_mcs_info {
diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c
index c833b6ce9902..0d4a759ba72c 100644
--- a/net/mac80211/debugfs_sta.c
+++ b/net/mac80211/debugfs_sta.c
@@ -218,11 +218,19 @@ static ssize_t sta_ht_capa_read(struct file *file, char __user *userbuf,
 		p += scnprintf(p, sizeof(buf)+buf-p, "ampdu factor/density: %d/%d\n",
 				htc->ampdu_factor, htc->ampdu_density);
 		p += scnprintf(p, sizeof(buf)+buf-p, "MCS mask:");
+
 		for (i = 0; i < IEEE80211_HT_MCS_MASK_LEN; i++)
 			p += scnprintf(p, sizeof(buf)+buf-p, " %.2x",
 					htc->mcs.rx_mask[i]);
-		p += scnprintf(p, sizeof(buf)+buf-p, "\nMCS rx highest: %d\n",
-				le16_to_cpu(htc->mcs.rx_highest));
+		p += scnprintf(p, sizeof(buf)+buf-p, "\n");
+
+		/* If not set this is meaningless */
+		if (le16_to_cpu(htc->mcs.rx_highest)) {
+			p += scnprintf(p, sizeof(buf)+buf-p,
+				       "MCS rx highest: %d Mbps\n",
+				       le16_to_cpu(htc->mcs.rx_highest));
+		}
+
 		p += scnprintf(p, sizeof(buf)+buf-p, "MCS tx params: %x\n",
 				htc->mcs.tx_params);
 	}
-- 
cgit v1.2.3


From 45465487897a1c6d508b14b904dc5777f7ec7e04 Mon Sep 17 00:00:00 2001
From: Stefani Seibold <stefani@seibold.net>
Date: Mon, 21 Dec 2009 14:37:26 -0800
Subject: kfifo: move struct kfifo in place

This is a new generic kernel FIFO implementation.

The current kernel fifo API is not very widely used, because it has to
many constrains.  Only 17 files in the current 2.6.31-rc5 used it.
FIFO's are like list's a very basic thing and a kfifo API which handles
the most use case would save a lot of development time and memory
resources.

I think this are the reasons why kfifo is not in use:

 - The API is to simple, important functions are missing
 - A fifo can be only allocated dynamically
 - There is a requirement of a spinlock whether you need it or not
 - There is no support for data records inside a fifo

So I decided to extend the kfifo in a more generic way without blowing up
the API to much.  The new API has the following benefits:

 - Generic usage: For kernel internal use and/or device driver.
 - Provide an API for the most use case.
 - Slim API: The whole API provides 25 functions.
 - Linux style habit.
 - DECLARE_KFIFO, DEFINE_KFIFO and INIT_KFIFO Macros
 - Direct copy_to_user from the fifo and copy_from_user into the fifo.
 - The kfifo itself is an in place member of the using data structure, this save an
   indirection access and does not waste the kernel allocator.
 - Lockless access: if only one reader and one writer is active on the fifo,
   which is the common use case, no additional locking is necessary.
 - Remove spinlock - give the user the freedom of choice what kind of locking to use if
   one is required.
 - Ability to handle records. Three type of records are supported:
   - Variable length records between 0-255 bytes, with a record size
     field of 1 bytes.
   - Variable length records between 0-65535 bytes, with a record size
     field of 2 bytes.
   - Fixed size records, which no record size field.
 - Preserve memory resource.
 - Performance!
 - Easy to use!

This patch:

Since most users want to have the kfifo as part of another object,
reorganize the code to allow including struct kfifo in another data
structure.  This requires changing the kfifo_alloc and kfifo_init
prototypes so that we pass an existing kfifo pointer into them.  This
patch changes the implementation and all existing users.

[akpm@linux-foundation.org: fix warning]
Signed-off-by: Stefani Seibold <stefani@seibold.net>
Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
Acked-by: Mauro Carvalho Chehab <mchehab@redhat.com>
Acked-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/char/nozomi.c                       | 21 +++++-----
 drivers/char/sonypi.c                       | 40 +++++++++---------
 drivers/infiniband/hw/cxgb3/cxio_hal.h      |  9 ++--
 drivers/infiniband/hw/cxgb3/cxio_resource.c | 60 +++++++++++++-------------
 drivers/media/video/meye.c                  | 48 ++++++++++-----------
 drivers/media/video/meye.h                  |  4 +-
 drivers/net/wireless/libertas/cmd.c         |  4 +-
 drivers/net/wireless/libertas/dev.h         |  4 +-
 drivers/net/wireless/libertas/main.c        | 16 ++++---
 drivers/platform/x86/fujitsu-laptop.c       | 18 ++++----
 drivers/platform/x86/sony-laptop.c          | 46 ++++++++++----------
 drivers/scsi/libiscsi.c                     | 22 ++++------
 drivers/scsi/libiscsi_tcp.c                 | 29 +++++++------
 drivers/scsi/libsrp.c                       | 13 +++---
 drivers/usb/host/fhci-sched.c               | 10 ++---
 drivers/usb/host/fhci-tds.c                 | 35 ++++++++--------
 drivers/usb/host/fhci.h                     | 10 ++---
 drivers/usb/serial/usb-serial.c             |  5 +--
 include/linux/kfifo.h                       | 11 ++---
 include/scsi/libiscsi.h                     |  3 +-
 include/scsi/libiscsi_tcp.h                 |  2 +-
 include/scsi/libsrp.h                       |  2 +-
 kernel/kfifo.c                              | 65 +++++++++++++++--------------
 net/dccp/probe.c                            | 20 ++++-----
 24 files changed, 238 insertions(+), 259 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/char/nozomi.c b/drivers/char/nozomi.c
index d3400b20444f..0f39bec28b45 100644
--- a/drivers/char/nozomi.c
+++ b/drivers/char/nozomi.c
@@ -358,7 +358,7 @@ struct port {
 	u8 update_flow_control;
 	struct ctrl_ul ctrl_ul;
 	struct ctrl_dl ctrl_dl;
-	struct kfifo *fifo_ul;
+	struct kfifo fifo_ul;
 	void __iomem *dl_addr[2];
 	u32 dl_size[2];
 	u8 toggle_dl;
@@ -685,8 +685,8 @@ static int nozomi_read_config_table(struct nozomi *dc)
 		dump_table(dc);
 
 		for (i = PORT_MDM; i < MAX_PORT; i++) {
-			dc->port[i].fifo_ul =
-			    kfifo_alloc(FIFO_BUFFER_SIZE_UL, GFP_ATOMIC, NULL);
+			kfifo_alloc(&dc->port[i].fifo_ul,
+				FIFO_BUFFER_SIZE_UL, GFP_ATOMIC, NULL);
 			memset(&dc->port[i].ctrl_dl, 0, sizeof(struct ctrl_dl));
 			memset(&dc->port[i].ctrl_ul, 0, sizeof(struct ctrl_ul));
 		}
@@ -798,7 +798,7 @@ static int send_data(enum port_type index, struct nozomi *dc)
 	struct tty_struct *tty = tty_port_tty_get(&port->port);
 
 	/* Get data from tty and place in buf for now */
-	size = __kfifo_get(port->fifo_ul, dc->send_buf,
+	size = __kfifo_get(&port->fifo_ul, dc->send_buf,
 			   ul_size < SEND_BUF_MAX ? ul_size : SEND_BUF_MAX);
 
 	if (size == 0) {
@@ -988,11 +988,11 @@ static int receive_flow_control(struct nozomi *dc)
 
 	} else if (old_ctrl.CTS == 0 && ctrl_dl.CTS == 1) {
 
-		if (__kfifo_len(dc->port[port].fifo_ul)) {
+		if (__kfifo_len(&dc->port[port].fifo_ul)) {
 			DBG1("Enable interrupt (0x%04X) on port: %d",
 				enable_ier, port);
 			DBG1("Data in buffer [%d], enable transmit! ",
-				__kfifo_len(dc->port[port].fifo_ul));
+				__kfifo_len(&dc->port[port].fifo_ul));
 			enable_transmit_ul(port, dc);
 		} else {
 			DBG1("No data in buffer...");
@@ -1536,8 +1536,7 @@ static void __devexit nozomi_card_exit(struct pci_dev *pdev)
 	free_irq(pdev->irq, dc);
 
 	for (i = 0; i < MAX_PORT; i++)
-		if (dc->port[i].fifo_ul)
-			kfifo_free(dc->port[i].fifo_ul);
+		kfifo_free(&dc->port[i].fifo_ul);
 
 	kfree(dc->send_buf);
 
@@ -1673,7 +1672,7 @@ static int ntty_write(struct tty_struct *tty, const unsigned char *buffer,
 		goto exit;
 	}
 
-	rval = __kfifo_put(port->fifo_ul, (unsigned char *)buffer, count);
+	rval = __kfifo_put(&port->fifo_ul, (unsigned char *)buffer, count);
 
 	/* notify card */
 	if (unlikely(dc == NULL)) {
@@ -1721,7 +1720,7 @@ static int ntty_write_room(struct tty_struct *tty)
 	if (!port->port.count)
 		goto exit;
 
-	room = port->fifo_ul->size - __kfifo_len(port->fifo_ul);
+	room = port->fifo_ul.size - __kfifo_len(&port->fifo_ul);
 
 exit:
 	mutex_unlock(&port->tty_sem);
@@ -1878,7 +1877,7 @@ static s32 ntty_chars_in_buffer(struct tty_struct *tty)
 		goto exit_in_buffer;
 	}
 
-	rval = __kfifo_len(port->fifo_ul);
+	rval = __kfifo_len(&port->fifo_ul);
 
 exit_in_buffer:
 	return rval;
diff --git a/drivers/char/sonypi.c b/drivers/char/sonypi.c
index 8c262aaf7c26..9e6efb1f029f 100644
--- a/drivers/char/sonypi.c
+++ b/drivers/char/sonypi.c
@@ -487,7 +487,7 @@ static struct sonypi_device {
 	int camera_power;
 	int bluetooth_power;
 	struct mutex lock;
-	struct kfifo *fifo;
+	struct kfifo fifo;
 	spinlock_t fifo_lock;
 	wait_queue_head_t fifo_proc_list;
 	struct fasync_struct *fifo_async;
@@ -496,7 +496,7 @@ static struct sonypi_device {
 	struct input_dev *input_jog_dev;
 	struct input_dev *input_key_dev;
 	struct work_struct input_work;
-	struct kfifo *input_fifo;
+	struct kfifo input_fifo;
 	spinlock_t input_fifo_lock;
 } sonypi_device;
 
@@ -777,7 +777,7 @@ static void input_keyrelease(struct work_struct *work)
 {
 	struct sonypi_keypress kp;
 
-	while (kfifo_get(sonypi_device.input_fifo, (unsigned char *)&kp,
+	while (kfifo_get(&sonypi_device.input_fifo, (unsigned char *)&kp,
 			 sizeof(kp)) == sizeof(kp)) {
 		msleep(10);
 		input_report_key(kp.dev, kp.key, 0);
@@ -827,7 +827,7 @@ static void sonypi_report_input_event(u8 event)
 	if (kp.dev) {
 		input_report_key(kp.dev, kp.key, 1);
 		input_sync(kp.dev);
-		kfifo_put(sonypi_device.input_fifo,
+		kfifo_put(&sonypi_device.input_fifo,
 			  (unsigned char *)&kp, sizeof(kp));
 		schedule_work(&sonypi_device.input_work);
 	}
@@ -880,7 +880,7 @@ found:
 		acpi_bus_generate_proc_event(sonypi_acpi_device, 1, event);
 #endif
 
-	kfifo_put(sonypi_device.fifo, (unsigned char *)&event, sizeof(event));
+	kfifo_put(&sonypi_device.fifo, (unsigned char *)&event, sizeof(event));
 	kill_fasync(&sonypi_device.fifo_async, SIGIO, POLL_IN);
 	wake_up_interruptible(&sonypi_device.fifo_proc_list);
 
@@ -906,7 +906,7 @@ static int sonypi_misc_open(struct inode *inode, struct file *file)
 	mutex_lock(&sonypi_device.lock);
 	/* Flush input queue on first open */
 	if (!sonypi_device.open_count)
-		kfifo_reset(sonypi_device.fifo);
+		kfifo_reset(&sonypi_device.fifo);
 	sonypi_device.open_count++;
 	mutex_unlock(&sonypi_device.lock);
 	unlock_kernel();
@@ -919,17 +919,17 @@ static ssize_t sonypi_misc_read(struct file *file, char __user *buf,
 	ssize_t ret;
 	unsigned char c;
 
-	if ((kfifo_len(sonypi_device.fifo) == 0) &&
+	if ((kfifo_len(&sonypi_device.fifo) == 0) &&
 	    (file->f_flags & O_NONBLOCK))
 		return -EAGAIN;
 
 	ret = wait_event_interruptible(sonypi_device.fifo_proc_list,
-				       kfifo_len(sonypi_device.fifo) != 0);
+				       kfifo_len(&sonypi_device.fifo) != 0);
 	if (ret)
 		return ret;
 
 	while (ret < count &&
-	       (kfifo_get(sonypi_device.fifo, &c, sizeof(c)) == sizeof(c))) {
+	       (kfifo_get(&sonypi_device.fifo, &c, sizeof(c)) == sizeof(c))) {
 		if (put_user(c, buf++))
 			return -EFAULT;
 		ret++;
@@ -946,7 +946,7 @@ static ssize_t sonypi_misc_read(struct file *file, char __user *buf,
 static unsigned int sonypi_misc_poll(struct file *file, poll_table *wait)
 {
 	poll_wait(file, &sonypi_device.fifo_proc_list, wait);
-	if (kfifo_len(sonypi_device.fifo))
+	if (kfifo_len(&sonypi_device.fifo))
 		return POLLIN | POLLRDNORM;
 	return 0;
 }
@@ -1313,11 +1313,11 @@ static int __devinit sonypi_probe(struct platform_device *dev)
 			"http://www.linux.it/~malattia/wiki/index.php/Sony_drivers\n");
 
 	spin_lock_init(&sonypi_device.fifo_lock);
-	sonypi_device.fifo = kfifo_alloc(SONYPI_BUF_SIZE, GFP_KERNEL,
+	error = kfifo_alloc(&sonypi_device.fifo, SONYPI_BUF_SIZE, GFP_KERNEL,
 					 &sonypi_device.fifo_lock);
-	if (IS_ERR(sonypi_device.fifo)) {
+	if (error) {
 		printk(KERN_ERR "sonypi: kfifo_alloc failed\n");
-		return PTR_ERR(sonypi_device.fifo);
+		return error;
 	}
 
 	init_waitqueue_head(&sonypi_device.fifo_proc_list);
@@ -1393,12 +1393,10 @@ static int __devinit sonypi_probe(struct platform_device *dev)
 		}
 
 		spin_lock_init(&sonypi_device.input_fifo_lock);
-		sonypi_device.input_fifo =
-			kfifo_alloc(SONYPI_BUF_SIZE, GFP_KERNEL,
-				    &sonypi_device.input_fifo_lock);
-		if (IS_ERR(sonypi_device.input_fifo)) {
+		error = kfifo_alloc(&sonypi_device.input_fifo, SONYPI_BUF_SIZE,
+				GFP_KERNEL, &sonypi_device.input_fifo_lock);
+		if (error) {
 			printk(KERN_ERR "sonypi: kfifo_alloc failed\n");
-			error = PTR_ERR(sonypi_device.input_fifo);
 			goto err_inpdev_unregister;
 		}
 
@@ -1423,7 +1421,7 @@ static int __devinit sonypi_probe(struct platform_device *dev)
 		pci_disable_device(pcidev);
  err_put_pcidev:
 	pci_dev_put(pcidev);
-	kfifo_free(sonypi_device.fifo);
+	kfifo_free(&sonypi_device.fifo);
 
 	return error;
 }
@@ -1438,7 +1436,7 @@ static int __devexit sonypi_remove(struct platform_device *dev)
 	if (useinput) {
 		input_unregister_device(sonypi_device.input_key_dev);
 		input_unregister_device(sonypi_device.input_jog_dev);
-		kfifo_free(sonypi_device.input_fifo);
+		kfifo_free(&sonypi_device.input_fifo);
 	}
 
 	misc_deregister(&sonypi_misc_device);
@@ -1451,7 +1449,7 @@ static int __devexit sonypi_remove(struct platform_device *dev)
 		pci_dev_put(sonypi_device.dev);
 	}
 
-	kfifo_free(sonypi_device.fifo);
+	kfifo_free(&sonypi_device.fifo);
 
 	return 0;
 }
diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.h b/drivers/infiniband/hw/cxgb3/cxio_hal.h
index bfd03bf8be54..f3d440cc68f2 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_hal.h
+++ b/drivers/infiniband/hw/cxgb3/cxio_hal.h
@@ -34,6 +34,7 @@
 
 #include <linux/list.h>
 #include <linux/mutex.h>
+#include <linux/kfifo.h>
 
 #include "t3_cpl.h"
 #include "t3cdev.h"
@@ -75,13 +76,13 @@ struct cxio_hal_ctrl_qp {
 };
 
 struct cxio_hal_resource {
-	struct kfifo *tpt_fifo;
+	struct kfifo tpt_fifo;
 	spinlock_t tpt_fifo_lock;
-	struct kfifo *qpid_fifo;
+	struct kfifo qpid_fifo;
 	spinlock_t qpid_fifo_lock;
-	struct kfifo *cqid_fifo;
+	struct kfifo cqid_fifo;
 	spinlock_t cqid_fifo_lock;
-	struct kfifo *pdid_fifo;
+	struct kfifo pdid_fifo;
 	spinlock_t pdid_fifo_lock;
 };
 
diff --git a/drivers/infiniband/hw/cxgb3/cxio_resource.c b/drivers/infiniband/hw/cxgb3/cxio_resource.c
index bd233c087653..65072bdfc1bf 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_resource.c
+++ b/drivers/infiniband/hw/cxgb3/cxio_resource.c
@@ -39,12 +39,12 @@
 #include "cxio_resource.h"
 #include "cxio_hal.h"
 
-static struct kfifo *rhdl_fifo;
+static struct kfifo rhdl_fifo;
 static spinlock_t rhdl_fifo_lock;
 
 #define RANDOM_SIZE 16
 
-static int __cxio_init_resource_fifo(struct kfifo **fifo,
+static int __cxio_init_resource_fifo(struct kfifo *fifo,
 				   spinlock_t *fifo_lock,
 				   u32 nr, u32 skip_low,
 				   u32 skip_high,
@@ -55,12 +55,11 @@ static int __cxio_init_resource_fifo(struct kfifo **fifo,
 	u32 rarray[16];
 	spin_lock_init(fifo_lock);
 
-	*fifo = kfifo_alloc(nr * sizeof(u32), GFP_KERNEL, fifo_lock);
-	if (IS_ERR(*fifo))
+	if (kfifo_alloc(fifo, nr * sizeof(u32), GFP_KERNEL, fifo_lock))
 		return -ENOMEM;
 
 	for (i = 0; i < skip_low + skip_high; i++)
-		__kfifo_put(*fifo, (unsigned char *) &entry, sizeof(u32));
+		__kfifo_put(fifo, (unsigned char *) &entry, sizeof(u32));
 	if (random) {
 		j = 0;
 		random_bytes = random32();
@@ -72,33 +71,33 @@ static int __cxio_init_resource_fifo(struct kfifo **fifo,
 				random_bytes = random32();
 			}
 			idx = (random_bytes >> (j * 2)) & 0xF;
-			__kfifo_put(*fifo,
+			__kfifo_put(fifo,
 				(unsigned char *) &rarray[idx],
 				sizeof(u32));
 			rarray[idx] = i;
 			j++;
 		}
 		for (i = 0; i < RANDOM_SIZE; i++)
-			__kfifo_put(*fifo,
+			__kfifo_put(fifo,
 				(unsigned char *) &rarray[i],
 				sizeof(u32));
 	} else
 		for (i = skip_low; i < nr - skip_high; i++)
-			__kfifo_put(*fifo, (unsigned char *) &i, sizeof(u32));
+			__kfifo_put(fifo, (unsigned char *) &i, sizeof(u32));
 
 	for (i = 0; i < skip_low + skip_high; i++)
-		kfifo_get(*fifo, (unsigned char *) &entry, sizeof(u32));
+		kfifo_get(fifo, (unsigned char *) &entry, sizeof(u32));
 	return 0;
 }
 
-static int cxio_init_resource_fifo(struct kfifo **fifo, spinlock_t * fifo_lock,
+static int cxio_init_resource_fifo(struct kfifo *fifo, spinlock_t * fifo_lock,
 				   u32 nr, u32 skip_low, u32 skip_high)
 {
 	return (__cxio_init_resource_fifo(fifo, fifo_lock, nr, skip_low,
 					  skip_high, 0));
 }
 
-static int cxio_init_resource_fifo_random(struct kfifo **fifo,
+static int cxio_init_resource_fifo_random(struct kfifo *fifo,
 				   spinlock_t * fifo_lock,
 				   u32 nr, u32 skip_low, u32 skip_high)
 {
@@ -113,15 +112,14 @@ static int cxio_init_qpid_fifo(struct cxio_rdev *rdev_p)
 
 	spin_lock_init(&rdev_p->rscp->qpid_fifo_lock);
 
-	rdev_p->rscp->qpid_fifo = kfifo_alloc(T3_MAX_NUM_QP * sizeof(u32),
+	if (kfifo_alloc(&rdev_p->rscp->qpid_fifo, T3_MAX_NUM_QP * sizeof(u32),
 					      GFP_KERNEL,
-					      &rdev_p->rscp->qpid_fifo_lock);
-	if (IS_ERR(rdev_p->rscp->qpid_fifo))
+					      &rdev_p->rscp->qpid_fifo_lock))
 		return -ENOMEM;
 
 	for (i = 16; i < T3_MAX_NUM_QP; i++)
 		if (!(i & rdev_p->qpmask))
-			__kfifo_put(rdev_p->rscp->qpid_fifo,
+			__kfifo_put(&rdev_p->rscp->qpid_fifo,
 				    (unsigned char *) &i, sizeof(u32));
 	return 0;
 }
@@ -134,7 +132,7 @@ int cxio_hal_init_rhdl_resource(u32 nr_rhdl)
 
 void cxio_hal_destroy_rhdl_resource(void)
 {
-	kfifo_free(rhdl_fifo);
+	kfifo_free(&rhdl_fifo);
 }
 
 /* nr_* must be power of 2 */
@@ -167,11 +165,11 @@ int cxio_hal_init_resource(struct cxio_rdev *rdev_p,
 		goto pdid_err;
 	return 0;
 pdid_err:
-	kfifo_free(rscp->cqid_fifo);
+	kfifo_free(&rscp->cqid_fifo);
 cqid_err:
-	kfifo_free(rscp->qpid_fifo);
+	kfifo_free(&rscp->qpid_fifo);
 qpid_err:
-	kfifo_free(rscp->tpt_fifo);
+	kfifo_free(&rscp->tpt_fifo);
 tpt_err:
 	return -ENOMEM;
 }
@@ -195,17 +193,17 @@ static void cxio_hal_put_resource(struct kfifo *fifo, u32 entry)
 
 u32 cxio_hal_get_stag(struct cxio_hal_resource *rscp)
 {
-	return cxio_hal_get_resource(rscp->tpt_fifo);
+	return cxio_hal_get_resource(&rscp->tpt_fifo);
 }
 
 void cxio_hal_put_stag(struct cxio_hal_resource *rscp, u32 stag)
 {
-	cxio_hal_put_resource(rscp->tpt_fifo, stag);
+	cxio_hal_put_resource(&rscp->tpt_fifo, stag);
 }
 
 u32 cxio_hal_get_qpid(struct cxio_hal_resource *rscp)
 {
-	u32 qpid = cxio_hal_get_resource(rscp->qpid_fifo);
+	u32 qpid = cxio_hal_get_resource(&rscp->qpid_fifo);
 	PDBG("%s qpid 0x%x\n", __func__, qpid);
 	return qpid;
 }
@@ -213,35 +211,35 @@ u32 cxio_hal_get_qpid(struct cxio_hal_resource *rscp)
 void cxio_hal_put_qpid(struct cxio_hal_resource *rscp, u32 qpid)
 {
 	PDBG("%s qpid 0x%x\n", __func__, qpid);
-	cxio_hal_put_resource(rscp->qpid_fifo, qpid);
+	cxio_hal_put_resource(&rscp->qpid_fifo, qpid);
 }
 
 u32 cxio_hal_get_cqid(struct cxio_hal_resource *rscp)
 {
-	return cxio_hal_get_resource(rscp->cqid_fifo);
+	return cxio_hal_get_resource(&rscp->cqid_fifo);
 }
 
 void cxio_hal_put_cqid(struct cxio_hal_resource *rscp, u32 cqid)
 {
-	cxio_hal_put_resource(rscp->cqid_fifo, cqid);
+	cxio_hal_put_resource(&rscp->cqid_fifo, cqid);
 }
 
 u32 cxio_hal_get_pdid(struct cxio_hal_resource *rscp)
 {
-	return cxio_hal_get_resource(rscp->pdid_fifo);
+	return cxio_hal_get_resource(&rscp->pdid_fifo);
 }
 
 void cxio_hal_put_pdid(struct cxio_hal_resource *rscp, u32 pdid)
 {
-	cxio_hal_put_resource(rscp->pdid_fifo, pdid);
+	cxio_hal_put_resource(&rscp->pdid_fifo, pdid);
 }
 
 void cxio_hal_destroy_resource(struct cxio_hal_resource *rscp)
 {
-	kfifo_free(rscp->tpt_fifo);
-	kfifo_free(rscp->cqid_fifo);
-	kfifo_free(rscp->qpid_fifo);
-	kfifo_free(rscp->pdid_fifo);
+	kfifo_free(&rscp->tpt_fifo);
+	kfifo_free(&rscp->cqid_fifo);
+	kfifo_free(&rscp->qpid_fifo);
+	kfifo_free(&rscp->pdid_fifo);
 	kfree(rscp);
 }
 
diff --git a/drivers/media/video/meye.c b/drivers/media/video/meye.c
index 6ffa64cd1c6d..dacbbb839b9e 100644
--- a/drivers/media/video/meye.c
+++ b/drivers/media/video/meye.c
@@ -800,7 +800,7 @@ again:
 		return IRQ_HANDLED;
 
 	if (meye.mchip_mode == MCHIP_HIC_MODE_CONT_OUT) {
-		if (kfifo_get(meye.grabq, (unsigned char *)&reqnr,
+		if (kfifo_get(&meye.grabq, (unsigned char *)&reqnr,
 			      sizeof(int)) != sizeof(int)) {
 			mchip_free_frame();
 			return IRQ_HANDLED;
@@ -811,7 +811,7 @@ again:
 		meye.grab_buffer[reqnr].state = MEYE_BUF_DONE;
 		do_gettimeofday(&meye.grab_buffer[reqnr].timestamp);
 		meye.grab_buffer[reqnr].sequence = sequence++;
-		kfifo_put(meye.doneq, (unsigned char *)&reqnr, sizeof(int));
+		kfifo_put(&meye.doneq, (unsigned char *)&reqnr, sizeof(int));
 		wake_up_interruptible(&meye.proc_list);
 	} else {
 		int size;
@@ -820,7 +820,7 @@ again:
 			mchip_free_frame();
 			goto again;
 		}
-		if (kfifo_get(meye.grabq, (unsigned char *)&reqnr,
+		if (kfifo_get(&meye.grabq, (unsigned char *)&reqnr,
 			      sizeof(int)) != sizeof(int)) {
 			mchip_free_frame();
 			goto again;
@@ -831,7 +831,7 @@ again:
 		meye.grab_buffer[reqnr].state = MEYE_BUF_DONE;
 		do_gettimeofday(&meye.grab_buffer[reqnr].timestamp);
 		meye.grab_buffer[reqnr].sequence = sequence++;
-		kfifo_put(meye.doneq, (unsigned char *)&reqnr, sizeof(int));
+		kfifo_put(&meye.doneq, (unsigned char *)&reqnr, sizeof(int));
 		wake_up_interruptible(&meye.proc_list);
 	}
 	mchip_free_frame();
@@ -859,8 +859,8 @@ static int meye_open(struct file *file)
 
 	for (i = 0; i < MEYE_MAX_BUFNBRS; i++)
 		meye.grab_buffer[i].state = MEYE_BUF_UNUSED;
-	kfifo_reset(meye.grabq);
-	kfifo_reset(meye.doneq);
+	kfifo_reset(&meye.grabq);
+	kfifo_reset(&meye.doneq);
 	return 0;
 }
 
@@ -933,7 +933,7 @@ static int meyeioc_qbuf_capt(int *nb)
 		mchip_cont_compression_start();
 
 	meye.grab_buffer[*nb].state = MEYE_BUF_USING;
-	kfifo_put(meye.grabq, (unsigned char *)nb, sizeof(int));
+	kfifo_put(&meye.grabq, (unsigned char *)nb, sizeof(int));
 	mutex_unlock(&meye.lock);
 
 	return 0;
@@ -965,7 +965,7 @@ static int meyeioc_sync(struct file *file, void *fh, int *i)
 		/* fall through */
 	case MEYE_BUF_DONE:
 		meye.grab_buffer[*i].state = MEYE_BUF_UNUSED;
-		kfifo_get(meye.doneq, (unsigned char *)&unused, sizeof(int));
+		kfifo_get(&meye.doneq, (unsigned char *)&unused, sizeof(int));
 	}
 	*i = meye.grab_buffer[*i].size;
 	mutex_unlock(&meye.lock);
@@ -1452,7 +1452,7 @@ static int vidioc_qbuf(struct file *file, void *fh, struct v4l2_buffer *buf)
 	buf->flags |= V4L2_BUF_FLAG_QUEUED;
 	buf->flags &= ~V4L2_BUF_FLAG_DONE;
 	meye.grab_buffer[buf->index].state = MEYE_BUF_USING;
-	kfifo_put(meye.grabq, (unsigned char *)&buf->index, sizeof(int));
+	kfifo_put(&meye.grabq, (unsigned char *)&buf->index, sizeof(int));
 	mutex_unlock(&meye.lock);
 
 	return 0;
@@ -1467,18 +1467,18 @@ static int vidioc_dqbuf(struct file *file, void *fh, struct v4l2_buffer *buf)
 
 	mutex_lock(&meye.lock);
 
-	if (kfifo_len(meye.doneq) == 0 && file->f_flags & O_NONBLOCK) {
+	if (kfifo_len(&meye.doneq) == 0 && file->f_flags & O_NONBLOCK) {
 		mutex_unlock(&meye.lock);
 		return -EAGAIN;
 	}
 
 	if (wait_event_interruptible(meye.proc_list,
-				     kfifo_len(meye.doneq) != 0) < 0) {
+				     kfifo_len(&meye.doneq) != 0) < 0) {
 		mutex_unlock(&meye.lock);
 		return -EINTR;
 	}
 
-	if (!kfifo_get(meye.doneq, (unsigned char *)&reqnr,
+	if (!kfifo_get(&meye.doneq, (unsigned char *)&reqnr,
 		       sizeof(int))) {
 		mutex_unlock(&meye.lock);
 		return -EBUSY;
@@ -1529,8 +1529,8 @@ static int vidioc_streamoff(struct file *file, void *fh, enum v4l2_buf_type i)
 {
 	mutex_lock(&meye.lock);
 	mchip_hic_stop();
-	kfifo_reset(meye.grabq);
-	kfifo_reset(meye.doneq);
+	kfifo_reset(&meye.grabq);
+	kfifo_reset(&meye.doneq);
 
 	for (i = 0; i < MEYE_MAX_BUFNBRS; i++)
 		meye.grab_buffer[i].state = MEYE_BUF_UNUSED;
@@ -1572,7 +1572,7 @@ static unsigned int meye_poll(struct file *file, poll_table *wait)
 
 	mutex_lock(&meye.lock);
 	poll_wait(file, &meye.proc_list, wait);
-	if (kfifo_len(meye.doneq))
+	if (kfifo_len(&meye.doneq))
 		res = POLLIN | POLLRDNORM;
 	mutex_unlock(&meye.lock);
 	return res;
@@ -1745,16 +1745,14 @@ static int __devinit meye_probe(struct pci_dev *pcidev,
 	}
 
 	spin_lock_init(&meye.grabq_lock);
-	meye.grabq = kfifo_alloc(sizeof(int) * MEYE_MAX_BUFNBRS, GFP_KERNEL,
-				 &meye.grabq_lock);
-	if (IS_ERR(meye.grabq)) {
+	if (kfifo_alloc(&meye.grabq, sizeof(int) * MEYE_MAX_BUFNBRS, GFP_KERNEL,
+				 &meye.grabq_lock)) {
 		printk(KERN_ERR "meye: fifo allocation failed\n");
 		goto outkfifoalloc1;
 	}
 	spin_lock_init(&meye.doneq_lock);
-	meye.doneq = kfifo_alloc(sizeof(int) * MEYE_MAX_BUFNBRS, GFP_KERNEL,
-				 &meye.doneq_lock);
-	if (IS_ERR(meye.doneq)) {
+	if (kfifo_alloc(&meye.doneq, sizeof(int) * MEYE_MAX_BUFNBRS, GFP_KERNEL,
+				 &meye.doneq_lock)) {
 		printk(KERN_ERR "meye: fifo allocation failed\n");
 		goto outkfifoalloc2;
 	}
@@ -1868,9 +1866,9 @@ outregions:
 outenabledev:
 	sony_pic_camera_command(SONY_PIC_COMMAND_SETCAMERA, 0);
 outsonypienable:
-	kfifo_free(meye.doneq);
+	kfifo_free(&meye.doneq);
 outkfifoalloc2:
-	kfifo_free(meye.grabq);
+	kfifo_free(&meye.grabq);
 outkfifoalloc1:
 	vfree(meye.grab_temp);
 outvmalloc:
@@ -1901,8 +1899,8 @@ static void __devexit meye_remove(struct pci_dev *pcidev)
 
 	sony_pic_camera_command(SONY_PIC_COMMAND_SETCAMERA, 0);
 
-	kfifo_free(meye.doneq);
-	kfifo_free(meye.grabq);
+	kfifo_free(&meye.doneq);
+	kfifo_free(&meye.grabq);
 
 	vfree(meye.grab_temp);
 
diff --git a/drivers/media/video/meye.h b/drivers/media/video/meye.h
index 5f70a106ba2b..1321ad5d6597 100644
--- a/drivers/media/video/meye.h
+++ b/drivers/media/video/meye.h
@@ -303,9 +303,9 @@ struct meye {
 	struct meye_grab_buffer grab_buffer[MEYE_MAX_BUFNBRS];
 	int vma_use_count[MEYE_MAX_BUFNBRS]; /* mmap count */
 	struct mutex lock;		/* mutex for open/mmap... */
-	struct kfifo *grabq;		/* queue for buffers to be grabbed */
+	struct kfifo grabq;		/* queue for buffers to be grabbed */
 	spinlock_t grabq_lock;		/* lock protecting the queue */
-	struct kfifo *doneq;		/* queue for grabbed buffers */
+	struct kfifo doneq;		/* queue for grabbed buffers */
 	spinlock_t doneq_lock;		/* lock protecting the queue */
 	wait_queue_head_t proc_list;	/* wait queue */
 	struct video_device *video_dev;	/* video device parameters */
diff --git a/drivers/net/wireless/libertas/cmd.c b/drivers/net/wireless/libertas/cmd.c
index b9b371bfa30f..ffed17f4f506 100644
--- a/drivers/net/wireless/libertas/cmd.c
+++ b/drivers/net/wireless/libertas/cmd.c
@@ -1365,7 +1365,7 @@ static void lbs_send_confirmsleep(struct lbs_private *priv)
 	priv->dnld_sent = DNLD_RES_RECEIVED;
 
 	/* If nothing to do, go back to sleep (?) */
-	if (!__kfifo_len(priv->event_fifo) && !priv->resp_len[priv->resp_idx])
+	if (!__kfifo_len(&priv->event_fifo) && !priv->resp_len[priv->resp_idx])
 		priv->psstate = PS_STATE_SLEEP;
 
 	spin_unlock_irqrestore(&priv->driver_lock, flags);
@@ -1439,7 +1439,7 @@ void lbs_ps_confirm_sleep(struct lbs_private *priv)
 	}
 
 	/* Pending events or command responses? */
-	if (__kfifo_len(priv->event_fifo) || priv->resp_len[priv->resp_idx]) {
+	if (__kfifo_len(&priv->event_fifo) || priv->resp_len[priv->resp_idx]) {
 		allowed = 0;
 		lbs_deb_host("pending events or command responses\n");
 	}
diff --git a/drivers/net/wireless/libertas/dev.h b/drivers/net/wireless/libertas/dev.h
index 6a8d2b291d8c..05bb298dfae9 100644
--- a/drivers/net/wireless/libertas/dev.h
+++ b/drivers/net/wireless/libertas/dev.h
@@ -10,7 +10,7 @@
 #include "scan.h"
 #include "assoc.h"
 
-
+#include <linux/kfifo.h>
 
 /** sleep_params */
 struct sleep_params {
@@ -120,7 +120,7 @@ struct lbs_private {
 	u32 resp_len[2];
 
 	/* Events sent from hardware to driver */
-	struct kfifo *event_fifo;
+	struct kfifo event_fifo;
 
 	/** thread to service interrupts */
 	struct task_struct *main_thread;
diff --git a/drivers/net/wireless/libertas/main.c b/drivers/net/wireless/libertas/main.c
index db38a5a719fa..403909287414 100644
--- a/drivers/net/wireless/libertas/main.c
+++ b/drivers/net/wireless/libertas/main.c
@@ -459,7 +459,7 @@ static int lbs_thread(void *data)
 		else if (!list_empty(&priv->cmdpendingq) &&
 					!(priv->wakeup_dev_required))
 			shouldsleep = 0;	/* We have a command to send */
-		else if (__kfifo_len(priv->event_fifo))
+		else if (__kfifo_len(&priv->event_fifo))
 			shouldsleep = 0;	/* We have an event to process */
 		else
 			shouldsleep = 1;	/* No command */
@@ -511,9 +511,9 @@ static int lbs_thread(void *data)
 
 		/* Process hardware events, e.g. card removed, link lost */
 		spin_lock_irq(&priv->driver_lock);
-		while (__kfifo_len(priv->event_fifo)) {
+		while (__kfifo_len(&priv->event_fifo)) {
 			u32 event;
-			__kfifo_get(priv->event_fifo, (unsigned char *) &event,
+			__kfifo_get(&priv->event_fifo, (unsigned char *) &event,
 				sizeof(event));
 			spin_unlock_irq(&priv->driver_lock);
 			lbs_process_event(priv, event);
@@ -883,10 +883,9 @@ static int lbs_init_adapter(struct lbs_private *priv)
 	priv->resp_len[0] = priv->resp_len[1] = 0;
 
 	/* Create the event FIFO */
-	priv->event_fifo = kfifo_alloc(sizeof(u32) * 16, GFP_KERNEL, NULL);
-	if (IS_ERR(priv->event_fifo)) {
+	ret = kfifo_alloc(&priv->event_fifo, sizeof(u32) * 16, GFP_KERNEL, NULL);
+	if (ret) {
 		lbs_pr_err("Out of memory allocating event FIFO buffer\n");
-		ret = -ENOMEM;
 		goto out;
 	}
 
@@ -901,8 +900,7 @@ static void lbs_free_adapter(struct lbs_private *priv)
 	lbs_deb_enter(LBS_DEB_MAIN);
 
 	lbs_free_cmd_buffer(priv);
-	if (priv->event_fifo)
-		kfifo_free(priv->event_fifo);
+	kfifo_free(&priv->event_fifo);
 	del_timer(&priv->command_timer);
 	del_timer(&priv->auto_deepsleep_timer);
 	kfree(priv->networks);
@@ -1177,7 +1175,7 @@ void lbs_queue_event(struct lbs_private *priv, u32 event)
 	if (priv->psstate == PS_STATE_SLEEP)
 		priv->psstate = PS_STATE_AWAKE;
 
-	__kfifo_put(priv->event_fifo, (unsigned char *) &event, sizeof(u32));
+	__kfifo_put(&priv->event_fifo, (unsigned char *) &event, sizeof(u32));
 
 	wake_up_interruptible(&priv->waitq);
 
diff --git a/drivers/platform/x86/fujitsu-laptop.c b/drivers/platform/x86/fujitsu-laptop.c
index bcd4ba8be7db..f999fba0e25e 100644
--- a/drivers/platform/x86/fujitsu-laptop.c
+++ b/drivers/platform/x86/fujitsu-laptop.c
@@ -164,7 +164,7 @@ struct fujitsu_hotkey_t {
 	struct input_dev *input;
 	char phys[32];
 	struct platform_device *pf_device;
-	struct kfifo *fifo;
+	struct kfifo fifo;
 	spinlock_t fifo_lock;
 	int rfkill_supported;
 	int rfkill_state;
@@ -824,12 +824,10 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device)
 
 	/* kfifo */
 	spin_lock_init(&fujitsu_hotkey->fifo_lock);
-	fujitsu_hotkey->fifo =
-	    kfifo_alloc(RINGBUFFERSIZE * sizeof(int), GFP_KERNEL,
-			&fujitsu_hotkey->fifo_lock);
-	if (IS_ERR(fujitsu_hotkey->fifo)) {
+	error = kfifo_alloc(&fujitsu_hotkey->fifo, RINGBUFFERSIZE * sizeof(int),
+			GFP_KERNEL, &fujitsu_hotkey->fifo_lock);
+	if (error) {
 		printk(KERN_ERR "kfifo_alloc failed\n");
-		error = PTR_ERR(fujitsu_hotkey->fifo);
 		goto err_stop;
 	}
 
@@ -934,7 +932,7 @@ err_unregister_input_dev:
 err_free_input_dev:
 	input_free_device(input);
 err_free_fifo:
-	kfifo_free(fujitsu_hotkey->fifo);
+	kfifo_free(&fujitsu_hotkey->fifo);
 err_stop:
 	return result;
 }
@@ -956,7 +954,7 @@ static int acpi_fujitsu_hotkey_remove(struct acpi_device *device, int type)
 
 	input_free_device(input);
 
-	kfifo_free(fujitsu_hotkey->fifo);
+	kfifo_free(&fujitsu_hotkey->fifo);
 
 	fujitsu_hotkey->acpi_handle = NULL;
 
@@ -1008,7 +1006,7 @@ static void acpi_fujitsu_hotkey_notify(struct acpi_device *device, u32 event)
 				vdbg_printk(FUJLAPTOP_DBG_TRACE,
 					"Push keycode into ringbuffer [%d]\n",
 					keycode);
-				status = kfifo_put(fujitsu_hotkey->fifo,
+				status = kfifo_put(&fujitsu_hotkey->fifo,
 						   (unsigned char *)&keycode,
 						   sizeof(keycode));
 				if (status != sizeof(keycode)) {
@@ -1022,7 +1020,7 @@ static void acpi_fujitsu_hotkey_notify(struct acpi_device *device, u32 event)
 			} else if (keycode == 0) {
 				while ((status =
 					kfifo_get
-					(fujitsu_hotkey->fifo, (unsigned char *)
+					(&fujitsu_hotkey->fifo, (unsigned char *)
 					 &keycode_r,
 					 sizeof
 					 (keycode_r))) == sizeof(keycode_r)) {
diff --git a/drivers/platform/x86/sony-laptop.c b/drivers/platform/x86/sony-laptop.c
index 7a2cc8a5c975..04625a048e74 100644
--- a/drivers/platform/x86/sony-laptop.c
+++ b/drivers/platform/x86/sony-laptop.c
@@ -142,7 +142,7 @@ struct sony_laptop_input_s {
 	atomic_t		users;
 	struct input_dev	*jog_dev;
 	struct input_dev	*key_dev;
-	struct kfifo		*fifo;
+	struct kfifo		fifo;
 	spinlock_t		fifo_lock;
 	struct workqueue_struct	*wq;
 };
@@ -300,7 +300,7 @@ static void do_sony_laptop_release_key(struct work_struct *work)
 {
 	struct sony_laptop_keypress kp;
 
-	while (kfifo_get(sony_laptop_input.fifo, (unsigned char *)&kp,
+	while (kfifo_get(&sony_laptop_input.fifo, (unsigned char *)&kp,
 			 sizeof(kp)) == sizeof(kp)) {
 		msleep(10);
 		input_report_key(kp.dev, kp.key, 0);
@@ -362,7 +362,7 @@ static void sony_laptop_report_input_event(u8 event)
 		/* we emit the scancode so we can always remap the key */
 		input_event(kp.dev, EV_MSC, MSC_SCAN, event);
 		input_sync(kp.dev);
-		kfifo_put(sony_laptop_input.fifo,
+		kfifo_put(&sony_laptop_input.fifo,
 			  (unsigned char *)&kp, sizeof(kp));
 
 		if (!work_pending(&sony_laptop_release_key_work))
@@ -385,12 +385,11 @@ static int sony_laptop_setup_input(struct acpi_device *acpi_device)
 
 	/* kfifo */
 	spin_lock_init(&sony_laptop_input.fifo_lock);
-	sony_laptop_input.fifo =
-		kfifo_alloc(SONY_LAPTOP_BUF_SIZE, GFP_KERNEL,
+	error =
+	 kfifo_alloc(&sony_laptop_input.fifo, SONY_LAPTOP_BUF_SIZE, GFP_KERNEL,
 			    &sony_laptop_input.fifo_lock);
-	if (IS_ERR(sony_laptop_input.fifo)) {
+	if (error) {
 		printk(KERN_ERR DRV_PFX "kfifo_alloc failed\n");
-		error = PTR_ERR(sony_laptop_input.fifo);
 		goto err_dec_users;
 	}
 
@@ -474,7 +473,7 @@ err_destroy_wq:
 	destroy_workqueue(sony_laptop_input.wq);
 
 err_free_kfifo:
-	kfifo_free(sony_laptop_input.fifo);
+	kfifo_free(&sony_laptop_input.fifo);
 
 err_dec_users:
 	atomic_dec(&sony_laptop_input.users);
@@ -500,7 +499,7 @@ static void sony_laptop_remove_input(void)
 	}
 
 	destroy_workqueue(sony_laptop_input.wq);
-	kfifo_free(sony_laptop_input.fifo);
+	kfifo_free(&sony_laptop_input.fifo);
 }
 
 /*********** Platform Device ***********/
@@ -2079,7 +2078,7 @@ static struct attribute_group spic_attribute_group = {
 
 struct sonypi_compat_s {
 	struct fasync_struct	*fifo_async;
-	struct kfifo		*fifo;
+	struct kfifo		fifo;
 	spinlock_t		fifo_lock;
 	wait_queue_head_t	fifo_proc_list;
 	atomic_t		open_count;
@@ -2104,12 +2103,12 @@ static int sonypi_misc_open(struct inode *inode, struct file *file)
 	/* Flush input queue on first open */
 	unsigned long flags;
 
-	spin_lock_irqsave(sonypi_compat.fifo->lock, flags);
+	spin_lock_irqsave(&sonypi_compat.fifo_lock, flags);
 
 	if (atomic_inc_return(&sonypi_compat.open_count) == 1)
-		__kfifo_reset(sonypi_compat.fifo);
+		__kfifo_reset(&sonypi_compat.fifo);
 
-	spin_unlock_irqrestore(sonypi_compat.fifo->lock, flags);
+	spin_unlock_irqrestore(&sonypi_compat.fifo_lock, flags);
 
 	return 0;
 }
@@ -2120,17 +2119,17 @@ static ssize_t sonypi_misc_read(struct file *file, char __user *buf,
 	ssize_t ret;
 	unsigned char c;
 
-	if ((kfifo_len(sonypi_compat.fifo) == 0) &&
+	if ((kfifo_len(&sonypi_compat.fifo) == 0) &&
 	    (file->f_flags & O_NONBLOCK))
 		return -EAGAIN;
 
 	ret = wait_event_interruptible(sonypi_compat.fifo_proc_list,
-				       kfifo_len(sonypi_compat.fifo) != 0);
+				       kfifo_len(&sonypi_compat.fifo) != 0);
 	if (ret)
 		return ret;
 
 	while (ret < count &&
-	       (kfifo_get(sonypi_compat.fifo, &c, sizeof(c)) == sizeof(c))) {
+	       (kfifo_get(&sonypi_compat.fifo, &c, sizeof(c)) == sizeof(c))) {
 		if (put_user(c, buf++))
 			return -EFAULT;
 		ret++;
@@ -2147,7 +2146,7 @@ static ssize_t sonypi_misc_read(struct file *file, char __user *buf,
 static unsigned int sonypi_misc_poll(struct file *file, poll_table *wait)
 {
 	poll_wait(file, &sonypi_compat.fifo_proc_list, wait);
-	if (kfifo_len(sonypi_compat.fifo))
+	if (kfifo_len(&sonypi_compat.fifo))
 		return POLLIN | POLLRDNORM;
 	return 0;
 }
@@ -2309,7 +2308,7 @@ static struct miscdevice sonypi_misc_device = {
 
 static void sonypi_compat_report_event(u8 event)
 {
-	kfifo_put(sonypi_compat.fifo, (unsigned char *)&event, sizeof(event));
+	kfifo_put(&sonypi_compat.fifo, (unsigned char *)&event, sizeof(event));
 	kill_fasync(&sonypi_compat.fifo_async, SIGIO, POLL_IN);
 	wake_up_interruptible(&sonypi_compat.fifo_proc_list);
 }
@@ -2319,11 +2318,12 @@ static int sonypi_compat_init(void)
 	int error;
 
 	spin_lock_init(&sonypi_compat.fifo_lock);
-	sonypi_compat.fifo = kfifo_alloc(SONY_LAPTOP_BUF_SIZE, GFP_KERNEL,
+	error =
+	 kfifo_alloc(&sonypi_compat.fifo, SONY_LAPTOP_BUF_SIZE, GFP_KERNEL,
 					 &sonypi_compat.fifo_lock);
-	if (IS_ERR(sonypi_compat.fifo)) {
+	if (error) {
 		printk(KERN_ERR DRV_PFX "kfifo_alloc failed\n");
-		return PTR_ERR(sonypi_compat.fifo);
+		return error;
 	}
 
 	init_waitqueue_head(&sonypi_compat.fifo_proc_list);
@@ -2342,14 +2342,14 @@ static int sonypi_compat_init(void)
 	return 0;
 
 err_free_kfifo:
-	kfifo_free(sonypi_compat.fifo);
+	kfifo_free(&sonypi_compat.fifo);
 	return error;
 }
 
 static void sonypi_compat_exit(void)
 {
 	misc_deregister(&sonypi_misc_device);
-	kfifo_free(sonypi_compat.fifo);
+	kfifo_free(&sonypi_compat.fifo);
 }
 #else
 static int sonypi_compat_init(void) { return 0; }
diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c
index b7689f3d05f5..cf0aa7e90be9 100644
--- a/drivers/scsi/libiscsi.c
+++ b/drivers/scsi/libiscsi.c
@@ -517,7 +517,7 @@ static void iscsi_free_task(struct iscsi_task *task)
 	if (conn->login_task == task)
 		return;
 
-	__kfifo_put(session->cmdpool.queue, (void*)&task, sizeof(void*));
+	__kfifo_put(&session->cmdpool.queue, (void*)&task, sizeof(void*));
 
 	if (sc) {
 		task->sc = NULL;
@@ -737,7 +737,7 @@ __iscsi_conn_send_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
 		BUG_ON(conn->c_stage == ISCSI_CONN_INITIAL_STAGE);
 		BUG_ON(conn->c_stage == ISCSI_CONN_STOPPED);
 
-		if (!__kfifo_get(session->cmdpool.queue,
+		if (!__kfifo_get(&session->cmdpool.queue,
 				 (void*)&task, sizeof(void*)))
 			return NULL;
 	}
@@ -1567,7 +1567,7 @@ static inline struct iscsi_task *iscsi_alloc_task(struct iscsi_conn *conn,
 {
 	struct iscsi_task *task;
 
-	if (!__kfifo_get(conn->session->cmdpool.queue,
+	if (!__kfifo_get(&conn->session->cmdpool.queue,
 			 (void *) &task, sizeof(void *)))
 		return NULL;
 
@@ -2461,12 +2461,7 @@ iscsi_pool_init(struct iscsi_pool *q, int max, void ***items, int item_size)
 	if (q->pool == NULL)
 		return -ENOMEM;
 
-	q->queue = kfifo_init((void*)q->pool, max * sizeof(void*),
-			      GFP_KERNEL, NULL);
-	if (IS_ERR(q->queue)) {
-		q->queue = NULL;
-		goto enomem;
-	}
+	kfifo_init(&q->queue, (void*)q->pool, max * sizeof(void*), NULL);
 
 	for (i = 0; i < max; i++) {
 		q->pool[i] = kzalloc(item_size, GFP_KERNEL);
@@ -2474,7 +2469,7 @@ iscsi_pool_init(struct iscsi_pool *q, int max, void ***items, int item_size)
 			q->max = i;
 			goto enomem;
 		}
-		__kfifo_put(q->queue, (void*)&q->pool[i], sizeof(void*));
+		__kfifo_put(&q->queue, (void*)&q->pool[i], sizeof(void*));
 	}
 
 	if (items) {
@@ -2497,7 +2492,6 @@ void iscsi_pool_free(struct iscsi_pool *q)
 	for (i = 0; i < q->max; i++)
 		kfree(q->pool[i]);
 	kfree(q->pool);
-	kfree(q->queue);
 }
 EXPORT_SYMBOL_GPL(iscsi_pool_free);
 
@@ -2825,7 +2819,7 @@ iscsi_conn_setup(struct iscsi_cls_session *cls_session, int dd_size,
 
 	/* allocate login_task used for the login/text sequences */
 	spin_lock_bh(&session->lock);
-	if (!__kfifo_get(session->cmdpool.queue,
+	if (!__kfifo_get(&session->cmdpool.queue,
                          (void*)&conn->login_task,
 			 sizeof(void*))) {
 		spin_unlock_bh(&session->lock);
@@ -2845,7 +2839,7 @@ iscsi_conn_setup(struct iscsi_cls_session *cls_session, int dd_size,
 	return cls_conn;
 
 login_task_data_alloc_fail:
-	__kfifo_put(session->cmdpool.queue, (void*)&conn->login_task,
+	__kfifo_put(&session->cmdpool.queue, (void*)&conn->login_task,
 		    sizeof(void*));
 login_task_alloc_fail:
 	iscsi_destroy_conn(cls_conn);
@@ -2908,7 +2902,7 @@ void iscsi_conn_teardown(struct iscsi_cls_conn *cls_conn)
 	free_pages((unsigned long) conn->data,
 		   get_order(ISCSI_DEF_MAX_RECV_SEG_LEN));
 	kfree(conn->persistent_address);
-	__kfifo_put(session->cmdpool.queue, (void*)&conn->login_task,
+	__kfifo_put(&session->cmdpool.queue, (void*)&conn->login_task,
 		    sizeof(void*));
 	if (session->leadconn == conn)
 		session->leadconn = NULL;
diff --git a/drivers/scsi/libiscsi_tcp.c b/drivers/scsi/libiscsi_tcp.c
index ca25ee5190b0..a83ee56a185e 100644
--- a/drivers/scsi/libiscsi_tcp.c
+++ b/drivers/scsi/libiscsi_tcp.c
@@ -445,15 +445,15 @@ void iscsi_tcp_cleanup_task(struct iscsi_task *task)
 		return;
 
 	/* flush task's r2t queues */
-	while (__kfifo_get(tcp_task->r2tqueue, (void*)&r2t, sizeof(void*))) {
-		__kfifo_put(tcp_task->r2tpool.queue, (void*)&r2t,
+	while (__kfifo_get(&tcp_task->r2tqueue, (void*)&r2t, sizeof(void*))) {
+		__kfifo_put(&tcp_task->r2tpool.queue, (void*)&r2t,
 			    sizeof(void*));
 		ISCSI_DBG_TCP(task->conn, "pending r2t dropped\n");
 	}
 
 	r2t = tcp_task->r2t;
 	if (r2t != NULL) {
-		__kfifo_put(tcp_task->r2tpool.queue, (void*)&r2t,
+		__kfifo_put(&tcp_task->r2tpool.queue, (void*)&r2t,
 			    sizeof(void*));
 		tcp_task->r2t = NULL;
 	}
@@ -541,7 +541,7 @@ static int iscsi_tcp_r2t_rsp(struct iscsi_conn *conn, struct iscsi_task *task)
 		return 0;
 	}
 
-	rc = __kfifo_get(tcp_task->r2tpool.queue, (void*)&r2t, sizeof(void*));
+	rc = __kfifo_get(&tcp_task->r2tpool.queue, (void*)&r2t, sizeof(void*));
 	if (!rc) {
 		iscsi_conn_printk(KERN_ERR, conn, "Could not allocate R2T. "
 				  "Target has sent more R2Ts than it "
@@ -554,7 +554,7 @@ static int iscsi_tcp_r2t_rsp(struct iscsi_conn *conn, struct iscsi_task *task)
 	if (r2t->data_length == 0) {
 		iscsi_conn_printk(KERN_ERR, conn,
 				  "invalid R2T with zero data len\n");
-		__kfifo_put(tcp_task->r2tpool.queue, (void*)&r2t,
+		__kfifo_put(&tcp_task->r2tpool.queue, (void*)&r2t,
 			    sizeof(void*));
 		return ISCSI_ERR_DATALEN;
 	}
@@ -570,7 +570,7 @@ static int iscsi_tcp_r2t_rsp(struct iscsi_conn *conn, struct iscsi_task *task)
 				  "invalid R2T with data len %u at offset %u "
 				  "and total length %d\n", r2t->data_length,
 				  r2t->data_offset, scsi_out(task->sc)->length);
-		__kfifo_put(tcp_task->r2tpool.queue, (void*)&r2t,
+		__kfifo_put(&tcp_task->r2tpool.queue, (void*)&r2t,
 			    sizeof(void*));
 		return ISCSI_ERR_DATALEN;
 	}
@@ -580,7 +580,7 @@ static int iscsi_tcp_r2t_rsp(struct iscsi_conn *conn, struct iscsi_task *task)
 	r2t->sent = 0;
 
 	tcp_task->exp_datasn = r2tsn + 1;
-	__kfifo_put(tcp_task->r2tqueue, (void*)&r2t, sizeof(void*));
+	__kfifo_put(&tcp_task->r2tqueue, (void*)&r2t, sizeof(void*));
 	conn->r2t_pdus_cnt++;
 
 	iscsi_requeue_task(task);
@@ -951,7 +951,7 @@ int iscsi_tcp_task_init(struct iscsi_task *task)
 		return conn->session->tt->init_pdu(task, 0, task->data_count);
 	}
 
-	BUG_ON(__kfifo_len(tcp_task->r2tqueue));
+	BUG_ON(__kfifo_len(&tcp_task->r2tqueue));
 	tcp_task->exp_datasn = 0;
 
 	/* Prepare PDU, optionally w/ immediate data */
@@ -982,7 +982,7 @@ static struct iscsi_r2t_info *iscsi_tcp_get_curr_r2t(struct iscsi_task *task)
 			if (r2t->data_length <= r2t->sent) {
 				ISCSI_DBG_TCP(task->conn,
 					      "  done with r2t %p\n", r2t);
-				__kfifo_put(tcp_task->r2tpool.queue,
+				__kfifo_put(&tcp_task->r2tpool.queue,
 					    (void *)&tcp_task->r2t,
 					    sizeof(void *));
 				tcp_task->r2t = r2t = NULL;
@@ -990,7 +990,7 @@ static struct iscsi_r2t_info *iscsi_tcp_get_curr_r2t(struct iscsi_task *task)
 		}
 
 		if (r2t == NULL) {
-			__kfifo_get(tcp_task->r2tqueue,
+			__kfifo_get(&tcp_task->r2tqueue,
 				    (void *)&tcp_task->r2t, sizeof(void *));
 			r2t = tcp_task->r2t;
 		}
@@ -1127,9 +1127,8 @@ int iscsi_tcp_r2tpool_alloc(struct iscsi_session *session)
 		}
 
 		/* R2T xmit queue */
-		tcp_task->r2tqueue = kfifo_alloc(
-		      session->max_r2t * 4 * sizeof(void*), GFP_KERNEL, NULL);
-		if (tcp_task->r2tqueue == ERR_PTR(-ENOMEM)) {
+		if (kfifo_alloc(&tcp_task->r2tqueue,
+		      session->max_r2t * 4 * sizeof(void*), GFP_KERNEL, NULL)) {
 			iscsi_pool_free(&tcp_task->r2tpool);
 			goto r2t_alloc_fail;
 		}
@@ -1142,7 +1141,7 @@ r2t_alloc_fail:
 		struct iscsi_task *task = session->cmds[i];
 		struct iscsi_tcp_task *tcp_task = task->dd_data;
 
-		kfifo_free(tcp_task->r2tqueue);
+		kfifo_free(&tcp_task->r2tqueue);
 		iscsi_pool_free(&tcp_task->r2tpool);
 	}
 	return -ENOMEM;
@@ -1157,7 +1156,7 @@ void iscsi_tcp_r2tpool_free(struct iscsi_session *session)
 		struct iscsi_task *task = session->cmds[i];
 		struct iscsi_tcp_task *tcp_task = task->dd_data;
 
-		kfifo_free(tcp_task->r2tqueue);
+		kfifo_free(&tcp_task->r2tqueue);
 		iscsi_pool_free(&tcp_task->r2tpool);
 	}
 }
diff --git a/drivers/scsi/libsrp.c b/drivers/scsi/libsrp.c
index 9ad38e81e343..b1b5e51ca8e3 100644
--- a/drivers/scsi/libsrp.c
+++ b/drivers/scsi/libsrp.c
@@ -58,19 +58,16 @@ static int srp_iu_pool_alloc(struct srp_queue *q, size_t max,
 		goto free_pool;
 
 	spin_lock_init(&q->lock);
-	q->queue = kfifo_init((void *) q->pool, max * sizeof(void *),
-			      GFP_KERNEL, &q->lock);
-	if (IS_ERR(q->queue))
-		goto free_item;
+	kfifo_init(&q->queue, (void *) q->pool, max * sizeof(void *),
+			      &q->lock);
 
 	for (i = 0, iue = q->items; i < max; i++) {
-		__kfifo_put(q->queue, (void *) &iue, sizeof(void *));
+		__kfifo_put(&q->queue, (void *) &iue, sizeof(void *));
 		iue->sbuf = ring[i];
 		iue++;
 	}
 	return 0;
 
-free_item:
 	kfree(q->items);
 free_pool:
 	kfree(q->pool);
@@ -167,7 +164,7 @@ struct iu_entry *srp_iu_get(struct srp_target *target)
 {
 	struct iu_entry *iue = NULL;
 
-	kfifo_get(target->iu_queue.queue, (void *) &iue, sizeof(void *));
+	kfifo_get(&target->iu_queue.queue, (void *) &iue, sizeof(void *));
 	if (!iue)
 		return iue;
 	iue->target = target;
@@ -179,7 +176,7 @@ EXPORT_SYMBOL_GPL(srp_iu_get);
 
 void srp_iu_put(struct iu_entry *iue)
 {
-	kfifo_put(iue->target->iu_queue.queue, (void *) &iue, sizeof(void *));
+	kfifo_put(&iue->target->iu_queue.queue, (void *) &iue, sizeof(void *));
 }
 EXPORT_SYMBOL_GPL(srp_iu_put);
 
diff --git a/drivers/usb/host/fhci-sched.c b/drivers/usb/host/fhci-sched.c
index 00a29855d0c4..ff43747a614f 100644
--- a/drivers/usb/host/fhci-sched.c
+++ b/drivers/usb/host/fhci-sched.c
@@ -37,7 +37,7 @@ static void recycle_frame(struct fhci_usb *usb, struct packet *pkt)
 	pkt->info = 0;
 	pkt->priv_data = NULL;
 
-	cq_put(usb->ep0->empty_frame_Q, pkt);
+	cq_put(&usb->ep0->empty_frame_Q, pkt);
 }
 
 /* confirm submitted packet */
@@ -57,7 +57,7 @@ void fhci_transaction_confirm(struct fhci_usb *usb, struct packet *pkt)
 		if ((td->data + td->actual_len) && trans_len)
 			memcpy(td->data + td->actual_len, pkt->data,
 			       trans_len);
-		cq_put(usb->ep0->dummy_packets_Q, pkt->data);
+		cq_put(&usb->ep0->dummy_packets_Q, pkt->data);
 	}
 
 	recycle_frame(usb, pkt);
@@ -213,7 +213,7 @@ static int add_packet(struct fhci_usb *usb, struct ed *ed, struct td *td)
 	}
 
 	/* update frame object fields before transmitting */
-	pkt = cq_get(usb->ep0->empty_frame_Q);
+	pkt = cq_get(&usb->ep0->empty_frame_Q);
 	if (!pkt) {
 		fhci_dbg(usb->fhci, "there is no empty frame\n");
 		return -1;
@@ -222,7 +222,7 @@ static int add_packet(struct fhci_usb *usb, struct ed *ed, struct td *td)
 
 	pkt->info = 0;
 	if (data == NULL) {
-		data = cq_get(usb->ep0->dummy_packets_Q);
+		data = cq_get(&usb->ep0->dummy_packets_Q);
 		BUG_ON(!data);
 		pkt->info = PKT_DUMMY_PACKET;
 	}
@@ -246,7 +246,7 @@ static int add_packet(struct fhci_usb *usb, struct ed *ed, struct td *td)
 		list_del_init(&td->frame_lh);
 		td->status = USB_TD_OK;
 		if (pkt->info & PKT_DUMMY_PACKET)
-			cq_put(usb->ep0->dummy_packets_Q, pkt->data);
+			cq_put(&usb->ep0->dummy_packets_Q, pkt->data);
 		recycle_frame(usb, pkt);
 		usb->actual_frame->total_bytes -= (len + PROTOCOL_OVERHEAD);
 		fhci_err(usb->fhci, "host transaction failed\n");
diff --git a/drivers/usb/host/fhci-tds.c b/drivers/usb/host/fhci-tds.c
index b40332290319..d224ab467a40 100644
--- a/drivers/usb/host/fhci-tds.c
+++ b/drivers/usb/host/fhci-tds.c
@@ -106,33 +106,33 @@ void fhci_ep0_free(struct fhci_usb *usb)
 			cpm_muram_free(cpm_muram_offset(ep->td_base));
 
 		if (ep->conf_frame_Q) {
-			size = cq_howmany(ep->conf_frame_Q);
+			size = cq_howmany(&ep->conf_frame_Q);
 			for (; size; size--) {
-				struct packet *pkt = cq_get(ep->conf_frame_Q);
+				struct packet *pkt = cq_get(&ep->conf_frame_Q);
 
 				kfree(pkt);
 			}
-			cq_delete(ep->conf_frame_Q);
+			cq_delete(&ep->conf_frame_Q);
 		}
 
 		if (ep->empty_frame_Q) {
-			size = cq_howmany(ep->empty_frame_Q);
+			size = cq_howmany(&ep->empty_frame_Q);
 			for (; size; size--) {
-				struct packet *pkt = cq_get(ep->empty_frame_Q);
+				struct packet *pkt = cq_get(&ep->empty_frame_Q);
 
 				kfree(pkt);
 			}
-			cq_delete(ep->empty_frame_Q);
+			cq_delete(&ep->empty_frame_Q);
 		}
 
 		if (ep->dummy_packets_Q) {
-			size = cq_howmany(ep->dummy_packets_Q);
+			size = cq_howmany(&ep->dummy_packets_Q);
 			for (; size; size--) {
-				u8 *buff = cq_get(ep->dummy_packets_Q);
+				u8 *buff = cq_get(&ep->dummy_packets_Q);
 
 				kfree(buff);
 			}
-			cq_delete(ep->dummy_packets_Q);
+			cq_delete(&ep->dummy_packets_Q);
 		}
 
 		kfree(ep);
@@ -175,10 +175,9 @@ u32 fhci_create_ep(struct fhci_usb *usb, enum fhci_mem_alloc data_mem,
 	ep->td_base = cpm_muram_addr(ep_offset);
 
 	/* zero all queue pointers */
-	ep->conf_frame_Q = cq_new(ring_len + 2);
-	ep->empty_frame_Q = cq_new(ring_len + 2);
-	ep->dummy_packets_Q = cq_new(ring_len + 2);
-	if (!ep->conf_frame_Q || !ep->empty_frame_Q || !ep->dummy_packets_Q) {
+	if (cq_new(&ep->conf_frame_Q, ring_len + 2) ||
+	    cq_new(&ep->empty_frame_Q, ring_len + 2) ||
+	    cq_new(&ep->dummy_packets_Q, ring_len + 2)) {
 		err_for = "frame_queues";
 		goto err;
 	}
@@ -199,8 +198,8 @@ u32 fhci_create_ep(struct fhci_usb *usb, enum fhci_mem_alloc data_mem,
 			err_for = "buffer";
 			goto err;
 		}
-		cq_put(ep->empty_frame_Q, pkt);
-		cq_put(ep->dummy_packets_Q, buff);
+		cq_put(&ep->empty_frame_Q, pkt);
+		cq_put(&ep->dummy_packets_Q, buff);
 	}
 
 	/* we put the endpoint parameter RAM right behind the TD ring */
@@ -319,7 +318,7 @@ static void fhci_td_transaction_confirm(struct fhci_usb *usb)
 		if ((buf == DUMMY2_BD_BUFFER) && !(td_status & ~TD_W))
 			continue;
 
-		pkt = cq_get(ep->conf_frame_Q);
+		pkt = cq_get(&ep->conf_frame_Q);
 		if (!pkt)
 			fhci_err(usb->fhci, "no frame to confirm\n");
 
@@ -460,9 +459,9 @@ u32 fhci_host_transaction(struct fhci_usb *usb,
 		out_be16(&td->length, pkt->len);
 
 	/* put the frame to the confirmation queue */
-	cq_put(ep->conf_frame_Q, pkt);
+	cq_put(&ep->conf_frame_Q, pkt);
 
-	if (cq_howmany(ep->conf_frame_Q) == 1)
+	if (cq_howmany(&ep->conf_frame_Q) == 1)
 		out_8(&usb->fhci->regs->usb_comm, USB_CMD_STR_FIFO);
 
 	return 0;
diff --git a/drivers/usb/host/fhci.h b/drivers/usb/host/fhci.h
index 7116284ed21a..2277428ef5d3 100644
--- a/drivers/usb/host/fhci.h
+++ b/drivers/usb/host/fhci.h
@@ -423,9 +423,9 @@ struct endpoint {
 	struct usb_td __iomem *td_base; /* first TD in the ring */
 	struct usb_td __iomem *conf_td; /* next TD for confirm after transac */
 	struct usb_td __iomem *empty_td;/* next TD for new transaction req. */
-	struct kfifo *empty_frame_Q;  /* Empty frames list to use */
-	struct kfifo *conf_frame_Q;   /* frames passed to TDs,waiting for tx */
-	struct kfifo *dummy_packets_Q;/* dummy packets for the CRC overun */
+	struct kfifo empty_frame_Q;  /* Empty frames list to use */
+	struct kfifo conf_frame_Q;   /* frames passed to TDs,waiting for tx */
+	struct kfifo dummy_packets_Q;/* dummy packets for the CRC overun */
 
 	bool already_pushed_dummy_bd;
 };
@@ -493,9 +493,9 @@ static inline struct usb_hcd *fhci_to_hcd(struct fhci_hcd *fhci)
 }
 
 /* fifo of pointers */
-static inline struct kfifo *cq_new(int size)
+static inline int cq_new(struct kfifo *fifo, int size)
 {
-	return kfifo_alloc(size * sizeof(void *), GFP_KERNEL, NULL);
+	return kfifo_alloc(fifo, size * sizeof(void *), GFP_KERNEL, NULL);
 }
 
 static inline void cq_delete(struct kfifo *kfifo)
diff --git a/drivers/usb/serial/usb-serial.c b/drivers/usb/serial/usb-serial.c
index 4543f359be75..44b72d47fac2 100644
--- a/drivers/usb/serial/usb-serial.c
+++ b/drivers/usb/serial/usb-serial.c
@@ -939,9 +939,8 @@ int usb_serial_probe(struct usb_interface *interface,
 			dev_err(&interface->dev, "No free urbs available\n");
 			goto probe_error;
 		}
-		port->write_fifo = kfifo_alloc(PAGE_SIZE, GFP_KERNEL,
-			&port->lock);
-		if (IS_ERR(port->write_fifo))
+		if (kfifo_alloc(&port->write_fifo, PAGE_SIZE, GFP_KERNEL,
+			&port->lock))
 			goto probe_error;
 		buffer_size = le16_to_cpu(endpoint->wMaxPacketSize);
 		port->bulk_out_size = buffer_size;
diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h
index ad6bdf5a5970..c3f8d82efd34 100644
--- a/include/linux/kfifo.h
+++ b/include/linux/kfifo.h
@@ -1,6 +1,7 @@
 /*
- * A simple kernel FIFO implementation.
+ * A generic kernel FIFO implementation.
  *
+ * Copyright (C) 2009 Stefani Seibold <stefani@seibold.net>
  * Copyright (C) 2004 Stelian Pop <stelian@popies.net>
  *
  * This program is free software; you can redistribute it and/or modify
@@ -32,10 +33,10 @@ struct kfifo {
 	spinlock_t *lock;	/* protects concurrent modifications */
 };
 
-extern struct kfifo *kfifo_init(unsigned char *buffer, unsigned int size,
-				gfp_t gfp_mask, spinlock_t *lock);
-extern struct kfifo *kfifo_alloc(unsigned int size, gfp_t gfp_mask,
-				 spinlock_t *lock);
+extern void kfifo_init(struct kfifo *fifo, unsigned char *buffer,
+			unsigned int size, spinlock_t *lock);
+extern __must_check int kfifo_alloc(struct kfifo *fifo, unsigned int size,
+			gfp_t gfp_mask, spinlock_t *lock);
 extern void kfifo_free(struct kfifo *fifo);
 extern unsigned int __kfifo_put(struct kfifo *fifo,
 				const unsigned char *buffer, unsigned int len);
diff --git a/include/scsi/libiscsi.h b/include/scsi/libiscsi.h
index 7394e3bc8f4b..ff92b46f5153 100644
--- a/include/scsi/libiscsi.h
+++ b/include/scsi/libiscsi.h
@@ -28,6 +28,7 @@
 #include <linux/mutex.h>
 #include <linux/timer.h>
 #include <linux/workqueue.h>
+#include <linux/kfifo.h>
 #include <scsi/iscsi_proto.h>
 #include <scsi/iscsi_if.h>
 #include <scsi/scsi_transport_iscsi.h>
@@ -231,7 +232,7 @@ struct iscsi_conn {
 };
 
 struct iscsi_pool {
-	struct kfifo		*queue;		/* FIFO Queue */
+	struct kfifo		queue;		/* FIFO Queue */
 	void			**pool;		/* Pool of elements */
 	int			max;		/* Max number of elements */
 };
diff --git a/include/scsi/libiscsi_tcp.h b/include/scsi/libiscsi_tcp.h
index 9e3182e659db..741ae7ed4394 100644
--- a/include/scsi/libiscsi_tcp.h
+++ b/include/scsi/libiscsi_tcp.h
@@ -80,7 +80,7 @@ struct iscsi_tcp_task {
 	int			data_offset;
 	struct iscsi_r2t_info	*r2t;		/* in progress solict R2T */
 	struct iscsi_pool	r2tpool;
-	struct kfifo		*r2tqueue;
+	struct kfifo		r2tqueue;
 	void			*dd_data;
 };
 
diff --git a/include/scsi/libsrp.h b/include/scsi/libsrp.h
index ba615e4c1d7c..07e3adde21d9 100644
--- a/include/scsi/libsrp.h
+++ b/include/scsi/libsrp.h
@@ -21,7 +21,7 @@ struct srp_buf {
 struct srp_queue {
 	void *pool;
 	void *items;
-	struct kfifo *queue;
+	struct kfifo queue;
 	spinlock_t lock;
 };
 
diff --git a/kernel/kfifo.c b/kernel/kfifo.c
index 3765ff3c1bbe..8da6bb9782bb 100644
--- a/kernel/kfifo.c
+++ b/kernel/kfifo.c
@@ -1,6 +1,7 @@
 /*
- * A simple kernel FIFO implementation.
+ * A generic kernel FIFO implementation.
  *
+ * Copyright (C) 2009 Stefani Seibold <stefani@seibold.net>
  * Copyright (C) 2004 Stelian Pop <stelian@popies.net>
  *
  * This program is free software; you can redistribute it and/or modify
@@ -26,49 +27,51 @@
 #include <linux/kfifo.h>
 #include <linux/log2.h>
 
+static void _kfifo_init(struct kfifo *fifo, unsigned char *buffer,
+		unsigned int size, spinlock_t *lock)
+{
+	fifo->buffer = buffer;
+	fifo->size = size;
+	fifo->lock = lock;
+
+	kfifo_reset(fifo);
+}
+
 /**
- * kfifo_init - allocates a new FIFO using a preallocated buffer
+ * kfifo_init - initialize a FIFO using a preallocated buffer
+ * @fifo: the fifo to assign the buffer
  * @buffer: the preallocated buffer to be used.
  * @size: the size of the internal buffer, this have to be a power of 2.
- * @gfp_mask: get_free_pages mask, passed to kmalloc()
  * @lock: the lock to be used to protect the fifo buffer
  *
- * Do NOT pass the kfifo to kfifo_free() after use! Simply free the
- * &struct kfifo with kfree().
  */
-struct kfifo *kfifo_init(unsigned char *buffer, unsigned int size,
-			 gfp_t gfp_mask, spinlock_t *lock)
+void kfifo_init(struct kfifo *fifo, unsigned char *buffer, unsigned int size,
+			spinlock_t *lock)
 {
-	struct kfifo *fifo;
-
 	/* size must be a power of 2 */
 	BUG_ON(!is_power_of_2(size));
 
-	fifo = kmalloc(sizeof(struct kfifo), gfp_mask);
-	if (!fifo)
-		return ERR_PTR(-ENOMEM);
-
-	fifo->buffer = buffer;
-	fifo->size = size;
-	fifo->in = fifo->out = 0;
-	fifo->lock = lock;
-
-	return fifo;
+	_kfifo_init(fifo, buffer, size, lock);
 }
 EXPORT_SYMBOL(kfifo_init);
 
 /**
- * kfifo_alloc - allocates a new FIFO and its internal buffer
- * @size: the size of the internal buffer to be allocated.
+ * kfifo_alloc - allocates a new FIFO internal buffer
+ * @fifo: the fifo to assign then new buffer
+ * @size: the size of the buffer to be allocated, this have to be a power of 2.
  * @gfp_mask: get_free_pages mask, passed to kmalloc()
  * @lock: the lock to be used to protect the fifo buffer
  *
+ * This function dynamically allocates a new fifo internal buffer
+ *
  * The size will be rounded-up to a power of 2.
+ * The buffer will be release with kfifo_free().
+ * Return 0 if no error, otherwise the an error code
  */
-struct kfifo *kfifo_alloc(unsigned int size, gfp_t gfp_mask, spinlock_t *lock)
+int kfifo_alloc(struct kfifo *fifo, unsigned int size, gfp_t gfp_mask,
+			spinlock_t *lock)
 {
 	unsigned char *buffer;
-	struct kfifo *ret;
 
 	/*
 	 * round up to the next power of 2, since our 'let the indices
@@ -80,26 +83,24 @@ struct kfifo *kfifo_alloc(unsigned int size, gfp_t gfp_mask, spinlock_t *lock)
 	}
 
 	buffer = kmalloc(size, gfp_mask);
-	if (!buffer)
-		return ERR_PTR(-ENOMEM);
-
-	ret = kfifo_init(buffer, size, gfp_mask, lock);
+	if (!buffer) {
+		_kfifo_init(fifo, 0, 0, NULL);
+		return -ENOMEM;
+	}
 
-	if (IS_ERR(ret))
-		kfree(buffer);
+	_kfifo_init(fifo, buffer, size, lock);
 
-	return ret;
+	return 0;
 }
 EXPORT_SYMBOL(kfifo_alloc);
 
 /**
- * kfifo_free - frees the FIFO
+ * kfifo_free - frees the FIFO internal buffer
  * @fifo: the fifo to be freed.
  */
 void kfifo_free(struct kfifo *fifo)
 {
 	kfree(fifo->buffer);
-	kfree(fifo);
 }
 EXPORT_SYMBOL(kfifo_free);
 
diff --git a/net/dccp/probe.c b/net/dccp/probe.c
index dc328425fa20..6230ceb0823e 100644
--- a/net/dccp/probe.c
+++ b/net/dccp/probe.c
@@ -43,7 +43,7 @@ static int bufsize = 64 * 1024;
 static const char procname[] = "dccpprobe";
 
 static struct {
-	struct kfifo	  *fifo;
+	struct kfifo	  fifo;
 	spinlock_t	  lock;
 	wait_queue_head_t wait;
 	struct timespec	  tstart;
@@ -67,7 +67,7 @@ static void printl(const char *fmt, ...)
 	len += vscnprintf(tbuf+len, sizeof(tbuf)-len, fmt, args);
 	va_end(args);
 
-	kfifo_put(dccpw.fifo, tbuf, len);
+	kfifo_put(&dccpw.fifo, tbuf, len);
 	wake_up(&dccpw.wait);
 }
 
@@ -109,7 +109,7 @@ static struct jprobe dccp_send_probe = {
 
 static int dccpprobe_open(struct inode *inode, struct file *file)
 {
-	kfifo_reset(dccpw.fifo);
+	kfifo_reset(&dccpw.fifo);
 	getnstimeofday(&dccpw.tstart);
 	return 0;
 }
@@ -131,11 +131,11 @@ static ssize_t dccpprobe_read(struct file *file, char __user *buf,
 		return -ENOMEM;
 
 	error = wait_event_interruptible(dccpw.wait,
-					 __kfifo_len(dccpw.fifo) != 0);
+					 __kfifo_len(&dccpw.fifo) != 0);
 	if (error)
 		goto out_free;
 
-	cnt = kfifo_get(dccpw.fifo, tbuf, len);
+	cnt = kfifo_get(&dccpw.fifo, tbuf, len);
 	error = copy_to_user(buf, tbuf, cnt) ? -EFAULT : 0;
 
 out_free:
@@ -156,10 +156,8 @@ static __init int dccpprobe_init(void)
 
 	init_waitqueue_head(&dccpw.wait);
 	spin_lock_init(&dccpw.lock);
-	dccpw.fifo = kfifo_alloc(bufsize, GFP_KERNEL, &dccpw.lock);
-	if (IS_ERR(dccpw.fifo))
-		return PTR_ERR(dccpw.fifo);
-
+	if (kfifo_alloc(&dccpw.fifo, bufsize, GFP_KERNEL, &dccpw.lock))
+		return ret;
 	if (!proc_net_fops_create(&init_net, procname, S_IRUSR, &dccpprobe_fops))
 		goto err0;
 
@@ -172,14 +170,14 @@ static __init int dccpprobe_init(void)
 err1:
 	proc_net_remove(&init_net, procname);
 err0:
-	kfifo_free(dccpw.fifo);
+	kfifo_free(&dccpw.fifo);
 	return ret;
 }
 module_init(dccpprobe_init);
 
 static __exit void dccpprobe_exit(void)
 {
-	kfifo_free(dccpw.fifo);
+	kfifo_free(&dccpw.fifo);
 	proc_net_remove(&init_net, procname);
 	unregister_jprobe(&dccp_send_probe);
 
-- 
cgit v1.2.3


From c1e13f25674ed564948ecb7dfe5f83e578892896 Mon Sep 17 00:00:00 2001
From: Stefani Seibold <stefani@seibold.net>
Date: Mon, 21 Dec 2009 14:37:27 -0800
Subject: kfifo: move out spinlock

Move the pointer to the spinlock out of struct kfifo.  Most users in
tree do not actually use a spinlock, so the few exceptions now have to
call kfifo_{get,put}_locked, which takes an extra argument to a
spinlock.

Signed-off-by: Stefani Seibold <stefani@seibold.net>
Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
Acked-by: Mauro Carvalho Chehab <mchehab@redhat.com>
Acked-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/char/nozomi.c                       |  2 +-
 drivers/char/sonypi.c                       | 21 ++++----
 drivers/infiniband/hw/cxgb3/cxio_resource.c | 36 +++++++------
 drivers/media/video/meye.c                  | 35 +++++++------
 drivers/net/wireless/libertas/main.c        |  2 +-
 drivers/platform/x86/fujitsu-laptop.c       | 18 ++++---
 drivers/platform/x86/sony-laptop.c          | 22 ++++----
 drivers/scsi/libiscsi.c                     |  2 +-
 drivers/scsi/libiscsi_tcp.c                 |  2 +-
 drivers/scsi/libsrp.c                       |  9 ++--
 drivers/usb/host/fhci.h                     |  2 +-
 drivers/usb/serial/generic.c                |  4 +-
 drivers/usb/serial/usb-serial.c             |  3 +-
 include/linux/kfifo.h                       | 80 +++++++++++++----------------
 kernel/kfifo.c                              | 17 +++---
 net/dccp/probe.c                            |  6 +--
 16 files changed, 131 insertions(+), 130 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/char/nozomi.c b/drivers/char/nozomi.c
index 0f39bec28b45..935b30d80adf 100644
--- a/drivers/char/nozomi.c
+++ b/drivers/char/nozomi.c
@@ -686,7 +686,7 @@ static int nozomi_read_config_table(struct nozomi *dc)
 
 		for (i = PORT_MDM; i < MAX_PORT; i++) {
 			kfifo_alloc(&dc->port[i].fifo_ul,
-				FIFO_BUFFER_SIZE_UL, GFP_ATOMIC, NULL);
+				FIFO_BUFFER_SIZE_UL, GFP_ATOMIC);
 			memset(&dc->port[i].ctrl_dl, 0, sizeof(struct ctrl_dl));
 			memset(&dc->port[i].ctrl_ul, 0, sizeof(struct ctrl_ul));
 		}
diff --git a/drivers/char/sonypi.c b/drivers/char/sonypi.c
index 9e6efb1f029f..dbcb3bd192c7 100644
--- a/drivers/char/sonypi.c
+++ b/drivers/char/sonypi.c
@@ -777,8 +777,9 @@ static void input_keyrelease(struct work_struct *work)
 {
 	struct sonypi_keypress kp;
 
-	while (kfifo_get(&sonypi_device.input_fifo, (unsigned char *)&kp,
-			 sizeof(kp)) == sizeof(kp)) {
+	while (kfifo_get_locked(&sonypi_device.input_fifo, (unsigned char *)&kp,
+			 sizeof(kp), &sonypi_device.input_fifo_lock)
+			== sizeof(kp)) {
 		msleep(10);
 		input_report_key(kp.dev, kp.key, 0);
 		input_sync(kp.dev);
@@ -827,8 +828,9 @@ static void sonypi_report_input_event(u8 event)
 	if (kp.dev) {
 		input_report_key(kp.dev, kp.key, 1);
 		input_sync(kp.dev);
-		kfifo_put(&sonypi_device.input_fifo,
-			  (unsigned char *)&kp, sizeof(kp));
+		kfifo_put_locked(&sonypi_device.input_fifo,
+			(unsigned char *)&kp, sizeof(kp),
+			&sonypi_device.input_fifo_lock);
 		schedule_work(&sonypi_device.input_work);
 	}
 }
@@ -880,7 +882,8 @@ found:
 		acpi_bus_generate_proc_event(sonypi_acpi_device, 1, event);
 #endif
 
-	kfifo_put(&sonypi_device.fifo, (unsigned char *)&event, sizeof(event));
+	kfifo_put_locked(&sonypi_device.fifo, (unsigned char *)&event,
+			sizeof(event), &sonypi_device.fifo_lock);
 	kill_fasync(&sonypi_device.fifo_async, SIGIO, POLL_IN);
 	wake_up_interruptible(&sonypi_device.fifo_proc_list);
 
@@ -929,7 +932,8 @@ static ssize_t sonypi_misc_read(struct file *file, char __user *buf,
 		return ret;
 
 	while (ret < count &&
-	       (kfifo_get(&sonypi_device.fifo, &c, sizeof(c)) == sizeof(c))) {
+	       (kfifo_get_locked(&sonypi_device.fifo, &c, sizeof(c),
+				 &sonypi_device.fifo_lock) == sizeof(c))) {
 		if (put_user(c, buf++))
 			return -EFAULT;
 		ret++;
@@ -1313,8 +1317,7 @@ static int __devinit sonypi_probe(struct platform_device *dev)
 			"http://www.linux.it/~malattia/wiki/index.php/Sony_drivers\n");
 
 	spin_lock_init(&sonypi_device.fifo_lock);
-	error = kfifo_alloc(&sonypi_device.fifo, SONYPI_BUF_SIZE, GFP_KERNEL,
-					 &sonypi_device.fifo_lock);
+	error = kfifo_alloc(&sonypi_device.fifo, SONYPI_BUF_SIZE, GFP_KERNEL);
 	if (error) {
 		printk(KERN_ERR "sonypi: kfifo_alloc failed\n");
 		return error;
@@ -1394,7 +1397,7 @@ static int __devinit sonypi_probe(struct platform_device *dev)
 
 		spin_lock_init(&sonypi_device.input_fifo_lock);
 		error = kfifo_alloc(&sonypi_device.input_fifo, SONYPI_BUF_SIZE,
-				GFP_KERNEL, &sonypi_device.input_fifo_lock);
+				GFP_KERNEL);
 		if (error) {
 			printk(KERN_ERR "sonypi: kfifo_alloc failed\n");
 			goto err_inpdev_unregister;
diff --git a/drivers/infiniband/hw/cxgb3/cxio_resource.c b/drivers/infiniband/hw/cxgb3/cxio_resource.c
index 65072bdfc1bf..98f24e6d906e 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_resource.c
+++ b/drivers/infiniband/hw/cxgb3/cxio_resource.c
@@ -55,7 +55,7 @@ static int __cxio_init_resource_fifo(struct kfifo *fifo,
 	u32 rarray[16];
 	spin_lock_init(fifo_lock);
 
-	if (kfifo_alloc(fifo, nr * sizeof(u32), GFP_KERNEL, fifo_lock))
+	if (kfifo_alloc(fifo, nr * sizeof(u32), GFP_KERNEL))
 		return -ENOMEM;
 
 	for (i = 0; i < skip_low + skip_high; i++)
@@ -86,7 +86,8 @@ static int __cxio_init_resource_fifo(struct kfifo *fifo,
 			__kfifo_put(fifo, (unsigned char *) &i, sizeof(u32));
 
 	for (i = 0; i < skip_low + skip_high; i++)
-		kfifo_get(fifo, (unsigned char *) &entry, sizeof(u32));
+		kfifo_get_locked(fifo, (unsigned char *) &entry,
+				sizeof(u32), fifo_lock);
 	return 0;
 }
 
@@ -113,8 +114,7 @@ static int cxio_init_qpid_fifo(struct cxio_rdev *rdev_p)
 	spin_lock_init(&rdev_p->rscp->qpid_fifo_lock);
 
 	if (kfifo_alloc(&rdev_p->rscp->qpid_fifo, T3_MAX_NUM_QP * sizeof(u32),
-					      GFP_KERNEL,
-					      &rdev_p->rscp->qpid_fifo_lock))
+					      GFP_KERNEL))
 		return -ENOMEM;
 
 	for (i = 16; i < T3_MAX_NUM_QP; i++)
@@ -177,33 +177,37 @@ tpt_err:
 /*
  * returns 0 if no resource available
  */
-static u32 cxio_hal_get_resource(struct kfifo *fifo)
+static u32 cxio_hal_get_resource(struct kfifo *fifo, spinlock_t * lock)
 {
 	u32 entry;
-	if (kfifo_get(fifo, (unsigned char *) &entry, sizeof(u32)))
+	if (kfifo_get_locked(fifo, (unsigned char *) &entry, sizeof(u32), lock))
 		return entry;
 	else
 		return 0;	/* fifo emptry */
 }
 
-static void cxio_hal_put_resource(struct kfifo *fifo, u32 entry)
+static void cxio_hal_put_resource(struct kfifo *fifo, spinlock_t * lock,
+		u32 entry)
 {
-	BUG_ON(kfifo_put(fifo, (unsigned char *) &entry, sizeof(u32)) == 0);
+	BUG_ON(
+	kfifo_put_locked(fifo, (unsigned char *) &entry, sizeof(u32), lock)
+	== 0);
 }
 
 u32 cxio_hal_get_stag(struct cxio_hal_resource *rscp)
 {
-	return cxio_hal_get_resource(&rscp->tpt_fifo);
+	return cxio_hal_get_resource(&rscp->tpt_fifo, &rscp->tpt_fifo_lock);
 }
 
 void cxio_hal_put_stag(struct cxio_hal_resource *rscp, u32 stag)
 {
-	cxio_hal_put_resource(&rscp->tpt_fifo, stag);
+	cxio_hal_put_resource(&rscp->tpt_fifo, &rscp->tpt_fifo_lock, stag);
 }
 
 u32 cxio_hal_get_qpid(struct cxio_hal_resource *rscp)
 {
-	u32 qpid = cxio_hal_get_resource(&rscp->qpid_fifo);
+	u32 qpid = cxio_hal_get_resource(&rscp->qpid_fifo,
+			&rscp->qpid_fifo_lock);
 	PDBG("%s qpid 0x%x\n", __func__, qpid);
 	return qpid;
 }
@@ -211,27 +215,27 @@ u32 cxio_hal_get_qpid(struct cxio_hal_resource *rscp)
 void cxio_hal_put_qpid(struct cxio_hal_resource *rscp, u32 qpid)
 {
 	PDBG("%s qpid 0x%x\n", __func__, qpid);
-	cxio_hal_put_resource(&rscp->qpid_fifo, qpid);
+	cxio_hal_put_resource(&rscp->qpid_fifo, &rscp->qpid_fifo_lock, qpid);
 }
 
 u32 cxio_hal_get_cqid(struct cxio_hal_resource *rscp)
 {
-	return cxio_hal_get_resource(&rscp->cqid_fifo);
+	return cxio_hal_get_resource(&rscp->cqid_fifo, &rscp->cqid_fifo_lock);
 }
 
 void cxio_hal_put_cqid(struct cxio_hal_resource *rscp, u32 cqid)
 {
-	cxio_hal_put_resource(&rscp->cqid_fifo, cqid);
+	cxio_hal_put_resource(&rscp->cqid_fifo, &rscp->cqid_fifo_lock, cqid);
 }
 
 u32 cxio_hal_get_pdid(struct cxio_hal_resource *rscp)
 {
-	return cxio_hal_get_resource(&rscp->pdid_fifo);
+	return cxio_hal_get_resource(&rscp->pdid_fifo, &rscp->pdid_fifo_lock);
 }
 
 void cxio_hal_put_pdid(struct cxio_hal_resource *rscp, u32 pdid)
 {
-	cxio_hal_put_resource(&rscp->pdid_fifo, pdid);
+	cxio_hal_put_resource(&rscp->pdid_fifo, &rscp->pdid_fifo_lock, pdid);
 }
 
 void cxio_hal_destroy_resource(struct cxio_hal_resource *rscp)
diff --git a/drivers/media/video/meye.c b/drivers/media/video/meye.c
index dacbbb839b9e..38bcedfd9fec 100644
--- a/drivers/media/video/meye.c
+++ b/drivers/media/video/meye.c
@@ -800,8 +800,8 @@ again:
 		return IRQ_HANDLED;
 
 	if (meye.mchip_mode == MCHIP_HIC_MODE_CONT_OUT) {
-		if (kfifo_get(&meye.grabq, (unsigned char *)&reqnr,
-			      sizeof(int)) != sizeof(int)) {
+		if (kfifo_get_locked(&meye.grabq, (unsigned char *)&reqnr,
+			      sizeof(int), &meye.grabq_lock) != sizeof(int)) {
 			mchip_free_frame();
 			return IRQ_HANDLED;
 		}
@@ -811,7 +811,8 @@ again:
 		meye.grab_buffer[reqnr].state = MEYE_BUF_DONE;
 		do_gettimeofday(&meye.grab_buffer[reqnr].timestamp);
 		meye.grab_buffer[reqnr].sequence = sequence++;
-		kfifo_put(&meye.doneq, (unsigned char *)&reqnr, sizeof(int));
+		kfifo_put_locked(&meye.doneq, (unsigned char *)&reqnr,
+				sizeof(int), &meye.doneq_lock);
 		wake_up_interruptible(&meye.proc_list);
 	} else {
 		int size;
@@ -820,8 +821,8 @@ again:
 			mchip_free_frame();
 			goto again;
 		}
-		if (kfifo_get(&meye.grabq, (unsigned char *)&reqnr,
-			      sizeof(int)) != sizeof(int)) {
+		if (kfifo_get_locked(&meye.grabq, (unsigned char *)&reqnr,
+			      sizeof(int), &meye.grabq_lock) != sizeof(int)) {
 			mchip_free_frame();
 			goto again;
 		}
@@ -831,7 +832,8 @@ again:
 		meye.grab_buffer[reqnr].state = MEYE_BUF_DONE;
 		do_gettimeofday(&meye.grab_buffer[reqnr].timestamp);
 		meye.grab_buffer[reqnr].sequence = sequence++;
-		kfifo_put(&meye.doneq, (unsigned char *)&reqnr, sizeof(int));
+		kfifo_put_locked(&meye.doneq, (unsigned char *)&reqnr,
+				sizeof(int), &meye.doneq_lock);
 		wake_up_interruptible(&meye.proc_list);
 	}
 	mchip_free_frame();
@@ -933,7 +935,8 @@ static int meyeioc_qbuf_capt(int *nb)
 		mchip_cont_compression_start();
 
 	meye.grab_buffer[*nb].state = MEYE_BUF_USING;
-	kfifo_put(&meye.grabq, (unsigned char *)nb, sizeof(int));
+	kfifo_put_locked(&meye.grabq, (unsigned char *)nb, sizeof(int),
+			 &meye.grabq_lock);
 	mutex_unlock(&meye.lock);
 
 	return 0;
@@ -965,7 +968,8 @@ static int meyeioc_sync(struct file *file, void *fh, int *i)
 		/* fall through */
 	case MEYE_BUF_DONE:
 		meye.grab_buffer[*i].state = MEYE_BUF_UNUSED;
-		kfifo_get(&meye.doneq, (unsigned char *)&unused, sizeof(int));
+		kfifo_get_locked(&meye.doneq, (unsigned char *)&unused,
+				sizeof(int), &meye.doneq_lock);
 	}
 	*i = meye.grab_buffer[*i].size;
 	mutex_unlock(&meye.lock);
@@ -1452,7 +1456,8 @@ static int vidioc_qbuf(struct file *file, void *fh, struct v4l2_buffer *buf)
 	buf->flags |= V4L2_BUF_FLAG_QUEUED;
 	buf->flags &= ~V4L2_BUF_FLAG_DONE;
 	meye.grab_buffer[buf->index].state = MEYE_BUF_USING;
-	kfifo_put(&meye.grabq, (unsigned char *)&buf->index, sizeof(int));
+	kfifo_put_locked(&meye.grabq, (unsigned char *)&buf->index,
+			sizeof(int), &meye.grabq_lock);
 	mutex_unlock(&meye.lock);
 
 	return 0;
@@ -1478,8 +1483,8 @@ static int vidioc_dqbuf(struct file *file, void *fh, struct v4l2_buffer *buf)
 		return -EINTR;
 	}
 
-	if (!kfifo_get(&meye.doneq, (unsigned char *)&reqnr,
-		       sizeof(int))) {
+	if (!kfifo_get_locked(&meye.doneq, (unsigned char *)&reqnr,
+		       sizeof(int), &meye.doneq_lock)) {
 		mutex_unlock(&meye.lock);
 		return -EBUSY;
 	}
@@ -1745,14 +1750,14 @@ static int __devinit meye_probe(struct pci_dev *pcidev,
 	}
 
 	spin_lock_init(&meye.grabq_lock);
-	if (kfifo_alloc(&meye.grabq, sizeof(int) * MEYE_MAX_BUFNBRS, GFP_KERNEL,
-				 &meye.grabq_lock)) {
+	if (kfifo_alloc(&meye.grabq, sizeof(int) * MEYE_MAX_BUFNBRS,
+				GFP_KERNEL)) {
 		printk(KERN_ERR "meye: fifo allocation failed\n");
 		goto outkfifoalloc1;
 	}
 	spin_lock_init(&meye.doneq_lock);
-	if (kfifo_alloc(&meye.doneq, sizeof(int) * MEYE_MAX_BUFNBRS, GFP_KERNEL,
-				 &meye.doneq_lock)) {
+	if (kfifo_alloc(&meye.doneq, sizeof(int) * MEYE_MAX_BUFNBRS,
+				GFP_KERNEL)) {
 		printk(KERN_ERR "meye: fifo allocation failed\n");
 		goto outkfifoalloc2;
 	}
diff --git a/drivers/net/wireless/libertas/main.c b/drivers/net/wireless/libertas/main.c
index 403909287414..2cc7ecd8d123 100644
--- a/drivers/net/wireless/libertas/main.c
+++ b/drivers/net/wireless/libertas/main.c
@@ -883,7 +883,7 @@ static int lbs_init_adapter(struct lbs_private *priv)
 	priv->resp_len[0] = priv->resp_len[1] = 0;
 
 	/* Create the event FIFO */
-	ret = kfifo_alloc(&priv->event_fifo, sizeof(u32) * 16, GFP_KERNEL, NULL);
+	ret = kfifo_alloc(&priv->event_fifo, sizeof(u32) * 16, GFP_KERNEL);
 	if (ret) {
 		lbs_pr_err("Out of memory allocating event FIFO buffer\n");
 		goto out;
diff --git a/drivers/platform/x86/fujitsu-laptop.c b/drivers/platform/x86/fujitsu-laptop.c
index f999fba0e25e..13dc7bedcfce 100644
--- a/drivers/platform/x86/fujitsu-laptop.c
+++ b/drivers/platform/x86/fujitsu-laptop.c
@@ -825,7 +825,7 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device)
 	/* kfifo */
 	spin_lock_init(&fujitsu_hotkey->fifo_lock);
 	error = kfifo_alloc(&fujitsu_hotkey->fifo, RINGBUFFERSIZE * sizeof(int),
-			GFP_KERNEL, &fujitsu_hotkey->fifo_lock);
+			GFP_KERNEL);
 	if (error) {
 		printk(KERN_ERR "kfifo_alloc failed\n");
 		goto err_stop;
@@ -1006,9 +1006,10 @@ static void acpi_fujitsu_hotkey_notify(struct acpi_device *device, u32 event)
 				vdbg_printk(FUJLAPTOP_DBG_TRACE,
 					"Push keycode into ringbuffer [%d]\n",
 					keycode);
-				status = kfifo_put(&fujitsu_hotkey->fifo,
+				status = kfifo_put_locked(&fujitsu_hotkey->fifo,
 						   (unsigned char *)&keycode,
-						   sizeof(keycode));
+						   sizeof(keycode),
+						   &fujitsu_hotkey->fifo_lock);
 				if (status != sizeof(keycode)) {
 					vdbg_printk(FUJLAPTOP_DBG_WARN,
 					    "Could not push keycode [0x%x]\n",
@@ -1019,11 +1020,12 @@ static void acpi_fujitsu_hotkey_notify(struct acpi_device *device, u32 event)
 				}
 			} else if (keycode == 0) {
 				while ((status =
-					kfifo_get
-					(&fujitsu_hotkey->fifo, (unsigned char *)
-					 &keycode_r,
-					 sizeof
-					 (keycode_r))) == sizeof(keycode_r)) {
+					kfifo_get_locked(
+					 &fujitsu_hotkey->fifo,
+					 (unsigned char *) &keycode_r,
+					 sizeof(keycode_r),
+					 &fujitsu_hotkey->fifo_lock))
+					 == sizeof(keycode_r)) {
 					input_report_key(input, keycode_r, 0);
 					input_sync(input);
 					vdbg_printk(FUJLAPTOP_DBG_TRACE,
diff --git a/drivers/platform/x86/sony-laptop.c b/drivers/platform/x86/sony-laptop.c
index 04625a048e74..1538a0a3c0af 100644
--- a/drivers/platform/x86/sony-laptop.c
+++ b/drivers/platform/x86/sony-laptop.c
@@ -300,8 +300,9 @@ static void do_sony_laptop_release_key(struct work_struct *work)
 {
 	struct sony_laptop_keypress kp;
 
-	while (kfifo_get(&sony_laptop_input.fifo, (unsigned char *)&kp,
-			 sizeof(kp)) == sizeof(kp)) {
+	while (kfifo_get_locked(&sony_laptop_input.fifo, (unsigned char *)&kp,
+			sizeof(kp), &sony_laptop_input.fifo_lock)
+			== sizeof(kp)) {
 		msleep(10);
 		input_report_key(kp.dev, kp.key, 0);
 		input_sync(kp.dev);
@@ -362,8 +363,9 @@ static void sony_laptop_report_input_event(u8 event)
 		/* we emit the scancode so we can always remap the key */
 		input_event(kp.dev, EV_MSC, MSC_SCAN, event);
 		input_sync(kp.dev);
-		kfifo_put(&sony_laptop_input.fifo,
-			  (unsigned char *)&kp, sizeof(kp));
+		kfifo_put_locked(&sony_laptop_input.fifo,
+			  (unsigned char *)&kp, sizeof(kp),
+			  &sony_laptop_input.fifo_lock);
 
 		if (!work_pending(&sony_laptop_release_key_work))
 			queue_work(sony_laptop_input.wq,
@@ -386,8 +388,7 @@ static int sony_laptop_setup_input(struct acpi_device *acpi_device)
 	/* kfifo */
 	spin_lock_init(&sony_laptop_input.fifo_lock);
 	error =
-	 kfifo_alloc(&sony_laptop_input.fifo, SONY_LAPTOP_BUF_SIZE, GFP_KERNEL,
-			    &sony_laptop_input.fifo_lock);
+	 kfifo_alloc(&sony_laptop_input.fifo, SONY_LAPTOP_BUF_SIZE, GFP_KERNEL);
 	if (error) {
 		printk(KERN_ERR DRV_PFX "kfifo_alloc failed\n");
 		goto err_dec_users;
@@ -2129,7 +2130,8 @@ static ssize_t sonypi_misc_read(struct file *file, char __user *buf,
 		return ret;
 
 	while (ret < count &&
-	       (kfifo_get(&sonypi_compat.fifo, &c, sizeof(c)) == sizeof(c))) {
+	       (kfifo_get_locked(&sonypi_compat.fifo, &c, sizeof(c),
+			  &sonypi_compat.fifo_lock) == sizeof(c))) {
 		if (put_user(c, buf++))
 			return -EFAULT;
 		ret++;
@@ -2308,7 +2310,8 @@ static struct miscdevice sonypi_misc_device = {
 
 static void sonypi_compat_report_event(u8 event)
 {
-	kfifo_put(&sonypi_compat.fifo, (unsigned char *)&event, sizeof(event));
+	kfifo_put_locked(&sonypi_compat.fifo, (unsigned char *)&event,
+			sizeof(event), &sonypi_compat.fifo_lock);
 	kill_fasync(&sonypi_compat.fifo_async, SIGIO, POLL_IN);
 	wake_up_interruptible(&sonypi_compat.fifo_proc_list);
 }
@@ -2319,8 +2322,7 @@ static int sonypi_compat_init(void)
 
 	spin_lock_init(&sonypi_compat.fifo_lock);
 	error =
-	 kfifo_alloc(&sonypi_compat.fifo, SONY_LAPTOP_BUF_SIZE, GFP_KERNEL,
-					 &sonypi_compat.fifo_lock);
+	 kfifo_alloc(&sonypi_compat.fifo, SONY_LAPTOP_BUF_SIZE, GFP_KERNEL);
 	if (error) {
 		printk(KERN_ERR DRV_PFX "kfifo_alloc failed\n");
 		return error;
diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c
index cf0aa7e90be9..1bccbc1e588e 100644
--- a/drivers/scsi/libiscsi.c
+++ b/drivers/scsi/libiscsi.c
@@ -2461,7 +2461,7 @@ iscsi_pool_init(struct iscsi_pool *q, int max, void ***items, int item_size)
 	if (q->pool == NULL)
 		return -ENOMEM;
 
-	kfifo_init(&q->queue, (void*)q->pool, max * sizeof(void*), NULL);
+	kfifo_init(&q->queue, (void*)q->pool, max * sizeof(void*));
 
 	for (i = 0; i < max; i++) {
 		q->pool[i] = kzalloc(item_size, GFP_KERNEL);
diff --git a/drivers/scsi/libiscsi_tcp.c b/drivers/scsi/libiscsi_tcp.c
index a83ee56a185e..41643c860d26 100644
--- a/drivers/scsi/libiscsi_tcp.c
+++ b/drivers/scsi/libiscsi_tcp.c
@@ -1128,7 +1128,7 @@ int iscsi_tcp_r2tpool_alloc(struct iscsi_session *session)
 
 		/* R2T xmit queue */
 		if (kfifo_alloc(&tcp_task->r2tqueue,
-		      session->max_r2t * 4 * sizeof(void*), GFP_KERNEL, NULL)) {
+		      session->max_r2t * 4 * sizeof(void*), GFP_KERNEL)) {
 			iscsi_pool_free(&tcp_task->r2tpool);
 			goto r2t_alloc_fail;
 		}
diff --git a/drivers/scsi/libsrp.c b/drivers/scsi/libsrp.c
index b1b5e51ca8e3..db1b41c55fd3 100644
--- a/drivers/scsi/libsrp.c
+++ b/drivers/scsi/libsrp.c
@@ -58,8 +58,7 @@ static int srp_iu_pool_alloc(struct srp_queue *q, size_t max,
 		goto free_pool;
 
 	spin_lock_init(&q->lock);
-	kfifo_init(&q->queue, (void *) q->pool, max * sizeof(void *),
-			      &q->lock);
+	kfifo_init(&q->queue, (void *) q->pool, max * sizeof(void *));
 
 	for (i = 0, iue = q->items; i < max; i++) {
 		__kfifo_put(&q->queue, (void *) &iue, sizeof(void *));
@@ -164,7 +163,8 @@ struct iu_entry *srp_iu_get(struct srp_target *target)
 {
 	struct iu_entry *iue = NULL;
 
-	kfifo_get(&target->iu_queue.queue, (void *) &iue, sizeof(void *));
+	kfifo_get_locked(&target->iu_queue.queue, (void *) &iue,
+			sizeof(void *), &target->iu_queue.lock);
 	if (!iue)
 		return iue;
 	iue->target = target;
@@ -176,7 +176,8 @@ EXPORT_SYMBOL_GPL(srp_iu_get);
 
 void srp_iu_put(struct iu_entry *iue)
 {
-	kfifo_put(&iue->target->iu_queue.queue, (void *) &iue, sizeof(void *));
+	kfifo_put_locked(&iue->target->iu_queue.queue, (void *) &iue,
+			sizeof(void *), &iue->target->iu_queue.lock);
 }
 EXPORT_SYMBOL_GPL(srp_iu_put);
 
diff --git a/drivers/usb/host/fhci.h b/drivers/usb/host/fhci.h
index 2277428ef5d3..a76da201183b 100644
--- a/drivers/usb/host/fhci.h
+++ b/drivers/usb/host/fhci.h
@@ -495,7 +495,7 @@ static inline struct usb_hcd *fhci_to_hcd(struct fhci_hcd *fhci)
 /* fifo of pointers */
 static inline int cq_new(struct kfifo *fifo, int size)
 {
-	return kfifo_alloc(fifo, size * sizeof(void *), GFP_KERNEL, NULL);
+	return kfifo_alloc(fifo, size * sizeof(void *), GFP_KERNEL);
 }
 
 static inline void cq_delete(struct kfifo *kfifo)
diff --git a/drivers/usb/serial/generic.c b/drivers/usb/serial/generic.c
index bbe005cefcfb..61eef18218be 100644
--- a/drivers/usb/serial/generic.c
+++ b/drivers/usb/serial/generic.c
@@ -285,7 +285,7 @@ static int usb_serial_generic_write_start(struct usb_serial_port *port)
 		return 0;
 
 	data = port->write_urb->transfer_buffer;
-	count = kfifo_get(port->write_fifo, data, port->bulk_out_size);
+	count = kfifo_get_locked(port->write_fifo, data, port->bulk_out_size, &port->lock);
 	usb_serial_debug_data(debug, &port->dev, __func__, count, data);
 
 	/* set up our urb */
@@ -345,7 +345,7 @@ int usb_serial_generic_write(struct tty_struct *tty,
 		return usb_serial_multi_urb_write(tty, port,
 						  buf, count);
 
-	count = kfifo_put(port->write_fifo, buf, count);
+	count = kfifo_put_locked(port->write_fifo, buf, count, &port->lock);
 	result = usb_serial_generic_write_start(port);
 
 	if (result >= 0)
diff --git a/drivers/usb/serial/usb-serial.c b/drivers/usb/serial/usb-serial.c
index 44b72d47fac2..636a4f23445e 100644
--- a/drivers/usb/serial/usb-serial.c
+++ b/drivers/usb/serial/usb-serial.c
@@ -939,8 +939,7 @@ int usb_serial_probe(struct usb_interface *interface,
 			dev_err(&interface->dev, "No free urbs available\n");
 			goto probe_error;
 		}
-		if (kfifo_alloc(&port->write_fifo, PAGE_SIZE, GFP_KERNEL,
-			&port->lock))
+		if (kfifo_alloc(port->write_fifo, PAGE_SIZE, GFP_KERNEL))
 			goto probe_error;
 		buffer_size = le16_to_cpu(endpoint->wMaxPacketSize);
 		port->bulk_out_size = buffer_size;
diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h
index c3f8d82efd34..e0f5c9d4197d 100644
--- a/include/linux/kfifo.h
+++ b/include/linux/kfifo.h
@@ -30,13 +30,12 @@ struct kfifo {
 	unsigned int size;	/* the size of the allocated buffer */
 	unsigned int in;	/* data is added at offset (in % size) */
 	unsigned int out;	/* data is extracted from off. (out % size) */
-	spinlock_t *lock;	/* protects concurrent modifications */
 };
 
 extern void kfifo_init(struct kfifo *fifo, unsigned char *buffer,
-			unsigned int size, spinlock_t *lock);
+			unsigned int size);
 extern __must_check int kfifo_alloc(struct kfifo *fifo, unsigned int size,
-			gfp_t gfp_mask, spinlock_t *lock);
+			gfp_t gfp_mask);
 extern void kfifo_free(struct kfifo *fifo);
 extern unsigned int __kfifo_put(struct kfifo *fifo,
 				const unsigned char *buffer, unsigned int len);
@@ -58,58 +57,67 @@ static inline void __kfifo_reset(struct kfifo *fifo)
  */
 static inline void kfifo_reset(struct kfifo *fifo)
 {
-	unsigned long flags;
-
-	spin_lock_irqsave(fifo->lock, flags);
-
 	__kfifo_reset(fifo);
+}
+
+/**
+ * __kfifo_len - returns the number of bytes available in the FIFO
+ * @fifo: the fifo to be used.
+ */
+static inline unsigned int __kfifo_len(struct kfifo *fifo)
+{
+	register unsigned int	out;
 
-	spin_unlock_irqrestore(fifo->lock, flags);
+	out = fifo->out;
+	smp_rmb();
+	return fifo->in - out;
 }
 
 /**
- * kfifo_put - puts some data into the FIFO
+ * kfifo_put_locked - puts some data into the FIFO using a spinlock for locking
  * @fifo: the fifo to be used.
- * @buffer: the data to be added.
- * @len: the length of the data to be added.
+ * @from: the data to be added.
+ * @n: the length of the data to be added.
+ * @lock: pointer to the spinlock to use for locking.
  *
- * This function copies at most @len bytes from the @buffer into
+ * This function copies at most @len bytes from the @from buffer into
  * the FIFO depending on the free space, and returns the number of
  * bytes copied.
  */
-static inline unsigned int kfifo_put(struct kfifo *fifo,
-				const unsigned char *buffer, unsigned int len)
+static inline __must_check unsigned int kfifo_put_locked(struct kfifo *fifo,
+		const unsigned char *from, unsigned int n, spinlock_t *lock)
 {
 	unsigned long flags;
 	unsigned int ret;
 
-	spin_lock_irqsave(fifo->lock, flags);
+	spin_lock_irqsave(lock, flags);
 
-	ret = __kfifo_put(fifo, buffer, len);
+	ret = __kfifo_put(fifo, from, n);
 
-	spin_unlock_irqrestore(fifo->lock, flags);
+	spin_unlock_irqrestore(lock, flags);
 
 	return ret;
 }
 
 /**
- * kfifo_get - gets some data from the FIFO
+ * kfifo_get_locked - gets some data from the FIFO using a spinlock for locking
  * @fifo: the fifo to be used.
- * @buffer: where the data must be copied.
- * @len: the size of the destination buffer.
+ * @to: where the data must be copied.
+ * @n: the size of the destination buffer.
+ * @lock: pointer to the spinlock to use for locking.
  *
  * This function copies at most @len bytes from the FIFO into the
- * @buffer and returns the number of copied bytes.
+ * @to buffer and returns the number of copied bytes.
  */
-static inline unsigned int kfifo_get(struct kfifo *fifo,
-				     unsigned char *buffer, unsigned int len)
+static inline __must_check unsigned int kfifo_get_locked(struct kfifo *fifo,
+	unsigned char *to, unsigned int n, spinlock_t *lock)
 {
 	unsigned long flags;
 	unsigned int ret;
 
-	spin_lock_irqsave(fifo->lock, flags);
+	spin_lock_irqsave(lock, flags);
 
-	ret = __kfifo_get(fifo, buffer, len);
+	ret = __kfifo_get(fifo, to, n);
 
 	/*
 	 * optimization: if the FIFO is empty, set the indices to 0
@@ -118,36 +126,18 @@ static inline unsigned int kfifo_get(struct kfifo *fifo,
 	if (fifo->in == fifo->out)
 		fifo->in = fifo->out = 0;
 
-	spin_unlock_irqrestore(fifo->lock, flags);
+	spin_unlock_irqrestore(lock, flags);
 
 	return ret;
 }
 
-/**
- * __kfifo_len - returns the number of bytes available in the FIFO, no locking version
- * @fifo: the fifo to be used.
- */
-static inline unsigned int __kfifo_len(struct kfifo *fifo)
-{
-	return fifo->in - fifo->out;
-}
-
 /**
  * kfifo_len - returns the number of bytes available in the FIFO
  * @fifo: the fifo to be used.
  */
 static inline unsigned int kfifo_len(struct kfifo *fifo)
 {
-	unsigned long flags;
-	unsigned int ret;
-
-	spin_lock_irqsave(fifo->lock, flags);
-
-	ret = __kfifo_len(fifo);
-
-	spin_unlock_irqrestore(fifo->lock, flags);
-
-	return ret;
+	return __kfifo_len(fifo);
 }
 
 #endif
diff --git a/kernel/kfifo.c b/kernel/kfifo.c
index 8da6bb9782bb..4950bdbe3477 100644
--- a/kernel/kfifo.c
+++ b/kernel/kfifo.c
@@ -28,11 +28,10 @@
 #include <linux/log2.h>
 
 static void _kfifo_init(struct kfifo *fifo, unsigned char *buffer,
-		unsigned int size, spinlock_t *lock)
+		unsigned int size)
 {
 	fifo->buffer = buffer;
 	fifo->size = size;
-	fifo->lock = lock;
 
 	kfifo_reset(fifo);
 }
@@ -42,16 +41,14 @@ static void _kfifo_init(struct kfifo *fifo, unsigned char *buffer,
  * @fifo: the fifo to assign the buffer
  * @buffer: the preallocated buffer to be used.
  * @size: the size of the internal buffer, this have to be a power of 2.
- * @lock: the lock to be used to protect the fifo buffer
  *
  */
-void kfifo_init(struct kfifo *fifo, unsigned char *buffer, unsigned int size,
-			spinlock_t *lock)
+void kfifo_init(struct kfifo *fifo, unsigned char *buffer, unsigned int size)
 {
 	/* size must be a power of 2 */
 	BUG_ON(!is_power_of_2(size));
 
-	_kfifo_init(fifo, buffer, size, lock);
+	_kfifo_init(fifo, buffer, size);
 }
 EXPORT_SYMBOL(kfifo_init);
 
@@ -60,7 +57,6 @@ EXPORT_SYMBOL(kfifo_init);
  * @fifo: the fifo to assign then new buffer
  * @size: the size of the buffer to be allocated, this have to be a power of 2.
  * @gfp_mask: get_free_pages mask, passed to kmalloc()
- * @lock: the lock to be used to protect the fifo buffer
  *
  * This function dynamically allocates a new fifo internal buffer
  *
@@ -68,8 +64,7 @@ EXPORT_SYMBOL(kfifo_init);
  * The buffer will be release with kfifo_free().
  * Return 0 if no error, otherwise the an error code
  */
-int kfifo_alloc(struct kfifo *fifo, unsigned int size, gfp_t gfp_mask,
-			spinlock_t *lock)
+int kfifo_alloc(struct kfifo *fifo, unsigned int size, gfp_t gfp_mask)
 {
 	unsigned char *buffer;
 
@@ -84,11 +79,11 @@ int kfifo_alloc(struct kfifo *fifo, unsigned int size, gfp_t gfp_mask,
 
 	buffer = kmalloc(size, gfp_mask);
 	if (!buffer) {
-		_kfifo_init(fifo, 0, 0, NULL);
+		_kfifo_init(fifo, 0, 0);
 		return -ENOMEM;
 	}
 
-	_kfifo_init(fifo, buffer, size, lock);
+	_kfifo_init(fifo, buffer, size);
 
 	return 0;
 }
diff --git a/net/dccp/probe.c b/net/dccp/probe.c
index 6230ceb0823e..c6b50351aa78 100644
--- a/net/dccp/probe.c
+++ b/net/dccp/probe.c
@@ -67,7 +67,7 @@ static void printl(const char *fmt, ...)
 	len += vscnprintf(tbuf+len, sizeof(tbuf)-len, fmt, args);
 	va_end(args);
 
-	kfifo_put(&dccpw.fifo, tbuf, len);
+	kfifo_put_locked(&dccpw.fifo, tbuf, len, &dccpw.lock);
 	wake_up(&dccpw.wait);
 }
 
@@ -135,7 +135,7 @@ static ssize_t dccpprobe_read(struct file *file, char __user *buf,
 	if (error)
 		goto out_free;
 
-	cnt = kfifo_get(&dccpw.fifo, tbuf, len);
+	cnt = kfifo_get_locked(&dccpw.fifo, tbuf, len, &dccpw.lock);
 	error = copy_to_user(buf, tbuf, cnt) ? -EFAULT : 0;
 
 out_free:
@@ -156,7 +156,7 @@ static __init int dccpprobe_init(void)
 
 	init_waitqueue_head(&dccpw.wait);
 	spin_lock_init(&dccpw.lock);
-	if (kfifo_alloc(&dccpw.fifo, bufsize, GFP_KERNEL, &dccpw.lock))
+	if (kfifo_alloc(&dccpw.fifo, bufsize, GFP_KERNEL))
 		return ret;
 	if (!proc_net_fops_create(&init_net, procname, S_IRUSR, &dccpprobe_fops))
 		goto err0;
-- 
cgit v1.2.3


From e64c026dd09b73faf20707711402fc5ed55a8e70 Mon Sep 17 00:00:00 2001
From: Stefani Seibold <stefani@seibold.net>
Date: Mon, 21 Dec 2009 14:37:28 -0800
Subject: kfifo: cleanup namespace

change name of __kfifo_* functions to kfifo_*, because the prefix __kfifo
should be reserved for internal functions only.

Signed-off-by: Stefani Seibold <stefani@seibold.net>
Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
Acked-by: Mauro Carvalho Chehab <mchehab@redhat.com>
Acked-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/char/nozomi.c                       | 12 +++++------
 drivers/infiniband/hw/cxgb3/cxio_resource.c | 10 ++++-----
 drivers/net/wireless/libertas/cmd.c         |  4 ++--
 drivers/net/wireless/libertas/main.c        |  8 ++++----
 drivers/platform/x86/sony-laptop.c          |  2 +-
 drivers/scsi/libiscsi.c                     | 14 ++++++-------
 drivers/scsi/libiscsi_tcp.c                 | 20 +++++++++---------
 drivers/scsi/libsrp.c                       |  2 +-
 drivers/usb/host/fhci.h                     |  6 +++---
 drivers/usb/serial/generic.c                |  4 ++--
 include/linux/kfifo.h                       | 32 +++++++----------------------
 kernel/kfifo.c                              | 12 +++++------
 net/dccp/probe.c                            |  2 +-
 13 files changed, 55 insertions(+), 73 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/char/nozomi.c b/drivers/char/nozomi.c
index 935b30d80adf..61f5bfe74f38 100644
--- a/drivers/char/nozomi.c
+++ b/drivers/char/nozomi.c
@@ -798,7 +798,7 @@ static int send_data(enum port_type index, struct nozomi *dc)
 	struct tty_struct *tty = tty_port_tty_get(&port->port);
 
 	/* Get data from tty and place in buf for now */
-	size = __kfifo_get(&port->fifo_ul, dc->send_buf,
+	size = kfifo_get(&port->fifo_ul, dc->send_buf,
 			   ul_size < SEND_BUF_MAX ? ul_size : SEND_BUF_MAX);
 
 	if (size == 0) {
@@ -988,11 +988,11 @@ static int receive_flow_control(struct nozomi *dc)
 
 	} else if (old_ctrl.CTS == 0 && ctrl_dl.CTS == 1) {
 
-		if (__kfifo_len(&dc->port[port].fifo_ul)) {
+		if (kfifo_len(&dc->port[port].fifo_ul)) {
 			DBG1("Enable interrupt (0x%04X) on port: %d",
 				enable_ier, port);
 			DBG1("Data in buffer [%d], enable transmit! ",
-				__kfifo_len(&dc->port[port].fifo_ul));
+				kfifo_len(&dc->port[port].fifo_ul));
 			enable_transmit_ul(port, dc);
 		} else {
 			DBG1("No data in buffer...");
@@ -1672,7 +1672,7 @@ static int ntty_write(struct tty_struct *tty, const unsigned char *buffer,
 		goto exit;
 	}
 
-	rval = __kfifo_put(&port->fifo_ul, (unsigned char *)buffer, count);
+	rval = kfifo_put(&port->fifo_ul, (unsigned char *)buffer, count);
 
 	/* notify card */
 	if (unlikely(dc == NULL)) {
@@ -1720,7 +1720,7 @@ static int ntty_write_room(struct tty_struct *tty)
 	if (!port->port.count)
 		goto exit;
 
-	room = port->fifo_ul.size - __kfifo_len(&port->fifo_ul);
+	room = port->fifo_ul.size - kfifo_len(&port->fifo_ul);
 
 exit:
 	mutex_unlock(&port->tty_sem);
@@ -1877,7 +1877,7 @@ static s32 ntty_chars_in_buffer(struct tty_struct *tty)
 		goto exit_in_buffer;
 	}
 
-	rval = __kfifo_len(&port->fifo_ul);
+	rval = kfifo_len(&port->fifo_ul);
 
 exit_in_buffer:
 	return rval;
diff --git a/drivers/infiniband/hw/cxgb3/cxio_resource.c b/drivers/infiniband/hw/cxgb3/cxio_resource.c
index 98f24e6d906e..d7d18fb02c93 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_resource.c
+++ b/drivers/infiniband/hw/cxgb3/cxio_resource.c
@@ -59,7 +59,7 @@ static int __cxio_init_resource_fifo(struct kfifo *fifo,
 		return -ENOMEM;
 
 	for (i = 0; i < skip_low + skip_high; i++)
-		__kfifo_put(fifo, (unsigned char *) &entry, sizeof(u32));
+		kfifo_put(fifo, (unsigned char *) &entry, sizeof(u32));
 	if (random) {
 		j = 0;
 		random_bytes = random32();
@@ -71,19 +71,19 @@ static int __cxio_init_resource_fifo(struct kfifo *fifo,
 				random_bytes = random32();
 			}
 			idx = (random_bytes >> (j * 2)) & 0xF;
-			__kfifo_put(fifo,
+			kfifo_put(fifo,
 				(unsigned char *) &rarray[idx],
 				sizeof(u32));
 			rarray[idx] = i;
 			j++;
 		}
 		for (i = 0; i < RANDOM_SIZE; i++)
-			__kfifo_put(fifo,
+			kfifo_put(fifo,
 				(unsigned char *) &rarray[i],
 				sizeof(u32));
 	} else
 		for (i = skip_low; i < nr - skip_high; i++)
-			__kfifo_put(fifo, (unsigned char *) &i, sizeof(u32));
+			kfifo_put(fifo, (unsigned char *) &i, sizeof(u32));
 
 	for (i = 0; i < skip_low + skip_high; i++)
 		kfifo_get_locked(fifo, (unsigned char *) &entry,
@@ -119,7 +119,7 @@ static int cxio_init_qpid_fifo(struct cxio_rdev *rdev_p)
 
 	for (i = 16; i < T3_MAX_NUM_QP; i++)
 		if (!(i & rdev_p->qpmask))
-			__kfifo_put(&rdev_p->rscp->qpid_fifo,
+			kfifo_put(&rdev_p->rscp->qpid_fifo,
 				    (unsigned char *) &i, sizeof(u32));
 	return 0;
 }
diff --git a/drivers/net/wireless/libertas/cmd.c b/drivers/net/wireless/libertas/cmd.c
index ffed17f4f506..42611bea76a3 100644
--- a/drivers/net/wireless/libertas/cmd.c
+++ b/drivers/net/wireless/libertas/cmd.c
@@ -1365,7 +1365,7 @@ static void lbs_send_confirmsleep(struct lbs_private *priv)
 	priv->dnld_sent = DNLD_RES_RECEIVED;
 
 	/* If nothing to do, go back to sleep (?) */
-	if (!__kfifo_len(&priv->event_fifo) && !priv->resp_len[priv->resp_idx])
+	if (!kfifo_len(&priv->event_fifo) && !priv->resp_len[priv->resp_idx])
 		priv->psstate = PS_STATE_SLEEP;
 
 	spin_unlock_irqrestore(&priv->driver_lock, flags);
@@ -1439,7 +1439,7 @@ void lbs_ps_confirm_sleep(struct lbs_private *priv)
 	}
 
 	/* Pending events or command responses? */
-	if (__kfifo_len(&priv->event_fifo) || priv->resp_len[priv->resp_idx]) {
+	if (kfifo_len(&priv->event_fifo) || priv->resp_len[priv->resp_idx]) {
 		allowed = 0;
 		lbs_deb_host("pending events or command responses\n");
 	}
diff --git a/drivers/net/wireless/libertas/main.c b/drivers/net/wireless/libertas/main.c
index 2cc7ecd8d123..0622104f0a03 100644
--- a/drivers/net/wireless/libertas/main.c
+++ b/drivers/net/wireless/libertas/main.c
@@ -459,7 +459,7 @@ static int lbs_thread(void *data)
 		else if (!list_empty(&priv->cmdpendingq) &&
 					!(priv->wakeup_dev_required))
 			shouldsleep = 0;	/* We have a command to send */
-		else if (__kfifo_len(&priv->event_fifo))
+		else if (kfifo_len(&priv->event_fifo))
 			shouldsleep = 0;	/* We have an event to process */
 		else
 			shouldsleep = 1;	/* No command */
@@ -511,9 +511,9 @@ static int lbs_thread(void *data)
 
 		/* Process hardware events, e.g. card removed, link lost */
 		spin_lock_irq(&priv->driver_lock);
-		while (__kfifo_len(&priv->event_fifo)) {
+		while (kfifo_len(&priv->event_fifo)) {
 			u32 event;
-			__kfifo_get(&priv->event_fifo, (unsigned char *) &event,
+			kfifo_get(&priv->event_fifo, (unsigned char *) &event,
 				sizeof(event));
 			spin_unlock_irq(&priv->driver_lock);
 			lbs_process_event(priv, event);
@@ -1175,7 +1175,7 @@ void lbs_queue_event(struct lbs_private *priv, u32 event)
 	if (priv->psstate == PS_STATE_SLEEP)
 		priv->psstate = PS_STATE_AWAKE;
 
-	__kfifo_put(&priv->event_fifo, (unsigned char *) &event, sizeof(u32));
+	kfifo_put(&priv->event_fifo, (unsigned char *) &event, sizeof(u32));
 
 	wake_up_interruptible(&priv->waitq);
 
diff --git a/drivers/platform/x86/sony-laptop.c b/drivers/platform/x86/sony-laptop.c
index 1538a0a3c0af..36e5dc6fc953 100644
--- a/drivers/platform/x86/sony-laptop.c
+++ b/drivers/platform/x86/sony-laptop.c
@@ -2107,7 +2107,7 @@ static int sonypi_misc_open(struct inode *inode, struct file *file)
 	spin_lock_irqsave(&sonypi_compat.fifo_lock, flags);
 
 	if (atomic_inc_return(&sonypi_compat.open_count) == 1)
-		__kfifo_reset(&sonypi_compat.fifo);
+		kfifo_reset(&sonypi_compat.fifo);
 
 	spin_unlock_irqrestore(&sonypi_compat.fifo_lock, flags);
 
diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c
index 1bccbc1e588e..5f0c46f43ee1 100644
--- a/drivers/scsi/libiscsi.c
+++ b/drivers/scsi/libiscsi.c
@@ -517,7 +517,7 @@ static void iscsi_free_task(struct iscsi_task *task)
 	if (conn->login_task == task)
 		return;
 
-	__kfifo_put(&session->cmdpool.queue, (void*)&task, sizeof(void*));
+	kfifo_put(&session->cmdpool.queue, (void*)&task, sizeof(void*));
 
 	if (sc) {
 		task->sc = NULL;
@@ -737,7 +737,7 @@ __iscsi_conn_send_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
 		BUG_ON(conn->c_stage == ISCSI_CONN_INITIAL_STAGE);
 		BUG_ON(conn->c_stage == ISCSI_CONN_STOPPED);
 
-		if (!__kfifo_get(&session->cmdpool.queue,
+		if (!kfifo_get(&session->cmdpool.queue,
 				 (void*)&task, sizeof(void*)))
 			return NULL;
 	}
@@ -1567,7 +1567,7 @@ static inline struct iscsi_task *iscsi_alloc_task(struct iscsi_conn *conn,
 {
 	struct iscsi_task *task;
 
-	if (!__kfifo_get(&conn->session->cmdpool.queue,
+	if (!kfifo_get(&conn->session->cmdpool.queue,
 			 (void *) &task, sizeof(void *)))
 		return NULL;
 
@@ -2469,7 +2469,7 @@ iscsi_pool_init(struct iscsi_pool *q, int max, void ***items, int item_size)
 			q->max = i;
 			goto enomem;
 		}
-		__kfifo_put(&q->queue, (void*)&q->pool[i], sizeof(void*));
+		kfifo_put(&q->queue, (void*)&q->pool[i], sizeof(void*));
 	}
 
 	if (items) {
@@ -2819,7 +2819,7 @@ iscsi_conn_setup(struct iscsi_cls_session *cls_session, int dd_size,
 
 	/* allocate login_task used for the login/text sequences */
 	spin_lock_bh(&session->lock);
-	if (!__kfifo_get(&session->cmdpool.queue,
+	if (!kfifo_get(&session->cmdpool.queue,
                          (void*)&conn->login_task,
 			 sizeof(void*))) {
 		spin_unlock_bh(&session->lock);
@@ -2839,7 +2839,7 @@ iscsi_conn_setup(struct iscsi_cls_session *cls_session, int dd_size,
 	return cls_conn;
 
 login_task_data_alloc_fail:
-	__kfifo_put(&session->cmdpool.queue, (void*)&conn->login_task,
+	kfifo_put(&session->cmdpool.queue, (void*)&conn->login_task,
 		    sizeof(void*));
 login_task_alloc_fail:
 	iscsi_destroy_conn(cls_conn);
@@ -2902,7 +2902,7 @@ void iscsi_conn_teardown(struct iscsi_cls_conn *cls_conn)
 	free_pages((unsigned long) conn->data,
 		   get_order(ISCSI_DEF_MAX_RECV_SEG_LEN));
 	kfree(conn->persistent_address);
-	__kfifo_put(&session->cmdpool.queue, (void*)&conn->login_task,
+	kfifo_put(&session->cmdpool.queue, (void*)&conn->login_task,
 		    sizeof(void*));
 	if (session->leadconn == conn)
 		session->leadconn = NULL;
diff --git a/drivers/scsi/libiscsi_tcp.c b/drivers/scsi/libiscsi_tcp.c
index 41643c860d26..c0be926637b1 100644
--- a/drivers/scsi/libiscsi_tcp.c
+++ b/drivers/scsi/libiscsi_tcp.c
@@ -445,15 +445,15 @@ void iscsi_tcp_cleanup_task(struct iscsi_task *task)
 		return;
 
 	/* flush task's r2t queues */
-	while (__kfifo_get(&tcp_task->r2tqueue, (void*)&r2t, sizeof(void*))) {
-		__kfifo_put(&tcp_task->r2tpool.queue, (void*)&r2t,
+	while (kfifo_get(&tcp_task->r2tqueue, (void*)&r2t, sizeof(void*))) {
+		kfifo_put(&tcp_task->r2tpool.queue, (void*)&r2t,
 			    sizeof(void*));
 		ISCSI_DBG_TCP(task->conn, "pending r2t dropped\n");
 	}
 
 	r2t = tcp_task->r2t;
 	if (r2t != NULL) {
-		__kfifo_put(&tcp_task->r2tpool.queue, (void*)&r2t,
+		kfifo_put(&tcp_task->r2tpool.queue, (void*)&r2t,
 			    sizeof(void*));
 		tcp_task->r2t = NULL;
 	}
@@ -541,7 +541,7 @@ static int iscsi_tcp_r2t_rsp(struct iscsi_conn *conn, struct iscsi_task *task)
 		return 0;
 	}
 
-	rc = __kfifo_get(&tcp_task->r2tpool.queue, (void*)&r2t, sizeof(void*));
+	rc = kfifo_get(&tcp_task->r2tpool.queue, (void*)&r2t, sizeof(void*));
 	if (!rc) {
 		iscsi_conn_printk(KERN_ERR, conn, "Could not allocate R2T. "
 				  "Target has sent more R2Ts than it "
@@ -554,7 +554,7 @@ static int iscsi_tcp_r2t_rsp(struct iscsi_conn *conn, struct iscsi_task *task)
 	if (r2t->data_length == 0) {
 		iscsi_conn_printk(KERN_ERR, conn,
 				  "invalid R2T with zero data len\n");
-		__kfifo_put(&tcp_task->r2tpool.queue, (void*)&r2t,
+		kfifo_put(&tcp_task->r2tpool.queue, (void*)&r2t,
 			    sizeof(void*));
 		return ISCSI_ERR_DATALEN;
 	}
@@ -570,7 +570,7 @@ static int iscsi_tcp_r2t_rsp(struct iscsi_conn *conn, struct iscsi_task *task)
 				  "invalid R2T with data len %u at offset %u "
 				  "and total length %d\n", r2t->data_length,
 				  r2t->data_offset, scsi_out(task->sc)->length);
-		__kfifo_put(&tcp_task->r2tpool.queue, (void*)&r2t,
+		kfifo_put(&tcp_task->r2tpool.queue, (void*)&r2t,
 			    sizeof(void*));
 		return ISCSI_ERR_DATALEN;
 	}
@@ -580,7 +580,7 @@ static int iscsi_tcp_r2t_rsp(struct iscsi_conn *conn, struct iscsi_task *task)
 	r2t->sent = 0;
 
 	tcp_task->exp_datasn = r2tsn + 1;
-	__kfifo_put(&tcp_task->r2tqueue, (void*)&r2t, sizeof(void*));
+	kfifo_put(&tcp_task->r2tqueue, (void*)&r2t, sizeof(void*));
 	conn->r2t_pdus_cnt++;
 
 	iscsi_requeue_task(task);
@@ -951,7 +951,7 @@ int iscsi_tcp_task_init(struct iscsi_task *task)
 		return conn->session->tt->init_pdu(task, 0, task->data_count);
 	}
 
-	BUG_ON(__kfifo_len(&tcp_task->r2tqueue));
+	BUG_ON(kfifo_len(&tcp_task->r2tqueue));
 	tcp_task->exp_datasn = 0;
 
 	/* Prepare PDU, optionally w/ immediate data */
@@ -982,7 +982,7 @@ static struct iscsi_r2t_info *iscsi_tcp_get_curr_r2t(struct iscsi_task *task)
 			if (r2t->data_length <= r2t->sent) {
 				ISCSI_DBG_TCP(task->conn,
 					      "  done with r2t %p\n", r2t);
-				__kfifo_put(&tcp_task->r2tpool.queue,
+				kfifo_put(&tcp_task->r2tpool.queue,
 					    (void *)&tcp_task->r2t,
 					    sizeof(void *));
 				tcp_task->r2t = r2t = NULL;
@@ -990,7 +990,7 @@ static struct iscsi_r2t_info *iscsi_tcp_get_curr_r2t(struct iscsi_task *task)
 		}
 
 		if (r2t == NULL) {
-			__kfifo_get(&tcp_task->r2tqueue,
+			kfifo_get(&tcp_task->r2tqueue,
 				    (void *)&tcp_task->r2t, sizeof(void *));
 			r2t = tcp_task->r2t;
 		}
diff --git a/drivers/scsi/libsrp.c b/drivers/scsi/libsrp.c
index db1b41c55fd3..975e448cfcb9 100644
--- a/drivers/scsi/libsrp.c
+++ b/drivers/scsi/libsrp.c
@@ -61,7 +61,7 @@ static int srp_iu_pool_alloc(struct srp_queue *q, size_t max,
 	kfifo_init(&q->queue, (void *) q->pool, max * sizeof(void *));
 
 	for (i = 0, iue = q->items; i < max; i++) {
-		__kfifo_put(&q->queue, (void *) &iue, sizeof(void *));
+		kfifo_put(&q->queue, (void *) &iue, sizeof(void *));
 		iue->sbuf = ring[i];
 		iue++;
 	}
diff --git a/drivers/usb/host/fhci.h b/drivers/usb/host/fhci.h
index a76da201183b..96aa787f208f 100644
--- a/drivers/usb/host/fhci.h
+++ b/drivers/usb/host/fhci.h
@@ -505,19 +505,19 @@ static inline void cq_delete(struct kfifo *kfifo)
 
 static inline unsigned int cq_howmany(struct kfifo *kfifo)
 {
-	return __kfifo_len(kfifo) / sizeof(void *);
+	return kfifo_len(kfifo) / sizeof(void *);
 }
 
 static inline int cq_put(struct kfifo *kfifo, void *p)
 {
-	return __kfifo_put(kfifo, (void *)&p, sizeof(p));
+	return kfifo_put(kfifo, (void *)&p, sizeof(p));
 }
 
 static inline void *cq_get(struct kfifo *kfifo)
 {
 	void *p = NULL;
 
-	__kfifo_get(kfifo, (void *)&p, sizeof(p));
+	kfifo_get(kfifo, (void *)&p, sizeof(p));
 	return p;
 }
 
diff --git a/drivers/usb/serial/generic.c b/drivers/usb/serial/generic.c
index 61eef18218be..d0a2e464cacd 100644
--- a/drivers/usb/serial/generic.c
+++ b/drivers/usb/serial/generic.c
@@ -276,7 +276,7 @@ static int usb_serial_generic_write_start(struct usb_serial_port *port)
 	if (port->write_urb_busy)
 		start_io = false;
 	else {
-		start_io = (__kfifo_len(port->write_fifo) != 0);
+		start_io = (kfifo_len(port->write_fifo) != 0);
 		port->write_urb_busy = start_io;
 	}
 	spin_unlock_irqrestore(&port->lock, flags);
@@ -370,7 +370,7 @@ int usb_serial_generic_write_room(struct tty_struct *tty)
 				(serial->type->max_in_flight_urbs -
 				 port->urbs_in_flight);
 	} else if (serial->num_bulk_out)
-		room = port->write_fifo->size - __kfifo_len(port->write_fifo);
+		room = port->write_fifo->size - kfifo_len(port->write_fifo);
 	spin_unlock_irqrestore(&port->lock, flags);
 
 	dbg("%s - returns %d", __func__, room);
diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h
index e0f5c9d4197d..a893acda3964 100644
--- a/include/linux/kfifo.h
+++ b/include/linux/kfifo.h
@@ -37,34 +37,25 @@ extern void kfifo_init(struct kfifo *fifo, unsigned char *buffer,
 extern __must_check int kfifo_alloc(struct kfifo *fifo, unsigned int size,
 			gfp_t gfp_mask);
 extern void kfifo_free(struct kfifo *fifo);
-extern unsigned int __kfifo_put(struct kfifo *fifo,
+extern unsigned int kfifo_put(struct kfifo *fifo,
 				const unsigned char *buffer, unsigned int len);
-extern unsigned int __kfifo_get(struct kfifo *fifo,
+extern unsigned int kfifo_get(struct kfifo *fifo,
 				unsigned char *buffer, unsigned int len);
 
-/**
- * __kfifo_reset - removes the entire FIFO contents, no locking version
- * @fifo: the fifo to be emptied.
- */
-static inline void __kfifo_reset(struct kfifo *fifo)
-{
-	fifo->in = fifo->out = 0;
-}
-
 /**
  * kfifo_reset - removes the entire FIFO contents
  * @fifo: the fifo to be emptied.
  */
 static inline void kfifo_reset(struct kfifo *fifo)
 {
-	__kfifo_reset(fifo);
+	fifo->in = fifo->out = 0;
 }
 
 /**
- * __kfifo_len - returns the number of bytes available in the FIFO
+ * kfifo_len - returns the number of used bytes in the FIFO
  * @fifo: the fifo to be used.
  */
-static inline unsigned int __kfifo_len(struct kfifo *fifo)
+static inline unsigned int kfifo_len(struct kfifo *fifo)
 {
 	register unsigned int	out;
 
@@ -92,7 +83,7 @@ static inline __must_check unsigned int kfifo_put_locked(struct kfifo *fifo,
 
 	spin_lock_irqsave(lock, flags);
 
-	ret = __kfifo_put(fifo, from, n);
+	ret = kfifo_put(fifo, from, n);
 
 	spin_unlock_irqrestore(lock, flags);
 
@@ -117,7 +108,7 @@ static inline __must_check unsigned int kfifo_get_locked(struct kfifo *fifo,
 
 	spin_lock_irqsave(lock, flags);
 
-	ret = __kfifo_get(fifo, to, n);
+	ret = kfifo_get(fifo, to, n);
 
 	/*
 	 * optimization: if the FIFO is empty, set the indices to 0
@@ -131,13 +122,4 @@ static inline __must_check unsigned int kfifo_get_locked(struct kfifo *fifo,
 	return ret;
 }
 
-/**
- * kfifo_len - returns the number of bytes available in the FIFO
- * @fifo: the fifo to be used.
- */
-static inline unsigned int kfifo_len(struct kfifo *fifo)
-{
-	return __kfifo_len(fifo);
-}
-
 #endif
diff --git a/kernel/kfifo.c b/kernel/kfifo.c
index 4950bdbe3477..963ffde4af1a 100644
--- a/kernel/kfifo.c
+++ b/kernel/kfifo.c
@@ -100,7 +100,7 @@ void kfifo_free(struct kfifo *fifo)
 EXPORT_SYMBOL(kfifo_free);
 
 /**
- * __kfifo_put - puts some data into the FIFO, no locking version
+ * kfifo_put - puts some data into the FIFO, no locking version
  * @fifo: the fifo to be used.
  * @buffer: the data to be added.
  * @len: the length of the data to be added.
@@ -112,7 +112,7 @@ EXPORT_SYMBOL(kfifo_free);
  * Note that with only one concurrent reader and one concurrent
  * writer, you don't need extra locking to use these functions.
  */
-unsigned int __kfifo_put(struct kfifo *fifo,
+unsigned int kfifo_put(struct kfifo *fifo,
 			const unsigned char *buffer, unsigned int len)
 {
 	unsigned int l;
@@ -144,10 +144,10 @@ unsigned int __kfifo_put(struct kfifo *fifo,
 
 	return len;
 }
-EXPORT_SYMBOL(__kfifo_put);
+EXPORT_SYMBOL(kfifo_put);
 
 /**
- * __kfifo_get - gets some data from the FIFO, no locking version
+ * kfifo_get - gets some data from the FIFO, no locking version
  * @fifo: the fifo to be used.
  * @buffer: where the data must be copied.
  * @len: the size of the destination buffer.
@@ -158,7 +158,7 @@ EXPORT_SYMBOL(__kfifo_put);
  * Note that with only one concurrent reader and one concurrent
  * writer, you don't need extra locking to use these functions.
  */
-unsigned int __kfifo_get(struct kfifo *fifo,
+unsigned int kfifo_get(struct kfifo *fifo,
 			 unsigned char *buffer, unsigned int len)
 {
 	unsigned int l;
@@ -190,4 +190,4 @@ unsigned int __kfifo_get(struct kfifo *fifo,
 
 	return len;
 }
-EXPORT_SYMBOL(__kfifo_get);
+EXPORT_SYMBOL(kfifo_get);
diff --git a/net/dccp/probe.c b/net/dccp/probe.c
index c6b50351aa78..9ef36849edd7 100644
--- a/net/dccp/probe.c
+++ b/net/dccp/probe.c
@@ -131,7 +131,7 @@ static ssize_t dccpprobe_read(struct file *file, char __user *buf,
 		return -ENOMEM;
 
 	error = wait_event_interruptible(dccpw.wait,
-					 __kfifo_len(&dccpw.fifo) != 0);
+					 kfifo_len(&dccpw.fifo) != 0);
 	if (error)
 		goto out_free;
 
-- 
cgit v1.2.3


From 7acd72eb85f1c7a15e8b5eb554994949241737f1 Mon Sep 17 00:00:00 2001
From: Stefani Seibold <stefani@seibold.net>
Date: Mon, 21 Dec 2009 14:37:28 -0800
Subject: kfifo: rename kfifo_put... into kfifo_in... and kfifo_get... into
 kfifo_out...

rename kfifo_put...  into kfifo_in...  to prevent miss use of old non in
kernel-tree drivers

ditto for kfifo_get...  -> kfifo_out...

Improve the prototypes of kfifo_in and kfifo_out to make the kerneldoc
annotations more readable.

Add mini "howto porting to the new API" in kfifo.h

Signed-off-by: Stefani Seibold <stefani@seibold.net>
Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
Acked-by: Mauro Carvalho Chehab <mchehab@redhat.com>
Acked-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/char/nozomi.c                       |  4 +--
 drivers/char/sonypi.c                       |  8 +++---
 drivers/infiniband/hw/cxgb3/cxio_resource.c | 16 ++++++------
 drivers/media/video/meye.c                  | 16 ++++++------
 drivers/net/wireless/libertas/main.c        |  5 ++--
 drivers/platform/x86/fujitsu-laptop.c       |  4 +--
 drivers/platform/x86/sony-laptop.c          |  8 +++---
 drivers/scsi/libiscsi.c                     | 14 +++++------
 drivers/scsi/libiscsi_tcp.c                 | 18 ++++++-------
 drivers/scsi/libsrp.c                       |  6 ++---
 drivers/usb/host/fhci.h                     |  4 +--
 drivers/usb/serial/generic.c                |  4 +--
 include/linux/kfifo.h                       | 39 +++++++++++++++++++++--------
 kernel/kfifo.c                              | 32 +++++++++++------------
 net/dccp/probe.c                            |  4 +--
 15 files changed, 101 insertions(+), 81 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/char/nozomi.c b/drivers/char/nozomi.c
index 61f5bfe74f38..9ef243429014 100644
--- a/drivers/char/nozomi.c
+++ b/drivers/char/nozomi.c
@@ -798,7 +798,7 @@ static int send_data(enum port_type index, struct nozomi *dc)
 	struct tty_struct *tty = tty_port_tty_get(&port->port);
 
 	/* Get data from tty and place in buf for now */
-	size = kfifo_get(&port->fifo_ul, dc->send_buf,
+	size = kfifo_out(&port->fifo_ul, dc->send_buf,
 			   ul_size < SEND_BUF_MAX ? ul_size : SEND_BUF_MAX);
 
 	if (size == 0) {
@@ -1672,7 +1672,7 @@ static int ntty_write(struct tty_struct *tty, const unsigned char *buffer,
 		goto exit;
 	}
 
-	rval = kfifo_put(&port->fifo_ul, (unsigned char *)buffer, count);
+	rval = kfifo_in(&port->fifo_ul, (unsigned char *)buffer, count);
 
 	/* notify card */
 	if (unlikely(dc == NULL)) {
diff --git a/drivers/char/sonypi.c b/drivers/char/sonypi.c
index dbcb3bd192c7..0798754a607c 100644
--- a/drivers/char/sonypi.c
+++ b/drivers/char/sonypi.c
@@ -777,7 +777,7 @@ static void input_keyrelease(struct work_struct *work)
 {
 	struct sonypi_keypress kp;
 
-	while (kfifo_get_locked(&sonypi_device.input_fifo, (unsigned char *)&kp,
+	while (kfifo_out_locked(&sonypi_device.input_fifo, (unsigned char *)&kp,
 			 sizeof(kp), &sonypi_device.input_fifo_lock)
 			== sizeof(kp)) {
 		msleep(10);
@@ -828,7 +828,7 @@ static void sonypi_report_input_event(u8 event)
 	if (kp.dev) {
 		input_report_key(kp.dev, kp.key, 1);
 		input_sync(kp.dev);
-		kfifo_put_locked(&sonypi_device.input_fifo,
+		kfifo_in_locked(&sonypi_device.input_fifo,
 			(unsigned char *)&kp, sizeof(kp),
 			&sonypi_device.input_fifo_lock);
 		schedule_work(&sonypi_device.input_work);
@@ -882,7 +882,7 @@ found:
 		acpi_bus_generate_proc_event(sonypi_acpi_device, 1, event);
 #endif
 
-	kfifo_put_locked(&sonypi_device.fifo, (unsigned char *)&event,
+	kfifo_in_locked(&sonypi_device.fifo, (unsigned char *)&event,
 			sizeof(event), &sonypi_device.fifo_lock);
 	kill_fasync(&sonypi_device.fifo_async, SIGIO, POLL_IN);
 	wake_up_interruptible(&sonypi_device.fifo_proc_list);
@@ -932,7 +932,7 @@ static ssize_t sonypi_misc_read(struct file *file, char __user *buf,
 		return ret;
 
 	while (ret < count &&
-	       (kfifo_get_locked(&sonypi_device.fifo, &c, sizeof(c),
+	       (kfifo_out_locked(&sonypi_device.fifo, &c, sizeof(c),
 				 &sonypi_device.fifo_lock) == sizeof(c))) {
 		if (put_user(c, buf++))
 			return -EFAULT;
diff --git a/drivers/infiniband/hw/cxgb3/cxio_resource.c b/drivers/infiniband/hw/cxgb3/cxio_resource.c
index d7d18fb02c93..dcbf2606c438 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_resource.c
+++ b/drivers/infiniband/hw/cxgb3/cxio_resource.c
@@ -59,7 +59,7 @@ static int __cxio_init_resource_fifo(struct kfifo *fifo,
 		return -ENOMEM;
 
 	for (i = 0; i < skip_low + skip_high; i++)
-		kfifo_put(fifo, (unsigned char *) &entry, sizeof(u32));
+		kfifo_in(fifo, (unsigned char *) &entry, sizeof(u32));
 	if (random) {
 		j = 0;
 		random_bytes = random32();
@@ -71,22 +71,22 @@ static int __cxio_init_resource_fifo(struct kfifo *fifo,
 				random_bytes = random32();
 			}
 			idx = (random_bytes >> (j * 2)) & 0xF;
-			kfifo_put(fifo,
+			kfifo_in(fifo,
 				(unsigned char *) &rarray[idx],
 				sizeof(u32));
 			rarray[idx] = i;
 			j++;
 		}
 		for (i = 0; i < RANDOM_SIZE; i++)
-			kfifo_put(fifo,
+			kfifo_in(fifo,
 				(unsigned char *) &rarray[i],
 				sizeof(u32));
 	} else
 		for (i = skip_low; i < nr - skip_high; i++)
-			kfifo_put(fifo, (unsigned char *) &i, sizeof(u32));
+			kfifo_in(fifo, (unsigned char *) &i, sizeof(u32));
 
 	for (i = 0; i < skip_low + skip_high; i++)
-		kfifo_get_locked(fifo, (unsigned char *) &entry,
+		kfifo_out_locked(fifo, (unsigned char *) &entry,
 				sizeof(u32), fifo_lock);
 	return 0;
 }
@@ -119,7 +119,7 @@ static int cxio_init_qpid_fifo(struct cxio_rdev *rdev_p)
 
 	for (i = 16; i < T3_MAX_NUM_QP; i++)
 		if (!(i & rdev_p->qpmask))
-			kfifo_put(&rdev_p->rscp->qpid_fifo,
+			kfifo_in(&rdev_p->rscp->qpid_fifo,
 				    (unsigned char *) &i, sizeof(u32));
 	return 0;
 }
@@ -180,7 +180,7 @@ tpt_err:
 static u32 cxio_hal_get_resource(struct kfifo *fifo, spinlock_t * lock)
 {
 	u32 entry;
-	if (kfifo_get_locked(fifo, (unsigned char *) &entry, sizeof(u32), lock))
+	if (kfifo_out_locked(fifo, (unsigned char *) &entry, sizeof(u32), lock))
 		return entry;
 	else
 		return 0;	/* fifo emptry */
@@ -190,7 +190,7 @@ static void cxio_hal_put_resource(struct kfifo *fifo, spinlock_t * lock,
 		u32 entry)
 {
 	BUG_ON(
-	kfifo_put_locked(fifo, (unsigned char *) &entry, sizeof(u32), lock)
+	kfifo_in_locked(fifo, (unsigned char *) &entry, sizeof(u32), lock)
 	== 0);
 }
 
diff --git a/drivers/media/video/meye.c b/drivers/media/video/meye.c
index 38bcedfd9fec..884a569d60a2 100644
--- a/drivers/media/video/meye.c
+++ b/drivers/media/video/meye.c
@@ -800,7 +800,7 @@ again:
 		return IRQ_HANDLED;
 
 	if (meye.mchip_mode == MCHIP_HIC_MODE_CONT_OUT) {
-		if (kfifo_get_locked(&meye.grabq, (unsigned char *)&reqnr,
+		if (kfifo_out_locked(&meye.grabq, (unsigned char *)&reqnr,
 			      sizeof(int), &meye.grabq_lock) != sizeof(int)) {
 			mchip_free_frame();
 			return IRQ_HANDLED;
@@ -811,7 +811,7 @@ again:
 		meye.grab_buffer[reqnr].state = MEYE_BUF_DONE;
 		do_gettimeofday(&meye.grab_buffer[reqnr].timestamp);
 		meye.grab_buffer[reqnr].sequence = sequence++;
-		kfifo_put_locked(&meye.doneq, (unsigned char *)&reqnr,
+		kfifo_in_locked(&meye.doneq, (unsigned char *)&reqnr,
 				sizeof(int), &meye.doneq_lock);
 		wake_up_interruptible(&meye.proc_list);
 	} else {
@@ -821,7 +821,7 @@ again:
 			mchip_free_frame();
 			goto again;
 		}
-		if (kfifo_get_locked(&meye.grabq, (unsigned char *)&reqnr,
+		if (kfifo_out_locked(&meye.grabq, (unsigned char *)&reqnr,
 			      sizeof(int), &meye.grabq_lock) != sizeof(int)) {
 			mchip_free_frame();
 			goto again;
@@ -832,7 +832,7 @@ again:
 		meye.grab_buffer[reqnr].state = MEYE_BUF_DONE;
 		do_gettimeofday(&meye.grab_buffer[reqnr].timestamp);
 		meye.grab_buffer[reqnr].sequence = sequence++;
-		kfifo_put_locked(&meye.doneq, (unsigned char *)&reqnr,
+		kfifo_in_locked(&meye.doneq, (unsigned char *)&reqnr,
 				sizeof(int), &meye.doneq_lock);
 		wake_up_interruptible(&meye.proc_list);
 	}
@@ -935,7 +935,7 @@ static int meyeioc_qbuf_capt(int *nb)
 		mchip_cont_compression_start();
 
 	meye.grab_buffer[*nb].state = MEYE_BUF_USING;
-	kfifo_put_locked(&meye.grabq, (unsigned char *)nb, sizeof(int),
+	kfifo_in_locked(&meye.grabq, (unsigned char *)nb, sizeof(int),
 			 &meye.grabq_lock);
 	mutex_unlock(&meye.lock);
 
@@ -968,7 +968,7 @@ static int meyeioc_sync(struct file *file, void *fh, int *i)
 		/* fall through */
 	case MEYE_BUF_DONE:
 		meye.grab_buffer[*i].state = MEYE_BUF_UNUSED;
-		kfifo_get_locked(&meye.doneq, (unsigned char *)&unused,
+		kfifo_out_locked(&meye.doneq, (unsigned char *)&unused,
 				sizeof(int), &meye.doneq_lock);
 	}
 	*i = meye.grab_buffer[*i].size;
@@ -1456,7 +1456,7 @@ static int vidioc_qbuf(struct file *file, void *fh, struct v4l2_buffer *buf)
 	buf->flags |= V4L2_BUF_FLAG_QUEUED;
 	buf->flags &= ~V4L2_BUF_FLAG_DONE;
 	meye.grab_buffer[buf->index].state = MEYE_BUF_USING;
-	kfifo_put_locked(&meye.grabq, (unsigned char *)&buf->index,
+	kfifo_in_locked(&meye.grabq, (unsigned char *)&buf->index,
 			sizeof(int), &meye.grabq_lock);
 	mutex_unlock(&meye.lock);
 
@@ -1483,7 +1483,7 @@ static int vidioc_dqbuf(struct file *file, void *fh, struct v4l2_buffer *buf)
 		return -EINTR;
 	}
 
-	if (!kfifo_get_locked(&meye.doneq, (unsigned char *)&reqnr,
+	if (!kfifo_out_locked(&meye.doneq, (unsigned char *)&reqnr,
 		       sizeof(int), &meye.doneq_lock)) {
 		mutex_unlock(&meye.lock);
 		return -EBUSY;
diff --git a/drivers/net/wireless/libertas/main.c b/drivers/net/wireless/libertas/main.c
index 0622104f0a03..2bcfa745524a 100644
--- a/drivers/net/wireless/libertas/main.c
+++ b/drivers/net/wireless/libertas/main.c
@@ -513,7 +513,8 @@ static int lbs_thread(void *data)
 		spin_lock_irq(&priv->driver_lock);
 		while (kfifo_len(&priv->event_fifo)) {
 			u32 event;
-			kfifo_get(&priv->event_fifo, (unsigned char *) &event,
+
+			kfifo_out(&priv->event_fifo, (unsigned char *) &event,
 				sizeof(event));
 			spin_unlock_irq(&priv->driver_lock);
 			lbs_process_event(priv, event);
@@ -1175,7 +1176,7 @@ void lbs_queue_event(struct lbs_private *priv, u32 event)
 	if (priv->psstate == PS_STATE_SLEEP)
 		priv->psstate = PS_STATE_AWAKE;
 
-	kfifo_put(&priv->event_fifo, (unsigned char *) &event, sizeof(u32));
+	kfifo_in(&priv->event_fifo, (unsigned char *) &event, sizeof(u32));
 
 	wake_up_interruptible(&priv->waitq);
 
diff --git a/drivers/platform/x86/fujitsu-laptop.c b/drivers/platform/x86/fujitsu-laptop.c
index 13dc7bedcfce..b66029bd75d0 100644
--- a/drivers/platform/x86/fujitsu-laptop.c
+++ b/drivers/platform/x86/fujitsu-laptop.c
@@ -1006,7 +1006,7 @@ static void acpi_fujitsu_hotkey_notify(struct acpi_device *device, u32 event)
 				vdbg_printk(FUJLAPTOP_DBG_TRACE,
 					"Push keycode into ringbuffer [%d]\n",
 					keycode);
-				status = kfifo_put_locked(&fujitsu_hotkey->fifo,
+				status = kfifo_in_locked(&fujitsu_hotkey->fifo,
 						   (unsigned char *)&keycode,
 						   sizeof(keycode),
 						   &fujitsu_hotkey->fifo_lock);
@@ -1020,7 +1020,7 @@ static void acpi_fujitsu_hotkey_notify(struct acpi_device *device, u32 event)
 				}
 			} else if (keycode == 0) {
 				while ((status =
-					kfifo_get_locked(
+					kfifo_out_locked(
 					 &fujitsu_hotkey->fifo,
 					 (unsigned char *) &keycode_r,
 					 sizeof(keycode_r),
diff --git a/drivers/platform/x86/sony-laptop.c b/drivers/platform/x86/sony-laptop.c
index 36e5dc6fc953..2896ca4cd9ab 100644
--- a/drivers/platform/x86/sony-laptop.c
+++ b/drivers/platform/x86/sony-laptop.c
@@ -300,7 +300,7 @@ static void do_sony_laptop_release_key(struct work_struct *work)
 {
 	struct sony_laptop_keypress kp;
 
-	while (kfifo_get_locked(&sony_laptop_input.fifo, (unsigned char *)&kp,
+	while (kfifo_out_locked(&sony_laptop_input.fifo, (unsigned char *)&kp,
 			sizeof(kp), &sony_laptop_input.fifo_lock)
 			== sizeof(kp)) {
 		msleep(10);
@@ -363,7 +363,7 @@ static void sony_laptop_report_input_event(u8 event)
 		/* we emit the scancode so we can always remap the key */
 		input_event(kp.dev, EV_MSC, MSC_SCAN, event);
 		input_sync(kp.dev);
-		kfifo_put_locked(&sony_laptop_input.fifo,
+		kfifo_in_locked(&sony_laptop_input.fifo,
 			  (unsigned char *)&kp, sizeof(kp),
 			  &sony_laptop_input.fifo_lock);
 
@@ -2130,7 +2130,7 @@ static ssize_t sonypi_misc_read(struct file *file, char __user *buf,
 		return ret;
 
 	while (ret < count &&
-	       (kfifo_get_locked(&sonypi_compat.fifo, &c, sizeof(c),
+	       (kfifo_out_locked(&sonypi_compat.fifo, &c, sizeof(c),
 			  &sonypi_compat.fifo_lock) == sizeof(c))) {
 		if (put_user(c, buf++))
 			return -EFAULT;
@@ -2310,7 +2310,7 @@ static struct miscdevice sonypi_misc_device = {
 
 static void sonypi_compat_report_event(u8 event)
 {
-	kfifo_put_locked(&sonypi_compat.fifo, (unsigned char *)&event,
+	kfifo_in_locked(&sonypi_compat.fifo, (unsigned char *)&event,
 			sizeof(event), &sonypi_compat.fifo_lock);
 	kill_fasync(&sonypi_compat.fifo_async, SIGIO, POLL_IN);
 	wake_up_interruptible(&sonypi_compat.fifo_proc_list);
diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c
index 5f0c46f43ee1..c28a712fd4db 100644
--- a/drivers/scsi/libiscsi.c
+++ b/drivers/scsi/libiscsi.c
@@ -517,7 +517,7 @@ static void iscsi_free_task(struct iscsi_task *task)
 	if (conn->login_task == task)
 		return;
 
-	kfifo_put(&session->cmdpool.queue, (void*)&task, sizeof(void*));
+	kfifo_in(&session->cmdpool.queue, (void*)&task, sizeof(void*));
 
 	if (sc) {
 		task->sc = NULL;
@@ -737,7 +737,7 @@ __iscsi_conn_send_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
 		BUG_ON(conn->c_stage == ISCSI_CONN_INITIAL_STAGE);
 		BUG_ON(conn->c_stage == ISCSI_CONN_STOPPED);
 
-		if (!kfifo_get(&session->cmdpool.queue,
+		if (!kfifo_out(&session->cmdpool.queue,
 				 (void*)&task, sizeof(void*)))
 			return NULL;
 	}
@@ -1567,7 +1567,7 @@ static inline struct iscsi_task *iscsi_alloc_task(struct iscsi_conn *conn,
 {
 	struct iscsi_task *task;
 
-	if (!kfifo_get(&conn->session->cmdpool.queue,
+	if (!kfifo_out(&conn->session->cmdpool.queue,
 			 (void *) &task, sizeof(void *)))
 		return NULL;
 
@@ -2469,7 +2469,7 @@ iscsi_pool_init(struct iscsi_pool *q, int max, void ***items, int item_size)
 			q->max = i;
 			goto enomem;
 		}
-		kfifo_put(&q->queue, (void*)&q->pool[i], sizeof(void*));
+		kfifo_in(&q->queue, (void*)&q->pool[i], sizeof(void*));
 	}
 
 	if (items) {
@@ -2819,7 +2819,7 @@ iscsi_conn_setup(struct iscsi_cls_session *cls_session, int dd_size,
 
 	/* allocate login_task used for the login/text sequences */
 	spin_lock_bh(&session->lock);
-	if (!kfifo_get(&session->cmdpool.queue,
+	if (!kfifo_out(&session->cmdpool.queue,
                          (void*)&conn->login_task,
 			 sizeof(void*))) {
 		spin_unlock_bh(&session->lock);
@@ -2839,7 +2839,7 @@ iscsi_conn_setup(struct iscsi_cls_session *cls_session, int dd_size,
 	return cls_conn;
 
 login_task_data_alloc_fail:
-	kfifo_put(&session->cmdpool.queue, (void*)&conn->login_task,
+	kfifo_in(&session->cmdpool.queue, (void*)&conn->login_task,
 		    sizeof(void*));
 login_task_alloc_fail:
 	iscsi_destroy_conn(cls_conn);
@@ -2902,7 +2902,7 @@ void iscsi_conn_teardown(struct iscsi_cls_conn *cls_conn)
 	free_pages((unsigned long) conn->data,
 		   get_order(ISCSI_DEF_MAX_RECV_SEG_LEN));
 	kfree(conn->persistent_address);
-	kfifo_put(&session->cmdpool.queue, (void*)&conn->login_task,
+	kfifo_in(&session->cmdpool.queue, (void*)&conn->login_task,
 		    sizeof(void*));
 	if (session->leadconn == conn)
 		session->leadconn = NULL;
diff --git a/drivers/scsi/libiscsi_tcp.c b/drivers/scsi/libiscsi_tcp.c
index c0be926637b1..d51ffeca2ec9 100644
--- a/drivers/scsi/libiscsi_tcp.c
+++ b/drivers/scsi/libiscsi_tcp.c
@@ -445,15 +445,15 @@ void iscsi_tcp_cleanup_task(struct iscsi_task *task)
 		return;
 
 	/* flush task's r2t queues */
-	while (kfifo_get(&tcp_task->r2tqueue, (void*)&r2t, sizeof(void*))) {
-		kfifo_put(&tcp_task->r2tpool.queue, (void*)&r2t,
+	while (kfifo_out(&tcp_task->r2tqueue, (void*)&r2t, sizeof(void*))) {
+		kfifo_in(&tcp_task->r2tpool.queue, (void*)&r2t,
 			    sizeof(void*));
 		ISCSI_DBG_TCP(task->conn, "pending r2t dropped\n");
 	}
 
 	r2t = tcp_task->r2t;
 	if (r2t != NULL) {
-		kfifo_put(&tcp_task->r2tpool.queue, (void*)&r2t,
+		kfifo_in(&tcp_task->r2tpool.queue, (void*)&r2t,
 			    sizeof(void*));
 		tcp_task->r2t = NULL;
 	}
@@ -541,7 +541,7 @@ static int iscsi_tcp_r2t_rsp(struct iscsi_conn *conn, struct iscsi_task *task)
 		return 0;
 	}
 
-	rc = kfifo_get(&tcp_task->r2tpool.queue, (void*)&r2t, sizeof(void*));
+	rc = kfifo_out(&tcp_task->r2tpool.queue, (void*)&r2t, sizeof(void*));
 	if (!rc) {
 		iscsi_conn_printk(KERN_ERR, conn, "Could not allocate R2T. "
 				  "Target has sent more R2Ts than it "
@@ -554,7 +554,7 @@ static int iscsi_tcp_r2t_rsp(struct iscsi_conn *conn, struct iscsi_task *task)
 	if (r2t->data_length == 0) {
 		iscsi_conn_printk(KERN_ERR, conn,
 				  "invalid R2T with zero data len\n");
-		kfifo_put(&tcp_task->r2tpool.queue, (void*)&r2t,
+		kfifo_in(&tcp_task->r2tpool.queue, (void*)&r2t,
 			    sizeof(void*));
 		return ISCSI_ERR_DATALEN;
 	}
@@ -570,7 +570,7 @@ static int iscsi_tcp_r2t_rsp(struct iscsi_conn *conn, struct iscsi_task *task)
 				  "invalid R2T with data len %u at offset %u "
 				  "and total length %d\n", r2t->data_length,
 				  r2t->data_offset, scsi_out(task->sc)->length);
-		kfifo_put(&tcp_task->r2tpool.queue, (void*)&r2t,
+		kfifo_in(&tcp_task->r2tpool.queue, (void*)&r2t,
 			    sizeof(void*));
 		return ISCSI_ERR_DATALEN;
 	}
@@ -580,7 +580,7 @@ static int iscsi_tcp_r2t_rsp(struct iscsi_conn *conn, struct iscsi_task *task)
 	r2t->sent = 0;
 
 	tcp_task->exp_datasn = r2tsn + 1;
-	kfifo_put(&tcp_task->r2tqueue, (void*)&r2t, sizeof(void*));
+	kfifo_in(&tcp_task->r2tqueue, (void*)&r2t, sizeof(void*));
 	conn->r2t_pdus_cnt++;
 
 	iscsi_requeue_task(task);
@@ -982,7 +982,7 @@ static struct iscsi_r2t_info *iscsi_tcp_get_curr_r2t(struct iscsi_task *task)
 			if (r2t->data_length <= r2t->sent) {
 				ISCSI_DBG_TCP(task->conn,
 					      "  done with r2t %p\n", r2t);
-				kfifo_put(&tcp_task->r2tpool.queue,
+				kfifo_in(&tcp_task->r2tpool.queue,
 					    (void *)&tcp_task->r2t,
 					    sizeof(void *));
 				tcp_task->r2t = r2t = NULL;
@@ -990,7 +990,7 @@ static struct iscsi_r2t_info *iscsi_tcp_get_curr_r2t(struct iscsi_task *task)
 		}
 
 		if (r2t == NULL) {
-			kfifo_get(&tcp_task->r2tqueue,
+			kfifo_out(&tcp_task->r2tqueue,
 				    (void *)&tcp_task->r2t, sizeof(void *));
 			r2t = tcp_task->r2t;
 		}
diff --git a/drivers/scsi/libsrp.c b/drivers/scsi/libsrp.c
index 975e448cfcb9..8424b8606efb 100644
--- a/drivers/scsi/libsrp.c
+++ b/drivers/scsi/libsrp.c
@@ -61,7 +61,7 @@ static int srp_iu_pool_alloc(struct srp_queue *q, size_t max,
 	kfifo_init(&q->queue, (void *) q->pool, max * sizeof(void *));
 
 	for (i = 0, iue = q->items; i < max; i++) {
-		kfifo_put(&q->queue, (void *) &iue, sizeof(void *));
+		kfifo_in(&q->queue, (void *) &iue, sizeof(void *));
 		iue->sbuf = ring[i];
 		iue++;
 	}
@@ -163,7 +163,7 @@ struct iu_entry *srp_iu_get(struct srp_target *target)
 {
 	struct iu_entry *iue = NULL;
 
-	kfifo_get_locked(&target->iu_queue.queue, (void *) &iue,
+	kfifo_out_locked(&target->iu_queue.queue, (void *) &iue,
 			sizeof(void *), &target->iu_queue.lock);
 	if (!iue)
 		return iue;
@@ -176,7 +176,7 @@ EXPORT_SYMBOL_GPL(srp_iu_get);
 
 void srp_iu_put(struct iu_entry *iue)
 {
-	kfifo_put_locked(&iue->target->iu_queue.queue, (void *) &iue,
+	kfifo_in_locked(&iue->target->iu_queue.queue, (void *) &iue,
 			sizeof(void *), &iue->target->iu_queue.lock);
 }
 EXPORT_SYMBOL_GPL(srp_iu_put);
diff --git a/drivers/usb/host/fhci.h b/drivers/usb/host/fhci.h
index 96aa787f208f..72dae1c5ab38 100644
--- a/drivers/usb/host/fhci.h
+++ b/drivers/usb/host/fhci.h
@@ -510,14 +510,14 @@ static inline unsigned int cq_howmany(struct kfifo *kfifo)
 
 static inline int cq_put(struct kfifo *kfifo, void *p)
 {
-	return kfifo_put(kfifo, (void *)&p, sizeof(p));
+	return kfifo_in(kfifo, (void *)&p, sizeof(p));
 }
 
 static inline void *cq_get(struct kfifo *kfifo)
 {
 	void *p = NULL;
 
-	kfifo_get(kfifo, (void *)&p, sizeof(p));
+	kfifo_out(kfifo, (void *)&p, sizeof(p));
 	return p;
 }
 
diff --git a/drivers/usb/serial/generic.c b/drivers/usb/serial/generic.c
index d0a2e464cacd..b0f1183755c9 100644
--- a/drivers/usb/serial/generic.c
+++ b/drivers/usb/serial/generic.c
@@ -285,7 +285,7 @@ static int usb_serial_generic_write_start(struct usb_serial_port *port)
 		return 0;
 
 	data = port->write_urb->transfer_buffer;
-	count = kfifo_get_locked(port->write_fifo, data, port->bulk_out_size, &port->lock);
+	count = kfifo_out_locked(port->write_fifo, data, port->bulk_out_size, &port->lock);
 	usb_serial_debug_data(debug, &port->dev, __func__, count, data);
 
 	/* set up our urb */
@@ -345,7 +345,7 @@ int usb_serial_generic_write(struct tty_struct *tty,
 		return usb_serial_multi_urb_write(tty, port,
 						  buf, count);
 
-	count = kfifo_put_locked(port->write_fifo, buf, count, &port->lock);
+	count = kfifo_in_locked(port->write_fifo, buf, count, &port->lock);
 	result = usb_serial_generic_write_start(port);
 
 	if (result >= 0)
diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h
index a893acda3964..1b59c4a0e85f 100644
--- a/include/linux/kfifo.h
+++ b/include/linux/kfifo.h
@@ -19,6 +19,25 @@
  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  *
  */
+
+/*
+ * Howto porting drivers to the new generic fifo API:
+ *
+ * - Modify the declaration of the "struct kfifo *" object into a
+ *   in-place "struct kfifo" object
+ * - Init the in-place object with kfifo_alloc() or kfifo_init()
+ *   Note: The address of the in-place "struct kfifo" object must be
+ *   passed as the first argument to this functions
+ * - Replace the use of __kfifo_put into kfifo_in and __kfifo_get
+ *   into kfifo_out
+ * - Replace the use of kfifo_put into kfifo_in_locked and kfifo_get
+ *   into kfifo_out_locked
+ *   Note: the spinlock pointer formerly passed to kfifo_init/kfifo_alloc
+ *   must be passed now to the kfifo_in_locked and kfifo_out_locked
+ *   as the last parameter.
+ * - All formerly name __kfifo_* functions has been renamed into kfifo_*
+ */
+
 #ifndef _LINUX_KFIFO_H
 #define _LINUX_KFIFO_H
 
@@ -37,10 +56,10 @@ extern void kfifo_init(struct kfifo *fifo, unsigned char *buffer,
 extern __must_check int kfifo_alloc(struct kfifo *fifo, unsigned int size,
 			gfp_t gfp_mask);
 extern void kfifo_free(struct kfifo *fifo);
-extern unsigned int kfifo_put(struct kfifo *fifo,
-				const unsigned char *buffer, unsigned int len);
-extern unsigned int kfifo_get(struct kfifo *fifo,
-				unsigned char *buffer, unsigned int len);
+extern __must_check unsigned int kfifo_in(struct kfifo *fifo,
+				const unsigned char *from, unsigned int len);
+extern __must_check unsigned int kfifo_out(struct kfifo *fifo,
+				unsigned char *to, unsigned int len);
 
 /**
  * kfifo_reset - removes the entire FIFO contents
@@ -65,7 +84,7 @@ static inline unsigned int kfifo_len(struct kfifo *fifo)
 }
 
 /**
- * kfifo_put_locked - puts some data into the FIFO using a spinlock for locking
+ * kfifo_in_locked - puts some data into the FIFO using a spinlock for locking
  * @fifo: the fifo to be used.
  * @from: the data to be added.
  * @n: the length of the data to be added.
@@ -75,7 +94,7 @@ static inline unsigned int kfifo_len(struct kfifo *fifo)
  * the FIFO depending on the free space, and returns the number of
  * bytes copied.
  */
-static inline __must_check unsigned int kfifo_put_locked(struct kfifo *fifo,
+static inline __must_check unsigned int kfifo_in_locked(struct kfifo *fifo,
 		const unsigned char *from, unsigned int n, spinlock_t *lock)
 {
 	unsigned long flags;
@@ -83,7 +102,7 @@ static inline __must_check unsigned int kfifo_put_locked(struct kfifo *fifo,
 
 	spin_lock_irqsave(lock, flags);
 
-	ret = kfifo_put(fifo, from, n);
+	ret = kfifo_in(fifo, from, n);
 
 	spin_unlock_irqrestore(lock, flags);
 
@@ -91,7 +110,7 @@ static inline __must_check unsigned int kfifo_put_locked(struct kfifo *fifo,
 }
 
 /**
- * kfifo_get_locked - gets some data from the FIFO using a spinlock for locking
+ * kfifo_out_locked - gets some data from the FIFO using a spinlock for locking
  * @fifo: the fifo to be used.
  * @to: where the data must be copied.
  * @n: the size of the destination buffer.
@@ -100,7 +119,7 @@ static inline __must_check unsigned int kfifo_put_locked(struct kfifo *fifo,
  * This function copies at most @len bytes from the FIFO into the
  * @to buffer and returns the number of copied bytes.
  */
-static inline __must_check unsigned int kfifo_get_locked(struct kfifo *fifo,
+static inline __must_check unsigned int kfifo_out_locked(struct kfifo *fifo,
 	unsigned char *to, unsigned int n, spinlock_t *lock)
 {
 	unsigned long flags;
@@ -108,7 +127,7 @@ static inline __must_check unsigned int kfifo_get_locked(struct kfifo *fifo,
 
 	spin_lock_irqsave(lock, flags);
 
-	ret = kfifo_get(fifo, to, n);
+	ret = kfifo_out(fifo, to, n);
 
 	/*
 	 * optimization: if the FIFO is empty, set the indices to 0
diff --git a/kernel/kfifo.c b/kernel/kfifo.c
index 963ffde4af1a..d659442e73f2 100644
--- a/kernel/kfifo.c
+++ b/kernel/kfifo.c
@@ -100,20 +100,20 @@ void kfifo_free(struct kfifo *fifo)
 EXPORT_SYMBOL(kfifo_free);
 
 /**
- * kfifo_put - puts some data into the FIFO, no locking version
+ * kfifo_in - puts some data into the FIFO
  * @fifo: the fifo to be used.
- * @buffer: the data to be added.
+ * @from: the data to be added.
  * @len: the length of the data to be added.
  *
- * This function copies at most @len bytes from the @buffer into
+ * This function copies at most @len bytes from the @from buffer into
  * the FIFO depending on the free space, and returns the number of
  * bytes copied.
  *
  * Note that with only one concurrent reader and one concurrent
  * writer, you don't need extra locking to use these functions.
  */
-unsigned int kfifo_put(struct kfifo *fifo,
-			const unsigned char *buffer, unsigned int len)
+unsigned int kfifo_in(struct kfifo *fifo,
+			const unsigned char *from, unsigned int len)
 {
 	unsigned int l;
 
@@ -128,10 +128,10 @@ unsigned int kfifo_put(struct kfifo *fifo,
 
 	/* first put the data starting from fifo->in to buffer end */
 	l = min(len, fifo->size - (fifo->in & (fifo->size - 1)));
-	memcpy(fifo->buffer + (fifo->in & (fifo->size - 1)), buffer, l);
+	memcpy(fifo->buffer + (fifo->in & (fifo->size - 1)), from, l);
 
 	/* then put the rest (if any) at the beginning of the buffer */
-	memcpy(fifo->buffer, buffer + l, len - l);
+	memcpy(fifo->buffer, from + l, len - l);
 
 	/*
 	 * Ensure that we add the bytes to the kfifo -before-
@@ -144,22 +144,22 @@ unsigned int kfifo_put(struct kfifo *fifo,
 
 	return len;
 }
-EXPORT_SYMBOL(kfifo_put);
+EXPORT_SYMBOL(kfifo_in);
 
 /**
- * kfifo_get - gets some data from the FIFO, no locking version
+ * kfifo_out - gets some data from the FIFO
  * @fifo: the fifo to be used.
- * @buffer: where the data must be copied.
+ * @to: where the data must be copied.
  * @len: the size of the destination buffer.
  *
  * This function copies at most @len bytes from the FIFO into the
- * @buffer and returns the number of copied bytes.
+ * @to buffer and returns the number of copied bytes.
  *
  * Note that with only one concurrent reader and one concurrent
  * writer, you don't need extra locking to use these functions.
  */
-unsigned int kfifo_get(struct kfifo *fifo,
-			 unsigned char *buffer, unsigned int len)
+unsigned int kfifo_out(struct kfifo *fifo,
+			 unsigned char *to, unsigned int len)
 {
 	unsigned int l;
 
@@ -174,10 +174,10 @@ unsigned int kfifo_get(struct kfifo *fifo,
 
 	/* first get the data from fifo->out until the end of the buffer */
 	l = min(len, fifo->size - (fifo->out & (fifo->size - 1)));
-	memcpy(buffer, fifo->buffer + (fifo->out & (fifo->size - 1)), l);
+	memcpy(to, fifo->buffer + (fifo->out & (fifo->size - 1)), l);
 
 	/* then get the rest (if any) from the beginning of the buffer */
-	memcpy(buffer + l, fifo->buffer, len - l);
+	memcpy(to + l, fifo->buffer, len - l);
 
 	/*
 	 * Ensure that we remove the bytes from the kfifo -before-
@@ -190,4 +190,4 @@ unsigned int kfifo_get(struct kfifo *fifo,
 
 	return len;
 }
-EXPORT_SYMBOL(kfifo_get);
+EXPORT_SYMBOL(kfifo_out);
diff --git a/net/dccp/probe.c b/net/dccp/probe.c
index 9ef36849edd7..a1362dc8abb0 100644
--- a/net/dccp/probe.c
+++ b/net/dccp/probe.c
@@ -67,7 +67,7 @@ static void printl(const char *fmt, ...)
 	len += vscnprintf(tbuf+len, sizeof(tbuf)-len, fmt, args);
 	va_end(args);
 
-	kfifo_put_locked(&dccpw.fifo, tbuf, len, &dccpw.lock);
+	kfifo_in_locked(&dccpw.fifo, tbuf, len, &dccpw.lock);
 	wake_up(&dccpw.wait);
 }
 
@@ -135,7 +135,7 @@ static ssize_t dccpprobe_read(struct file *file, char __user *buf,
 	if (error)
 		goto out_free;
 
-	cnt = kfifo_get_locked(&dccpw.fifo, tbuf, len, &dccpw.lock);
+	cnt = kfifo_out_locked(&dccpw.fifo, tbuf, len, &dccpw.lock);
 	error = copy_to_user(buf, tbuf, cnt) ? -EFAULT : 0;
 
 out_free:
-- 
cgit v1.2.3


From 9842c38e917636fa7dc6b88aff17a8f1fd7f0cc0 Mon Sep 17 00:00:00 2001
From: Stefani Seibold <stefani@seibold.net>
Date: Mon, 21 Dec 2009 14:37:29 -0800
Subject: kfifo: fix warn_unused_result

Fix the "ignoring return value of '...', declared with attribute
warn_unused_result" compiler warning in several users of the new kfifo
API.

It removes the __must_check attribute from kfifo_in() and
kfifo_in_locked() which must not necessary performed.

Fix the allocation bug in the nozomi driver file, by moving out the
kfifo_alloc from the interrupt handler into the probe function.

Fix the kfifo_out() and kfifo_out_locked() users to handle a unexpected
end of fifo.

Signed-off-by: Stefani Seibold <stefani@seibold.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/char/nozomi.c                       | 31 +++++++++++++++++++++++++----
 drivers/infiniband/hw/cxgb3/cxio_resource.c |  5 +++--
 drivers/media/video/meye.c                  |  5 +++--
 drivers/net/wireless/libertas/main.c        |  6 ++++--
 drivers/scsi/libiscsi_tcp.c                 |  9 +++++++--
 drivers/scsi/libsrp.c                       |  7 +++++--
 include/linux/kfifo.h                       |  4 ++--
 7 files changed, 51 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/char/nozomi.c b/drivers/char/nozomi.c
index 9ef243429014..7d73cd430340 100644
--- a/drivers/char/nozomi.c
+++ b/drivers/char/nozomi.c
@@ -685,8 +685,6 @@ static int nozomi_read_config_table(struct nozomi *dc)
 		dump_table(dc);
 
 		for (i = PORT_MDM; i < MAX_PORT; i++) {
-			kfifo_alloc(&dc->port[i].fifo_ul,
-				FIFO_BUFFER_SIZE_UL, GFP_ATOMIC);
 			memset(&dc->port[i].ctrl_dl, 0, sizeof(struct ctrl_dl));
 			memset(&dc->port[i].ctrl_ul, 0, sizeof(struct ctrl_ul));
 		}
@@ -1433,6 +1431,16 @@ static int __devinit nozomi_card_init(struct pci_dev *pdev,
 		goto err_free_sbuf;
 	}
 
+	for (i = PORT_MDM; i < MAX_PORT; i++) {
+		if (kfifo_alloc(&dc->port[i].fifo_ul,
+		      FIFO_BUFFER_SIZE_UL, GFP_ATOMIC)) {
+			dev_err(&pdev->dev,
+					"Could not allocate kfifo buffer\n");
+			ret = -ENOMEM;
+			goto err_free_kfifo;
+		}
+	}
+
 	spin_lock_init(&dc->spin_mutex);
 
 	nozomi_setup_private_data(dc);
@@ -1445,7 +1453,7 @@ static int __devinit nozomi_card_init(struct pci_dev *pdev,
 			NOZOMI_NAME, dc);
 	if (unlikely(ret)) {
 		dev_err(&pdev->dev, "can't request irq %d\n", pdev->irq);
-		goto err_free_sbuf;
+		goto err_free_kfifo;
 	}
 
 	DBG1("base_addr: %p", dc->base_addr);
@@ -1464,13 +1472,28 @@ static int __devinit nozomi_card_init(struct pci_dev *pdev,
 	dc->state = NOZOMI_STATE_ENABLED;
 
 	for (i = 0; i < MAX_PORT; i++) {
+		struct device *tty_dev;
+
 		mutex_init(&dc->port[i].tty_sem);
 		tty_port_init(&dc->port[i].port);
-		tty_register_device(ntty_driver, dc->index_start + i,
+		tty_dev = tty_register_device(ntty_driver, dc->index_start + i,
 							&pdev->dev);
+
+		if (IS_ERR(tty_dev)) {
+			ret = PTR_ERR(tty_dev);
+			dev_err(&pdev->dev, "Could not allocate tty?\n");
+			goto err_free_tty;
+		}
 	}
+
 	return 0;
 
+err_free_tty:
+	for (i = dc->index_start; i < dc->index_start + MAX_PORT; ++i)
+		tty_unregister_device(ntty_driver, i);
+err_free_kfifo:
+	for (i = 0; i < MAX_PORT; i++)
+		kfifo_free(&dc->port[i].fifo_ul);
 err_free_sbuf:
 	kfree(dc->send_buf);
 	iounmap(dc->base_addr);
diff --git a/drivers/infiniband/hw/cxgb3/cxio_resource.c b/drivers/infiniband/hw/cxgb3/cxio_resource.c
index dcbf2606c438..31f9201b2980 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_resource.c
+++ b/drivers/infiniband/hw/cxgb3/cxio_resource.c
@@ -86,8 +86,9 @@ static int __cxio_init_resource_fifo(struct kfifo *fifo,
 			kfifo_in(fifo, (unsigned char *) &i, sizeof(u32));
 
 	for (i = 0; i < skip_low + skip_high; i++)
-		kfifo_out_locked(fifo, (unsigned char *) &entry,
-				sizeof(u32), fifo_lock);
+		if (kfifo_out_locked(fifo, (unsigned char *) &entry,
+				sizeof(u32), fifo_lock) != sizeof(u32))
+					break;
 	return 0;
 }
 
diff --git a/drivers/media/video/meye.c b/drivers/media/video/meye.c
index 884a569d60a2..b421858ccf90 100644
--- a/drivers/media/video/meye.c
+++ b/drivers/media/video/meye.c
@@ -968,8 +968,9 @@ static int meyeioc_sync(struct file *file, void *fh, int *i)
 		/* fall through */
 	case MEYE_BUF_DONE:
 		meye.grab_buffer[*i].state = MEYE_BUF_UNUSED;
-		kfifo_out_locked(&meye.doneq, (unsigned char *)&unused,
-				sizeof(int), &meye.doneq_lock);
+		if (kfifo_out_locked(&meye.doneq, (unsigned char *)&unused,
+				sizeof(int), &meye.doneq_lock) != sizeof(int))
+					break;
 	}
 	*i = meye.grab_buffer[*i].size;
 	mutex_unlock(&meye.lock);
diff --git a/drivers/net/wireless/libertas/main.c b/drivers/net/wireless/libertas/main.c
index 2bcfa745524a..c2975c8e2f21 100644
--- a/drivers/net/wireless/libertas/main.c
+++ b/drivers/net/wireless/libertas/main.c
@@ -514,8 +514,10 @@ static int lbs_thread(void *data)
 		while (kfifo_len(&priv->event_fifo)) {
 			u32 event;
 
-			kfifo_out(&priv->event_fifo, (unsigned char *) &event,
-				sizeof(event));
+			if (kfifo_out(&priv->event_fifo,
+				(unsigned char *) &event, sizeof(event)) !=
+				sizeof(event))
+					break;
 			spin_unlock_irq(&priv->driver_lock);
 			lbs_process_event(priv, event);
 			spin_lock_irq(&priv->driver_lock);
diff --git a/drivers/scsi/libiscsi_tcp.c b/drivers/scsi/libiscsi_tcp.c
index d51ffeca2ec9..db6856c138fc 100644
--- a/drivers/scsi/libiscsi_tcp.c
+++ b/drivers/scsi/libiscsi_tcp.c
@@ -990,8 +990,13 @@ static struct iscsi_r2t_info *iscsi_tcp_get_curr_r2t(struct iscsi_task *task)
 		}
 
 		if (r2t == NULL) {
-			kfifo_out(&tcp_task->r2tqueue,
-				    (void *)&tcp_task->r2t, sizeof(void *));
+			if (kfifo_out(&tcp_task->r2tqueue,
+			    (void *)&tcp_task->r2t, sizeof(void *)) !=
+			    sizeof(void *)) {
+				WARN_ONCE(1, "unexpected fifo state");
+				r2t = NULL;
+			}
+
 			r2t = tcp_task->r2t;
 		}
 		spin_unlock_bh(&session->lock);
diff --git a/drivers/scsi/libsrp.c b/drivers/scsi/libsrp.c
index 8424b8606efb..ab19b3b4be52 100644
--- a/drivers/scsi/libsrp.c
+++ b/drivers/scsi/libsrp.c
@@ -163,8 +163,11 @@ struct iu_entry *srp_iu_get(struct srp_target *target)
 {
 	struct iu_entry *iue = NULL;
 
-	kfifo_out_locked(&target->iu_queue.queue, (void *) &iue,
-			sizeof(void *), &target->iu_queue.lock);
+	if (kfifo_out_locked(&target->iu_queue.queue, (void *) &iue,
+		sizeof(void *), &target->iu_queue.lock) != sizeof(void *)) {
+			WARN_ONCE(1, "unexpected fifo state");
+			return NULL;
+	}
 	if (!iue)
 		return iue;
 	iue->target = target;
diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h
index 1b59c4a0e85f..5ed2565c89b6 100644
--- a/include/linux/kfifo.h
+++ b/include/linux/kfifo.h
@@ -56,7 +56,7 @@ extern void kfifo_init(struct kfifo *fifo, unsigned char *buffer,
 extern __must_check int kfifo_alloc(struct kfifo *fifo, unsigned int size,
 			gfp_t gfp_mask);
 extern void kfifo_free(struct kfifo *fifo);
-extern __must_check unsigned int kfifo_in(struct kfifo *fifo,
+extern unsigned int kfifo_in(struct kfifo *fifo,
 				const unsigned char *from, unsigned int len);
 extern __must_check unsigned int kfifo_out(struct kfifo *fifo,
 				unsigned char *to, unsigned int len);
@@ -94,7 +94,7 @@ static inline unsigned int kfifo_len(struct kfifo *fifo)
  * the FIFO depending on the free space, and returns the number of
  * bytes copied.
  */
-static inline __must_check unsigned int kfifo_in_locked(struct kfifo *fifo,
+static inline unsigned int kfifo_in_locked(struct kfifo *fifo,
 		const unsigned char *from, unsigned int n, spinlock_t *lock)
 {
 	unsigned long flags;
-- 
cgit v1.2.3


From 37bdfbbfaab47811fcec84dff23c4e8da1a09f9e Mon Sep 17 00:00:00 2001
From: Stefani Seibold <stefani@seibold.net>
Date: Mon, 21 Dec 2009 14:37:30 -0800
Subject: kfifo: add DEFINE_KFIFO and friends, add very tiny functions

Add DECLARE_KFIFO - macro to declare a kfifo and the associated buffer inside a struct
 Add INIT_KFIFO - Initialize a kfifo declared by DECLARED_KFIFO
 Add DEFINE_KFIFO - macro to define and initialize a kfifo as a global or local object
 Add kfifo_size() - returns the size of the fifo in bytes
 Add kfifo_is_empty() - returns true if the fifo is empty
 Add kfifo_is_full() - returns true if the fifo is full
 Add kfifo_avail() - returns the number of bytes available in the FIFO
 Do some code cleanup

Signed-off-by: Stefani Seibold <stefani@seibold.net>
Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
Acked-by: Mauro Carvalho Chehab <mchehab@redhat.com>
Acked-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kfifo.h | 94 +++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 92 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h
index 5ed2565c89b6..dd53eed3e2af 100644
--- a/include/linux/kfifo.h
+++ b/include/linux/kfifo.h
@@ -51,6 +51,60 @@ struct kfifo {
 	unsigned int out;	/* data is extracted from off. (out % size) */
 };
 
+/*
+ * Macros for declaration and initialization of the kfifo datatype
+ */
+
+/* helper macro */
+#define __kfifo_initializer(s, b) \
+	(struct kfifo) { \
+		.size	= s, \
+		.in	= 0, \
+		.out	= 0, \
+		.buffer = b \
+	}
+
+/**
+ * DECLARE_KFIFO - macro to declare a kfifo and the associated buffer
+ * @name: name of the declared kfifo datatype
+ * @size: size of the fifo buffer
+ *
+ * Note: the macro can be used inside struct or union declaration
+ * Note: the macro creates two objects:
+ *  A kfifo object with the given name and a buffer for the kfifo
+ *  object named name##kfifo_buffer
+ */
+#define DECLARE_KFIFO(name, size) \
+union { \
+	struct kfifo name; \
+	unsigned char name##kfifo_buffer[size + sizeof(struct kfifo)]; \
+}
+
+/**
+ * INIT_KFIFO - Initialize a kfifo declared by DECLARED_KFIFO
+ * @name: name of the declared kfifo datatype
+ * @size: size of the fifo buffer
+ */
+#define INIT_KFIFO(name) \
+	name = __kfifo_initializer(sizeof(name##kfifo_buffer) - \
+				sizeof(struct kfifo), name##kfifo_buffer)
+
+/**
+ * DEFINE_KFIFO - macro to define and initialize a kfifo
+ * @name: name of the declared kfifo datatype
+ * @size: size of the fifo buffer
+ *
+ * Note: the macro can be used for global and local kfifo data type variables
+ * Note: the macro creates two objects:
+ *  A kfifo object with the given name and a buffer for the kfifo
+ *  object named name##kfifo_buffer
+ */
+#define DEFINE_KFIFO(name, size) \
+	unsigned char name##kfifo_buffer[size]; \
+	struct kfifo name = __kfifo_initializer(size, name##kfifo_buffer)
+
+#undef __kfifo_initializer
+
 extern void kfifo_init(struct kfifo *fifo, unsigned char *buffer,
 			unsigned int size);
 extern __must_check int kfifo_alloc(struct kfifo *fifo, unsigned int size,
@@ -70,6 +124,15 @@ static inline void kfifo_reset(struct kfifo *fifo)
 	fifo->in = fifo->out = 0;
 }
 
+/**
+ * kfifo_size - returns the size of the fifo in bytes
+ * @fifo: the fifo to be used.
+ */
+static inline __must_check unsigned int kfifo_size(struct kfifo *fifo)
+{
+	return fifo->size;
+}
+
 /**
  * kfifo_len - returns the number of used bytes in the FIFO
  * @fifo: the fifo to be used.
@@ -83,6 +146,33 @@ static inline unsigned int kfifo_len(struct kfifo *fifo)
 	return fifo->in - out;
 }
 
+/**
+ * kfifo_is_empty - returns true if the fifo is empty
+ * @fifo: the fifo to be used.
+ */
+static inline __must_check int kfifo_is_empty(struct kfifo *fifo)
+{
+	return fifo->in == fifo->out;
+}
+
+/**
+ * kfifo_is_full - returns true if the fifo is full
+ * @fifo: the fifo to be used.
+ */
+static inline __must_check int kfifo_is_full(struct kfifo *fifo)
+{
+	return kfifo_len(fifo) == kfifo_size(fifo);
+}
+
+/**
+ * kfifo_avail - returns the number of bytes available in the FIFO
+ * @fifo: the fifo to be used.
+ */
+static inline __must_check unsigned int kfifo_avail(struct kfifo *fifo)
+{
+	return kfifo_size(fifo) - kfifo_len(fifo);
+}
+
 /**
  * kfifo_in_locked - puts some data into the FIFO using a spinlock for locking
  * @fifo: the fifo to be used.
@@ -133,8 +223,8 @@ static inline __must_check unsigned int kfifo_out_locked(struct kfifo *fifo,
 	 * optimization: if the FIFO is empty, set the indices to 0
 	 * so we don't wrap the next time
 	 */
-	if (fifo->in == fifo->out)
-		fifo->in = fifo->out = 0;
+	if (kfifo_is_empty(fifo))
+		kfifo_reset(fifo);
 
 	spin_unlock_irqrestore(lock, flags);
 
-- 
cgit v1.2.3


From a121f24accac1600bf5b6fb1e12eeabdfed7cb1a Mon Sep 17 00:00:00 2001
From: Stefani Seibold <stefani@seibold.net>
Date: Mon, 21 Dec 2009 14:37:31 -0800
Subject: kfifo: add kfifo_skip, kfifo_from_user and kfifo_to_user

Add kfifo_reset_out() for save lockless discard the fifo output
 Add kfifo_skip() to skip a number of output bytes
 Add kfifo_from_user() to copy user space data into the fifo
 Add kfifo_to_user() to copy fifo data to user space

Signed-off-by: Stefani Seibold <stefani@seibold.net>
Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
Acked-by: Mauro Carvalho Chehab <mchehab@redhat.com>
Acked-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kfifo.h |  47 +++++++++++++++++
 kernel/kfifo.c        | 139 ++++++++++++++++++++++++++++++++++++++++++++------
 2 files changed, 170 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h
index dd53eed3e2af..d3230fb08bc7 100644
--- a/include/linux/kfifo.h
+++ b/include/linux/kfifo.h
@@ -124,6 +124,16 @@ static inline void kfifo_reset(struct kfifo *fifo)
 	fifo->in = fifo->out = 0;
 }
 
+/**
+ * kfifo_reset_out - skip FIFO contents
+ * @fifo: the fifo to be emptied.
+ */
+static inline void kfifo_reset_out(struct kfifo *fifo)
+{
+	smp_mb();
+	fifo->out = fifo->in;
+}
+
 /**
  * kfifo_size - returns the size of the fifo in bytes
  * @fifo: the fifo to be used.
@@ -231,4 +241,41 @@ static inline __must_check unsigned int kfifo_out_locked(struct kfifo *fifo,
 	return ret;
 }
 
+extern void kfifo_skip(struct kfifo *fifo, unsigned int len);
+
+extern __must_check unsigned int kfifo_from_user(struct kfifo *fifo,
+	const void __user *from, unsigned int n);
+
+extern __must_check unsigned int kfifo_to_user(struct kfifo *fifo,
+	void __user *to, unsigned int n);
+
+/**
+ * __kfifo_add_out internal helper function for updating the out offset
+ */
+static inline void __kfifo_add_out(struct kfifo *fifo,
+				unsigned int off)
+{
+	smp_mb();
+	fifo->out += off;
+}
+
+/**
+ * __kfifo_add_in internal helper function for updating the in offset
+ */
+static inline void __kfifo_add_in(struct kfifo *fifo,
+				unsigned int off)
+{
+	smp_wmb();
+	fifo->in += off;
+}
+
+/**
+ * __kfifo_off internal helper function for calculating the index of a
+ * given offeset
+ */
+static inline unsigned int __kfifo_off(struct kfifo *fifo, unsigned int off)
+{
+	return off & (fifo->size - 1);
+}
+
 #endif
diff --git a/kernel/kfifo.c b/kernel/kfifo.c
index d659442e73f2..2a78425ef67f 100644
--- a/kernel/kfifo.c
+++ b/kernel/kfifo.c
@@ -26,6 +26,7 @@
 #include <linux/err.h>
 #include <linux/kfifo.h>
 #include <linux/log2.h>
+#include <linux/uaccess.h>
 
 static void _kfifo_init(struct kfifo *fifo, unsigned char *buffer,
 		unsigned int size)
@@ -99,6 +100,21 @@ void kfifo_free(struct kfifo *fifo)
 }
 EXPORT_SYMBOL(kfifo_free);
 
+/**
+ * kfifo_skip - skip output data
+ * @fifo: the fifo to be used.
+ * @len: number of bytes to skip
+ */
+void kfifo_skip(struct kfifo *fifo, unsigned int len)
+{
+	if (len < kfifo_len(fifo)) {
+		__kfifo_add_out(fifo, len);
+		return;
+	}
+	kfifo_reset_out(fifo);
+}
+EXPORT_SYMBOL(kfifo_skip);
+
 /**
  * kfifo_in - puts some data into the FIFO
  * @fifo: the fifo to be used.
@@ -115,6 +131,7 @@ EXPORT_SYMBOL(kfifo_free);
 unsigned int kfifo_in(struct kfifo *fifo,
 			const unsigned char *from, unsigned int len)
 {
+	unsigned int off;
 	unsigned int l;
 
 	len = min(len, fifo->size - fifo->in + fifo->out);
@@ -126,21 +143,16 @@ unsigned int kfifo_in(struct kfifo *fifo,
 
 	smp_mb();
 
+	off = __kfifo_off(fifo, fifo->in);
+
 	/* first put the data starting from fifo->in to buffer end */
-	l = min(len, fifo->size - (fifo->in & (fifo->size - 1)));
-	memcpy(fifo->buffer + (fifo->in & (fifo->size - 1)), from, l);
+	l = min(len, fifo->size - off);
+	memcpy(fifo->buffer + off, from, l);
 
 	/* then put the rest (if any) at the beginning of the buffer */
 	memcpy(fifo->buffer, from + l, len - l);
 
-	/*
-	 * Ensure that we add the bytes to the kfifo -before-
-	 * we update the fifo->in index.
-	 */
-
-	smp_wmb();
-
-	fifo->in += len;
+	__kfifo_add_in(fifo, len);
 
 	return len;
 }
@@ -161,6 +173,7 @@ EXPORT_SYMBOL(kfifo_in);
 unsigned int kfifo_out(struct kfifo *fifo,
 			 unsigned char *to, unsigned int len)
 {
+	unsigned int off;
 	unsigned int l;
 
 	len = min(len, fifo->in - fifo->out);
@@ -172,22 +185,116 @@ unsigned int kfifo_out(struct kfifo *fifo,
 
 	smp_rmb();
 
+	off = __kfifo_off(fifo, fifo->out);
+
 	/* first get the data from fifo->out until the end of the buffer */
-	l = min(len, fifo->size - (fifo->out & (fifo->size - 1)));
-	memcpy(to, fifo->buffer + (fifo->out & (fifo->size - 1)), l);
+	l = min(len, fifo->size - off);
+	memcpy(to, fifo->buffer + off, l);
 
 	/* then get the rest (if any) from the beginning of the buffer */
 	memcpy(to + l, fifo->buffer, len - l);
 
+	__kfifo_add_out(fifo, len);
+
+	return len;
+}
+EXPORT_SYMBOL(kfifo_out);
+
+/**
+ * kfifo_from_user - puts some data from user space into the FIFO
+ * @fifo: the fifo to be used.
+ * @from: pointer to the data to be added.
+ * @len: the length of the data to be added.
+ *
+ * This function copies at most @len bytes from the @from into the
+ * FIFO depending and returns the number of copied bytes.
+ *
+ * Note that with only one concurrent reader and one concurrent
+ * writer, you don't need extra locking to use these functions.
+ */
+unsigned int kfifo_from_user(struct kfifo *fifo,
+	const void __user *from, unsigned int len)
+{
+	unsigned int off;
+	unsigned int l;
+	int ret;
+
+	len = min(len, fifo->size - fifo->in + fifo->out);
+
 	/*
-	 * Ensure that we remove the bytes from the kfifo -before-
-	 * we update the fifo->out index.
+	 * Ensure that we sample the fifo->out index -before- we
+	 * start putting bytes into the kfifo.
 	 */
 
 	smp_mb();
 
-	fifo->out += len;
+	off = __kfifo_off(fifo, fifo->in);
+
+	/* first put the data starting from fifo->in to buffer end */
+	l = min(len, fifo->size - off);
+	ret = copy_from_user(fifo->buffer + off, from, l);
+
+	if (unlikely(ret))
+		return l - ret;
+
+	/* then put the rest (if any) at the beginning of the buffer */
+	ret = copy_from_user(fifo->buffer, from + l, len - l);
+
+	if (unlikely(ret))
+		return len - ret;
+
+	__kfifo_add_in(fifo, len);
 
 	return len;
 }
-EXPORT_SYMBOL(kfifo_out);
+EXPORT_SYMBOL(kfifo_from_user);
+
+/**
+ * kfifo_to_user - gets data from the FIFO and write it to user space
+ * @fifo: the fifo to be used.
+ * @to: where the data must be copied.
+ * @len: the size of the destination buffer.
+ *
+ * This function copies at most @len bytes from the FIFO into the
+ * @to buffer and returns the number of copied bytes.
+ *
+ * Note that with only one concurrent reader and one concurrent
+ * writer, you don't need extra locking to use these functions.
+ */
+unsigned int kfifo_to_user(struct kfifo *fifo,
+	void __user *to, unsigned int len)
+{
+	unsigned int off;
+	unsigned int l;
+	int ret;
+
+	len = min(len, fifo->in - fifo->out);
+
+	/*
+	 * Ensure that we sample the fifo->in index -before- we
+	 * start removing bytes from the kfifo.
+	 */
+
+	smp_rmb();
+
+	off = __kfifo_off(fifo, fifo->out);
+
+	/* first get the data from fifo->out until the end of the buffer */
+	l = min(len, fifo->size - off);
+	ret = copy_to_user(to, fifo->buffer + off, l);
+
+	if (unlikely(ret))
+		return l - ret;
+
+	/* then get the rest (if any) from the beginning of the buffer */
+	ret = copy_to_user(to + l, fifo->buffer, len - l);
+
+	if (unlikely(ret))
+		return len - ret;
+
+	__kfifo_add_out(fifo, len);
+
+	return len;
+}
+EXPORT_SYMBOL(kfifo_to_user);
+
-- 
cgit v1.2.3


From 86d4880313603810901f639ccb5c88ff13d4ad3c Mon Sep 17 00:00:00 2001
From: Stefani Seibold <stefani@seibold.net>
Date: Mon, 21 Dec 2009 14:37:32 -0800
Subject: kfifo: add record handling functions

Add kfifo_in_rec() - puts some record data into the FIFO
 Add kfifo_out_rec() - gets some record data from the FIFO
 Add kfifo_from_user_rec() - puts some data from user space into the FIFO
 Add kfifo_to_user_rec() - gets data from the FIFO and write it to user space
 Add kfifo_peek_rec() - gets the size of the next FIFO record field
 Add kfifo_skip_rec() - skip the next fifo out record
 Add kfifo_avail_rec() - determinate the number of bytes available in a record FIFO

Signed-off-by: Stefani Seibold <stefani@seibold.net>
Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
Acked-by: Mauro Carvalho Chehab <mchehab@redhat.com>
Acked-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kfifo.h | 330 ++++++++++++++++++++++++++++++++++++++++++++++++++
 kernel/kfifo.c        | 286 +++++++++++++++++++++++++++++--------------
 2 files changed, 523 insertions(+), 93 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h
index d3230fb08bc7..486e8ad3bb50 100644
--- a/include/linux/kfifo.h
+++ b/include/linux/kfifo.h
@@ -278,4 +278,334 @@ static inline unsigned int __kfifo_off(struct kfifo *fifo, unsigned int off)
 	return off & (fifo->size - 1);
 }
 
+/**
+ * __kfifo_peek_n internal helper function for determinate the length of
+ * the next record in the fifo
+ */
+static inline unsigned int __kfifo_peek_n(struct kfifo *fifo,
+				unsigned int recsize)
+{
+#define __KFIFO_GET(fifo, off, shift) \
+	((fifo)->buffer[__kfifo_off((fifo), (fifo)->out+(off))] << (shift))
+
+	unsigned int l;
+
+	l = __KFIFO_GET(fifo, 0, 0);
+
+	if (--recsize)
+		l |= __KFIFO_GET(fifo, 1, 8);
+
+	return l;
+#undef	__KFIFO_GET
+}
+
+/**
+ * __kfifo_poke_n internal helper function for storing the length of
+ * the next record into the fifo
+ */
+static inline void __kfifo_poke_n(struct kfifo *fifo,
+			unsigned int recsize, unsigned int n)
+{
+#define __KFIFO_PUT(fifo, off, val, shift) \
+		( \
+		(fifo)->buffer[__kfifo_off((fifo), (fifo)->in+(off))] = \
+		(unsigned char)((val) >> (shift)) \
+		)
+
+	__KFIFO_PUT(fifo, 0, n, 0);
+
+	if (--recsize)
+		__KFIFO_PUT(fifo, 1, n, 8);
+#undef	__KFIFO_PUT
+}
+
+/**
+ * __kfifo_in_... internal functions for put date into the fifo
+ * do not call it directly, use kfifo_in_rec() instead
+ */
+extern unsigned int __kfifo_in_n(struct kfifo *fifo,
+	const void *from, unsigned int n, unsigned int recsize);
+
+extern unsigned int __kfifo_in_generic(struct kfifo *fifo,
+	const void *from, unsigned int n, unsigned int recsize);
+
+static inline unsigned int __kfifo_in_rec(struct kfifo *fifo,
+	const void *from, unsigned int n, unsigned int recsize)
+{
+	unsigned int ret;
+
+	ret = __kfifo_in_n(fifo, from, n, recsize);
+
+	if (likely(ret == 0)) {
+		if (recsize)
+			__kfifo_poke_n(fifo, recsize, n);
+		__kfifo_add_in(fifo, n + recsize);
+	}
+	return ret;
+}
+
+/**
+ * kfifo_in_rec - puts some record data into the FIFO
+ * @fifo: the fifo to be used.
+ * @from: the data to be added.
+ * @n: the length of the data to be added.
+ * @recsize: size of record field
+ *
+ * This function copies @n bytes from the @from into the FIFO and returns
+ * the number of bytes which cannot be copied.
+ * A returned value greater than the @n value means that the record doesn't
+ * fit into the buffer.
+ *
+ * Note that with only one concurrent reader and one concurrent
+ * writer, you don't need extra locking to use these functions.
+ */
+static inline __must_check unsigned int kfifo_in_rec(struct kfifo *fifo,
+	void *from, unsigned int n, unsigned int recsize)
+{
+	if (!__builtin_constant_p(recsize))
+		return __kfifo_in_generic(fifo, from, n, recsize);
+	return __kfifo_in_rec(fifo, from, n, recsize);
+}
+
+/**
+ * __kfifo_out_... internal functions for get date from the fifo
+ * do not call it directly, use kfifo_out_rec() instead
+ */
+extern unsigned int __kfifo_out_n(struct kfifo *fifo,
+	void *to, unsigned int reclen, unsigned int recsize);
+
+extern unsigned int __kfifo_out_generic(struct kfifo *fifo,
+	void *to, unsigned int n,
+	unsigned int recsize, unsigned int *total);
+
+static inline unsigned int __kfifo_out_rec(struct kfifo *fifo,
+	void *to, unsigned int n, unsigned int recsize,
+	unsigned int *total)
+{
+	unsigned int l;
+
+	if (!recsize) {
+		l = n;
+		if (total)
+			*total = l;
+	} else {
+		l = __kfifo_peek_n(fifo, recsize);
+		if (total)
+			*total = l;
+		if (n < l)
+			return l;
+	}
+
+	return __kfifo_out_n(fifo, to, l, recsize);
+}
+
+/**
+ * kfifo_out_rec - gets some record data from the FIFO
+ * @fifo: the fifo to be used.
+ * @to: where the data must be copied.
+ * @n: the size of the destination buffer.
+ * @recsize: size of record field
+ * @total: pointer where the total number of to copied bytes should stored
+ *
+ * This function copies at most @n bytes from the FIFO to @to and returns the
+ * number of bytes which cannot be copied.
+ * A returned value greater than the @n value means that the record doesn't
+ * fit into the @to buffer.
+ *
+ * Note that with only one concurrent reader and one concurrent
+ * writer, you don't need extra locking to use these functions.
+ */
+static inline __must_check unsigned int kfifo_out_rec(struct kfifo *fifo,
+	void *to, unsigned int n, unsigned int recsize,
+	unsigned int *total)
+
+{
+	if (!__builtin_constant_p(recsize))
+		return __kfifo_out_generic(fifo, to, n, recsize, total);
+	return __kfifo_out_rec(fifo, to, n, recsize, total);
+}
+
+/**
+ * __kfifo_from_user_... internal functions for transfer from user space into
+ * the fifo. do not call it directly, use kfifo_from_user_rec() instead
+ */
+extern unsigned int __kfifo_from_user_n(struct kfifo *fifo,
+	const void __user *from, unsigned int n, unsigned int recsize);
+
+extern unsigned int __kfifo_from_user_generic(struct kfifo *fifo,
+	const void __user *from, unsigned int n, unsigned int recsize);
+
+static inline unsigned int __kfifo_from_user_rec(struct kfifo *fifo,
+	const void __user *from, unsigned int n, unsigned int recsize)
+{
+	unsigned int ret;
+
+	ret = __kfifo_from_user_n(fifo, from, n, recsize);
+
+	if (likely(ret == 0)) {
+		if (recsize)
+			__kfifo_poke_n(fifo, recsize, n);
+		__kfifo_add_in(fifo, n + recsize);
+	}
+	return ret;
+}
+
+/**
+ * kfifo_from_user_rec - puts some data from user space into the FIFO
+ * @fifo: the fifo to be used.
+ * @from: pointer to the data to be added.
+ * @n: the length of the data to be added.
+ * @recsize: size of record field
+ *
+ * This function copies @n bytes from the @from into the
+ * FIFO and returns the number of bytes which cannot be copied.
+ *
+ * If the returned value is equal or less the @n value, the copy_from_user()
+ * functions has failed. Otherwise the record doesn't fit into the buffer.
+ *
+ * Note that with only one concurrent reader and one concurrent
+ * writer, you don't need extra locking to use these functions.
+ */
+static inline __must_check unsigned int kfifo_from_user_rec(struct kfifo *fifo,
+	const void __user *from, unsigned int n, unsigned int recsize)
+{
+	if (!__builtin_constant_p(recsize))
+		return __kfifo_from_user_generic(fifo, from, n, recsize);
+	return __kfifo_from_user_rec(fifo, from, n, recsize);
+}
+
+/**
+ * __kfifo_to_user_... internal functions for transfer fifo data into user space
+ * do not call it directly, use kfifo_to_user_rec() instead
+ */
+extern unsigned int __kfifo_to_user_n(struct kfifo *fifo,
+	void __user *to, unsigned int n, unsigned int reclen,
+	unsigned int recsize);
+
+extern unsigned int __kfifo_to_user_generic(struct kfifo *fifo,
+	void __user *to, unsigned int n, unsigned int recsize,
+	unsigned int *total);
+
+static inline unsigned int __kfifo_to_user_rec(struct kfifo *fifo,
+	void __user *to, unsigned int n,
+	unsigned int recsize, unsigned int *total)
+{
+	unsigned int l;
+
+	if (!recsize) {
+		l = n;
+		if (total)
+			*total = l;
+	} else {
+		l = __kfifo_peek_n(fifo, recsize);
+		if (total)
+			*total = l;
+		if (n < l)
+			return l;
+	}
+
+	return __kfifo_to_user_n(fifo, to, n, l, recsize);
+}
+
+/**
+ * kfifo_to_user_rec - gets data from the FIFO and write it to user space
+ * @fifo: the fifo to be used.
+ * @to: where the data must be copied.
+ * @n: the size of the destination buffer.
+ * @recsize: size of record field
+ * @total: pointer where the total number of to copied bytes should stored
+ *
+ * This function copies at most @n bytes from the FIFO to the @to.
+ * In case of an error, the function returns the number of bytes which cannot
+ * be copied.
+ * If the returned value is equal or less the @n value, the copy_to_user()
+ * functions has failed. Otherwise the record doesn't fit into the @to buffer.
+ *
+ * Note that with only one concurrent reader and one concurrent
+ * writer, you don't need extra locking to use these functions.
+ */
+static inline __must_check unsigned int kfifo_to_user_rec(struct kfifo *fifo,
+		void __user *to, unsigned int n, unsigned int recsize,
+		unsigned int *total)
+{
+	if (!__builtin_constant_p(recsize))
+		return __kfifo_to_user_generic(fifo, to, n, recsize, total);
+	return __kfifo_to_user_rec(fifo, to, n, recsize, total);
+}
+
+/**
+ * __kfifo_peek_... internal functions for peek into the next fifo record
+ * do not call it directly, use kfifo_peek_rec() instead
+ */
+extern unsigned int __kfifo_peek_generic(struct kfifo *fifo,
+				unsigned int recsize);
+
+/**
+ * kfifo_peek_rec - gets the size of the next FIFO record data
+ * @fifo: the fifo to be used.
+ * @recsize: size of record field
+ *
+ * This function returns the size of the next FIFO record in number of bytes
+ */
+static inline __must_check unsigned int kfifo_peek_rec(struct kfifo *fifo,
+	unsigned int recsize)
+{
+	if (!__builtin_constant_p(recsize))
+		return __kfifo_peek_generic(fifo, recsize);
+	if (!recsize)
+		return kfifo_len(fifo);
+	return __kfifo_peek_n(fifo, recsize);
+}
+
+/**
+ * __kfifo_skip_... internal functions for skip the next fifo record
+ * do not call it directly, use kfifo_skip_rec() instead
+ */
+extern void __kfifo_skip_generic(struct kfifo *fifo, unsigned int recsize);
+
+static inline void __kfifo_skip_rec(struct kfifo *fifo,
+	unsigned int recsize)
+{
+	unsigned int l;
+
+	if (recsize) {
+		l = __kfifo_peek_n(fifo, recsize);
+
+		if (l + recsize <= kfifo_len(fifo)) {
+			__kfifo_add_out(fifo, l + recsize);
+			return;
+		}
+	}
+	kfifo_reset_out(fifo);
+}
+
+/**
+ * kfifo_skip_rec - skip the next fifo out record
+ * @fifo: the fifo to be used.
+ * @recsize: size of record field
+ *
+ * This function skips the next FIFO record
+ */
+static inline void kfifo_skip_rec(struct kfifo *fifo,
+	unsigned int recsize)
+{
+	if (!__builtin_constant_p(recsize))
+		__kfifo_skip_generic(fifo, recsize);
+	else
+		__kfifo_skip_rec(fifo, recsize);
+}
+
+/**
+ * kfifo_avail_rec - returns the number of bytes available in a record FIFO
+ * @fifo: the fifo to be used.
+ * @recsize: size of record field
+ */
+static inline __must_check unsigned int kfifo_avail_rec(struct kfifo *fifo,
+	unsigned int recsize)
+{
+	unsigned int l = kfifo_size(fifo) - kfifo_len(fifo);
+
+	return (l > recsize) ? l - recsize : 0;
+}
+
 #endif
diff --git a/kernel/kfifo.c b/kernel/kfifo.c
index 2a78425ef67f..e92d519f93b1 100644
--- a/kernel/kfifo.c
+++ b/kernel/kfifo.c
@@ -115,27 +115,11 @@ void kfifo_skip(struct kfifo *fifo, unsigned int len)
 }
 EXPORT_SYMBOL(kfifo_skip);
 
-/**
- * kfifo_in - puts some data into the FIFO
- * @fifo: the fifo to be used.
- * @from: the data to be added.
- * @len: the length of the data to be added.
- *
- * This function copies at most @len bytes from the @from buffer into
- * the FIFO depending on the free space, and returns the number of
- * bytes copied.
- *
- * Note that with only one concurrent reader and one concurrent
- * writer, you don't need extra locking to use these functions.
- */
-unsigned int kfifo_in(struct kfifo *fifo,
-			const unsigned char *from, unsigned int len)
+static inline void __kfifo_in_data(struct kfifo *fifo,
+		const void *from, unsigned int len, unsigned int off)
 {
-	unsigned int off;
 	unsigned int l;
 
-	len = min(len, fifo->size - fifo->in + fifo->out);
-
 	/*
 	 * Ensure that we sample the fifo->out index -before- we
 	 * start putting bytes into the kfifo.
@@ -143,7 +127,7 @@ unsigned int kfifo_in(struct kfifo *fifo,
 
 	smp_mb();
 
-	off = __kfifo_off(fifo, fifo->in);
+	off = __kfifo_off(fifo, fifo->in + off);
 
 	/* first put the data starting from fifo->in to buffer end */
 	l = min(len, fifo->size - off);
@@ -151,33 +135,13 @@ unsigned int kfifo_in(struct kfifo *fifo,
 
 	/* then put the rest (if any) at the beginning of the buffer */
 	memcpy(fifo->buffer, from + l, len - l);
-
-	__kfifo_add_in(fifo, len);
-
-	return len;
 }
-EXPORT_SYMBOL(kfifo_in);
 
-/**
- * kfifo_out - gets some data from the FIFO
- * @fifo: the fifo to be used.
- * @to: where the data must be copied.
- * @len: the size of the destination buffer.
- *
- * This function copies at most @len bytes from the FIFO into the
- * @to buffer and returns the number of copied bytes.
- *
- * Note that with only one concurrent reader and one concurrent
- * writer, you don't need extra locking to use these functions.
- */
-unsigned int kfifo_out(struct kfifo *fifo,
-			 unsigned char *to, unsigned int len)
+static inline void __kfifo_out_data(struct kfifo *fifo,
+		void *to, unsigned int len, unsigned int off)
 {
-	unsigned int off;
 	unsigned int l;
 
-	len = min(len, fifo->in - fifo->out);
-
 	/*
 	 * Ensure that we sample the fifo->in index -before- we
 	 * start removing bytes from the kfifo.
@@ -185,7 +149,7 @@ unsigned int kfifo_out(struct kfifo *fifo,
 
 	smp_rmb();
 
-	off = __kfifo_off(fifo, fifo->out);
+	off = __kfifo_off(fifo, fifo->out + off);
 
 	/* first get the data from fifo->out until the end of the buffer */
 	l = min(len, fifo->size - off);
@@ -193,34 +157,14 @@ unsigned int kfifo_out(struct kfifo *fifo,
 
 	/* then get the rest (if any) from the beginning of the buffer */
 	memcpy(to + l, fifo->buffer, len - l);
-
-	__kfifo_add_out(fifo, len);
-
-	return len;
 }
-EXPORT_SYMBOL(kfifo_out);
 
-/**
- * kfifo_from_user - puts some data from user space into the FIFO
- * @fifo: the fifo to be used.
- * @from: pointer to the data to be added.
- * @len: the length of the data to be added.
- *
- * This function copies at most @len bytes from the @from into the
- * FIFO depending and returns the number of copied bytes.
- *
- * Note that with only one concurrent reader and one concurrent
- * writer, you don't need extra locking to use these functions.
- */
-unsigned int kfifo_from_user(struct kfifo *fifo,
-	const void __user *from, unsigned int len)
+static inline unsigned int __kfifo_from_user_data(struct kfifo *fifo,
+	 const void __user *from, unsigned int len, unsigned int off)
 {
-	unsigned int off;
 	unsigned int l;
 	int ret;
 
-	len = min(len, fifo->size - fifo->in + fifo->out);
-
 	/*
 	 * Ensure that we sample the fifo->out index -before- we
 	 * start putting bytes into the kfifo.
@@ -228,29 +172,101 @@ unsigned int kfifo_from_user(struct kfifo *fifo,
 
 	smp_mb();
 
-	off = __kfifo_off(fifo, fifo->in);
+	off = __kfifo_off(fifo, fifo->in + off);
 
 	/* first put the data starting from fifo->in to buffer end */
 	l = min(len, fifo->size - off);
 	ret = copy_from_user(fifo->buffer + off, from, l);
 
 	if (unlikely(ret))
-		return l - ret;
+		return ret + len - l;
 
 	/* then put the rest (if any) at the beginning of the buffer */
-	ret = copy_from_user(fifo->buffer, from + l, len - l);
+	return copy_from_user(fifo->buffer, from + l, len - l);
+}
+
+static inline unsigned int __kfifo_to_user_data(struct kfifo *fifo,
+		void __user *to, unsigned int len, unsigned int off)
+{
+	unsigned int l;
+	int ret;
+
+	/*
+	 * Ensure that we sample the fifo->in index -before- we
+	 * start removing bytes from the kfifo.
+	 */
+
+	smp_rmb();
+
+	off = __kfifo_off(fifo, fifo->out + off);
+
+	/* first get the data from fifo->out until the end of the buffer */
+	l = min(len, fifo->size - off);
+	ret = copy_to_user(to, fifo->buffer + off, l);
 
 	if (unlikely(ret))
-		return len - ret;
+		return ret + len - l;
 
-	__kfifo_add_in(fifo, len);
+	/* then get the rest (if any) from the beginning of the buffer */
+	return copy_to_user(to + l, fifo->buffer, len - l);
+}
 
+unsigned int __kfifo_in_n(struct kfifo *fifo,
+	const void *from, unsigned int len, unsigned int recsize)
+{
+	if (kfifo_avail(fifo) < len + recsize)
+		return len + 1;
+
+	__kfifo_in_data(fifo, from, len, recsize);
+	return 0;
+}
+EXPORT_SYMBOL(__kfifo_in_n);
+
+/**
+ * kfifo_in - puts some data into the FIFO
+ * @fifo: the fifo to be used.
+ * @from: the data to be added.
+ * @len: the length of the data to be added.
+ *
+ * This function copies at most @len bytes from the @from buffer into
+ * the FIFO depending on the free space, and returns the number of
+ * bytes copied.
+ *
+ * Note that with only one concurrent reader and one concurrent
+ * writer, you don't need extra locking to use these functions.
+ */
+unsigned int kfifo_in(struct kfifo *fifo, const unsigned char *from,
+				unsigned int len)
+{
+	len = min(kfifo_avail(fifo), len);
+
+	__kfifo_in_data(fifo, from, len, 0);
+	__kfifo_add_in(fifo, len);
 	return len;
 }
-EXPORT_SYMBOL(kfifo_from_user);
+EXPORT_SYMBOL(kfifo_in);
+
+unsigned int __kfifo_in_generic(struct kfifo *fifo,
+	const void *from, unsigned int len, unsigned int recsize)
+{
+	return __kfifo_in_rec(fifo, from, len, recsize);
+}
+EXPORT_SYMBOL(__kfifo_in_generic);
+
+unsigned int __kfifo_out_n(struct kfifo *fifo,
+	void *to, unsigned int len, unsigned int recsize)
+{
+	if (kfifo_len(fifo) < len + recsize)
+		return len;
+
+	__kfifo_out_data(fifo, to, len, recsize);
+	__kfifo_add_out(fifo, len + recsize);
+	return 0;
+}
+EXPORT_SYMBOL(__kfifo_out_n);
 
 /**
- * kfifo_to_user - gets data from the FIFO and write it to user space
+ * kfifo_out - gets some data from the FIFO
  * @fifo: the fifo to be used.
  * @to: where the data must be copied.
  * @len: the size of the destination buffer.
@@ -261,40 +277,124 @@ EXPORT_SYMBOL(kfifo_from_user);
  * Note that with only one concurrent reader and one concurrent
  * writer, you don't need extra locking to use these functions.
  */
-unsigned int kfifo_to_user(struct kfifo *fifo,
-	void __user *to, unsigned int len)
+unsigned int kfifo_out(struct kfifo *fifo, unsigned char *to, unsigned int len)
 {
-	unsigned int off;
-	unsigned int l;
-	int ret;
+	len = min(kfifo_len(fifo), len);
 
-	len = min(len, fifo->in - fifo->out);
+	__kfifo_out_data(fifo, to, len, 0);
+	__kfifo_add_out(fifo, len);
 
-	/*
-	 * Ensure that we sample the fifo->in index -before- we
-	 * start removing bytes from the kfifo.
-	 */
+	return len;
+}
+EXPORT_SYMBOL(kfifo_out);
 
-	smp_rmb();
+unsigned int __kfifo_out_generic(struct kfifo *fifo,
+	void *to, unsigned int len, unsigned int recsize,
+	unsigned int *total)
+{
+	return __kfifo_out_rec(fifo, to, len, recsize, total);
+}
+EXPORT_SYMBOL(__kfifo_out_generic);
 
-	off = __kfifo_off(fifo, fifo->out);
+unsigned int __kfifo_from_user_n(struct kfifo *fifo,
+	const void __user *from, unsigned int len, unsigned int recsize)
+{
+	if (kfifo_avail(fifo) < len + recsize)
+		return len + 1;
 
-	/* first get the data from fifo->out until the end of the buffer */
-	l = min(len, fifo->size - off);
-	ret = copy_to_user(to, fifo->buffer + off, l);
+	return __kfifo_from_user_data(fifo, from, len, recsize);
+}
+EXPORT_SYMBOL(__kfifo_from_user_n);
 
-	if (unlikely(ret))
-		return l - ret;
+/**
+ * kfifo_from_user - puts some data from user space into the FIFO
+ * @fifo: the fifo to be used.
+ * @from: pointer to the data to be added.
+ * @len: the length of the data to be added.
+ *
+ * This function copies at most @len bytes from the @from into the
+ * FIFO depending and returns the number of copied bytes.
+ *
+ * Note that with only one concurrent reader and one concurrent
+ * writer, you don't need extra locking to use these functions.
+ */
+unsigned int kfifo_from_user(struct kfifo *fifo,
+	const void __user *from, unsigned int len)
+{
+	len = min(kfifo_avail(fifo), len);
+	len -= __kfifo_from_user_data(fifo, from, len, 0);
+	__kfifo_add_in(fifo, len);
+	return len;
+}
+EXPORT_SYMBOL(kfifo_from_user);
 
-	/* then get the rest (if any) from the beginning of the buffer */
-	ret = copy_to_user(to + l, fifo->buffer, len - l);
+unsigned int __kfifo_from_user_generic(struct kfifo *fifo,
+	const void __user *from, unsigned int len, unsigned int recsize)
+{
+	return __kfifo_from_user_rec(fifo, from, len, recsize);
+}
+EXPORT_SYMBOL(__kfifo_from_user_generic);
 
-	if (unlikely(ret))
-		return len - ret;
+unsigned int __kfifo_to_user_n(struct kfifo *fifo,
+	void __user *to, unsigned int len, unsigned int reclen,
+	unsigned int recsize)
+{
+	unsigned int ret;
 
-	__kfifo_add_out(fifo, len);
+	if (kfifo_len(fifo) < reclen + recsize)
+		return len;
 
+	ret = __kfifo_to_user_data(fifo, to, reclen, recsize);
+
+	if (likely(ret == 0))
+		__kfifo_add_out(fifo, reclen + recsize);
+
+	return ret;
+}
+EXPORT_SYMBOL(__kfifo_to_user_n);
+
+/**
+ * kfifo_to_user - gets data from the FIFO and write it to user space
+ * @fifo: the fifo to be used.
+ * @to: where the data must be copied.
+ * @len: the size of the destination buffer.
+ *
+ * This function copies at most @len bytes from the FIFO into the
+ * @to buffer and returns the number of copied bytes.
+ *
+ * Note that with only one concurrent reader and one concurrent
+ * writer, you don't need extra locking to use these functions.
+ */
+unsigned int kfifo_to_user(struct kfifo *fifo,
+	void __user *to, unsigned int len)
+{
+	len = min(kfifo_len(fifo), len);
+	len -= __kfifo_to_user_data(fifo, to, len, 0);
+	__kfifo_add_out(fifo, len);
 	return len;
 }
 EXPORT_SYMBOL(kfifo_to_user);
 
+unsigned int __kfifo_to_user_generic(struct kfifo *fifo,
+	void __user *to, unsigned int len, unsigned int recsize,
+	unsigned int *total)
+{
+	return __kfifo_to_user_rec(fifo, to, len, recsize, total);
+}
+EXPORT_SYMBOL(__kfifo_to_user_generic);
+
+unsigned int __kfifo_peek_generic(struct kfifo *fifo, unsigned int recsize)
+{
+	if (recsize == 0)
+		return kfifo_avail(fifo);
+
+	return __kfifo_peek_n(fifo, recsize);
+}
+EXPORT_SYMBOL(__kfifo_peek_generic);
+
+void __kfifo_skip_generic(struct kfifo *fifo, unsigned int recsize)
+{
+	__kfifo_skip_rec(fifo, recsize);
+}
+EXPORT_SYMBOL(__kfifo_skip_generic);
+
-- 
cgit v1.2.3


From cc3e1bea5d87635c519da657303690f5538bb4eb Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Wed, 23 Dec 2009 06:52:08 -0500
Subject: ext4, jbd2: Add barriers for file systems with exernal journals

This is a bit complicated because we are trying to optimize when we
send barriers to the fs data disk.  We could just throw in an extra
barrier to the data disk whenever we send a barrier to the journal
disk, but that's not always strictly necessary.

We only need to send a barrier during a commit when there are data
blocks which are must be written out due to an inode written in
ordered mode, or if fsync() depends on the commit to force data blocks
to disk.  Finally, before we drop transactions from the beginning of
the journal during a checkpoint operation, we need to guarantee that
any blocks that were flushed out to the data disk are firmly on the
rust platter before we drop the transaction from the journal.

Thanks to Oleg Drokin for pointing out this flaw in ext3/ext4.

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/ext4/fsync.c      | 16 ++++++++++++++--
 fs/jbd2/checkpoint.c | 15 +++++++++++++++
 fs/jbd2/commit.c     | 19 +++++++++++--------
 include/linux/jbd2.h |  1 +
 4 files changed, 41 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index 0b22497d92e1..98bd140aad01 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -88,9 +88,21 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync)
 		return ext4_force_commit(inode->i_sb);
 
 	commit_tid = datasync ? ei->i_datasync_tid : ei->i_sync_tid;
-	if (jbd2_log_start_commit(journal, commit_tid))
+	if (jbd2_log_start_commit(journal, commit_tid)) {
+		/*
+		 * When the journal is on a different device than the
+		 * fs data disk, we need to issue the barrier in
+		 * writeback mode.  (In ordered mode, the jbd2 layer
+		 * will take care of issuing the barrier.  In
+		 * data=journal, all of the data blocks are written to
+		 * the journal device.)
+		 */
+		if (ext4_should_writeback_data(inode) &&
+		    (journal->j_fs_dev != journal->j_dev) &&
+		    (journal->j_flags & JBD2_BARRIER))
+			blkdev_issue_flush(inode->i_sb->s_bdev, NULL);
 		jbd2_log_wait_commit(journal, commit_tid);
-	else if (journal->j_flags & JBD2_BARRIER)
+	} else if (journal->j_flags & JBD2_BARRIER)
 		blkdev_issue_flush(inode->i_sb->s_bdev, NULL);
 	return ret;
 }
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index ca0f5eb62b20..886849370950 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -22,6 +22,7 @@
 #include <linux/jbd2.h>
 #include <linux/errno.h>
 #include <linux/slab.h>
+#include <linux/blkdev.h>
 #include <trace/events/jbd2.h>
 
 /*
@@ -515,6 +516,20 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
 	journal->j_tail_sequence = first_tid;
 	journal->j_tail = blocknr;
 	spin_unlock(&journal->j_state_lock);
+
+	/*
+	 * If there is an external journal, we need to make sure that
+	 * any data blocks that were recently written out --- perhaps
+	 * by jbd2_log_do_checkpoint() --- are flushed out before we
+	 * drop the transactions from the external journal.  It's
+	 * unlikely this will be necessary, especially with a
+	 * appropriately sized journal, but we need this to guarantee
+	 * correctness.  Fortunately jbd2_cleanup_journal_tail()
+	 * doesn't get called all that often.
+	 */
+	if ((journal->j_fs_dev != journal->j_dev) &&
+	    (journal->j_flags & JBD2_BARRIER))
+		blkdev_issue_flush(journal->j_fs_dev, NULL);
 	if (!(journal->j_flags & JBD2_ABORT))
 		jbd2_journal_update_superblock(journal, 1);
 	return 0;
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 6a10238d2c63..1bc74b6f26d2 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -259,6 +259,7 @@ static int journal_submit_data_buffers(journal_t *journal,
 			ret = err;
 		spin_lock(&journal->j_list_lock);
 		J_ASSERT(jinode->i_transaction == commit_transaction);
+		commit_transaction->t_flushed_data_blocks = 1;
 		jinode->i_flags &= ~JI_COMMIT_RUNNING;
 		wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING);
 	}
@@ -708,8 +709,17 @@ start_journal_io:
 		}
 	}
 
-	/* Done it all: now write the commit record asynchronously. */
+	/* 
+	 * If the journal is not located on the file system device,
+	 * then we must flush the file system device before we issue
+	 * the commit record
+	 */
+	if (commit_transaction->t_flushed_data_blocks &&
+	    (journal->j_fs_dev != journal->j_dev) &&
+	    (journal->j_flags & JBD2_BARRIER))
+		blkdev_issue_flush(journal->j_fs_dev, NULL);
 
+	/* Done it all: now write the commit record asynchronously. */
 	if (JBD2_HAS_INCOMPAT_FEATURE(journal,
 				      JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) {
 		err = journal_submit_commit_record(journal, commit_transaction,
@@ -720,13 +730,6 @@ start_journal_io:
 			blkdev_issue_flush(journal->j_dev, NULL);
 	}
 
-	/*
-	 * This is the right place to wait for data buffers both for ASYNC
-	 * and !ASYNC commit. If commit is ASYNC, we need to wait only after
-	 * the commit block went to disk (which happens above). If commit is
-	 * SYNC, we need to wait for data buffers before we start writing
-	 * commit block, which happens below in such setting.
-	 */
 	err = journal_finish_inode_data_buffers(journal, commit_transaction);
 	if (err) {
 		printk(KERN_WARNING
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index f1011f7f3d41..638ce4554c76 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -653,6 +653,7 @@ struct transaction_s
 	 * waiting for it to finish.
 	 */
 	unsigned int t_synchronous_commit:1;
+	unsigned int t_flushed_data_blocks:1;
 
 	/*
 	 * For use by the filesystem to store fs-specific data
-- 
cgit v1.2.3


From c459001fa4f71deafb62e00fa70d35f695498965 Mon Sep 17 00:00:00 2001
From: Dmitry Monakhov <dmonakhov@openvz.org>
Date: Wed, 9 Dec 2009 03:05:30 +0300
Subject: ext3: quota macros cleanup [V2]

Currently all quota block reservation macros contains hardcoded "2"
aka MAXQUOTAS value. This is no good because in some places it is not
obvious to understand what does this digit represent. Let's introduce
new macro with self descriptive name.

Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/ext3/inode.c          | 8 ++++----
 fs/ext3/namei.c          | 8 ++++----
 include/linux/ext3_jbd.h | 7 +++++--
 3 files changed, 13 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index ad14227f509e..455e6e6e5cb9 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -970,7 +970,7 @@ static int ext3_get_block(struct inode *inode, sector_t iblock,
 		if (max_blocks > DIO_MAX_BLOCKS)
 			max_blocks = DIO_MAX_BLOCKS;
 		handle = ext3_journal_start(inode, DIO_CREDITS +
-				2 * EXT3_QUOTA_TRANS_BLOCKS(inode->i_sb));
+				EXT3_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb));
 		if (IS_ERR(handle)) {
 			ret = PTR_ERR(handle);
 			goto out;
@@ -3146,8 +3146,8 @@ int ext3_setattr(struct dentry *dentry, struct iattr *attr)
 
 		/* (user+group)*(old+new) structure, inode write (sb,
 		 * inode block, ? - but truncate inode update has it) */
-		handle = ext3_journal_start(inode, 2*(EXT3_QUOTA_INIT_BLOCKS(inode->i_sb)+
-					EXT3_QUOTA_DEL_BLOCKS(inode->i_sb))+3);
+		handle = ext3_journal_start(inode, EXT3_MAXQUOTAS_INIT_BLOCKS(inode->i_sb)+
+					EXT3_MAXQUOTAS_DEL_BLOCKS(inode->i_sb)+3);
 		if (IS_ERR(handle)) {
 			error = PTR_ERR(handle);
 			goto err_out;
@@ -3239,7 +3239,7 @@ static int ext3_writepage_trans_blocks(struct inode *inode)
 #ifdef CONFIG_QUOTA
 	/* We know that structure was already allocated during vfs_dq_init so
 	 * we will be updating only the data blocks + inodes */
-	ret += 2*EXT3_QUOTA_TRANS_BLOCKS(inode->i_sb);
+	ret += EXT3_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb);
 #endif
 
 	return ret;
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index aad6400c9b77..81f7348b2de3 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -1699,7 +1699,7 @@ static int ext3_create (struct inode * dir, struct dentry * dentry, int mode,
 retry:
 	handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
 					EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 +
-					2*EXT3_QUOTA_INIT_BLOCKS(dir->i_sb));
+					EXT3_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
 
@@ -1733,7 +1733,7 @@ static int ext3_mknod (struct inode * dir, struct dentry *dentry,
 retry:
 	handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
 					EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 +
-					2*EXT3_QUOTA_INIT_BLOCKS(dir->i_sb));
+					EXT3_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
 
@@ -1769,7 +1769,7 @@ static int ext3_mkdir(struct inode * dir, struct dentry * dentry, int mode)
 retry:
 	handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
 					EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 +
-					2*EXT3_QUOTA_INIT_BLOCKS(dir->i_sb));
+					EXT3_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
 
@@ -2175,7 +2175,7 @@ static int ext3_symlink (struct inode * dir,
 retry:
 	handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
 					EXT3_INDEX_EXTRA_TRANS_BLOCKS + 5 +
-					2*EXT3_QUOTA_INIT_BLOCKS(dir->i_sb));
+					EXT3_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
 
diff --git a/include/linux/ext3_jbd.h b/include/linux/ext3_jbd.h
index cf82d519be40..d7b5ddca99c2 100644
--- a/include/linux/ext3_jbd.h
+++ b/include/linux/ext3_jbd.h
@@ -44,13 +44,13 @@
 
 #define EXT3_DATA_TRANS_BLOCKS(sb)	(EXT3_SINGLEDATA_TRANS_BLOCKS + \
 					 EXT3_XATTR_TRANS_BLOCKS - 2 + \
-					 2*EXT3_QUOTA_TRANS_BLOCKS(sb))
+					 EXT3_MAXQUOTAS_TRANS_BLOCKS(sb))
 
 /* Delete operations potentially hit one directory's namespace plus an
  * entire inode, plus arbitrary amounts of bitmap/indirection data.  Be
  * generous.  We can grow the delete transaction later if necessary. */
 
-#define EXT3_DELETE_TRANS_BLOCKS(sb)	(2 * EXT3_DATA_TRANS_BLOCKS(sb) + 64)
+#define EXT3_DELETE_TRANS_BLOCKS(sb)   (EXT3_MAXQUOTAS_TRANS_BLOCKS(sb) + 64)
 
 /* Define an arbitrary limit for the amount of data we will anticipate
  * writing to any given transaction.  For unbounded transactions such as
@@ -86,6 +86,9 @@
 #define EXT3_QUOTA_INIT_BLOCKS(sb) 0
 #define EXT3_QUOTA_DEL_BLOCKS(sb) 0
 #endif
+#define EXT3_MAXQUOTAS_TRANS_BLOCKS(sb) (MAXQUOTAS*EXT3_QUOTA_TRANS_BLOCKS(sb))
+#define EXT3_MAXQUOTAS_INIT_BLOCKS(sb) (MAXQUOTAS*EXT3_QUOTA_INIT_BLOCKS(sb))
+#define EXT3_MAXQUOTAS_DEL_BLOCKS(sb) (MAXQUOTAS*EXT3_QUOTA_DEL_BLOCKS(sb))
 
 int
 ext3_mark_iloc_dirty(handle_t *handle,
-- 
cgit v1.2.3


From b462707e7ccad058ae151e5c5b06eb5cadcb737f Mon Sep 17 00:00:00 2001
From: Dmitry Monakhov <dmonakhov@openvz.org>
Date: Mon, 14 Dec 2009 15:21:12 +0300
Subject: Add unlocked version of inode_add_bytes() function

Quota code requires unlocked version of this function. Off course
we can just copy-paste the code, but copy-pasting is always an evil.

Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/stat.c          | 10 ++++++++--
 include/linux/fs.h |  1 +
 2 files changed, 9 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/fs/stat.c b/fs/stat.c
index 075694e31d8b..c4ecd52c5737 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -401,9 +401,9 @@ SYSCALL_DEFINE4(fstatat64, int, dfd, char __user *, filename,
 }
 #endif /* __ARCH_WANT_STAT64 */
 
-void inode_add_bytes(struct inode *inode, loff_t bytes)
+/* Caller is here responsible for sufficient locking (ie. inode->i_lock) */
+void __inode_add_bytes(struct inode *inode, loff_t bytes)
 {
-	spin_lock(&inode->i_lock);
 	inode->i_blocks += bytes >> 9;
 	bytes &= 511;
 	inode->i_bytes += bytes;
@@ -411,6 +411,12 @@ void inode_add_bytes(struct inode *inode, loff_t bytes)
 		inode->i_blocks++;
 		inode->i_bytes -= 512;
 	}
+}
+
+void inode_add_bytes(struct inode *inode, loff_t bytes)
+{
+	spin_lock(&inode->i_lock);
+	__inode_add_bytes(inode, bytes);
 	spin_unlock(&inode->i_lock);
 }
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 7e3012e0ac06..9147ca88f253 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2297,6 +2297,7 @@ extern const struct inode_operations page_symlink_inode_operations;
 extern int generic_readlink(struct dentry *, char __user *, int);
 extern void generic_fillattr(struct inode *, struct kstat *);
 extern int vfs_getattr(struct vfsmount *, struct dentry *, struct kstat *);
+void __inode_add_bytes(struct inode *inode, loff_t bytes);
 void inode_add_bytes(struct inode *inode, loff_t bytes);
 void inode_sub_bytes(struct inode *inode, loff_t bytes);
 loff_t inode_get_bytes(struct inode *inode);
-- 
cgit v1.2.3


From fd8fbfc1709822bd94247c5b2ab15a5f5041e103 Mon Sep 17 00:00:00 2001
From: Dmitry Monakhov <dmonakhov@openvz.org>
Date: Mon, 14 Dec 2009 15:21:13 +0300
Subject: quota: decouple fs reserved space from quota reservation

Currently inode_reservation is managed by fs itself and this
reservation is transfered on dquot_transfer(). This means what
inode_reservation must always be in sync with
dquot->dq_dqb.dqb_rsvspace. Otherwise dquot_transfer() will result
in incorrect quota(WARN_ON in dquot_claim_reserved_space() will be
triggered)
This is not easy because of complex locking order issues
for example http://bugzilla.kernel.org/show_bug.cgi?id=14739

The patch introduce quota reservation field for each fs-inode
(fs specific inode is used in order to prevent bloating generic
vfs inode). This reservation is managed by quota code internally
similar to i_blocks/i_bytes and may not be always in sync with
internal fs reservation.

Also perform some code rearrangement:
- Unify dquot_reserve_space() and dquot_reserve_space()
- Unify dquot_release_reserved_space() and dquot_free_space()
- Also this patch add missing warning update to release_rsv()
  dquot_release_reserved_space() must call flush_warnings() as
  dquot_free_space() does.

Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/quota/dquot.c      | 213 ++++++++++++++++++++++++++++----------------------
 include/linux/quota.h |   5 +-
 2 files changed, 122 insertions(+), 96 deletions(-)

(limited to 'include/linux')

diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index cd6bb9a33c13..1cb8fa84300f 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -1318,6 +1318,67 @@ void vfs_dq_drop(struct inode *inode)
 }
 EXPORT_SYMBOL(vfs_dq_drop);
 
+/*
+ * inode_reserved_space is managed internally by quota, and protected by
+ * i_lock similar to i_blocks+i_bytes.
+ */
+static qsize_t *inode_reserved_space(struct inode * inode)
+{
+	/* Filesystem must explicitly define it's own method in order to use
+	 * quota reservation interface */
+	BUG_ON(!inode->i_sb->dq_op->get_reserved_space);
+	return inode->i_sb->dq_op->get_reserved_space(inode);
+}
+
+static void inode_add_rsv_space(struct inode *inode, qsize_t number)
+{
+	spin_lock(&inode->i_lock);
+	*inode_reserved_space(inode) += number;
+	spin_unlock(&inode->i_lock);
+}
+
+
+static void inode_claim_rsv_space(struct inode *inode, qsize_t number)
+{
+	spin_lock(&inode->i_lock);
+	*inode_reserved_space(inode) -= number;
+	__inode_add_bytes(inode, number);
+	spin_unlock(&inode->i_lock);
+}
+
+static void inode_sub_rsv_space(struct inode *inode, qsize_t number)
+{
+	spin_lock(&inode->i_lock);
+	*inode_reserved_space(inode) -= number;
+	spin_unlock(&inode->i_lock);
+}
+
+static qsize_t inode_get_rsv_space(struct inode *inode)
+{
+	qsize_t ret;
+	spin_lock(&inode->i_lock);
+	ret = *inode_reserved_space(inode);
+	spin_unlock(&inode->i_lock);
+	return ret;
+}
+
+static void inode_incr_space(struct inode *inode, qsize_t number,
+				int reserve)
+{
+	if (reserve)
+		inode_add_rsv_space(inode, number);
+	else
+		inode_add_bytes(inode, number);
+}
+
+static void inode_decr_space(struct inode *inode, qsize_t number, int reserve)
+{
+	if (reserve)
+		inode_sub_rsv_space(inode, number);
+	else
+		inode_sub_bytes(inode, number);
+}
+
 /*
  * Following four functions update i_blocks+i_bytes fields and
  * quota information (together with appropriate checks)
@@ -1336,6 +1397,21 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number,
 	int cnt, ret = QUOTA_OK;
 	char warntype[MAXQUOTAS];
 
+	/*
+	 * First test before acquiring mutex - solves deadlocks when we
+	 * re-enter the quota code and are already holding the mutex
+	 */
+	if (IS_NOQUOTA(inode)) {
+		inode_incr_space(inode, number, reserve);
+		goto out;
+	}
+
+	down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
+	if (IS_NOQUOTA(inode)) {
+		inode_incr_space(inode, number, reserve);
+		goto out_unlock;
+	}
+
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++)
 		warntype[cnt] = QUOTA_NL_NOWARN;
 
@@ -1346,7 +1422,8 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number,
 		if (check_bdq(inode->i_dquot[cnt], number, warn, warntype+cnt)
 		    == NO_QUOTA) {
 			ret = NO_QUOTA;
-			goto out_unlock;
+			spin_unlock(&dq_data_lock);
+			goto out_flush_warn;
 		}
 	}
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
@@ -1357,64 +1434,32 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number,
 		else
 			dquot_incr_space(inode->i_dquot[cnt], number);
 	}
-	if (!reserve)
-		inode_add_bytes(inode, number);
-out_unlock:
+	inode_incr_space(inode, number, reserve);
 	spin_unlock(&dq_data_lock);
-	flush_warnings(inode->i_dquot, warntype);
-	return ret;
-}
-
-int dquot_alloc_space(struct inode *inode, qsize_t number, int warn)
-{
-	int cnt, ret = QUOTA_OK;
-
-	/*
-	 * First test before acquiring mutex - solves deadlocks when we
-	 * re-enter the quota code and are already holding the mutex
-	 */
-	if (IS_NOQUOTA(inode)) {
-		inode_add_bytes(inode, number);
-		goto out;
-	}
-
-	down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
-	if (IS_NOQUOTA(inode)) {
-		inode_add_bytes(inode, number);
-		goto out_unlock;
-	}
-
-	ret = __dquot_alloc_space(inode, number, warn, 0);
-	if (ret == NO_QUOTA)
-		goto out_unlock;
 
+	if (reserve)
+		goto out_flush_warn;
 	/* Dirtify all the dquots - this can block when journalling */
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++)
 		if (inode->i_dquot[cnt])
 			mark_dquot_dirty(inode->i_dquot[cnt]);
+out_flush_warn:
+	flush_warnings(inode->i_dquot, warntype);
 out_unlock:
 	up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
 out:
 	return ret;
 }
+
+int dquot_alloc_space(struct inode *inode, qsize_t number, int warn)
+{
+	return __dquot_alloc_space(inode, number, warn, 0);
+}
 EXPORT_SYMBOL(dquot_alloc_space);
 
 int dquot_reserve_space(struct inode *inode, qsize_t number, int warn)
 {
-	int ret = QUOTA_OK;
-
-	if (IS_NOQUOTA(inode))
-		goto out;
-
-	down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
-	if (IS_NOQUOTA(inode))
-		goto out_unlock;
-
-	ret = __dquot_alloc_space(inode, number, warn, 1);
-out_unlock:
-	up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
-out:
-	return ret;
+	return __dquot_alloc_space(inode, number, warn, 1);
 }
 EXPORT_SYMBOL(dquot_reserve_space);
 
@@ -1471,14 +1516,14 @@ int dquot_claim_space(struct inode *inode, qsize_t number)
 	int ret = QUOTA_OK;
 
 	if (IS_NOQUOTA(inode)) {
-		inode_add_bytes(inode, number);
+		inode_claim_rsv_space(inode, number);
 		goto out;
 	}
 
 	down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
 	if (IS_NOQUOTA(inode))	{
 		up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
-		inode_add_bytes(inode, number);
+		inode_claim_rsv_space(inode, number);
 		goto out;
 	}
 
@@ -1490,7 +1535,7 @@ int dquot_claim_space(struct inode *inode, qsize_t number)
 							number);
 	}
 	/* Update inode bytes */
-	inode_add_bytes(inode, number);
+	inode_claim_rsv_space(inode, number);
 	spin_unlock(&dq_data_lock);
 	/* Dirtify all the dquots - this can block when journalling */
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++)
@@ -1502,39 +1547,10 @@ out:
 }
 EXPORT_SYMBOL(dquot_claim_space);
 
-/*
- * Release reserved quota space
- */
-void dquot_release_reserved_space(struct inode *inode, qsize_t number)
-{
-	int cnt;
-
-	if (IS_NOQUOTA(inode))
-		goto out;
-
-	down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
-	if (IS_NOQUOTA(inode))
-		goto out_unlock;
-
-	spin_lock(&dq_data_lock);
-	/* Release reserved dquots */
-	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
-		if (inode->i_dquot[cnt])
-			dquot_free_reserved_space(inode->i_dquot[cnt], number);
-	}
-	spin_unlock(&dq_data_lock);
-
-out_unlock:
-	up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
-out:
-	return;
-}
-EXPORT_SYMBOL(dquot_release_reserved_space);
-
 /*
  * This operation can block, but only after everything is updated
  */
-int dquot_free_space(struct inode *inode, qsize_t number)
+int __dquot_free_space(struct inode *inode, qsize_t number, int reserve)
 {
 	unsigned int cnt;
 	char warntype[MAXQUOTAS];
@@ -1543,7 +1559,7 @@ int dquot_free_space(struct inode *inode, qsize_t number)
          * re-enter the quota code and are already holding the mutex */
 	if (IS_NOQUOTA(inode)) {
 out_sub:
-		inode_sub_bytes(inode, number);
+		inode_decr_space(inode, number, reserve);
 		return QUOTA_OK;
 	}
 
@@ -1558,20 +1574,42 @@ out_sub:
 		if (!inode->i_dquot[cnt])
 			continue;
 		warntype[cnt] = info_bdq_free(inode->i_dquot[cnt], number);
-		dquot_decr_space(inode->i_dquot[cnt], number);
+		if (reserve)
+			dquot_free_reserved_space(inode->i_dquot[cnt], number);
+		else
+			dquot_decr_space(inode->i_dquot[cnt], number);
 	}
-	inode_sub_bytes(inode, number);
+	inode_decr_space(inode, number, reserve);
 	spin_unlock(&dq_data_lock);
+
+	if (reserve)
+		goto out_unlock;
 	/* Dirtify all the dquots - this can block when journalling */
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++)
 		if (inode->i_dquot[cnt])
 			mark_dquot_dirty(inode->i_dquot[cnt]);
+out_unlock:
 	flush_warnings(inode->i_dquot, warntype);
 	up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
 	return QUOTA_OK;
 }
+
+int dquot_free_space(struct inode *inode, qsize_t number)
+{
+	return  __dquot_free_space(inode, number, 0);
+}
 EXPORT_SYMBOL(dquot_free_space);
 
+/*
+ * Release reserved quota space
+ */
+void dquot_release_reserved_space(struct inode *inode, qsize_t number)
+{
+	__dquot_free_space(inode, number, 1);
+
+}
+EXPORT_SYMBOL(dquot_release_reserved_space);
+
 /*
  * This operation can block, but only after everything is updated
  */
@@ -1609,19 +1647,6 @@ int dquot_free_inode(const struct inode *inode, qsize_t number)
 }
 EXPORT_SYMBOL(dquot_free_inode);
 
-/*
- * call back function, get reserved quota space from underlying fs
- */
-qsize_t dquot_get_reserved_space(struct inode *inode)
-{
-	qsize_t reserved_space = 0;
-
-	if (sb_any_quota_active(inode->i_sb) &&
-	    inode->i_sb->dq_op->get_reserved_space)
-		reserved_space = inode->i_sb->dq_op->get_reserved_space(inode);
-	return reserved_space;
-}
-
 /*
  * Transfer the number of inode and blocks from one diskquota to an other.
  *
@@ -1665,7 +1690,7 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr)
 	}
 	spin_lock(&dq_data_lock);
 	cur_space = inode_get_bytes(inode);
-	rsv_space = dquot_get_reserved_space(inode);
+	rsv_space = inode_get_rsv_space(inode);
 	space = cur_space + rsv_space;
 	/* Build the transfer_from list and check the limits */
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
diff --git a/include/linux/quota.h b/include/linux/quota.h
index e70e62194243..a6861f117480 100644
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -315,8 +315,9 @@ struct dquot_operations {
 	int (*claim_space) (struct inode *, qsize_t);
 	/* release rsved quota for delayed alloc */
 	void (*release_rsv) (struct inode *, qsize_t);
-	/* get reserved quota for delayed alloc */
-	qsize_t (*get_reserved_space) (struct inode *);
+	/* get reserved quota for delayed alloc, value returned is managed by
+	 * quota code only */
+	qsize_t *(*get_reserved_space) (struct inode *);
 };
 
 /* Operations handling requests from userspace */
-- 
cgit v1.2.3


From b8a052d01669977f224255b0f9f2737018171ddb Mon Sep 17 00:00:00 2001
From: Eric Sandeen <sandeen@redhat.com>
Date: Mon, 14 Dec 2009 13:00:30 -0600
Subject: ext3: Replace lock/unlock_super() with an explicit lock for the
 orphan list

Use a separate lock to protect the orphan list, so we can stop
overloading the use of lock_super().

Port of ext4 commit 3b9d4ed26680771295d904a6b83e88e620780893
by Theodore Ts'o <tytso@mit.edu>.

CC: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Eric Sandeen <sandeen@redhat.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/ext3/namei.c            | 20 +++++++++++---------
 fs/ext3/super.c            |  1 +
 include/linux/ext3_fs_sb.h |  1 +
 3 files changed, 13 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 81f7348b2de3..7b0e44f7d66f 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -1920,7 +1920,7 @@ int ext3_orphan_add(handle_t *handle, struct inode *inode)
 	struct ext3_iloc iloc;
 	int err = 0, rc;
 
-	lock_super(sb);
+	mutex_lock(&EXT3_SB(sb)->s_orphan_lock);
 	if (!list_empty(&EXT3_I(inode)->i_orphan))
 		goto out_unlock;
 
@@ -1929,9 +1929,13 @@ int ext3_orphan_add(handle_t *handle, struct inode *inode)
 
 	/* @@@ FIXME: Observation from aviro:
 	 * I think I can trigger J_ASSERT in ext3_orphan_add().  We block
-	 * here (on lock_super()), so race with ext3_link() which might bump
+	 * here (on s_orphan_lock), so race with ext3_link() which might bump
 	 * ->i_nlink. For, say it, character device. Not a regular file,
 	 * not a directory, not a symlink and ->i_nlink > 0.
+	 *
+	 * tytso, 4/25/2009: I'm not sure how that could happen;
+	 * shouldn't the fs core protect us from these sort of
+	 * unlink()/link() races?
 	 */
 	J_ASSERT ((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
 		S_ISLNK(inode->i_mode)) || inode->i_nlink == 0);
@@ -1968,7 +1972,7 @@ int ext3_orphan_add(handle_t *handle, struct inode *inode)
 	jbd_debug(4, "orphan inode %lu will point to %d\n",
 			inode->i_ino, NEXT_ORPHAN(inode));
 out_unlock:
-	unlock_super(sb);
+	mutex_unlock(&EXT3_SB(sb)->s_orphan_lock);
 	ext3_std_error(inode->i_sb, err);
 	return err;
 }
@@ -1986,11 +1990,9 @@ int ext3_orphan_del(handle_t *handle, struct inode *inode)
 	struct ext3_iloc iloc;
 	int err = 0;
 
-	lock_super(inode->i_sb);
-	if (list_empty(&ei->i_orphan)) {
-		unlock_super(inode->i_sb);
-		return 0;
-	}
+	mutex_lock(&EXT3_SB(inode->i_sb)->s_orphan_lock);
+	if (list_empty(&ei->i_orphan))
+		goto out;
 
 	ino_next = NEXT_ORPHAN(inode);
 	prev = ei->i_orphan.prev;
@@ -2040,7 +2042,7 @@ int ext3_orphan_del(handle_t *handle, struct inode *inode)
 out_err:
 	ext3_std_error(inode->i_sb, err);
 out:
-	unlock_super(inode->i_sb);
+	mutex_unlock(&EXT3_SB(inode->i_sb)->s_orphan_lock);
 	return err;
 
 out_brelse:
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 806b8b780add..97dd3828384c 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -1928,6 +1928,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
 	sb->dq_op = &ext3_quota_operations;
 #endif
 	INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
+	mutex_init(&sbi->s_orphan_lock);
 
 	sb->s_root = NULL;
 
diff --git a/include/linux/ext3_fs_sb.h b/include/linux/ext3_fs_sb.h
index f07f34de2f0e..dd61d83026a0 100644
--- a/include/linux/ext3_fs_sb.h
+++ b/include/linux/ext3_fs_sb.h
@@ -72,6 +72,7 @@ struct ext3_sb_info {
 	struct inode * s_journal_inode;
 	struct journal_s * s_journal;
 	struct list_head s_orphan;
+	struct mutex s_orphan_lock;
 	unsigned long s_commit_interval;
 	struct block_device *journal_bdev;
 #ifdef CONFIG_JBD_DEBUG
-- 
cgit v1.2.3


From 96d2a495c25d525873529b736cdb63ad502b101c Mon Sep 17 00:00:00 2001
From: Eric Sandeen <sandeen@redhat.com>
Date: Mon, 14 Dec 2009 13:01:05 -0600
Subject: ext3: Replace lock/unlock_super() with an explicit lock for resizing

Use a separate lock to protect s_groups_count and the other block
group descriptors which get changed via an on-line resize operation,
so we can stop overloading the use of lock_super().

Port of ext4 commit 32ed5058ce90024efcd811254b4b1de0468099df by
Theodore Ts'o <tytso@mit.edu>.

CC: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Eric Sandeen <sandeen@redhat.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/ext3/resize.c           | 35 ++++++++++++++++++-----------------
 fs/ext3/super.c            |  1 +
 include/linux/ext3_fs_sb.h |  1 +
 3 files changed, 20 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/fs/ext3/resize.c b/fs/ext3/resize.c
index 5f83b6179178..54351ac7cef9 100644
--- a/fs/ext3/resize.c
+++ b/fs/ext3/resize.c
@@ -209,7 +209,7 @@ static int setup_new_group_blocks(struct super_block *sb,
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
 
-	lock_super(sb);
+	mutex_lock(&sbi->s_resize_lock);
 	if (input->group != sbi->s_groups_count) {
 		err = -EBUSY;
 		goto exit_journal;
@@ -324,7 +324,7 @@ exit_bh:
 	brelse(bh);
 
 exit_journal:
-	unlock_super(sb);
+	mutex_unlock(&sbi->s_resize_lock);
 	if ((err2 = ext3_journal_stop(handle)) && !err)
 		err = err2;
 
@@ -662,11 +662,12 @@ exit_free:
  * important part is that the new block and inode counts are in the backup
  * superblocks, and the location of the new group metadata in the GDT backups.
  *
- * We do not need lock_super() for this, because these blocks are not
- * otherwise touched by the filesystem code when it is mounted.  We don't
- * need to worry about last changing from sbi->s_groups_count, because the
- * worst that can happen is that we do not copy the full number of backups
- * at this time.  The resize which changed s_groups_count will backup again.
+ * We do not need take the s_resize_lock for this, because these
+ * blocks are not otherwise touched by the filesystem code when it is
+ * mounted.  We don't need to worry about last changing from
+ * sbi->s_groups_count, because the worst that can happen is that we
+ * do not copy the full number of backups at this time.  The resize
+ * which changed s_groups_count will backup again.
  */
 static void update_backups(struct super_block *sb,
 			   int blk_off, char *data, int size)
@@ -825,7 +826,7 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input)
 		goto exit_put;
 	}
 
-	lock_super(sb);
+	mutex_lock(&sbi->s_resize_lock);
 	if (input->group != sbi->s_groups_count) {
 		ext3_warning(sb, __func__,
 			     "multiple resizers run on filesystem!");
@@ -856,7 +857,7 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input)
 	/*
 	 * OK, now we've set up the new group.  Time to make it active.
 	 *
-	 * Current kernels don't lock all allocations via lock_super(),
+	 * We do not lock all allocations via s_resize_lock
 	 * so we have to be safe wrt. concurrent accesses the group
 	 * data.  So we need to be careful to set all of the relevant
 	 * group descriptor data etc. *before* we enable the group.
@@ -900,12 +901,12 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input)
 	 *
 	 * The precise rules we use are:
 	 *
-	 * * Writers of s_groups_count *must* hold lock_super
+	 * * Writers of s_groups_count *must* hold s_resize_lock
 	 * AND
 	 * * Writers must perform a smp_wmb() after updating all dependent
 	 *   data and before modifying the groups count
 	 *
-	 * * Readers must hold lock_super() over the access
+	 * * Readers must hold s_resize_lock over the access
 	 * OR
 	 * * Readers must perform an smp_rmb() after reading the groups count
 	 *   and before reading any dependent data.
@@ -936,7 +937,7 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input)
 	ext3_journal_dirty_metadata(handle, sbi->s_sbh);
 
 exit_journal:
-	unlock_super(sb);
+	mutex_unlock(&sbi->s_resize_lock);
 	if ((err2 = ext3_journal_stop(handle)) && !err)
 		err = err2;
 	if (!err) {
@@ -973,7 +974,7 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es,
 
 	/* We don't need to worry about locking wrt other resizers just
 	 * yet: we're going to revalidate es->s_blocks_count after
-	 * taking lock_super() below. */
+	 * taking the s_resize_lock below. */
 	o_blocks_count = le32_to_cpu(es->s_blocks_count);
 	o_groups_count = EXT3_SB(sb)->s_groups_count;
 
@@ -1045,11 +1046,11 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es,
 		goto exit_put;
 	}
 
-	lock_super(sb);
+	mutex_lock(&EXT3_SB(sb)->s_resize_lock);
 	if (o_blocks_count != le32_to_cpu(es->s_blocks_count)) {
 		ext3_warning(sb, __func__,
 			     "multiple resizers run on filesystem!");
-		unlock_super(sb);
+		mutex_unlock(&EXT3_SB(sb)->s_resize_lock);
 		ext3_journal_stop(handle);
 		err = -EBUSY;
 		goto exit_put;
@@ -1059,13 +1060,13 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es,
 						 EXT3_SB(sb)->s_sbh))) {
 		ext3_warning(sb, __func__,
 			     "error %d on journal write access", err);
-		unlock_super(sb);
+		mutex_unlock(&EXT3_SB(sb)->s_resize_lock);
 		ext3_journal_stop(handle);
 		goto exit_put;
 	}
 	es->s_blocks_count = cpu_to_le32(o_blocks_count + add);
 	ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
-	unlock_super(sb);
+	mutex_unlock(&EXT3_SB(sb)->s_resize_lock);
 	ext3_debug("freeing blocks %lu through "E3FSBLK"\n", o_blocks_count,
 		   o_blocks_count + add);
 	ext3_free_blocks_sb(handle, sb, o_blocks_count, add, &freed_blocks);
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 97dd3828384c..afa2b569da10 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -1929,6 +1929,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
 #endif
 	INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
 	mutex_init(&sbi->s_orphan_lock);
+	mutex_init(&sbi->s_resize_lock);
 
 	sb->s_root = NULL;
 
diff --git a/include/linux/ext3_fs_sb.h b/include/linux/ext3_fs_sb.h
index dd61d83026a0..258088ab3c6b 100644
--- a/include/linux/ext3_fs_sb.h
+++ b/include/linux/ext3_fs_sb.h
@@ -73,6 +73,7 @@ struct ext3_sb_info {
 	struct journal_s * s_journal;
 	struct list_head s_orphan;
 	struct mutex s_orphan_lock;
+	struct mutex s_resize_lock;
 	unsigned long s_commit_interval;
 	struct block_device *journal_bdev;
 #ifdef CONFIG_JBD_DEBUG
-- 
cgit v1.2.3


From 17bd55d037a02b04d9119511cfd1a4b985d20f63 Mon Sep 17 00:00:00 2001
From: Eric Sandeen <sandeen@redhat.com>
Date: Wed, 23 Dec 2009 07:57:07 -0500
Subject: fs-writeback: Add helper function to start writeback if idle

ext4, at least, would like to start pushing on writeback if it starts
to get close to ENOSPC when reserving worst-case blocks for delalloc
writes.  Writing out delalloc data will convert those worst-case
predictions into usually smaller actual usage, freeing up space
before we hit ENOSPC based on this speculation.

Thanks to Jens for the suggestion for the helper function,
& the naming help.

I've made the helper return status on whether writeback was
started even though I don't plan to use it in the ext4 patch;
it seems like it would be potentially useful to test this
in some cases.

Signed-off-by: Eric Sandeen <sandeen@redhat.com>
Acked-by: Jan Kara <jack@suse.cz>
---
 fs/fs-writeback.c         | 17 +++++++++++++++++
 include/linux/writeback.h |  1 +
 2 files changed, 18 insertions(+)

(limited to 'include/linux')

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 49bc1b8e8f19..f6c2155e0026 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -1186,6 +1186,23 @@ void writeback_inodes_sb(struct super_block *sb)
 }
 EXPORT_SYMBOL(writeback_inodes_sb);
 
+/**
+ * writeback_inodes_sb_if_idle	-	start writeback if none underway
+ * @sb: the superblock
+ *
+ * Invoke writeback_inodes_sb if no writeback is currently underway.
+ * Returns 1 if writeback was started, 0 if not.
+ */
+int writeback_inodes_sb_if_idle(struct super_block *sb)
+{
+	if (!writeback_in_progress(sb->s_bdi)) {
+		writeback_inodes_sb(sb);
+		return 1;
+	} else
+		return 0;
+}
+EXPORT_SYMBOL(writeback_inodes_sb_if_idle);
+
 /**
  * sync_inodes_sb	-	sync sb inode pages
  * @sb: the superblock
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index c18c008f4bbf..76e8903cd204 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -70,6 +70,7 @@ struct writeback_control {
 struct bdi_writeback;
 int inode_wait(void *);
 void writeback_inodes_sb(struct super_block *);
+int writeback_inodes_sb_if_idle(struct super_block *);
 void sync_inodes_sb(struct super_block *);
 void writeback_inodes_wbc(struct writeback_control *wbc);
 long wb_do_writeback(struct bdi_writeback *wb, int force_wait);
-- 
cgit v1.2.3


From 119eecc831a42bd090543568932e440c6831f1bb Mon Sep 17 00:00:00 2001
From: Stefani Seibold <stefani@seibold.net>
Date: Wed, 23 Dec 2009 09:10:48 +0100
Subject: Fix usb_serial_probe() problem introduced by the recent kfifo changes

The USB serial code was a new user of the kfifo API, and it was missed
when porting things to the new kfifo API.

Please make the write_fifo in place.  Here is my patch to fix the
regression and full ported version.

Signed-off-by: Stefani Seibold <stefani@seibold.net>
Reported-and-tested-by: Rafael J. Wysocki <rjw@sisk.pl>
Cc: Greg KH <greg@kroah.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/usb/serial/generic.c    | 12 ++++++------
 drivers/usb/serial/usb-serial.c |  5 ++---
 include/linux/usb/serial.h      |  3 ++-
 3 files changed, 10 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/serial/generic.c b/drivers/usb/serial/generic.c
index b0f1183755c9..f1ea3a33b6e6 100644
--- a/drivers/usb/serial/generic.c
+++ b/drivers/usb/serial/generic.c
@@ -276,7 +276,7 @@ static int usb_serial_generic_write_start(struct usb_serial_port *port)
 	if (port->write_urb_busy)
 		start_io = false;
 	else {
-		start_io = (kfifo_len(port->write_fifo) != 0);
+		start_io = (kfifo_len(&port->write_fifo) != 0);
 		port->write_urb_busy = start_io;
 	}
 	spin_unlock_irqrestore(&port->lock, flags);
@@ -285,7 +285,7 @@ static int usb_serial_generic_write_start(struct usb_serial_port *port)
 		return 0;
 
 	data = port->write_urb->transfer_buffer;
-	count = kfifo_out_locked(port->write_fifo, data, port->bulk_out_size, &port->lock);
+	count = kfifo_out_locked(&port->write_fifo, data, port->bulk_out_size, &port->lock);
 	usb_serial_debug_data(debug, &port->dev, __func__, count, data);
 
 	/* set up our urb */
@@ -345,7 +345,7 @@ int usb_serial_generic_write(struct tty_struct *tty,
 		return usb_serial_multi_urb_write(tty, port,
 						  buf, count);
 
-	count = kfifo_in_locked(port->write_fifo, buf, count, &port->lock);
+	count = kfifo_in_locked(&port->write_fifo, buf, count, &port->lock);
 	result = usb_serial_generic_write_start(port);
 
 	if (result >= 0)
@@ -370,7 +370,7 @@ int usb_serial_generic_write_room(struct tty_struct *tty)
 				(serial->type->max_in_flight_urbs -
 				 port->urbs_in_flight);
 	} else if (serial->num_bulk_out)
-		room = port->write_fifo->size - kfifo_len(port->write_fifo);
+		room = kfifo_avail(&port->write_fifo);
 	spin_unlock_irqrestore(&port->lock, flags);
 
 	dbg("%s - returns %d", __func__, room);
@@ -391,7 +391,7 @@ int usb_serial_generic_chars_in_buffer(struct tty_struct *tty)
 		chars = port->tx_bytes_flight;
 		spin_unlock_irqrestore(&port->lock, flags);
 	} else if (serial->num_bulk_out)
-		chars = kfifo_len(port->write_fifo);
+		chars = kfifo_len(&port->write_fifo);
 
 	dbg("%s - returns %d", __func__, chars);
 	return chars;
@@ -507,7 +507,7 @@ void usb_serial_generic_write_bulk_callback(struct urb *urb)
 		if (status) {
 			dbg("%s - nonzero multi-urb write bulk status "
 				"received: %d", __func__, status);
-			kfifo_reset(port->write_fifo);
+			kfifo_reset_out(&port->write_fifo);
 		} else
 			usb_serial_generic_write_start(port);
 	}
diff --git a/drivers/usb/serial/usb-serial.c b/drivers/usb/serial/usb-serial.c
index 636a4f23445e..33c85f7084f8 100644
--- a/drivers/usb/serial/usb-serial.c
+++ b/drivers/usb/serial/usb-serial.c
@@ -595,8 +595,7 @@ static void port_release(struct device *dev)
 	usb_free_urb(port->write_urb);
 	usb_free_urb(port->interrupt_in_urb);
 	usb_free_urb(port->interrupt_out_urb);
-	if (!IS_ERR(port->write_fifo) && port->write_fifo)
-		kfifo_free(port->write_fifo);
+	kfifo_free(&port->write_fifo);
 	kfree(port->bulk_in_buffer);
 	kfree(port->bulk_out_buffer);
 	kfree(port->interrupt_in_buffer);
@@ -939,7 +938,7 @@ int usb_serial_probe(struct usb_interface *interface,
 			dev_err(&interface->dev, "No free urbs available\n");
 			goto probe_error;
 		}
-		if (kfifo_alloc(port->write_fifo, PAGE_SIZE, GFP_KERNEL))
+		if (kfifo_alloc(&port->write_fifo, PAGE_SIZE, GFP_KERNEL))
 			goto probe_error;
 		buffer_size = le16_to_cpu(endpoint->wMaxPacketSize);
 		port->bulk_out_size = buffer_size;
diff --git a/include/linux/usb/serial.h b/include/linux/usb/serial.h
index acf6e457c04b..1819396ed501 100644
--- a/include/linux/usb/serial.h
+++ b/include/linux/usb/serial.h
@@ -16,6 +16,7 @@
 #include <linux/kref.h>
 #include <linux/mutex.h>
 #include <linux/sysrq.h>
+#include <linux/kfifo.h>
 
 #define SERIAL_TTY_MAJOR	188	/* Nice legal number now */
 #define SERIAL_TTY_MINORS	254	/* loads of devices :) */
@@ -94,7 +95,7 @@ struct usb_serial_port {
 	unsigned char		*bulk_out_buffer;
 	int			bulk_out_size;
 	struct urb		*write_urb;
-	struct kfifo		*write_fifo;
+	struct kfifo		write_fifo;
 	int			write_urb_busy;
 	__u8			bulk_out_endpointAddress;
 
-- 
cgit v1.2.3


From 9c717de946ed7f5782e6dffacf2d05859073058c Mon Sep 17 00:00:00 2001
From: Randy Dunlap <randy.dunlap@oracle.com>
Date: Wed, 23 Dec 2009 09:23:33 -0800
Subject: kfifo: fix Error/broken kernel-doc notation

Fix kernel-doc errors and warnings in new header file kfifo.h.
Don't use kernel-doc "/**" for internal functions whose comments
are not in kernel-doc format.

kernel-doc section header names (like "Note:") must be unique
per function.  Looks like I need to document that.

  Error(include/linux/kfifo.h:76): duplicate section name 'Note'
  Warning(include/linux/kfifo.h:88): Excess function parameter 'size' description in 'INIT_KFIFO'
  Error(include/linux/kfifo.h:101): duplicate section name 'Note'
  Warning(include/linux/kfifo.h:257): No description found for parameter 'fifo'
    (many of this last type, from internal functions)

Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Cc: Stefani Seibold <stefani@seibold.net>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kfifo.h | 31 +++++++++++++++----------------
 1 file changed, 15 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h
index 486e8ad3bb50..3d44e9c65a8e 100644
--- a/include/linux/kfifo.h
+++ b/include/linux/kfifo.h
@@ -69,8 +69,8 @@ struct kfifo {
  * @name: name of the declared kfifo datatype
  * @size: size of the fifo buffer
  *
- * Note: the macro can be used inside struct or union declaration
- * Note: the macro creates two objects:
+ * Note1: the macro can be used inside struct or union declaration
+ * Note2: the macro creates two objects:
  *  A kfifo object with the given name and a buffer for the kfifo
  *  object named name##kfifo_buffer
  */
@@ -83,7 +83,6 @@ union { \
 /**
  * INIT_KFIFO - Initialize a kfifo declared by DECLARED_KFIFO
  * @name: name of the declared kfifo datatype
- * @size: size of the fifo buffer
  */
 #define INIT_KFIFO(name) \
 	name = __kfifo_initializer(sizeof(name##kfifo_buffer) - \
@@ -94,8 +93,8 @@ union { \
  * @name: name of the declared kfifo datatype
  * @size: size of the fifo buffer
  *
- * Note: the macro can be used for global and local kfifo data type variables
- * Note: the macro creates two objects:
+ * Note1: the macro can be used for global and local kfifo data type variables
+ * Note2: the macro creates two objects:
  *  A kfifo object with the given name and a buffer for the kfifo
  *  object named name##kfifo_buffer
  */
@@ -249,7 +248,7 @@ extern __must_check unsigned int kfifo_from_user(struct kfifo *fifo,
 extern __must_check unsigned int kfifo_to_user(struct kfifo *fifo,
 	void __user *to, unsigned int n);
 
-/**
+/*
  * __kfifo_add_out internal helper function for updating the out offset
  */
 static inline void __kfifo_add_out(struct kfifo *fifo,
@@ -259,7 +258,7 @@ static inline void __kfifo_add_out(struct kfifo *fifo,
 	fifo->out += off;
 }
 
-/**
+/*
  * __kfifo_add_in internal helper function for updating the in offset
  */
 static inline void __kfifo_add_in(struct kfifo *fifo,
@@ -269,7 +268,7 @@ static inline void __kfifo_add_in(struct kfifo *fifo,
 	fifo->in += off;
 }
 
-/**
+/*
  * __kfifo_off internal helper function for calculating the index of a
  * given offeset
  */
@@ -278,7 +277,7 @@ static inline unsigned int __kfifo_off(struct kfifo *fifo, unsigned int off)
 	return off & (fifo->size - 1);
 }
 
-/**
+/*
  * __kfifo_peek_n internal helper function for determinate the length of
  * the next record in the fifo
  */
@@ -299,7 +298,7 @@ static inline unsigned int __kfifo_peek_n(struct kfifo *fifo,
 #undef	__KFIFO_GET
 }
 
-/**
+/*
  * __kfifo_poke_n internal helper function for storing the length of
  * the next record into the fifo
  */
@@ -319,7 +318,7 @@ static inline void __kfifo_poke_n(struct kfifo *fifo,
 #undef	__KFIFO_PUT
 }
 
-/**
+/*
  * __kfifo_in_... internal functions for put date into the fifo
  * do not call it directly, use kfifo_in_rec() instead
  */
@@ -367,7 +366,7 @@ static inline __must_check unsigned int kfifo_in_rec(struct kfifo *fifo,
 	return __kfifo_in_rec(fifo, from, n, recsize);
 }
 
-/**
+/*
  * __kfifo_out_... internal functions for get date from the fifo
  * do not call it directly, use kfifo_out_rec() instead
  */
@@ -425,7 +424,7 @@ static inline __must_check unsigned int kfifo_out_rec(struct kfifo *fifo,
 	return __kfifo_out_rec(fifo, to, n, recsize, total);
 }
 
-/**
+/*
  * __kfifo_from_user_... internal functions for transfer from user space into
  * the fifo. do not call it directly, use kfifo_from_user_rec() instead
  */
@@ -474,7 +473,7 @@ static inline __must_check unsigned int kfifo_from_user_rec(struct kfifo *fifo,
 	return __kfifo_from_user_rec(fifo, from, n, recsize);
 }
 
-/**
+/*
  * __kfifo_to_user_... internal functions for transfer fifo data into user space
  * do not call it directly, use kfifo_to_user_rec() instead
  */
@@ -533,7 +532,7 @@ static inline __must_check unsigned int kfifo_to_user_rec(struct kfifo *fifo,
 	return __kfifo_to_user_rec(fifo, to, n, recsize, total);
 }
 
-/**
+/*
  * __kfifo_peek_... internal functions for peek into the next fifo record
  * do not call it directly, use kfifo_peek_rec() instead
  */
@@ -557,7 +556,7 @@ static inline __must_check unsigned int kfifo_peek_rec(struct kfifo *fifo,
 	return __kfifo_peek_n(fifo, recsize);
 }
 
-/**
+/*
  * __kfifo_skip_... internal functions for skip the next fifo record
  * do not call it directly, use kfifo_skip_rec() instead
  */
-- 
cgit v1.2.3


From 26579ab70aa0e0ea434e6e100279d2f67c094431 Mon Sep 17 00:00:00 2001
From: Phil Carmody <ext-phil.2.carmody@nokia.com>
Date: Fri, 18 Dec 2009 15:34:19 +0200
Subject: Driver core: device_attribute parameters can often be const*

Most device_attributes are const, and are begging to be
put in a ro section. However, the create and remove
file interfaces were failing to propagate the const promise
which the only functions they call offer.

Signed-off-by: Phil Carmody <ext-phil.2.carmody@nokia.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 Documentation/filesystems/sysfs.txt | 8 ++++----
 drivers/base/core.c                 | 6 ++++--
 include/linux/device.h              | 4 ++--
 3 files changed, 10 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/filesystems/sysfs.txt b/Documentation/filesystems/sysfs.txt
index b245d524d568..a4ac2b98c613 100644
--- a/Documentation/filesystems/sysfs.txt
+++ b/Documentation/filesystems/sysfs.txt
@@ -91,8 +91,8 @@ struct device_attribute {
 			 const char *buf, size_t count);
 };
 
-int device_create_file(struct device *, struct device_attribute *);
-void device_remove_file(struct device *, struct device_attribute *);
+int device_create_file(struct device *, const struct device_attribute *);
+void device_remove_file(struct device *, const struct device_attribute *);
 
 It also defines this helper for defining device attributes: 
 
@@ -316,8 +316,8 @@ DEVICE_ATTR(_name, _mode, _show, _store);
 
 Creation/Removal:
 
-int device_create_file(struct device *device, struct device_attribute * attr);
-void device_remove_file(struct device * dev, struct device_attribute * attr);
+int device_create_file(struct device *dev, const struct device_attribute * attr);
+void device_remove_file(struct device *dev, const struct device_attribute * attr);
 
 
 - bus drivers (include/linux/device.h)
diff --git a/drivers/base/core.c b/drivers/base/core.c
index f1290cbd1350..2fd9e611f8a6 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -446,7 +446,8 @@ struct kset *devices_kset;
  * @dev: device.
  * @attr: device attribute descriptor.
  */
-int device_create_file(struct device *dev, struct device_attribute *attr)
+int device_create_file(struct device *dev,
+		       const struct device_attribute *attr)
 {
 	int error = 0;
 	if (dev)
@@ -459,7 +460,8 @@ int device_create_file(struct device *dev, struct device_attribute *attr)
  * @dev: device.
  * @attr: device attribute descriptor.
  */
-void device_remove_file(struct device *dev, struct device_attribute *attr)
+void device_remove_file(struct device *dev,
+			const struct device_attribute *attr)
 {
 	if (dev)
 		sysfs_remove_file(&dev->kobj, &attr->attr);
diff --git a/include/linux/device.h b/include/linux/device.h
index 2a73d9bcbc9c..aa5b3e66a147 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -319,9 +319,9 @@ struct device_attribute {
 struct device_attribute dev_attr_##_name = __ATTR(_name, _mode, _show, _store)
 
 extern int __must_check device_create_file(struct device *device,
-					   struct device_attribute *entry);
+					   const struct device_attribute *entry);
 extern void device_remove_file(struct device *dev,
-			       struct device_attribute *attr);
+			       const struct device_attribute *attr);
 extern int __must_check device_create_bin_file(struct device *dev,
 					       struct bin_attribute *attr);
 extern void device_remove_bin_file(struct device *dev,
-- 
cgit v1.2.3


From 66ecb92be9eb579df93add22d19843e7869f168e Mon Sep 17 00:00:00 2001
From: Phil Carmody <ext-phil.2.carmody@nokia.com>
Date: Fri, 18 Dec 2009 15:34:20 +0200
Subject: Driver core: bin_attribute parameters can often be const*

Many struct bin_attribute descriptors are purely read-only
structures, and there's no need to change them. Therefore
make the promise not to, which will let those descriptors
be put in a ro section.

Signed-off-by: Phil Carmody <ext-phil.2.carmody@nokia.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/base/core.c    | 6 ++++--
 fs/sysfs/bin.c         | 6 ++++--
 include/linux/device.h | 6 +++---
 include/linux/sysfs.h  | 9 +++++----
 4 files changed, 16 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/core.c b/drivers/base/core.c
index 2fd9e611f8a6..83afc8b8f27b 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -472,7 +472,8 @@ void device_remove_file(struct device *dev,
  * @dev: device.
  * @attr: device binary attribute descriptor.
  */
-int device_create_bin_file(struct device *dev, struct bin_attribute *attr)
+int device_create_bin_file(struct device *dev,
+			   const struct bin_attribute *attr)
 {
 	int error = -EINVAL;
 	if (dev)
@@ -486,7 +487,8 @@ EXPORT_SYMBOL_GPL(device_create_bin_file);
  * @dev: device.
  * @attr: device binary attribute descriptor.
  */
-void device_remove_bin_file(struct device *dev, struct bin_attribute *attr)
+void device_remove_bin_file(struct device *dev,
+			    const struct bin_attribute *attr)
 {
 	if (dev)
 		sysfs_remove_bin_file(&dev->kobj, attr);
diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c
index 60c702bc10ae..a0a500af24a1 100644
--- a/fs/sysfs/bin.c
+++ b/fs/sysfs/bin.c
@@ -483,7 +483,8 @@ void unmap_bin_file(struct sysfs_dirent *attr_sd)
  *	@attr:	attribute descriptor.
  */
 
-int sysfs_create_bin_file(struct kobject * kobj, struct bin_attribute * attr)
+int sysfs_create_bin_file(struct kobject *kobj,
+			  const struct bin_attribute *attr)
 {
 	BUG_ON(!kobj || !kobj->sd || !attr);
 
@@ -497,7 +498,8 @@ int sysfs_create_bin_file(struct kobject * kobj, struct bin_attribute * attr)
  *	@attr:	attribute descriptor.
  */
 
-void sysfs_remove_bin_file(struct kobject * kobj, struct bin_attribute * attr)
+void sysfs_remove_bin_file(struct kobject *kobj,
+			   const struct bin_attribute *attr)
 {
 	sysfs_hash_and_remove(kobj->sd, attr->attr.name);
 }
diff --git a/include/linux/device.h b/include/linux/device.h
index aa5b3e66a147..10d74ce93a46 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -319,13 +319,13 @@ struct device_attribute {
 struct device_attribute dev_attr_##_name = __ATTR(_name, _mode, _show, _store)
 
 extern int __must_check device_create_file(struct device *device,
-					   const struct device_attribute *entry);
+					const struct device_attribute *entry);
 extern void device_remove_file(struct device *dev,
 			       const struct device_attribute *attr);
 extern int __must_check device_create_bin_file(struct device *dev,
-					       struct bin_attribute *attr);
+					const struct bin_attribute *attr);
 extern void device_remove_bin_file(struct device *dev,
-				   struct bin_attribute *attr);
+				   const struct bin_attribute *attr);
 extern int device_schedule_callback_owner(struct device *dev,
 		void (*func)(struct device *dev), struct module *owner);
 
diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index 9d68fed50f11..cfa83083a2d4 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -99,8 +99,9 @@ int __must_check sysfs_chmod_file(struct kobject *kobj, struct attribute *attr,
 void sysfs_remove_file(struct kobject *kobj, const struct attribute *attr);
 
 int __must_check sysfs_create_bin_file(struct kobject *kobj,
-				       struct bin_attribute *attr);
-void sysfs_remove_bin_file(struct kobject *kobj, struct bin_attribute *attr);
+				       const struct bin_attribute *attr);
+void sysfs_remove_bin_file(struct kobject *kobj,
+			   const struct bin_attribute *attr);
 
 int __must_check sysfs_create_link(struct kobject *kobj, struct kobject *target,
 				   const char *name);
@@ -175,13 +176,13 @@ static inline void sysfs_remove_file(struct kobject *kobj,
 }
 
 static inline int sysfs_create_bin_file(struct kobject *kobj,
-					struct bin_attribute *attr)
+					const struct bin_attribute *attr)
 {
 	return 0;
 }
 
 static inline void sysfs_remove_bin_file(struct kobject *kobj,
-					 struct bin_attribute *attr)
+					 const struct bin_attribute *attr)
 {
 }
 
-- 
cgit v1.2.3


From 099c2f21d8cf0724b85abb2c589d6276953781b7 Mon Sep 17 00:00:00 2001
From: Phil Carmody <ext-phil.2.carmody@nokia.com>
Date: Fri, 18 Dec 2009 15:34:21 +0200
Subject: Driver core: driver_attribute parameters can often be const*

Many struct driver_attribute descriptors are purely read-only
structures, and there's no need to change them. Therefore make
the promise not to, which will let those descriptors be put in
a ro section.

Signed-off-by: Phil Carmody <ext-phil.2.carmody@nokia.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 Documentation/driver-model/driver.txt | 4 ++--
 Documentation/filesystems/sysfs.txt   | 4 ++--
 drivers/base/driver.c                 | 4 ++--
 include/linux/device.h                | 4 ++--
 4 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/driver-model/driver.txt b/Documentation/driver-model/driver.txt
index 60120fb3b961..d2cd6fb8ba9e 100644
--- a/Documentation/driver-model/driver.txt
+++ b/Documentation/driver-model/driver.txt
@@ -226,5 +226,5 @@ struct driver_attribute driver_attr_debug;
 This can then be used to add and remove the attribute from the
 driver's directory using:
 
-int driver_create_file(struct device_driver *, struct driver_attribute *);
-void driver_remove_file(struct device_driver *, struct driver_attribute *);
+int driver_create_file(struct device_driver *, const struct driver_attribute *);
+void driver_remove_file(struct device_driver *, const struct driver_attribute *);
diff --git a/Documentation/filesystems/sysfs.txt b/Documentation/filesystems/sysfs.txt
index a4ac2b98c613..931c806642c5 100644
--- a/Documentation/filesystems/sysfs.txt
+++ b/Documentation/filesystems/sysfs.txt
@@ -358,7 +358,7 @@ DRIVER_ATTR(_name, _mode, _show, _store)
 
 Creation/Removal:
 
-int driver_create_file(struct device_driver *, struct driver_attribute *);
-void driver_remove_file(struct device_driver *, struct driver_attribute *);
+int driver_create_file(struct device_driver *, const struct driver_attribute *);
+void driver_remove_file(struct device_driver *, const struct driver_attribute *);
 
 
diff --git a/drivers/base/driver.c b/drivers/base/driver.c
index f367885a7646..90c9fff09ead 100644
--- a/drivers/base/driver.c
+++ b/drivers/base/driver.c
@@ -98,7 +98,7 @@ EXPORT_SYMBOL_GPL(driver_find_device);
  * @attr: driver attribute descriptor.
  */
 int driver_create_file(struct device_driver *drv,
-		       struct driver_attribute *attr)
+		       const struct driver_attribute *attr)
 {
 	int error;
 	if (drv)
@@ -115,7 +115,7 @@ EXPORT_SYMBOL_GPL(driver_create_file);
  * @attr: driver attribute descriptor.
  */
 void driver_remove_file(struct device_driver *drv,
-			struct driver_attribute *attr)
+			const struct driver_attribute *attr)
 {
 	if (drv)
 		sysfs_remove_file(&drv->p->kobj, &attr->attr);
diff --git a/include/linux/device.h b/include/linux/device.h
index 10d74ce93a46..a62799f2ab00 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -166,9 +166,9 @@ struct driver_attribute driver_attr_##_name =		\
 	__ATTR(_name, _mode, _show, _store)
 
 extern int __must_check driver_create_file(struct device_driver *driver,
-					   struct driver_attribute *attr);
+					const struct driver_attribute *attr);
 extern void driver_remove_file(struct device_driver *driver,
-			       struct driver_attribute *attr);
+			       const struct driver_attribute *attr);
 
 extern int __must_check driver_add_kobj(struct device_driver *drv,
 					struct kobject *kobj,
-- 
cgit v1.2.3


From 29d249ed80c80b479df78e456e6809223c648505 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@suse.de>
Date: Fri, 18 Dec 2009 09:59:48 -0800
Subject: Staging: dst: remove from the tree

DST is dead, no one is using it and upstream
has abandoned it, so remove it from the tree because
it is not going anywhere.

Acked-by: Evgeniy Polyakov <zbr@ioremap.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/staging/Kconfig           |   2 -
 drivers/staging/Makefile          |   1 -
 drivers/staging/dst/Kconfig       |  67 ---
 drivers/staging/dst/Makefile      |   3 -
 drivers/staging/dst/crypto.c      | 733 -----------------------------
 drivers/staging/dst/dcore.c       | 968 --------------------------------------
 drivers/staging/dst/export.c      | 660 --------------------------
 drivers/staging/dst/state.c       | 844 ---------------------------------
 drivers/staging/dst/thread_pool.c | 348 --------------
 drivers/staging/dst/trans.c       | 337 -------------
 include/linux/dst.h               | 587 -----------------------
 11 files changed, 4550 deletions(-)
 delete mode 100644 drivers/staging/dst/Kconfig
 delete mode 100644 drivers/staging/dst/Makefile
 delete mode 100644 drivers/staging/dst/crypto.c
 delete mode 100644 drivers/staging/dst/dcore.c
 delete mode 100644 drivers/staging/dst/export.c
 delete mode 100644 drivers/staging/dst/state.c
 delete mode 100644 drivers/staging/dst/thread_pool.c
 delete mode 100644 drivers/staging/dst/trans.c
 delete mode 100644 include/linux/dst.h

(limited to 'include/linux')

diff --git a/drivers/staging/Kconfig b/drivers/staging/Kconfig
index db0de940949e..94eb86319ff3 100644
--- a/drivers/staging/Kconfig
+++ b/drivers/staging/Kconfig
@@ -87,8 +87,6 @@ source "drivers/staging/frontier/Kconfig"
 
 source "drivers/staging/dream/Kconfig"
 
-source "drivers/staging/dst/Kconfig"
-
 source "drivers/staging/pohmelfs/Kconfig"
 
 source "drivers/staging/b3dfg/Kconfig"
diff --git a/drivers/staging/Makefile b/drivers/staging/Makefile
index 73c6a71155e0..b5e67b889f60 100644
--- a/drivers/staging/Makefile
+++ b/drivers/staging/Makefile
@@ -26,7 +26,6 @@ obj-$(CONFIG_RTL8192E)		+= rtl8192e/
 obj-$(CONFIG_INPUT_MIMIO)	+= mimio/
 obj-$(CONFIG_TRANZPORT)		+= frontier/
 obj-$(CONFIG_DREAM)		+= dream/
-obj-$(CONFIG_DST)		+= dst/
 obj-$(CONFIG_POHMELFS)		+= pohmelfs/
 obj-$(CONFIG_B3DFG)		+= b3dfg/
 obj-$(CONFIG_IDE_PHISON)	+= phison/
diff --git a/drivers/staging/dst/Kconfig b/drivers/staging/dst/Kconfig
deleted file mode 100644
index 448d342ac2a2..000000000000
--- a/drivers/staging/dst/Kconfig
+++ /dev/null
@@ -1,67 +0,0 @@
-config DST
-	tristate "Distributed storage"
-	depends on NET && CRYPTO && SYSFS && BLK_DEV
-	select CONNECTOR
-	---help---
-	DST is a network block device storage, which can be used to organize
-	exported storage on the remote nodes into the local block device.
-
-	DST works on top of any network media and protocol; it is just a matter
-	of configuration utility to understand the correct addresses. The most
-	common example is TCP over IP, which allows to pass through firewalls and
-	create remote backup storage in a different datacenter. DST requires
-	single port to be enabled on the exporting node and outgoing connections
-	on the local node.
-
-	DST works with in-kernel client and server, which improves performance by
-	eliminating unneded data copies and by not depending on the version
-	of the external IO components. It requires userspace configuration utility
-	though.
-
-	DST uses transaction model, when each store has to be explicitly acked
-	from the remote node to be considered as successfully written. There
-	may be lots of in-flight transactions. When remote host does not ack
-	the transaction it will be resent predefined number of times with specified
-	timeouts between them. All those parameters are configurable. Transactions
-	are marked as failed after all resends complete unsuccessfully; having
-	long enough resend timeout and/or large number of resends allows not to
-	return error to the higher (FS usually) layer in case of short network
-	problems or remote node outages. In case of network RAID setup this means
-	that storage will not degrade until transactions are marked as failed, and
-	thus will not force checksum recalculation and data rebuild. In case of
-	connection failure DST will try to reconnect to the remote node automatically.
-	DST sends ping commands at idle time to detect if remote node is alive.
-
-	Because of transactional model it is possible to use zero-copy sending
-	without worry of data corruption (which in turn could be detected by the
-	strong checksums though).
-
-	DST may fully encrypt the data channel in case of untrusted channel and implement
-	strong checksum of the transferred data. It is possible to configure algorithms
-	and crypto keys; they should match on both sides of the network channel.
-	Crypto processing does not introduce noticeble performance overhead, since DST
-	uses configurable pool of threads to perform crypto processing.
-
-	DST utilizes memory pool model of all its transaction allocations (it is the
-	only additional allocation on the client) and server allocations (bio pools,
-	while pages are allocated from the slab).
-
-	At startup DST performs a simple negotiation with the export node to determine
-	access permissions and size of the exported storage. It can be extended if
-	new parameters should be autonegotiated.
-
-	DST carries block IO flags in the protocol, which allows to transparently implement
-	barriers and sync/flush operations. Those flags are used in the export node where
-	IO against the local storage is performed, which means that sync write will be sync
-	on the remote node too, which in turn improves data integrity and improved resistance
-	to errors and data corruption during power outages or storage damages.
-
-	Homepage: http://www.ioremap.net/projects/dst
-	Userspace configuration utility and the latest releases: http://www.ioremap.net/archive/dst/
-
-config DST_DEBUG
-	bool "DST debug"
-	depends on DST
-	---help---
-	This option will enable HEAVY debugging of the DST.
-	Turn it on ONLY if you have to debug some really obscure problem.
diff --git a/drivers/staging/dst/Makefile b/drivers/staging/dst/Makefile
deleted file mode 100644
index 3a8b0cf9643e..000000000000
--- a/drivers/staging/dst/Makefile
+++ /dev/null
@@ -1,3 +0,0 @@
-obj-$(CONFIG_DST) += nst.o
-
-nst-y := dcore.o state.o export.o thread_pool.o crypto.o trans.o
diff --git a/drivers/staging/dst/crypto.c b/drivers/staging/dst/crypto.c
deleted file mode 100644
index 351295c97a4b..000000000000
--- a/drivers/staging/dst/crypto.c
+++ /dev/null
@@ -1,733 +0,0 @@
-/*
- * 2007+ Copyright (c) Evgeniy Polyakov <zbr@ioremap.net>
- * All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- */
-
-#include <linux/bio.h>
-#include <linux/crypto.h>
-#include <linux/dst.h>
-#include <linux/kernel.h>
-#include <linux/scatterlist.h>
-#include <linux/slab.h>
-
-/*
- * Tricky bastard, but IV can be more complex with time...
- */
-static inline u64 dst_gen_iv(struct dst_trans *t)
-{
-	return t->gen;
-}
-
-/*
- * Crypto machinery: hash/cipher support for the given crypto controls.
- */
-static struct crypto_hash *dst_init_hash(struct dst_crypto_ctl *ctl, u8 *key)
-{
-	int err;
-	struct crypto_hash *hash;
-
-	hash = crypto_alloc_hash(ctl->hash_algo, 0, CRYPTO_ALG_ASYNC);
-	if (IS_ERR(hash)) {
-		err = PTR_ERR(hash);
-		dprintk("%s: failed to allocate hash '%s', err: %d.\n",
-				__func__, ctl->hash_algo, err);
-		goto err_out_exit;
-	}
-
-	ctl->crypto_attached_size = crypto_hash_digestsize(hash);
-
-	if (!ctl->hash_keysize)
-		return hash;
-
-	err = crypto_hash_setkey(hash, key, ctl->hash_keysize);
-	if (err) {
-		dprintk("%s: failed to set key for hash '%s', err: %d.\n",
-				__func__, ctl->hash_algo, err);
-		goto err_out_free;
-	}
-
-	return hash;
-
-err_out_free:
-	crypto_free_hash(hash);
-err_out_exit:
-	return ERR_PTR(err);
-}
-
-static struct crypto_ablkcipher *dst_init_cipher(struct dst_crypto_ctl *ctl,
-		u8 *key)
-{
-	int err = -EINVAL;
-	struct crypto_ablkcipher *cipher;
-
-	if (!ctl->cipher_keysize)
-		goto err_out_exit;
-
-	cipher = crypto_alloc_ablkcipher(ctl->cipher_algo, 0, 0);
-	if (IS_ERR(cipher)) {
-		err = PTR_ERR(cipher);
-		dprintk("%s: failed to allocate cipher '%s', err: %d.\n",
-				__func__, ctl->cipher_algo, err);
-		goto err_out_exit;
-	}
-
-	crypto_ablkcipher_clear_flags(cipher, ~0);
-
-	err = crypto_ablkcipher_setkey(cipher, key, ctl->cipher_keysize);
-	if (err) {
-		dprintk("%s: failed to set key for cipher '%s', err: %d.\n",
-				__func__, ctl->cipher_algo, err);
-		goto err_out_free;
-	}
-
-	return cipher;
-
-err_out_free:
-	crypto_free_ablkcipher(cipher);
-err_out_exit:
-	return ERR_PTR(err);
-}
-
-/*
- * Crypto engine has a pool of pages to encrypt data into before sending
- * it over the network. This pool is freed/allocated here.
- */
-static void dst_crypto_pages_free(struct dst_crypto_engine *e)
-{
-	unsigned int i;
-
-	for (i = 0; i < e->page_num; ++i)
-		__free_page(e->pages[i]);
-	kfree(e->pages);
-}
-
-static int dst_crypto_pages_alloc(struct dst_crypto_engine *e, int num)
-{
-	int i;
-
-	e->pages = kmalloc(num * sizeof(struct page **), GFP_KERNEL);
-	if (!e->pages)
-		return -ENOMEM;
-
-	for (i = 0; i < num; ++i) {
-		e->pages[i] = alloc_page(GFP_KERNEL);
-		if (!e->pages[i])
-			goto err_out_free_pages;
-	}
-
-	e->page_num = num;
-	return 0;
-
-err_out_free_pages:
-	while (--i >= 0)
-		__free_page(e->pages[i]);
-
-	kfree(e->pages);
-	return -ENOMEM;
-}
-
-/*
- * Initialize crypto engine for given node.
- * Setup cipher/hash, keys, pool of threads and private data.
- */
-static int dst_crypto_engine_init(struct dst_crypto_engine *e,
-		struct dst_node *n)
-{
-	int err;
-	struct dst_crypto_ctl *ctl = &n->crypto;
-
-	err = dst_crypto_pages_alloc(e, n->max_pages);
-	if (err)
-		goto err_out_exit;
-
-	e->size = PAGE_SIZE;
-	e->data = kmalloc(e->size, GFP_KERNEL);
-	if (!e->data) {
-		err = -ENOMEM;
-		goto err_out_free_pages;
-	}
-
-	if (ctl->hash_algo[0]) {
-		e->hash = dst_init_hash(ctl, n->hash_key);
-		if (IS_ERR(e->hash)) {
-			err = PTR_ERR(e->hash);
-			e->hash = NULL;
-			goto err_out_free;
-		}
-	}
-
-	if (ctl->cipher_algo[0]) {
-		e->cipher = dst_init_cipher(ctl, n->cipher_key);
-		if (IS_ERR(e->cipher)) {
-			err = PTR_ERR(e->cipher);
-			e->cipher = NULL;
-			goto err_out_free_hash;
-		}
-	}
-
-	return 0;
-
-err_out_free_hash:
-	crypto_free_hash(e->hash);
-err_out_free:
-	kfree(e->data);
-err_out_free_pages:
-	dst_crypto_pages_free(e);
-err_out_exit:
-	return err;
-}
-
-static void dst_crypto_engine_exit(struct dst_crypto_engine *e)
-{
-	if (e->hash)
-		crypto_free_hash(e->hash);
-	if (e->cipher)
-		crypto_free_ablkcipher(e->cipher);
-	dst_crypto_pages_free(e);
-	kfree(e->data);
-}
-
-/*
- * Waiting for cipher processing to be completed.
- */
-struct dst_crypto_completion {
-	struct completion		complete;
-	int				error;
-};
-
-static void dst_crypto_complete(struct crypto_async_request *req, int err)
-{
-	struct dst_crypto_completion *c = req->data;
-
-	if (err == -EINPROGRESS)
-		return;
-
-	dprintk("%s: req: %p, err: %d.\n", __func__, req, err);
-	c->error = err;
-	complete(&c->complete);
-}
-
-static int dst_crypto_process(struct ablkcipher_request *req,
-		struct scatterlist *sg_dst, struct scatterlist *sg_src,
-		void *iv, int enc, unsigned long timeout)
-{
-	struct dst_crypto_completion c;
-	int err;
-
-	init_completion(&c.complete);
-	c.error = -EINPROGRESS;
-
-	ablkcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
-					dst_crypto_complete, &c);
-
-	ablkcipher_request_set_crypt(req, sg_src, sg_dst, sg_src->length, iv);
-
-	if (enc)
-		err = crypto_ablkcipher_encrypt(req);
-	else
-		err = crypto_ablkcipher_decrypt(req);
-
-	switch (err) {
-	case -EINPROGRESS:
-	case -EBUSY:
-		err = wait_for_completion_interruptible_timeout(&c.complete,
-				timeout);
-		if (!err)
-			err = -ETIMEDOUT;
-		else
-			err = c.error;
-		break;
-	default:
-		break;
-	}
-
-	return err;
-}
-
-/*
- * DST uses generic iteration approach for data crypto processing.
- * Single block IO request is switched into array of scatterlists,
- * which are submitted to the crypto processing iterator.
- *
- * Input and output iterator initialization are different, since
- * in output case we can not encrypt data in-place and need a
- * temporary storage, which is then being sent to the remote peer.
- */
-static int dst_trans_iter_out(struct bio *bio, struct dst_crypto_engine *e,
-		int (*iterator) (struct dst_crypto_engine *e,
-				  struct scatterlist *dst,
-				  struct scatterlist *src))
-{
-	struct bio_vec *bv;
-	int err, i;
-
-	sg_init_table(e->src, bio->bi_vcnt);
-	sg_init_table(e->dst, bio->bi_vcnt);
-
-	bio_for_each_segment(bv, bio, i) {
-		sg_set_page(&e->src[i], bv->bv_page, bv->bv_len, bv->bv_offset);
-		sg_set_page(&e->dst[i], e->pages[i], bv->bv_len, bv->bv_offset);
-
-		err = iterator(e, &e->dst[i], &e->src[i]);
-		if (err)
-			return err;
-	}
-
-	return 0;
-}
-
-static int dst_trans_iter_in(struct bio *bio, struct dst_crypto_engine *e,
-		int (*iterator) (struct dst_crypto_engine *e,
-				  struct scatterlist *dst,
-				  struct scatterlist *src))
-{
-	struct bio_vec *bv;
-	int err, i;
-
-	sg_init_table(e->src, bio->bi_vcnt);
-	sg_init_table(e->dst, bio->bi_vcnt);
-
-	bio_for_each_segment(bv, bio, i) {
-		sg_set_page(&e->src[i], bv->bv_page, bv->bv_len, bv->bv_offset);
-		sg_set_page(&e->dst[i], bv->bv_page, bv->bv_len, bv->bv_offset);
-
-		err = iterator(e, &e->dst[i], &e->src[i]);
-		if (err)
-			return err;
-	}
-
-	return 0;
-}
-
-static int dst_crypt_iterator(struct dst_crypto_engine *e,
-		struct scatterlist *sg_dst, struct scatterlist *sg_src)
-{
-	struct ablkcipher_request *req = e->data;
-	u8 iv[32];
-
-	memset(iv, 0, sizeof(iv));
-
-	memcpy(iv, &e->iv, sizeof(e->iv));
-
-	return dst_crypto_process(req, sg_dst, sg_src, iv, e->enc, e->timeout);
-}
-
-static int dst_crypt(struct dst_crypto_engine *e, struct bio *bio)
-{
-	struct ablkcipher_request *req = e->data;
-
-	memset(req, 0, sizeof(struct ablkcipher_request));
-	ablkcipher_request_set_tfm(req, e->cipher);
-
-	if (e->enc)
-		return dst_trans_iter_out(bio, e, dst_crypt_iterator);
-	else
-		return dst_trans_iter_in(bio, e, dst_crypt_iterator);
-}
-
-static int dst_hash_iterator(struct dst_crypto_engine *e,
-		struct scatterlist *sg_dst, struct scatterlist *sg_src)
-{
-	return crypto_hash_update(e->data, sg_src, sg_src->length);
-}
-
-static int dst_hash(struct dst_crypto_engine *e, struct bio *bio, void *dst)
-{
-	struct hash_desc *desc = e->data;
-	int err;
-
-	desc->tfm = e->hash;
-	desc->flags = 0;
-
-	err = crypto_hash_init(desc);
-	if (err)
-		return err;
-
-	err = dst_trans_iter_in(bio, e, dst_hash_iterator);
-	if (err)
-		return err;
-
-	err = crypto_hash_final(desc, dst);
-	if (err)
-		return err;
-
-	return 0;
-}
-
-/*
- * Initialize/cleanup a crypto thread. The only thing it should
- * do is to allocate a pool of pages as temporary storage.
- * And to setup cipher and/or hash.
- */
-static void *dst_crypto_thread_init(void *data)
-{
-	struct dst_node *n = data;
-	struct dst_crypto_engine *e;
-	int err = -ENOMEM;
-
-	e = kzalloc(sizeof(struct dst_crypto_engine), GFP_KERNEL);
-	if (!e)
-		goto err_out_exit;
-	e->src = kcalloc(2 * n->max_pages, sizeof(struct scatterlist),
-			GFP_KERNEL);
-	if (!e->src)
-		goto err_out_free;
-
-	e->dst = e->src + n->max_pages;
-
-	err = dst_crypto_engine_init(e, n);
-	if (err)
-		goto err_out_free_all;
-
-	return e;
-
-err_out_free_all:
-	kfree(e->src);
-err_out_free:
-	kfree(e);
-err_out_exit:
-	return ERR_PTR(err);
-}
-
-static void dst_crypto_thread_cleanup(void *private)
-{
-	struct dst_crypto_engine *e = private;
-
-	dst_crypto_engine_exit(e);
-	kfree(e->src);
-	kfree(e);
-}
-
-/*
- * Initialize crypto engine for given node: store keys, create pool
- * of threads, initialize each one.
- *
- * Each thread has unique ID, but 0 and 1 are reserved for receiving and
- * accepting threads (if export node), so IDs could start from 2, but starting
- * them from 10 allows easily understand what this thread is for.
- */
-int dst_node_crypto_init(struct dst_node *n, struct dst_crypto_ctl *ctl)
-{
-	void *key = (ctl + 1);
-	int err = -ENOMEM, i;
-	char name[32];
-
-	if (ctl->hash_keysize) {
-		n->hash_key = kmalloc(ctl->hash_keysize, GFP_KERNEL);
-		if (!n->hash_key)
-			goto err_out_exit;
-		memcpy(n->hash_key, key, ctl->hash_keysize);
-	}
-
-	if (ctl->cipher_keysize) {
-		n->cipher_key = kmalloc(ctl->cipher_keysize, GFP_KERNEL);
-		if (!n->cipher_key)
-			goto err_out_free_hash;
-		memcpy(n->cipher_key, key, ctl->cipher_keysize);
-	}
-	memcpy(&n->crypto, ctl, sizeof(struct dst_crypto_ctl));
-
-	for (i = 0; i < ctl->thread_num; ++i) {
-		snprintf(name, sizeof(name), "%s-crypto-%d", n->name, i);
-		/* Unique ids... */
-		err = thread_pool_add_worker(n->pool, name, i + 10,
-			dst_crypto_thread_init, dst_crypto_thread_cleanup, n);
-		if (err)
-			goto err_out_free_threads;
-	}
-
-	return 0;
-
-err_out_free_threads:
-	while (--i >= 0)
-		thread_pool_del_worker_id(n->pool, i+10);
-
-	if (ctl->cipher_keysize)
-		kfree(n->cipher_key);
-	ctl->cipher_keysize = 0;
-err_out_free_hash:
-	if (ctl->hash_keysize)
-		kfree(n->hash_key);
-	ctl->hash_keysize = 0;
-err_out_exit:
-	return err;
-}
-
-void dst_node_crypto_exit(struct dst_node *n)
-{
-	struct dst_crypto_ctl *ctl = &n->crypto;
-
-	if (ctl->cipher_algo[0] || ctl->hash_algo[0]) {
-		kfree(n->hash_key);
-		kfree(n->cipher_key);
-	}
-}
-
-/*
- * Thrad pool setup callback. Just stores a transaction in private data.
- */
-static int dst_trans_crypto_setup(void *crypto_engine, void *trans)
-{
-	struct dst_crypto_engine *e = crypto_engine;
-
-	e->private = trans;
-	return 0;
-}
-
-#if 0
-static void dst_dump_bio(struct bio *bio)
-{
-	u8 *p;
-	struct bio_vec *bv;
-	int i;
-
-	bio_for_each_segment(bv, bio, i) {
-		dprintk("%s: %llu/%u: size: %u, offset: %u, data: ",
-				__func__, bio->bi_sector, bio->bi_size,
-				bv->bv_len, bv->bv_offset);
-
-		p = kmap(bv->bv_page) + bv->bv_offset;
-		for (i = 0; i < bv->bv_len; ++i)
-			printk(KERN_DEBUG "%02x ", p[i]);
-		kunmap(bv->bv_page);
-		printk("\n");
-	}
-}
-#endif
-
-/*
- * Encrypt/hash data and send it to the network.
- */
-static int dst_crypto_process_sending(struct dst_crypto_engine *e,
-		struct bio *bio, u8 *hash)
-{
-	int err;
-
-	if (e->cipher) {
-		err = dst_crypt(e, bio);
-		if (err)
-			goto err_out_exit;
-	}
-
-	if (e->hash) {
-		err = dst_hash(e, bio, hash);
-		if (err)
-			goto err_out_exit;
-
-#ifdef CONFIG_DST_DEBUG
-		{
-			unsigned int i;
-
-			/* dst_dump_bio(bio); */
-
-			printk(KERN_DEBUG "%s: bio: %llu/%u, rw: %lu, hash: ",
-				__func__, (u64)bio->bi_sector,
-				bio->bi_size, bio_data_dir(bio));
-			for (i = 0; i < crypto_hash_digestsize(e->hash); ++i)
-					printk("%02x ", hash[i]);
-			printk("\n");
-		}
-#endif
-	}
-
-	return 0;
-
-err_out_exit:
-	return err;
-}
-
-/*
- * Check if received data is valid. Decipher if it is.
- */
-static int dst_crypto_process_receiving(struct dst_crypto_engine *e,
-		struct bio *bio, u8 *hash, u8 *recv_hash)
-{
-	int err;
-
-	if (e->hash) {
-		int mismatch;
-
-		err = dst_hash(e, bio, hash);
-		if (err)
-			goto err_out_exit;
-
-		mismatch = !!memcmp(recv_hash, hash,
-				crypto_hash_digestsize(e->hash));
-#ifdef CONFIG_DST_DEBUG
-		/* dst_dump_bio(bio); */
-
-		printk(KERN_DEBUG "%s: bio: %llu/%u, rw: %lu, hash mismatch: %d",
-			__func__, (u64)bio->bi_sector, bio->bi_size,
-			bio_data_dir(bio), mismatch);
-		if (mismatch) {
-			unsigned int i;
-
-			printk(", recv/calc: ");
-			for (i = 0; i < crypto_hash_digestsize(e->hash); ++i)
-				printk("%02x/%02x ", recv_hash[i], hash[i]);
-
-		}
-		printk("\n");
-#endif
-		err = -1;
-		if (mismatch)
-			goto err_out_exit;
-	}
-
-	if (e->cipher) {
-		err = dst_crypt(e, bio);
-		if (err)
-			goto err_out_exit;
-	}
-
-	return 0;
-
-err_out_exit:
-	return err;
-}
-
-/*
- * Thread pool callback to encrypt data and send it to the netowork.
- */
-static int dst_trans_crypto_action(void *crypto_engine, void *schedule_data)
-{
-	struct dst_crypto_engine *e = crypto_engine;
-	struct dst_trans *t = schedule_data;
-	struct bio *bio = t->bio;
-	int err;
-
-	dprintk("%s: t: %p, gen: %llu, cipher: %p, hash: %p.\n",
-			__func__, t, t->gen, e->cipher, e->hash);
-
-	e->enc = t->enc;
-	e->iv = dst_gen_iv(t);
-
-	if (bio_data_dir(bio) == WRITE) {
-		err = dst_crypto_process_sending(e, bio, t->cmd.hash);
-		if (err)
-			goto err_out_exit;
-
-		if (e->hash) {
-			t->cmd.csize = crypto_hash_digestsize(e->hash);
-			t->cmd.size += t->cmd.csize;
-		}
-
-		return dst_trans_send(t);
-	} else {
-		u8 *hash = e->data + e->size/2;
-
-		err = dst_crypto_process_receiving(e, bio, hash, t->cmd.hash);
-		if (err)
-			goto err_out_exit;
-
-		dst_trans_remove(t);
-		dst_trans_put(t);
-	}
-
-	return 0;
-
-err_out_exit:
-	t->error = err;
-	dst_trans_put(t);
-	return err;
-}
-
-/*
- * Schedule crypto processing for given transaction.
- */
-int dst_trans_crypto(struct dst_trans *t)
-{
-	struct dst_node *n = t->n;
-	int err;
-
-	err = thread_pool_schedule(n->pool,
-		dst_trans_crypto_setup, dst_trans_crypto_action,
-		t, MAX_SCHEDULE_TIMEOUT);
-	if (err)
-		goto err_out_exit;
-
-	return 0;
-
-err_out_exit:
-	dst_trans_put(t);
-	return err;
-}
-
-/*
- * Crypto machinery for the export node.
- */
-static int dst_export_crypto_setup(void *crypto_engine, void *bio)
-{
-	struct dst_crypto_engine *e = crypto_engine;
-
-	e->private = bio;
-	return 0;
-}
-
-static int dst_export_crypto_action(void *crypto_engine, void *schedule_data)
-{
-	struct dst_crypto_engine *e = crypto_engine;
-	struct bio *bio = schedule_data;
-	struct dst_export_priv *p = bio->bi_private;
-	int err;
-
-	dprintk("%s: e: %p, data: %p, bio: %llu/%u, dir: %lu.\n",
-			__func__, e, e->data, (u64)bio->bi_sector,
-			bio->bi_size, bio_data_dir(bio));
-
-	e->enc = (bio_data_dir(bio) == READ);
-	e->iv = p->cmd.id;
-
-	if (bio_data_dir(bio) == WRITE) {
-		u8 *hash = e->data + e->size/2;
-
-		err = dst_crypto_process_receiving(e, bio, hash, p->cmd.hash);
-		if (err)
-			goto err_out_exit;
-
-		generic_make_request(bio);
-	} else {
-		err = dst_crypto_process_sending(e, bio, p->cmd.hash);
-		if (err)
-			goto err_out_exit;
-
-		if (e->hash) {
-			p->cmd.csize = crypto_hash_digestsize(e->hash);
-			p->cmd.size += p->cmd.csize;
-		}
-
-		err = dst_export_send_bio(bio);
-	}
-	return 0;
-
-err_out_exit:
-	bio_put(bio);
-	return err;
-}
-
-int dst_export_crypto(struct dst_node *n, struct bio *bio)
-{
-	int err;
-
-	err = thread_pool_schedule(n->pool,
-		dst_export_crypto_setup, dst_export_crypto_action,
-		bio, MAX_SCHEDULE_TIMEOUT);
-	if (err)
-		goto err_out_exit;
-
-	return 0;
-
-err_out_exit:
-	bio_put(bio);
-	return err;
-}
diff --git a/drivers/staging/dst/dcore.c b/drivers/staging/dst/dcore.c
deleted file mode 100644
index c83ca7e3d048..000000000000
--- a/drivers/staging/dst/dcore.c
+++ /dev/null
@@ -1,968 +0,0 @@
-/*
- * 2007+ Copyright (c) Evgeniy Polyakov <zbr@ioremap.net>
- * All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- */
-
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/blkdev.h>
-#include <linux/bio.h>
-#include <linux/buffer_head.h>
-#include <linux/connector.h>
-#include <linux/dst.h>
-#include <linux/device.h>
-#include <linux/jhash.h>
-#include <linux/idr.h>
-#include <linux/init.h>
-#include <linux/namei.h>
-#include <linux/slab.h>
-#include <linux/socket.h>
-
-#include <linux/in.h>
-#include <linux/in6.h>
-
-#include <net/sock.h>
-
-static int dst_major;
-
-static DEFINE_MUTEX(dst_hash_lock);
-static struct list_head *dst_hashtable;
-static unsigned int dst_hashtable_size = 128;
-module_param(dst_hashtable_size, uint, 0644);
-
-static char dst_name[] = "Dementianting goldfish";
-
-static DEFINE_IDR(dst_index_idr);
-static struct cb_id cn_dst_id = { CN_DST_IDX, CN_DST_VAL };
-
-/*
- * DST sysfs tree for device called 'storage':
- *
- * /sys/bus/dst/devices/storage/
- * /sys/bus/dst/devices/storage/type : 192.168.4.80:1025
- * /sys/bus/dst/devices/storage/size : 800
- * /sys/bus/dst/devices/storage/name : storage
- */
-
-static int dst_dev_match(struct device *dev, struct device_driver *drv)
-{
-	return 1;
-}
-
-static struct bus_type dst_dev_bus_type = {
-	.name 		= "dst",
-	.match 		= &dst_dev_match,
-};
-
-static void dst_node_release(struct device *dev)
-{
-	struct dst_info *info = container_of(dev, struct dst_info, device);
-
-	kfree(info);
-}
-
-static struct device dst_node_dev = {
-	.bus 		= &dst_dev_bus_type,
-	.release 	= &dst_node_release
-};
-
-/*
- * Setting size of the node after it was changed.
- */
-static void dst_node_set_size(struct dst_node *n)
-{
-	struct block_device *bdev;
-
-	set_capacity(n->disk, n->size >> 9);
-
-	bdev = bdget_disk(n->disk, 0);
-	if (bdev) {
-		mutex_lock(&bdev->bd_inode->i_mutex);
-		i_size_write(bdev->bd_inode, n->size);
-		mutex_unlock(&bdev->bd_inode->i_mutex);
-		bdput(bdev);
-	}
-}
-
-/*
- * Distributed storage request processing function.
- */
-static int dst_request(struct request_queue *q, struct bio *bio)
-{
-	struct dst_node *n = q->queuedata;
-	int err = -EIO;
-
-	if (bio_empty_barrier(bio) && !blk_queue_discard(q)) {
-		/*
-		 * This is a dirty^Wnice hack, but if we complete this
-		 * operation with -EOPNOTSUPP like intended, XFS
-		 * will stuck and freeze the machine. This may be
-		 * not particulary XFS problem though, but it is the
-		 * only FS which sends empty barrier at umount time
-		 * I worked with.
-		 *
-		 * Empty barriers are not allowed anyway, see 51fd77bd9f512
-		 * for example, although later it was changed to
-		 * bio_rw_flagged(bio, BIO_RW_DISCARD) only, which does not
-		 * work in this case.
-		 */
-		/* err = -EOPNOTSUPP; */
-		err = 0;
-		goto end_io;
-	}
-
-	bio_get(bio);
-
-	return dst_process_bio(n, bio);
-
-end_io:
-	bio_endio(bio, err);
-	return err;
-}
-
-/*
- * Open/close callbacks for appropriate block device.
- */
-static int dst_bdev_open(struct block_device *bdev, fmode_t mode)
-{
-	struct dst_node *n = bdev->bd_disk->private_data;
-
-	dst_node_get(n);
-	return 0;
-}
-
-static int dst_bdev_release(struct gendisk *disk, fmode_t mode)
-{
-	struct dst_node *n = disk->private_data;
-
-	dst_node_put(n);
-	return 0;
-}
-
-static struct block_device_operations dst_blk_ops = {
-	.open		= dst_bdev_open,
-	.release	= dst_bdev_release,
-	.owner		= THIS_MODULE,
-};
-
-/*
- * Block layer binding - disk is created when array is fully configured
- * by userspace request.
- */
-static int dst_node_create_disk(struct dst_node *n)
-{
-	int err = -ENOMEM;
-	u32 index = 0;
-
-	n->queue = blk_init_queue(NULL, NULL);
-	if (!n->queue)
-		goto err_out_exit;
-
-	n->queue->queuedata = n;
-	blk_queue_make_request(n->queue, dst_request);
-	blk_queue_max_phys_segments(n->queue, n->max_pages);
-	blk_queue_max_hw_segments(n->queue, n->max_pages);
-
-	err = -ENOMEM;
-	n->disk = alloc_disk(1);
-	if (!n->disk)
-		goto err_out_free_queue;
-
-	if (!(n->state->permissions & DST_PERM_WRITE)) {
-		printk(KERN_INFO "DST node %s attached read-only.\n", n->name);
-		set_disk_ro(n->disk, 1);
-	}
-
-	if (!idr_pre_get(&dst_index_idr, GFP_KERNEL))
-		goto err_out_put;
-
-	mutex_lock(&dst_hash_lock);
-	err = idr_get_new(&dst_index_idr, NULL, &index);
-	mutex_unlock(&dst_hash_lock);
-	if (err)
-		goto err_out_put;
-
-	n->disk->major = dst_major;
-	n->disk->first_minor = index;
-	n->disk->fops = &dst_blk_ops;
-	n->disk->queue = n->queue;
-	n->disk->private_data = n;
-	snprintf(n->disk->disk_name, sizeof(n->disk->disk_name),
-			"dst-%s", n->name);
-
-	return 0;
-
-err_out_put:
-	put_disk(n->disk);
-err_out_free_queue:
-	blk_cleanup_queue(n->queue);
-err_out_exit:
-	return err;
-}
-
-/*
- * Sysfs machinery: show device's size.
- */
-static ssize_t dst_show_size(struct device *dev,
-		struct device_attribute *attr, char *buf)
-{
-	struct dst_info *info = container_of(dev, struct dst_info, device);
-
-	return sprintf(buf, "%llu\n", info->size);
-}
-
-/*
- * Show local exported device.
- */
-static ssize_t dst_show_local(struct device *dev,
-		struct device_attribute *attr, char *buf)
-{
-	struct dst_info *info = container_of(dev, struct dst_info, device);
-
-	return sprintf(buf, "%s\n", info->local);
-}
-
-/*
- * Shows type of the remote node - device major/minor number
- * for local nodes and address (af_inet ipv4/ipv6 only) for remote nodes.
- */
-static ssize_t dst_show_type(struct device *dev,
-		struct device_attribute *attr, char *buf)
-{
-	struct dst_info *info = container_of(dev, struct dst_info, device);
-	int family = info->net.addr.sa_family;
-
-	if (family == AF_INET) {
-		struct sockaddr_in *sin = (struct sockaddr_in *)&info->net.addr;
-		return sprintf(buf, "%u.%u.%u.%u:%d\n",
-			NIPQUAD(sin->sin_addr.s_addr), ntohs(sin->sin_port));
-	} else if (family == AF_INET6) {
-		struct sockaddr_in6 *sin = (struct sockaddr_in6 *)
-				&info->net.addr;
-		return sprintf(buf,
-			"%pi6:%d\n",
-			&sin->sin6_addr, ntohs(sin->sin6_port));
-	} else {
-		int i, sz = PAGE_SIZE - 2; /* 0 symbol and '\n' below */
-		int size, addrlen = info->net.addr.sa_data_len;
-		unsigned char *a = (unsigned char *)&info->net.addr.sa_data;
-		char *buf_orig = buf;
-
-		size = snprintf(buf, sz, "family: %d, addrlen: %u, addr: ",
-				family, addrlen);
-		sz -= size;
-		buf += size;
-
-		for (i = 0; i < addrlen; ++i) {
-			if (sz < 3)
-				break;
-
-			size = snprintf(buf, sz, "%02x ", a[i]);
-			sz -= size;
-			buf += size;
-		}
-		buf += sprintf(buf, "\n");
-
-		return buf - buf_orig;
-	}
-	return 0;
-}
-
-static struct device_attribute dst_node_attrs[] = {
-	__ATTR(size, 0444, dst_show_size, NULL),
-	__ATTR(type, 0444, dst_show_type, NULL),
-	__ATTR(local, 0444, dst_show_local, NULL),
-};
-
-static int dst_create_node_attributes(struct dst_node *n)
-{
-	int err, i;
-
-	for (i = 0; i < ARRAY_SIZE(dst_node_attrs); ++i) {
-		err = device_create_file(&n->info->device,
-				&dst_node_attrs[i]);
-		if (err)
-			goto err_out_remove_all;
-	}
-	return 0;
-
-err_out_remove_all:
-	while (--i >= 0)
-		device_remove_file(&n->info->device,
-				&dst_node_attrs[i]);
-
-	return err;
-}
-
-static void dst_remove_node_attributes(struct dst_node *n)
-{
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(dst_node_attrs); ++i)
-		device_remove_file(&n->info->device,
-				&dst_node_attrs[i]);
-}
-
-/*
- * Sysfs cleanup and initialization.
- * Shows number of useful parameters.
- */
-static void dst_node_sysfs_exit(struct dst_node *n)
-{
-	if (n->info) {
-		dst_remove_node_attributes(n);
-		device_unregister(&n->info->device);
-		n->info = NULL;
-	}
-}
-
-static int dst_node_sysfs_init(struct dst_node *n)
-{
-	int err;
-
-	n->info = kzalloc(sizeof(struct dst_info), GFP_KERNEL);
-	if (!n->info)
-		return -ENOMEM;
-
-	memcpy(&n->info->device, &dst_node_dev, sizeof(struct device));
-	n->info->size = n->size;
-
-	dev_set_name(&n->info->device, "dst-%s", n->name);
-	err = device_register(&n->info->device);
-	if (err) {
-		dprintk(KERN_ERR "Failed to register node '%s', err: %d.\n",
-				n->name, err);
-		goto err_out_exit;
-	}
-
-	dst_create_node_attributes(n);
-
-	return 0;
-
-err_out_exit:
-	kfree(n->info);
-	n->info = NULL;
-	return err;
-}
-
-/*
- * DST node hash tables machinery.
- */
-static inline unsigned int dst_hash(char *str, unsigned int size)
-{
-	return jhash(str, size, 0) % dst_hashtable_size;
-}
-
-static void dst_node_remove(struct dst_node *n)
-{
-	mutex_lock(&dst_hash_lock);
-	list_del_init(&n->node_entry);
-	mutex_unlock(&dst_hash_lock);
-}
-
-static void dst_node_add(struct dst_node *n)
-{
-	unsigned hash = dst_hash(n->name, sizeof(n->name));
-
-	mutex_lock(&dst_hash_lock);
-	list_add_tail(&n->node_entry, &dst_hashtable[hash]);
-	mutex_unlock(&dst_hash_lock);
-}
-
-/*
- * Cleaning node when it is about to be freed.
- * There are still users of the socket though,
- * so connection cleanup should be protected.
- */
-static void dst_node_cleanup(struct dst_node *n)
-{
-	struct dst_state *st = n->state;
-
-	if (!st)
-		return;
-
-	if (n->queue) {
-		blk_cleanup_queue(n->queue);
-
-		mutex_lock(&dst_hash_lock);
-		idr_remove(&dst_index_idr, n->disk->first_minor);
-		mutex_unlock(&dst_hash_lock);
-
-		put_disk(n->disk);
-	}
-
-	if (n->bdev) {
-		sync_blockdev(n->bdev);
-		close_bdev_exclusive(n->bdev, FMODE_READ|FMODE_WRITE);
-	}
-
-	dst_state_lock(st);
-	st->need_exit = 1;
-	dst_state_exit_connected(st);
-	dst_state_unlock(st);
-
-	wake_up(&st->thread_wait);
-
-	dst_state_put(st);
-	n->state = NULL;
-}
-
-/*
- * Free security attributes attached to given node.
- */
-static void dst_security_exit(struct dst_node *n)
-{
-	struct dst_secure *s, *tmp;
-
-	list_for_each_entry_safe(s, tmp, &n->security_list, sec_entry) {
-		list_del(&s->sec_entry);
-		kfree(s);
-	}
-}
-
-/*
- * Free node when there are no more users.
- * Actually node has to be freed on behalf od userspace process,
- * since there are number of threads, which are embedded in the
- * node, so they can not exit and free node from there, that is
- * why there is a wakeup if reference counter is not equal to zero.
- */
-void dst_node_put(struct dst_node *n)
-{
-	if (unlikely(!n))
-		return;
-
-	dprintk("%s: n: %p, refcnt: %d.\n",
-			__func__, n, atomic_read(&n->refcnt));
-
-	if (atomic_dec_and_test(&n->refcnt)) {
-		dst_node_remove(n);
-		n->trans_scan_timeout = 0;
-		dst_node_cleanup(n);
-		thread_pool_destroy(n->pool);
-		dst_node_sysfs_exit(n);
-		dst_node_crypto_exit(n);
-		dst_security_exit(n);
-		dst_node_trans_exit(n);
-
-		kfree(n);
-
-		dprintk("%s: freed n: %p.\n", __func__, n);
-	} else {
-		wake_up(&n->wait);
-	}
-}
-
-/*
- * Setting up export device: lookup by the name, get its size
- * and setup listening socket, which will accept clients, which
- * will submit IO for given storage.
- */
-static int dst_setup_export(struct dst_node *n, struct dst_ctl *ctl,
-		struct dst_export_ctl *le)
-{
-	int err;
-
-	snprintf(n->info->local, sizeof(n->info->local), "%s", le->device);
-
-	n->bdev = open_bdev_exclusive(le->device, FMODE_READ|FMODE_WRITE, NULL);
-	if (IS_ERR(n->bdev))
-		return PTR_ERR(n->bdev);
-
-	if (n->size != 0)
-		n->size = min_t(loff_t, n->bdev->bd_inode->i_size, n->size);
-	else
-		n->size = n->bdev->bd_inode->i_size;
-
-	n->info->size = n->size;
-	err = dst_node_init_listened(n, le);
-	if (err)
-		goto err_out_cleanup;
-
-	return 0;
-
-err_out_cleanup:
-	close_bdev_exclusive(n->bdev, FMODE_READ|FMODE_WRITE);
-	n->bdev = NULL;
-
-	return err;
-}
-
-/* Empty thread pool callbacks for the network processing threads. */
-static inline void *dst_thread_network_init(void *data)
-{
-	dprintk("%s: data: %p.\n", __func__, data);
-	return data;
-}
-
-static inline void dst_thread_network_cleanup(void *data)
-{
-	dprintk("%s: data: %p.\n", __func__, data);
-}
-
-/*
- * Allocate DST node and initialize some of its parameters.
- */
-static struct dst_node *dst_alloc_node(struct dst_ctl *ctl,
-		int (*start)(struct dst_node *),
-		int num)
-{
-	struct dst_node *n;
-	int err;
-
-	n = kzalloc(sizeof(struct dst_node), GFP_KERNEL);
-	if (!n)
-		return NULL;
-
-	INIT_LIST_HEAD(&n->node_entry);
-
-	INIT_LIST_HEAD(&n->security_list);
-	mutex_init(&n->security_lock);
-
-	init_waitqueue_head(&n->wait);
-
-	n->trans_scan_timeout = msecs_to_jiffies(ctl->trans_scan_timeout);
-	if (!n->trans_scan_timeout)
-		n->trans_scan_timeout = HZ;
-
-	n->trans_max_retries = ctl->trans_max_retries;
-	if (!n->trans_max_retries)
-		n->trans_max_retries = 10;
-
-	/*
-	 * Pretty much arbitrary default numbers.
-	 * 32 matches maximum number of pages in bio originated from ext3 (31).
-	 */
-	n->max_pages = ctl->max_pages;
-	if (!n->max_pages)
-		n->max_pages = 32;
-
-	if (n->max_pages > 1024)
-		n->max_pages = 1024;
-
-	n->start = start;
-	n->size = ctl->size;
-
-	atomic_set(&n->refcnt, 1);
-	atomic_long_set(&n->gen, 0);
-	snprintf(n->name, sizeof(n->name), "%s", ctl->name);
-
-	err = dst_node_sysfs_init(n);
-	if (err)
-		goto err_out_free;
-
-	n->pool = thread_pool_create(num, n->name, dst_thread_network_init,
-			dst_thread_network_cleanup, n);
-	if (IS_ERR(n->pool)) {
-		err = PTR_ERR(n->pool);
-		goto err_out_sysfs_exit;
-	}
-
-	dprintk("%s: n: %p, name: %s.\n", __func__, n, n->name);
-
-	return n;
-
-err_out_sysfs_exit:
-	dst_node_sysfs_exit(n);
-err_out_free:
-	kfree(n);
-	return NULL;
-}
-
-/*
- * Starting a node, connected to the remote server:
- * register block device and initialize transaction mechanism.
- * In revers order though.
- *
- * It will autonegotiate some parameters with the remote node
- * and update local if needed.
- *
- * Transaction initialization should be the last thing before
- * starting the node, since transaction should include not only
- * block IO, but also crypto related data (if any), which are
- * initialized separately.
- */
-static int dst_start_remote(struct dst_node *n)
-{
-	int err;
-
-	err = dst_node_trans_init(n, sizeof(struct dst_trans));
-	if (err)
-		return err;
-
-	err = dst_node_create_disk(n);
-	if (err)
-		return err;
-
-	dst_node_set_size(n);
-	add_disk(n->disk);
-
-	dprintk("DST: started remote node '%s', minor: %d.\n",
-			n->name, n->disk->first_minor);
-
-	return 0;
-}
-
-/*
- * Adding remote node and initialize connection.
- */
-static int dst_add_remote(struct dst_node *n, struct dst_ctl *ctl,
-		void *data, unsigned int size)
-{
-	int err;
-	struct dst_network_ctl *rctl = data;
-
-	if (n)
-		return -EEXIST;
-
-	if (size != sizeof(struct dst_network_ctl))
-		return -EINVAL;
-
-	n = dst_alloc_node(ctl, dst_start_remote, 1);
-	if (!n)
-		return -ENOMEM;
-
-	memcpy(&n->info->net, rctl, sizeof(struct dst_network_ctl));
-	err = dst_node_init_connected(n, rctl);
-	if (err)
-		goto err_out_free;
-
-	dst_node_add(n);
-
-	return 0;
-
-err_out_free:
-	dst_node_put(n);
-	return err;
-}
-
-/*
- * Adding export node: initializing block device and listening socket.
- */
-static int dst_add_export(struct dst_node *n, struct dst_ctl *ctl,
-		void *data, unsigned int size)
-{
-	int err;
-	struct dst_export_ctl *le = data;
-
-	if (n)
-		return -EEXIST;
-
-	if (size != sizeof(struct dst_export_ctl))
-		return -EINVAL;
-
-	n = dst_alloc_node(ctl, dst_start_export, 2);
-	if (!n)
-		return -EINVAL;
-
-	err = dst_setup_export(n, ctl, le);
-	if (err)
-		goto err_out_free;
-
-	dst_node_add(n);
-
-	return 0;
-
-err_out_free:
-	dst_node_put(n);
-	return err;
-}
-
-static int dst_node_remove_unload(struct dst_node *n)
-{
-	printk(KERN_INFO "STOPPED name: '%s', size: %llu.\n",
-			n->name, n->size);
-
-	if (n->disk)
-		del_gendisk(n->disk);
-
-	dst_node_remove(n);
-	dst_node_sysfs_exit(n);
-
-	/*
-	 * This is not a hack. Really.
-	 * Node's reference counter allows to implement fine grained
-	 * node freeing, but since all transactions (which hold node's
-	 * reference counter) are processed in the dedicated thread,
-	 * it is possible that reference will hit zero in that thread,
-	 * so we will not be able to exit thread and cleanup the node.
-	 *
-	 * So, we remove disk, so no new activity is possible, and
-	 * wait until all pending transaction are completed (either
-	 * in receiving thread or by timeout in workqueue), in this
-	 * case reference counter will be less or equal to 2 (once set in
-	 * dst_alloc_node() and then in connector message parser;
-	 * or when we force module unloading, and connector message
-	 * parser does not hold a reference, in this case reference
-	 * counter will be equal to 1),
-	 * and subsequent dst_node_put() calls will free the node.
-	 */
-	dprintk("%s: going to sleep with %d refcnt.\n",
-			__func__, atomic_read(&n->refcnt));
-	wait_event(n->wait, atomic_read(&n->refcnt) <= 2);
-
-	dst_node_put(n);
-	return 0;
-}
-
-/*
- * Remove node from the hash table.
- */
-static int dst_del_node(struct dst_node *n, struct dst_ctl *ctl,
-		void *data, unsigned int size)
-{
-	if (!n)
-		return -ENODEV;
-
-	return dst_node_remove_unload(n);
-}
-
-/*
- * Initialize crypto processing for given node.
- */
-static int dst_crypto_init(struct dst_node *n, struct dst_ctl *ctl,
-		void *data, unsigned int size)
-{
-	struct dst_crypto_ctl *crypto = data;
-
-	if (!n)
-		return -ENODEV;
-
-	if (size != sizeof(struct dst_crypto_ctl) + crypto->hash_keysize +
-			crypto->cipher_keysize)
-		return -EINVAL;
-
-	if (n->trans_cache)
-		return -EEXIST;
-
-	return dst_node_crypto_init(n, crypto);
-}
-
-/*
- * Security attributes for given node.
- */
-static int dst_security_init(struct dst_node *n, struct dst_ctl *ctl,
-		void *data, unsigned int size)
-{
-	struct dst_secure *s;
-
-	if (!n)
-		return -ENODEV;
-
-	if (size != sizeof(struct dst_secure_user))
-		return -EINVAL;
-
-	s = kmalloc(sizeof(struct dst_secure), GFP_KERNEL);
-	if (!s)
-		return -ENOMEM;
-
-	memcpy(&s->sec, data, size);
-
-	mutex_lock(&n->security_lock);
-	list_add_tail(&s->sec_entry, &n->security_list);
-	mutex_unlock(&n->security_lock);
-
-	return 0;
-}
-
-/*
- * Kill'em all!
- */
-static int dst_start_node(struct dst_node *n, struct dst_ctl *ctl,
-		void *data, unsigned int size)
-{
-	int err;
-
-	if (!n)
-		return -ENODEV;
-
-	if (n->trans_cache)
-		return 0;
-
-	err = n->start(n);
-	if (err)
-		return err;
-
-	printk(KERN_INFO "STARTED name: '%s', size: %llu.\n", n->name, n->size);
-	return 0;
-}
-
-typedef int (*dst_command_func)(struct dst_node *n, struct dst_ctl *ctl,
-		void *data, unsigned int size);
-
-/*
- * List of userspace commands.
- */
-static dst_command_func dst_commands[] = {
-	[DST_ADD_REMOTE] = &dst_add_remote,
-	[DST_ADD_EXPORT] = &dst_add_export,
-	[DST_DEL_NODE] = &dst_del_node,
-	[DST_CRYPTO] = &dst_crypto_init,
-	[DST_SECURITY] = &dst_security_init,
-	[DST_START] = &dst_start_node,
-};
-
-/*
- * Configuration parser.
- */
-static void cn_dst_callback(struct cn_msg *msg, struct netlink_skb_parms *nsp)
-{
-	struct dst_ctl *ctl;
-	int err;
-	struct dst_ctl_ack ack;
-	struct dst_node *n = NULL, *tmp;
-	unsigned int hash;
-
-	if (!cap_raised(nsp->eff_cap, CAP_SYS_ADMIN)) {
-		err = -EPERM;
-		goto out;
-	}
-
-	if (msg->len < sizeof(struct dst_ctl)) {
-		err = -EBADMSG;
-		goto out;
-	}
-
-	ctl = (struct dst_ctl *)msg->data;
-
-	if (ctl->cmd >= DST_CMD_MAX) {
-		err = -EINVAL;
-		goto out;
-	}
-	hash = dst_hash(ctl->name, sizeof(ctl->name));
-
-	mutex_lock(&dst_hash_lock);
-	list_for_each_entry(tmp, &dst_hashtable[hash], node_entry) {
-		if (!memcmp(tmp->name, ctl->name, sizeof(tmp->name))) {
-			n = tmp;
-			dst_node_get(n);
-			break;
-		}
-	}
-	mutex_unlock(&dst_hash_lock);
-
-	err = dst_commands[ctl->cmd](n, ctl, msg->data + sizeof(struct dst_ctl),
-			msg->len - sizeof(struct dst_ctl));
-
-	dst_node_put(n);
-out:
-	memcpy(&ack.msg, msg, sizeof(struct cn_msg));
-
-	ack.msg.ack = msg->ack + 1;
-	ack.msg.len = sizeof(struct dst_ctl_ack) - sizeof(struct cn_msg);
-
-	ack.error = err;
-
-	cn_netlink_send(&ack.msg, 0, GFP_KERNEL);
-}
-
-/*
- * Global initialization: sysfs, hash table, block device registration,
- * connector and various caches.
- */
-static int __init dst_sysfs_init(void)
-{
-	return bus_register(&dst_dev_bus_type);
-}
-
-static void dst_sysfs_exit(void)
-{
-	bus_unregister(&dst_dev_bus_type);
-}
-
-static int __init dst_hashtable_init(void)
-{
-	unsigned int i;
-
-	dst_hashtable = kcalloc(dst_hashtable_size, sizeof(struct list_head),
-			GFP_KERNEL);
-	if (!dst_hashtable)
-		return -ENOMEM;
-
-	for (i = 0; i < dst_hashtable_size; ++i)
-		INIT_LIST_HEAD(&dst_hashtable[i]);
-
-	return 0;
-}
-
-static void dst_hashtable_exit(void)
-{
-	unsigned int i;
-	struct dst_node *n, *tmp;
-
-	for (i = 0; i < dst_hashtable_size; ++i) {
-		list_for_each_entry_safe(n, tmp, &dst_hashtable[i], node_entry) {
-			dst_node_remove_unload(n);
-		}
-	}
-
-	kfree(dst_hashtable);
-}
-
-static int __init dst_sys_init(void)
-{
-	int err = -ENOMEM;
-
-	err = dst_hashtable_init();
-	if (err)
-		goto err_out_exit;
-
-	err = dst_export_init();
-	if (err)
-		goto err_out_hashtable_exit;
-
-	err = register_blkdev(dst_major, DST_NAME);
-	if (err < 0)
-		goto err_out_export_exit;
-	if (err)
-		dst_major = err;
-
-	err = dst_sysfs_init();
-	if (err)
-		goto err_out_unregister;
-
-	err = cn_add_callback(&cn_dst_id, "DST", cn_dst_callback);
-	if (err)
-		goto err_out_sysfs_exit;
-
-	printk(KERN_INFO "Distributed storage, '%s' release.\n", dst_name);
-
-	return 0;
-
-err_out_sysfs_exit:
-	dst_sysfs_exit();
-err_out_unregister:
-	unregister_blkdev(dst_major, DST_NAME);
-err_out_export_exit:
-	dst_export_exit();
-err_out_hashtable_exit:
-	dst_hashtable_exit();
-err_out_exit:
-	return err;
-}
-
-static void __exit dst_sys_exit(void)
-{
-	cn_del_callback(&cn_dst_id);
-	unregister_blkdev(dst_major, DST_NAME);
-	dst_hashtable_exit();
-	dst_sysfs_exit();
-	dst_export_exit();
-}
-
-module_init(dst_sys_init);
-module_exit(dst_sys_exit);
-
-MODULE_DESCRIPTION("Distributed storage");
-MODULE_AUTHOR("Evgeniy Polyakov <zbr@ioremap.net>");
-MODULE_LICENSE("GPL");
diff --git a/drivers/staging/dst/export.c b/drivers/staging/dst/export.c
deleted file mode 100644
index c324230e8b60..000000000000
--- a/drivers/staging/dst/export.c
+++ /dev/null
@@ -1,660 +0,0 @@
-/*
- * 2007+ Copyright (c) Evgeniy Polyakov <zbr@ioremap.net>
- * All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- */
-
-#include <linux/blkdev.h>
-#include <linux/bio.h>
-#include <linux/dst.h>
-#include <linux/in.h>
-#include <linux/in6.h>
-#include <linux/poll.h>
-#include <linux/slab.h>
-#include <linux/socket.h>
-
-#include <net/sock.h>
-
-/*
- * Export bioset is used for server block IO requests.
- */
-static struct bio_set *dst_bio_set;
-
-int __init dst_export_init(void)
-{
-	int err = -ENOMEM;
-
-	dst_bio_set = bioset_create(32, sizeof(struct dst_export_priv));
-	if (!dst_bio_set)
-		goto err_out_exit;
-
-	return 0;
-
-err_out_exit:
-	return err;
-}
-
-void dst_export_exit(void)
-{
-	bioset_free(dst_bio_set);
-}
-
-/*
- * When client connects and autonegotiates with the server node,
- * its permissions are checked in a security attributes and sent
- * back.
- */
-static unsigned int dst_check_permissions(struct dst_state *main,
-		struct dst_state *st)
-{
-	struct dst_node *n = main->node;
-	struct dst_secure *sentry;
-	struct dst_secure_user *s;
-	struct saddr *sa = &st->ctl.addr;
-	unsigned int perm = 0;
-
-	mutex_lock(&n->security_lock);
-	list_for_each_entry(sentry, &n->security_list, sec_entry) {
-		s = &sentry->sec;
-
-		if (s->addr.sa_family != sa->sa_family)
-			continue;
-
-		if (s->addr.sa_data_len != sa->sa_data_len)
-			continue;
-
-		/*
-		 * This '2' below is a port field. This may be very wrong to do
-		 * in atalk for example though. If there will be any need
-		 * to extent protocol to something else, I can create
-		 * per-family helpers and use them instead of this memcmp.
-		 */
-		if (memcmp(s->addr.sa_data + 2, sa->sa_data + 2,
-					sa->sa_data_len - 2))
-			continue;
-
-		perm = s->permissions;
-	}
-	mutex_unlock(&n->security_lock);
-
-	return perm;
-}
-
-/*
- * Accept new client: allocate appropriate network state and check permissions.
- */
-static struct dst_state *dst_accept_client(struct dst_state *st)
-{
-	unsigned int revents = 0;
-	unsigned int err_mask = POLLERR | POLLHUP | POLLRDHUP;
-	unsigned int mask = err_mask | POLLIN;
-	struct dst_node *n = st->node;
-	int err = 0;
-	struct socket *sock = NULL;
-	struct dst_state *new;
-
-	while (!err && !sock) {
-		revents = dst_state_poll(st);
-
-		if (!(revents & mask)) {
-			DEFINE_WAIT(wait);
-
-			for (;;) {
-				prepare_to_wait(&st->thread_wait,
-						&wait, TASK_INTERRUPTIBLE);
-				if (!n->trans_scan_timeout || st->need_exit)
-					break;
-
-				revents = dst_state_poll(st);
-
-				if (revents & mask)
-					break;
-
-				if (signal_pending(current))
-					break;
-
-				/*
-				 * Magic HZ? Polling check above is not safe in
-				 * all cases (like socket reset in BH context),
-				 * so it is simpler just to postpone it to the
-				 * process context instead of implementing
-				 * special locking there.
-				 */
-				schedule_timeout(HZ);
-			}
-			finish_wait(&st->thread_wait, &wait);
-		}
-
-		err = -ECONNRESET;
-		dst_state_lock(st);
-
-		dprintk("%s: st: %p, revents: %x [err: %d, in: %d].\n",
-			__func__, st, revents, revents & err_mask,
-			revents & POLLIN);
-
-		if (revents & err_mask) {
-			dprintk("%s: revents: %x, socket: %p, err: %d.\n",
-					__func__, revents, st->socket, err);
-			err = -ECONNRESET;
-		}
-
-		if (!n->trans_scan_timeout || st->need_exit)
-			err = -ENODEV;
-
-		if (st->socket && (revents & POLLIN))
-			err = kernel_accept(st->socket, &sock, 0);
-
-		dst_state_unlock(st);
-	}
-
-	if (err)
-		goto err_out_exit;
-
-	new = dst_state_alloc(st->node);
-	if (IS_ERR(new)) {
-		err = -ENOMEM;
-		goto err_out_release;
-	}
-	new->socket = sock;
-
-	new->ctl.addr.sa_data_len = sizeof(struct sockaddr);
-	err = kernel_getpeername(sock, (struct sockaddr *)&new->ctl.addr,
-			(int *)&new->ctl.addr.sa_data_len);
-	if (err)
-		goto err_out_put;
-
-	new->permissions = dst_check_permissions(st, new);
-	if (new->permissions == 0) {
-		err = -EPERM;
-		dst_dump_addr(sock, (struct sockaddr *)&new->ctl.addr,
-				"Client is not allowed to connect");
-		goto err_out_put;
-	}
-
-	err = dst_poll_init(new);
-	if (err)
-		goto err_out_put;
-
-	dst_dump_addr(sock, (struct sockaddr *)&new->ctl.addr,
-			"Connected client");
-
-	return new;
-
-err_out_put:
-	dst_state_put(new);
-err_out_release:
-	sock_release(sock);
-err_out_exit:
-	return ERR_PTR(err);
-}
-
-/*
- * Each server's block request sometime finishes.
- * Usually it happens in hard irq context of the appropriate controller,
- * so to play good with all cases we just queue BIO into the queue
- * and wake up processing thread, which gets completed request and
- * send (encrypting if needed) it back to the client (if it was a read
- * request), or sends back reply that writing successfully completed.
- */
-static int dst_export_process_request_queue(struct dst_state *st)
-{
-	unsigned long flags;
-	struct dst_export_priv *p = NULL;
-	struct bio *bio;
-	int err = 0;
-
-	while (!list_empty(&st->request_list)) {
-		spin_lock_irqsave(&st->request_lock, flags);
-		if (!list_empty(&st->request_list)) {
-			p = list_first_entry(&st->request_list,
-				struct dst_export_priv, request_entry);
-			list_del(&p->request_entry);
-		}
-		spin_unlock_irqrestore(&st->request_lock, flags);
-
-		if (!p)
-			break;
-
-		bio = p->bio;
-
-		if (dst_need_crypto(st->node) && (bio_data_dir(bio) == READ))
-			err = dst_export_crypto(st->node, bio);
-		else
-			err = dst_export_send_bio(bio);
-
-		if (err)
-			break;
-	}
-
-	return err;
-}
-
-/*
- * Cleanup export state.
- * It has to wait until all requests are finished,
- * and then free them all.
- */
-static void dst_state_cleanup_export(struct dst_state *st)
-{
-	struct dst_export_priv *p;
-	unsigned long flags;
-
-	/*
-	 * This loop waits for all pending bios to be completed and freed.
-	 */
-	while (atomic_read(&st->refcnt) > 1) {
-		dprintk("%s: st: %p, refcnt: %d, list_empty: %d.\n",
-				__func__, st, atomic_read(&st->refcnt),
-				list_empty(&st->request_list));
-		wait_event_timeout(st->thread_wait,
-				(atomic_read(&st->refcnt) == 1) ||
-				!list_empty(&st->request_list),
-				HZ/2);
-
-		while (!list_empty(&st->request_list)) {
-			p = NULL;
-			spin_lock_irqsave(&st->request_lock, flags);
-			if (!list_empty(&st->request_list)) {
-				p = list_first_entry(&st->request_list,
-					struct dst_export_priv, request_entry);
-				list_del(&p->request_entry);
-			}
-			spin_unlock_irqrestore(&st->request_lock, flags);
-
-			if (p)
-				bio_put(p->bio);
-
-			dprintk("%s: st: %p, refcnt: %d, list_empty: %d, p: "
-				"%p.\n", __func__, st, atomic_read(&st->refcnt),
-				list_empty(&st->request_list), p);
-		}
-	}
-
-	dst_state_put(st);
-}
-
-/*
- * Client accepting thread.
- * Not only accepts new connection, but also schedules receiving thread
- * and performs request completion described above.
- */
-static int dst_accept(void *init_data, void *schedule_data)
-{
-	struct dst_state *main_st = schedule_data;
-	struct dst_node *n = init_data;
-	struct dst_state *st;
-	int err;
-
-	while (n->trans_scan_timeout && !main_st->need_exit) {
-		dprintk("%s: main_st: %p, n: %p.\n", __func__, main_st, n);
-		st = dst_accept_client(main_st);
-		if (IS_ERR(st))
-			continue;
-
-		err = dst_state_schedule_receiver(st);
-		if (!err) {
-			while (n->trans_scan_timeout) {
-				err = wait_event_interruptible_timeout(st->thread_wait,
-					!list_empty(&st->request_list) ||
-					!n->trans_scan_timeout ||
-					st->need_exit,
-					HZ);
-
-				if (!n->trans_scan_timeout || st->need_exit)
-					break;
-
-				if (list_empty(&st->request_list))
-					continue;
-
-				err = dst_export_process_request_queue(st);
-				if (err)
-					break;
-			}
-
-			st->need_exit = 1;
-			wake_up(&st->thread_wait);
-		}
-
-		dst_state_cleanup_export(st);
-	}
-
-	dprintk("%s: freeing listening socket st: %p.\n", __func__, main_st);
-
-	dst_state_lock(main_st);
-	dst_poll_exit(main_st);
-	dst_state_socket_release(main_st);
-	dst_state_unlock(main_st);
-	dst_state_put(main_st);
-	dprintk("%s: freed listening socket st: %p.\n", __func__, main_st);
-
-	return 0;
-}
-
-int dst_start_export(struct dst_node *n)
-{
-	if (list_empty(&n->security_list)) {
-		printk(KERN_ERR "You are trying to export node '%s' "
-				"without security attributes.\nNo clients will "
-				"be allowed to connect. Exiting.\n", n->name);
-		return -EINVAL;
-	}
-	return dst_node_trans_init(n, sizeof(struct dst_export_priv));
-}
-
-/*
- * Initialize listening state and schedule accepting thread.
- */
-int dst_node_init_listened(struct dst_node *n, struct dst_export_ctl *le)
-{
-	struct dst_state *st;
-	int err = -ENOMEM;
-	struct dst_network_ctl *ctl = &le->ctl;
-
-	memcpy(&n->info->net, ctl, sizeof(struct dst_network_ctl));
-
-	st = dst_state_alloc(n);
-	if (IS_ERR(st)) {
-		err = PTR_ERR(st);
-		goto err_out_exit;
-	}
-	memcpy(&st->ctl, ctl, sizeof(struct dst_network_ctl));
-
-	err = dst_state_socket_create(st);
-	if (err)
-		goto err_out_put;
-
-	st->socket->sk->sk_reuse = 1;
-
-	err = kernel_bind(st->socket, (struct sockaddr *)&ctl->addr,
-			ctl->addr.sa_data_len);
-	if (err)
-		goto err_out_socket_release;
-
-	err = kernel_listen(st->socket, 1024);
-	if (err)
-		goto err_out_socket_release;
-	n->state = st;
-
-	err = dst_poll_init(st);
-	if (err)
-		goto err_out_socket_release;
-
-	dst_state_get(st);
-
-	err = thread_pool_schedule(n->pool, dst_thread_setup,
-			dst_accept, st, MAX_SCHEDULE_TIMEOUT);
-	if (err)
-		goto err_out_poll_exit;
-
-	return 0;
-
-err_out_poll_exit:
-	dst_poll_exit(st);
-err_out_socket_release:
-	dst_state_socket_release(st);
-err_out_put:
-	dst_state_put(st);
-err_out_exit:
-	n->state = NULL;
-	return err;
-}
-
-/*
- * Free bio and related private data.
- * Also drop a reference counter for appropriate state,
- * which waits when there are no more block IOs in-flight.
- */
-static void dst_bio_destructor(struct bio *bio)
-{
-	struct bio_vec *bv;
-	struct dst_export_priv *priv = bio->bi_private;
-	int i;
-
-	bio_for_each_segment(bv, bio, i) {
-		if (!bv->bv_page)
-			break;
-
-		__free_page(bv->bv_page);
-	}
-
-	if (priv)
-		dst_state_put(priv->state);
-	bio_free(bio, dst_bio_set);
-}
-
-/*
- * Block IO completion. Queue request to be sent back to
- * the client (or just confirmation).
- */
-static void dst_bio_end_io(struct bio *bio, int err)
-{
-	struct dst_export_priv *p = bio->bi_private;
-	struct dst_state *st = p->state;
-	unsigned long flags;
-
-	spin_lock_irqsave(&st->request_lock, flags);
-	list_add_tail(&p->request_entry, &st->request_list);
-	spin_unlock_irqrestore(&st->request_lock, flags);
-
-	wake_up(&st->thread_wait);
-}
-
-/*
- * Allocate read request for the server.
- */
-static int dst_export_read_request(struct bio *bio, unsigned int total_size)
-{
-	unsigned int size;
-	struct page *page;
-	int err;
-
-	while (total_size) {
-		err = -ENOMEM;
-		page = alloc_page(GFP_KERNEL);
-		if (!page)
-			goto err_out_exit;
-
-		size = min_t(unsigned int, PAGE_SIZE, total_size);
-
-		err = bio_add_page(bio, page, size, 0);
-		dprintk("%s: bio: %llu/%u, size: %u, err: %d.\n",
-				__func__, (u64)bio->bi_sector, bio->bi_size,
-				size, err);
-		if (err <= 0)
-			goto err_out_free_page;
-
-		total_size -= size;
-	}
-
-	return 0;
-
-err_out_free_page:
-	__free_page(page);
-err_out_exit:
-	return err;
-}
-
-/*
- * Allocate write request for the server.
- * Should not only get pages, but also read data from the network.
- */
-static int dst_export_write_request(struct dst_state *st,
-		struct bio *bio, unsigned int total_size)
-{
-	unsigned int size;
-	struct page *page;
-	void *data;
-	int err;
-
-	while (total_size) {
-		err = -ENOMEM;
-		page = alloc_page(GFP_KERNEL);
-		if (!page)
-			goto err_out_exit;
-
-		data = kmap(page);
-		if (!data)
-			goto err_out_free_page;
-
-		size = min_t(unsigned int, PAGE_SIZE, total_size);
-
-		err = dst_data_recv(st, data, size);
-		if (err)
-			goto err_out_unmap_page;
-
-		err = bio_add_page(bio, page, size, 0);
-		if (err <= 0)
-			goto err_out_unmap_page;
-
-		kunmap(page);
-
-		total_size -= size;
-	}
-
-	return 0;
-
-err_out_unmap_page:
-	kunmap(page);
-err_out_free_page:
-	__free_page(page);
-err_out_exit:
-	return err;
-}
-
-/*
- * Groovy, we've gotten an IO request from the client.
- * Allocate BIO from the bioset, private data from the mempool
- * and lots of pages for IO.
- */
-int dst_process_io(struct dst_state *st)
-{
-	struct dst_node *n = st->node;
-	struct dst_cmd *cmd = st->data;
-	struct bio *bio;
-	struct dst_export_priv *priv;
-	int err = -ENOMEM;
-
-	if (unlikely(!n->bdev)) {
-		err = -EINVAL;
-		goto err_out_exit;
-	}
-
-	bio = bio_alloc_bioset(GFP_KERNEL,
-			PAGE_ALIGN(cmd->size) >> PAGE_SHIFT,
-			dst_bio_set);
-	if (!bio)
-		goto err_out_exit;
-
-	priv = (struct dst_export_priv *)(((void *)bio) -
-			sizeof (struct dst_export_priv));
-
-	priv->state = dst_state_get(st);
-	priv->bio = bio;
-
-	bio->bi_private = priv;
-	bio->bi_end_io = dst_bio_end_io;
-	bio->bi_destructor = dst_bio_destructor;
-	bio->bi_bdev = n->bdev;
-
-	/*
-	 * Server side is only interested in two low bits:
-	 * uptodate (set by itself actually) and rw block
-	 */
-	bio->bi_flags |= cmd->flags & 3;
-
-	bio->bi_rw = cmd->rw;
-	bio->bi_size = 0;
-	bio->bi_sector = cmd->sector;
-
-	dst_bio_to_cmd(bio, &priv->cmd, DST_IO_RESPONSE, cmd->id);
-
-	priv->cmd.flags = 0;
-	priv->cmd.size = cmd->size;
-
-	if (bio_data_dir(bio) == WRITE) {
-		err = dst_recv_cdata(st, priv->cmd.hash);
-		if (err)
-			goto err_out_free;
-
-		err = dst_export_write_request(st, bio, cmd->size);
-		if (err)
-			goto err_out_free;
-
-		if (dst_need_crypto(n))
-			return dst_export_crypto(n, bio);
-	} else {
-		err = dst_export_read_request(bio, cmd->size);
-		if (err)
-			goto err_out_free;
-	}
-
-	dprintk("%s: bio: %llu/%u, rw: %lu, dir: %lu, flags: %lx, phys: %d.\n",
-			__func__, (u64)bio->bi_sector, bio->bi_size,
-			bio->bi_rw, bio_data_dir(bio),
-			bio->bi_flags, bio->bi_phys_segments);
-
-	generic_make_request(bio);
-
-	return 0;
-
-err_out_free:
-	bio_put(bio);
-err_out_exit:
-	return err;
-}
-
-/*
- * Ok, block IO is ready, let's send it back to the client...
- */
-int dst_export_send_bio(struct bio *bio)
-{
-	struct dst_export_priv *p = bio->bi_private;
-	struct dst_state *st = p->state;
-	struct dst_cmd *cmd = &p->cmd;
-	int err;
-
-	dprintk("%s: id: %llu, bio: %llu/%u, csize: %u, flags: %lu, rw: %lu.\n",
-			__func__, cmd->id, (u64)bio->bi_sector, bio->bi_size,
-			cmd->csize, bio->bi_flags, bio->bi_rw);
-
-	dst_convert_cmd(cmd);
-
-	dst_state_lock(st);
-	if (!st->socket) {
-		err = -ECONNRESET;
-		goto err_out_unlock;
-	}
-
-	if (bio_data_dir(bio) == WRITE) {
-		/* ... or just confirmation that writing has completed. */
-		cmd->size = cmd->csize = 0;
-		err = dst_data_send_header(st->socket, cmd,
-				sizeof(struct dst_cmd), 0);
-		if (err)
-			goto err_out_unlock;
-	} else {
-		err = dst_send_bio(st, cmd, bio);
-		if (err)
-			goto err_out_unlock;
-	}
-
-	dst_state_unlock(st);
-
-	bio_put(bio);
-	return 0;
-
-err_out_unlock:
-	dst_state_unlock(st);
-
-	bio_put(bio);
-	return err;
-}
diff --git a/drivers/staging/dst/state.c b/drivers/staging/dst/state.c
deleted file mode 100644
index 02a05e6c48c3..000000000000
--- a/drivers/staging/dst/state.c
+++ /dev/null
@@ -1,844 +0,0 @@
-/*
- * 2007+ Copyright (c) Evgeniy Polyakov <zbr@ioremap.net>
- * All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- */
-
-#include <linux/buffer_head.h>
-#include <linux/blkdev.h>
-#include <linux/bio.h>
-#include <linux/connector.h>
-#include <linux/dst.h>
-#include <linux/device.h>
-#include <linux/in.h>
-#include <linux/in6.h>
-#include <linux/socket.h>
-#include <linux/slab.h>
-
-#include <net/sock.h>
-
-/*
- * Polling machinery.
- */
-
-struct dst_poll_helper {
-	poll_table		pt;
-	struct dst_state	*st;
-};
-
-static int dst_queue_wake(wait_queue_t *wait, unsigned mode,
-		int sync, void *key)
-{
-	struct dst_state *st = container_of(wait, struct dst_state, wait);
-
-	wake_up(&st->thread_wait);
-	return 1;
-}
-
-static void dst_queue_func(struct file *file, wait_queue_head_t *whead,
-				 poll_table *pt)
-{
-	struct dst_state *st = container_of(pt, struct dst_poll_helper, pt)->st;
-
-	st->whead = whead;
-	init_waitqueue_func_entry(&st->wait, dst_queue_wake);
-	add_wait_queue(whead, &st->wait);
-}
-
-void dst_poll_exit(struct dst_state *st)
-{
-	if (st->whead) {
-		remove_wait_queue(st->whead, &st->wait);
-		st->whead = NULL;
-	}
-}
-
-int dst_poll_init(struct dst_state *st)
-{
-	struct dst_poll_helper ph;
-
-	ph.st = st;
-	init_poll_funcptr(&ph.pt, &dst_queue_func);
-
-	st->socket->ops->poll(NULL, st->socket, &ph.pt);
-	return 0;
-}
-
-/*
- * Header receiving function - may block.
- */
-static int dst_data_recv_header(struct socket *sock,
-		void *data, unsigned int size, int block)
-{
-	struct msghdr msg;
-	struct kvec iov;
-	int err;
-
-	iov.iov_base = data;
-	iov.iov_len = size;
-
-	msg.msg_iov = (struct iovec *)&iov;
-	msg.msg_iovlen = 1;
-	msg.msg_name = NULL;
-	msg.msg_namelen = 0;
-	msg.msg_control = NULL;
-	msg.msg_controllen = 0;
-	msg.msg_flags = (block) ? MSG_WAITALL : MSG_DONTWAIT;
-
-	err = kernel_recvmsg(sock, &msg, &iov, 1, iov.iov_len,
-			msg.msg_flags);
-	if (err != size)
-		return -1;
-
-	return 0;
-}
-
-/*
- * Header sending function - may block.
- */
-int dst_data_send_header(struct socket *sock,
-		void *data, unsigned int size, int more)
-{
-	struct msghdr msg;
-	struct kvec iov;
-	int err;
-
-	iov.iov_base = data;
-	iov.iov_len = size;
-
-	msg.msg_iov = (struct iovec *)&iov;
-	msg.msg_iovlen = 1;
-	msg.msg_name = NULL;
-	msg.msg_namelen = 0;
-	msg.msg_control = NULL;
-	msg.msg_controllen = 0;
-	msg.msg_flags = MSG_WAITALL | (more ? MSG_MORE : 0);
-
-	err = kernel_sendmsg(sock, &msg, &iov, 1, iov.iov_len);
-	if (err != size) {
-		dprintk("%s: size: %u, more: %d, err: %d.\n",
-				__func__, size, more, err);
-		return -1;
-	}
-
-	return 0;
-}
-
-/*
- * Block autoconfiguration: request size of the storage and permissions.
- */
-static int dst_request_remote_config(struct dst_state *st)
-{
-	struct dst_node *n = st->node;
-	int err = -EINVAL;
-	struct dst_cmd *cmd = st->data;
-
-	memset(cmd, 0, sizeof(struct dst_cmd));
-	cmd->cmd = DST_CFG;
-
-	dst_convert_cmd(cmd);
-
-	err = dst_data_send_header(st->socket, cmd, sizeof(struct dst_cmd), 0);
-	if (err)
-		goto out;
-
-	err = dst_data_recv_header(st->socket, cmd, sizeof(struct dst_cmd), 1);
-	if (err)
-		goto out;
-
-	dst_convert_cmd(cmd);
-
-	if (cmd->cmd != DST_CFG) {
-		err = -EINVAL;
-		dprintk("%s: checking result: cmd: %d, size reported: %llu.\n",
-			__func__, cmd->cmd, cmd->sector);
-		goto out;
-	}
-
-	if (n->size != 0)
-		n->size = min_t(loff_t, n->size, cmd->sector);
-	else
-		n->size = cmd->sector;
-
-	n->info->size = n->size;
-	st->permissions = cmd->rw;
-
-out:
-	dprintk("%s: n: %p, err: %d, size: %llu, permission: %x.\n",
-			__func__, n, err, n->size, st->permissions);
-	return err;
-}
-
-/*
- * Socket machinery.
- */
-
-#define DST_DEFAULT_TIMEO	20000
-
-int dst_state_socket_create(struct dst_state *st)
-{
-	int err;
-	struct socket *sock;
-	struct dst_network_ctl *ctl = &st->ctl;
-
-	err = sock_create(ctl->addr.sa_family, ctl->type, ctl->proto, &sock);
-	if (err < 0)
-		return err;
-
-	sock->sk->sk_sndtimeo = sock->sk->sk_rcvtimeo =
-		msecs_to_jiffies(DST_DEFAULT_TIMEO);
-	sock->sk->sk_allocation = GFP_NOIO;
-
-	st->socket = st->read_socket = sock;
-	return 0;
-}
-
-void dst_state_socket_release(struct dst_state *st)
-{
-	dprintk("%s: st: %p, socket: %p, n: %p.\n",
-			__func__, st, st->socket, st->node);
-	if (st->socket) {
-		sock_release(st->socket);
-		st->socket = NULL;
-		st->read_socket = NULL;
-	}
-}
-
-void dst_dump_addr(struct socket *sk, struct sockaddr *sa, char *str)
-{
-	if (sk->ops->family == AF_INET) {
-		struct sockaddr_in *sin = (struct sockaddr_in *)sa;
-		printk(KERN_INFO "%s %u.%u.%u.%u:%d.\n", str,
-			NIPQUAD(sin->sin_addr.s_addr), ntohs(sin->sin_port));
-	} else if (sk->ops->family == AF_INET6) {
-		struct sockaddr_in6 *sin = (struct sockaddr_in6 *)sa;
-		printk(KERN_INFO "%s %pi6:%d",
-			str, &sin->sin6_addr, ntohs(sin->sin6_port));
-	}
-}
-
-void dst_state_exit_connected(struct dst_state *st)
-{
-	if (st->socket) {
-		dst_poll_exit(st);
-		st->socket->ops->shutdown(st->socket, 2);
-
-		dst_dump_addr(st->socket, (struct sockaddr *)&st->ctl.addr,
-				"Disconnected peer");
-		dst_state_socket_release(st);
-	}
-}
-
-static int dst_state_init_connected(struct dst_state *st)
-{
-	int err;
-	struct dst_network_ctl *ctl = &st->ctl;
-
-	err = dst_state_socket_create(st);
-	if (err)
-		goto err_out_exit;
-
-	err = kernel_connect(st->socket, (struct sockaddr *)&st->ctl.addr,
-			st->ctl.addr.sa_data_len, 0);
-	if (err)
-		goto err_out_release;
-
-	err = dst_poll_init(st);
-	if (err)
-		goto err_out_release;
-
-	dst_dump_addr(st->socket, (struct sockaddr *)&ctl->addr,
-			"Connected to peer");
-
-	return 0;
-
-err_out_release:
-	dst_state_socket_release(st);
-err_out_exit:
-	return err;
-}
-
-/*
- * State reset is used to reconnect to the remote peer.
- * May fail, but who cares, we will try again later.
- */
-static inline void dst_state_reset_nolock(struct dst_state *st)
-{
-	dst_state_exit_connected(st);
-	dst_state_init_connected(st);
-}
-
-static inline void dst_state_reset(struct dst_state *st)
-{
-	dst_state_lock(st);
-	dst_state_reset_nolock(st);
-	dst_state_unlock(st);
-}
-
-/*
- * Basic network sending/receiving functions.
- * Blocked mode is used.
- */
-static int dst_data_recv_raw(struct dst_state *st, void *buf, u64 size)
-{
-	struct msghdr msg;
-	struct kvec iov;
-	int err;
-
-	BUG_ON(!size);
-
-	iov.iov_base = buf;
-	iov.iov_len = size;
-
-	msg.msg_iov = (struct iovec *)&iov;
-	msg.msg_iovlen = 1;
-	msg.msg_name = NULL;
-	msg.msg_namelen = 0;
-	msg.msg_control = NULL;
-	msg.msg_controllen = 0;
-	msg.msg_flags = MSG_DONTWAIT;
-
-	err = kernel_recvmsg(st->socket, &msg, &iov, 1, iov.iov_len,
-			msg.msg_flags);
-	if (err <= 0) {
-		dprintk("%s: failed to recv data: size: %llu, err: %d.\n",
-				__func__, size, err);
-		if (err == 0)
-			err = -ECONNRESET;
-
-		dst_state_exit_connected(st);
-	}
-
-	return err;
-}
-
-/*
- * Ping command to early detect failed nodes.
- */
-static int dst_send_ping(struct dst_state *st)
-{
-	struct dst_cmd *cmd = st->data;
-	int err = -ECONNRESET;
-
-	dst_state_lock(st);
-	if (st->socket) {
-		memset(cmd, 0, sizeof(struct dst_cmd));
-
-		cmd->cmd = __cpu_to_be32(DST_PING);
-
-		err = dst_data_send_header(st->socket, cmd,
-				sizeof(struct dst_cmd), 0);
-	}
-	dprintk("%s: st: %p, socket: %p, err: %d.\n", __func__,
-			st, st->socket, err);
-	dst_state_unlock(st);
-
-	return err;
-}
-
-/*
- * Receiving function, which should either return error or read
- * whole block request. If there was no traffic for a one second,
- * send a ping, since remote node may die.
- */
-int dst_data_recv(struct dst_state *st, void *data, unsigned int size)
-{
-	unsigned int revents = 0;
-	unsigned int err_mask = POLLERR | POLLHUP | POLLRDHUP;
-	unsigned int mask = err_mask | POLLIN;
-	struct dst_node *n = st->node;
-	int err = 0;
-
-	while (size && !err) {
-		revents = dst_state_poll(st);
-
-		if (!(revents & mask)) {
-			DEFINE_WAIT(wait);
-
-			for (;;) {
-				prepare_to_wait(&st->thread_wait, &wait,
-						TASK_INTERRUPTIBLE);
-				if (!n->trans_scan_timeout || st->need_exit)
-					break;
-
-				revents = dst_state_poll(st);
-
-				if (revents & mask)
-					break;
-
-				if (signal_pending(current))
-					break;
-
-				if (!schedule_timeout(HZ)) {
-					err = dst_send_ping(st);
-					if (err)
-						return err;
-				}
-
-				continue;
-			}
-			finish_wait(&st->thread_wait, &wait);
-		}
-
-		err = -ECONNRESET;
-		dst_state_lock(st);
-
-		if (st->socket && (st->read_socket == st->socket) &&
-				(revents & POLLIN)) {
-			err = dst_data_recv_raw(st, data, size);
-			if (err > 0) {
-				data += err;
-				size -= err;
-				err = 0;
-			}
-		}
-
-		if (revents & err_mask || !st->socket) {
-			dprintk("%s: revents: %x, socket: %p, size: %u, "
-					"err: %d.\n", __func__, revents,
-					st->socket, size, err);
-			err = -ECONNRESET;
-		}
-
-		dst_state_unlock(st);
-
-		if (!n->trans_scan_timeout)
-			err = -ENODEV;
-	}
-
-	return err;
-}
-
-/*
- * Send block autoconf reply.
- */
-static int dst_process_cfg(struct dst_state *st)
-{
-	struct dst_node *n = st->node;
-	struct dst_cmd *cmd = st->data;
-	int err;
-
-	cmd->sector = n->size;
-	cmd->rw = st->permissions;
-
-	dst_convert_cmd(cmd);
-
-	dst_state_lock(st);
-	err = dst_data_send_header(st->socket, cmd, sizeof(struct dst_cmd), 0);
-	dst_state_unlock(st);
-
-	return err;
-}
-
-/*
- * Receive block IO from the network.
- */
-static int dst_recv_bio(struct dst_state *st, struct bio *bio,
-		unsigned int total_size)
-{
-	struct bio_vec *bv;
-	int i, err;
-	void *data;
-	unsigned int sz;
-
-	bio_for_each_segment(bv, bio, i) {
-		sz = min(total_size, bv->bv_len);
-
-		dprintk("%s: bio: %llu/%u, total: %u, len: %u, sz: %u, "
-				"off: %u.\n", __func__, (u64)bio->bi_sector,
-				bio->bi_size, total_size, bv->bv_len, sz,
-				bv->bv_offset);
-
-		data = kmap(bv->bv_page) + bv->bv_offset;
-		err = dst_data_recv(st, data, sz);
-		kunmap(bv->bv_page);
-
-		bv->bv_len = sz;
-
-		if (err)
-			return err;
-
-		total_size -= sz;
-		if (total_size == 0)
-			break;
-	}
-
-	return 0;
-}
-
-/*
- * Our block IO has just completed and arrived: get it.
- */
-static int dst_process_io_response(struct dst_state *st)
-{
-	struct dst_node *n = st->node;
-	struct dst_cmd *cmd = st->data;
-	struct dst_trans *t;
-	int err = 0;
-	struct bio *bio;
-
-	mutex_lock(&n->trans_lock);
-	t = dst_trans_search(n, cmd->id);
-	mutex_unlock(&n->trans_lock);
-
-	if (!t)
-		goto err_out_exit;
-
-	bio = t->bio;
-
-	dprintk("%s: bio: %llu/%u, cmd_size: %u, csize: %u, dir: %lu.\n",
-		__func__, (u64)bio->bi_sector, bio->bi_size, cmd->size,
-		cmd->csize, bio_data_dir(bio));
-
-	if (bio_data_dir(bio) == READ) {
-		if (bio->bi_size != cmd->size - cmd->csize)
-			goto err_out_exit;
-
-		if (dst_need_crypto(n)) {
-			err = dst_recv_cdata(st, t->cmd.hash);
-			if (err)
-				goto err_out_exit;
-		}
-
-		err = dst_recv_bio(st, t->bio, bio->bi_size);
-		if (err)
-			goto err_out_exit;
-
-		if (dst_need_crypto(n))
-			return dst_trans_crypto(t);
-	} else {
-		err = -EBADMSG;
-		if (cmd->size || cmd->csize)
-			goto err_out_exit;
-	}
-
-	dst_trans_remove(t);
-	dst_trans_put(t);
-
-	return 0;
-
-err_out_exit:
-	return err;
-}
-
-/*
- * Receive crypto data.
- */
-int dst_recv_cdata(struct dst_state *st, void *cdata)
-{
-	struct dst_cmd *cmd = st->data;
-	struct dst_node *n = st->node;
-	struct dst_crypto_ctl *c = &n->crypto;
-	int err;
-
-	if (cmd->csize != c->crypto_attached_size) {
-		dprintk("%s: cmd: cmd: %u, sector: %llu, size: %u, "
-				"csize: %u != digest size %u.\n",
-				__func__, cmd->cmd, cmd->sector, cmd->size,
-				cmd->csize, c->crypto_attached_size);
-		err = -EINVAL;
-		goto err_out_exit;
-	}
-
-	err = dst_data_recv(st, cdata, cmd->csize);
-	if (err)
-		goto err_out_exit;
-
-	cmd->size -= cmd->csize;
-	return 0;
-
-err_out_exit:
-	return err;
-}
-
-/*
- * Receive the command and start its processing.
- */
-static int dst_recv_processing(struct dst_state *st)
-{
-	int err = -EINTR;
-	struct dst_cmd *cmd = st->data;
-
-	/*
-	 * If socket will be reset after this statement, then
-	 * dst_data_recv() will just fail and loop will
-	 * start again, so it can be done without any locks.
-	 *
-	 * st->read_socket is needed to prevents state machine
-	 * breaking between this data reading and subsequent one
-	 * in protocol specific functions during connection reset.
-	 * In case of reset we have to read next command and do
-	 * not expect data for old command to magically appear in
-	 * new connection.
-	 */
-	st->read_socket = st->socket;
-	err = dst_data_recv(st, cmd, sizeof(struct dst_cmd));
-	if (err)
-		goto out_exit;
-
-	dst_convert_cmd(cmd);
-
-	dprintk("%s: cmd: %u, size: %u, csize: %u, id: %llu, "
-			"sector: %llu, flags: %llx, rw: %llx.\n",
-			__func__, cmd->cmd, cmd->size,
-			cmd->csize, cmd->id, cmd->sector,
-			cmd->flags, cmd->rw);
-
-	/*
-	 * This should catch protocol breakage and random garbage
-	 * instead of commands.
-	 */
-	if (unlikely(cmd->csize > st->size - sizeof(struct dst_cmd))) {
-		err = -EBADMSG;
-		goto out_exit;
-	}
-
-	err = -EPROTO;
-	switch (cmd->cmd) {
-	case DST_IO_RESPONSE:
-		err = dst_process_io_response(st);
-		break;
-	case DST_IO:
-		err = dst_process_io(st);
-		break;
-	case DST_CFG:
-		err = dst_process_cfg(st);
-		break;
-	case DST_PING:
-		err = 0;
-		break;
-	default:
-		break;
-	}
-
-out_exit:
-	return err;
-}
-
-/*
- * Receiving thread. For the client node we should try to reconnect,
- * for accepted client we just drop the state and expect it to reconnect.
- */
-static int dst_recv(void *init_data, void *schedule_data)
-{
-	struct dst_state *st = schedule_data;
-	struct dst_node *n = init_data;
-	int err = 0;
-
-	dprintk("%s: start st: %p, n: %p, scan: %lu, need_exit: %d.\n",
-			__func__, st, n, n->trans_scan_timeout, st->need_exit);
-
-	while (n->trans_scan_timeout && !st->need_exit) {
-		err = dst_recv_processing(st);
-		if (err < 0) {
-			if (!st->ctl.type)
-				break;
-
-			if (!n->trans_scan_timeout || st->need_exit)
-				break;
-
-			dst_state_reset(st);
-			msleep(1000);
-		}
-	}
-
-	st->need_exit = 1;
-	wake_up(&st->thread_wait);
-
-	dprintk("%s: freeing receiving socket st: %p.\n", __func__, st);
-	dst_state_lock(st);
-	dst_state_exit_connected(st);
-	dst_state_unlock(st);
-	dst_state_put(st);
-
-	dprintk("%s: freed receiving socket st: %p.\n", __func__, st);
-
-	return err;
-}
-
-/*
- * Network state dies here and borns couple of lines below.
- * This object is the main network state processing engine:
- * sending, receiving, reconnections, all network related
- * tasks are handled on behalf of the state.
- */
-static void dst_state_free(struct dst_state *st)
-{
-	dprintk("%s: st: %p.\n", __func__, st);
-	if (st->cleanup)
-		st->cleanup(st);
-	kfree(st->data);
-	kfree(st);
-}
-
-struct dst_state *dst_state_alloc(struct dst_node *n)
-{
-	struct dst_state *st;
-	int err = -ENOMEM;
-
-	st = kzalloc(sizeof(struct dst_state), GFP_KERNEL);
-	if (!st)
-		goto err_out_exit;
-
-	st->node = n;
-	st->need_exit = 0;
-
-	st->size = PAGE_SIZE;
-	st->data = kmalloc(st->size, GFP_KERNEL);
-	if (!st->data)
-		goto err_out_free;
-
-	spin_lock_init(&st->request_lock);
-	INIT_LIST_HEAD(&st->request_list);
-
-	mutex_init(&st->state_lock);
-	init_waitqueue_head(&st->thread_wait);
-
-	/*
-	 * One for processing thread, another one for node itself.
-	 */
-	atomic_set(&st->refcnt, 2);
-
-	dprintk("%s: st: %p, n: %p.\n", __func__, st, st->node);
-
-	return st;
-
-err_out_free:
-	kfree(st);
-err_out_exit:
-	return ERR_PTR(err);
-}
-
-int dst_state_schedule_receiver(struct dst_state *st)
-{
-	return thread_pool_schedule_private(st->node->pool, dst_thread_setup,
-			dst_recv, st, MAX_SCHEDULE_TIMEOUT, st->node);
-}
-
-/*
- * Initialize client's connection to the remote peer: allocate state,
- * connect and perform block IO autoconfiguration.
- */
-int dst_node_init_connected(struct dst_node *n, struct dst_network_ctl *r)
-{
-	struct dst_state *st;
-	int err = -ENOMEM;
-
-	st = dst_state_alloc(n);
-	if (IS_ERR(st)) {
-		err = PTR_ERR(st);
-		goto err_out_exit;
-	}
-	memcpy(&st->ctl, r, sizeof(struct dst_network_ctl));
-
-	err = dst_state_init_connected(st);
-	if (err)
-		goto err_out_free_data;
-
-	err = dst_request_remote_config(st);
-	if (err)
-		goto err_out_exit_connected;
-	n->state = st;
-
-	err = dst_state_schedule_receiver(st);
-	if (err)
-		goto err_out_exit_connected;
-
-	return 0;
-
-err_out_exit_connected:
-	dst_state_exit_connected(st);
-err_out_free_data:
-	dst_state_free(st);
-err_out_exit:
-	n->state = NULL;
-	return err;
-}
-
-void dst_state_put(struct dst_state *st)
-{
-	dprintk("%s: st: %p, refcnt: %d.\n",
-			__func__, st, atomic_read(&st->refcnt));
-	if (atomic_dec_and_test(&st->refcnt))
-		dst_state_free(st);
-}
-
-/*
- * Send block IO to the network one by one using zero-copy ->sendpage().
- */
-int dst_send_bio(struct dst_state *st, struct dst_cmd *cmd, struct bio *bio)
-{
-	struct bio_vec *bv;
-	struct dst_crypto_ctl *c = &st->node->crypto;
-	int err, i = 0;
-	int flags = MSG_WAITALL;
-
-	err = dst_data_send_header(st->socket, cmd,
-		sizeof(struct dst_cmd) + c->crypto_attached_size, bio->bi_vcnt);
-	if (err)
-		goto err_out_exit;
-
-	bio_for_each_segment(bv, bio, i) {
-		if (i < bio->bi_vcnt - 1)
-			flags |= MSG_MORE;
-
-		err = kernel_sendpage(st->socket, bv->bv_page, bv->bv_offset,
-				bv->bv_len, flags);
-		if (err <= 0)
-			goto err_out_exit;
-	}
-
-	return 0;
-
-err_out_exit:
-	dprintk("%s: %d/%d, flags: %x, err: %d.\n",
-			__func__, i, bio->bi_vcnt, flags, err);
-	return err;
-}
-
-/*
- * Send transaction to the remote peer.
- */
-int dst_trans_send(struct dst_trans *t)
-{
-	int err;
-	struct dst_state *st = t->n->state;
-	struct bio *bio = t->bio;
-
-	dst_convert_cmd(&t->cmd);
-
-	dst_state_lock(st);
-	if (!st->socket) {
-		err = dst_state_init_connected(st);
-		if (err)
-			goto err_out_unlock;
-	}
-
-	if (bio_data_dir(bio) == WRITE) {
-		err = dst_send_bio(st, &t->cmd, t->bio);
-	} else {
-		err = dst_data_send_header(st->socket, &t->cmd,
-				sizeof(struct dst_cmd), 0);
-	}
-	if (err)
-		goto err_out_reset;
-
-	dst_state_unlock(st);
-	return 0;
-
-err_out_reset:
-	dst_state_reset_nolock(st);
-err_out_unlock:
-	dst_state_unlock(st);
-
-	return err;
-}
diff --git a/drivers/staging/dst/thread_pool.c b/drivers/staging/dst/thread_pool.c
deleted file mode 100644
index 29a82b2602f3..000000000000
--- a/drivers/staging/dst/thread_pool.c
+++ /dev/null
@@ -1,348 +0,0 @@
-/*
- * 2007+ Copyright (c) Evgeniy Polyakov <zbr@ioremap.net>
- * All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- */
-
-#include <linux/kernel.h>
-#include <linux/dst.h>
-#include <linux/kthread.h>
-#include <linux/slab.h>
-
-/*
- * Thread pool abstraction allows to schedule a work to be performed
- * on behalf of kernel thread. One does not operate with threads itself,
- * instead user provides setup and cleanup callbacks for thread pool itself,
- * and action and cleanup callbacks for each submitted work.
- *
- * Each worker has private data initialized at creation time and data,
- * provided by user at scheduling time.
- *
- * When action is being performed, thread can not be used by other users,
- * instead they will sleep until there is free thread to pick their work.
- */
-struct thread_pool_worker {
-	struct list_head	worker_entry;
-
-	struct task_struct	*thread;
-
-	struct thread_pool	*pool;
-
-	int			error;
-	int			has_data;
-	int			need_exit;
-	unsigned int		id;
-
-	wait_queue_head_t	wait;
-
-	void			*private;
-	void			*schedule_data;
-
-	int			(*action)(void *private, void *schedule_data);
-	void			(*cleanup)(void *private);
-};
-
-static void thread_pool_exit_worker(struct thread_pool_worker *w)
-{
-	kthread_stop(w->thread);
-
-	w->cleanup(w->private);
-	kfree(w);
-}
-
-/*
- * Called to mark thread as ready and allow users to schedule new work.
- */
-static void thread_pool_worker_make_ready(struct thread_pool_worker *w)
-{
-	struct thread_pool *p = w->pool;
-
-	mutex_lock(&p->thread_lock);
-
-	if (!w->need_exit) {
-		list_move_tail(&w->worker_entry, &p->ready_list);
-		w->has_data = 0;
-		mutex_unlock(&p->thread_lock);
-
-		wake_up(&p->wait);
-	} else {
-		p->thread_num--;
-		list_del(&w->worker_entry);
-		mutex_unlock(&p->thread_lock);
-
-		thread_pool_exit_worker(w);
-	}
-}
-
-/*
- * Thread action loop: waits until there is new work.
- */
-static int thread_pool_worker_func(void *data)
-{
-	struct thread_pool_worker *w = data;
-
-	while (!kthread_should_stop()) {
-		wait_event_interruptible(w->wait,
-			kthread_should_stop() || w->has_data);
-
-		if (kthread_should_stop())
-			break;
-
-		if (!w->has_data)
-			continue;
-
-		w->action(w->private, w->schedule_data);
-		thread_pool_worker_make_ready(w);
-	}
-
-	return 0;
-}
-
-/*
- * Remove single worker without specifying which one.
- */
-void thread_pool_del_worker(struct thread_pool *p)
-{
-	struct thread_pool_worker *w = NULL;
-
-	while (!w && p->thread_num) {
-		wait_event(p->wait, !list_empty(&p->ready_list) ||
-				!p->thread_num);
-
-		dprintk("%s: locking list_empty: %d, thread_num: %d.\n",
-				__func__, list_empty(&p->ready_list),
-				p->thread_num);
-
-		mutex_lock(&p->thread_lock);
-		if (!list_empty(&p->ready_list)) {
-			w = list_first_entry(&p->ready_list,
-					struct thread_pool_worker,
-					worker_entry);
-
-			dprintk("%s: deleting w: %p, thread_num: %d, "
-					"list: %p [%p.%p].\n", __func__,
-					w, p->thread_num, &p->ready_list,
-					p->ready_list.prev, p->ready_list.next);
-
-			p->thread_num--;
-			list_del(&w->worker_entry);
-		}
-		mutex_unlock(&p->thread_lock);
-	}
-
-	if (w)
-		thread_pool_exit_worker(w);
-	dprintk("%s: deleted w: %p, thread_num: %d.\n",
-			__func__, w, p->thread_num);
-}
-
-/*
- * Remove a worker with given ID.
- */
-void thread_pool_del_worker_id(struct thread_pool *p, unsigned int id)
-{
-	struct thread_pool_worker *w;
-	int found = 0;
-
-	mutex_lock(&p->thread_lock);
-	list_for_each_entry(w, &p->ready_list, worker_entry) {
-		if (w->id == id) {
-			found = 1;
-			p->thread_num--;
-			list_del(&w->worker_entry);
-			break;
-		}
-	}
-
-	if (!found) {
-		list_for_each_entry(w, &p->active_list, worker_entry) {
-			if (w->id == id) {
-				w->need_exit = 1;
-				break;
-			}
-		}
-	}
-	mutex_unlock(&p->thread_lock);
-
-	if (found)
-		thread_pool_exit_worker(w);
-}
-
-/*
- * Add new worker thread with given parameters.
- * If initialization callback fails, return error.
- */
-int thread_pool_add_worker(struct thread_pool *p,
-		char *name,
-		unsigned int id,
-		void *(*init)(void *private),
-		void (*cleanup)(void *private),
-		void *private)
-{
-	struct thread_pool_worker *w;
-	int err = -ENOMEM;
-
-	w = kzalloc(sizeof(struct thread_pool_worker), GFP_KERNEL);
-	if (!w)
-		goto err_out_exit;
-
-	w->pool = p;
-	init_waitqueue_head(&w->wait);
-	w->cleanup = cleanup;
-	w->id = id;
-
-	w->thread = kthread_run(thread_pool_worker_func, w, "%s", name);
-	if (IS_ERR(w->thread)) {
-		err = PTR_ERR(w->thread);
-		goto err_out_free;
-	}
-
-	w->private = init(private);
-	if (IS_ERR(w->private)) {
-		err = PTR_ERR(w->private);
-		goto err_out_stop_thread;
-	}
-
-	mutex_lock(&p->thread_lock);
-	list_add_tail(&w->worker_entry, &p->ready_list);
-	p->thread_num++;
-	mutex_unlock(&p->thread_lock);
-
-	return 0;
-
-err_out_stop_thread:
-	kthread_stop(w->thread);
-err_out_free:
-	kfree(w);
-err_out_exit:
-	return err;
-}
-
-/*
- * Destroy the whole pool.
- */
-void thread_pool_destroy(struct thread_pool *p)
-{
-	while (p->thread_num) {
-		dprintk("%s: num: %d.\n", __func__, p->thread_num);
-		thread_pool_del_worker(p);
-	}
-
-	kfree(p);
-}
-
-/*
- * Create a pool with given number of threads.
- * They will have sequential IDs started from zero.
- */
-struct thread_pool *thread_pool_create(int num, char *name,
-		void *(*init)(void *private),
-		void (*cleanup)(void *private),
-		void *private)
-{
-	struct thread_pool_worker *w, *tmp;
-	struct thread_pool *p;
-	int err = -ENOMEM;
-	int i;
-
-	p = kzalloc(sizeof(struct thread_pool), GFP_KERNEL);
-	if (!p)
-		goto err_out_exit;
-
-	init_waitqueue_head(&p->wait);
-	mutex_init(&p->thread_lock);
-	INIT_LIST_HEAD(&p->ready_list);
-	INIT_LIST_HEAD(&p->active_list);
-	p->thread_num = 0;
-
-	for (i = 0; i < num; ++i) {
-		err = thread_pool_add_worker(p, name, i, init,
-				cleanup, private);
-		if (err)
-			goto err_out_free_all;
-	}
-
-	return p;
-
-err_out_free_all:
-	list_for_each_entry_safe(w, tmp, &p->ready_list, worker_entry) {
-		list_del(&w->worker_entry);
-		thread_pool_exit_worker(w);
-	}
-	kfree(p);
-err_out_exit:
-	return ERR_PTR(err);
-}
-
-/*
- * Schedule execution of the action on a given thread,
- * provided ID pointer has to match previously stored
- * private data.
- */
-int thread_pool_schedule_private(struct thread_pool *p,
-		int (*setup)(void *private, void *data),
-		int (*action)(void *private, void *data),
-		void *data, long timeout, void *id)
-{
-	struct thread_pool_worker *w, *tmp, *worker = NULL;
-	int err = 0;
-
-	while (!worker && !err) {
-		timeout = wait_event_interruptible_timeout(p->wait,
-				!list_empty(&p->ready_list),
-				timeout);
-
-		if (!timeout) {
-			err = -ETIMEDOUT;
-			break;
-		}
-
-		worker = NULL;
-		mutex_lock(&p->thread_lock);
-		list_for_each_entry_safe(w, tmp, &p->ready_list, worker_entry) {
-			if (id && id != w->private)
-				continue;
-
-			worker = w;
-
-			list_move_tail(&w->worker_entry, &p->active_list);
-
-			err = setup(w->private, data);
-			if (!err) {
-				w->schedule_data = data;
-				w->action = action;
-				w->has_data = 1;
-				wake_up(&w->wait);
-			} else {
-				list_move_tail(&w->worker_entry,
-						&p->ready_list);
-			}
-
-			break;
-		}
-		mutex_unlock(&p->thread_lock);
-	}
-
-	return err;
-}
-
-/*
- * Schedule execution on arbitrary thread from the pool.
- */
-int thread_pool_schedule(struct thread_pool *p,
-		int (*setup)(void *private, void *data),
-		int (*action)(void *private, void *data),
-		void *data, long timeout)
-{
-	return thread_pool_schedule_private(p, setup,
-			action, data, timeout, NULL);
-}
diff --git a/drivers/staging/dst/trans.c b/drivers/staging/dst/trans.c
deleted file mode 100644
index 1c36a6bc31d5..000000000000
--- a/drivers/staging/dst/trans.c
+++ /dev/null
@@ -1,337 +0,0 @@
-/*
- * 2007+ Copyright (c) Evgeniy Polyakov <zbr@ioremap.net>
- * All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- */
-
-#include <linux/bio.h>
-#include <linux/dst.h>
-#include <linux/slab.h>
-#include <linux/mempool.h>
-
-/*
- * Transaction memory pool size.
- */
-static int dst_mempool_num = 32;
-module_param(dst_mempool_num, int, 0644);
-
-/*
- * Transaction tree management.
- */
-static inline int dst_trans_cmp(dst_gen_t gen, dst_gen_t new)
-{
-	if (gen < new)
-		return 1;
-	if (gen > new)
-		return -1;
-	return 0;
-}
-
-struct dst_trans *dst_trans_search(struct dst_node *node, dst_gen_t gen)
-{
-	struct rb_root *root = &node->trans_root;
-	struct rb_node *n = root->rb_node;
-	struct dst_trans *t, *ret = NULL;
-	int cmp;
-
-	while (n) {
-		t = rb_entry(n, struct dst_trans, trans_entry);
-
-		cmp = dst_trans_cmp(t->gen, gen);
-		if (cmp < 0)
-			n = n->rb_left;
-		else if (cmp > 0)
-			n = n->rb_right;
-		else {
-			ret = t;
-			break;
-		}
-	}
-
-	dprintk("%s: %s transaction: id: %llu.\n", __func__,
-			(ret) ? "found" : "not found", gen);
-
-	return ret;
-}
-
-static int dst_trans_insert(struct dst_trans *new)
-{
-	struct rb_root *root = &new->n->trans_root;
-	struct rb_node **n = &root->rb_node, *parent = NULL;
-	struct dst_trans *ret = NULL, *t;
-	int cmp;
-
-	while (*n) {
-		parent = *n;
-
-		t = rb_entry(parent, struct dst_trans, trans_entry);
-
-		cmp = dst_trans_cmp(t->gen, new->gen);
-		if (cmp < 0)
-			n = &parent->rb_left;
-		else if (cmp > 0)
-			n = &parent->rb_right;
-		else {
-			ret = t;
-			break;
-		}
-	}
-
-	new->send_time = jiffies;
-	if (ret) {
-		printk(KERN_DEBUG "%s: exist: old: gen: %llu, bio: %llu/%u, "
-			"send_time: %lu, new: gen: %llu, bio: %llu/%u, "
-			"send_time: %lu.\n", __func__,
-			ret->gen, (u64)ret->bio->bi_sector,
-			ret->bio->bi_size, ret->send_time,
-			new->gen, (u64)new->bio->bi_sector,
-			new->bio->bi_size, new->send_time);
-		return -EEXIST;
-	}
-
-	rb_link_node(&new->trans_entry, parent, n);
-	rb_insert_color(&new->trans_entry, root);
-
-	dprintk("%s: inserted: gen: %llu, bio: %llu/%u, send_time: %lu.\n",
-		__func__, new->gen, (u64)new->bio->bi_sector,
-		new->bio->bi_size, new->send_time);
-
-	return 0;
-}
-
-int dst_trans_remove_nolock(struct dst_trans *t)
-{
-	struct dst_node *n = t->n;
-
-	if (t->trans_entry.rb_parent_color) {
-		rb_erase(&t->trans_entry, &n->trans_root);
-		t->trans_entry.rb_parent_color = 0;
-	}
-	return 0;
-}
-
-int dst_trans_remove(struct dst_trans *t)
-{
-	int ret;
-	struct dst_node *n = t->n;
-
-	mutex_lock(&n->trans_lock);
-	ret = dst_trans_remove_nolock(t);
-	mutex_unlock(&n->trans_lock);
-
-	return ret;
-}
-
-/*
- * When transaction is completed and there are no more users,
- * we complete appriate block IO request with given error status.
- */
-void dst_trans_put(struct dst_trans *t)
-{
-	if (atomic_dec_and_test(&t->refcnt)) {
-		struct bio *bio = t->bio;
-
-		dprintk("%s: completed t: %p, gen: %llu, bio: %p.\n",
-				__func__, t, t->gen, bio);
-
-		bio_endio(bio, t->error);
-		bio_put(bio);
-
-		dst_node_put(t->n);
-		mempool_free(t, t->n->trans_pool);
-	}
-}
-
-/*
- * Process given block IO request: allocate transaction, insert it into the tree
- * and send/schedule crypto processing.
- */
-int dst_process_bio(struct dst_node *n, struct bio *bio)
-{
-	struct dst_trans *t;
-	int err = -ENOMEM;
-
-	t = mempool_alloc(n->trans_pool, GFP_NOFS);
-	if (!t)
-		goto err_out_exit;
-
-	t->n = dst_node_get(n);
-	t->bio = bio;
-	t->error = 0;
-	t->retries = 0;
-	atomic_set(&t->refcnt, 1);
-	t->gen = atomic_long_inc_return(&n->gen);
-
-	t->enc = bio_data_dir(bio);
-	dst_bio_to_cmd(bio, &t->cmd, DST_IO, t->gen);
-
-	mutex_lock(&n->trans_lock);
-	err = dst_trans_insert(t);
-	mutex_unlock(&n->trans_lock);
-	if (err)
-		goto err_out_free;
-
-	dprintk("%s: gen: %llu, bio: %llu/%u, dir/enc: %d, need_crypto: %d.\n",
-			__func__, t->gen, (u64)bio->bi_sector,
-			bio->bi_size, t->enc, dst_need_crypto(n));
-
-	if (dst_need_crypto(n) && t->enc)
-		dst_trans_crypto(t);
-	else
-		dst_trans_send(t);
-
-	return 0;
-
-err_out_free:
-	dst_node_put(n);
-	mempool_free(t, n->trans_pool);
-err_out_exit:
-	bio_endio(bio, err);
-	bio_put(bio);
-	return err;
-}
-
-/*
- * Scan for timeout/stale transactions.
- * Each transaction is being resent multiple times before error completion.
- */
-static void dst_trans_scan(struct work_struct *work)
-{
-	struct dst_node *n = container_of(work, struct dst_node,
-			trans_work.work);
-	struct rb_node *rb_node;
-	struct dst_trans *t;
-	unsigned long timeout = n->trans_scan_timeout;
-	int num = 10 * n->trans_max_retries;
-
-	mutex_lock(&n->trans_lock);
-
-	for (rb_node = rb_first(&n->trans_root); rb_node; ) {
-		t = rb_entry(rb_node, struct dst_trans, trans_entry);
-
-		if (timeout && time_after(t->send_time + timeout, jiffies)
-				&& t->retries == 0)
-			break;
-#if 0
-		dprintk("%s: t: %p, gen: %llu, n: %s, retries: %u, max: %u.\n",
-			__func__, t, t->gen, n->name,
-			t->retries, n->trans_max_retries);
-#endif
-		if (--num == 0)
-			break;
-
-		dst_trans_get(t);
-
-		rb_node = rb_next(rb_node);
-
-		if (timeout && (++t->retries < n->trans_max_retries)) {
-			dst_trans_send(t);
-		} else {
-			t->error = -ETIMEDOUT;
-			dst_trans_remove_nolock(t);
-			dst_trans_put(t);
-		}
-
-		dst_trans_put(t);
-	}
-
-	mutex_unlock(&n->trans_lock);
-
-	/*
-	 * If no timeout specified then system is in the middle of exiting
-	 * process, so no need to reschedule scanning process again.
-	 */
-	if (timeout) {
-		if (!num)
-			timeout = HZ;
-		schedule_delayed_work(&n->trans_work, timeout);
-	}
-}
-
-/*
- * Flush all transactions and mark them as timed out.
- * Destroy transaction pools.
- */
-void dst_node_trans_exit(struct dst_node *n)
-{
-	struct dst_trans *t;
-	struct rb_node *rb_node;
-
-	if (!n->trans_cache)
-		return;
-
-	dprintk("%s: n: %p, cancelling the work.\n", __func__, n);
-	cancel_delayed_work_sync(&n->trans_work);
-	flush_scheduled_work();
-	dprintk("%s: n: %p, work has been cancelled.\n", __func__, n);
-
-	for (rb_node = rb_first(&n->trans_root); rb_node; ) {
-		t = rb_entry(rb_node, struct dst_trans, trans_entry);
-
-		dprintk("%s: t: %p, gen: %llu, n: %s.\n",
-			__func__, t, t->gen, n->name);
-
-		rb_node = rb_next(rb_node);
-
-		t->error = -ETIMEDOUT;
-		dst_trans_remove_nolock(t);
-		dst_trans_put(t);
-	}
-
-	mempool_destroy(n->trans_pool);
-	kmem_cache_destroy(n->trans_cache);
-}
-
-/*
- * Initialize transaction storage for given node.
- * Transaction stores not only control information,
- * but also network command and crypto data (if needed)
- * to reduce number of allocations. Thus transaction size
- * differs from node to node.
- */
-int dst_node_trans_init(struct dst_node *n, unsigned int size)
-{
-	/*
-	 * We need this, since node with given name can be dropped from the
-	 * hash table, but be still alive, so subsequent creation of the node
-	 * with the same name may collide with existing cache name.
-	 */
-
-	snprintf(n->cache_name, sizeof(n->cache_name), "%s-%p", n->name, n);
-
-	n->trans_cache = kmem_cache_create(n->cache_name,
-			size + n->crypto.crypto_attached_size,
-			0, 0, NULL);
-	if (!n->trans_cache)
-		goto err_out_exit;
-
-	n->trans_pool = mempool_create_slab_pool(dst_mempool_num,
-			n->trans_cache);
-	if (!n->trans_pool)
-		goto err_out_cache_destroy;
-
-	mutex_init(&n->trans_lock);
-	n->trans_root = RB_ROOT;
-
-	INIT_DELAYED_WORK(&n->trans_work, dst_trans_scan);
-	schedule_delayed_work(&n->trans_work, n->trans_scan_timeout);
-
-	dprintk("%s: n: %p, size: %u, crypto: %u.\n",
-		__func__, n, size, n->crypto.crypto_attached_size);
-
-	return 0;
-
-err_out_cache_destroy:
-	kmem_cache_destroy(n->trans_cache);
-err_out_exit:
-	return -ENOMEM;
-}
diff --git a/include/linux/dst.h b/include/linux/dst.h
deleted file mode 100644
index e26fed84b1aa..000000000000
--- a/include/linux/dst.h
+++ /dev/null
@@ -1,587 +0,0 @@
-/*
- * 2007+ Copyright (c) Evgeniy Polyakov <johnpol@2ka.mipt.ru>
- * All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- */
-
-#ifndef __DST_H
-#define __DST_H
-
-#include <linux/types.h>
-#include <linux/connector.h>
-
-#define DST_NAMELEN		32
-#define DST_NAME		"dst"
-
-enum {
-	/* Remove node with given id from storage */
-	DST_DEL_NODE	= 0,
-	/* Add remote node with given id to the storage */
-	DST_ADD_REMOTE,
-	/* Add local node with given id to the storage to be exported and used by remote peers */
-	DST_ADD_EXPORT,
-	/* Crypto initialization command (hash/cipher used to protect the connection) */
-	DST_CRYPTO,
-	/* Security attributes for given connection (permissions for example) */
-	DST_SECURITY,
-	/* Register given node in the block layer subsystem */
-	DST_START,
-	DST_CMD_MAX
-};
-
-struct dst_ctl
-{
-	/* Storage name */
-	char			name[DST_NAMELEN];
-	/* Command flags */
-	__u32			flags;
-	/* Command itself (see above) */
-	__u32			cmd;
-	/* Maximum number of pages per single request in this device */
-	__u32			max_pages;
-	/* Stale/error transaction scanning timeout in milliseconds */
-	__u32			trans_scan_timeout;
-	/* Maximum number of retry sends before completing transaction as broken */
-	__u32			trans_max_retries;
-	/* Storage size */
-	__u64			size;
-};
-
-/* Reply command carries completion status */
-struct dst_ctl_ack
-{
-	struct cn_msg		msg;
-	int			error;
-	int			unused[3];
-};
-
-/*
- * Unfortunaltely socket address structure is not exported to userspace
- * and is redefined there.
- */
-#define SADDR_MAX_DATA	128
-
-struct saddr {
-	/* address family, AF_xxx	*/
-	unsigned short		sa_family;
-	/* 14 bytes of protocol address	*/
-	char			sa_data[SADDR_MAX_DATA];
-	/* Number of bytes used in sa_data */
-	unsigned short		sa_data_len;
-};
-
-/* Address structure */
-struct dst_network_ctl
-{
-	/* Socket type: datagram, stream...*/
-	unsigned int		type;
-	/* Let me guess, is it a Jupiter diameter? */
-	unsigned int		proto;
-	/* Peer's address */
-	struct saddr		addr;
-};
-
-struct dst_crypto_ctl
-{
-	/* Cipher and hash names */
-	char			cipher_algo[DST_NAMELEN];
-	char			hash_algo[DST_NAMELEN];
-
-	/* Key sizes. Can be zero for digest for example */
-	unsigned int		cipher_keysize, hash_keysize;
-	/* Alignment. Calculated by the DST itself. */
-	unsigned int		crypto_attached_size;
-	/* Number of threads to perform crypto operations */
-	int			thread_num;
-};
-
-/* Export security attributes have this bits checked in when client connects */
-#define DST_PERM_READ		(1<<0)
-#define DST_PERM_WRITE		(1<<1)
-
-/*
- * Right now it is simple model, where each remote address
- * is assigned to set of permissions it is allowed to perform.
- * In real world block device does not know anything but
- * reading and writing, so it should be more than enough.
- */
-struct dst_secure_user
-{
-	unsigned int		permissions;
-	struct saddr		addr;
-};
-
-/*
- * Export control command: device to export and network address to accept
- * clients to work with given device
- */
-struct dst_export_ctl
-{
-	char			device[DST_NAMELEN];
-	struct dst_network_ctl	ctl;
-};
-
-enum {
-	DST_CFG	= 1, 		/* Request remote configuration */
-	DST_IO,			/* IO command */
-	DST_IO_RESPONSE,	/* IO response */
-	DST_PING,		/* Keepalive message */
-	DST_NCMD_MAX,
-};
-
-struct dst_cmd
-{
-	/* Network command itself, see above */
-	__u32			cmd;
-	/*
-	 * Size of the attached data
-	 * (in most cases, for READ command it means how many bytes were requested)
-	 */
-	__u32			size;
-	/* Crypto size: number of attached bytes with digest/hmac */
-	__u32			csize;
-	/* Here we can carry secret data */
-	__u32			reserved;
-	/* Read/write bits, see how they are encoded in bio structure */
-	__u64			rw;
-	/* BIO flags */
-	__u64			flags;
-	/* Unique command id (like transaction ID) */
-	__u64			id;
-	/* Sector to start IO from */
-	__u64			sector;
-	/* Hash data is placed after this header */
-	__u8			hash[0];
-};
-
-/*
- * Convert command to/from network byte order.
- * We do not use hton*() functions, since there is
- * no 64-bit implementation.
- */
-static inline void dst_convert_cmd(struct dst_cmd *c)
-{
-	c->cmd = __cpu_to_be32(c->cmd);
-	c->csize = __cpu_to_be32(c->csize);
-	c->size = __cpu_to_be32(c->size);
-	c->sector = __cpu_to_be64(c->sector);
-	c->id = __cpu_to_be64(c->id);
-	c->flags = __cpu_to_be64(c->flags);
-	c->rw = __cpu_to_be64(c->rw);
-}
-
-/* Transaction id */
-typedef __u64 dst_gen_t;
-
-#ifdef __KERNEL__
-
-#include <linux/blkdev.h>
-#include <linux/bio.h>
-#include <linux/device.h>
-#include <linux/mempool.h>
-#include <linux/net.h>
-#include <linux/poll.h>
-#include <linux/rbtree.h>
-
-#ifdef CONFIG_DST_DEBUG
-#define dprintk(f, a...) printk(KERN_NOTICE f, ##a)
-#else
-static inline void __attribute__ ((format (printf, 1, 2)))
-	dprintk(const char *fmt, ...) {}
-#endif
-
-struct dst_node;
-
-struct dst_trans
-{
-	/* DST node we are working with */
-	struct dst_node		*n;
-
-	/* Entry inside transaction tree */
-	struct rb_node		trans_entry;
-
-	/* Merlin kills this transaction when this memory cell equals zero */
-	atomic_t		refcnt;
-
-	/* How this transaction should be processed by crypto engine */
-	short			enc;
-	/* How many times this transaction was resent */
-	short			retries;
-	/* Completion status */
-	int			error;
-
-	/* When did we send it to the remote peer */
-	long			send_time;
-
-	/* My name is...
-	 * Well, computers does not speak, they have unique id instead */
-	dst_gen_t		gen;
-
-	/* Block IO we are working with */
-	struct bio		*bio;
-
-	/* Network command for above block IO request */
-	struct dst_cmd		cmd;
-};
-
-struct dst_crypto_engine
-{
-	/* What should we do with all block requests */
-	struct crypto_hash	*hash;
-	struct crypto_ablkcipher	*cipher;
-
-	/* Pool of pages used to encrypt data into before sending */
-	int			page_num;
-	struct page		**pages;
-
-	/* What to do with current request */
-	int			enc;
-	/* Who we are and where do we go */
-	struct scatterlist	*src, *dst;
-
-	/* Maximum timeout waiting for encryption to be completed */
-	long			timeout;
-	/* IV is a 64-bit sequential counter */
-	u64			iv;
-
-	/* Secret data */
-	void			*private;
-
-	/* Cached temporary data lives here */
-	int			size;
-	void			*data;
-};
-
-struct dst_state
-{
-	/* The main state protection */
-	struct mutex		state_lock;
-
-	/* Polling machinery for sockets */
-	wait_queue_t 		wait;
-	wait_queue_head_t 	*whead;
-	/* Most of events are being waited here */
-	wait_queue_head_t 	thread_wait;
-
-	/* Who owns this? */
-	struct dst_node		*node;
-
-	/* Network address for this state */
-	struct dst_network_ctl	ctl;
-
-	/* Permissions to work with: read-only or rw connection */
-	u32			permissions;
-
-	/* Called when we need to clean private data */
-	void			(* cleanup)(struct dst_state *st);
-
-	/* Used by the server: BIO completion queues BIOs here */
-	struct list_head	request_list;
-	spinlock_t		request_lock;
-
-	/* Guess what? No, it is not number of planets */
-	atomic_t		refcnt;
-
-	/* This flags is set when connection should be dropped */
-	int			need_exit;
-
-	/*
-	 * Socket to work with. Second pointer is used for
-	 * lockless check if socket was changed before performing
-	 * next action (like working with cached polling result)
-	 */
-	struct socket		*socket, *read_socket;
-
-	/* Cached preallocated data */
-	void			*data;
-	unsigned int		size;
-
-	/* Currently processed command */
-	struct dst_cmd		cmd;
-};
-
-struct dst_info
-{
-	/* Device size */
-	u64			size;
-
-	/* Local device name for export devices */
-	char			local[DST_NAMELEN];
-
-	/* Network setup */
-	struct dst_network_ctl	net;
-
-	/* Sysfs bits use this */
-	struct device		device;
-};
-
-struct dst_node
-{
-	struct list_head	node_entry;
-
-	/* Hi, my name is stored here */
-	char			name[DST_NAMELEN];
-	/* My cache name is stored here */
-	char			cache_name[DST_NAMELEN];
-
-	/* Block device attached to given node.
-	 * Only valid for exporting nodes */
-	struct block_device 	*bdev;
-	/* Network state machine for given peer */
-	struct dst_state	*state;
-
-	/* Block IO machinery */
-	struct request_queue	*queue;
-	struct gendisk		*disk;
-
-	/* Number of threads in processing pool */
-	int			thread_num;
-	/* Maximum number of pages in single IO */
-	int			max_pages;
-
-	/* I'm that big in bytes */
-	loff_t			size;
-
-	/* Exported to userspace node information */
-	struct dst_info		*info;
-
-	/*
-	 * Security attribute list.
-	 * Used only by exporting node currently.
-	 */
-	struct list_head	security_list;
-	struct mutex		security_lock;
-
-	/*
-	 * When this unerflows below zero, university collapses.
-	 * But this will not happen, since node will be freed,
-	 * when reference counter reaches zero.
-	 */
-	atomic_t		refcnt;
-
-	/* How precisely should I be started? */
-	int 			(*start)(struct dst_node *);
-
-	/* Crypto capabilities */
-	struct dst_crypto_ctl	crypto;
-	u8			*hash_key;
-	u8			*cipher_key;
-
-	/* Pool of processing thread */
-	struct thread_pool	*pool;
-
-	/* Transaction IDs live here */
-	atomic_long_t		gen;
-
-	/*
-	 * How frequently and how many times transaction
-	 * tree should be scanned to drop stale objects.
-	 */
-	long			trans_scan_timeout;
-	int			trans_max_retries;
-
-	/* Small gnomes live here */
-	struct rb_root		trans_root;
-	struct mutex		trans_lock;
-
-	/*
-	 * Transaction cache/memory pool.
-	 * It is big enough to contain not only transaction
-	 * itself, but additional crypto data (digest/hmac).
-	 */
-	struct kmem_cache	*trans_cache;
-	mempool_t		*trans_pool;
-
-	/* This entity scans transaction tree */
-	struct delayed_work 	trans_work;
-
-	wait_queue_head_t	wait;
-};
-
-/* Kernel representation of the security attribute */
-struct dst_secure
-{
-	struct list_head	sec_entry;
-	struct dst_secure_user	sec;
-};
-
-int dst_process_bio(struct dst_node *n, struct bio *bio);
-
-int dst_node_init_connected(struct dst_node *n, struct dst_network_ctl *r);
-int dst_node_init_listened(struct dst_node *n, struct dst_export_ctl *le);
-
-static inline struct dst_state *dst_state_get(struct dst_state *st)
-{
-	BUG_ON(atomic_read(&st->refcnt) == 0);
-	atomic_inc(&st->refcnt);
-	return st;
-}
-
-void dst_state_put(struct dst_state *st);
-
-struct dst_state *dst_state_alloc(struct dst_node *n);
-int dst_state_socket_create(struct dst_state *st);
-void dst_state_socket_release(struct dst_state *st);
-
-void dst_state_exit_connected(struct dst_state *st);
-
-int dst_state_schedule_receiver(struct dst_state *st);
-
-void dst_dump_addr(struct socket *sk, struct sockaddr *sa, char *str);
-
-static inline void dst_state_lock(struct dst_state *st)
-{
-	mutex_lock(&st->state_lock);
-}
-
-static inline void dst_state_unlock(struct dst_state *st)
-{
-	mutex_unlock(&st->state_lock);
-}
-
-void dst_poll_exit(struct dst_state *st);
-int dst_poll_init(struct dst_state *st);
-
-static inline unsigned int dst_state_poll(struct dst_state *st)
-{
-	unsigned int revents = POLLHUP | POLLERR;
-
-	dst_state_lock(st);
-	if (st->socket)
-		revents = st->socket->ops->poll(NULL, st->socket, NULL);
-	dst_state_unlock(st);
-
-	return revents;
-}
-
-static inline int dst_thread_setup(void *private, void *data)
-{
-	return 0;
-}
-
-void dst_node_put(struct dst_node *n);
-
-static inline struct dst_node *dst_node_get(struct dst_node *n)
-{
-	atomic_inc(&n->refcnt);
-	return n;
-}
-
-int dst_data_recv(struct dst_state *st, void *data, unsigned int size);
-int dst_recv_cdata(struct dst_state *st, void *cdata);
-int dst_data_send_header(struct socket *sock,
-		void *data, unsigned int size, int more);
-
-int dst_send_bio(struct dst_state *st, struct dst_cmd *cmd, struct bio *bio);
-
-int dst_process_io(struct dst_state *st);
-int dst_export_crypto(struct dst_node *n, struct bio *bio);
-int dst_export_send_bio(struct bio *bio);
-int dst_start_export(struct dst_node *n);
-
-int __init dst_export_init(void);
-void dst_export_exit(void);
-
-/* Private structure for export block IO requests */
-struct dst_export_priv
-{
-	struct list_head		request_entry;
-	struct dst_state		*state;
-	struct bio			*bio;
-	struct dst_cmd			cmd;
-};
-
-static inline void dst_trans_get(struct dst_trans *t)
-{
-	atomic_inc(&t->refcnt);
-}
-
-struct dst_trans *dst_trans_search(struct dst_node *node, dst_gen_t gen);
-int dst_trans_remove(struct dst_trans *t);
-int dst_trans_remove_nolock(struct dst_trans *t);
-void dst_trans_put(struct dst_trans *t);
-
-/*
- * Convert bio into network command.
- */
-static inline void dst_bio_to_cmd(struct bio *bio, struct dst_cmd *cmd,
-		u32 command, u64 id)
-{
-	cmd->cmd = command;
-	cmd->flags = (bio->bi_flags << BIO_POOL_BITS) >> BIO_POOL_BITS;
-	cmd->rw = bio->bi_rw;
-	cmd->size = bio->bi_size;
-	cmd->csize = 0;
-	cmd->id = id;
-	cmd->sector = bio->bi_sector;
-};
-
-int dst_trans_send(struct dst_trans *t);
-int dst_trans_crypto(struct dst_trans *t);
-
-int dst_node_crypto_init(struct dst_node *n, struct dst_crypto_ctl *ctl);
-void dst_node_crypto_exit(struct dst_node *n);
-
-static inline int dst_need_crypto(struct dst_node *n)
-{
-	struct dst_crypto_ctl *c = &n->crypto;
-	/*
-	 * Logical OR is appropriate here, but boolean one produces
-	 * more optimal code, so it is used instead.
-	 */
-	return (c->hash_algo[0] | c->cipher_algo[0]);
-}
-
-int dst_node_trans_init(struct dst_node *n, unsigned int size);
-void dst_node_trans_exit(struct dst_node *n);
-
-/*
- * Pool of threads.
- * Ready list contains threads currently free to be used,
- * active one contains threads with some work scheduled for them.
- * Caller can wait in given queue when thread is ready.
- */
-struct thread_pool
-{
-	int			thread_num;
-	struct mutex		thread_lock;
-	struct list_head	ready_list, active_list;
-
-	wait_queue_head_t	wait;
-};
-
-void thread_pool_del_worker(struct thread_pool *p);
-void thread_pool_del_worker_id(struct thread_pool *p, unsigned int id);
-int thread_pool_add_worker(struct thread_pool *p,
-		char *name,
-		unsigned int id,
-		void *(* init)(void *data),
-		void (* cleanup)(void *data),
-		void *data);
-
-void thread_pool_destroy(struct thread_pool *p);
-struct thread_pool *thread_pool_create(int num, char *name,
-		void *(* init)(void *data),
-		void (* cleanup)(void *data),
-		void *data);
-
-int thread_pool_schedule(struct thread_pool *p,
-		int (* setup)(void *stored_private, void *setup_data),
-		int (* action)(void *stored_private, void *setup_data),
-		void *setup_data, long timeout);
-int thread_pool_schedule_private(struct thread_pool *p,
-		int (* setup)(void *private, void *data),
-		int (* action)(void *private, void *data),
-		void *data, long timeout, void *id);
-
-#endif /* __KERNEL__ */
-#endif /* __DST_H */
-- 
cgit v1.2.3


From 31d12926e37291970dd4f6e9940df3897766a81d Mon Sep 17 00:00:00 2001
From: laurent chavey <chavey@google.com>
Date: Tue, 15 Dec 2009 11:15:28 +0000
Subject: net: Add rtnetlink init_rcvwnd to set the TCP initial receive window

Add rtnetlink init_rcvwnd to set the TCP initial receive window size
advertised by passive and active TCP connections.
The current Linux TCP implementation limits the advertised TCP initial
receive window to the one prescribed by slow start. For short lived
TCP connections used for transaction type of traffic (i.e. http
requests), bounding the advertised TCP initial receive window results
in increased latency to complete the transaction.
Support for setting initial congestion window is already supported
using rtnetlink init_cwnd, but the feature is useless without the
ability to set a larger TCP initial receive window.
The rtnetlink init_rcvwnd allows increasing the TCP initial receive
window, allowing TCP connection to advertise larger TCP receive window
than the ones bounded by slow start.

Signed-off-by: Laurent Chavey <chavey@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/rtnetlink.h |  2 ++
 include/net/dst.h         |  2 --
 include/net/tcp.h         |  3 ++-
 net/ipv4/syncookies.c     |  3 ++-
 net/ipv4/tcp_output.c     | 17 +++++++++++++----
 net/ipv6/syncookies.c     |  3 ++-
 6 files changed, 21 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index 05330fc5b436..9590364fe8b5 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -362,6 +362,8 @@ enum {
 #define RTAX_FEATURES RTAX_FEATURES
 	RTAX_RTO_MIN,
 #define RTAX_RTO_MIN RTAX_RTO_MIN
+	RTAX_INITRWND,
+#define RTAX_INITRWND RTAX_INITRWND
 	__RTAX_MAX
 };
 
diff --git a/include/net/dst.h b/include/net/dst.h
index 39c4a5963e12..ce078cda6b74 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -83,8 +83,6 @@ struct dst_entry {
 	 * (L1_CACHE_SIZE would be too much)
 	 */
 #ifdef CONFIG_64BIT
-	long			__pad_to_align_refcnt[2];
-#else
 	long			__pad_to_align_refcnt[1];
 #endif
 	/*
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 185e22baecb1..788c99f98597 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -965,7 +965,8 @@ static inline void tcp_sack_reset(struct tcp_options_received *rx_opt)
 /* Determine a window scaling and initial window to offer. */
 extern void tcp_select_initial_window(int __space, __u32 mss,
 				      __u32 *rcv_wnd, __u32 *window_clamp,
-				      int wscale_ok, __u8 *rcv_wscale);
+				      int wscale_ok, __u8 *rcv_wscale,
+				      __u32 init_rcv_wnd);
 
 static inline int tcp_win_from_space(int space)
 {
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 66fd80ef2473..5c24db4a3c91 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -358,7 +358,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 
 	tcp_select_initial_window(tcp_full_space(sk), req->mss,
 				  &req->rcv_wnd, &req->window_clamp,
-				  ireq->wscale_ok, &rcv_wscale);
+				  ireq->wscale_ok, &rcv_wscale,
+				  dst_metric(&rt->u.dst, RTAX_INITRWND));
 
 	ireq->rcv_wscale  = rcv_wscale;
 
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 12b2af36eab8..4a1605d3f909 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -183,7 +183,8 @@ static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts)
  */
 void tcp_select_initial_window(int __space, __u32 mss,
 			       __u32 *rcv_wnd, __u32 *window_clamp,
-			       int wscale_ok, __u8 *rcv_wscale)
+			       int wscale_ok, __u8 *rcv_wscale,
+			       __u32 init_rcv_wnd)
 {
 	unsigned int space = (__space < 0 ? 0 : __space);
 
@@ -232,7 +233,13 @@ void tcp_select_initial_window(int __space, __u32 mss,
 			init_cwnd = 2;
 		else if (mss > 1460)
 			init_cwnd = 3;
-		if (*rcv_wnd > init_cwnd * mss)
+		/* when initializing use the value from init_rcv_wnd
+		 * rather than the default from above
+		 */
+		if (init_rcv_wnd &&
+		    (*rcv_wnd > init_rcv_wnd * mss))
+			*rcv_wnd = init_rcv_wnd * mss;
+		else if (*rcv_wnd > init_cwnd * mss)
 			*rcv_wnd = init_cwnd * mss;
 	}
 
@@ -2417,7 +2424,8 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 			&req->rcv_wnd,
 			&req->window_clamp,
 			ireq->wscale_ok,
-			&rcv_wscale);
+			&rcv_wscale,
+			dst_metric(dst, RTAX_INITRWND));
 		ireq->rcv_wscale = rcv_wscale;
 	}
 
@@ -2544,7 +2552,8 @@ static void tcp_connect_init(struct sock *sk)
 				  &tp->rcv_wnd,
 				  &tp->window_clamp,
 				  sysctl_tcp_window_scaling,
-				  &rcv_wscale);
+				  &rcv_wscale,
+				  dst_metric(dst, RTAX_INITRWND));
 
 	tp->rx_opt.rcv_wscale = rcv_wscale;
 	tp->rcv_ssthresh = tp->rcv_wnd;
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 7208a06576c6..34d1f0690d7e 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -269,7 +269,8 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 	req->window_clamp = tp->window_clamp ? :dst_metric(dst, RTAX_WINDOW);
 	tcp_select_initial_window(tcp_full_space(sk), req->mss,
 				  &req->rcv_wnd, &req->window_clamp,
-				  ireq->wscale_ok, &rcv_wscale);
+				  ireq->wscale_ok, &rcv_wscale,
+				  dst_metric(dst, RTAX_INITRWND));
 
 	ireq->rcv_wscale = rcv_wscale;
 
-- 
cgit v1.2.3


From 28f6aeea3f12d37bd258b2c0d5ba891bff4ec479 Mon Sep 17 00:00:00 2001
From: Jamal Hadi Salim <hadi@cyberus.ca>
Date: Fri, 25 Dec 2009 17:30:22 -0800
Subject: net: restore ip source validation

when using policy routing and the skb mark:
there are cases where a back path validation requires us
to use a different routing table for src ip validation than
the one used for mapping ingress dst ip.
One such a case is transparent proxying where we pretend to be
the destination system and therefore the local table
is used for incoming packets but possibly a main table would
be used on outbound.
Make the default behavior to allow the above and if users
need to turn on the symmetry via sysctl src_valid_mark

Signed-off-by: Jamal Hadi Salim <hadi@cyberus.ca>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/inetdevice.h | 1 +
 include/linux/sysctl.h     | 1 +
 net/ipv4/devinet.c         | 1 +
 net/ipv4/fib_frontend.c    | 2 ++
 4 files changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h
index 699e85c01a4d..b2304929434e 100644
--- a/include/linux/inetdevice.h
+++ b/include/linux/inetdevice.h
@@ -81,6 +81,7 @@ static inline void ipv4_devconf_setall(struct in_device *in_dev)
 #define IN_DEV_FORWARD(in_dev)		IN_DEV_CONF_GET((in_dev), FORWARDING)
 #define IN_DEV_MFORWARD(in_dev)		IN_DEV_ANDCONF((in_dev), MC_FORWARDING)
 #define IN_DEV_RPFILTER(in_dev)		IN_DEV_MAXCONF((in_dev), RP_FILTER)
+#define IN_DEV_SRC_VMARK(in_dev)    	IN_DEV_ORCONF((in_dev), SRC_VMARK)
 #define IN_DEV_SOURCE_ROUTE(in_dev)	IN_DEV_ANDCONF((in_dev), \
 						       ACCEPT_SOURCE_ROUTE)
 #define IN_DEV_ACCEPT_LOCAL(in_dev)	IN_DEV_ORCONF((in_dev), ACCEPT_LOCAL)
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 877ba039e6a4..bd27fbc9db62 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -482,6 +482,7 @@ enum
 	NET_IPV4_CONF_ARP_ACCEPT=21,
 	NET_IPV4_CONF_ARP_NOTIFY=22,
 	NET_IPV4_CONF_ACCEPT_LOCAL=23,
+	NET_IPV4_CONF_SRC_VMARK=24,
 	__NET_IPV4_CONF_MAX
 };
 
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 5cdbc102a418..040c4f05b653 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1397,6 +1397,7 @@ static struct devinet_sysctl_table {
 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
 					"accept_source_route"),
 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
+		DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
 		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
 		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
 		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 3323168ee52d..82dbf711d6d0 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -252,6 +252,8 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
 		no_addr = in_dev->ifa_list == NULL;
 		rpf = IN_DEV_RPFILTER(in_dev);
 		accept_local = IN_DEV_ACCEPT_LOCAL(in_dev);
+		if (mark && !IN_DEV_SRC_VMARK(in_dev))
+			fl.mark = 0;
 	}
 	rcu_read_unlock();
 
-- 
cgit v1.2.3


From e5cd6fe391aa8c93560bb7ffdfe334cf4d0a02e4 Mon Sep 17 00:00:00 2001
From: Octavian Purdila <opurdila@ixiacom.com>
Date: Sat, 26 Dec 2009 11:51:00 +0000
Subject: llc: add support for LLC_OPT_PKTINFO

Signed-off-by: Octavian Purdila <opurdila@ixiacom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/llc.h    |  7 +++++++
 include/net/llc_conn.h |  1 +
 net/llc/af_llc.c       | 29 +++++++++++++++++++++++++++++
 3 files changed, 37 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/llc.h b/include/linux/llc.h
index 7733585603f1..ad7074ba81af 100644
--- a/include/linux/llc.h
+++ b/include/linux/llc.h
@@ -36,6 +36,7 @@ enum llc_sockopts {
 	LLC_OPT_BUSY_TMR_EXP,	/* busy state expire time (secs). */
 	LLC_OPT_TX_WIN,		/* tx window size. */
 	LLC_OPT_RX_WIN,		/* rx window size. */
+	LLC_OPT_PKTINFO,	/* ancillary packet information. */
 	LLC_OPT_MAX
 };
 
@@ -70,6 +71,12 @@ enum llc_sockopts {
 #define LLC_SAP_RM	0xD4		/* Resource Management 		*/
 #define LLC_SAP_GLOBAL	0xFF		/* Global SAP. 			*/
 
+struct llc_pktinfo {
+	int lpi_ifindex;
+	unsigned char lpi_sap;
+	unsigned char lpi_mac[IFHWADDRLEN];
+};
+
 #ifdef __KERNEL__
 #define LLC_SAP_DYN_START	0xC0
 #define LLC_SAP_DYN_STOP	0xDE
diff --git a/include/net/llc_conn.h b/include/net/llc_conn.h
index e2374e34989f..fe982fd94c4a 100644
--- a/include/net/llc_conn.h
+++ b/include/net/llc_conn.h
@@ -76,6 +76,7 @@ struct llc_sock {
 	u32		    rx_pdu_hdr;	   /* used for saving header of last pdu
 					      received and caused sending FRMR.
 					      Used for resending FRMR */
+	u32		    cmsg_flags;
 };
 
 static inline struct llc_sock *llc_sk(const struct sock *sk)
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index 3a66546cad06..ac691fe08076 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -47,6 +47,10 @@ static int llc_ui_wait_for_busy_core(struct sock *sk, long timeout);
 #define dprintk(args...)
 #endif
 
+/* Maybe we'll add some more in the future. */
+#define LLC_CMSG_PKTINFO	1
+
+
 /**
  *	llc_ui_next_link_no - return the next unused link number for a sap
  *	@sap: Address of sap to get link number from.
@@ -591,6 +595,20 @@ static int llc_wait_data(struct sock *sk, long timeo)
 	return rc;
 }
 
+static void llc_cmsg_rcv(struct msghdr *msg, struct sk_buff *skb)
+{
+	struct llc_sock *llc = llc_sk(skb->sk);
+
+	if (llc->cmsg_flags & LLC_CMSG_PKTINFO) {
+		struct llc_pktinfo info;
+
+		info.lpi_ifindex = llc_sk(skb->sk)->dev->ifindex;
+		llc_pdu_decode_dsap(skb, &info.lpi_sap);
+		llc_pdu_decode_da(skb, info.lpi_mac);
+		put_cmsg(msg, SOL_LLC, LLC_OPT_PKTINFO, sizeof(info), &info);
+	}
+}
+
 /**
  *	llc_ui_accept - accept a new incoming connection.
  *	@sock: Socket which connections arrive on.
@@ -812,6 +830,8 @@ copy_uaddr:
 		memcpy(uaddr, llc_ui_skb_cb(skb), sizeof(*uaddr));
 		msg->msg_namelen = sizeof(*uaddr);
 	}
+	if (llc_sk(sk)->cmsg_flags)
+		llc_cmsg_rcv(msg, skb);
 	goto out;
 }
 
@@ -1030,6 +1050,12 @@ static int llc_ui_setsockopt(struct socket *sock, int level, int optname,
 			goto out;
 		llc->rw = opt;
 		break;
+	case LLC_OPT_PKTINFO:
+		if (opt)
+			llc->cmsg_flags |= LLC_CMSG_PKTINFO;
+		else
+			llc->cmsg_flags &= ~LLC_CMSG_PKTINFO;
+		break;
 	default:
 		rc = -ENOPROTOOPT;
 		goto out;
@@ -1083,6 +1109,9 @@ static int llc_ui_getsockopt(struct socket *sock, int level, int optname,
 		val = llc->k;				break;
 	case LLC_OPT_RX_WIN:
 		val = llc->rw;				break;
+	case LLC_OPT_PKTINFO:
+		val = (llc->cmsg_flags & LLC_CMSG_PKTINFO) != 0;
+		break;
 	default:
 		rc = -ENOPROTOOPT;
 		goto out;
-- 
cgit v1.2.3


From 49f474331e563a6ecf3b1e87ec27ec5482b3e4f1 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Sun, 27 Dec 2009 11:51:52 +0100
Subject: perf events: Remove arg from perf sched hooks

Since we only ever schedule the local cpu, there is no need to pass the
cpu number to the perf sched hooks.

This micro-optimizes things a bit.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_event.h | 12 ++++++------
 kernel/perf_event.c        | 27 ++++++++++++++-------------
 kernel/sched.c             |  6 +++---
 3 files changed, 23 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index c66b34f75eea..a494e7501292 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -746,10 +746,10 @@ extern int perf_max_events;
 
 extern const struct pmu *hw_perf_event_init(struct perf_event *event);
 
-extern void perf_event_task_sched_in(struct task_struct *task, int cpu);
+extern void perf_event_task_sched_in(struct task_struct *task);
 extern void perf_event_task_sched_out(struct task_struct *task,
-					struct task_struct *next, int cpu);
-extern void perf_event_task_tick(struct task_struct *task, int cpu);
+					struct task_struct *next);
+extern void perf_event_task_tick(struct task_struct *task);
 extern int perf_event_init_task(struct task_struct *child);
 extern void perf_event_exit_task(struct task_struct *child);
 extern void perf_event_free_task(struct task_struct *task);
@@ -870,12 +870,12 @@ extern void perf_event_enable(struct perf_event *event);
 extern void perf_event_disable(struct perf_event *event);
 #else
 static inline void
-perf_event_task_sched_in(struct task_struct *task, int cpu)		{ }
+perf_event_task_sched_in(struct task_struct *task)			{ }
 static inline void
 perf_event_task_sched_out(struct task_struct *task,
-			    struct task_struct *next, int cpu)		{ }
+			    struct task_struct *next)			{ }
 static inline void
-perf_event_task_tick(struct task_struct *task, int cpu)			{ }
+perf_event_task_tick(struct task_struct *task)				{ }
 static inline int perf_event_init_task(struct task_struct *child)	{ return 0; }
 static inline void perf_event_exit_task(struct task_struct *child)	{ }
 static inline void perf_event_free_task(struct task_struct *task)	{ }
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 03cc061398d1..099bd662daa6 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -1170,9 +1170,9 @@ static void perf_event_sync_stat(struct perf_event_context *ctx,
  * not restart the event.
  */
 void perf_event_task_sched_out(struct task_struct *task,
-				 struct task_struct *next, int cpu)
+				 struct task_struct *next)
 {
-	struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
+	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
 	struct perf_event_context *ctx = task->perf_event_ctxp;
 	struct perf_event_context *next_ctx;
 	struct perf_event_context *parent;
@@ -1252,8 +1252,9 @@ static void perf_event_cpu_sched_out(struct perf_cpu_context *cpuctx)
 
 static void
 __perf_event_sched_in(struct perf_event_context *ctx,
-			struct perf_cpu_context *cpuctx, int cpu)
+			struct perf_cpu_context *cpuctx)
 {
+	int cpu = smp_processor_id();
 	struct perf_event *event;
 	int can_add_hw = 1;
 
@@ -1326,24 +1327,24 @@ __perf_event_sched_in(struct perf_event_context *ctx,
  * accessing the event control register. If a NMI hits, then it will
  * keep the event running.
  */
-void perf_event_task_sched_in(struct task_struct *task, int cpu)
+void perf_event_task_sched_in(struct task_struct *task)
 {
-	struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
+	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
 	struct perf_event_context *ctx = task->perf_event_ctxp;
 
 	if (likely(!ctx))
 		return;
 	if (cpuctx->task_ctx == ctx)
 		return;
-	__perf_event_sched_in(ctx, cpuctx, cpu);
+	__perf_event_sched_in(ctx, cpuctx);
 	cpuctx->task_ctx = ctx;
 }
 
-static void perf_event_cpu_sched_in(struct perf_cpu_context *cpuctx, int cpu)
+static void perf_event_cpu_sched_in(struct perf_cpu_context *cpuctx)
 {
 	struct perf_event_context *ctx = &cpuctx->ctx;
 
-	__perf_event_sched_in(ctx, cpuctx, cpu);
+	__perf_event_sched_in(ctx, cpuctx);
 }
 
 #define MAX_INTERRUPTS (~0ULL)
@@ -1461,7 +1462,7 @@ static void rotate_ctx(struct perf_event_context *ctx)
 	raw_spin_unlock(&ctx->lock);
 }
 
-void perf_event_task_tick(struct task_struct *curr, int cpu)
+void perf_event_task_tick(struct task_struct *curr)
 {
 	struct perf_cpu_context *cpuctx;
 	struct perf_event_context *ctx;
@@ -1469,7 +1470,7 @@ void perf_event_task_tick(struct task_struct *curr, int cpu)
 	if (!atomic_read(&nr_events))
 		return;
 
-	cpuctx = &per_cpu(perf_cpu_context, cpu);
+	cpuctx = &__get_cpu_var(perf_cpu_context);
 	ctx = curr->perf_event_ctxp;
 
 	perf_ctx_adjust_freq(&cpuctx->ctx);
@@ -1484,9 +1485,9 @@ void perf_event_task_tick(struct task_struct *curr, int cpu)
 	if (ctx)
 		rotate_ctx(ctx);
 
-	perf_event_cpu_sched_in(cpuctx, cpu);
+	perf_event_cpu_sched_in(cpuctx);
 	if (ctx)
-		perf_event_task_sched_in(curr, cpu);
+		perf_event_task_sched_in(curr);
 }
 
 /*
@@ -1527,7 +1528,7 @@ static void perf_event_enable_on_exec(struct task_struct *task)
 
 	raw_spin_unlock(&ctx->lock);
 
-	perf_event_task_sched_in(task, smp_processor_id());
+	perf_event_task_sched_in(task);
  out:
 	local_irq_restore(flags);
 }
diff --git a/kernel/sched.c b/kernel/sched.c
index 18cceeecce35..d6527ac0f6e7 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2752,7 +2752,7 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
 	 */
 	prev_state = prev->state;
 	finish_arch_switch(prev);
-	perf_event_task_sched_in(current, cpu_of(rq));
+	perf_event_task_sched_in(current);
 	finish_lock_switch(rq, prev);
 
 	fire_sched_in_preempt_notifiers(current);
@@ -5266,7 +5266,7 @@ void scheduler_tick(void)
 	curr->sched_class->task_tick(rq, curr, 0);
 	raw_spin_unlock(&rq->lock);
 
-	perf_event_task_tick(curr, cpu);
+	perf_event_task_tick(curr);
 
 #ifdef CONFIG_SMP
 	rq->idle_at_tick = idle_cpu(cpu);
@@ -5480,7 +5480,7 @@ need_resched_nonpreemptible:
 
 	if (likely(prev != next)) {
 		sched_info_switch(prev, next);
-		perf_event_task_sched_out(prev, next, cpu);
+		perf_event_task_sched_out(prev, next);
 
 		rq->nr_switches++;
 		rq->curr = next;
-- 
cgit v1.2.3


From 07b139c8c81b97bbe55c68daf0cbeca8b1c609ca Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Mon, 21 Dec 2009 14:27:35 +0800
Subject: perf events: Remove CONFIG_EVENT_PROFILE

Quoted from Ingo:

| This reminds me - i think we should eliminate CONFIG_EVENT_PROFILE -
| it's an unnecessary Kconfig complication. If both PERF_EVENTS and
| EVENT_TRACING is enabled we should expose generic tracepoints.
|
| Nor is it limited to event 'profiling', so it has become a misnomer as
| well.

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <4B2F1557.2050705@cn.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/ftrace_event.h       |  2 +-
 include/linux/perf_event.h         |  2 +-
 include/linux/syscalls.h           |  4 ++--
 include/trace/ftrace.h             | 12 ++++++------
 include/trace/syscall.h            |  4 ++--
 init/Kconfig                       | 13 -------------
 kernel/perf_event.c                |  4 ++--
 kernel/trace/Makefile              |  4 +++-
 kernel/trace/trace_events_filter.c |  4 ++--
 kernel/trace/trace_kprobe.c        | 14 +++++++-------
 kernel/trace/trace_syscalls.c      |  5 ++---
 11 files changed, 28 insertions(+), 40 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 2233c98d80df..0a09e758c7d3 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -188,7 +188,7 @@ do {									\
 		__trace_printk(ip, fmt, ##args);			\
 } while (0)
 
-#ifdef CONFIG_EVENT_PROFILE
+#ifdef CONFIG_PERF_EVENTS
 struct perf_event;
 extern int ftrace_profile_enable(int event_id);
 extern void ftrace_profile_disable(int event_id);
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index a494e7501292..9a1d276db754 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -658,7 +658,7 @@ struct perf_event {
 
 	perf_overflow_handler_t		overflow_handler;
 
-#ifdef CONFIG_EVENT_PROFILE
+#ifdef CONFIG_EVENT_TRACING
 	struct event_filter		*filter;
 #endif
 
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 65793e90d6f6..b7c7fcf7790b 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -99,7 +99,7 @@ struct perf_event_attr;
 #define __SC_TEST5(t5, a5, ...)	__SC_TEST(t5); __SC_TEST4(__VA_ARGS__)
 #define __SC_TEST6(t6, a6, ...)	__SC_TEST(t6); __SC_TEST5(__VA_ARGS__)
 
-#ifdef CONFIG_EVENT_PROFILE
+#ifdef CONFIG_PERF_EVENTS
 
 #define TRACE_SYS_ENTER_PROFILE_INIT(sname)				       \
 	.profile_enable = prof_sysenter_enable,				       \
@@ -113,7 +113,7 @@ struct perf_event_attr;
 #define TRACE_SYS_ENTER_PROFILE_INIT(sname)
 #define TRACE_SYS_EXIT_PROFILE(sname)
 #define TRACE_SYS_EXIT_PROFILE_INIT(sname)
-#endif
+#endif /* CONFIG_PERF_EVENTS */
 
 #ifdef CONFIG_FTRACE_SYSCALLS
 #define __SC_STR_ADECL1(t, a)		#a
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index 73523151a731..2fdd36df41f6 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -498,7 +498,7 @@ static inline int ftrace_get_offsets_##call(				\
 
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
 
-#ifdef CONFIG_EVENT_PROFILE
+#ifdef CONFIG_PERF_EVENTS
 
 /*
  * Generate the functions needed for tracepoint perf_event support.
@@ -541,7 +541,7 @@ static void ftrace_profile_disable_##name(struct ftrace_event_call *unused)\
 
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
 
-#endif
+#endif /* CONFIG_PERF_EVENTS */
 
 /*
  * Stage 4 of the trace events.
@@ -626,7 +626,7 @@ static void ftrace_profile_disable_##name(struct ftrace_event_call *unused)\
  *
  */
 
-#ifdef CONFIG_EVENT_PROFILE
+#ifdef CONFIG_PERF_EVENTS
 
 #define _TRACE_PROFILE_INIT(call)					\
 	.profile_enable = ftrace_profile_enable_##call,			\
@@ -634,7 +634,7 @@ static void ftrace_profile_disable_##name(struct ftrace_event_call *unused)\
 
 #else
 #define _TRACE_PROFILE_INIT(call)
-#endif
+#endif /* CONFIG_PERF_EVENTS */
 
 #undef __entry
 #define __entry entry
@@ -834,7 +834,7 @@ __attribute__((section("_ftrace_events"))) event_##call = {		\
  * }
  */
 
-#ifdef CONFIG_EVENT_PROFILE
+#ifdef CONFIG_PERF_EVENTS
 
 #undef __perf_addr
 #define __perf_addr(a) __addr = (a)
@@ -926,7 +926,7 @@ static void ftrace_profile_##call(proto)			\
 	DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args))
 
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
-#endif /* CONFIG_EVENT_PROFILE */
+#endif /* CONFIG_PERF_EVENTS */
 
 #undef _TRACE_PROFILE_INIT
 
diff --git a/include/trace/syscall.h b/include/trace/syscall.h
index 961fda3556bb..3d463dcef298 100644
--- a/include/trace/syscall.h
+++ b/include/trace/syscall.h
@@ -49,12 +49,12 @@ ftrace_format_syscall(struct ftrace_event_call *call, struct trace_seq *s);
 enum print_line_t print_syscall_enter(struct trace_iterator *iter, int flags);
 enum print_line_t print_syscall_exit(struct trace_iterator *iter, int flags);
 #endif
-#ifdef CONFIG_EVENT_PROFILE
+
+#ifdef CONFIG_PERF_EVENTS
 int prof_sysenter_enable(struct ftrace_event_call *call);
 void prof_sysenter_disable(struct ftrace_event_call *call);
 int prof_sysexit_enable(struct ftrace_event_call *call);
 void prof_sysexit_disable(struct ftrace_event_call *call);
-
 #endif
 
 #endif /* _TRACE_SYSCALL_H */
diff --git a/init/Kconfig b/init/Kconfig
index a23da9f01803..06dab27c18d9 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -966,19 +966,6 @@ config PERF_EVENTS
 
 	  Say Y if unsure.
 
-config EVENT_PROFILE
-	bool "Tracepoint profiling sources"
-	depends on PERF_EVENTS && EVENT_TRACING
-	default y
-	help
-	 Allow the use of tracepoints as software performance events.
-
-	 When this is enabled, you can create perf events based on
-	 tracepoints using PERF_TYPE_TRACEPOINT and the tracepoint ID
-	 found in debugfs://tracing/events/*/*/id. (The -e/--events
-	 option to the perf tool can parse and interpret symbolic
-	 tracepoints, in the subsystem:tracepoint_name format.)
-
 config PERF_COUNTERS
 	bool "Kernel performance counters (old config option)"
 	depends on HAVE_PERF_EVENTS
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 099bd662daa6..5b987b4a98a8 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -4177,7 +4177,7 @@ static const struct pmu perf_ops_task_clock = {
 	.read		= task_clock_perf_event_read,
 };
 
-#ifdef CONFIG_EVENT_PROFILE
+#ifdef CONFIG_EVENT_TRACING
 
 void perf_tp_event(int event_id, u64 addr, u64 count, void *record,
 			  int entry_size)
@@ -4282,7 +4282,7 @@ static void perf_event_free_filter(struct perf_event *event)
 {
 }
 
-#endif /* CONFIG_EVENT_PROFILE */
+#endif /* CONFIG_EVENT_TRACING */
 
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
 static void bp_perf_event_destroy(struct perf_event *event)
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index cd9ecd89ec77..d00c6fe23f54 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -51,7 +51,9 @@ endif
 obj-$(CONFIG_EVENT_TRACING) += trace_events.o
 obj-$(CONFIG_EVENT_TRACING) += trace_export.o
 obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o
-obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o
+ifeq ($(CONFIG_PERF_EVENTS),y)
+obj-$(CONFIG_EVENT_TRACING) += trace_event_profile.o
+endif
 obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o
 obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o
 obj-$(CONFIG_KSYM_TRACER) += trace_ksym.o
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 50504cb228de..74563d7e102e 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -1360,7 +1360,7 @@ out_unlock:
 	return err;
 }
 
-#ifdef CONFIG_EVENT_PROFILE
+#ifdef CONFIG_PERF_EVENTS
 
 void ftrace_profile_free_filter(struct perf_event *event)
 {
@@ -1428,5 +1428,5 @@ out_unlock:
 	return err;
 }
 
-#endif /* CONFIG_EVENT_PROFILE */
+#endif /* CONFIG_PERF_EVENTS */
 
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 375f81a568dc..75d75dec226a 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -1249,7 +1249,7 @@ static int kretprobe_event_show_format(struct ftrace_event_call *call,
 					 ", REC->" FIELD_STRING_RETIP);
 }
 
-#ifdef CONFIG_EVENT_PROFILE
+#ifdef CONFIG_PERF_EVENTS
 
 /* Kprobe profile handler */
 static __kprobes int kprobe_profile_func(struct kprobe *kp,
@@ -1407,7 +1407,7 @@ static void probe_profile_disable(struct ftrace_event_call *call)
 			disable_kprobe(&tp->rp.kp);
 	}
 }
-#endif	/* CONFIG_EVENT_PROFILE */
+#endif	/* CONFIG_PERF_EVENTS */
 
 
 static __kprobes
@@ -1417,10 +1417,10 @@ int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
 
 	if (tp->flags & TP_FLAG_TRACE)
 		kprobe_trace_func(kp, regs);
-#ifdef CONFIG_EVENT_PROFILE
+#ifdef CONFIG_PERF_EVENTS
 	if (tp->flags & TP_FLAG_PROFILE)
 		kprobe_profile_func(kp, regs);
-#endif	/* CONFIG_EVENT_PROFILE */
+#endif
 	return 0;	/* We don't tweek kernel, so just return 0 */
 }
 
@@ -1431,10 +1431,10 @@ int kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs)
 
 	if (tp->flags & TP_FLAG_TRACE)
 		kretprobe_trace_func(ri, regs);
-#ifdef CONFIG_EVENT_PROFILE
+#ifdef CONFIG_PERF_EVENTS
 	if (tp->flags & TP_FLAG_PROFILE)
 		kretprobe_profile_func(ri, regs);
-#endif	/* CONFIG_EVENT_PROFILE */
+#endif
 	return 0;	/* We don't tweek kernel, so just return 0 */
 }
 
@@ -1463,7 +1463,7 @@ static int register_probe_event(struct trace_probe *tp)
 	call->regfunc = probe_event_enable;
 	call->unregfunc = probe_event_disable;
 
-#ifdef CONFIG_EVENT_PROFILE
+#ifdef CONFIG_PERF_EVENTS
 	call->profile_enable = probe_profile_enable;
 	call->profile_disable = probe_profile_disable;
 #endif
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 75289f372dd2..f694f66d75b0 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -421,7 +421,7 @@ int __init init_ftrace_syscalls(void)
 }
 core_initcall(init_ftrace_syscalls);
 
-#ifdef CONFIG_EVENT_PROFILE
+#ifdef CONFIG_PERF_EVENTS
 
 static DECLARE_BITMAP(enabled_prof_enter_syscalls, NR_syscalls);
 static DECLARE_BITMAP(enabled_prof_exit_syscalls, NR_syscalls);
@@ -626,6 +626,5 @@ void prof_sysexit_disable(struct ftrace_event_call *call)
 	mutex_unlock(&syscall_trace_lock);
 }
 
-#endif
-
+#endif /* CONFIG_PERF_EVENTS */
 
-- 
cgit v1.2.3


From d894837f23f491aa7ed167aae767fc07cfe6e6e6 Mon Sep 17 00:00:00 2001
From: Simon Kagstrom <simon.kagstrom@netinsight.net>
Date: Wed, 23 Dec 2009 11:08:18 +0100
Subject: sched: might_sleep(): Make file parameter const char *

Fixes a warning when building with g++:

 warning: deprecated conversion from string constant to 'char*'

And the file parameter use is constant, so mark it as such.

Signed-off-by: Simon Kagstrom <simon.kagstrom@netinsight.net>
Cc: peterz@infradead.org
LKML-Reference: <20091223110818.442d848e@marrow.netinsight.se>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/kernel.h | 5 +++--
 kernel/sched.c         | 2 +-
 2 files changed, 4 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 3fc9f5aab5f8..785d7d1099d4 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -124,7 +124,7 @@ extern int _cond_resched(void);
 #endif
 
 #ifdef CONFIG_DEBUG_SPINLOCK_SLEEP
-  void __might_sleep(char *file, int line, int preempt_offset);
+  void __might_sleep(const char *file, int line, int preempt_offset);
 /**
  * might_sleep - annotation for functions that can sleep
  *
@@ -138,7 +138,8 @@ extern int _cond_resched(void);
 # define might_sleep() \
 	do { __might_sleep(__FILE__, __LINE__, 0); might_resched(); } while (0)
 #else
-  static inline void __might_sleep(char *file, int line, int preempt_offset) { }
+  static inline void __might_sleep(const char *file, int line,
+				   int preempt_offset) { }
 # define might_sleep() do { might_resched(); } while (0)
 #endif
 
diff --git a/kernel/sched.c b/kernel/sched.c
index c535cc4f6428..64298a52eaa6 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -9694,7 +9694,7 @@ static inline int preempt_count_equals(int preempt_offset)
 	return (nested == PREEMPT_INATOMIC_BASE + preempt_offset);
 }
 
-void __might_sleep(char *file, int line, int preempt_offset)
+void __might_sleep(const char *file, int line, int preempt_offset)
 {
 #ifdef in_atomic
 	static unsigned long prev_jiffy;	/* ratelimiting */
-- 
cgit v1.2.3


From 8e664fb3fd2b04e3ac5fad7f046000ba54e0e275 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Wed, 23 Dec 2009 13:15:38 +0100
Subject: mac80211: split up and insert custom IEs correctly

Currently, we insert all user-specified IEs before the HT
IE for association, and after the HT IE for probe requests.
For association, that's correct only if the user-specified
IEs are RSN only, incorrect in all other cases including
WPA. Change this to split apart the user-specified IEs in
two places for association: before the HT IE (e.g. RSN),
after the HT IE (generally empty right now I think?) and
after WMM (all other vendor-specific IEs). For probes,
split the IEs in different places to be correct according
to the spec.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/ieee80211.h  |  39 ++++++++++---
 net/mac80211/ieee80211_i.h |   4 ++
 net/mac80211/util.c        | 134 ++++++++++++++++++++++++++++++++++++++-------
 net/mac80211/work.c        |  43 ++++++++++++---
 4 files changed, 184 insertions(+), 36 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index d62edc7df3ae..aeea282bd2fe 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -1085,12 +1085,12 @@ enum ieee80211_eid {
 	WLAN_EID_TIM = 5,
 	WLAN_EID_IBSS_PARAMS = 6,
 	WLAN_EID_CHALLENGE = 16,
-	/* 802.11d */
+
 	WLAN_EID_COUNTRY = 7,
 	WLAN_EID_HP_PARAMS = 8,
 	WLAN_EID_HP_TABLE = 9,
 	WLAN_EID_REQUEST = 10,
-	/* 802.11e */
+
 	WLAN_EID_QBSS_LOAD = 11,
 	WLAN_EID_EDCA_PARAM_SET = 12,
 	WLAN_EID_TSPEC = 13,
@@ -1113,7 +1113,7 @@ enum ieee80211_eid {
 	WLAN_EID_PREP = 69,
 	WLAN_EID_PERR = 70,
 	WLAN_EID_RANN = 49,	/* compatible with FreeBSD */
-	/* 802.11h */
+
 	WLAN_EID_PWR_CONSTRAINT = 32,
 	WLAN_EID_PWR_CAPABILITY = 33,
 	WLAN_EID_TPC_REQUEST = 34,
@@ -1124,20 +1124,41 @@ enum ieee80211_eid {
 	WLAN_EID_MEASURE_REPORT = 39,
 	WLAN_EID_QUIET = 40,
 	WLAN_EID_IBSS_DFS = 41,
-	/* 802.11g */
+
 	WLAN_EID_ERP_INFO = 42,
 	WLAN_EID_EXT_SUPP_RATES = 50,
-	/* 802.11n */
+
 	WLAN_EID_HT_CAPABILITY = 45,
 	WLAN_EID_HT_INFORMATION = 61,
-	/* 802.11i */
+
 	WLAN_EID_RSN = 48,
-	WLAN_EID_TIMEOUT_INTERVAL = 56,
-	WLAN_EID_MMIE = 76 /* 802.11w */,
+	WLAN_EID_MMIE = 76,
 	WLAN_EID_WPA = 221,
 	WLAN_EID_GENERIC = 221,
 	WLAN_EID_VENDOR_SPECIFIC = 221,
-	WLAN_EID_QOS_PARAMETER = 222
+	WLAN_EID_QOS_PARAMETER = 222,
+
+	WLAN_EID_AP_CHAN_REPORT = 51,
+	WLAN_EID_NEIGHBOR_REPORT = 52,
+	WLAN_EID_RCPI = 53,
+	WLAN_EID_BSS_AVG_ACCESS_DELAY = 63,
+	WLAN_EID_ANTENNA_INFO = 64,
+	WLAN_EID_RSNI = 65,
+	WLAN_EID_MEASUREMENT_PILOT_TX_INFO = 66,
+	WLAN_EID_BSS_AVAILABLE_CAPACITY = 67,
+	WLAN_EID_BSS_AC_ACCESS_DELAY = 68,
+	WLAN_EID_RRM_ENABLED_CAPABILITIES = 70,
+	WLAN_EID_MULTIPLE_BSSID = 71,
+
+	WLAN_EID_MOBILITY_DOMAIN = 54,
+	WLAN_EID_FAST_BSS_TRANSITION = 55,
+	WLAN_EID_TIMEOUT_INTERVAL = 56,
+	WLAN_EID_RIC_DATA = 57,
+	WLAN_EID_RIC_DESCRIPTOR = 75,
+
+	WLAN_EID_DSE_REGISTERED_LOCATION = 58,
+	WLAN_EID_SUPPORTED_REGULATORY_CLASSES = 59,
+	WLAN_EID_EXT_CHANSWITCH_ANN = 60,
 };
 
 /* Action category code */
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 97b6076b492e..6ea4ffbf84d8 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -1148,6 +1148,10 @@ int __ieee80211_request_smps(struct ieee80211_sub_if_data *sdata,
 void ieee80211_recalc_smps(struct ieee80211_local *local,
 			   struct ieee80211_sub_if_data *forsdata);
 
+size_t ieee80211_ie_split(const u8 *ies, size_t ielen,
+			  const u8 *ids, int n_ids, size_t offset);
+size_t ieee80211_ie_split_vendor(const u8 *ies, size_t ielen, size_t offset);
+
 /* internal work items */
 void ieee80211_work_init(struct ieee80211_local *local);
 void ieee80211_add_work(struct ieee80211_work *wk);
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 5ffe9e831b66..1fdb80ff9241 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -881,30 +881,66 @@ int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer,
 			     enum ieee80211_band band)
 {
 	struct ieee80211_supported_band *sband;
-	u8 *pos, *supp_rates_len, *esupp_rates_len = NULL;
-	int i;
+	u8 *pos;
+	size_t offset = 0, noffset;
+	int supp_rates_len, i;
 
 	sband = local->hw.wiphy->bands[band];
 
 	pos = buffer;
 
+	supp_rates_len = min_t(int, sband->n_bitrates, 8);
+
 	*pos++ = WLAN_EID_SUPP_RATES;
-	supp_rates_len = pos;
-	*pos++ = 0;
-
-	for (i = 0; i < sband->n_bitrates; i++) {
-		struct ieee80211_rate *rate = &sband->bitrates[i];
-
-		if (esupp_rates_len) {
-			*esupp_rates_len += 1;
-		} else if (*supp_rates_len == 8) {
-			*pos++ = WLAN_EID_EXT_SUPP_RATES;
-			esupp_rates_len = pos;
-			*pos++ = 1;
-		} else
-			*supp_rates_len += 1;
+	*pos++ = supp_rates_len;
 
-		*pos++ = rate->bitrate / 5;
+	for (i = 0; i < supp_rates_len; i++) {
+		int rate = sband->bitrates[i].bitrate;
+		*pos++ = (u8) (rate / 5);
+	}
+
+	/* insert "request information" if in custom IEs */
+	if (ie && ie_len) {
+		static const u8 before_extrates[] = {
+			WLAN_EID_SSID,
+			WLAN_EID_SUPP_RATES,
+			WLAN_EID_REQUEST,
+		};
+		noffset = ieee80211_ie_split(ie, ie_len,
+					     before_extrates,
+					     ARRAY_SIZE(before_extrates),
+					     offset);
+		memcpy(pos, ie + offset, noffset - offset);
+		pos += noffset - offset;
+		offset = noffset;
+	}
+
+	if (sband->n_bitrates > i) {
+		*pos++ = WLAN_EID_EXT_SUPP_RATES;
+		*pos++ = sband->n_bitrates - i;
+
+		for (; i < sband->n_bitrates; i++) {
+			int rate = sband->bitrates[i].bitrate;
+			*pos++ = (u8) (rate / 5);
+		}
+	}
+
+	/* insert custom IEs that go before HT */
+	if (ie && ie_len) {
+		static const u8 before_ht[] = {
+			WLAN_EID_SSID,
+			WLAN_EID_SUPP_RATES,
+			WLAN_EID_REQUEST,
+			WLAN_EID_EXT_SUPP_RATES,
+			WLAN_EID_DS_PARAMS,
+			WLAN_EID_SUPPORTED_REGULATORY_CLASSES,
+		};
+		noffset = ieee80211_ie_split(ie, ie_len,
+					     before_ht, ARRAY_SIZE(before_ht),
+					     offset);
+		memcpy(pos, ie + offset, noffset - offset);
+		pos += noffset - offset;
+		offset = noffset;
 	}
 
 	if (sband->ht_cap.ht_supported) {
@@ -936,9 +972,11 @@ int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer,
 	 * that calculates local->scan_ies_len.
 	 */
 
-	if (ie) {
-		memcpy(pos, ie, ie_len);
-		pos += ie_len;
+	/* add any remaining custom IEs */
+	if (ie && ie_len) {
+		noffset = ie_len;
+		memcpy(pos, ie + offset, noffset - offset);
+		pos += noffset - offset;
 	}
 
 	return pos - buffer;
@@ -1252,3 +1290,59 @@ void ieee80211_recalc_smps(struct ieee80211_local *local,
 	/* changed flag is auto-detected for this */
 	ieee80211_hw_config(local, 0);
 }
+
+static bool ieee80211_id_in_list(const u8 *ids, int n_ids, u8 id)
+{
+	int i;
+
+	for (i = 0; i < n_ids; i++)
+		if (ids[i] == id)
+			return true;
+	return false;
+}
+
+/**
+ * ieee80211_ie_split - split an IE buffer according to ordering
+ *
+ * @ies: the IE buffer
+ * @ielen: the length of the IE buffer
+ * @ids: an array with element IDs that are allowed before
+ *	the split
+ * @n_ids: the size of the element ID array
+ * @offset: offset where to start splitting in the buffer
+ *
+ * This function splits an IE buffer by updating the @offset
+ * variable to point to the location where the buffer should be
+ * split.
+ *
+ * It assumes that the given IE buffer is well-formed, this
+ * has to be guaranteed by the caller!
+ *
+ * It also assumes that the IEs in the buffer are ordered
+ * correctly, if not the result of using this function will not
+ * be ordered correctly either, i.e. it does no reordering.
+ *
+ * The function returns the offset where the next part of the
+ * buffer starts, which may be @ielen if the entire (remainder)
+ * of the buffer should be used.
+ */
+size_t ieee80211_ie_split(const u8 *ies, size_t ielen,
+			  const u8 *ids, int n_ids, size_t offset)
+{
+	size_t pos = offset;
+
+	while (pos < ielen && ieee80211_id_in_list(ids, n_ids, ies[pos]))
+		pos += 2 + ies[pos + 1];
+
+	return pos;
+}
+
+size_t ieee80211_ie_split_vendor(const u8 *ies, size_t ielen, size_t offset)
+{
+	size_t pos = offset;
+
+	while (pos < ielen && ies[pos] != WLAN_EID_VENDOR_SPECIFIC)
+		pos += 2 + ies[pos + 1];
+
+	return pos;
+}
diff --git a/net/mac80211/work.c b/net/mac80211/work.c
index c03c22d5bca3..affdd10b67ad 100644
--- a/net/mac80211/work.c
+++ b/net/mac80211/work.c
@@ -204,6 +204,7 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata,
 	struct ieee80211_mgmt *mgmt;
 	u8 *pos;
 	const u8 *ies;
+	size_t offset = 0, noffset;
 	int i, len, count, rates_len, supp_rates_len;
 	u16 capab;
 	struct ieee80211_supported_band *sband;
@@ -337,14 +338,26 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata,
 		}
 	}
 
-	/*
-	 * XXX:	These IEs could contain (vendor-specified)
-	 *	IEs that belong after HT -- the buffer may
-	 *	need to be split up.
-	 */
+	/* if present, add any custom IEs that go before HT */
 	if (wk->ie_len && wk->ie) {
-		pos = skb_put(skb, wk->ie_len);
-		memcpy(pos, wk->ie, wk->ie_len);
+		static const u8 before_ht[] = {
+			WLAN_EID_SSID,
+			WLAN_EID_SUPP_RATES,
+			WLAN_EID_EXT_SUPP_RATES,
+			WLAN_EID_PWR_CAPABILITY,
+			WLAN_EID_SUPPORTED_CHANNELS,
+			WLAN_EID_RSN,
+			WLAN_EID_QOS_CAPA,
+			WLAN_EID_RRM_ENABLED_CAPABILITIES,
+			WLAN_EID_MOBILITY_DOMAIN,
+			WLAN_EID_SUPPORTED_REGULATORY_CLASSES,
+		};
+		noffset = ieee80211_ie_split(wk->ie, wk->ie_len,
+					     before_ht, ARRAY_SIZE(before_ht),
+					     offset);
+		pos = skb_put(skb, noffset - offset);
+		memcpy(pos, wk->ie + offset, noffset - offset);
+		offset = noffset;
 	}
 
 	if (wk->assoc.use_11n && wk->assoc.wmm_used &&
@@ -352,6 +365,15 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata,
 		ieee80211_add_ht_ie(skb, wk->assoc.ht_information_ie,
 				    sband, wk->chan, wk->assoc.smps);
 
+	/* if present, add any custom non-vendor IEs that go after HT */
+	if (wk->ie_len && wk->ie) {
+		noffset = ieee80211_ie_split_vendor(wk->ie, wk->ie_len,
+						    offset);
+		pos = skb_put(skb, noffset - offset);
+		memcpy(pos, wk->ie + offset, noffset - offset);
+		offset = noffset;
+	}
+
 	if (wk->assoc.wmm_used && local->hw.queues >= 4) {
 		pos = skb_put(skb, 9);
 		*pos++ = WLAN_EID_VENDOR_SPECIFIC;
@@ -365,6 +387,13 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata,
 		*pos++ = 0;
 	}
 
+	/* add any remaining custom (i.e. vendor specific here) IEs */
+	if (wk->ie_len && wk->ie) {
+		noffset = wk->ie_len;
+		pos = skb_put(skb, noffset - offset);
+		memcpy(pos, wk->ie + offset, noffset - offset);
+	}
+
 	IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT;
 	ieee80211_tx_skb(sdata, skb);
 }
-- 
cgit v1.2.3


From 9588bbd5529461a3dacd435bf239c84c3508f569 Mon Sep 17 00:00:00 2001
From: Jouni Malinen <jouni.malinen@atheros.com>
Date: Wed, 23 Dec 2009 13:15:41 +0100
Subject: cfg80211: add remain-on-channel command

Add new commands for requesting the driver to remain awake
on a specified channel for the specified amount of time
(and another command to cancel such an operation). This
can be used to implement userspace-controlled off-channel
operations, like Public Action frame exchange on another
channel than the operation channel.

The off-channel operation should behave similarly to scan,
i.e. the local station (if associated) moves into power
save mode to request the AP to buffer frames for it and
then moves to the other channel to allow the off-channel
operation to be completed. The duration parameter can be
used to request enough time to receive a response from
the target station.

Signed-off-by: Jouni Malinen <jouni.malinen@atheros.com>
Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h |  36 ++++++++
 include/net/cfg80211.h  |  47 +++++++++++
 net/wireless/chan.c     |  41 +++++----
 net/wireless/core.h     |   3 +
 net/wireless/mlme.c     |  27 ++++++
 net/wireless/nl80211.c  | 218 +++++++++++++++++++++++++++++++++++++++++++++++-
 net/wireless/nl80211.h  |  11 +++
 7 files changed, 368 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index da8ea2e19273..2bfbe88837ef 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -270,6 +270,31 @@
  * @NL80211_CMD_SET_WIPHY_NETNS: Set a wiphy's netns. Note that all devices
  *	associated with this wiphy must be down and will follow.
  *
+ * @NL80211_CMD_REMAIN_ON_CHANNEL: Request to remain awake on the specified
+ *	channel for the specified amount of time. This can be used to do
+ *	off-channel operations like transmit a Public Action frame and wait for
+ *	a response while being associated to an AP on another channel.
+ *	%NL80211_ATTR_WIPHY or %NL80211_ATTR_IFINDEX is used to specify which
+ *	radio is used. %NL80211_ATTR_WIPHY_FREQ is used to specify the
+ *	frequency for the operation and %NL80211_ATTR_WIPHY_CHANNEL_TYPE may be
+ *	optionally used to specify additional channel parameters.
+ *	%NL80211_ATTR_DURATION is used to specify the duration in milliseconds
+ *	to remain on the channel. This command is also used as an event to
+ *	notify when the requested duration starts (it may take a while for the
+ *	driver to schedule this time due to other concurrent needs for the
+ *	radio).
+ *	When called, this operation returns a cookie (%NL80211_ATTR_COOKIE)
+ *	that will be included with any events pertaining to this request;
+ *	the cookie is also used to cancel the request.
+ * @NL80211_CMD_CANCEL_REMAIN_ON_CHANNEL: This command can be used to cancel a
+ *	pending remain-on-channel duration if the desired operation has been
+ *	completed prior to expiration of the originally requested duration.
+ *	%NL80211_ATTR_WIPHY or %NL80211_ATTR_IFINDEX is used to specify the
+ *	radio. The %NL80211_ATTR_COOKIE attribute must be given as well to
+ *	uniquely identify the request.
+ *	This command is also used as an event to notify when a requested
+ *	remain-on-channel duration has expired.
+ *
  * @NL80211_CMD_MAX: highest used command number
  * @__NL80211_CMD_AFTER_LAST: internal use
  */
@@ -353,6 +378,9 @@ enum nl80211_commands {
 	NL80211_CMD_DEL_PMKSA,
 	NL80211_CMD_FLUSH_PMKSA,
 
+	NL80211_CMD_REMAIN_ON_CHANNEL,
+	NL80211_CMD_CANCEL_REMAIN_ON_CHANNEL,
+
 	/* add new commands above here */
 
 	/* used to define NL80211_CMD_MAX below */
@@ -606,6 +634,10 @@ enum nl80211_commands {
  * @NL80211_ATTR_MAX_NUM_PMKIDS: maximum number of PMKIDs a firmware can
  *	cache, a wiphy attribute.
  *
+ * @NL80211_ATTR_DURATION: Duration of an operation in milliseconds, u32.
+ *
+ * @NL80211_ATTR_COOKIE: Generic 64-bit cookie to identify objects.
+ *
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
  */
@@ -743,6 +775,10 @@ enum nl80211_attrs {
 	NL80211_ATTR_PMKID,
 	NL80211_ATTR_MAX_NUM_PMKIDS,
 
+	NL80211_ATTR_DURATION,
+
+	NL80211_ATTR_COOKIE,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 542a477a94da..b66beb052054 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -988,6 +988,15 @@ struct cfg80211_pmksa {
  *
  * @dump_survey: get site survey information.
  *
+ * @remain_on_channel: Request the driver to remain awake on the specified
+ *	channel for the specified duration to complete an off-channel
+ *	operation (e.g., public action frame exchange). When the driver is
+ *	ready on the requested channel, it must indicate this with an event
+ *	notification by calling cfg80211_ready_on_channel().
+ * @cancel_remain_on_channel: Cancel an on-going remain-on-channel operation.
+ *	This allows the operation to be terminated prior to timeout based on
+ *	the duration value.
+ *
  * @testmode_cmd: run a test mode command
  *
  * @set_pmksa: Cache a PMKID for a BSSID. This is mostly useful for fullmac
@@ -1123,6 +1132,16 @@ struct cfg80211_ops {
 			     struct cfg80211_pmksa *pmksa);
 	int	(*flush_pmksa)(struct wiphy *wiphy, struct net_device *netdev);
 
+	int	(*remain_on_channel)(struct wiphy *wiphy,
+				     struct net_device *dev,
+				     struct ieee80211_channel *chan,
+				     enum nl80211_channel_type channel_type,
+				     unsigned int duration,
+				     u64 *cookie);
+	int	(*cancel_remain_on_channel)(struct wiphy *wiphy,
+					    struct net_device *dev,
+					    u64 cookie);
+
 	/* some temporary stuff to finish wext */
 	int	(*set_power_mgmt)(struct wiphy *wiphy, struct net_device *dev,
 				  bool enabled, int timeout);
@@ -2147,5 +2166,33 @@ void cfg80211_roamed(struct net_device *dev, const u8 *bssid,
 void cfg80211_disconnected(struct net_device *dev, u16 reason,
 			   u8 *ie, size_t ie_len, gfp_t gfp);
 
+/**
+ * cfg80211_ready_on_channel - notification of remain_on_channel start
+ * @dev: network device
+ * @cookie: the request cookie
+ * @chan: The current channel (from remain_on_channel request)
+ * @channel_type: Channel type
+ * @duration: Duration in milliseconds that the driver intents to remain on the
+ *	channel
+ * @gfp: allocation flags
+ */
+void cfg80211_ready_on_channel(struct net_device *dev, u64 cookie,
+			       struct ieee80211_channel *chan,
+			       enum nl80211_channel_type channel_type,
+			       unsigned int duration, gfp_t gfp);
+
+/**
+ * cfg80211_remain_on_channel_expired - remain_on_channel duration expired
+ * @dev: network device
+ * @cookie: the request cookie
+ * @chan: The current channel (from remain_on_channel request)
+ * @channel_type: Channel type
+ * @gfp: allocation flags
+ */
+void cfg80211_remain_on_channel_expired(struct net_device *dev,
+					u64 cookie,
+					struct ieee80211_channel *chan,
+					enum nl80211_channel_type channel_type,
+					gfp_t gfp);
 
 #endif /* __NET_CFG80211_H */
diff --git a/net/wireless/chan.c b/net/wireless/chan.c
index a46ac6c9b365..bf1737fc9a7e 100644
--- a/net/wireless/chan.c
+++ b/net/wireless/chan.c
@@ -41,44 +41,57 @@ rdev_fixed_channel(struct cfg80211_registered_device *rdev,
 	return result;
 }
 
-int rdev_set_freq(struct cfg80211_registered_device *rdev,
-		  struct wireless_dev *for_wdev,
+struct ieee80211_channel *
+rdev_freq_to_chan(struct cfg80211_registered_device *rdev,
 		  int freq, enum nl80211_channel_type channel_type)
 {
 	struct ieee80211_channel *chan;
 	struct ieee80211_sta_ht_cap *ht_cap;
-	int result;
-
-	if (rdev_fixed_channel(rdev, for_wdev))
-		return -EBUSY;
-
-	if (!rdev->ops->set_channel)
-		return -EOPNOTSUPP;
 
 	chan = ieee80211_get_channel(&rdev->wiphy, freq);
 
 	/* Primary channel not allowed */
 	if (!chan || chan->flags & IEEE80211_CHAN_DISABLED)
-		return -EINVAL;
+		return NULL;
 
 	if (channel_type == NL80211_CHAN_HT40MINUS &&
 	    chan->flags & IEEE80211_CHAN_NO_HT40MINUS)
-		return -EINVAL;
+		return NULL;
 	else if (channel_type == NL80211_CHAN_HT40PLUS &&
 		 chan->flags & IEEE80211_CHAN_NO_HT40PLUS)
-		return -EINVAL;
+		return NULL;
 
 	ht_cap = &rdev->wiphy.bands[chan->band]->ht_cap;
 
 	if (channel_type != NL80211_CHAN_NO_HT) {
 		if (!ht_cap->ht_supported)
-			return -EINVAL;
+			return NULL;
 
 		if (!(ht_cap->cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40) ||
 		    ht_cap->cap & IEEE80211_HT_CAP_40MHZ_INTOLERANT)
-			return -EINVAL;
+			return NULL;
 	}
 
+	return chan;
+}
+
+int rdev_set_freq(struct cfg80211_registered_device *rdev,
+		  struct wireless_dev *for_wdev,
+		  int freq, enum nl80211_channel_type channel_type)
+{
+	struct ieee80211_channel *chan;
+	int result;
+
+	if (rdev_fixed_channel(rdev, for_wdev))
+		return -EBUSY;
+
+	if (!rdev->ops->set_channel)
+		return -EOPNOTSUPP;
+
+	chan = rdev_freq_to_chan(rdev, freq, channel_type);
+	if (!chan)
+		return -EINVAL;
+
 	result = rdev->ops->set_channel(&rdev->wiphy, chan, channel_type);
 	if (result)
 		return result;
diff --git a/net/wireless/core.h b/net/wireless/core.h
index 35b712127143..30ec95f05b52 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -374,6 +374,9 @@ void cfg80211_process_rdev_events(struct cfg80211_registered_device *rdev);
 struct ieee80211_channel *
 rdev_fixed_channel(struct cfg80211_registered_device *rdev,
 		   struct wireless_dev *for_wdev);
+struct ieee80211_channel *
+rdev_freq_to_chan(struct cfg80211_registered_device *rdev,
+		  int freq, enum nl80211_channel_type channel_type);
 int rdev_set_freq(struct cfg80211_registered_device *rdev,
 		  struct wireless_dev *for_wdev,
 		  int freq, enum nl80211_channel_type channel_type);
diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c
index acaeaa784d68..11f6469b3f98 100644
--- a/net/wireless/mlme.c
+++ b/net/wireless/mlme.c
@@ -680,3 +680,30 @@ void cfg80211_mlme_down(struct cfg80211_registered_device *rdev,
 		}
 	}
 }
+
+void cfg80211_ready_on_channel(struct net_device *dev, u64 cookie,
+			       struct ieee80211_channel *chan,
+			       enum nl80211_channel_type channel_type,
+			       unsigned int duration, gfp_t gfp)
+{
+	struct wiphy *wiphy = dev->ieee80211_ptr->wiphy;
+	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
+
+	nl80211_send_remain_on_channel(rdev, dev, cookie, chan, channel_type,
+				       duration, gfp);
+}
+EXPORT_SYMBOL(cfg80211_ready_on_channel);
+
+void cfg80211_remain_on_channel_expired(struct net_device *dev,
+					u64 cookie,
+					struct ieee80211_channel *chan,
+					enum nl80211_channel_type channel_type,
+					gfp_t gfp)
+{
+	struct wiphy *wiphy = dev->ieee80211_ptr->wiphy;
+	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
+
+	nl80211_send_remain_on_channel_cancel(rdev, dev, cookie, chan,
+					      channel_type, gfp);
+}
+EXPORT_SYMBOL(cfg80211_remain_on_channel_expired);
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 60f854377f90..ff857f10cb85 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -141,6 +141,8 @@ static struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] __read_mostly = {
 	[NL80211_ATTR_4ADDR] = { .type = NLA_U8 },
 	[NL80211_ATTR_PMKID] = { .type = NLA_BINARY,
 				 .len = WLAN_PMKID_LEN },
+	[NL80211_ATTR_DURATION] = { .type = NLA_U32 },
+	[NL80211_ATTR_COOKIE] = { .type = NLA_U64 },
 };
 
 /* policy for the attributes */
@@ -569,6 +571,7 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags,
 	CMD(set_pmksa, SET_PMKSA);
 	CMD(del_pmksa, DEL_PMKSA);
 	CMD(flush_pmksa, FLUSH_PMKSA);
+	CMD(remain_on_channel, REMAIN_ON_CHANNEL);
 	if (dev->wiphy.flags & WIPHY_FLAG_NETNS_OK) {
 		i++;
 		NLA_PUT_U32(msg, i, NL80211_CMD_SET_WIPHY_NETNS);
@@ -4283,6 +4286,143 @@ static int nl80211_flush_pmksa(struct sk_buff *skb, struct genl_info *info)
 
 }
 
+static int nl80211_remain_on_channel(struct sk_buff *skb,
+				     struct genl_info *info)
+{
+	struct cfg80211_registered_device *rdev;
+	struct net_device *dev;
+	struct ieee80211_channel *chan;
+	struct sk_buff *msg;
+	void *hdr;
+	u64 cookie;
+	enum nl80211_channel_type channel_type = NL80211_CHAN_NO_HT;
+	u32 freq, duration;
+	int err;
+
+	if (!info->attrs[NL80211_ATTR_WIPHY_FREQ] ||
+	    !info->attrs[NL80211_ATTR_DURATION])
+		return -EINVAL;
+
+	duration = nla_get_u32(info->attrs[NL80211_ATTR_DURATION]);
+
+	/*
+	 * We should be on that channel for at least one jiffie,
+	 * and more than 5 seconds seems excessive.
+	 */
+	if (!duration || !msecs_to_jiffies(duration) || duration > 5000)
+		return -EINVAL;
+
+	rtnl_lock();
+
+	err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev);
+	if (err)
+		goto unlock_rtnl;
+
+	if (!rdev->ops->remain_on_channel) {
+		err = -EOPNOTSUPP;
+		goto out;
+	}
+
+	if (!netif_running(dev)) {
+		err = -ENETDOWN;
+		goto out;
+	}
+
+	if (info->attrs[NL80211_ATTR_WIPHY_CHANNEL_TYPE]) {
+		channel_type = nla_get_u32(
+			info->attrs[NL80211_ATTR_WIPHY_CHANNEL_TYPE]);
+		if (channel_type != NL80211_CHAN_NO_HT &&
+		    channel_type != NL80211_CHAN_HT20 &&
+		    channel_type != NL80211_CHAN_HT40PLUS &&
+		    channel_type != NL80211_CHAN_HT40MINUS)
+			err = -EINVAL;
+			goto out;
+	}
+
+	freq = nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ]);
+	chan = rdev_freq_to_chan(rdev, freq, channel_type);
+	if (chan == NULL) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0,
+			     NL80211_CMD_REMAIN_ON_CHANNEL);
+
+	if (IS_ERR(hdr)) {
+		err = PTR_ERR(hdr);
+		goto free_msg;
+	}
+
+	err = rdev->ops->remain_on_channel(&rdev->wiphy, dev, chan,
+					   channel_type, duration, &cookie);
+
+	if (err)
+		goto free_msg;
+
+	NLA_PUT_U64(msg, NL80211_ATTR_COOKIE, cookie);
+
+	genlmsg_end(msg, hdr);
+	err = genlmsg_reply(msg, info);
+	goto out;
+
+ nla_put_failure:
+	err = -ENOBUFS;
+ free_msg:
+	nlmsg_free(msg);
+ out:
+	cfg80211_unlock_rdev(rdev);
+	dev_put(dev);
+ unlock_rtnl:
+	rtnl_unlock();
+	return err;
+}
+
+static int nl80211_cancel_remain_on_channel(struct sk_buff *skb,
+					    struct genl_info *info)
+{
+	struct cfg80211_registered_device *rdev;
+	struct net_device *dev;
+	u64 cookie;
+	int err;
+
+	if (!info->attrs[NL80211_ATTR_COOKIE])
+		return -EINVAL;
+
+	rtnl_lock();
+
+	err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev);
+	if (err)
+		goto unlock_rtnl;
+
+	if (!rdev->ops->cancel_remain_on_channel) {
+		err = -EOPNOTSUPP;
+		goto out;
+	}
+
+	if (!netif_running(dev)) {
+		err = -ENETDOWN;
+		goto out;
+	}
+
+	cookie = nla_get_u64(info->attrs[NL80211_ATTR_COOKIE]);
+
+	err = rdev->ops->cancel_remain_on_channel(&rdev->wiphy, dev, cookie);
+
+ out:
+	cfg80211_unlock_rdev(rdev);
+	dev_put(dev);
+ unlock_rtnl:
+	rtnl_unlock();
+	return err;
+}
+
 static struct genl_ops nl80211_ops[] = {
 	{
 		.cmd = NL80211_CMD_GET_WIPHY,
@@ -4545,8 +4685,20 @@ static struct genl_ops nl80211_ops[] = {
 		.policy = nl80211_policy,
 		.flags = GENL_ADMIN_PERM,
 	},
-
+	{
+		.cmd = NL80211_CMD_REMAIN_ON_CHANNEL,
+		.doit = nl80211_remain_on_channel,
+		.policy = nl80211_policy,
+		.flags = GENL_ADMIN_PERM,
+	},
+	{
+		.cmd = NL80211_CMD_CANCEL_REMAIN_ON_CHANNEL,
+		.doit = nl80211_cancel_remain_on_channel,
+		.policy = nl80211_policy,
+		.flags = GENL_ADMIN_PERM,
+	},
 };
+
 static struct genl_multicast_group nl80211_mlme_mcgrp = {
 	.name = "mlme",
 };
@@ -5134,6 +5286,70 @@ nla_put_failure:
 	nlmsg_free(msg);
 }
 
+static void nl80211_send_remain_on_chan_event(
+	int cmd, struct cfg80211_registered_device *rdev,
+	struct net_device *netdev, u64 cookie,
+	struct ieee80211_channel *chan,
+	enum nl80211_channel_type channel_type,
+	unsigned int duration, gfp_t gfp)
+{
+	struct sk_buff *msg;
+	void *hdr;
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp);
+	if (!msg)
+		return;
+
+	hdr = nl80211hdr_put(msg, 0, 0, 0, cmd);
+	if (!hdr) {
+		nlmsg_free(msg);
+		return;
+	}
+
+	NLA_PUT_U32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx);
+	NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex);
+	NLA_PUT_U32(msg, NL80211_ATTR_WIPHY_FREQ, chan->center_freq);
+	NLA_PUT_U32(msg, NL80211_ATTR_WIPHY_CHANNEL_TYPE, channel_type);
+	NLA_PUT_U64(msg, NL80211_ATTR_COOKIE, cookie);
+
+	if (cmd == NL80211_CMD_REMAIN_ON_CHANNEL)
+		NLA_PUT_U32(msg, NL80211_ATTR_DURATION, duration);
+
+	if (genlmsg_end(msg, hdr) < 0) {
+		nlmsg_free(msg);
+		return;
+	}
+
+	genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0,
+				nl80211_mlme_mcgrp.id, gfp);
+	return;
+
+ nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+	nlmsg_free(msg);
+}
+
+void nl80211_send_remain_on_channel(struct cfg80211_registered_device *rdev,
+				    struct net_device *netdev, u64 cookie,
+				    struct ieee80211_channel *chan,
+				    enum nl80211_channel_type channel_type,
+				    unsigned int duration, gfp_t gfp)
+{
+	nl80211_send_remain_on_chan_event(NL80211_CMD_REMAIN_ON_CHANNEL,
+					  rdev, netdev, cookie, chan,
+					  channel_type, duration, gfp);
+}
+
+void nl80211_send_remain_on_channel_cancel(
+	struct cfg80211_registered_device *rdev, struct net_device *netdev,
+	u64 cookie, struct ieee80211_channel *chan,
+	enum nl80211_channel_type channel_type, gfp_t gfp)
+{
+	nl80211_send_remain_on_chan_event(NL80211_CMD_CANCEL_REMAIN_ON_CHANNEL,
+					  rdev, netdev, cookie, chan,
+					  channel_type, 0, gfp);
+}
+
 /* initialisation/exit functions */
 
 int nl80211_init(void)
diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h
index 44cc2a76a1b0..a5e2de419b7a 100644
--- a/net/wireless/nl80211.h
+++ b/net/wireless/nl80211.h
@@ -59,4 +59,15 @@ void nl80211_send_ibss_bssid(struct cfg80211_registered_device *rdev,
 			     struct net_device *netdev, const u8 *bssid,
 			     gfp_t gfp);
 
+void nl80211_send_remain_on_channel(struct cfg80211_registered_device *rdev,
+				    struct net_device *netdev,
+				    u64 cookie,
+				    struct ieee80211_channel *chan,
+				    enum nl80211_channel_type channel_type,
+				    unsigned int duration, gfp_t gfp);
+void nl80211_send_remain_on_channel_cancel(
+	struct cfg80211_registered_device *rdev, struct net_device *netdev,
+	u64 cookie, struct ieee80211_channel *chan,
+	enum nl80211_channel_type channel_type, gfp_t gfp);
+
 #endif /* __NET_WIRELESS_NL80211_H */
-- 
cgit v1.2.3


From 81744ee44ab2845c16ffd7d6f762f7b4a49a4750 Mon Sep 17 00:00:00 2001
From: "Martin K. Petersen" <martin.petersen@oracle.com>
Date: Tue, 29 Dec 2009 08:35:35 +0100
Subject: block: Fix incorrect alignment offset reporting and update
 documentation

queue_sector_alignment_offset returned the wrong value which caused
partitions to report an incorrect alignment_offset.  Since offset
alignment calculation is needed several places it has been split into a
separate helper function.  The topology stacking function has been
updated accordingly.

Furthermore, comments have been added to clarify how the stacking
function works.

Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Tested-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 block/blk-settings.c   | 44 +++++++++++++++++++++++++++++++++-----------
 include/linux/blkdev.h | 11 +++++++++--
 2 files changed, 42 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-settings.c b/block/blk-settings.c
index e14fcbcedbfa..d52d4adc440b 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -505,20 +505,30 @@ static unsigned int lcm(unsigned int a, unsigned int b)
 
 /**
  * blk_stack_limits - adjust queue_limits for stacked devices
- * @t:	the stacking driver limits (top)
- * @b:  the underlying queue limits (bottom)
+ * @t:	the stacking driver limits (top device)
+ * @b:  the underlying queue limits (bottom, component device)
  * @offset:  offset to beginning of data within component device
  *
  * Description:
- *    Merges two queue_limit structs.  Returns 0 if alignment didn't
- *    change.  Returns -1 if adding the bottom device caused
- *    misalignment.
+ *    This function is used by stacking drivers like MD and DM to ensure
+ *    that all component devices have compatible block sizes and
+ *    alignments.  The stacking driver must provide a queue_limits
+ *    struct (top) and then iteratively call the stacking function for
+ *    all component (bottom) devices.  The stacking function will
+ *    attempt to combine the values and ensure proper alignment.
+ *
+ *    Returns 0 if the top and bottom queue_limits are compatible.  The
+ *    top device's block sizes and alignment offsets may be adjusted to
+ *    ensure alignment with the bottom device. If no compatible sizes
+ *    and alignments exist, -1 is returned and the resulting top
+ *    queue_limits will have the misaligned flag set to indicate that
+ *    the alignment_offset is undefined.
  */
 int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
 		     sector_t offset)
 {
 	sector_t alignment;
-	unsigned int top, bottom, granularity;
+	unsigned int top, bottom;
 
 	t->max_sectors = min_not_zero(t->max_sectors, b->max_sectors);
 	t->max_hw_sectors = min_not_zero(t->max_hw_sectors, b->max_hw_sectors);
@@ -536,15 +546,18 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
 	t->max_segment_size = min_not_zero(t->max_segment_size,
 					   b->max_segment_size);
 
-	granularity = max(b->physical_block_size, b->io_min);
-	alignment = b->alignment_offset - (offset & (granularity - 1));
+	alignment = queue_limit_alignment_offset(b, offset);
 
+	/* Bottom device has different alignment.  Check that it is
+	 * compatible with the current top alignment.
+	 */
 	if (t->alignment_offset != alignment) {
 
 		top = max(t->physical_block_size, t->io_min)
 			+ t->alignment_offset;
-		bottom = granularity + alignment;
+		bottom = max(b->physical_block_size, b->io_min) + alignment;
 
+		/* Verify that top and bottom intervals line up */
 		if (max(top, bottom) & (min(top, bottom) - 1))
 			t->misaligned = 1;
 	}
@@ -561,32 +574,39 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
 	t->no_cluster |= b->no_cluster;
 	t->discard_zeroes_data &= b->discard_zeroes_data;
 
+	/* Physical block size a multiple of the logical block size? */
 	if (t->physical_block_size & (t->logical_block_size - 1)) {
 		t->physical_block_size = t->logical_block_size;
 		t->misaligned = 1;
 	}
 
+	/* Minimum I/O a multiple of the physical block size? */
 	if (t->io_min & (t->physical_block_size - 1)) {
 		t->io_min = t->physical_block_size;
 		t->misaligned = 1;
 	}
 
+	/* Optimal I/O a multiple of the physical block size? */
 	if (t->io_opt & (t->physical_block_size - 1)) {
 		t->io_opt = 0;
 		t->misaligned = 1;
 	}
 
+	/* Find lowest common alignment_offset */
 	t->alignment_offset = lcm(t->alignment_offset, alignment)
 		& (max(t->physical_block_size, t->io_min) - 1);
 
+	/* Verify that new alignment_offset is on a logical block boundary */
 	if (t->alignment_offset & (t->logical_block_size - 1))
 		t->misaligned = 1;
 
 	/* Discard alignment and granularity */
 	if (b->discard_granularity) {
+		unsigned int granularity = b->discard_granularity;
+		offset &= granularity - 1;
 
-		alignment = b->discard_alignment -
-			(offset & (b->discard_granularity - 1));
+		alignment = (granularity + b->discard_alignment - offset)
+			& (granularity - 1);
 
 		if (t->discard_granularity != 0 &&
 		    t->discard_alignment != alignment) {
@@ -598,6 +618,8 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
 				t->discard_misaligned = 1;
 		}
 
+		t->max_discard_sectors = min_not_zero(t->max_discard_sectors,
+						      b->max_discard_sectors);
 		t->discard_granularity = max(t->discard_granularity,
 					     b->discard_granularity);
 		t->discard_alignment = lcm(t->discard_alignment, alignment) &
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 784a919aa0d0..59b832be3044 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1116,11 +1116,18 @@ static inline int queue_alignment_offset(struct request_queue *q)
 	return q->limits.alignment_offset;
 }
 
+static inline int queue_limit_alignment_offset(struct queue_limits *lim, sector_t offset)
+{
+	unsigned int granularity = max(lim->physical_block_size, lim->io_min);
+
+	offset &= granularity - 1;
+	return (granularity + lim->alignment_offset - offset) & (granularity - 1);
+}
+
 static inline int queue_sector_alignment_offset(struct request_queue *q,
 						sector_t sector)
 {
-	return ((sector << 9) - q->limits.alignment_offset)
-		& (q->limits.io_min - 1);
+	return queue_limit_alignment_offset(&q->limits, sector << 9);
 }
 
 static inline int bdev_alignment_offset(struct block_device *bdev)
-- 
cgit v1.2.3


From db5d247ae811f49185a71e703b65acad845e4b18 Mon Sep 17 00:00:00 2001
From: Clemens Ladisch <cladisch@fastmail.net>
Date: Thu, 24 Dec 2009 12:05:58 +0100
Subject: firewire: fix use of multiple AV/C devices, allow multiple FCP
 listeners

Control of more than one AV/C device at once --- e.g. camcorders, tape
decks, audio devices, TV tuners --- failed or worked only unreliably,
depending on driver implementation.  This affected kernelspace and
userspace drivers alike and was caused by firewire-core's inability to
accept multiple registrations of FCP listeners.

The fix allows multiple address handlers to be registered for the FCP
command and response registers.  When a request for these registers is
received, all handlers are invoked, and the Firewire response is
generated by the core and not by any handler.

The cdev API does not change, i.e., userspace is still expected to send
a response for FCP requests; this response is silently ignored.

Signed-off-by: Clemens Ladisch <clemens@ladisch.de>
Signed-off-by: Stefan Richter <stefanr@s5r6.in-berlin.de> (changelog, rebased, whitespace)
---
 drivers/firewire/core-cdev.c            |  26 ++++---
 drivers/firewire/core-transaction.c     | 118 ++++++++++++++++++++++++++------
 drivers/media/dvb/firewire/firedtv-fw.c |  12 +---
 include/linux/firewire-cdev.h           |   3 +
 include/linux/firewire.h                |   4 +-
 5 files changed, 119 insertions(+), 44 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/firewire/core-cdev.c b/drivers/firewire/core-cdev.c
index 231e6ee5ba43..2cb22d160f6e 100644
--- a/drivers/firewire/core-cdev.c
+++ b/drivers/firewire/core-cdev.c
@@ -601,8 +601,9 @@ static void release_request(struct client *client,
 	struct inbound_transaction_resource *r = container_of(resource,
 			struct inbound_transaction_resource, resource);
 
-	fw_send_response(client->device->card, r->request,
-			 RCODE_CONFLICT_ERROR);
+	if (r->request)
+		fw_send_response(client->device->card, r->request,
+				 RCODE_CONFLICT_ERROR);
 	kfree(r);
 }
 
@@ -645,7 +646,8 @@ static void handle_request(struct fw_card *card, struct fw_request *request,
  failed:
 	kfree(r);
 	kfree(e);
-	fw_send_response(card, request, RCODE_CONFLICT_ERROR);
+	if (request)
+		fw_send_response(card, request, RCODE_CONFLICT_ERROR);
 }
 
 static void release_address_handler(struct client *client,
@@ -715,15 +717,17 @@ static int ioctl_send_response(struct client *client, void *buffer)
 
 	r = container_of(resource, struct inbound_transaction_resource,
 			 resource);
-	if (request->length < r->length)
-		r->length = request->length;
-
-	if (copy_from_user(r->data, u64_to_uptr(request->data), r->length)) {
-		ret = -EFAULT;
-		goto out;
+	if (r->request) {
+		if (request->length < r->length)
+			r->length = request->length;
+		if (copy_from_user(r->data, u64_to_uptr(request->data),
+				   r->length)) {
+			ret = -EFAULT;
+			goto out;
+		}
+		fw_send_response(client->device->card, r->request,
+				 request->rcode);
 	}
-
-	fw_send_response(client->device->card, r->request, request->rcode);
  out:
 	kfree(r);
 
diff --git a/drivers/firewire/core-transaction.c b/drivers/firewire/core-transaction.c
index 842739df23e2..495849eb13cc 100644
--- a/drivers/firewire/core-transaction.c
+++ b/drivers/firewire/core-transaction.c
@@ -432,14 +432,20 @@ static struct fw_address_handler *lookup_overlapping_address_handler(
 	return NULL;
 }
 
+static bool is_enclosing_handler(struct fw_address_handler *handler,
+				 unsigned long long offset, size_t length)
+{
+	return handler->offset <= offset &&
+		offset + length <= handler->offset + handler->length;
+}
+
 static struct fw_address_handler *lookup_enclosing_address_handler(
 	struct list_head *list, unsigned long long offset, size_t length)
 {
 	struct fw_address_handler *handler;
 
 	list_for_each_entry(handler, list, link) {
-		if (handler->offset <= offset &&
-		    offset + length <= handler->offset + handler->length)
+		if (is_enclosing_handler(handler, offset, length))
 			return handler;
 	}
 
@@ -465,6 +471,12 @@ const struct fw_address_region fw_unit_space_region =
 	{ .start = 0xfffff0000900ULL, .end = 0x1000000000000ULL, };
 #endif  /*  0  */
 
+static bool is_in_fcp_region(u64 offset, size_t length)
+{
+	return offset >= (CSR_REGISTER_BASE | CSR_FCP_COMMAND) &&
+		offset + length <= (CSR_REGISTER_BASE | CSR_FCP_END);
+}
+
 /**
  * fw_core_add_address_handler - register for incoming requests
  * @handler: callback
@@ -477,8 +489,11 @@ const struct fw_address_region fw_unit_space_region =
  * give the details of the particular request.
  *
  * Return value:  0 on success, non-zero otherwise.
+ *
  * The start offset of the handler's address region is determined by
  * fw_core_add_address_handler() and is returned in handler->offset.
+ *
+ * Address allocations are exclusive, except for the FCP registers.
  */
 int fw_core_add_address_handler(struct fw_address_handler *handler,
 				const struct fw_address_region *region)
@@ -498,10 +513,12 @@ int fw_core_add_address_handler(struct fw_address_handler *handler,
 
 	handler->offset = region->start;
 	while (handler->offset + handler->length <= region->end) {
-		other =
-		    lookup_overlapping_address_handler(&address_handler_list,
-						       handler->offset,
-						       handler->length);
+		if (is_in_fcp_region(handler->offset, handler->length))
+			other = NULL;
+		else
+			other = lookup_overlapping_address_handler
+					(&address_handler_list,
+					 handler->offset, handler->length);
 		if (other != NULL) {
 			handler->offset += other->length;
 		} else {
@@ -668,6 +685,9 @@ static struct fw_request *allocate_request(struct fw_packet *p)
 void fw_send_response(struct fw_card *card,
 		      struct fw_request *request, int rcode)
 {
+	if (WARN_ONCE(!request, "invalid for FCP address handlers"))
+		return;
+
 	/* unified transaction or broadcast transaction: don't respond */
 	if (request->ack != ACK_PENDING ||
 	    HEADER_DESTINATION_IS_BROADCAST(request->request_header[0])) {
@@ -686,26 +706,15 @@ void fw_send_response(struct fw_card *card,
 }
 EXPORT_SYMBOL(fw_send_response);
 
-void fw_core_handle_request(struct fw_card *card, struct fw_packet *p)
+static void handle_exclusive_region_request(struct fw_card *card,
+					    struct fw_packet *p,
+					    struct fw_request *request,
+					    unsigned long long offset)
 {
 	struct fw_address_handler *handler;
-	struct fw_request *request;
-	unsigned long long offset;
 	unsigned long flags;
 	int tcode, destination, source;
 
-	if (p->ack != ACK_PENDING && p->ack != ACK_COMPLETE)
-		return;
-
-	request = allocate_request(p);
-	if (request == NULL) {
-		/* FIXME: send statically allocated busy packet. */
-		return;
-	}
-
-	offset      =
-		((unsigned long long)
-		 HEADER_GET_OFFSET_HIGH(p->header[1]) << 32) | p->header[2];
 	tcode       = HEADER_GET_TCODE(p->header[0]);
 	destination = HEADER_GET_DESTINATION(p->header[0]);
 	source      = HEADER_GET_SOURCE(p->header[1]);
@@ -732,6 +741,73 @@ void fw_core_handle_request(struct fw_card *card, struct fw_packet *p)
 					  request->data, request->length,
 					  handler->callback_data);
 }
+
+static void handle_fcp_region_request(struct fw_card *card,
+				      struct fw_packet *p,
+				      struct fw_request *request,
+				      unsigned long long offset)
+{
+	struct fw_address_handler *handler;
+	unsigned long flags;
+	int tcode, destination, source;
+
+	if ((offset != (CSR_REGISTER_BASE | CSR_FCP_COMMAND) &&
+	     offset != (CSR_REGISTER_BASE | CSR_FCP_RESPONSE)) ||
+	    request->length > 0x200) {
+		fw_send_response(card, request, RCODE_ADDRESS_ERROR);
+
+		return;
+	}
+
+	tcode       = HEADER_GET_TCODE(p->header[0]);
+	destination = HEADER_GET_DESTINATION(p->header[0]);
+	source      = HEADER_GET_SOURCE(p->header[1]);
+
+	if (tcode != TCODE_WRITE_QUADLET_REQUEST &&
+	    tcode != TCODE_WRITE_BLOCK_REQUEST) {
+		fw_send_response(card, request, RCODE_TYPE_ERROR);
+
+		return;
+	}
+
+	spin_lock_irqsave(&address_handler_lock, flags);
+	list_for_each_entry(handler, &address_handler_list, link) {
+		if (is_enclosing_handler(handler, offset, request->length))
+			handler->address_callback(card, NULL, tcode,
+						  destination, source,
+						  p->generation, p->speed,
+						  offset, request->data,
+						  request->length,
+						  handler->callback_data);
+	}
+	spin_unlock_irqrestore(&address_handler_lock, flags);
+
+	fw_send_response(card, request, RCODE_COMPLETE);
+}
+
+void fw_core_handle_request(struct fw_card *card, struct fw_packet *p)
+{
+	struct fw_request *request;
+	unsigned long long offset;
+
+	if (p->ack != ACK_PENDING && p->ack != ACK_COMPLETE)
+		return;
+
+	request = allocate_request(p);
+	if (request == NULL) {
+		/* FIXME: send statically allocated busy packet. */
+		return;
+	}
+
+	offset = ((u64)HEADER_GET_OFFSET_HIGH(p->header[1]) << 32) |
+		p->header[2];
+
+	if (!is_in_fcp_region(offset, request->length))
+		handle_exclusive_region_request(card, p, request, offset);
+	else
+		handle_fcp_region_request(card, p, request, offset);
+
+}
 EXPORT_SYMBOL(fw_core_handle_request);
 
 void fw_core_handle_response(struct fw_card *card, struct fw_packet *p)
diff --git a/drivers/media/dvb/firewire/firedtv-fw.c b/drivers/media/dvb/firewire/firedtv-fw.c
index fe44789ab037..6223bf01efe9 100644
--- a/drivers/media/dvb/firewire/firedtv-fw.c
+++ b/drivers/media/dvb/firewire/firedtv-fw.c
@@ -202,14 +202,8 @@ static void handle_fcp(struct fw_card *card, struct fw_request *request,
 	unsigned long flags;
 	int su;
 
-	if ((tcode != TCODE_WRITE_QUADLET_REQUEST &&
-	     tcode != TCODE_WRITE_BLOCK_REQUEST) ||
-	    offset != CSR_REGISTER_BASE + CSR_FCP_RESPONSE ||
-	    length == 0 ||
-	    (((u8 *)payload)[0] & 0xf0) != 0) {
-		fw_send_response(card, request, RCODE_TYPE_ERROR);
+	if (length < 2 || (((u8 *)payload)[0] & 0xf0) != 0)
 		return;
-	}
 
 	su = ((u8 *)payload)[1] & 0x7;
 
@@ -230,10 +224,8 @@ static void handle_fcp(struct fw_card *card, struct fw_request *request,
 	}
 	spin_unlock_irqrestore(&node_list_lock, flags);
 
-	if (fdtv) {
+	if (fdtv)
 		avc_recv(fdtv, payload, length);
-		fw_send_response(card, request, RCODE_COMPLETE);
-	}
 }
 
 static struct fw_address_handler fcp_handler = {
diff --git a/include/linux/firewire-cdev.h b/include/linux/firewire-cdev.h
index c6b3ca3af6df..1f716d9f714b 100644
--- a/include/linux/firewire-cdev.h
+++ b/include/linux/firewire-cdev.h
@@ -340,6 +340,9 @@ struct fw_cdev_send_response {
  * The @closure field is passed back to userspace in the response event.
  * The @handle field is an out parameter, returning a handle to the allocated
  * range to be used for later deallocation of the range.
+ *
+ * The address range is allocated on all local nodes.  The address allocation
+ * is exclusive except for the FCP command and response registers.
  */
 struct fw_cdev_allocate {
 	__u64 offset;
diff --git a/include/linux/firewire.h b/include/linux/firewire.h
index 9416a461b696..a0e67150a729 100644
--- a/include/linux/firewire.h
+++ b/include/linux/firewire.h
@@ -248,8 +248,8 @@ typedef void (*fw_transaction_callback_t)(struct fw_card *card, int rcode,
 					  void *data, size_t length,
 					  void *callback_data);
 /*
- * Important note:  The callback must guarantee that either fw_send_response()
- * or kfree() is called on the @request.
+ * Important note:  Except for the FCP registers, the callback must guarantee
+ * that either fw_send_response() or kfree() is called on the @request.
  */
 typedef void (*fw_address_callback_t)(struct fw_card *card,
 				      struct fw_request *request,
-- 
cgit v1.2.3


From 1f8fef7b3388b5a976e80839679b5bae581a1091 Mon Sep 17 00:00:00 2001
From: Clemens Ladisch <clemens@ladisch.de>
Date: Thu, 24 Dec 2009 11:59:57 +0100
Subject: firewire: add fw_csr_string() helper function

The core (sysfs attributes), the firedtv driver, and possible future
drivers all read strings from some configuration ROM directory.  Factor
out the generic code from show_text_leaf() into a new helper function,
modified slightly to handle arbitrary buffer sizes.

Signed-off-by: Clemens Ladisch <clemens@ladisch.de>
Signed-off-by: Stefan Richter <stefanr@s5r6.in-berlin.de>
---
 drivers/firewire/core-device.c          | 110 ++++++++++++++++++++++----------
 drivers/media/dvb/firewire/firedtv-fw.c |  39 ++---------
 include/linux/firewire.h                |   2 +
 3 files changed, 84 insertions(+), 67 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/firewire/core-device.c b/drivers/firewire/core-device.c
index 9d0dfcbe2c1c..a39e4344cd58 100644
--- a/drivers/firewire/core-device.c
+++ b/drivers/firewire/core-device.c
@@ -59,6 +59,67 @@ int fw_csr_iterator_next(struct fw_csr_iterator *ci, int *key, int *value)
 }
 EXPORT_SYMBOL(fw_csr_iterator_next);
 
+static u32 *search_leaf(u32 *directory, int search_key)
+{
+	struct fw_csr_iterator ci;
+	int last_key = 0, key, value;
+
+	fw_csr_iterator_init(&ci, directory);
+	while (fw_csr_iterator_next(&ci, &key, &value)) {
+		if (last_key == search_key &&
+		    key == (CSR_DESCRIPTOR | CSR_LEAF))
+			return ci.p - 1 + value;
+		last_key = key;
+	}
+	return NULL;
+}
+
+static int textual_leaf_to_string(u32 *block, char *buf, size_t size)
+{
+	unsigned int quadlets, length;
+
+	if (!size || !buf)
+		return -EINVAL;
+
+	quadlets = min(block[0] >> 16, 256u);
+	if (quadlets < 2)
+		return -ENODATA;
+
+	if (block[1] != 0 || block[2] != 0)
+		/* unknown language/character set */
+		return -ENODATA;
+
+	block += 3;
+	quadlets -= 2;
+	for (length = 0; length < quadlets * 4 && length + 1 < size; length++) {
+		char c = block[length / 4] >> (24 - 8 * (length % 4));
+		if (c == '\0')
+			break;
+		buf[length] = c;
+	}
+	buf[length] = '\0';
+	return length;
+}
+
+/**
+ * fw_csr_string - reads a string from the configuration ROM
+ * @directory: device or unit directory;
+ *             fw_device->config_rom+5 or fw_unit->directory
+ * @key: the key of the preceding directory entry
+ * @buf: where to put the string
+ * @size: size of @buf, in bytes
+ *
+ * Returns string length (>= 0) or error code (< 0).
+ */
+int fw_csr_string(u32 *directory, int key, char *buf, size_t size)
+{
+	u32 *leaf = search_leaf(directory, key);
+	if (!leaf)
+		return -ENOENT;
+	return textual_leaf_to_string(leaf, buf, size);
+}
+EXPORT_SYMBOL(fw_csr_string);
+
 static bool is_fw_unit(struct device *dev);
 
 static int match_unit_directory(u32 *directory, u32 match_flags,
@@ -226,10 +287,10 @@ static ssize_t show_text_leaf(struct device *dev,
 {
 	struct config_rom_attribute *attr =
 		container_of(dattr, struct config_rom_attribute, attr);
-	struct fw_csr_iterator ci;
-	u32 *dir, *block = NULL, *p, *end;
-	int length, key, value, last_key = 0, ret = -ENOENT;
-	char *b;
+	u32 *dir;
+	size_t bufsize;
+	char dummy_buf[2];
+	int ret;
 
 	down_read(&fw_device_rwsem);
 
@@ -238,40 +299,23 @@ static ssize_t show_text_leaf(struct device *dev,
 	else
 		dir = fw_device(dev)->config_rom + 5;
 
-	fw_csr_iterator_init(&ci, dir);
-	while (fw_csr_iterator_next(&ci, &key, &value)) {
-		if (attr->key == last_key &&
-		    key == (CSR_DESCRIPTOR | CSR_LEAF))
-			block = ci.p - 1 + value;
-		last_key = key;
+	if (buf) {
+		bufsize = PAGE_SIZE - 1;
+	} else {
+		buf = dummy_buf;
+		bufsize = 1;
 	}
 
-	if (block == NULL)
-		goto out;
-
-	length = min(block[0] >> 16, 256U);
-	if (length < 3)
-		goto out;
-
-	if (block[1] != 0 || block[2] != 0)
-		/* Unknown encoding. */
-		goto out;
+	ret = fw_csr_string(dir, attr->key, buf, bufsize);
 
-	if (buf == NULL) {
-		ret = length * 4;
-		goto out;
+	if (ret >= 0) {
+		/* Strip trailing whitespace and add newline. */
+		while (ret > 0 && isspace(buf[ret - 1]))
+			ret--;
+		strcpy(buf + ret, "\n");
+		ret++;
 	}
 
-	b = buf;
-	end = &block[length + 1];
-	for (p = &block[3]; p < end; p++, b += 4)
-		* (u32 *) b = (__force u32) __cpu_to_be32(*p);
-
-	/* Strip trailing whitespace and add newline. */
-	while (b--, (isspace(*b) || *b == '\0') && b > buf);
-	strcpy(b + 1, "\n");
-	ret = b + 2 - buf;
- out:
 	up_read(&fw_device_rwsem);
 
 	return ret;
diff --git a/drivers/media/dvb/firewire/firedtv-fw.c b/drivers/media/dvb/firewire/firedtv-fw.c
index 6223bf01efe9..4253b7ab0097 100644
--- a/drivers/media/dvb/firewire/firedtv-fw.c
+++ b/drivers/media/dvb/firewire/firedtv-fw.c
@@ -239,47 +239,18 @@ static const struct fw_address_region fcp_region = {
 };
 
 /* Adjust the template string if models with longer names appear. */
-#define MAX_MODEL_NAME_LEN ((int)DIV_ROUND_UP(sizeof("FireDTV ????"), 4))
-
-static size_t model_name(u32 *directory, __be32 *buffer)
-{
-	struct fw_csr_iterator ci;
-	int i, length, key, value, last_key = 0;
-	u32 *block = NULL;
-
-	fw_csr_iterator_init(&ci, directory);
-	while (fw_csr_iterator_next(&ci, &key, &value)) {
-		if (last_key == CSR_MODEL &&
-		    key == (CSR_DESCRIPTOR | CSR_LEAF))
-			block = ci.p - 1 + value;
-		last_key = key;
-	}
-
-	if (block == NULL)
-		return 0;
-
-	length = min((int)(block[0] >> 16) - 2, MAX_MODEL_NAME_LEN);
-	if (length <= 0)
-		return 0;
-
-	/* fast-forward to text string */
-	block += 3;
-
-	for (i = 0; i < length; i++)
-		buffer[i] = cpu_to_be32(block[i]);
-
-	return length * 4;
-}
+#define MAX_MODEL_NAME_LEN sizeof("FireDTV ????")
 
 static int node_probe(struct device *dev)
 {
 	struct firedtv *fdtv;
-	__be32 name[MAX_MODEL_NAME_LEN];
+	char name[MAX_MODEL_NAME_LEN];
 	int name_len, err;
 
-	name_len = model_name(fw_unit(dev)->directory, name);
+	name_len = fw_csr_string(fw_unit(dev)->directory, CSR_MODEL,
+				 name, sizeof(name));
 
-	fdtv = fdtv_alloc(dev, &backend, (char *)name, name_len);
+	fdtv = fdtv_alloc(dev, &backend, name, name_len >= 0 ? name_len : 0);
 	if (!fdtv)
 		return -ENOMEM;
 
diff --git a/include/linux/firewire.h b/include/linux/firewire.h
index a0e67150a729..5246869d8083 100644
--- a/include/linux/firewire.h
+++ b/include/linux/firewire.h
@@ -72,6 +72,8 @@ struct fw_csr_iterator {
 void fw_csr_iterator_init(struct fw_csr_iterator *ci, u32 *p);
 int fw_csr_iterator_next(struct fw_csr_iterator *ci, int *key, int *value);
 
+int fw_csr_string(u32 *directory, int key, char *buf, size_t size);
+
 extern struct bus_type fw_bus_type;
 
 struct fw_card_driver;
-- 
cgit v1.2.3


From 3c2c58cb33b3b15a2c4871babeec8fe1456e1db6 Mon Sep 17 00:00:00 2001
From: Stefan Richter <stefanr@s5r6.in-berlin.de>
Date: Sat, 26 Dec 2009 01:43:21 +0100
Subject: firewire: core: fw_csr_string addendum

Witespace and comment changes, and a different way to say i + 1 < end.

Signed-off-by: Stefan Richter <stefanr@s5r6.in-berlin.de>
---
 drivers/firewire/core-device.c | 26 ++++++++++++++++----------
 include/linux/firewire.h       |  1 -
 2 files changed, 16 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/firewire/core-device.c b/drivers/firewire/core-device.c
index a39e4344cd58..5d5c6a689837 100644
--- a/drivers/firewire/core-device.c
+++ b/drivers/firewire/core-device.c
@@ -69,19 +69,22 @@ static u32 *search_leaf(u32 *directory, int search_key)
 		if (last_key == search_key &&
 		    key == (CSR_DESCRIPTOR | CSR_LEAF))
 			return ci.p - 1 + value;
+
 		last_key = key;
 	}
+
 	return NULL;
 }
 
 static int textual_leaf_to_string(u32 *block, char *buf, size_t size)
 {
-	unsigned int quadlets, length;
+	unsigned int quadlets, i;
+	char c;
 
 	if (!size || !buf)
 		return -EINVAL;
 
-	quadlets = min(block[0] >> 16, 256u);
+	quadlets = min(block[0] >> 16, 256U);
 	if (quadlets < 2)
 		return -ENODATA;
 
@@ -91,31 +94,34 @@ static int textual_leaf_to_string(u32 *block, char *buf, size_t size)
 
 	block += 3;
 	quadlets -= 2;
-	for (length = 0; length < quadlets * 4 && length + 1 < size; length++) {
-		char c = block[length / 4] >> (24 - 8 * (length % 4));
+	for (i = 0; i < quadlets * 4 && i < size - 1; i++) {
+		c = block[i / 4] >> (24 - 8 * (i % 4));
 		if (c == '\0')
 			break;
-		buf[length] = c;
+		buf[i] = c;
 	}
-	buf[length] = '\0';
-	return length;
+	buf[i] = '\0';
+
+	return i;
 }
 
 /**
  * fw_csr_string - reads a string from the configuration ROM
- * @directory: device or unit directory;
- *             fw_device->config_rom+5 or fw_unit->directory
+ * @directory: e.g. root directory or unit directory
  * @key: the key of the preceding directory entry
  * @buf: where to put the string
  * @size: size of @buf, in bytes
  *
- * Returns string length (>= 0) or error code (< 0).
+ * The string is taken from a minimal ASCII text descriptor leaf after
+ * the immediate entry with @key.  The string is zero-terminated.
+ * Returns strlen(buf) or a negative error code.
  */
 int fw_csr_string(u32 *directory, int key, char *buf, size_t size)
 {
 	u32 *leaf = search_leaf(directory, key);
 	if (!leaf)
 		return -ENOENT;
+
 	return textual_leaf_to_string(leaf, buf, size);
 }
 EXPORT_SYMBOL(fw_csr_string);
diff --git a/include/linux/firewire.h b/include/linux/firewire.h
index 5246869d8083..df680216e7b6 100644
--- a/include/linux/firewire.h
+++ b/include/linux/firewire.h
@@ -71,7 +71,6 @@ struct fw_csr_iterator {
 
 void fw_csr_iterator_init(struct fw_csr_iterator *ci, u32 *p);
 int fw_csr_iterator_next(struct fw_csr_iterator *ci, int *key, int *value);
-
 int fw_csr_string(u32 *directory, int key, char *buf, size_t size);
 
 extern struct bus_type fw_bus_type;
-- 
cgit v1.2.3


From 13b302d0a217580c0129b0641b0ca8b592e437b0 Mon Sep 17 00:00:00 2001
From: Stefan Richter <stefanr@s5r6.in-berlin.de>
Date: Sat, 26 Dec 2009 01:44:10 +0100
Subject: firewire: qualify config ROM cache pointers as const pointers

Several config ROM related functions only peek at the ROM cache; mark
their arguments as const pointers.  Ditto fw_device.config_rom and
fw_unit.directory, as the memory behind them is meant to be write-once.

Signed-off-by: Stefan Richter <stefanr@s5r6.in-berlin.de>
---
 drivers/firewire/core-device.c | 21 +++++++++++----------
 drivers/firewire/sbp2.c        |  5 +++--
 include/linux/firewire.h       | 12 ++++++------
 3 files changed, 20 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/firewire/core-device.c b/drivers/firewire/core-device.c
index 5d5c6a689837..eecd52dc8e98 100644
--- a/drivers/firewire/core-device.c
+++ b/drivers/firewire/core-device.c
@@ -43,7 +43,7 @@
 
 #include "core.h"
 
-void fw_csr_iterator_init(struct fw_csr_iterator *ci, u32 * p)
+void fw_csr_iterator_init(struct fw_csr_iterator *ci, const u32 *p)
 {
 	ci->p = p + 1;
 	ci->end = ci->p + (p[0] >> 16);
@@ -59,7 +59,7 @@ int fw_csr_iterator_next(struct fw_csr_iterator *ci, int *key, int *value)
 }
 EXPORT_SYMBOL(fw_csr_iterator_next);
 
-static u32 *search_leaf(u32 *directory, int search_key)
+static const u32 *search_leaf(const u32 *directory, int search_key)
 {
 	struct fw_csr_iterator ci;
 	int last_key = 0, key, value;
@@ -76,7 +76,7 @@ static u32 *search_leaf(u32 *directory, int search_key)
 	return NULL;
 }
 
-static int textual_leaf_to_string(u32 *block, char *buf, size_t size)
+static int textual_leaf_to_string(const u32 *block, char *buf, size_t size)
 {
 	unsigned int quadlets, i;
 	char c;
@@ -116,9 +116,9 @@ static int textual_leaf_to_string(u32 *block, char *buf, size_t size)
  * the immediate entry with @key.  The string is zero-terminated.
  * Returns strlen(buf) or a negative error code.
  */
-int fw_csr_string(u32 *directory, int key, char *buf, size_t size)
+int fw_csr_string(const u32 *directory, int key, char *buf, size_t size)
 {
-	u32 *leaf = search_leaf(directory, key);
+	const u32 *leaf = search_leaf(directory, key);
 	if (!leaf)
 		return -ENOENT;
 
@@ -128,7 +128,7 @@ EXPORT_SYMBOL(fw_csr_string);
 
 static bool is_fw_unit(struct device *dev);
 
-static int match_unit_directory(u32 *directory, u32 match_flags,
+static int match_unit_directory(const u32 *directory, u32 match_flags,
 				const struct ieee1394_device_id *id)
 {
 	struct fw_csr_iterator ci;
@@ -262,7 +262,7 @@ static ssize_t show_immediate(struct device *dev,
 	struct config_rom_attribute *attr =
 		container_of(dattr, struct config_rom_attribute, attr);
 	struct fw_csr_iterator ci;
-	u32 *dir;
+	const u32 *dir;
 	int key, value, ret = -ENOENT;
 
 	down_read(&fw_device_rwsem);
@@ -293,7 +293,7 @@ static ssize_t show_text_leaf(struct device *dev,
 {
 	struct config_rom_attribute *attr =
 		container_of(dattr, struct config_rom_attribute, attr);
-	u32 *dir;
+	const u32 *dir;
 	size_t bufsize;
 	char dummy_buf[2];
 	int ret;
@@ -421,7 +421,7 @@ static ssize_t guid_show(struct device *dev,
 	return ret;
 }
 
-static int units_sprintf(char *buf, u32 *directory)
+static int units_sprintf(char *buf, const u32 *directory)
 {
 	struct fw_csr_iterator ci;
 	int key, value;
@@ -503,7 +503,8 @@ static int read_rom(struct fw_device *device,
  */
 static int read_bus_info_block(struct fw_device *device, int generation)
 {
-	u32 *rom, *stack, *old_rom, *new_rom;
+	const u32 *old_rom, *new_rom;
+	u32 *rom, *stack;
 	u32 sp, key;
 	int i, end, length, ret = -1;
 
diff --git a/drivers/firewire/sbp2.c b/drivers/firewire/sbp2.c
index d485cdd8cbac..7e33b0b1704c 100644
--- a/drivers/firewire/sbp2.c
+++ b/drivers/firewire/sbp2.c
@@ -1014,7 +1014,8 @@ static int sbp2_add_logical_unit(struct sbp2_target *tgt, int lun_entry)
 	return 0;
 }
 
-static int sbp2_scan_logical_unit_dir(struct sbp2_target *tgt, u32 *directory)
+static int sbp2_scan_logical_unit_dir(struct sbp2_target *tgt,
+				      const u32 *directory)
 {
 	struct fw_csr_iterator ci;
 	int key, value;
@@ -1027,7 +1028,7 @@ static int sbp2_scan_logical_unit_dir(struct sbp2_target *tgt, u32 *directory)
 	return 0;
 }
 
-static int sbp2_scan_unit_dir(struct sbp2_target *tgt, u32 *directory,
+static int sbp2_scan_unit_dir(struct sbp2_target *tgt, const u32 *directory,
 			      u32 *model, u32 *firmware_revision)
 {
 	struct fw_csr_iterator ci;
diff --git a/include/linux/firewire.h b/include/linux/firewire.h
index df680216e7b6..4bd94bf5e739 100644
--- a/include/linux/firewire.h
+++ b/include/linux/firewire.h
@@ -65,13 +65,13 @@
 #define CSR_DIRECTORY_ID	0x20
 
 struct fw_csr_iterator {
-	u32 *p;
-	u32 *end;
+	const u32 *p;
+	const u32 *end;
 };
 
-void fw_csr_iterator_init(struct fw_csr_iterator *ci, u32 *p);
+void fw_csr_iterator_init(struct fw_csr_iterator *ci, const u32 *p);
 int fw_csr_iterator_next(struct fw_csr_iterator *ci, int *key, int *value);
-int fw_csr_string(u32 *directory, int key, char *buf, size_t size);
+int fw_csr_string(const u32 *directory, int key, char *buf, size_t size);
 
 extern struct bus_type fw_bus_type;
 
@@ -163,7 +163,7 @@ struct fw_device {
 	struct mutex client_list_mutex;
 	struct list_head client_list;
 
-	u32 *config_rom;
+	const u32 *config_rom;
 	size_t config_rom_length;
 	int config_rom_retries;
 	unsigned is_local:1;
@@ -205,7 +205,7 @@ int fw_device_enable_phys_dma(struct fw_device *device);
  */
 struct fw_unit {
 	struct device device;
-	u32 *directory;
+	const u32 *directory;
 	struct fw_attribute_group attribute_group;
 };
 
-- 
cgit v1.2.3


From 9bd3f98821a83041e77ee25158b80b535d02d7b4 Mon Sep 17 00:00:00 2001
From: Gui Jianfeng <guijianfeng@cn.fujitsu.com>
Date: Wed, 30 Dec 2009 08:41:07 +0100
Subject: block: blk_rq_err_sectors cleanup

blk_rq_err_sectors() seems useless, get rid of it.

Signed-off-by: Gui Jianfeng <guijianfeng@cn.fujitsu.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 include/linux/blkdev.h | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 59b832be3044..9b98173a8184 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -845,7 +845,6 @@ static inline struct request_queue *bdev_get_queue(struct block_device *bdev)
  * blk_rq_err_bytes()		: bytes left till the next error boundary
  * blk_rq_sectors()		: sectors left in the entire request
  * blk_rq_cur_sectors()		: sectors left in the current segment
- * blk_rq_err_sectors()		: sectors left till the next error boundary
  */
 static inline sector_t blk_rq_pos(const struct request *rq)
 {
@@ -874,11 +873,6 @@ static inline unsigned int blk_rq_cur_sectors(const struct request *rq)
 	return blk_rq_cur_bytes(rq) >> 9;
 }
 
-static inline unsigned int blk_rq_err_sectors(const struct request *rq)
-{
-	return blk_rq_err_bytes(rq) >> 9;
-}
-
 /*
  * Request issue related functions.
  */
-- 
cgit v1.2.3


From e96dc9674cb597de4fee757ed005c8465072d13f Mon Sep 17 00:00:00 2001
From: Lai Jiangshan <laijs@cn.fujitsu.com>
Date: Tue, 15 Dec 2009 15:39:26 +0800
Subject: tracing/syscalls: Fix typo in SYSCALL_DEFINE0

The struct syscall_metadata variable name in SYSCALL_DEFINE0
should be __syscall_meta__##sname instead of __syscall_meta_##sname
to match the name that is in SYSCALL_DEFINE1/2/3/4/5/6.

This error causes event_enter_##sname->data to point to the wrong
location, which causes syscalls which are defined by SYSCALL_DEFINE0()
not to be traced.

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
LKML-Reference: <4B273D2E.1010807@cn.fujitsu.com>
Acked-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/syscalls.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 65793e90d6f6..207466a49f3d 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -195,7 +195,7 @@ struct perf_event_attr;
 	static const struct syscall_metadata __used		\
 	  __attribute__((__aligned__(4)))			\
 	  __attribute__((section("__syscalls_metadata")))	\
-	  __syscall_meta_##sname = {				\
+	  __syscall_meta__##sname = {				\
 		.name 		= "sys_"#sname,			\
 		.nb_args 	= 0,				\
 		.enter_event	= &event_enter__##sname,	\
-- 
cgit v1.2.3


From ed656d8deccc5669afa33387568e7ec6f14e3e94 Mon Sep 17 00:00:00 2001
From: Rolf Eike Beer <eike-kernel@sf-tec.de>
Date: Sat, 26 Dec 2009 17:58:11 +0100
Subject: kfifo: Fix typo in comment

It's DECLARE_KFIFO, not DECLARED_KFIFO.

Signed-off-by: Rolf Eike Beer <eike-kernel@sf-tec.de>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kfifo.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h
index 3d44e9c65a8e..7c6b32a1421c 100644
--- a/include/linux/kfifo.h
+++ b/include/linux/kfifo.h
@@ -81,7 +81,7 @@ union { \
 }
 
 /**
- * INIT_KFIFO - Initialize a kfifo declared by DECLARED_KFIFO
+ * INIT_KFIFO - Initialize a kfifo declared by DECLARE_KFIFO
  * @name: name of the declared kfifo datatype
  */
 #define INIT_KFIFO(name) \
-- 
cgit v1.2.3


From d7f0eea9e431e1b8b0742a74db1a9490730b2a25 Mon Sep 17 00:00:00 2001
From: Zhang Rui <rui.zhang@intel.com>
Date: Wed, 30 Dec 2009 15:36:42 +0800
Subject: ACPI: introduce kernel parameter acpi_sleep=sci_force_enable

Introduce kernel parameter acpi_sleep=sci_force_enable

some laptop requires SCI_EN being set directly on resume,
or else they hung somewhere in the resume code path.

We already have a blacklist for these laptops but we still need
this option, especially when debugging some suspend/resume problems,
in case there are systems that need this workaround and are not yet
in the blacklist.

Signed-off-by: Zhang Rui <rui.zhang@intel.com>
Acked-by: Rafael J. Wysocki <rjw@sisk.pl>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 Documentation/kernel-parameters.txt |  5 ++++-
 arch/x86/kernel/acpi/sleep.c        |  2 ++
 drivers/acpi/sleep.c                | 29 +++++++++++++++++------------
 include/linux/acpi.h                |  1 +
 4 files changed, 24 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 5ba4d9dff113..736d45602886 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -240,7 +240,7 @@ and is between 256 and 4096 characters. It is defined in the file
 
 	acpi_sleep=	[HW,ACPI] Sleep options
 			Format: { s3_bios, s3_mode, s3_beep, s4_nohwsig,
-				  old_ordering, s4_nonvs }
+				  old_ordering, s4_nonvs, sci_force_enable }
 			See Documentation/power/video.txt for information on
 			s3_bios and s3_mode.
 			s3_beep is for debugging; it makes the PC's speaker beep
@@ -253,6 +253,9 @@ and is between 256 and 4096 characters. It is defined in the file
 			of _PTS is used by default).
 			s4_nonvs prevents the kernel from saving/restoring the
 			ACPI NVS memory during hibernation.
+			sci_force_enable causes the kernel to set SCI_EN directly
+			on resume from S1/S3 (which is against the ACPI spec,
+			but some broken systems don't work without it).
 
 	acpi_use_timer_override [HW,ACPI]
 			Use timer override. For some broken Nvidia NF5 boards
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index 82e508677b91..f9961034e557 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -162,6 +162,8 @@ static int __init acpi_sleep_setup(char *str)
 #endif
 		if (strncmp(str, "old_ordering", 12) == 0)
 			acpi_old_suspend_ordering();
+		if (strncmp(str, "sci_force_enable", 16) == 0)
+			acpi_set_sci_en_on_resume();
 		str = strchr(str, ',');
 		if (str != NULL)
 			str += strspn(str, ", \t");
diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c
index 5f2c379ab7bf..79d33d908b5a 100644
--- a/drivers/acpi/sleep.c
+++ b/drivers/acpi/sleep.c
@@ -80,6 +80,23 @@ static int acpi_sleep_prepare(u32 acpi_state)
 
 #ifdef CONFIG_ACPI_SLEEP
 static u32 acpi_target_sleep_state = ACPI_STATE_S0;
+/*
+ * According to the ACPI specification the BIOS should make sure that ACPI is
+ * enabled and SCI_EN bit is set on wake-up from S1 - S3 sleep states.  Still,
+ * some BIOSes don't do that and therefore we use acpi_enable() to enable ACPI
+ * on such systems during resume.  Unfortunately that doesn't help in
+ * particularly pathological cases in which SCI_EN has to be set directly on
+ * resume, although the specification states very clearly that this flag is
+ * owned by the hardware.  The set_sci_en_on_resume variable will be set in such
+ * cases.
+ */
+static bool set_sci_en_on_resume;
+
+void __init acpi_set_sci_en_on_resume(void)
+{
+	set_sci_en_on_resume = true;
+}
+
 /*
  * ACPI 1.0 wants us to execute _PTS before suspending devices, so we allow the
  * user to request that behavior by using the 'acpi_old_suspend_ordering'
@@ -170,18 +187,6 @@ static void acpi_pm_end(void)
 #endif /* CONFIG_ACPI_SLEEP */
 
 #ifdef CONFIG_SUSPEND
-/*
- * According to the ACPI specification the BIOS should make sure that ACPI is
- * enabled and SCI_EN bit is set on wake-up from S1 - S3 sleep states.  Still,
- * some BIOSes don't do that and therefore we use acpi_enable() to enable ACPI
- * on such systems during resume.  Unfortunately that doesn't help in
- * particularly pathological cases in which SCI_EN has to be set directly on
- * resume, although the specification states very clearly that this flag is
- * owned by the hardware.  The set_sci_en_on_resume variable will be set in such
- * cases.
- */
-static bool set_sci_en_on_resume;
-
 extern void do_suspend_lowlevel(void);
 
 static u32 acpi_suspend_states[] = {
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index ce945d4845fc..36924255c0d5 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -251,6 +251,7 @@ int acpi_check_mem_region(resource_size_t start, resource_size_t n,
 void __init acpi_no_s4_hw_signature(void);
 void __init acpi_old_suspend_ordering(void);
 void __init acpi_s4_no_nvs(void);
+void __init acpi_set_sci_en_on_resume(void);
 #endif /* CONFIG_PM_SLEEP */
 
 struct acpi_osc_context {
-- 
cgit v1.2.3


From 2f5cb43406d0b29b96248f5328a14a6f6abf8ae6 Mon Sep 17 00:00:00 2001
From: Anton Vorontsov <avorontsov@ru.mvista.com>
Date: Wed, 30 Dec 2009 08:23:30 +0000
Subject: phylib: Properly reinitialize PHYs after hibernation

Since hibernation assumes power loss, we should fully reinitialize
PHYs (including platform fixups), as if PHYs were just attached.

This patch factors phy_init_hw() out of phy_attach_direct(), then
converts mdio_bus to dev_pm_ops and adds an appropriate restore()
callback.

Signed-off-by: Anton Vorontsov <avorontsov@ru.mvista.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/mdio_bus.c   | 50 +++++++++++++++++++++++++++++++++++++-------
 drivers/net/phy/phy_device.c | 30 +++++++++++++-------------
 include/linux/phy.h          |  1 +
 3 files changed, 59 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c
index 49252d390903..e17b70291bbc 100644
--- a/drivers/net/phy/mdio_bus.c
+++ b/drivers/net/phy/mdio_bus.c
@@ -264,6 +264,8 @@ static int mdio_bus_match(struct device *dev, struct device_driver *drv)
 		(phydev->phy_id & phydrv->phy_id_mask));
 }
 
+#ifdef CONFIG_PM
+
 static bool mdio_bus_phy_may_suspend(struct phy_device *phydev)
 {
 	struct device_driver *drv = phydev->dev.driver;
@@ -295,10 +297,7 @@ static bool mdio_bus_phy_may_suspend(struct phy_device *phydev)
 	return true;
 }
 
-/* Suspend and resume.  Copied from platform_suspend and
- * platform_resume
- */
-static int mdio_bus_suspend(struct device * dev, pm_message_t state)
+static int mdio_bus_suspend(struct device *dev)
 {
 	struct phy_driver *phydrv = to_phy_driver(dev->driver);
 	struct phy_device *phydev = to_phy_device(dev);
@@ -318,7 +317,7 @@ static int mdio_bus_suspend(struct device * dev, pm_message_t state)
 	return phydrv->suspend(phydev);
 }
 
-static int mdio_bus_resume(struct device * dev)
+static int mdio_bus_resume(struct device *dev)
 {
 	struct phy_driver *phydrv = to_phy_driver(dev->driver);
 	struct phy_device *phydev = to_phy_device(dev);
@@ -338,11 +337,48 @@ no_resume:
 	return 0;
 }
 
+static int mdio_bus_restore(struct device *dev)
+{
+	struct phy_device *phydev = to_phy_device(dev);
+	struct net_device *netdev = phydev->attached_dev;
+	int ret;
+
+	if (!netdev)
+		return 0;
+
+	ret = phy_init_hw(phydev);
+	if (ret < 0)
+		return ret;
+
+	/* The PHY needs to renegotiate. */
+	phydev->link = 0;
+	phydev->state = PHY_UP;
+
+	phy_start_machine(phydev, NULL);
+
+	return 0;
+}
+
+static struct dev_pm_ops mdio_bus_pm_ops = {
+	.suspend = mdio_bus_suspend,
+	.resume = mdio_bus_resume,
+	.freeze = mdio_bus_suspend,
+	.thaw = mdio_bus_resume,
+	.restore = mdio_bus_restore,
+};
+
+#define MDIO_BUS_PM_OPS (&mdio_bus_pm_ops)
+
+#else
+
+#define MDIO_BUS_PM_OPS NULL
+
+#endif /* CONFIG_PM */
+
 struct bus_type mdio_bus_type = {
 	.name		= "mdio_bus",
 	.match		= mdio_bus_match,
-	.suspend	= mdio_bus_suspend,
-	.resume		= mdio_bus_resume,
+	.pm		= MDIO_BUS_PM_OPS,
 };
 EXPORT_SYMBOL(mdio_bus_type);
 
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index b10fedd82143..8212b2b93422 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -378,6 +378,20 @@ void phy_disconnect(struct phy_device *phydev)
 }
 EXPORT_SYMBOL(phy_disconnect);
 
+int phy_init_hw(struct phy_device *phydev)
+{
+	int ret;
+
+	if (!phydev->drv || !phydev->drv->config_init)
+		return 0;
+
+	ret = phy_scan_fixups(phydev);
+	if (ret < 0)
+		return ret;
+
+	return phydev->drv->config_init(phydev);
+}
+
 /**
  * phy_attach_direct - attach a network device to a given PHY device pointer
  * @dev: network device to attach
@@ -425,21 +439,7 @@ int phy_attach_direct(struct net_device *dev, struct phy_device *phydev,
 	/* Do initial configuration here, now that
 	 * we have certain key parameters
 	 * (dev_flags and interface) */
-	if (phydev->drv->config_init) {
-		int err;
-
-		err = phy_scan_fixups(phydev);
-
-		if (err < 0)
-			return err;
-
-		err = phydev->drv->config_init(phydev);
-
-		if (err < 0)
-			return err;
-	}
-
-	return 0;
+	return phy_init_hw(phydev);
 }
 EXPORT_SYMBOL(phy_attach_direct);
 
diff --git a/include/linux/phy.h b/include/linux/phy.h
index b1368b8f6572..7968defd2fa7 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -447,6 +447,7 @@ struct phy_device* get_phy_device(struct mii_bus *bus, int addr);
 int phy_device_register(struct phy_device *phy);
 int phy_clear_interrupt(struct phy_device *phydev);
 int phy_config_interrupt(struct phy_device *phydev, u32 interrupts);
+int phy_init_hw(struct phy_device *phydev);
 int phy_attach_direct(struct net_device *dev, struct phy_device *phydev,
 		u32 flags, phy_interface_t interface);
 struct phy_device * phy_attach(struct net_device *dev,
-- 
cgit v1.2.3


From 0f4bd46ec252887f44f1f065b41867cac8f70dfb Mon Sep 17 00:00:00 2001
From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Date: Tue, 22 Dec 2009 03:15:43 +0000
Subject: kmsg_dump: Dump on crash_kexec as well

crash_kexec gets called before kmsg_dump(KMSG_DUMP_OOPS) if
panic_on_oops is set, so the kernel log buffer is not stored
for this case.

This patch adds a KMSG_DUMP_KEXEC dump type which gets called
when crash_kexec() is invoked. To avoid getting double dumps,
the old KMSG_DUMP_PANIC is moved below crash_kexec(). The
mtdoops driver is modified to handle KMSG_DUMP_KEXEC in the
same way as a panic.

Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Acked-by: Simon Kagstrom <simon.kagstrom@netinsight.net>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/mtdoops.c     | 2 +-
 include/linux/kmsg_dump.h | 1 +
 kernel/kexec.c            | 4 ++++
 kernel/panic.c            | 3 ++-
 kernel/printk.c           | 1 +
 5 files changed, 9 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/mtdoops.c b/drivers/mtd/mtdoops.c
index a714ec482761..92e12df0917f 100644
--- a/drivers/mtd/mtdoops.c
+++ b/drivers/mtd/mtdoops.c
@@ -322,7 +322,7 @@ static void mtdoops_do_dump(struct kmsg_dumper *dumper,
 	memcpy(dst + l1_cpy, s2 + s2_start, l2_cpy);
 
 	/* Panics must be written immediately */
-	if (reason == KMSG_DUMP_PANIC) {
+	if (reason != KMSG_DUMP_OOPS) {
 		if (!cxt->mtd->panic_write)
 			printk(KERN_ERR "mtdoops: Cannot write from panic without panic_write\n");
 		else
diff --git a/include/linux/kmsg_dump.h b/include/linux/kmsg_dump.h
index e32aa268efac..24b44145a886 100644
--- a/include/linux/kmsg_dump.h
+++ b/include/linux/kmsg_dump.h
@@ -17,6 +17,7 @@
 enum kmsg_dump_reason {
 	KMSG_DUMP_OOPS,
 	KMSG_DUMP_PANIC,
+	KMSG_DUMP_KEXEC,
 };
 
 /**
diff --git a/kernel/kexec.c b/kernel/kexec.c
index 433e9fcc1fc5..ae217488fef8 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -32,6 +32,7 @@
 #include <linux/console.h>
 #include <linux/vmalloc.h>
 #include <linux/swap.h>
+#include <linux/kmsg_dump.h>
 
 #include <asm/page.h>
 #include <asm/uaccess.h>
@@ -1074,6 +1075,9 @@ void crash_kexec(struct pt_regs *regs)
 	if (mutex_trylock(&kexec_mutex)) {
 		if (kexec_crash_image) {
 			struct pt_regs fixed_regs;
+
+			kmsg_dump(KMSG_DUMP_KEXEC);
+
 			crash_setup_regs(&fixed_regs, regs);
 			crash_save_vmcoreinfo();
 			machine_crash_shutdown(&fixed_regs);
diff --git a/kernel/panic.c b/kernel/panic.c
index 5827f7b97254..c787333282b8 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -75,7 +75,6 @@ NORET_TYPE void panic(const char * fmt, ...)
 	dump_stack();
 #endif
 
-	kmsg_dump(KMSG_DUMP_PANIC);
 	/*
 	 * If we have crashed and we have a crash kernel loaded let it handle
 	 * everything else.
@@ -83,6 +82,8 @@ NORET_TYPE void panic(const char * fmt, ...)
 	 */
 	crash_kexec(NULL);
 
+	kmsg_dump(KMSG_DUMP_PANIC);
+
 	/*
 	 * Note smp_send_stop is the usual smp shutdown function, which
 	 * unfortunately means it may not be hardened to work in a panic
diff --git a/kernel/printk.c b/kernel/printk.c
index 1ded8e7dd19b..2c9dc0b03a5e 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -1467,6 +1467,7 @@ EXPORT_SYMBOL_GPL(kmsg_dump_unregister);
 static const char const *kmsg_reasons[] = {
 	[KMSG_DUMP_OOPS]	= "oops",
 	[KMSG_DUMP_PANIC]	= "panic",
+	[KMSG_DUMP_KEXEC]	= "kexec",
 };
 
 static const char *kmsg_to_str(enum kmsg_dump_reason reason)
-- 
cgit v1.2.3


From 0719d3434747889b314a1e8add776418c4148bcf Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Wed, 30 Dec 2009 00:39:22 +0100
Subject: reiserfs: Fix reiserfs lock <-> i_xattr_sem dependency inversion

i_xattr_sem depends on the reiserfs lock. But after we grab
i_xattr_sem, we may relax/relock the reiserfs lock while waiting
on a freezed filesystem, creating a dependency inversion between
the two locks.

In order to avoid the i_xattr_sem -> reiserfs lock dependency, let's
create a reiserfs_down_read_safe() that acts like
reiserfs_mutex_lock_safe(): relax the reiserfs lock while grabbing
another lock to avoid undesired dependencies induced by the
heivyweight reiserfs lock.

This fixes the following warning:

[  990.005931] =======================================================
[  990.012373] [ INFO: possible circular locking dependency detected ]
[  990.013233] 2.6.33-rc1 #1
[  990.013233] -------------------------------------------------------
[  990.013233] dbench/1891 is trying to acquire lock:
[  990.013233]  (&REISERFS_SB(s)->lock){+.+.+.}, at: [<ffffffff81159505>] reiserfs_write_lock+0x35/0x50
[  990.013233]
[  990.013233] but task is already holding lock:
[  990.013233]  (&REISERFS_I(inode)->i_xattr_sem){+.+.+.}, at: [<ffffffff8115899a>] reiserfs_xattr_set_handle+0x8a/0x470
[  990.013233]
[  990.013233] which lock already depends on the new lock.
[  990.013233]
[  990.013233]
[  990.013233] the existing dependency chain (in reverse order) is:
[  990.013233]
[  990.013233] -> #1 (&REISERFS_I(inode)->i_xattr_sem){+.+.+.}:
[  990.013233]        [<ffffffff81063afc>] __lock_acquire+0xf9c/0x1560
[  990.013233]        [<ffffffff8106414f>] lock_acquire+0x8f/0xb0
[  990.013233]        [<ffffffff814ac194>] down_write+0x44/0x80
[  990.013233]        [<ffffffff8115899a>] reiserfs_xattr_set_handle+0x8a/0x470
[  990.013233]        [<ffffffff81158e30>] reiserfs_xattr_set+0xb0/0x150
[  990.013233]        [<ffffffff8115a6aa>] user_set+0x8a/0x90
[  990.013233]        [<ffffffff8115901a>] reiserfs_setxattr+0xaa/0xb0
[  990.013233]        [<ffffffff810e2596>] __vfs_setxattr_noperm+0x36/0xa0
[  990.013233]        [<ffffffff810e26bc>] vfs_setxattr+0xbc/0xc0
[  990.013233]        [<ffffffff810e2780>] setxattr+0xc0/0x150
[  990.013233]        [<ffffffff810e289d>] sys_fsetxattr+0x8d/0xa0
[  990.013233]        [<ffffffff81002dab>] system_call_fastpath+0x16/0x1b
[  990.013233]
[  990.013233] -> #0 (&REISERFS_SB(s)->lock){+.+.+.}:
[  990.013233]        [<ffffffff81063e30>] __lock_acquire+0x12d0/0x1560
[  990.013233]        [<ffffffff8106414f>] lock_acquire+0x8f/0xb0
[  990.013233]        [<ffffffff814aba77>] __mutex_lock_common+0x47/0x3b0
[  990.013233]        [<ffffffff814abebe>] mutex_lock_nested+0x3e/0x50
[  990.013233]        [<ffffffff81159505>] reiserfs_write_lock+0x35/0x50
[  990.013233]        [<ffffffff811340e5>] reiserfs_prepare_write+0x45/0x180
[  990.013233]        [<ffffffff81158bb6>] reiserfs_xattr_set_handle+0x2a6/0x470
[  990.013233]        [<ffffffff81158e30>] reiserfs_xattr_set+0xb0/0x150
[  990.013233]        [<ffffffff8115a6aa>] user_set+0x8a/0x90
[  990.013233]        [<ffffffff8115901a>] reiserfs_setxattr+0xaa/0xb0
[  990.013233]        [<ffffffff810e2596>] __vfs_setxattr_noperm+0x36/0xa0
[  990.013233]        [<ffffffff810e26bc>] vfs_setxattr+0xbc/0xc0
[  990.013233]        [<ffffffff810e2780>] setxattr+0xc0/0x150
[  990.013233]        [<ffffffff810e289d>] sys_fsetxattr+0x8d/0xa0
[  990.013233]        [<ffffffff81002dab>] system_call_fastpath+0x16/0x1b
[  990.013233]
[  990.013233] other info that might help us debug this:
[  990.013233]
[  990.013233] 2 locks held by dbench/1891:
[  990.013233]  #0:  (&sb->s_type->i_mutex_key#12){+.+.+.}, at: [<ffffffff810e2678>] vfs_setxattr+0x78/0xc0
[  990.013233]  #1:  (&REISERFS_I(inode)->i_xattr_sem){+.+.+.}, at: [<ffffffff8115899a>] reiserfs_xattr_set_handle+0x8a/0x470
[  990.013233]
[  990.013233] stack backtrace:
[  990.013233] Pid: 1891, comm: dbench Not tainted 2.6.33-rc1 #1
[  990.013233] Call Trace:
[  990.013233]  [<ffffffff81061639>] print_circular_bug+0xe9/0xf0
[  990.013233]  [<ffffffff81063e30>] __lock_acquire+0x12d0/0x1560
[  990.013233]  [<ffffffff8115899a>] ? reiserfs_xattr_set_handle+0x8a/0x470
[  990.013233]  [<ffffffff8106414f>] lock_acquire+0x8f/0xb0
[  990.013233]  [<ffffffff81159505>] ? reiserfs_write_lock+0x35/0x50
[  990.013233]  [<ffffffff8115899a>] ? reiserfs_xattr_set_handle+0x8a/0x470
[  990.013233]  [<ffffffff814aba77>] __mutex_lock_common+0x47/0x3b0
[  990.013233]  [<ffffffff81159505>] ? reiserfs_write_lock+0x35/0x50
[  990.013233]  [<ffffffff81159505>] ? reiserfs_write_lock+0x35/0x50
[  990.013233]  [<ffffffff81062592>] ? mark_held_locks+0x72/0xa0
[  990.013233]  [<ffffffff814ab81d>] ? __mutex_unlock_slowpath+0xbd/0x140
[  990.013233]  [<ffffffff810628ad>] ? trace_hardirqs_on_caller+0x14d/0x1a0
[  990.013233]  [<ffffffff814abebe>] mutex_lock_nested+0x3e/0x50
[  990.013233]  [<ffffffff81159505>] reiserfs_write_lock+0x35/0x50
[  990.013233]  [<ffffffff811340e5>] reiserfs_prepare_write+0x45/0x180
[  990.013233]  [<ffffffff81158bb6>] reiserfs_xattr_set_handle+0x2a6/0x470
[  990.013233]  [<ffffffff81158e30>] reiserfs_xattr_set+0xb0/0x150
[  990.013233]  [<ffffffff814abcb4>] ? __mutex_lock_common+0x284/0x3b0
[  990.013233]  [<ffffffff8115a6aa>] user_set+0x8a/0x90
[  990.013233]  [<ffffffff8115901a>] reiserfs_setxattr+0xaa/0xb0
[  990.013233]  [<ffffffff810e2596>] __vfs_setxattr_noperm+0x36/0xa0
[  990.013233]  [<ffffffff810e26bc>] vfs_setxattr+0xbc/0xc0
[  990.013233]  [<ffffffff810e2780>] setxattr+0xc0/0x150
[  990.013233]  [<ffffffff81056018>] ? sched_clock_cpu+0xb8/0x100
[  990.013233]  [<ffffffff8105eded>] ? trace_hardirqs_off+0xd/0x10
[  990.013233]  [<ffffffff810560a3>] ? cpu_clock+0x43/0x50
[  990.013233]  [<ffffffff810c6820>] ? fget+0xb0/0x110
[  990.013233]  [<ffffffff810c6770>] ? fget+0x0/0x110
[  990.013233]  [<ffffffff81002ddc>] ? sysret_check+0x27/0x62
[  990.013233]  [<ffffffff810e289d>] sys_fsetxattr+0x8d/0xa0
[  990.013233]  [<ffffffff81002dab>] system_call_fastpath+0x16/0x1b

Reported-and-tested-by: Christian Kujau <lists@nerdbynature.de>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Alexander Beregalov <a.beregalov@gmail.com>
Cc: Chris Mason <chris.mason@oracle.com>
Cc: Ingo Molnar <mingo@elte.hu>
---
 fs/reiserfs/xattr.c         | 2 +-
 include/linux/reiserfs_fs.h | 8 ++++++++
 2 files changed, 9 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 8891cd88a3f4..a0e2e7acdc75 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -484,7 +484,7 @@ reiserfs_xattr_set_handle(struct reiserfs_transaction_handle *th,
 	if (IS_ERR(dentry))
 		return PTR_ERR(dentry);
 
-	down_write(&REISERFS_I(inode)->i_xattr_sem);
+	reiserfs_down_read_safe(&REISERFS_I(inode)->i_xattr_sem, inode->i_sb);
 
 	xahash = xattr_hash(buffer, buffer_size);
 	while (buffer_pos < buffer_size || buffer_pos == 0) {
diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h
index 4351b49e2b1e..35d3f459b0ac 100644
--- a/include/linux/reiserfs_fs.h
+++ b/include/linux/reiserfs_fs.h
@@ -106,6 +106,14 @@ reiserfs_mutex_lock_nested_safe(struct mutex *m, unsigned int subclass,
 	reiserfs_write_lock(s);
 }
 
+static inline void
+reiserfs_down_read_safe(struct rw_semaphore *sem, struct super_block *s)
+{
+	reiserfs_write_unlock(s);
+	down_read(sem);
+	reiserfs_write_lock(s);
+}
+
 /*
  * When we schedule, we usually want to also release the write lock,
  * according to the previous bkl based locking scheme of reiserfs.
-- 
cgit v1.2.3


From c4a62ca362258d98f42efb282cfbf9b61caffdbe Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Wed, 30 Dec 2009 03:20:19 +0100
Subject: reiserfs: Warn on lock relax if taken recursively

When we relax the reiserfs lock to avoid creating unwanted
dependencies against others locks while grabbing these,
we want to ensure it has not been taken recursively, otherwise
the lock won't be really relaxed. Only its depth will be decreased.
The unwanted dependency would then actually happen.

To prevent from that, add a reiserfs_lock_check_recursive() call
in the places that need it.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Alexander Beregalov <a.beregalov@gmail.com>
Cc: Chris Mason <chris.mason@oracle.com>
Cc: Ingo Molnar <mingo@elte.hu>
---
 fs/reiserfs/lock.c          | 9 +++++++++
 include/linux/reiserfs_fs.h | 9 +++++++++
 2 files changed, 18 insertions(+)

(limited to 'include/linux')

diff --git a/fs/reiserfs/lock.c b/fs/reiserfs/lock.c
index ee2cfc0fd8a7..b87aa2c1afc1 100644
--- a/fs/reiserfs/lock.c
+++ b/fs/reiserfs/lock.c
@@ -86,3 +86,12 @@ void reiserfs_check_lock_depth(struct super_block *sb, char *caller)
 		reiserfs_panic(sb, "%s called without kernel lock held %d",
 			       caller);
 }
+
+#ifdef CONFIG_REISERFS_CHECK
+void reiserfs_lock_check_recursive(struct super_block *sb)
+{
+	struct reiserfs_sb_info *sb_i = REISERFS_SB(sb);
+
+	WARN_ONCE((sb_i->lock_depth > 0), "Unwanted recursive reiserfs lock!\n");
+}
+#endif
diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h
index 35d3f459b0ac..793bf8351ab8 100644
--- a/include/linux/reiserfs_fs.h
+++ b/include/linux/reiserfs_fs.h
@@ -62,6 +62,12 @@ void reiserfs_write_unlock(struct super_block *s);
 int reiserfs_write_lock_once(struct super_block *s);
 void reiserfs_write_unlock_once(struct super_block *s, int lock_depth);
 
+#ifdef CONFIG_REISERFS_CHECK
+void reiserfs_lock_check_recursive(struct super_block *s);
+#else
+static inline void reiserfs_lock_check_recursive(struct super_block *s) { }
+#endif
+
 /*
  * Several mutexes depend on the write lock.
  * However sometimes we want to relax the write lock while we hold
@@ -92,6 +98,7 @@ void reiserfs_write_unlock_once(struct super_block *s, int lock_depth);
 static inline void reiserfs_mutex_lock_safe(struct mutex *m,
 			       struct super_block *s)
 {
+	reiserfs_lock_check_recursive(s);
 	reiserfs_write_unlock(s);
 	mutex_lock(m);
 	reiserfs_write_lock(s);
@@ -101,6 +108,7 @@ static inline void
 reiserfs_mutex_lock_nested_safe(struct mutex *m, unsigned int subclass,
 			       struct super_block *s)
 {
+	reiserfs_lock_check_recursive(s);
 	reiserfs_write_unlock(s);
 	mutex_lock_nested(m, subclass);
 	reiserfs_write_lock(s);
@@ -109,6 +117,7 @@ reiserfs_mutex_lock_nested_safe(struct mutex *m, unsigned int subclass,
 static inline void
 reiserfs_down_read_safe(struct rw_semaphore *sem, struct super_block *s)
 {
+	reiserfs_lock_check_recursive(s);
 	reiserfs_write_unlock(s);
 	down_read(sem);
 	reiserfs_write_lock(s);
-- 
cgit v1.2.3


From c1c5523dd1517250cac8b15a4acbc237c24a67d4 Mon Sep 17 00:00:00 2001
From: Marc Kleine-Budde <mkl@pengutronix.de>
Date: Wed, 23 Dec 2009 01:27:48 +0000
Subject: can/netlink: add CAN_CTRLMODE_ONE_SHOT

This patch adds the flag CAN_CTRLMODE_ONE_SHOT. It is used as mask
or flag in the "struct can_ctrlmode".

It allows userspace via netlink to set a CAN controller into the special
"one-shot" mode. In this mode, if supported by the CAN controller, it
tries only once to deliver a CAN frame and aborts it if an error
(e.g.: arbitration lost) happens.

Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
Acked-by: Wolfgang Grandegger <wg@grandegger.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/can/netlink.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/can/netlink.h b/include/linux/can/netlink.h
index 9ecbb7871c0e..c818335fbb13 100644
--- a/include/linux/can/netlink.h
+++ b/include/linux/can/netlink.h
@@ -80,6 +80,7 @@ struct can_ctrlmode {
 #define CAN_CTRLMODE_LOOPBACK	0x1	/* Loopback mode */
 #define CAN_CTRLMODE_LISTENONLY	0x2 	/* Listen-only mode */
 #define CAN_CTRLMODE_3_SAMPLES	0x4	/* Triple sampling mode */
+#define CAN_CTRLMODE_ONE_SHOT	0x8	/* One-Shot mode */
 
 /*
  * CAN device statistics
-- 
cgit v1.2.3


From 96d07d211739fd2450ac54e81d00fa40fcd4b1bd Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jslaby@suse.cz>
Date: Fri, 20 Nov 2009 14:16:33 +0100
Subject: resource: move kernel function inside __KERNEL__

It is an internal function. Move it inside __KERNEL__ ifdef, along
with task_struct declaration.

Then we get:
--- /usr/include/linux/resource.h       2009-09-14 15:09:29.000000000 +0200
+++ usr/include/linux/resource.h       2010-01-04 11:30:54.000000000 +0100
@@ -3,8 +3,6 @@

 #include <linux/time.h>

-struct task_struct;
-
 /*
  * Resource control/accounting header file for linux
  */
@@ -70,6 +68,5 @@
  */
 #include <asm/resource.h>

-int getrusage(struct task_struct *p, int who, struct rusage *ru);

 #endif

***********

include/linux/Kbuild is untouched, since unifdef is run even on
headers-y nowadays.

Signed-off-by: Jiri Slaby <jslaby@suse.cz>
---
 include/linux/resource.h | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/resource.h b/include/linux/resource.h
index 40fc7e626082..f1e914eefeab 100644
--- a/include/linux/resource.h
+++ b/include/linux/resource.h
@@ -3,8 +3,6 @@
 
 #include <linux/time.h>
 
-struct task_struct;
-
 /*
  * Resource control/accounting header file for linux
  */
@@ -70,6 +68,12 @@ struct rlimit {
  */
 #include <asm/resource.h>
 
+#ifdef __KERNEL__
+
+struct task_struct;
+
 int getrusage(struct task_struct *p, int who, struct rusage __user *ru);
 
+#endif /* __KERNEL__ */
+
 #endif
-- 
cgit v1.2.3


From 3e10e716abf3c71bdb5d86b8f507f9e72236c9cd Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jslaby@suse.cz>
Date: Thu, 19 Nov 2009 17:16:37 +0100
Subject: resource: add helpers for fetching rlimits

We want to be sure that compiler fetches the limit variable only
once, so add helpers for fetching current and maximal resource
limits which do that.

Add them to sched.h (instead of resource.h) due to circular dependency
 sched.h->resource.h->task_struct
Alternative would be to create a separate res_access.h or similar.

Signed-off-by: Jiri Slaby <jslaby@suse.cz>
Cc: James Morris <jmorris@namei.org>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index f2f842db03ce..8d4991be9d53 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2601,6 +2601,28 @@ static inline void mm_init_owner(struct mm_struct *mm, struct task_struct *p)
 }
 #endif /* CONFIG_MM_OWNER */
 
+static inline unsigned long task_rlimit(const struct task_struct *tsk,
+		unsigned int limit)
+{
+	return ACCESS_ONCE(tsk->signal->rlim[limit].rlim_cur);
+}
+
+static inline unsigned long task_rlimit_max(const struct task_struct *tsk,
+		unsigned int limit)
+{
+	return ACCESS_ONCE(tsk->signal->rlim[limit].rlim_max);
+}
+
+static inline unsigned long rlimit(unsigned int limit)
+{
+	return task_rlimit(current, limit);
+}
+
+static inline unsigned long rlimit_max(unsigned int limit)
+{
+	return task_rlimit_max(current, limit);
+}
+
 #endif /* __KERNEL__ */
 
 #endif
-- 
cgit v1.2.3


From cf2f765f1896064e34c6f0f2ef896ff058dd5c06 Mon Sep 17 00:00:00 2001
From: Jiri Kosina <jkosina@suse.cz>
Date: Mon, 4 Jan 2010 12:20:56 +0100
Subject: HID: handle joysticks with large number of buttons

Current HID code doesn't properly handle HID joysticks which have
larger number of buttons than what fits into current range reserved
for BTN_JOYSTICK.

One such joystick reported to not work properly is Saitek X52 Pro
Flight System.

We can't extend the range to fit more buttons in, because of backwards
compatibility reasons.

Therefore this patch introduces a new BTN_TRIGGER_HAPPY range, and
uses these to map the buttons which are over BTN_JOYSTICK limit.

Acked-by: Dmitry Torokhov <dmitry.torokhov@gmail.com> [for the input.h part]
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-input.c |  7 ++++++-
 include/linux/input.h   | 42 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 48 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c
index 5862b0f3b55d..dad7aae9c975 100644
--- a/drivers/hid/hid-input.c
+++ b/drivers/hid/hid-input.c
@@ -198,7 +198,12 @@ static void hidinput_configure_usage(struct hid_input *hidinput, struct hid_fiel
 		switch (field->application) {
 		case HID_GD_MOUSE:
 		case HID_GD_POINTER:  code += 0x110; break;
-		case HID_GD_JOYSTICK: code += 0x120; break;
+		case HID_GD_JOYSTICK:
+				      if (code <= 0xf)
+					      code += BTN_JOYSTICK;
+				      else
+					      code += BTN_TRIGGER_HAPPY;
+				      break;
 		case HID_GD_GAMEPAD:  code += 0x130; break;
 		default:
 			switch (field->physical) {
diff --git a/include/linux/input.h b/include/linux/input.h
index 7be8a6537b57..97f98ca9b040 100644
--- a/include/linux/input.h
+++ b/include/linux/input.h
@@ -597,6 +597,48 @@ struct input_absinfo {
 
 #define KEY_CAMERA_FOCUS	0x210
 
+#define BTN_TRIGGER_HAPPY		0x2c0
+#define BTN_TRIGGER_HAPPY1		0x2c0
+#define BTN_TRIGGER_HAPPY2		0x2c1
+#define BTN_TRIGGER_HAPPY3		0x2c2
+#define BTN_TRIGGER_HAPPY4		0x2c3
+#define BTN_TRIGGER_HAPPY5		0x2c4
+#define BTN_TRIGGER_HAPPY6		0x2c5
+#define BTN_TRIGGER_HAPPY7		0x2c6
+#define BTN_TRIGGER_HAPPY8		0x2c7
+#define BTN_TRIGGER_HAPPY9		0x2c8
+#define BTN_TRIGGER_HAPPY10		0x2c9
+#define BTN_TRIGGER_HAPPY11		0x2ca
+#define BTN_TRIGGER_HAPPY12		0x2cb
+#define BTN_TRIGGER_HAPPY13		0x2cc
+#define BTN_TRIGGER_HAPPY14		0x2cd
+#define BTN_TRIGGER_HAPPY15		0x2ce
+#define BTN_TRIGGER_HAPPY16		0x2cf
+#define BTN_TRIGGER_HAPPY17		0x2d0
+#define BTN_TRIGGER_HAPPY18		0x2d1
+#define BTN_TRIGGER_HAPPY19		0x2d2
+#define BTN_TRIGGER_HAPPY20		0x2d3
+#define BTN_TRIGGER_HAPPY21		0x2d4
+#define BTN_TRIGGER_HAPPY22		0x2d5
+#define BTN_TRIGGER_HAPPY23		0x2d6
+#define BTN_TRIGGER_HAPPY24		0x2d7
+#define BTN_TRIGGER_HAPPY25		0x2d8
+#define BTN_TRIGGER_HAPPY26		0x2d9
+#define BTN_TRIGGER_HAPPY27		0x2da
+#define BTN_TRIGGER_HAPPY28		0x2db
+#define BTN_TRIGGER_HAPPY29		0x2dc
+#define BTN_TRIGGER_HAPPY30		0x2dd
+#define BTN_TRIGGER_HAPPY31		0x2de
+#define BTN_TRIGGER_HAPPY32		0x2df
+#define BTN_TRIGGER_HAPPY33		0x2e0
+#define BTN_TRIGGER_HAPPY34		0x2e1
+#define BTN_TRIGGER_HAPPY35		0x2e2
+#define BTN_TRIGGER_HAPPY36		0x2e3
+#define BTN_TRIGGER_HAPPY37		0x2e4
+#define BTN_TRIGGER_HAPPY38		0x2e5
+#define BTN_TRIGGER_HAPPY39		0x2e6
+#define BTN_TRIGGER_HAPPY40		0x2e7
+
 /* We avoid low common keys in module aliases so they don't get huge. */
 #define KEY_MIN_INTERESTING	KEY_MUTE
 #define KEY_MAX			0x2ff
-- 
cgit v1.2.3


From 1ae861e652b5457e7fa98ccbc55abea1e207916e Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Thu, 31 Dec 2009 12:15:54 +0100
Subject: PCI/PM: Use per-device D3 delays

It turns out that some PCI devices require extra delays when changing
power state from D3 to D0 (and the other way around).  Although this
is against the PCI specification, we can handle it quite easily by
allowing drivers to define arbitrary D3 delays for devices known to
require extra time for switching power states.

Introduce additional field d3_delay in struct pci_dev and use it to
store the value of the device's D0->D3 delay, in miliseconds.  Make
the PCI PM core code use the per-device d3_delay unless
pci_pm_d3_delay is greater (in which case the latter is used).
[This also allows the driver to specify d3_delay shorter than the
 10 ms required by the PCI standard if the device is known to be able
 to handle that.]

Make the sky2 driver set d3_delay to 150 for devices handled by it.

Fixes http://bugzilla.kernel.org/show_bug.cgi?id=14730 which is a
listed regression from 2.6.30.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/net/sky2.c  |  1 +
 drivers/pci/pci.c   | 19 +++++++++++++++----
 include/linux/pci.h |  1 +
 3 files changed, 17 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/sky2.c b/drivers/net/sky2.c
index 1c01b96c9611..2d28d58200d0 100644
--- a/drivers/net/sky2.c
+++ b/drivers/net/sky2.c
@@ -4684,6 +4684,7 @@ static int __devinit sky2_probe(struct pci_dev *pdev,
 	INIT_WORK(&hw->restart_work, sky2_restart);
 
 	pci_set_drvdata(pdev, hw);
+	pdev->d3_delay = 150;
 
 	return 0;
 
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 0906599ebfde..315fea47e784 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -29,7 +29,17 @@ const char *pci_power_names[] = {
 };
 EXPORT_SYMBOL_GPL(pci_power_names);
 
-unsigned int pci_pm_d3_delay = PCI_PM_D3_WAIT;
+unsigned int pci_pm_d3_delay;
+
+static void pci_dev_d3_sleep(struct pci_dev *dev)
+{
+	unsigned int delay = dev->d3_delay;
+
+	if (delay < pci_pm_d3_delay)
+		delay = pci_pm_d3_delay;
+
+	msleep(delay);
+}
 
 #ifdef CONFIG_PCI_DOMAINS
 int pci_domains_supported = 1;
@@ -522,7 +532,7 @@ static int pci_raw_set_power_state(struct pci_dev *dev, pci_power_t state)
 	/* Mandatory power management transition delays */
 	/* see PCI PM 1.1 5.6.1 table 18 */
 	if (state == PCI_D3hot || dev->current_state == PCI_D3hot)
-		msleep(pci_pm_d3_delay);
+		pci_dev_d3_sleep(dev);
 	else if (state == PCI_D2 || dev->current_state == PCI_D2)
 		udelay(PCI_PM_D2_DELAY);
 
@@ -1409,6 +1419,7 @@ void pci_pm_init(struct pci_dev *dev)
 	}
 
 	dev->pm_cap = pm;
+	dev->d3_delay = PCI_PM_D3_WAIT;
 
 	dev->d1_support = false;
 	dev->d2_support = false;
@@ -2247,12 +2258,12 @@ static int pci_pm_reset(struct pci_dev *dev, int probe)
 	csr &= ~PCI_PM_CTRL_STATE_MASK;
 	csr |= PCI_D3hot;
 	pci_write_config_word(dev, dev->pm_cap + PCI_PM_CTRL, csr);
-	msleep(pci_pm_d3_delay);
+	pci_dev_d3_sleep(dev);
 
 	csr &= ~PCI_PM_CTRL_STATE_MASK;
 	csr |= PCI_D0;
 	pci_write_config_word(dev, dev->pm_cap + PCI_PM_CTRL, csr);
-	msleep(pci_pm_d3_delay);
+	pci_dev_d3_sleep(dev);
 
 	return 0;
 }
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 5da0690d9cee..174e5392e51e 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -243,6 +243,7 @@ struct pci_dev {
 	unsigned int	d2_support:1;	/* Low power state D2 is supported */
 	unsigned int	no_d1d2:1;	/* Only allow D0 and D3 */
 	unsigned int	wakeup_prepared:1;
+	unsigned int	d3_delay;	/* D3->D0 transition time in ms */
 
 #ifdef CONFIG_PCIEASPM
 	struct pcie_link_state	*link_state;	/* ASPM link state. */
-- 
cgit v1.2.3


From e1783a240f491fb233f04edc042e16b18a7a79ba Mon Sep 17 00:00:00 2001
From: Christoph Lameter <cl@linux-foundation.org>
Date: Tue, 5 Jan 2010 15:34:50 +0900
Subject: module: Use this_cpu_xx to dynamically allocate counters

Use cpu ops to deal with the per cpu data instead of a local_t. Reduces memory
requirements, cache footprint and decreases cycle counts.

The this_cpu_xx operations are also used for !SMP mode. Otherwise we could
not drop the use of __module_ref_addr() which would make per cpu data handling
complicated. this_cpu_xx operations have their own fallback for !SMP.

V8-V9:
- Leave include asm/module.h since ringbuffer.c depends on it. Nothing else
  does though. Another patch will deal with that.
- Remove spurious free.

Signed-off-by: Christoph Lameter <cl@linux-foundation.org>
Acked-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/module.h | 36 ++++++++++++++----------------------
 kernel/module.c        | 29 +++++++++++++++--------------
 2 files changed, 29 insertions(+), 36 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/module.h b/include/linux/module.h
index 6cb1a3cab5d3..2302f09ea2d9 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -18,6 +18,7 @@
 #include <linux/tracepoint.h>
 
 #include <asm/local.h>
+#include <linux/percpu.h>
 #include <asm/module.h>
 
 #include <trace/events/module.h>
@@ -363,11 +364,9 @@ struct module
 	/* Destruction function. */
 	void (*exit)(void);
 
-#ifdef CONFIG_SMP
-	char *refptr;
-#else
-	local_t ref;
-#endif
+	struct module_ref {
+		int count;
+	} *refptr;
 #endif
 
 #ifdef CONFIG_CONSTRUCTORS
@@ -454,25 +453,16 @@ void __symbol_put(const char *symbol);
 #define symbol_put(x) __symbol_put(MODULE_SYMBOL_PREFIX #x)
 void symbol_put_addr(void *addr);
 
-static inline local_t *__module_ref_addr(struct module *mod, int cpu)
-{
-#ifdef CONFIG_SMP
-	return (local_t *) (mod->refptr + per_cpu_offset(cpu));
-#else
-	return &mod->ref;
-#endif
-}
-
 /* Sometimes we know we already have a refcount, and it's easier not
    to handle the error case (which only happens with rmmod --wait). */
 static inline void __module_get(struct module *module)
 {
 	if (module) {
-		unsigned int cpu = get_cpu();
-		local_inc(__module_ref_addr(module, cpu));
+		preempt_disable();
+		__this_cpu_inc(module->refptr->count);
 		trace_module_get(module, _THIS_IP_,
-				 local_read(__module_ref_addr(module, cpu)));
-		put_cpu();
+				 __this_cpu_read(module->refptr->count));
+		preempt_enable();
 	}
 }
 
@@ -481,15 +471,17 @@ static inline int try_module_get(struct module *module)
 	int ret = 1;
 
 	if (module) {
-		unsigned int cpu = get_cpu();
+		preempt_disable();
+
 		if (likely(module_is_live(module))) {
-			local_inc(__module_ref_addr(module, cpu));
+			__this_cpu_inc(module->refptr->count);
 			trace_module_get(module, _THIS_IP_,
-				local_read(__module_ref_addr(module, cpu)));
+				__this_cpu_read(module->refptr->count));
 		}
 		else
 			ret = 0;
-		put_cpu();
+
+		preempt_enable();
 	}
 	return ret;
 }
diff --git a/kernel/module.c b/kernel/module.c
index e96b8ed1cb6a..9bf228052ec5 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -474,9 +474,10 @@ static void module_unload_init(struct module *mod)
 
 	INIT_LIST_HEAD(&mod->modules_which_use_me);
 	for_each_possible_cpu(cpu)
-		local_set(__module_ref_addr(mod, cpu), 0);
+		per_cpu_ptr(mod->refptr, cpu)->count = 0;
+
 	/* Hold reference count during initialization. */
-	local_set(__module_ref_addr(mod, raw_smp_processor_id()), 1);
+	__this_cpu_write(mod->refptr->count, 1);
 	/* Backwards compatibility macros put refcount during init. */
 	mod->waiter = current;
 }
@@ -619,7 +620,7 @@ unsigned int module_refcount(struct module *mod)
 	int cpu;
 
 	for_each_possible_cpu(cpu)
-		total += local_read(__module_ref_addr(mod, cpu));
+		total += per_cpu_ptr(mod->refptr, cpu)->count;
 	return total;
 }
 EXPORT_SYMBOL(module_refcount);
@@ -796,14 +797,15 @@ static struct module_attribute refcnt = {
 void module_put(struct module *module)
 {
 	if (module) {
-		unsigned int cpu = get_cpu();
-		local_dec(__module_ref_addr(module, cpu));
+		preempt_disable();
+		__this_cpu_dec(module->refptr->count);
+
 		trace_module_put(module, _RET_IP_,
-				 local_read(__module_ref_addr(module, cpu)));
+				 __this_cpu_read(module->refptr->count));
 		/* Maybe they're waiting for us to drop reference? */
 		if (unlikely(!module_is_live(module)))
 			wake_up_process(module->waiter);
-		put_cpu();
+		preempt_enable();
 	}
 }
 EXPORT_SYMBOL(module_put);
@@ -1394,9 +1396,9 @@ static void free_module(struct module *mod)
 	kfree(mod->args);
 	if (mod->percpu)
 		percpu_modfree(mod->percpu);
-#if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP)
+#if defined(CONFIG_MODULE_UNLOAD)
 	if (mod->refptr)
-		percpu_modfree(mod->refptr);
+		free_percpu(mod->refptr);
 #endif
 	/* Free lock-classes: */
 	lockdep_free_key_range(mod->module_core, mod->core_size);
@@ -2159,9 +2161,8 @@ static noinline struct module *load_module(void __user *umod,
 	mod = (void *)sechdrs[modindex].sh_addr;
 	kmemleak_load_module(mod, hdr, sechdrs, secstrings);
 
-#if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP)
-	mod->refptr = percpu_modalloc(sizeof(local_t), __alignof__(local_t),
-				      mod->name);
+#if defined(CONFIG_MODULE_UNLOAD)
+	mod->refptr = alloc_percpu(struct module_ref);
 	if (!mod->refptr) {
 		err = -ENOMEM;
 		goto free_init;
@@ -2393,8 +2394,8 @@ static noinline struct module *load_module(void __user *umod,
 	kobject_put(&mod->mkobj.kobj);
  free_unload:
 	module_unload_free(mod);
-#if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP)
-	percpu_modfree(mod->refptr);
+#if defined(CONFIG_MODULE_UNLOAD)
+	free_percpu(mod->refptr);
  free_init:
 #endif
 	module_free(mod, mod->module_init);
-- 
cgit v1.2.3


From 79615760f380ec86cd58204744e774c33fab9211 Mon Sep 17 00:00:00 2001
From: Christoph Lameter <cl@linux-foundation.org>
Date: Tue, 5 Jan 2010 15:34:50 +0900
Subject: local_t: Move local.h include to ringbuffer.c and
 ring_buffer_benchmark.c

ringbuffer*.c are the last users of local.h.

Remove the include from modules.h and add it to ringbuffer files.

Signed-off-by: Christoph Lameter <cl@linux-foundation.org>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/module.h               | 1 -
 kernel/trace/ring_buffer.c           | 1 +
 kernel/trace/ring_buffer_benchmark.c | 1 +
 3 files changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/module.h b/include/linux/module.h
index 2302f09ea2d9..7e74ae0051cc 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -17,7 +17,6 @@
 #include <linux/moduleparam.h>
 #include <linux/tracepoint.h>
 
-#include <asm/local.h>
 #include <linux/percpu.h>
 #include <asm/module.h>
 
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 2326b04c95c4..eb6c8988c31a 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -20,6 +20,7 @@
 #include <linux/cpu.h>
 #include <linux/fs.h>
 
+#include <asm/local.h>
 #include "trace.h"
 
 /*
diff --git a/kernel/trace/ring_buffer_benchmark.c b/kernel/trace/ring_buffer_benchmark.c
index b2477caf09c2..df74c7982255 100644
--- a/kernel/trace/ring_buffer_benchmark.c
+++ b/kernel/trace/ring_buffer_benchmark.c
@@ -8,6 +8,7 @@
 #include <linux/kthread.h>
 #include <linux/module.h>
 #include <linux/time.h>
+#include <asm/local.h>
 
 struct rb_page {
 	u64		ts;
-- 
cgit v1.2.3


From 99dcc3e5a94ed491fbef402831d8c0bbb267f995 Mon Sep 17 00:00:00 2001
From: Christoph Lameter <cl@linux-foundation.org>
Date: Tue, 5 Jan 2010 15:34:51 +0900
Subject: this_cpu: Page allocator conversion

Use the per cpu allocator functionality to avoid per cpu arrays in struct zone.

This drastically reduces the size of struct zone for systems with large
amounts of processors and allows placement of critical variables of struct
zone in one cacheline even on very large systems.

Another effect is that the pagesets of one processor are placed near one
another. If multiple pagesets from different zones fit into one cacheline
then additional cacheline fetches can be avoided on the hot paths when
allocating memory from multiple zones.

Bootstrap becomes simpler if we use the same scheme for UP, SMP, NUMA. #ifdefs
are reduced and we can drop the zone_pcp macro.

Hotplug handling is also simplified since cpu alloc can bring up and
shut down cpu areas for a specific cpu as a whole. So there is no need to
allocate or free individual pagesets.

V7-V8:
- Explain chicken egg dilemmna with percpu allocator.

V4-V5:
- Fix up cases where per_cpu_ptr is called before irq disable
- Integrate the bootstrap logic that was separate before.

tj: Build failure in pageset_cpuup_callback() due to missing ret
    variable fixed.

Reviewed-by: Mel Gorman <mel@csn.ul.ie>
Signed-off-by: Christoph Lameter <cl@linux-foundation.org>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/mm.h     |   4 -
 include/linux/mmzone.h |  12 +--
 mm/page_alloc.c        | 202 +++++++++++++++++--------------------------------
 mm/vmstat.c            |  14 ++--
 4 files changed, 81 insertions(+), 151 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 2265f28eb47a..554fa395aac9 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1079,11 +1079,7 @@ extern void si_meminfo(struct sysinfo * val);
 extern void si_meminfo_node(struct sysinfo *val, int nid);
 extern int after_bootmem;
 
-#ifdef CONFIG_NUMA
 extern void setup_per_cpu_pageset(void);
-#else
-static inline void setup_per_cpu_pageset(void) {}
-#endif
 
 extern void zone_pcp_update(struct zone *zone);
 
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 30fe668c2542..7874201a3556 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -184,13 +184,7 @@ struct per_cpu_pageset {
 	s8 stat_threshold;
 	s8 vm_stat_diff[NR_VM_ZONE_STAT_ITEMS];
 #endif
-} ____cacheline_aligned_in_smp;
-
-#ifdef CONFIG_NUMA
-#define zone_pcp(__z, __cpu) ((__z)->pageset[(__cpu)])
-#else
-#define zone_pcp(__z, __cpu) (&(__z)->pageset[(__cpu)])
-#endif
+};
 
 #endif /* !__GENERATING_BOUNDS.H */
 
@@ -306,10 +300,8 @@ struct zone {
 	 */
 	unsigned long		min_unmapped_pages;
 	unsigned long		min_slab_pages;
-	struct per_cpu_pageset	*pageset[NR_CPUS];
-#else
-	struct per_cpu_pageset	pageset[NR_CPUS];
 #endif
+	struct per_cpu_pageset	*pageset;
 	/*
 	 * free areas of different sizes
 	 */
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 4e9f5cc5fb59..6849e870de54 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1008,10 +1008,10 @@ static void drain_pages(unsigned int cpu)
 		struct per_cpu_pageset *pset;
 		struct per_cpu_pages *pcp;
 
-		pset = zone_pcp(zone, cpu);
+		local_irq_save(flags);
+		pset = per_cpu_ptr(zone->pageset, cpu);
 
 		pcp = &pset->pcp;
-		local_irq_save(flags);
 		free_pcppages_bulk(zone, pcp->count, pcp);
 		pcp->count = 0;
 		local_irq_restore(flags);
@@ -1095,7 +1095,6 @@ static void free_hot_cold_page(struct page *page, int cold)
 	arch_free_page(page, 0);
 	kernel_map_pages(page, 1, 0);
 
-	pcp = &zone_pcp(zone, get_cpu())->pcp;
 	migratetype = get_pageblock_migratetype(page);
 	set_page_private(page, migratetype);
 	local_irq_save(flags);
@@ -1118,6 +1117,7 @@ static void free_hot_cold_page(struct page *page, int cold)
 		migratetype = MIGRATE_MOVABLE;
 	}
 
+	pcp = &this_cpu_ptr(zone->pageset)->pcp;
 	if (cold)
 		list_add_tail(&page->lru, &pcp->lists[migratetype]);
 	else
@@ -1130,7 +1130,6 @@ static void free_hot_cold_page(struct page *page, int cold)
 
 out:
 	local_irq_restore(flags);
-	put_cpu();
 }
 
 void free_hot_page(struct page *page)
@@ -1180,17 +1179,15 @@ struct page *buffered_rmqueue(struct zone *preferred_zone,
 	unsigned long flags;
 	struct page *page;
 	int cold = !!(gfp_flags & __GFP_COLD);
-	int cpu;
 
 again:
-	cpu  = get_cpu();
 	if (likely(order == 0)) {
 		struct per_cpu_pages *pcp;
 		struct list_head *list;
 
-		pcp = &zone_pcp(zone, cpu)->pcp;
-		list = &pcp->lists[migratetype];
 		local_irq_save(flags);
+		pcp = &this_cpu_ptr(zone->pageset)->pcp;
+		list = &pcp->lists[migratetype];
 		if (list_empty(list)) {
 			pcp->count += rmqueue_bulk(zone, 0,
 					pcp->batch, list,
@@ -1231,7 +1228,6 @@ again:
 	__count_zone_vm_events(PGALLOC, zone, 1 << order);
 	zone_statistics(preferred_zone, zone);
 	local_irq_restore(flags);
-	put_cpu();
 
 	VM_BUG_ON(bad_range(zone, page));
 	if (prep_new_page(page, order, gfp_flags))
@@ -1240,7 +1236,6 @@ again:
 
 failed:
 	local_irq_restore(flags);
-	put_cpu();
 	return NULL;
 }
 
@@ -2179,7 +2174,7 @@ void show_free_areas(void)
 		for_each_online_cpu(cpu) {
 			struct per_cpu_pageset *pageset;
 
-			pageset = zone_pcp(zone, cpu);
+			pageset = per_cpu_ptr(zone->pageset, cpu);
 
 			printk("CPU %4d: hi:%5d, btch:%4d usd:%4d\n",
 			       cpu, pageset->pcp.high,
@@ -2744,10 +2739,29 @@ static void build_zonelist_cache(pg_data_t *pgdat)
 
 #endif	/* CONFIG_NUMA */
 
+/*
+ * Boot pageset table. One per cpu which is going to be used for all
+ * zones and all nodes. The parameters will be set in such a way
+ * that an item put on a list will immediately be handed over to
+ * the buddy list. This is safe since pageset manipulation is done
+ * with interrupts disabled.
+ *
+ * The boot_pagesets must be kept even after bootup is complete for
+ * unused processors and/or zones. They do play a role for bootstrapping
+ * hotplugged processors.
+ *
+ * zoneinfo_show() and maybe other functions do
+ * not check if the processor is online before following the pageset pointer.
+ * Other parts of the kernel may not check if the zone is available.
+ */
+static void setup_pageset(struct per_cpu_pageset *p, unsigned long batch);
+static DEFINE_PER_CPU(struct per_cpu_pageset, boot_pageset);
+
 /* return values int ....just for stop_machine() */
 static int __build_all_zonelists(void *dummy)
 {
 	int nid;
+	int cpu;
 
 #ifdef CONFIG_NUMA
 	memset(node_load, 0, sizeof(node_load));
@@ -2758,6 +2772,23 @@ static int __build_all_zonelists(void *dummy)
 		build_zonelists(pgdat);
 		build_zonelist_cache(pgdat);
 	}
+
+	/*
+	 * Initialize the boot_pagesets that are going to be used
+	 * for bootstrapping processors. The real pagesets for
+	 * each zone will be allocated later when the per cpu
+	 * allocator is available.
+	 *
+	 * boot_pagesets are used also for bootstrapping offline
+	 * cpus if the system is already booted because the pagesets
+	 * are needed to initialize allocators on a specific cpu too.
+	 * F.e. the percpu allocator needs the page allocator which
+	 * needs the percpu allocator in order to allocate its pagesets
+	 * (a chicken-egg dilemma).
+	 */
+	for_each_possible_cpu(cpu)
+		setup_pageset(&per_cpu(boot_pageset, cpu), 0);
+
 	return 0;
 }
 
@@ -3095,121 +3126,33 @@ static void setup_pagelist_highmark(struct per_cpu_pageset *p,
 		pcp->batch = PAGE_SHIFT * 8;
 }
 
-
-#ifdef CONFIG_NUMA
-/*
- * Boot pageset table. One per cpu which is going to be used for all
- * zones and all nodes. The parameters will be set in such a way
- * that an item put on a list will immediately be handed over to
- * the buddy list. This is safe since pageset manipulation is done
- * with interrupts disabled.
- *
- * Some NUMA counter updates may also be caught by the boot pagesets.
- *
- * The boot_pagesets must be kept even after bootup is complete for
- * unused processors and/or zones. They do play a role for bootstrapping
- * hotplugged processors.
- *
- * zoneinfo_show() and maybe other functions do
- * not check if the processor is online before following the pageset pointer.
- * Other parts of the kernel may not check if the zone is available.
- */
-static struct per_cpu_pageset boot_pageset[NR_CPUS];
-
 /*
- * Dynamically allocate memory for the
- * per cpu pageset array in struct zone.
+ * Allocate per cpu pagesets and initialize them.
+ * Before this call only boot pagesets were available.
+ * Boot pagesets will no longer be used by this processorr
+ * after setup_per_cpu_pageset().
  */
-static int __cpuinit process_zones(int cpu)
+void __init setup_per_cpu_pageset(void)
 {
-	struct zone *zone, *dzone;
-	int node = cpu_to_node(cpu);
-
-	node_set_state(node, N_CPU);	/* this node has a cpu */
+	struct zone *zone;
+	int cpu;
 
 	for_each_populated_zone(zone) {
-		zone_pcp(zone, cpu) = kmalloc_node(sizeof(struct per_cpu_pageset),
-					 GFP_KERNEL, node);
-		if (!zone_pcp(zone, cpu))
-			goto bad;
-
-		setup_pageset(zone_pcp(zone, cpu), zone_batchsize(zone));
-
-		if (percpu_pagelist_fraction)
-			setup_pagelist_highmark(zone_pcp(zone, cpu),
-			    (zone->present_pages / percpu_pagelist_fraction));
-	}
-
-	return 0;
-bad:
-	for_each_zone(dzone) {
-		if (!populated_zone(dzone))
-			continue;
-		if (dzone == zone)
-			break;
-		kfree(zone_pcp(dzone, cpu));
-		zone_pcp(dzone, cpu) = &boot_pageset[cpu];
-	}
-	return -ENOMEM;
-}
+		zone->pageset = alloc_percpu(struct per_cpu_pageset);
 
-static inline void free_zone_pagesets(int cpu)
-{
-	struct zone *zone;
-
-	for_each_zone(zone) {
-		struct per_cpu_pageset *pset = zone_pcp(zone, cpu);
+		for_each_possible_cpu(cpu) {
+			struct per_cpu_pageset *pcp = per_cpu_ptr(zone->pageset, cpu);
 
-		/* Free per_cpu_pageset if it is slab allocated */
-		if (pset != &boot_pageset[cpu])
-			kfree(pset);
-		zone_pcp(zone, cpu) = &boot_pageset[cpu];
-	}
-}
+			setup_pageset(pcp, zone_batchsize(zone));
 
-static int __cpuinit pageset_cpuup_callback(struct notifier_block *nfb,
-		unsigned long action,
-		void *hcpu)
-{
-	int cpu = (long)hcpu;
-	int ret = NOTIFY_OK;
-
-	switch (action) {
-	case CPU_UP_PREPARE:
-	case CPU_UP_PREPARE_FROZEN:
-		if (process_zones(cpu))
-			ret = NOTIFY_BAD;
-		break;
-	case CPU_UP_CANCELED:
-	case CPU_UP_CANCELED_FROZEN:
-	case CPU_DEAD:
-	case CPU_DEAD_FROZEN:
-		free_zone_pagesets(cpu);
-		break;
-	default:
-		break;
+			if (percpu_pagelist_fraction)
+				setup_pagelist_highmark(pcp,
+					(zone->present_pages /
+						percpu_pagelist_fraction));
+		}
 	}
-	return ret;
 }
 
-static struct notifier_block __cpuinitdata pageset_notifier =
-	{ &pageset_cpuup_callback, NULL, 0 };
-
-void __init setup_per_cpu_pageset(void)
-{
-	int err;
-
-	/* Initialize per_cpu_pageset for cpu 0.
-	 * A cpuup callback will do this for every cpu
-	 * as it comes online
-	 */
-	err = process_zones(smp_processor_id());
-	BUG_ON(err);
-	register_cpu_notifier(&pageset_notifier);
-}
-
-#endif
-
 static noinline __init_refok
 int zone_wait_table_init(struct zone *zone, unsigned long zone_size_pages)
 {
@@ -3263,7 +3206,7 @@ static int __zone_pcp_update(void *data)
 		struct per_cpu_pageset *pset;
 		struct per_cpu_pages *pcp;
 
-		pset = zone_pcp(zone, cpu);
+		pset = per_cpu_ptr(zone->pageset, cpu);
 		pcp = &pset->pcp;
 
 		local_irq_save(flags);
@@ -3281,21 +3224,17 @@ void zone_pcp_update(struct zone *zone)
 
 static __meminit void zone_pcp_init(struct zone *zone)
 {
-	int cpu;
-	unsigned long batch = zone_batchsize(zone);
+	/*
+	 * per cpu subsystem is not up at this point. The following code
+	 * relies on the ability of the linker to provide the
+	 * offset of a (static) per cpu variable into the per cpu area.
+	 */
+	zone->pageset = &boot_pageset;
 
-	for (cpu = 0; cpu < NR_CPUS; cpu++) {
-#ifdef CONFIG_NUMA
-		/* Early boot. Slab allocator not functional yet */
-		zone_pcp(zone, cpu) = &boot_pageset[cpu];
-		setup_pageset(&boot_pageset[cpu],0);
-#else
-		setup_pageset(zone_pcp(zone,cpu), batch);
-#endif
-	}
 	if (zone->present_pages)
-		printk(KERN_DEBUG "  %s zone: %lu pages, LIFO batch:%lu\n",
-			zone->name, zone->present_pages, batch);
+		printk(KERN_DEBUG "  %s zone: %lu pages, LIFO batch:%u\n",
+			zone->name, zone->present_pages,
+					 zone_batchsize(zone));
 }
 
 __meminit int init_currently_empty_zone(struct zone *zone,
@@ -4809,10 +4748,11 @@ int percpu_pagelist_fraction_sysctl_handler(ctl_table *table, int write,
 	if (!write || (ret == -EINVAL))
 		return ret;
 	for_each_populated_zone(zone) {
-		for_each_online_cpu(cpu) {
+		for_each_possible_cpu(cpu) {
 			unsigned long  high;
 			high = zone->present_pages / percpu_pagelist_fraction;
-			setup_pagelist_highmark(zone_pcp(zone, cpu), high);
+			setup_pagelist_highmark(
+				per_cpu_ptr(zone->pageset, cpu), high);
 		}
 	}
 	return 0;
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 6051fbab67ba..1ba0bb7ad043 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -139,7 +139,8 @@ static void refresh_zone_stat_thresholds(void)
 		threshold = calculate_threshold(zone);
 
 		for_each_online_cpu(cpu)
-			zone_pcp(zone, cpu)->stat_threshold = threshold;
+			per_cpu_ptr(zone->pageset, cpu)->stat_threshold
+							= threshold;
 	}
 }
 
@@ -149,7 +150,8 @@ static void refresh_zone_stat_thresholds(void)
 void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
 				int delta)
 {
-	struct per_cpu_pageset *pcp = zone_pcp(zone, smp_processor_id());
+	struct per_cpu_pageset *pcp = this_cpu_ptr(zone->pageset);
+
 	s8 *p = pcp->vm_stat_diff + item;
 	long x;
 
@@ -202,7 +204,7 @@ EXPORT_SYMBOL(mod_zone_page_state);
  */
 void __inc_zone_state(struct zone *zone, enum zone_stat_item item)
 {
-	struct per_cpu_pageset *pcp = zone_pcp(zone, smp_processor_id());
+	struct per_cpu_pageset *pcp = this_cpu_ptr(zone->pageset);
 	s8 *p = pcp->vm_stat_diff + item;
 
 	(*p)++;
@@ -223,7 +225,7 @@ EXPORT_SYMBOL(__inc_zone_page_state);
 
 void __dec_zone_state(struct zone *zone, enum zone_stat_item item)
 {
-	struct per_cpu_pageset *pcp = zone_pcp(zone, smp_processor_id());
+	struct per_cpu_pageset *pcp = this_cpu_ptr(zone->pageset);
 	s8 *p = pcp->vm_stat_diff + item;
 
 	(*p)--;
@@ -300,7 +302,7 @@ void refresh_cpu_vm_stats(int cpu)
 	for_each_populated_zone(zone) {
 		struct per_cpu_pageset *p;
 
-		p = zone_pcp(zone, cpu);
+		p = per_cpu_ptr(zone->pageset, cpu);
 
 		for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
 			if (p->vm_stat_diff[i]) {
@@ -741,7 +743,7 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
 	for_each_online_cpu(i) {
 		struct per_cpu_pageset *pageset;
 
-		pageset = zone_pcp(zone, i);
+		pageset = per_cpu_ptr(zone->pageset, i);
 		seq_printf(m,
 			   "\n    cpu: %i"
 			   "\n              count: %i"
-- 
cgit v1.2.3


From 59b015133cd0034f5904a76969d73476380aac46 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Tue, 5 Jan 2010 17:56:02 -0800
Subject: Input: serio - fix potential deadlock when unbinding drivers

sysfs_remove_group() waits for sysfs attributes to be removed, therefore
we do not need to worry about driver-specific attributes being accessed
after driver has been detached from the device. In fact, attempts to take
serio->drv_mutex in attribute methods may lead to the following deadlock:

                                          sysfs_read_file()
                                            fill_read_buffer()
                                              sysfs_get_active_two()
                                                psmouse_attr_show_helper()
                                                  serio_pin_driver()
serio_disconnect_driver()
  mutex_lock(&serio->drv_mutex);
                                <-------->        mutex_lock(&serio_drv_mutex);
    psmouse_disconnect()
      sysfs_remove_group(... psmouse_attr_group);
        ....
        sysfs_deactivate();
          wait_for_completion();

Fix this by removing calls to serio_[un]pin_driver() and functions themselves
and using driver-private mutexes to serialize access to attribute's set()
methods that may change device state.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/input/keyboard/atkbd.c     | 59 ++++++++++++++++----------------------
 drivers/input/mouse/psmouse-base.c | 32 ++-------------------
 include/linux/serio.h              | 19 ------------
 3 files changed, 28 insertions(+), 82 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/input/keyboard/atkbd.c b/drivers/input/keyboard/atkbd.c
index 1f5e2ce327d6..1cf32a7814d0 100644
--- a/drivers/input/keyboard/atkbd.c
+++ b/drivers/input/keyboard/atkbd.c
@@ -225,8 +225,10 @@ struct atkbd {
 
 	struct delayed_work event_work;
 	unsigned long event_jiffies;
-	struct mutex event_mutex;
 	unsigned long event_mask;
+
+	/* Serializes reconnect(), attr->set() and event work */
+	struct mutex mutex;
 };
 
 /*
@@ -577,7 +579,7 @@ static void atkbd_event_work(struct work_struct *work)
 {
 	struct atkbd *atkbd = container_of(work, struct atkbd, event_work.work);
 
-	mutex_lock(&atkbd->event_mutex);
+	mutex_lock(&atkbd->mutex);
 
 	if (!atkbd->enabled) {
 		/*
@@ -596,7 +598,7 @@ static void atkbd_event_work(struct work_struct *work)
 			atkbd_set_repeat_rate(atkbd);
 	}
 
-	mutex_unlock(&atkbd->event_mutex);
+	mutex_unlock(&atkbd->mutex);
 }
 
 /*
@@ -612,7 +614,7 @@ static void atkbd_schedule_event_work(struct atkbd *atkbd, int event_bit)
 
 	atkbd->event_jiffies = jiffies;
 	set_bit(event_bit, &atkbd->event_mask);
-	wmb();
+	mb();
 	schedule_delayed_work(&atkbd->event_work, delay);
 }
 
@@ -849,12 +851,13 @@ static void atkbd_disconnect(struct serio *serio)
 {
 	struct atkbd *atkbd = serio_get_drvdata(serio);
 
+	sysfs_remove_group(&serio->dev.kobj, &atkbd_attribute_group);
+
 	atkbd_disable(atkbd);
 
 	/* make sure we don't have a command in flight */
 	cancel_delayed_work_sync(&atkbd->event_work);
 
-	sysfs_remove_group(&serio->dev.kobj, &atkbd_attribute_group);
 	input_unregister_device(atkbd->dev);
 	serio_close(serio);
 	serio_set_drvdata(serio, NULL);
@@ -1087,7 +1090,7 @@ static int atkbd_connect(struct serio *serio, struct serio_driver *drv)
 	atkbd->dev = dev;
 	ps2_init(&atkbd->ps2dev, serio);
 	INIT_DELAYED_WORK(&atkbd->event_work, atkbd_event_work);
-	mutex_init(&atkbd->event_mutex);
+	mutex_init(&atkbd->mutex);
 
 	switch (serio->id.type) {
 
@@ -1160,19 +1163,23 @@ static int atkbd_reconnect(struct serio *serio)
 {
 	struct atkbd *atkbd = serio_get_drvdata(serio);
 	struct serio_driver *drv = serio->drv;
+	int retval = -1;
 
 	if (!atkbd || !drv) {
 		printk(KERN_DEBUG "atkbd: reconnect request, but serio is disconnected, ignoring...\n");
 		return -1;
 	}
 
+	mutex_lock(&atkbd->mutex);
+
 	atkbd_disable(atkbd);
 
 	if (atkbd->write) {
 		if (atkbd_probe(atkbd))
-			return -1;
+			goto out;
+
 		if (atkbd->set != atkbd_select_set(atkbd, atkbd->set, atkbd->extra))
-			return -1;
+			goto out;
 
 		atkbd_activate(atkbd);
 
@@ -1190,8 +1197,11 @@ static int atkbd_reconnect(struct serio *serio)
 	}
 
 	atkbd_enable(atkbd);
+	retval = 0;
 
-	return 0;
+ out:
+	mutex_unlock(&atkbd->mutex);
+	return retval;
 }
 
 static struct serio_device_id atkbd_serio_ids[] = {
@@ -1235,47 +1245,28 @@ static ssize_t atkbd_attr_show_helper(struct device *dev, char *buf,
 				ssize_t (*handler)(struct atkbd *, char *))
 {
 	struct serio *serio = to_serio_port(dev);
-	int retval;
-
-	retval = serio_pin_driver(serio);
-	if (retval)
-		return retval;
-
-	if (serio->drv != &atkbd_drv) {
-		retval = -ENODEV;
-		goto out;
-	}
-
-	retval = handler((struct atkbd *)serio_get_drvdata(serio), buf);
+	struct atkbd *atkbd = serio_get_drvdata(serio);
 
-out:
-	serio_unpin_driver(serio);
-	return retval;
+	return handler(atkbd, buf);
 }
 
 static ssize_t atkbd_attr_set_helper(struct device *dev, const char *buf, size_t count,
 				ssize_t (*handler)(struct atkbd *, const char *, size_t))
 {
 	struct serio *serio = to_serio_port(dev);
-	struct atkbd *atkbd;
+	struct atkbd *atkbd = serio_get_drvdata(serio);
 	int retval;
 
-	retval = serio_pin_driver(serio);
+	retval = mutex_lock_interruptible(&atkbd->mutex);
 	if (retval)
 		return retval;
 
-	if (serio->drv != &atkbd_drv) {
-		retval = -ENODEV;
-		goto out;
-	}
-
-	atkbd = serio_get_drvdata(serio);
 	atkbd_disable(atkbd);
 	retval = handler(atkbd, buf, count);
 	atkbd_enable(atkbd);
 
-out:
-	serio_unpin_driver(serio);
+	mutex_unlock(&atkbd->mutex);
+
 	return retval;
 }
 
diff --git a/drivers/input/mouse/psmouse-base.c b/drivers/input/mouse/psmouse-base.c
index 401ac6b6edd4..d59e18b24ede 100644
--- a/drivers/input/mouse/psmouse-base.c
+++ b/drivers/input/mouse/psmouse-base.c
@@ -1450,24 +1450,10 @@ ssize_t psmouse_attr_show_helper(struct device *dev, struct device_attribute *de
 	struct serio *serio = to_serio_port(dev);
 	struct psmouse_attribute *attr = to_psmouse_attr(devattr);
 	struct psmouse *psmouse;
-	int retval;
-
-	retval = serio_pin_driver(serio);
-	if (retval)
-		return retval;
-
-	if (serio->drv != &psmouse_drv) {
-		retval = -ENODEV;
-		goto out;
-	}
 
 	psmouse = serio_get_drvdata(serio);
 
-	retval = attr->show(psmouse, attr->data, buf);
-
-out:
-	serio_unpin_driver(serio);
-	return retval;
+	return attr->show(psmouse, attr->data, buf);
 }
 
 ssize_t psmouse_attr_set_helper(struct device *dev, struct device_attribute *devattr,
@@ -1478,18 +1464,9 @@ ssize_t psmouse_attr_set_helper(struct device *dev, struct device_attribute *dev
 	struct psmouse *psmouse, *parent = NULL;
 	int retval;
 
-	retval = serio_pin_driver(serio);
-	if (retval)
-		return retval;
-
-	if (serio->drv != &psmouse_drv) {
-		retval = -ENODEV;
-		goto out_unpin;
-	}
-
 	retval = mutex_lock_interruptible(&psmouse_mutex);
 	if (retval)
-		goto out_unpin;
+		goto out;
 
 	psmouse = serio_get_drvdata(serio);
 
@@ -1519,8 +1496,7 @@ ssize_t psmouse_attr_set_helper(struct device *dev, struct device_attribute *dev
 
  out_unlock:
 	mutex_unlock(&psmouse_mutex);
- out_unpin:
-	serio_unpin_driver(serio);
+ out:
 	return retval;
 }
 
@@ -1582,9 +1558,7 @@ static ssize_t psmouse_attr_set_protocol(struct psmouse *psmouse, void *data, co
 		}
 
 		mutex_unlock(&psmouse_mutex);
-		serio_unpin_driver(serio);
 		serio_unregister_child_port(serio);
-		serio_pin_driver_uninterruptible(serio);
 		mutex_lock(&psmouse_mutex);
 
 		if (serio->drv != &psmouse_drv) {
diff --git a/include/linux/serio.h b/include/linux/serio.h
index e2f3044d4a4a..813d26c247ec 100644
--- a/include/linux/serio.h
+++ b/include/linux/serio.h
@@ -136,25 +136,6 @@ static inline void serio_continue_rx(struct serio *serio)
 	spin_unlock_irq(&serio->lock);
 }
 
-/*
- * Use the following functions to pin serio's driver in process context
- */
-static inline int serio_pin_driver(struct serio *serio)
-{
-	return mutex_lock_interruptible(&serio->drv_mutex);
-}
-
-static inline void serio_pin_driver_uninterruptible(struct serio *serio)
-{
-	mutex_lock(&serio->drv_mutex);
-}
-
-static inline void serio_unpin_driver(struct serio *serio)
-{
-	mutex_unlock(&serio->drv_mutex);
-}
-
-
 #endif
 
 /*
-- 
cgit v1.2.3


From ddf1ffbd40c92ff1e58c45fa96d309788f7beb60 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Tue, 5 Jan 2010 17:56:04 -0800
Subject: Input: serio - let device core tell us if device was registered

No need to keep track of it by ourselves.

Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/input/serio/serio.c | 8 ++------
 include/linux/serio.h       | 1 -
 2 files changed, 2 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/input/serio/serio.c b/drivers/input/serio/serio.c
index 0a278f9f1d3a..d89880450f77 100644
--- a/drivers/input/serio/serio.c
+++ b/drivers/input/serio/serio.c
@@ -577,8 +577,6 @@ static void serio_add_port(struct serio *serio)
 		printk(KERN_ERR
 			"serio: device_add() failed for %s (%s), error: %d\n",
 			serio->phys, serio->name, error);
-	else
-		serio->registered = true;
 }
 
 /*
@@ -605,10 +603,8 @@ static void serio_destroy_port(struct serio *serio)
 		serio->parent = NULL;
 	}
 
-	if (serio->registered) {
+	if (device_is_registered(&serio->dev))
 		device_del(&serio->dev);
-		serio->registered = false;
-	}
 
 	list_del_init(&serio->node);
 	serio_remove_pending_events(serio);
@@ -995,7 +991,7 @@ irqreturn_t serio_interrupt(struct serio *serio,
 
         if (likely(serio->drv)) {
                 ret = serio->drv->interrupt(serio, data, dfl);
-	} else if (!dfl && serio->registered) {
+	} else if (!dfl && device_is_registered(&serio->dev)) {
 		serio_rescan(serio);
 		ret = IRQ_HANDLED;
 	}
diff --git a/include/linux/serio.h b/include/linux/serio.h
index e2f3044d4a4a..d0fb702059cd 100644
--- a/include/linux/serio.h
+++ b/include/linux/serio.h
@@ -30,7 +30,6 @@ struct serio {
 	char phys[32];
 
 	bool manual_bind;
-	bool registered;	/* port has been fully registered with driver core */
 
 	struct serio_device_id id;
 
-- 
cgit v1.2.3


From 361b7b5b032338361ea88412f1fc45479fdd5859 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Tue, 5 Jan 2010 17:56:03 -0800
Subject: Input: gameport - let device core tell us if device was registered

No need to keep track of it by ourselves.

Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/input/gameport/gameport.c | 6 +-----
 include/linux/gameport.h          | 1 -
 2 files changed, 1 insertion(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/input/gameport/gameport.c b/drivers/input/gameport/gameport.c
index ac11be08585e..f9e5f8e1690b 100644
--- a/drivers/input/gameport/gameport.c
+++ b/drivers/input/gameport/gameport.c
@@ -561,8 +561,6 @@ static void gameport_add_port(struct gameport *gameport)
 		printk(KERN_ERR
 			"gameport: device_add() failed for %s (%s), error: %d\n",
 			gameport->phys, gameport->name, error);
-	else
-		gameport->registered = 1;
 }
 
 /*
@@ -584,10 +582,8 @@ static void gameport_destroy_port(struct gameport *gameport)
 		gameport->parent = NULL;
 	}
 
-	if (gameport->registered) {
+	if (device_is_registered(&gameport->dev))
 		device_del(&gameport->dev);
-		gameport->registered = 0;
-	}
 
 	list_del_init(&gameport->node);
 
diff --git a/include/linux/gameport.h b/include/linux/gameport.h
index 1bc08541c2b9..48e68da097f6 100644
--- a/include/linux/gameport.h
+++ b/include/linux/gameport.h
@@ -46,7 +46,6 @@ struct gameport {
 	struct mutex drv_mutex;		/* protects serio->drv so attributes can pin driver */
 
 	struct device dev;
-	unsigned int registered;	/* port has been fully registered with driver core */
 
 	struct list_head node;
 };
-- 
cgit v1.2.3


From 16295bec6398a3eedc9377e1af6ff4c71b98c300 Mon Sep 17 00:00:00 2001
From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Wed, 6 Jan 2010 19:47:10 +1100
Subject: padata: Generic parallelization/serialization interface

This patch introduces an interface to process data objects
in parallel. The parallelized objects return after serialization
in the same order as they were before the parallelization.

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/linux/padata.h |  88 +++++++
 init/Kconfig           |   4 +
 kernel/Makefile        |   1 +
 kernel/padata.c        | 690 +++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 783 insertions(+)
 create mode 100644 include/linux/padata.h
 create mode 100644 kernel/padata.c

(limited to 'include/linux')

diff --git a/include/linux/padata.h b/include/linux/padata.h
new file mode 100644
index 000000000000..51611da9c498
--- /dev/null
+++ b/include/linux/padata.h
@@ -0,0 +1,88 @@
+/*
+ * padata.h - header for the padata parallelization interface
+ *
+ * Copyright (C) 2008, 2009 secunet Security Networks AG
+ * Copyright (C) 2008, 2009 Steffen Klassert <steffen.klassert@secunet.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#ifndef PADATA_H
+#define PADATA_H
+
+#include <linux/workqueue.h>
+#include <linux/spinlock.h>
+#include <linux/list.h>
+
+struct padata_priv {
+	struct list_head	list;
+	struct parallel_data	*pd;
+	int			cb_cpu;
+	int			seq_nr;
+	int			info;
+	void                    (*parallel)(struct padata_priv *padata);
+	void                    (*serial)(struct padata_priv *padata);
+};
+
+struct padata_list {
+	struct list_head        list;
+	spinlock_t              lock;
+};
+
+struct padata_queue {
+	struct padata_list	parallel;
+	struct padata_list	reorder;
+	struct padata_list	serial;
+	struct work_struct	pwork;
+	struct work_struct	swork;
+	struct parallel_data    *pd;
+	atomic_t		num_obj;
+	int			cpu_index;
+};
+
+struct parallel_data {
+	struct padata_instance	*pinst;
+	struct padata_queue	*queue;
+	atomic_t		seq_nr;
+	atomic_t		reorder_objects;
+	atomic_t                refcnt;
+	unsigned int		max_seq_nr;
+	cpumask_var_t		cpumask;
+	spinlock_t              lock;
+};
+
+struct padata_instance {
+	struct notifier_block   cpu_notifier;
+	struct workqueue_struct *wq;
+	struct parallel_data	*pd;
+	cpumask_var_t           cpumask;
+	struct mutex		lock;
+	u8			flags;
+#define	PADATA_INIT		1
+#define	PADATA_RESET		2
+};
+
+extern struct padata_instance *padata_alloc(const struct cpumask *cpumask,
+					    struct workqueue_struct *wq);
+extern void padata_free(struct padata_instance *pinst);
+extern int padata_do_parallel(struct padata_instance *pinst,
+			      struct padata_priv *padata, int cb_cpu);
+extern void padata_do_serial(struct padata_priv *padata);
+extern int padata_set_cpumask(struct padata_instance *pinst,
+			      cpumask_var_t cpumask);
+extern int padata_add_cpu(struct padata_instance *pinst, int cpu);
+extern int padata_remove_cpu(struct padata_instance *pinst, int cpu);
+extern void padata_start(struct padata_instance *pinst);
+extern void padata_stop(struct padata_instance *pinst);
+#endif
diff --git a/init/Kconfig b/init/Kconfig
index a23da9f01803..9fd23bcc1709 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1252,4 +1252,8 @@ source "block/Kconfig"
 config PREEMPT_NOTIFIERS
 	bool
 
+config PADATA
+	depends on SMP
+	bool
+
 source "kernel/Kconfig.locks"
diff --git a/kernel/Makefile b/kernel/Makefile
index 864ff75d65f2..6aebdeb2aa34 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -100,6 +100,7 @@ obj-$(CONFIG_SLOW_WORK_DEBUG) += slow-work-debugfs.o
 obj-$(CONFIG_PERF_EVENTS) += perf_event.o
 obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
 obj-$(CONFIG_USER_RETURN_NOTIFIER) += user-return-notifier.o
+obj-$(CONFIG_PADATA) += padata.o
 
 ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y)
 # According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
diff --git a/kernel/padata.c b/kernel/padata.c
new file mode 100644
index 000000000000..6f9bcb8313d6
--- /dev/null
+++ b/kernel/padata.c
@@ -0,0 +1,690 @@
+/*
+ * padata.c - generic interface to process data streams in parallel
+ *
+ * Copyright (C) 2008, 2009 secunet Security Networks AG
+ * Copyright (C) 2008, 2009 Steffen Klassert <steffen.klassert@secunet.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <linux/module.h>
+#include <linux/cpumask.h>
+#include <linux/err.h>
+#include <linux/cpu.h>
+#include <linux/padata.h>
+#include <linux/mutex.h>
+#include <linux/sched.h>
+#include <linux/rcupdate.h>
+
+#define MAX_SEQ_NR INT_MAX - NR_CPUS
+#define MAX_OBJ_NUM 10000 * NR_CPUS
+
+static int padata_index_to_cpu(struct parallel_data *pd, int cpu_index)
+{
+	int cpu, target_cpu;
+
+	target_cpu = cpumask_first(pd->cpumask);
+	for (cpu = 0; cpu < cpu_index; cpu++)
+		target_cpu = cpumask_next(target_cpu, pd->cpumask);
+
+	return target_cpu;
+}
+
+static int padata_cpu_hash(struct padata_priv *padata)
+{
+	int cpu_index;
+	struct parallel_data *pd;
+
+	pd =  padata->pd;
+
+	/*
+	 * Hash the sequence numbers to the cpus by taking
+	 * seq_nr mod. number of cpus in use.
+	 */
+	cpu_index =  padata->seq_nr % cpumask_weight(pd->cpumask);
+
+	return padata_index_to_cpu(pd, cpu_index);
+}
+
+static void padata_parallel_worker(struct work_struct *work)
+{
+	struct padata_queue *queue;
+	struct parallel_data *pd;
+	struct padata_instance *pinst;
+	LIST_HEAD(local_list);
+
+	local_bh_disable();
+	queue = container_of(work, struct padata_queue, pwork);
+	pd = queue->pd;
+	pinst = pd->pinst;
+
+	spin_lock(&queue->parallel.lock);
+	list_replace_init(&queue->parallel.list, &local_list);
+	spin_unlock(&queue->parallel.lock);
+
+	while (!list_empty(&local_list)) {
+		struct padata_priv *padata;
+
+		padata = list_entry(local_list.next,
+				    struct padata_priv, list);
+
+		list_del_init(&padata->list);
+
+		padata->parallel(padata);
+	}
+
+	local_bh_enable();
+}
+
+/*
+ * padata_do_parallel - padata parallelization function
+ *
+ * @pinst: padata instance
+ * @padata: object to be parallelized
+ * @cb_cpu: cpu the serialization callback function will run on,
+ *          must be in the cpumask of padata.
+ *
+ * The parallelization callback function will run with BHs off.
+ * Note: Every object which is parallelized by padata_do_parallel
+ * must be seen by padata_do_serial.
+ */
+int padata_do_parallel(struct padata_instance *pinst,
+		       struct padata_priv *padata, int cb_cpu)
+{
+	int target_cpu, err;
+	struct padata_queue *queue;
+	struct parallel_data *pd;
+
+	rcu_read_lock_bh();
+
+	pd = rcu_dereference(pinst->pd);
+
+	err = 0;
+	if (!(pinst->flags & PADATA_INIT))
+		goto out;
+
+	err =  -EBUSY;
+	if ((pinst->flags & PADATA_RESET))
+		goto out;
+
+	if (atomic_read(&pd->refcnt) >= MAX_OBJ_NUM)
+		goto out;
+
+	err = -EINVAL;
+	if (!cpumask_test_cpu(cb_cpu, pd->cpumask))
+		goto out;
+
+	err = -EINPROGRESS;
+	atomic_inc(&pd->refcnt);
+	padata->pd = pd;
+	padata->cb_cpu = cb_cpu;
+
+	if (unlikely(atomic_read(&pd->seq_nr) == pd->max_seq_nr))
+		atomic_set(&pd->seq_nr, -1);
+
+	padata->seq_nr = atomic_inc_return(&pd->seq_nr);
+
+	target_cpu = padata_cpu_hash(padata);
+	queue = per_cpu_ptr(pd->queue, target_cpu);
+
+	spin_lock(&queue->parallel.lock);
+	list_add_tail(&padata->list, &queue->parallel.list);
+	spin_unlock(&queue->parallel.lock);
+
+	queue_work_on(target_cpu, pinst->wq, &queue->pwork);
+
+out:
+	rcu_read_unlock_bh();
+
+	return err;
+}
+EXPORT_SYMBOL(padata_do_parallel);
+
+static struct padata_priv *padata_get_next(struct parallel_data *pd)
+{
+	int cpu, num_cpus, empty, calc_seq_nr;
+	int seq_nr, next_nr, overrun, next_overrun;
+	struct padata_queue *queue, *next_queue;
+	struct padata_priv *padata;
+	struct padata_list *reorder;
+
+	empty = 0;
+	next_nr = -1;
+	next_overrun = 0;
+	next_queue = NULL;
+
+	num_cpus = cpumask_weight(pd->cpumask);
+
+	for_each_cpu(cpu, pd->cpumask) {
+		queue = per_cpu_ptr(pd->queue, cpu);
+		reorder = &queue->reorder;
+
+		/*
+		 * Calculate the seq_nr of the object that should be
+		 * next in this queue.
+		 */
+		overrun = 0;
+		calc_seq_nr = (atomic_read(&queue->num_obj) * num_cpus)
+			       + queue->cpu_index;
+
+		if (unlikely(calc_seq_nr > pd->max_seq_nr)) {
+			calc_seq_nr = calc_seq_nr - pd->max_seq_nr - 1;
+			overrun = 1;
+		}
+
+		if (!list_empty(&reorder->list)) {
+			padata = list_entry(reorder->list.next,
+					    struct padata_priv, list);
+
+			seq_nr  = padata->seq_nr;
+			BUG_ON(calc_seq_nr != seq_nr);
+		} else {
+			seq_nr = calc_seq_nr;
+			empty++;
+		}
+
+		if (next_nr < 0 || seq_nr < next_nr
+		    || (next_overrun && !overrun)) {
+			next_nr = seq_nr;
+			next_overrun = overrun;
+			next_queue = queue;
+		}
+	}
+
+	padata = NULL;
+
+	if (empty == num_cpus)
+		goto out;
+
+	reorder = &next_queue->reorder;
+
+	if (!list_empty(&reorder->list)) {
+		padata = list_entry(reorder->list.next,
+				    struct padata_priv, list);
+
+		if (unlikely(next_overrun)) {
+			for_each_cpu(cpu, pd->cpumask) {
+				queue = per_cpu_ptr(pd->queue, cpu);
+				atomic_set(&queue->num_obj, 0);
+			}
+		}
+
+		spin_lock(&reorder->lock);
+		list_del_init(&padata->list);
+		atomic_dec(&pd->reorder_objects);
+		spin_unlock(&reorder->lock);
+
+		atomic_inc(&next_queue->num_obj);
+
+		goto out;
+	}
+
+	if (next_nr % num_cpus == next_queue->cpu_index) {
+		padata = ERR_PTR(-ENODATA);
+		goto out;
+	}
+
+	padata = ERR_PTR(-EINPROGRESS);
+out:
+	return padata;
+}
+
+static void padata_reorder(struct parallel_data *pd)
+{
+	struct padata_priv *padata;
+	struct padata_queue *queue;
+	struct padata_instance *pinst = pd->pinst;
+
+try_again:
+	if (!spin_trylock_bh(&pd->lock))
+		goto out;
+
+	while (1) {
+		padata = padata_get_next(pd);
+
+		if (!padata || PTR_ERR(padata) == -EINPROGRESS)
+			break;
+
+		if (PTR_ERR(padata) == -ENODATA) {
+			spin_unlock_bh(&pd->lock);
+			goto out;
+		}
+
+		queue = per_cpu_ptr(pd->queue, padata->cb_cpu);
+
+		spin_lock(&queue->serial.lock);
+		list_add_tail(&padata->list, &queue->serial.list);
+		spin_unlock(&queue->serial.lock);
+
+		queue_work_on(padata->cb_cpu, pinst->wq, &queue->swork);
+	}
+
+	spin_unlock_bh(&pd->lock);
+
+	if (atomic_read(&pd->reorder_objects))
+		goto try_again;
+
+out:
+	return;
+}
+
+static void padata_serial_worker(struct work_struct *work)
+{
+	struct padata_queue *queue;
+	struct parallel_data *pd;
+	LIST_HEAD(local_list);
+
+	local_bh_disable();
+	queue = container_of(work, struct padata_queue, swork);
+	pd = queue->pd;
+
+	spin_lock(&queue->serial.lock);
+	list_replace_init(&queue->serial.list, &local_list);
+	spin_unlock(&queue->serial.lock);
+
+	while (!list_empty(&local_list)) {
+		struct padata_priv *padata;
+
+		padata = list_entry(local_list.next,
+				    struct padata_priv, list);
+
+		list_del_init(&padata->list);
+
+		padata->serial(padata);
+		atomic_dec(&pd->refcnt);
+	}
+	local_bh_enable();
+}
+
+/*
+ * padata_do_serial - padata serialization function
+ *
+ * @padata: object to be serialized.
+ *
+ * padata_do_serial must be called for every parallelized object.
+ * The serialization callback function will run with BHs off.
+ */
+void padata_do_serial(struct padata_priv *padata)
+{
+	int cpu;
+	struct padata_queue *queue;
+	struct parallel_data *pd;
+
+	pd = padata->pd;
+
+	cpu = get_cpu();
+	queue = per_cpu_ptr(pd->queue, cpu);
+
+	spin_lock(&queue->reorder.lock);
+	atomic_inc(&pd->reorder_objects);
+	list_add_tail(&padata->list, &queue->reorder.list);
+	spin_unlock(&queue->reorder.lock);
+
+	put_cpu();
+
+	padata_reorder(pd);
+}
+EXPORT_SYMBOL(padata_do_serial);
+
+static struct parallel_data *padata_alloc_pd(struct padata_instance *pinst,
+					     const struct cpumask *cpumask)
+{
+	int cpu, cpu_index, num_cpus;
+	struct padata_queue *queue;
+	struct parallel_data *pd;
+
+	cpu_index = 0;
+
+	pd = kzalloc(sizeof(struct parallel_data), GFP_KERNEL);
+	if (!pd)
+		goto err;
+
+	pd->queue = alloc_percpu(struct padata_queue);
+	if (!pd->queue)
+		goto err_free_pd;
+
+	if (!alloc_cpumask_var(&pd->cpumask, GFP_KERNEL))
+		goto err_free_queue;
+
+	for_each_possible_cpu(cpu) {
+		queue = per_cpu_ptr(pd->queue, cpu);
+
+		queue->pd = pd;
+
+		if (cpumask_test_cpu(cpu, cpumask)
+		    && cpumask_test_cpu(cpu, cpu_active_mask)) {
+			queue->cpu_index = cpu_index;
+			cpu_index++;
+		} else
+			queue->cpu_index = -1;
+
+		INIT_LIST_HEAD(&queue->reorder.list);
+		INIT_LIST_HEAD(&queue->parallel.list);
+		INIT_LIST_HEAD(&queue->serial.list);
+		spin_lock_init(&queue->reorder.lock);
+		spin_lock_init(&queue->parallel.lock);
+		spin_lock_init(&queue->serial.lock);
+
+		INIT_WORK(&queue->pwork, padata_parallel_worker);
+		INIT_WORK(&queue->swork, padata_serial_worker);
+		atomic_set(&queue->num_obj, 0);
+	}
+
+	cpumask_and(pd->cpumask, cpumask, cpu_active_mask);
+
+	num_cpus = cpumask_weight(pd->cpumask);
+	pd->max_seq_nr = (MAX_SEQ_NR / num_cpus) * num_cpus - 1;
+
+	atomic_set(&pd->seq_nr, -1);
+	atomic_set(&pd->reorder_objects, 0);
+	atomic_set(&pd->refcnt, 0);
+	pd->pinst = pinst;
+	spin_lock_init(&pd->lock);
+
+	return pd;
+
+err_free_queue:
+	free_percpu(pd->queue);
+err_free_pd:
+	kfree(pd);
+err:
+	return NULL;
+}
+
+static void padata_free_pd(struct parallel_data *pd)
+{
+	free_cpumask_var(pd->cpumask);
+	free_percpu(pd->queue);
+	kfree(pd);
+}
+
+static void padata_replace(struct padata_instance *pinst,
+			   struct parallel_data *pd_new)
+{
+	struct parallel_data *pd_old = pinst->pd;
+
+	pinst->flags |= PADATA_RESET;
+
+	rcu_assign_pointer(pinst->pd, pd_new);
+
+	synchronize_rcu();
+
+	while (atomic_read(&pd_old->refcnt) != 0)
+		yield();
+
+	flush_workqueue(pinst->wq);
+
+	padata_free_pd(pd_old);
+
+	pinst->flags &= ~PADATA_RESET;
+}
+
+/*
+ * padata_set_cpumask - set the cpumask that padata should use
+ *
+ * @pinst: padata instance
+ * @cpumask: the cpumask to use
+ */
+int padata_set_cpumask(struct padata_instance *pinst,
+			cpumask_var_t cpumask)
+{
+	struct parallel_data *pd;
+	int err = 0;
+
+	might_sleep();
+
+	mutex_lock(&pinst->lock);
+
+	pd = padata_alloc_pd(pinst, cpumask);
+	if (!pd) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	cpumask_copy(pinst->cpumask, cpumask);
+
+	padata_replace(pinst, pd);
+
+out:
+	mutex_unlock(&pinst->lock);
+
+	return err;
+}
+EXPORT_SYMBOL(padata_set_cpumask);
+
+static int __padata_add_cpu(struct padata_instance *pinst, int cpu)
+{
+	struct parallel_data *pd;
+
+	if (cpumask_test_cpu(cpu, cpu_active_mask)) {
+		pd = padata_alloc_pd(pinst, pinst->cpumask);
+		if (!pd)
+			return -ENOMEM;
+
+		padata_replace(pinst, pd);
+	}
+
+	return 0;
+}
+
+/*
+ * padata_add_cpu - add a cpu to the padata cpumask
+ *
+ * @pinst: padata instance
+ * @cpu: cpu to add
+ */
+int padata_add_cpu(struct padata_instance *pinst, int cpu)
+{
+	int err;
+
+	might_sleep();
+
+	mutex_lock(&pinst->lock);
+
+	cpumask_set_cpu(cpu, pinst->cpumask);
+	err = __padata_add_cpu(pinst, cpu);
+
+	mutex_unlock(&pinst->lock);
+
+	return err;
+}
+EXPORT_SYMBOL(padata_add_cpu);
+
+static int __padata_remove_cpu(struct padata_instance *pinst, int cpu)
+{
+	struct parallel_data *pd;
+
+	if (cpumask_test_cpu(cpu, cpu_online_mask)) {
+		pd = padata_alloc_pd(pinst, pinst->cpumask);
+		if (!pd)
+			return -ENOMEM;
+
+		padata_replace(pinst, pd);
+	}
+
+	return 0;
+}
+
+/*
+ * padata_remove_cpu - remove a cpu from the padata cpumask
+ *
+ * @pinst: padata instance
+ * @cpu: cpu to remove
+ */
+int padata_remove_cpu(struct padata_instance *pinst, int cpu)
+{
+	int err;
+
+	might_sleep();
+
+	mutex_lock(&pinst->lock);
+
+	cpumask_clear_cpu(cpu, pinst->cpumask);
+	err = __padata_remove_cpu(pinst, cpu);
+
+	mutex_unlock(&pinst->lock);
+
+	return err;
+}
+EXPORT_SYMBOL(padata_remove_cpu);
+
+/*
+ * padata_start - start the parallel processing
+ *
+ * @pinst: padata instance to start
+ */
+void padata_start(struct padata_instance *pinst)
+{
+	might_sleep();
+
+	mutex_lock(&pinst->lock);
+	pinst->flags |= PADATA_INIT;
+	mutex_unlock(&pinst->lock);
+}
+EXPORT_SYMBOL(padata_start);
+
+/*
+ * padata_stop - stop the parallel processing
+ *
+ * @pinst: padata instance to stop
+ */
+void padata_stop(struct padata_instance *pinst)
+{
+	might_sleep();
+
+	mutex_lock(&pinst->lock);
+	pinst->flags &= ~PADATA_INIT;
+	mutex_unlock(&pinst->lock);
+}
+EXPORT_SYMBOL(padata_stop);
+
+static int __cpuinit padata_cpu_callback(struct notifier_block *nfb,
+					 unsigned long action, void *hcpu)
+{
+	int err;
+	struct padata_instance *pinst;
+	int cpu = (unsigned long)hcpu;
+
+	pinst = container_of(nfb, struct padata_instance, cpu_notifier);
+
+	switch (action) {
+	case CPU_ONLINE:
+	case CPU_ONLINE_FROZEN:
+		if (!cpumask_test_cpu(cpu, pinst->cpumask))
+			break;
+		mutex_lock(&pinst->lock);
+		err = __padata_add_cpu(pinst, cpu);
+		mutex_unlock(&pinst->lock);
+		if (err)
+			return NOTIFY_BAD;
+		break;
+
+	case CPU_DOWN_PREPARE:
+	case CPU_DOWN_PREPARE_FROZEN:
+		if (!cpumask_test_cpu(cpu, pinst->cpumask))
+			break;
+		mutex_lock(&pinst->lock);
+		err = __padata_remove_cpu(pinst, cpu);
+		mutex_unlock(&pinst->lock);
+		if (err)
+			return NOTIFY_BAD;
+		break;
+
+	case CPU_UP_CANCELED:
+	case CPU_UP_CANCELED_FROZEN:
+		if (!cpumask_test_cpu(cpu, pinst->cpumask))
+			break;
+		mutex_lock(&pinst->lock);
+		__padata_remove_cpu(pinst, cpu);
+		mutex_unlock(&pinst->lock);
+
+	case CPU_DOWN_FAILED:
+	case CPU_DOWN_FAILED_FROZEN:
+		if (!cpumask_test_cpu(cpu, pinst->cpumask))
+			break;
+		mutex_lock(&pinst->lock);
+		__padata_add_cpu(pinst, cpu);
+		mutex_unlock(&pinst->lock);
+	}
+
+	return NOTIFY_OK;
+}
+
+/*
+ * padata_alloc - allocate and initialize a padata instance
+ *
+ * @cpumask: cpumask that padata uses for parallelization
+ * @wq: workqueue to use for the allocated padata instance
+ */
+struct padata_instance *padata_alloc(const struct cpumask *cpumask,
+				     struct workqueue_struct *wq)
+{
+	int err;
+	struct padata_instance *pinst;
+	struct parallel_data *pd;
+
+	pinst = kzalloc(sizeof(struct padata_instance), GFP_KERNEL);
+	if (!pinst)
+		goto err;
+
+	pd = padata_alloc_pd(pinst, cpumask);
+	if (!pd)
+		goto err_free_inst;
+
+	rcu_assign_pointer(pinst->pd, pd);
+
+	pinst->wq = wq;
+
+	cpumask_copy(pinst->cpumask, cpumask);
+
+	pinst->flags = 0;
+
+	pinst->cpu_notifier.notifier_call = padata_cpu_callback;
+	pinst->cpu_notifier.priority = 0;
+	err = register_hotcpu_notifier(&pinst->cpu_notifier);
+	if (err)
+		goto err_free_pd;
+
+	mutex_init(&pinst->lock);
+
+	return pinst;
+
+err_free_pd:
+	padata_free_pd(pd);
+err_free_inst:
+	kfree(pinst);
+err:
+	return NULL;
+}
+EXPORT_SYMBOL(padata_alloc);
+
+/*
+ * padata_free - free a padata instance
+ *
+ * @ padata_inst: padata instance to free
+ */
+void padata_free(struct padata_instance *pinst)
+{
+	padata_stop(pinst);
+
+	synchronize_rcu();
+
+	while (atomic_read(&pinst->pd->refcnt) != 0)
+		yield();
+
+	unregister_hotcpu_notifier(&pinst->cpu_notifier);
+	padata_free_pd(pinst->pd);
+	kfree(pinst);
+}
+EXPORT_SYMBOL(padata_free);
-- 
cgit v1.2.3


From 509e760cd91c831983097ae174cb6c0b8c6c8e6b Mon Sep 17 00:00:00 2001
From: Lai Jiangshan <laijs@cn.fujitsu.com>
Date: Tue, 15 Dec 2009 15:39:42 +0800
Subject: tracing: Add print_fmt field

This is part of a patch set that removes the show_format method
in the ftrace event macros.

The print_fmt field is added to hold the string that shows
the print_fmt in the event format files. This patch only adds
the field but it is currently not used. Later patches will use
this field to enable us to remove the show_format field
and function.

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
LKML-Reference: <4B273D3E.2000704@cn.fujitsu.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/ftrace_event.h |  1 +
 include/trace/ftrace.h       | 28 +++++++++++++++++++++++++++-
 kernel/trace/trace_export.c  |  7 +++++++
 3 files changed, 35 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 2233c98d80df..bd23d8e52f02 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -121,6 +121,7 @@ struct ftrace_event_call {
 	int			(*regfunc)(struct ftrace_event_call *);
 	void			(*unregfunc)(struct ftrace_event_call *);
 	int			id;
+	const char		*print_fmt;
 	int			(*raw_init)(struct ftrace_event_call *);
 	int			(*show_format)(struct ftrace_event_call *,
 					       struct trace_seq *);
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index c6fe03e902ca..3351d85c83a3 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -722,8 +722,20 @@ static struct trace_event ftrace_event_type_##call = {			\
 
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
 
+#undef __entry
+#define __entry REC
+
+#undef __print_flags
+#undef __print_symbolic
+#undef __get_dynamic_array
+#undef __get_str
+
+#undef TP_printk
+#define TP_printk(fmt, args...) "\"" fmt "\", "  __stringify(args)
+
 #undef DECLARE_EVENT_CLASS
-#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print)
+#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print)	\
+static const char print_fmt_##call[] = print;
 
 #undef DEFINE_EVENT
 #define DEFINE_EVENT(template, call, proto, args)			\
@@ -737,6 +749,7 @@ __attribute__((section("_ftrace_events"))) event_##call = {		\
 	.raw_init		= trace_event_raw_init,			\
 	.regfunc		= ftrace_raw_reg_event_##call,		\
 	.unregfunc		= ftrace_raw_unreg_event_##call,	\
+	.print_fmt		= print_fmt_##template,			\
 	.show_format		= ftrace_format_##template,		\
 	.define_fields		= ftrace_define_fields_##template,	\
 	_TRACE_PROFILE_INIT(call)					\
@@ -745,6 +758,8 @@ __attribute__((section("_ftrace_events"))) event_##call = {		\
 #undef DEFINE_EVENT_PRINT
 #define DEFINE_EVENT_PRINT(template, call, proto, args, print)		\
 									\
+static const char print_fmt_##call[] = print;				\
+									\
 static struct ftrace_event_call __used					\
 __attribute__((__aligned__(4)))						\
 __attribute__((section("_ftrace_events"))) event_##call = {		\
@@ -754,6 +769,7 @@ __attribute__((section("_ftrace_events"))) event_##call = {		\
 	.raw_init		= trace_event_raw_init,			\
 	.regfunc		= ftrace_raw_reg_event_##call,		\
 	.unregfunc		= ftrace_raw_unreg_event_##call,	\
+	.print_fmt		= print_fmt_##call,			\
 	.show_format		= ftrace_format_##call,			\
 	.define_fields		= ftrace_define_fields_##template,	\
 	_TRACE_PROFILE_INIT(call)					\
@@ -837,6 +853,16 @@ __attribute__((section("_ftrace_events"))) event_##call = {		\
 
 #ifdef CONFIG_EVENT_PROFILE
 
+#undef __entry
+#define __entry entry
+
+#undef __get_dynamic_array
+#define __get_dynamic_array(field)	\
+		((void *)__entry + (__entry->__data_loc_##field & 0xffff))
+
+#undef __get_str
+#define __get_str(field) (char *)__get_dynamic_array(field)
+
 #undef __perf_addr
 #define __perf_addr(a) __addr = (a)
 
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c
index 9978a4f40090..95d14b640a66 100644
--- a/kernel/trace/trace_export.c
+++ b/kernel/trace/trace_export.c
@@ -203,6 +203,9 @@ static int ftrace_raw_init_event(struct ftrace_event_call *call)
 	return 0;
 }
 
+#undef __entry
+#define __entry REC
+
 #undef __field
 #define __field(type, item)
 
@@ -218,6 +221,9 @@ static int ftrace_raw_init_event(struct ftrace_event_call *call)
 #undef __dynamic_array
 #define __dynamic_array(type, item)
 
+#undef F_printk
+#define F_printk(fmt, args...) #fmt ", "  __stringify(args)
+
 #undef FTRACE_ENTRY
 #define FTRACE_ENTRY(call, struct_name, type, tstruct, print)		\
 									\
@@ -228,6 +234,7 @@ __attribute__((section("_ftrace_events"))) event_##call = {		\
 	.id			= type,					\
 	.system			= __stringify(TRACE_SYSTEM),		\
 	.raw_init		= ftrace_raw_init_event,		\
+	.print_fmt		= print,				\
 	.show_format		= ftrace_format_##call,			\
 	.define_fields		= ftrace_define_fields_##call,		\
 };									\
-- 
cgit v1.2.3


From c7ef3a9004201bca90626db246a19dadd2c29c9b Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Mon, 28 Dec 2009 21:13:59 -0500
Subject: tracing: Have syscall tracing call its own init function

In the clean up of having all events call one specific function,
the syscall event init was changed to call this helper function.

With the new print_fmt updates, the syscalls need to do special
initializations. This patch converts the syscall events to call
its own init function again.

Cc: Lai Jiangshan <laijs@cn.fujitsu.com>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/syscalls.h      |  4 ++--
 kernel/trace/trace_syscalls.c | 12 ++++++------
 2 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 207466a49f3d..ed353d274a77 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -143,7 +143,7 @@ struct perf_event_attr;
 		.name                   = "sys_enter"#sname,		\
 		.system                 = "syscalls",			\
 		.event                  = &enter_syscall_print_##sname,	\
-		.raw_init		= trace_event_raw_init,		\
+		.raw_init		= init_syscall_trace,		\
 		.show_format		= syscall_enter_format,		\
 		.define_fields		= syscall_enter_define_fields,	\
 		.regfunc		= reg_event_syscall_enter,	\
@@ -165,7 +165,7 @@ struct perf_event_attr;
 		.name                   = "sys_exit"#sname,		\
 		.system                 = "syscalls",			\
 		.event                  = &exit_syscall_print_##sname,	\
-		.raw_init		= trace_event_raw_init,		\
+		.raw_init		= init_syscall_trace,		\
 		.show_format		= syscall_exit_format,		\
 		.define_fields		= syscall_exit_define_fields,	\
 		.regfunc		= reg_event_syscall_exit,	\
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 1352b0a36fac..a78e86349ecb 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -450,14 +450,14 @@ int init_syscall_trace(struct ftrace_event_call *call)
 	if (set_syscall_print_fmt(call) < 0)
 		return -ENOMEM;
 
-	id = register_ftrace_event(call->event);
-	if (!id) {
+	id = trace_event_raw_init(call);
+
+	if (id < 0) {
 		free_syscall_print_fmt(call);
-		return -ENODEV;
+		return id;
 	}
-	call->id = id;
-	INIT_LIST_HEAD(&call->fields);
-	return 0;
+
+	return id;
 }
 
 int __init init_ftrace_syscalls(void)
-- 
cgit v1.2.3


From 0fa0edaf32b9a78b9854f1da98d4511a501089b0 Mon Sep 17 00:00:00 2001
From: Lai Jiangshan <laijs@cn.fujitsu.com>
Date: Tue, 15 Dec 2009 15:39:57 +0800
Subject: tracing: Remove show_format and related macros from TRACE_EVENT

The previous patches added the use of print_fmt string and changes
the trace_define_field() function to also create the fields and
format output for the event format files.

   text	   data	    bss	    dec	    hex	filename
5857201	1355780	9336808	16549789	 fc879d	vmlinux
5884589	1351684	9337896	16574169	 fce6d9	vmlinux-orig

The above shows the size of the vmlinux after this patch set
compared to the vmlinux-orig which is before the patch set.

This saves us 27k on text, 1k on bss and adds just 4k of data.

The total savings of 24k in size.

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
LKML-Reference: <4B273D4D.40604@cn.fujitsu.com>
Acked-by: Masami Hiramatsu <mhiramat@redhat.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/ftrace_event.h  |   2 -
 include/linux/syscalls.h      |   2 -
 include/trace/ftrace.h        | 133 ++----------------------------------------
 include/trace/syscall.h       |   4 --
 kernel/trace/trace_events.c   |  12 ----
 kernel/trace/trace_export.c   |  73 -----------------------
 kernel/trace/trace_kprobe.c   |  78 -------------------------
 kernel/trace/trace_syscalls.c |  66 ---------------------
 8 files changed, 6 insertions(+), 364 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index bd23d8e52f02..84a5629adfd8 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -123,8 +123,6 @@ struct ftrace_event_call {
 	int			id;
 	const char		*print_fmt;
 	int			(*raw_init)(struct ftrace_event_call *);
-	int			(*show_format)(struct ftrace_event_call *,
-					       struct trace_seq *);
 	int			(*define_fields)(struct ftrace_event_call *);
 	struct list_head	fields;
 	int			filter_active;
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index ed353d274a77..7b219696ad24 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -144,7 +144,6 @@ struct perf_event_attr;
 		.system                 = "syscalls",			\
 		.event                  = &enter_syscall_print_##sname,	\
 		.raw_init		= init_syscall_trace,		\
-		.show_format		= syscall_enter_format,		\
 		.define_fields		= syscall_enter_define_fields,	\
 		.regfunc		= reg_event_syscall_enter,	\
 		.unregfunc		= unreg_event_syscall_enter,	\
@@ -166,7 +165,6 @@ struct perf_event_attr;
 		.system                 = "syscalls",			\
 		.event                  = &exit_syscall_print_##sname,	\
 		.raw_init		= init_syscall_trace,		\
-		.show_format		= syscall_exit_format,		\
 		.define_fields		= syscall_exit_define_fields,	\
 		.regfunc		= reg_event_syscall_exit,	\
 		.unregfunc		= unreg_event_syscall_exit,	\
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index 3351d85c83a3..df65b99880b1 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -130,130 +130,6 @@
 
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
 
-/*
- * Setup the showing format of trace point.
- *
- * int
- * ftrace_format_##call(struct trace_seq *s)
- * {
- *	struct ftrace_raw_##call field;
- *	int ret;
- *
- *	ret = trace_seq_printf(s, #type " " #item ";"
- *			       " offset:%u; size:%u;\n",
- *			       offsetof(struct ftrace_raw_##call, item),
- *			       sizeof(field.type));
- *
- * }
- */
-
-#undef TP_STRUCT__entry
-#define TP_STRUCT__entry(args...) args
-
-#undef __field
-#define __field(type, item)					\
-	ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t"	\
-			       "offset:%u;\tsize:%u;\tsigned:%u;\n",	\
-			       (unsigned int)offsetof(typeof(field), item), \
-			       (unsigned int)sizeof(field.item),	\
-			       (unsigned int)is_signed_type(type));	\
-	if (!ret)							\
-		return 0;
-
-#undef __field_ext
-#define __field_ext(type, item, filter_type)	__field(type, item)
-
-#undef __array
-#define __array(type, item, len)						\
-	ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t"	\
-			       "offset:%u;\tsize:%u;\tsigned:%u;\n",	\
-			       (unsigned int)offsetof(typeof(field), item), \
-			       (unsigned int)sizeof(field.item),	\
-			       (unsigned int)is_signed_type(type));	\
-	if (!ret)							\
-		return 0;
-
-#undef __dynamic_array
-#define __dynamic_array(type, item, len)				       \
-	ret = trace_seq_printf(s, "\tfield:__data_loc " #type "[] " #item ";\t"\
-			       "offset:%u;\tsize:%u;\tsigned:%u;\n",	       \
-			       (unsigned int)offsetof(typeof(field),	       \
-					__data_loc_##item),		       \
-			       (unsigned int)sizeof(field.__data_loc_##item), \
-			       (unsigned int)is_signed_type(type));	\
-	if (!ret)							       \
-		return 0;
-
-#undef __string
-#define __string(item, src) __dynamic_array(char, item, -1)
-
-#undef __entry
-#define __entry REC
-
-#undef __print_symbolic
-#undef __get_dynamic_array
-#undef __get_str
-
-#undef TP_printk
-#define TP_printk(fmt, args...) "\"%s\", %s\n", fmt, __stringify(args)
-
-#undef TP_fast_assign
-#define TP_fast_assign(args...) args
-
-#undef TP_perf_assign
-#define TP_perf_assign(args...)
-
-#undef DECLARE_EVENT_CLASS
-#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, func, print)	\
-static int								\
-ftrace_format_setup_##call(struct ftrace_event_call *unused,		\
-			   struct trace_seq *s)				\
-{									\
-	struct ftrace_raw_##call field __attribute__((unused));		\
-	int ret = 0;							\
-									\
-	tstruct;							\
-									\
-	return ret;							\
-}									\
-									\
-static int								\
-ftrace_format_##call(struct ftrace_event_call *unused,			\
-		     struct trace_seq *s)				\
-{									\
-	int ret = 0;							\
-									\
-	ret = ftrace_format_setup_##call(unused, s);			\
-	if (!ret)							\
-		return ret;						\
-									\
-	ret = trace_seq_printf(s, "\nprint fmt: " print);		\
-									\
-	return ret;							\
-}
-
-#undef DEFINE_EVENT
-#define DEFINE_EVENT(template, name, proto, args)
-
-#undef DEFINE_EVENT_PRINT
-#define DEFINE_EVENT_PRINT(template, name, proto, args, print)		\
-static int								\
-ftrace_format_##name(struct ftrace_event_call *unused,			\
-		      struct trace_seq *s)				\
-{									\
-	int ret = 0;							\
-									\
-	ret = ftrace_format_setup_##template(unused, s);		\
-	if (!ret)							\
-		return ret;						\
-									\
-	trace_seq_printf(s, "\nprint fmt: " print);			\
-									\
-	return ret;							\
-}
-
-#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
-
 /*
  * Stage 3 of the trace events.
  *
@@ -622,7 +498,6 @@ static void ftrace_profile_disable_##name(struct ftrace_event_call *unused)\
  *	.raw_init		= trace_event_raw_init,
  *	.regfunc		= ftrace_reg_event_<call>,
  *	.unregfunc		= ftrace_unreg_event_<call>,
- *	.show_format		= ftrace_format_<call>,
  * }
  *
  */
@@ -657,6 +532,12 @@ static void ftrace_profile_disable_##name(struct ftrace_event_call *unused)\
 #define __assign_str(dst, src)						\
 	strcpy(__get_str(dst), src);
 
+#undef TP_fast_assign
+#define TP_fast_assign(args...) args
+
+#undef TP_perf_assign
+#define TP_perf_assign(args...)
+
 #undef DECLARE_EVENT_CLASS
 #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print)	\
 									\
@@ -750,7 +631,6 @@ __attribute__((section("_ftrace_events"))) event_##call = {		\
 	.regfunc		= ftrace_raw_reg_event_##call,		\
 	.unregfunc		= ftrace_raw_unreg_event_##call,	\
 	.print_fmt		= print_fmt_##template,			\
-	.show_format		= ftrace_format_##template,		\
 	.define_fields		= ftrace_define_fields_##template,	\
 	_TRACE_PROFILE_INIT(call)					\
 }
@@ -770,7 +650,6 @@ __attribute__((section("_ftrace_events"))) event_##call = {		\
 	.regfunc		= ftrace_raw_reg_event_##call,		\
 	.unregfunc		= ftrace_raw_unreg_event_##call,	\
 	.print_fmt		= print_fmt_##call,			\
-	.show_format		= ftrace_format_##call,			\
 	.define_fields		= ftrace_define_fields_##template,	\
 	_TRACE_PROFILE_INIT(call)					\
 }
diff --git a/include/trace/syscall.h b/include/trace/syscall.h
index 961fda3556bb..8cd410254456 100644
--- a/include/trace/syscall.h
+++ b/include/trace/syscall.h
@@ -34,10 +34,6 @@ struct syscall_metadata {
 extern unsigned long arch_syscall_addr(int nr);
 extern int init_syscall_trace(struct ftrace_event_call *call);
 
-extern int syscall_enter_format(struct ftrace_event_call *call,
-				struct trace_seq *s);
-extern int syscall_exit_format(struct ftrace_event_call *call,
-				struct trace_seq *s);
 extern int syscall_enter_define_fields(struct ftrace_event_call *call);
 extern int syscall_exit_define_fields(struct ftrace_event_call *call);
 extern int reg_event_syscall_enter(struct ftrace_event_call *call);
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 250ec865d5f5..c2a3077b7353 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -520,14 +520,6 @@ out:
 	return ret;
 }
 
-extern char *__bad_type_size(void);
-
-#undef FIELD
-#define FIELD(type, name)						\
-	sizeof(type) != sizeof(field.name) ? __bad_type_size() :	\
-	#type, "common_" #name, offsetof(typeof(field), name),		\
-		sizeof(field.name), is_signed_type(type)
-
 static ssize_t
 event_format_read(struct file *filp, char __user *ubuf, size_t cnt,
 		  loff_t *ppos)
@@ -965,10 +957,6 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,
 				  filter);
 	}
 
-	/* A trace may not want to export its format */
-	if (!call->show_format)
-		return 0;
-
 	trace_create_file("format", 0444, call->dir, call,
 			  format);
 
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c
index 95d14b640a66..e091f64ba6ce 100644
--- a/kernel/trace/trace_export.c
+++ b/kernel/trace/trace_export.c
@@ -62,78 +62,6 @@ static void __always_unused ____ftrace_check_##name(void)	\
 
 #include "trace_entries.h"
 
-
-#undef __field
-#define __field(type, item)						\
-	ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t"	\
-			       "offset:%zu;\tsize:%zu;\tsigned:%u;\n",	\
-			       offsetof(typeof(field), item),		\
-			       sizeof(field.item), is_signed_type(type)); \
-	if (!ret)							\
-		return 0;
-
-#undef __field_desc
-#define __field_desc(type, container, item)				\
-	ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t"	\
-			       "offset:%zu;\tsize:%zu;\tsigned:%u;\n",	\
-			       offsetof(typeof(field), container.item),	\
-			       sizeof(field.container.item),		\
-			       is_signed_type(type));			\
-	if (!ret)							\
-		return 0;
-
-#undef __array
-#define __array(type, item, len)					\
-	ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \
-			       "offset:%zu;\tsize:%zu;\tsigned:%u;\n",	\
-			       offsetof(typeof(field), item),		\
-			       sizeof(field.item), is_signed_type(type)); \
-	if (!ret)							\
-		return 0;
-
-#undef __array_desc
-#define __array_desc(type, container, item, len)			\
-	ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \
-			       "offset:%zu;\tsize:%zu;\tsigned:%u;\n",	\
-			       offsetof(typeof(field), container.item),	\
-			       sizeof(field.container.item),		\
-			       is_signed_type(type));			\
-	if (!ret)							\
-		return 0;
-
-#undef __dynamic_array
-#define __dynamic_array(type, item)					\
-	ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t"	\
-			       "offset:%zu;\tsize:0;\tsigned:%u;\n",	\
-			       offsetof(typeof(field), item),		\
-			       is_signed_type(type));			\
-	if (!ret)							\
-		return 0;
-
-#undef F_printk
-#define F_printk(fmt, args...) "%s, %s\n", #fmt, __stringify(args)
-
-#undef __entry
-#define __entry REC
-
-#undef FTRACE_ENTRY
-#define FTRACE_ENTRY(name, struct_name, id, tstruct, print)		\
-static int								\
-ftrace_format_##name(struct ftrace_event_call *unused,			\
-		     struct trace_seq *s)				\
-{									\
-	struct struct_name field __attribute__((unused));		\
-	int ret = 0;							\
-									\
-	tstruct;							\
-									\
-	trace_seq_printf(s, "\nprint fmt: " print);			\
-									\
-	return ret;							\
-}
-
-#include "trace_entries.h"
-
 #undef __field
 #define __field(type, item)						\
 	ret = trace_define_field(event_call, #type, #item,		\
@@ -235,7 +163,6 @@ __attribute__((section("_ftrace_events"))) event_##call = {		\
 	.system			= __stringify(TRACE_SYSTEM),		\
 	.raw_init		= ftrace_raw_init_event,		\
 	.print_fmt		= print,				\
-	.show_format		= ftrace_format_##call,			\
 	.define_fields		= ftrace_define_fields_##call,		\
 };									\
 
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 147491dccead..c99029916c76 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -1174,82 +1174,6 @@ static int kretprobe_event_define_fields(struct ftrace_event_call *event_call)
 	return 0;
 }
 
-static int __probe_event_show_format(struct trace_seq *s,
-				     struct trace_probe *tp, const char *fmt,
-				     const char *arg)
-{
-	int i;
-
-	/* Show format */
-	if (!trace_seq_printf(s, "\nprint fmt: \"%s", fmt))
-		return 0;
-
-	for (i = 0; i < tp->nr_args; i++)
-		if (!trace_seq_printf(s, " %s=%%lx", tp->args[i].name))
-			return 0;
-
-	if (!trace_seq_printf(s, "\", %s", arg))
-		return 0;
-
-	for (i = 0; i < tp->nr_args; i++)
-		if (!trace_seq_printf(s, ", REC->%s", tp->args[i].name))
-			return 0;
-
-	return trace_seq_puts(s, "\n");
-}
-
-#undef SHOW_FIELD
-#define SHOW_FIELD(type, item, name)					\
-	do {								\
-		ret = trace_seq_printf(s, "\tfield:" #type " %s;\t"	\
-				"offset:%u;\tsize:%u;\tsigned:%d;\n", name,\
-				(unsigned int)offsetof(typeof(field), item),\
-				(unsigned int)sizeof(type),		\
-				is_signed_type(type));			\
-		if (!ret)						\
-			return 0;					\
-	} while (0)
-
-static int kprobe_event_show_format(struct ftrace_event_call *call,
-				    struct trace_seq *s)
-{
-	struct kprobe_trace_entry field __attribute__((unused));
-	int ret, i;
-	struct trace_probe *tp = (struct trace_probe *)call->data;
-
-	SHOW_FIELD(unsigned long, ip, FIELD_STRING_IP);
-	SHOW_FIELD(int, nargs, FIELD_STRING_NARGS);
-
-	/* Show fields */
-	for (i = 0; i < tp->nr_args; i++)
-		SHOW_FIELD(unsigned long, args[i], tp->args[i].name);
-	trace_seq_puts(s, "\n");
-
-	return __probe_event_show_format(s, tp, "(%lx)",
-					 "REC->" FIELD_STRING_IP);
-}
-
-static int kretprobe_event_show_format(struct ftrace_event_call *call,
-				       struct trace_seq *s)
-{
-	struct kretprobe_trace_entry field __attribute__((unused));
-	int ret, i;
-	struct trace_probe *tp = (struct trace_probe *)call->data;
-
-	SHOW_FIELD(unsigned long, func, FIELD_STRING_FUNC);
-	SHOW_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP);
-	SHOW_FIELD(int, nargs, FIELD_STRING_NARGS);
-
-	/* Show fields */
-	for (i = 0; i < tp->nr_args; i++)
-		SHOW_FIELD(unsigned long, args[i], tp->args[i].name);
-	trace_seq_puts(s, "\n");
-
-	return __probe_event_show_format(s, tp, "(%lx <- %lx)",
-					 "REC->" FIELD_STRING_FUNC
-					 ", REC->" FIELD_STRING_RETIP);
-}
-
 static int __set_print_fmt(struct trace_probe *tp, char *buf, int len)
 {
 	int i;
@@ -1504,12 +1428,10 @@ static int register_probe_event(struct trace_probe *tp)
 	if (probe_is_return(tp)) {
 		tp->event.trace = print_kretprobe_event;
 		call->raw_init = probe_event_raw_init;
-		call->show_format = kretprobe_event_show_format;
 		call->define_fields = kretprobe_event_define_fields;
 	} else {
 		tp->event.trace = print_kprobe_event;
 		call->raw_init = probe_event_raw_init;
-		call->show_format = kprobe_event_show_format;
 		call->define_fields = kprobe_event_define_fields;
 	}
 	if (set_print_fmt(tp) < 0)
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index a78e86349ecb..49cea70fbf6d 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -143,54 +143,6 @@ extern char *__bad_type_size(void);
 		#type, #name, offsetof(typeof(trace), name),		\
 		sizeof(trace.name), is_signed_type(type)
 
-int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s)
-{
-	int i;
-	int ret;
-	struct syscall_metadata *entry = call->data;
-	struct syscall_trace_enter trace;
-	int offset = offsetof(struct syscall_trace_enter, args);
-
-	ret = trace_seq_printf(s, "\tfield:%s %s;\toffset:%zu;\tsize:%zu;"
-			       "\tsigned:%u;\n",
-			       SYSCALL_FIELD(int, nr));
-	if (!ret)
-		return 0;
-
-	for (i = 0; i < entry->nb_args; i++) {
-		ret = trace_seq_printf(s, "\tfield:%s %s;", entry->types[i],
-				        entry->args[i]);
-		if (!ret)
-			return 0;
-		ret = trace_seq_printf(s, "\toffset:%d;\tsize:%zu;"
-				       "\tsigned:%u;\n", offset,
-				       sizeof(unsigned long),
-				       is_signed_type(unsigned long));
-		if (!ret)
-			return 0;
-		offset += sizeof(unsigned long);
-	}
-
-	trace_seq_puts(s, "\nprint fmt: \"");
-	for (i = 0; i < entry->nb_args; i++) {
-		ret = trace_seq_printf(s, "%s: 0x%%0%zulx%s", entry->args[i],
-				        sizeof(unsigned long),
-					i == entry->nb_args - 1 ? "" : ", ");
-		if (!ret)
-			return 0;
-	}
-	trace_seq_putc(s, '"');
-
-	for (i = 0; i < entry->nb_args; i++) {
-		ret = trace_seq_printf(s, ", ((unsigned long)(REC->%s))",
-				       entry->args[i]);
-		if (!ret)
-			return 0;
-	}
-
-	return trace_seq_putc(s, '\n');
-}
-
 static
 int  __set_enter_print_fmt(struct syscall_metadata *entry, char *buf, int len)
 {
@@ -252,24 +204,6 @@ static void free_syscall_print_fmt(struct ftrace_event_call *call)
 		kfree(call->print_fmt);
 }
 
-int syscall_exit_format(struct ftrace_event_call *call, struct trace_seq *s)
-{
-	int ret;
-	struct syscall_trace_exit trace;
-
-	ret = trace_seq_printf(s,
-			       "\tfield:%s %s;\toffset:%zu;\tsize:%zu;"
-			       "\tsigned:%u;\n"
-			       "\tfield:%s %s;\toffset:%zu;\tsize:%zu;"
-			       "\tsigned:%u;\n",
-			       SYSCALL_FIELD(int, nr),
-			       SYSCALL_FIELD(long, ret));
-	if (!ret)
-		return 0;
-
-	return trace_seq_printf(s, "\nprint fmt: \"0x%%lx\", REC->ret\n");
-}
-
 int syscall_enter_define_fields(struct ftrace_event_call *call)
 {
 	struct syscall_trace_enter trace;
-- 
cgit v1.2.3


From 8558e3943df1c51c3377cb4e8a52ea484d6f357d Mon Sep 17 00:00:00 2001
From: Len Brown <len.brown@intel.com>
Date: Wed, 6 Jan 2010 16:11:06 -0500
Subject: x86, ACPI: delete acpi_boot_table_init() return value

cleanup only.

setup_arch(), doesn't care care if ACPI initialization succeeded
or failed, so delete acpi_boot_table_init()'s return value.

Signed-off-by: Len Brown <len.brown@intel.com>
---
 arch/x86/kernel/acpi/boot.c | 22 ++++++----------------
 include/linux/acpi.h        |  6 +++---
 2 files changed, 9 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index fb1035cd9a6a..036d28adf59d 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -1529,16 +1529,10 @@ static struct dmi_system_id __initdata acpi_dmi_table_late[] = {
  *	if acpi_blacklisted() acpi_disabled = 1;
  *	acpi_irq_model=...
  *	...
- *
- * return value: (currently ignored)
- *	0: success
- *	!0: failure
  */
 
-int __init acpi_boot_table_init(void)
+void __init acpi_boot_table_init(void)
 {
-	int error;
-
 	dmi_check_system(acpi_dmi_table);
 
 	/*
@@ -1546,15 +1540,14 @@ int __init acpi_boot_table_init(void)
 	 * One exception: acpi=ht continues far enough to enumerate LAPICs
 	 */
 	if (acpi_disabled && !acpi_ht)
-		return 1;
+		return; 
 
 	/*
 	 * Initialize the ACPI boot-time table parser.
 	 */
-	error = acpi_table_init();
-	if (error) {
+	if (acpi_table_init()) {
 		disable_acpi();
-		return error;
+		return;
 	}
 
 	acpi_table_parse(ACPI_SIG_BOOT, acpi_parse_sbf);
@@ -1562,18 +1555,15 @@ int __init acpi_boot_table_init(void)
 	/*
 	 * blacklist may disable ACPI entirely
 	 */
-	error = acpi_blacklisted();
-	if (error) {
+	if (acpi_blacklisted()) {
 		if (acpi_force) {
 			printk(KERN_WARNING PREFIX "acpi=force override\n");
 		} else {
 			printk(KERN_WARNING PREFIX "Disabling ACPI support\n");
 			disable_acpi();
-			return error;
+			return;
 		}
 	}
-
-	return 0;
 }
 
 int __init early_acpi_boot_init(void)
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 36924255c0d5..b926afe8c03e 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -80,7 +80,7 @@ char * __acpi_map_table (unsigned long phys_addr, unsigned long size);
 void __acpi_unmap_table(char *map, unsigned long size);
 int early_acpi_boot_init(void);
 int acpi_boot_init (void);
-int acpi_boot_table_init (void);
+void acpi_boot_table_init (void);
 int acpi_mps_check (void);
 int acpi_numa_init (void);
 
@@ -321,9 +321,9 @@ static inline int acpi_boot_init(void)
 	return 0;
 }
 
-static inline int acpi_boot_table_init(void)
+static inline void acpi_boot_table_init(void)
 {
-	return 0;
+	return;
 }
 
 static inline int acpi_mps_check(void)
-- 
cgit v1.2.3


From cfe79c00a2f4f687eed8b7534d1d3d3d35540c29 Mon Sep 17 00:00:00 2001
From: Mike Frysinger <vapier.adi@gmail.com>
Date: Wed, 6 Jan 2010 17:23:23 +0000
Subject: NOMMU: Avoiding duplicate icache flushes of shared maps

When working with FDPIC, there are many shared mappings of read-only
code regions between applications (the C library, applet packages like
busybox, etc.), but the current do_mmap_pgoff() function will issue an
icache flush whenever a VMA is added to an MM instead of only doing it
when the map is initially created.

The flush can instead be done when a region is first mmapped PROT_EXEC.
Note that we may not rely on the first mapping of a region being
executable - it's possible for it to be PROT_READ only, so we have to
remember whether we've flushed the region or not, and then flush the
entire region when a bit of it is made executable.

However, this also affects the brk area.  That will no longer be
executable.  We can mprotect() it to PROT_EXEC on MPU-mode kernels, but
for NOMMU mode kernels, when it increases the brk allocation, making
sys_brk() flush the extra from the icache should suffice.  The brk area
probably isn't used by NOMMU programs since the brk area can only use up
the leavings from the stack allocation, where the stack allocation is
larger than requested.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm_types.h |  2 ++
 mm/nommu.c               | 11 ++++++++---
 2 files changed, 10 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 84a524afb3dc..84d020bed083 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -123,6 +123,8 @@ struct vm_region {
 	struct file	*vm_file;	/* the backing file or NULL */
 
 	atomic_t	vm_usage;	/* region usage count */
+	bool		vm_icache_flushed : 1; /* true if the icache has been flushed for
+						* this region */
 };
 
 /*
diff --git a/mm/nommu.c b/mm/nommu.c
index 6f9248f89bde..a8d17521624a 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -432,6 +432,7 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
 	/*
 	 * Ok, looks good - let it rip.
 	 */
+	flush_icache_range(mm->brk, brk);
 	return mm->brk = brk;
 }
 
@@ -1353,10 +1354,14 @@ unsigned long do_mmap_pgoff(struct file *file,
 share:
 	add_vma_to_mm(current->mm, vma);
 
-	up_write(&nommu_region_sem);
+	/* we flush the region from the icache only when the first executable
+	 * mapping of it is made  */
+	if (vma->vm_flags & VM_EXEC && !region->vm_icache_flushed) {
+		flush_icache_range(region->vm_start, region->vm_end);
+		region->vm_icache_flushed = true;
+	}
 
-	if (prot & PROT_EXEC)
-		flush_icache_range(result, result + len);
+	up_write(&nommu_region_sem);
 
 	kleave(" = %lx", result);
 	return result;
-- 
cgit v1.2.3


From 65324144b50bc7022cc9b6ca8f4a536a957019e3 Mon Sep 17 00:00:00 2001
From: Jesper Dangaard Brouer <hawk@comx.dk>
Date: Tue, 5 Jan 2010 05:50:47 +0000
Subject: net: RFC3069, private VLAN proxy arp support

This is to be used together with switch technologies, like RFC3069,
that where the individual ports are not allowed to communicate with
each other, but they are allowed to talk to the upstream router.  As
described in RFC 3069, it is possible to allow these hosts to
communicate through the upstream router by proxy_arp'ing.

This patch basically allow proxy arp replies back to the same
interface (from which the ARP request/solicitation was received).

Tunable per device via proc "proxy_arp_pvlan":
  /proc/sys/net/ipv4/conf/*/proxy_arp_pvlan

This switch technology is known by different vendor names:
 - In RFC 3069 it is called VLAN Aggregation.
 - Cisco and Allied Telesyn call it Private VLAN.
 - Hewlett-Packard call it Source-Port filtering or port-isolation.
 - Ericsson call it MAC-Forced Forwarding (RFC Draft).

Signed-off-by: Jesper Dangaard Brouer <hawk@comx.dk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/ip-sysctl.txt | 19 +++++++++++++
 include/linux/inetdevice.h             |  1 +
 include/linux/sysctl.h                 |  1 +
 net/ipv4/arp.c                         | 52 +++++++++++++++++++++++++++++++---
 net/ipv4/devinet.c                     |  1 +
 net/ipv4/route.c                       |  7 ++++-
 6 files changed, 76 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index 006b39dec87d..c532884f4fec 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -692,6 +692,25 @@ proxy_arp - BOOLEAN
 	conf/{all,interface}/proxy_arp is set to TRUE,
 	it will be disabled otherwise
 
+proxy_arp_pvlan - BOOLEAN
+	Private VLAN proxy arp.
+	Basically allow proxy arp replies back to the same interface
+	(from which the ARP request/solicitation was received).
+
+	This is done to support (ethernet) switch features, like RFC
+	3069, where the individual ports are NOT allowed to
+	communicate with each other, but they are allowed to talk to
+	the upstream router.  As described in RFC 3069, it is possible
+	to allow these hosts to communicate through the upstream
+	router by proxy_arp'ing. Don't need to be used together with
+	proxy_arp.
+
+	This technology is known by different names:
+	  In RFC 3069 it is called VLAN Aggregation.
+	  Cisco and Allied Telesyn call it Private VLAN.
+	  Hewlett-Packard call it Source-Port filtering or port-isolation.
+	  Ericsson call it MAC-Forced Forwarding (RFC Draft).
+
 shared_media - BOOLEAN
 	Send(router) or accept(host) RFC1620 shared media redirects.
 	Overrides ip_secure_redirects.
diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h
index 699e85c01a4d..9a8c57467d3d 100644
--- a/include/linux/inetdevice.h
+++ b/include/linux/inetdevice.h
@@ -88,6 +88,7 @@ static inline void ipv4_devconf_setall(struct in_device *in_dev)
 
 #define IN_DEV_LOG_MARTIANS(in_dev)	IN_DEV_ORCONF((in_dev), LOG_MARTIANS)
 #define IN_DEV_PROXY_ARP(in_dev)	IN_DEV_ORCONF((in_dev), PROXY_ARP)
+#define IN_DEV_PROXY_ARP_PVLAN(in_dev)	IN_DEV_CONF_GET(in_dev, PROXY_ARP_PVLAN)
 #define IN_DEV_SHARED_MEDIA(in_dev)	IN_DEV_ORCONF((in_dev), SHARED_MEDIA)
 #define IN_DEV_TX_REDIRECTS(in_dev)	IN_DEV_ORCONF((in_dev), SEND_REDIRECTS)
 #define IN_DEV_SEC_REDIRECTS(in_dev)	IN_DEV_ORCONF((in_dev), \
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 877ba039e6a4..24ff7e3a0d59 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -482,6 +482,7 @@ enum
 	NET_IPV4_CONF_ARP_ACCEPT=21,
 	NET_IPV4_CONF_ARP_NOTIFY=22,
 	NET_IPV4_CONF_ACCEPT_LOCAL=23,
+	NET_IPV4_CONF_PROXY_ARP_PVLAN=24,
 	__NET_IPV4_CONF_MAX
 };
 
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index c95cd93acf29..078709233bc4 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -70,6 +70,7 @@
  *					bonding can change the skb before
  *					sending (e.g. insert 8021q tag).
  *		Harald Welte	:	convert to make use of jenkins hash
+ *		Jesper D. Brouer:       Proxy ARP PVLAN RFC 3069 support.
  */
 
 #include <linux/module.h>
@@ -524,12 +525,15 @@ int arp_bind_neighbour(struct dst_entry *dst)
 /*
  * Check if we can use proxy ARP for this path
  */
-
-static inline int arp_fwd_proxy(struct in_device *in_dev, struct rtable *rt)
+static inline int arp_fwd_proxy(struct in_device *in_dev,
+				struct net_device *dev,	struct rtable *rt)
 {
 	struct in_device *out_dev;
 	int imi, omi = -1;
 
+	if (rt->u.dst.dev == dev)
+		return 0;
+
 	if (!IN_DEV_PROXY_ARP(in_dev))
 		return 0;
 
@@ -547,6 +551,43 @@ static inline int arp_fwd_proxy(struct in_device *in_dev, struct rtable *rt)
 	return (omi != imi && omi != -1);
 }
 
+/*
+ * Check for RFC3069 proxy arp private VLAN (allow to send back to same dev)
+ *
+ * RFC3069 supports proxy arp replies back to the same interface.  This
+ * is done to support (ethernet) switch features, like RFC 3069, where
+ * the individual ports are not allowed to communicate with each
+ * other, BUT they are allowed to talk to the upstream router.  As
+ * described in RFC 3069, it is possible to allow these hosts to
+ * communicate through the upstream router, by proxy_arp'ing.
+ *
+ * RFC 3069: "VLAN Aggregation for Efficient IP Address Allocation"
+ *
+ *  This technology is known by different names:
+ *    In RFC 3069 it is called VLAN Aggregation.
+ *    Cisco and Allied Telesyn call it Private VLAN.
+ *    Hewlett-Packard call it Source-Port filtering or port-isolation.
+ *    Ericsson call it MAC-Forced Forwarding (RFC Draft).
+ *
+ */
+static inline int arp_fwd_pvlan(struct in_device *in_dev,
+				struct net_device *dev,	struct rtable *rt,
+				__be32 sip, __be32 tip)
+{
+	/* Private VLAN is only concerned about the same ethernet segment */
+	if (rt->u.dst.dev != dev)
+		return 0;
+
+	/* Don't reply on self probes (often done by windowz boxes)*/
+	if (sip == tip)
+		return 0;
+
+	if (IN_DEV_PROXY_ARP_PVLAN(in_dev))
+		return 1;
+	else
+		return 0;
+}
+
 /*
  *	Interface to link layer: send routine and receive handler.
  */
@@ -833,8 +874,11 @@ static int arp_process(struct sk_buff *skb)
 			}
 			goto out;
 		} else if (IN_DEV_FORWARD(in_dev)) {
-			    if (addr_type == RTN_UNICAST  && rt->u.dst.dev != dev &&
-			     (arp_fwd_proxy(in_dev, rt) || pneigh_lookup(&arp_tbl, net, &tip, dev, 0))) {
+			if (addr_type == RTN_UNICAST  &&
+			    (arp_fwd_proxy(in_dev, dev, rt) ||
+			     arp_fwd_pvlan(in_dev, dev, rt, sip, tip) ||
+			     pneigh_lookup(&arp_tbl, net, &tip, dev, 0)))
+			{
 				n = neigh_event_ns(&arp_tbl, sha, &sip, dev);
 				if (n)
 					neigh_release(n);
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 5cdbc102a418..0715f4cac391 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1407,6 +1407,7 @@ static struct devinet_sysctl_table {
 		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
 		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
 		DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
+		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
 
 		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
 		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index e446496f564f..1cc339441e7d 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1988,8 +1988,13 @@ static int __mkroute_input(struct sk_buff *skb,
 	if (skb->protocol != htons(ETH_P_IP)) {
 		/* Not IP (i.e. ARP). Do not create route, if it is
 		 * invalid for proxy arp. DNAT routes are always valid.
+		 *
+		 * Proxy arp feature have been extended to allow, ARP
+		 * replies back to the same interface, to support
+		 * Private VLAN switch technologies. See arp.c.
 		 */
-		if (out_dev == in_dev) {
+		if (out_dev == in_dev &&
+		    IN_DEV_PROXY_ARP_PVLAN(in_dev) == 0) {
 			err = -EINVAL;
 			goto cleanup;
 		}
-- 
cgit v1.2.3


From 6144a85a0e018c19bc4b24f7eb6c1f3f7431813d Mon Sep 17 00:00:00 2001
From: Jason Wessel <jason.wessel@windriver.com>
Date: Thu, 7 Jan 2010 11:58:36 -0600
Subject: maccess,probe_kernel: Allow arch specific override
 probe_kernel_(read|write)

Some archs such as blackfin, would like to have an arch specific
probe_kernel_read() and probe_kernel_write() implementation which can
fall back to the generic implementation if no special operations are
needed.

CC: Thomas Gleixner <tglx@linutronix.de>
CC: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Jason Wessel <jason.wessel@windriver.com>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>
---
 include/linux/uaccess.h |  4 +++-
 mm/maccess.c            | 11 +++++++++--
 2 files changed, 12 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
index 6b58367d145e..d512d98dfb7d 100644
--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -94,6 +94,7 @@ static inline unsigned long __copy_from_user_nocache(void *to,
  * happens, handle that and return -EFAULT.
  */
 extern long probe_kernel_read(void *dst, void *src, size_t size);
+extern long __probe_kernel_read(void *dst, void *src, size_t size);
 
 /*
  * probe_kernel_write(): safely attempt to write to a location
@@ -104,6 +105,7 @@ extern long probe_kernel_read(void *dst, void *src, size_t size);
  * Safely write to address @dst from the buffer at @src.  If a kernel fault
  * happens, handle that and return -EFAULT.
  */
-extern long probe_kernel_write(void *dst, void *src, size_t size);
+extern long notrace probe_kernel_write(void *dst, void *src, size_t size);
+extern long notrace __probe_kernel_write(void *dst, void *src, size_t size);
 
 #endif		/* __LINUX_UACCESS_H__ */
diff --git a/mm/maccess.c b/mm/maccess.c
index 9073695ff25f..4e348dbaecd7 100644
--- a/mm/maccess.c
+++ b/mm/maccess.c
@@ -14,7 +14,11 @@
  * Safely read from address @src to the buffer at @dst.  If a kernel fault
  * happens, handle that and return -EFAULT.
  */
-long probe_kernel_read(void *dst, void *src, size_t size)
+
+long __weak probe_kernel_read(void *dst, void *src, size_t size)
+    __attribute__((alias("__probe_kernel_read")));
+
+long __probe_kernel_read(void *dst, void *src, size_t size)
 {
 	long ret;
 	mm_segment_t old_fs = get_fs();
@@ -39,7 +43,10 @@ EXPORT_SYMBOL_GPL(probe_kernel_read);
  * Safely write to address @dst from the buffer at @src.  If a kernel fault
  * happens, handle that and return -EFAULT.
  */
-long notrace __weak probe_kernel_write(void *dst, void *src, size_t size)
+long __weak probe_kernel_write(void *dst, void *src, size_t size)
+    __attribute__((alias("__probe_kernel_write")));
+
+long __probe_kernel_write(void *dst, void *src, size_t size)
 {
 	long ret;
 	mm_segment_t old_fs = get_fs();
-- 
cgit v1.2.3


From b11e1eca7ed9c0b5dab21a62c11acc711d9bdda0 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <randy.dunlap@oracle.com>
Date: Thu, 7 Jan 2010 11:58:37 -0600
Subject: kgdb: Fix kernel-doc format error in kgdb.h

linux-next-20081022//include/linux/kgdb.h:308): duplicate section name 'Description'

and fix typos in that file's kernel-doc comments.

Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Jason Wessel <jason.wessel@windriver.com>
---
 include/linux/kgdb.h | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kgdb.h b/include/linux/kgdb.h
index 6adcc297e354..19ec41a183f5 100644
--- a/include/linux/kgdb.h
+++ b/include/linux/kgdb.h
@@ -29,8 +29,7 @@ struct pt_regs;
  *
  *	On some architectures it is required to skip a breakpoint
  *	exception when it occurs after a breakpoint has been removed.
- *	This can be implemented in the architecture specific portion of
- *	for kgdb.
+ *	This can be implemented in the architecture specific portion of kgdb.
  */
 extern int kgdb_skipexception(int exception, struct pt_regs *regs);
 
@@ -65,7 +64,7 @@ struct uart_port;
 /**
  *	kgdb_breakpoint - compiled in breakpoint
  *
- *	This will be impelmented a static inline per architecture.  This
+ *	This will be implemented as a static inline per architecture.  This
  *	function is called by the kgdb core to execute an architecture
  *	specific trap to cause kgdb to enter the exception processing.
  *
@@ -190,7 +189,7 @@ kgdb_arch_handle_exception(int vector, int signo, int err_code,
  *	@flags: Current IRQ state
  *
  *	On SMP systems, we need to get the attention of the other CPUs
- *	and get them be in a known state.  This should do what is needed
+ *	and get them into a known state.  This should do what is needed
  *	to get the other CPUs to call kgdb_wait(). Note that on some arches,
  *	the NMI approach is not used for rounding up all the CPUs. For example,
  *	in case of MIPS, smp_call_function() is used to roundup CPUs. In
-- 
cgit v1.2.3


From 3c9732c06879d85f2fdf7ec69198c1d78da42a98 Mon Sep 17 00:00:00 2001
From: Giuseppe CAVALLARO <peppe.cavallaro@st.com>
Date: Wed, 6 Jan 2010 23:07:13 +0000
Subject: stmmac: add the new Header file for stmmac platform data

Signed-off-by: Giuseppe Cavallaro <peppe.cavallaro@st.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/stmmac.h | 53 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 53 insertions(+)
 create mode 100644 include/linux/stmmac.h

(limited to 'include/linux')

diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
new file mode 100644
index 000000000000..32bfd1a8a48d
--- /dev/null
+++ b/include/linux/stmmac.h
@@ -0,0 +1,53 @@
+/*******************************************************************************
+
+  Header file for stmmac platform data
+
+  Copyright (C) 2009  STMicroelectronics Ltd
+
+  This program is free software; you can redistribute it and/or modify it
+  under the terms and conditions of the GNU General Public License,
+  version 2, as published by the Free Software Foundation.
+
+  This program is distributed in the hope it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+  more details.
+
+  You should have received a copy of the GNU General Public License along with
+  this program; if not, write to the Free Software Foundation, Inc.,
+  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+  The full GNU General Public License is included in this distribution in
+  the file called "COPYING".
+
+  Author: Giuseppe Cavallaro <peppe.cavallaro@st.com>
+*******************************************************************************/
+
+#ifndef __STMMAC_PLATFORM_DATA
+#define __STMMAC_PLATFORM_DATA
+
+/* platfrom data for platfrom device structure's platfrom_data field */
+
+/* Private data for the STM on-board ethernet driver */
+struct plat_stmmacenet_data {
+	int bus_id;
+	int pbl;
+	int has_gmac;
+	void (*fix_mac_speed)(void *priv, unsigned int speed);
+	void (*bus_setup)(unsigned long ioaddr);
+#ifdef CONFIG_STM_DRIVERS
+	struct stm_pad_config *pad_config;
+#endif
+	void *bsp_priv;
+};
+
+struct plat_stmmacphy_data {
+	int bus_id;
+	int phy_addr;
+	unsigned int phy_mask;
+	int interface;
+	int (*phy_reset)(void *priv);
+	void *priv;
+};
+#endif
+
-- 
cgit v1.2.3


From 0ed731859e24cd6e3ec058cf2b49b2a0df80e86b Mon Sep 17 00:00:00 2001
From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Date: Wed, 6 Jan 2010 09:23:54 +0900
Subject: LSM: Update comment on security_sock_rcv_skb

It is not permitted to do sleeping operation inside security_sock_rcv_skb().

Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Acked-by: Serge Hallyn <serue@us.ibm.com>

--
Signed-off-by: James Morris <jmorris@namei.org>
---
 include/linux/security.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/security.h b/include/linux/security.h
index 466cbadbd1ef..3696ca345745 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -978,6 +978,7 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  *	Check permissions on incoming network packets.  This hook is distinct
  *	from Netfilter's IP input hooks since it is the first time that the
  *	incoming sk_buff @skb has been associated with a particular socket, @sk.
+ *	Must not sleep inside this hook because some callers hold spinlocks.
  *	@sk contains the sock (not socket) associated with the incoming sk_buff.
  *	@skb contains the incoming network data.
  * @socket_getpeersec_stream:
-- 
cgit v1.2.3


From dd3d145d49c5816b79acc6761ebbd842bc50b0ee Mon Sep 17 00:00:00 2001
From: "Martin K. Petersen" <martin.petersen@oracle.com>
Date: Mon, 11 Jan 2010 03:21:48 -0500
Subject: block: Fix discard alignment calculation and printing

Discard alignment reporting for partitions was incorrect.  Update to
match the algorithm used elsewhere.

The alignment can be negative (misaligned).  Fix format string
accordingly.

Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 block/genhd.c          | 2 +-
 include/linux/blkdev.h | 7 +++++--
 2 files changed, 6 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/block/genhd.c b/block/genhd.c
index b11a4ad7d571..d13ba76a169c 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -867,7 +867,7 @@ static ssize_t disk_discard_alignment_show(struct device *dev,
 {
 	struct gendisk *disk = dev_to_disk(dev);
 
-	return sprintf(buf, "%u\n", queue_discard_alignment(disk->queue));
+	return sprintf(buf, "%d\n", queue_discard_alignment(disk->queue));
 }
 
 static DEVICE_ATTR(range, S_IRUGO, disk_range_show, NULL);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 9b98173a8184..a41bcc8e140f 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1148,8 +1148,11 @@ static inline int queue_discard_alignment(struct request_queue *q)
 static inline int queue_sector_discard_alignment(struct request_queue *q,
 						 sector_t sector)
 {
-	return ((sector << 9) - q->limits.discard_alignment)
-		& (q->limits.discard_granularity - 1);
+	struct queue_limits *lim = &q->limits;
+	unsigned int alignment = (sector << 9) & (lim->discard_granularity - 1);
+
+	return (lim->discard_granularity + lim->discard_alignment - alignment)
+		& (lim->discard_granularity - 1);
 }
 
 static inline unsigned int queue_discard_zeroes_data(struct request_queue *q)
-- 
cgit v1.2.3


From 17be8c245054b9c7786545af3ba3ca4e54cd4ad9 Mon Sep 17 00:00:00 2001
From: "Martin K. Petersen" <martin.petersen@oracle.com>
Date: Mon, 11 Jan 2010 03:21:49 -0500
Subject: block: bdev_stack_limits wrapper

DM does not want to know about partition offsets.  Add a partition-aware
wrapper that DM can use when stacking block devices.

Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Acked-by: Mike Snitzer <snitzer@redhat.com>
Reviewed-by: Alasdair G Kergon <agk@redhat.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 block/blk-settings.c   | 22 ++++++++++++++++++++++
 include/linux/blkdev.h |  2 ++
 2 files changed, 24 insertions(+)

(limited to 'include/linux')

diff --git a/block/blk-settings.c b/block/blk-settings.c
index 127f82551855..5eeb9e0d256e 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -639,6 +639,28 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
 }
 EXPORT_SYMBOL(blk_stack_limits);
 
+/**
+ * bdev_stack_limits - adjust queue limits for stacked drivers
+ * @t:	the stacking driver limits (top device)
+ * @bdev:  the component block_device (bottom)
+ * @start:  first data sector within component device
+ *
+ * Description:
+ *    Merges queue limits for a top device and a block_device.  Returns
+ *    0 if alignment didn't change.  Returns -1 if adding the bottom
+ *    device caused misalignment.
+ */
+int bdev_stack_limits(struct queue_limits *t, struct block_device *bdev,
+		      sector_t start)
+{
+	struct request_queue *bq = bdev_get_queue(bdev);
+
+	start += get_start_sect(bdev);
+
+	return blk_stack_limits(t, &bq->limits, start << 9);
+}
+EXPORT_SYMBOL(bdev_stack_limits);
+
 /**
  * disk_stack_limits - adjust queue limits for stacked drivers
  * @disk:  MD/DM gendisk (top)
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index a41bcc8e140f..5c8018977efa 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -938,6 +938,8 @@ extern void blk_queue_io_opt(struct request_queue *q, unsigned int opt);
 extern void blk_set_default_limits(struct queue_limits *lim);
 extern int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
 			    sector_t offset);
+extern int bdev_stack_limits(struct queue_limits *t, struct block_device *bdev,
+			    sector_t offset);
 extern void disk_stack_limits(struct gendisk *disk, struct block_device *bdev,
 			      sector_t offset);
 extern void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b);
-- 
cgit v1.2.3


From ce289321b7dc1eb108e3df0dec872b7429ef49f7 Mon Sep 17 00:00:00 2001
From: Kirill Afonshin <kirill_nnov@mail.ru>
Date: Fri, 8 Jan 2010 22:09:59 +0300
Subject: block: removed unused as_io_context

It isn't used anymore, since AS was deleted.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 block/blk-ioc.c           |  5 -----
 include/linux/iocontext.h | 27 ---------------------------
 2 files changed, 32 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-ioc.c b/block/blk-ioc.c
index cbdabb0dd6d7..98e6bf61b0ac 100644
--- a/block/blk-ioc.c
+++ b/block/blk-ioc.c
@@ -39,8 +39,6 @@ int put_io_context(struct io_context *ioc)
 
 	if (atomic_long_dec_and_test(&ioc->refcount)) {
 		rcu_read_lock();
-		if (ioc->aic && ioc->aic->dtor)
-			ioc->aic->dtor(ioc->aic);
 		cfq_dtor(ioc);
 		rcu_read_unlock();
 
@@ -76,8 +74,6 @@ void exit_io_context(struct task_struct *task)
 	task_unlock(task);
 
 	if (atomic_dec_and_test(&ioc->nr_tasks)) {
-		if (ioc->aic && ioc->aic->exit)
-			ioc->aic->exit(ioc->aic);
 		cfq_exit(ioc);
 
 	}
@@ -97,7 +93,6 @@ struct io_context *alloc_io_context(gfp_t gfp_flags, int node)
 		ret->ioprio = 0;
 		ret->last_waited = jiffies; /* doesn't matter... */
 		ret->nr_batch_requests = 0; /* because this is 0 */
-		ret->aic = NULL;
 		INIT_RADIX_TREE(&ret->radix_root, GFP_ATOMIC | __GFP_HIGH);
 		INIT_HLIST_HEAD(&ret->cic_list);
 		ret->ioc_data = NULL;
diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h
index a63235996309..78ef023227d4 100644
--- a/include/linux/iocontext.h
+++ b/include/linux/iocontext.h
@@ -4,32 +4,6 @@
 #include <linux/radix-tree.h>
 #include <linux/rcupdate.h>
 
-/*
- * This is the per-process anticipatory I/O scheduler state.
- */
-struct as_io_context {
-	spinlock_t lock;
-
-	void (*dtor)(struct as_io_context *aic); /* destructor */
-	void (*exit)(struct as_io_context *aic); /* called on task exit */
-
-	unsigned long state;
-	atomic_t nr_queued; /* queued reads & sync writes */
-	atomic_t nr_dispatched; /* number of requests gone to the drivers */
-
-	/* IO History tracking */
-	/* Thinktime */
-	unsigned long last_end_request;
-	unsigned long ttime_total;
-	unsigned long ttime_samples;
-	unsigned long ttime_mean;
-	/* Layout pattern */
-	unsigned int seek_samples;
-	sector_t last_request_pos;
-	u64 seek_total;
-	sector_t seek_mean;
-};
-
 struct cfq_queue;
 struct cfq_io_context {
 	void *key;
@@ -78,7 +52,6 @@ struct io_context {
 	unsigned long last_waited; /* Time last woken after wait for request */
 	int nr_batch_requests;     /* Number of requests left in the batch */
 
-	struct as_io_context *aic;
 	struct radix_tree_root radix_root;
 	struct hlist_head cic_list;
 	void *ioc_data;
-- 
cgit v1.2.3


From e03a72e13648ac6277bf2bab6b8324d51f89c0fa Mon Sep 17 00:00:00 2001
From: "Martin K. Petersen" <martin.petersen@oracle.com>
Date: Mon, 11 Jan 2010 03:21:51 -0500
Subject: block: Stop using byte offsets

All callers of the stacking functions use 512-byte sector units rather
than byte offsets.  Simplify the code so the stacking functions take
sectors when specifying data offsets.

Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 block/blk-settings.c   | 26 +++++++++-----------------
 fs/partitions/check.c  |  7 ++++---
 include/linux/blkdev.h | 17 +++++------------
 3 files changed, 18 insertions(+), 32 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-settings.c b/block/blk-settings.c
index 5eeb9e0d256e..78549c723783 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -507,7 +507,7 @@ static unsigned int lcm(unsigned int a, unsigned int b)
  * blk_stack_limits - adjust queue_limits for stacked devices
  * @t:	the stacking driver limits (top device)
  * @b:  the underlying queue limits (bottom, component device)
- * @offset:  offset to beginning of data within component device
+ * @start:  first data sector within component device
  *
  * Description:
  *    This function is used by stacking drivers like MD and DM to ensure
@@ -525,10 +525,9 @@ static unsigned int lcm(unsigned int a, unsigned int b)
  *    the alignment_offset is undefined.
  */
 int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
-		     sector_t offset)
+		     sector_t start)
 {
-	sector_t alignment;
-	unsigned int top, bottom, ret = 0;
+	unsigned int top, bottom, alignment, ret = 0;
 
 	t->max_sectors = min_not_zero(t->max_sectors, b->max_sectors);
 	t->max_hw_sectors = min_not_zero(t->max_hw_sectors, b->max_hw_sectors);
@@ -548,7 +547,7 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
 
 	t->misaligned |= b->misaligned;
 
-	alignment = queue_limit_alignment_offset(b, offset);
+	alignment = queue_limit_alignment_offset(b, start);
 
 	/* Bottom device has different alignment.  Check that it is
 	 * compatible with the current top alignment.
@@ -611,11 +610,7 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
 
 	/* Discard alignment and granularity */
 	if (b->discard_granularity) {
-		unsigned int granularity = b->discard_granularity;
-		offset &= granularity - 1;
-
-		alignment = (granularity + b->discard_alignment - offset)
-			& (granularity - 1);
+		alignment = queue_limit_discard_alignment(b, start);
 
 		if (t->discard_granularity != 0 &&
 		    t->discard_alignment != alignment) {
@@ -657,7 +652,7 @@ int bdev_stack_limits(struct queue_limits *t, struct block_device *bdev,
 
 	start += get_start_sect(bdev);
 
-	return blk_stack_limits(t, &bq->limits, start << 9);
+	return blk_stack_limits(t, &bq->limits, start);
 }
 EXPORT_SYMBOL(bdev_stack_limits);
 
@@ -668,9 +663,8 @@ EXPORT_SYMBOL(bdev_stack_limits);
  * @offset:  offset to beginning of data within component device
  *
  * Description:
- *    Merges the limits for two queues.  Returns 0 if alignment
- *    didn't change.  Returns -1 if adding the bottom device caused
- *    misalignment.
+ *    Merges the limits for a top level gendisk and a bottom level
+ *    block_device.
  */
 void disk_stack_limits(struct gendisk *disk, struct block_device *bdev,
 		       sector_t offset)
@@ -678,9 +672,7 @@ void disk_stack_limits(struct gendisk *disk, struct block_device *bdev,
 	struct request_queue *t = disk->queue;
 	struct request_queue *b = bdev_get_queue(bdev);
 
-	offset += get_start_sect(bdev) << 9;
-
-	if (blk_stack_limits(&t->limits, &b->limits, offset) < 0) {
+	if (bdev_stack_limits(&t->limits, bdev, offset >> 9) < 0) {
 		char top[BDEVNAME_SIZE], bottom[BDEVNAME_SIZE];
 
 		disk_name(disk, 0, top);
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index 64bc8998ac9a..e8865c11777f 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -412,9 +412,10 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno,
 	pdev = part_to_dev(p);
 
 	p->start_sect = start;
-	p->alignment_offset = queue_sector_alignment_offset(disk->queue, start);
-	p->discard_alignment = queue_sector_discard_alignment(disk->queue,
-							      start);
+	p->alignment_offset =
+		queue_limit_alignment_offset(&disk->queue->limits, start);
+	p->discard_alignment =
+		queue_limit_discard_alignment(&disk->queue->limits, start);
 	p->nr_sects = len;
 	p->partno = partno;
 	p->policy = get_disk_ro(disk);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 5c8018977efa..ffb13ad35716 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1112,18 +1112,13 @@ static inline int queue_alignment_offset(struct request_queue *q)
 	return q->limits.alignment_offset;
 }
 
-static inline int queue_limit_alignment_offset(struct queue_limits *lim, sector_t offset)
+static inline int queue_limit_alignment_offset(struct queue_limits *lim, sector_t sector)
 {
 	unsigned int granularity = max(lim->physical_block_size, lim->io_min);
+	unsigned int alignment = (sector << 9) & (granularity - 1);
 
-	offset &= granularity - 1;
-	return (granularity + lim->alignment_offset - offset) & (granularity - 1);
-}
-
-static inline int queue_sector_alignment_offset(struct request_queue *q,
-						sector_t sector)
-{
-	return queue_limit_alignment_offset(&q->limits, sector << 9);
+	return (granularity + lim->alignment_offset - alignment)
+		& (granularity - 1);
 }
 
 static inline int bdev_alignment_offset(struct block_device *bdev)
@@ -1147,10 +1142,8 @@ static inline int queue_discard_alignment(struct request_queue *q)
 	return q->limits.discard_alignment;
 }
 
-static inline int queue_sector_discard_alignment(struct request_queue *q,
-						 sector_t sector)
+static inline int queue_limit_discard_alignment(struct queue_limits *lim, sector_t sector)
 {
-	struct queue_limits *lim = &q->limits;
 	unsigned int alignment = (sector << 9) & (lim->discard_granularity - 1);
 
 	return (lim->discard_granularity + lim->discard_alignment - alignment)
-- 
cgit v1.2.3


From 7af92f8754b87bc78cbfd447d5f4096b25c46682 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@vyatta.com>
Date: Wed, 6 Jan 2010 15:45:55 -0800
Subject: genhd: overlapping variable definition

This fixes the sparse warning:
fs/ext4/super.c:2390:40: warning: symbol 'i' shadows an earlier one
fs/ext4/super.c:2368:22: originally declared here

Using 'i' in a macro is dubious practice.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 include/linux/genhd.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index c6c0c41af35f..9717081c75ad 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -256,9 +256,9 @@ extern struct hd_struct *disk_map_sector_rcu(struct gendisk *disk,
 #define part_stat_read(part, field)					\
 ({									\
 	typeof((part)->dkstats->field) res = 0;				\
-	int i;								\
-	for_each_possible_cpu(i)					\
-		res += per_cpu_ptr((part)->dkstats, i)->field;		\
+	unsigned int _cpu;						\
+	for_each_possible_cpu(_cpu)					\
+		res += per_cpu_ptr((part)->dkstats, _cpu)->field;	\
 	res;								\
 })
 
-- 
cgit v1.2.3


From 4b529401c5089cf33f7165607cbc2fde43357bfb Mon Sep 17 00:00:00 2001
From: Andreas Fenkart <andreas.fenkart@streamunlimited.com>
Date: Fri, 8 Jan 2010 14:42:31 -0800
Subject: mm: make totalhigh_pages unsigned long

Makes it consistent with the extern declaration, used when CONFIG_HIGHMEM
is set Removes redundant casts in printout messages

Signed-off-by: Andreas Fenkart <andreas.fenkart@streamunlimited.com>
Acked-by: Russell King <rmk+kernel@arm.linux.org.uk>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: David Howells <dhowells@redhat.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Chen Liqin <liqin.chen@sunplusct.com>
Cc: Lennox Wu <lennox.wu@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/arm/mm/init.c               | 2 +-
 arch/mips/mm/init.c              | 2 +-
 arch/mips/sgi-ip27/ip27-memory.c | 2 +-
 arch/mn10300/mm/init.c           | 3 +--
 arch/score/mm/init.c             | 2 +-
 arch/x86/mm/init_32.c            | 3 +--
 include/linux/highmem.h          | 2 +-
 7 files changed, 7 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index 52c40d155672..a04ffbbbe253 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -616,7 +616,7 @@ void __init mem_init(void)
 		"%dK data, %dK init, %luK highmem)\n",
 		nr_free_pages() << (PAGE_SHIFT-10), codesize >> 10,
 		datasize >> 10, initsize >> 10,
-		(unsigned long) (totalhigh_pages << (PAGE_SHIFT-10)));
+		totalhigh_pages << (PAGE_SHIFT-10));
 
 	if (PAGE_SIZE >= 16384 && num_physpages <= 128) {
 		extern int sysctl_overcommit_memory;
diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c
index 9e8d00389eef..1651942f7feb 100644
--- a/arch/mips/mm/init.c
+++ b/arch/mips/mm/init.c
@@ -424,7 +424,7 @@ void __init mem_init(void)
 	       reservedpages << (PAGE_SHIFT-10),
 	       datasize >> 10,
 	       initsize >> 10,
-	       (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10)));
+	       totalhigh_pages << (PAGE_SHIFT-10));
 }
 #endif /* !CONFIG_NEED_MULTIPLE_NODES */
 
diff --git a/arch/mips/sgi-ip27/ip27-memory.c b/arch/mips/sgi-ip27/ip27-memory.c
index f61c164d1e67..bc1297109cc5 100644
--- a/arch/mips/sgi-ip27/ip27-memory.c
+++ b/arch/mips/sgi-ip27/ip27-memory.c
@@ -505,5 +505,5 @@ void __init mem_init(void)
 	       (num_physpages - tmp) << (PAGE_SHIFT-10),
 	       datasize >> 10,
 	       initsize >> 10,
-	       (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10)));
+	       totalhigh_pages << (PAGE_SHIFT-10));
 }
diff --git a/arch/mn10300/mm/init.c b/arch/mn10300/mm/init.c
index ec1420562dc7..dd27a9a35152 100644
--- a/arch/mn10300/mm/init.c
+++ b/arch/mn10300/mm/init.c
@@ -118,8 +118,7 @@ void __init mem_init(void)
 	       reservedpages << (PAGE_SHIFT - 10),
 	       datasize >> 10,
 	       initsize >> 10,
-	       (unsigned long) (totalhigh_pages << (PAGE_SHIFT - 10))
-	       );
+	       totalhigh_pages << (PAGE_SHIFT - 10));
 }
 
 /*
diff --git a/arch/score/mm/init.c b/arch/score/mm/init.c
index 8c15b2c85d5a..dfaf458d6702 100644
--- a/arch/score/mm/init.c
+++ b/arch/score/mm/init.c
@@ -106,7 +106,7 @@ void __init mem_init(void)
 			ram << (PAGE_SHIFT-10), codesize >> 10,
 			reservedpages << (PAGE_SHIFT-10), datasize >> 10,
 			initsize >> 10,
-			(unsigned long) (totalhigh_pages << (PAGE_SHIFT-10)));
+			totalhigh_pages << (PAGE_SHIFT-10));
 }
 #endif /* !CONFIG_NEED_MULTIPLE_NODES */
 
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index c973f8e2a6cf..9a0c258a86be 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -892,8 +892,7 @@ void __init mem_init(void)
 		reservedpages << (PAGE_SHIFT-10),
 		datasize >> 10,
 		initsize >> 10,
-		(unsigned long) (totalhigh_pages << (PAGE_SHIFT-10))
-	       );
+		totalhigh_pages << (PAGE_SHIFT-10));
 
 	printk(KERN_INFO "virtual kernel memory layout:\n"
 		"    fixmap  : 0x%08lx - 0x%08lx   (%4ld kB)\n"
diff --git a/include/linux/highmem.h b/include/linux/highmem.h
index 211ff4497269..ab2cc20e21a5 100644
--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -46,7 +46,7 @@ void kmap_flush_unused(void);
 
 static inline unsigned int nr_free_highpages(void) { return 0; }
 
-#define totalhigh_pages 0
+#define totalhigh_pages 0UL
 
 #ifndef ARCH_HAS_KMAP
 static inline void *kmap(struct page *page)
-- 
cgit v1.2.3


From e992cd9b72a18122bd5c958715623057f110793f Mon Sep 17 00:00:00 2001
From: Vegard Nossum <vegard.nossum@gmail.com>
Date: Fri, 8 Jan 2010 14:42:35 -0800
Subject: kmemcheck: make bitfield annotations truly no-ops when disabled

It turns out that even zero-sized struct members (int foo[0];) will affect
the struct layout, causing us in particular to lose 4 bytes in struct
sock.

This patch fixes the regression in CONFIG_KMEMCHECK=n case.

Reported-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Vegard Nossum <vegard.nossum@gmail.com>
Acked-by: Pekka Enberg <penberg@cs.helsinki.fi>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kmemcheck.h | 110 ++++++++++++++++++++++++----------------------
 1 file changed, 58 insertions(+), 52 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kmemcheck.h b/include/linux/kmemcheck.h
index e880d4cf9e22..08d7dc4ddf40 100644
--- a/include/linux/kmemcheck.h
+++ b/include/linux/kmemcheck.h
@@ -36,6 +36,56 @@ int kmemcheck_hide_addr(unsigned long address);
 
 bool kmemcheck_is_obj_initialized(unsigned long addr, size_t size);
 
+/*
+ * Bitfield annotations
+ *
+ * How to use: If you have a struct using bitfields, for example
+ *
+ *     struct a {
+ *             int x:8, y:8;
+ *     };
+ *
+ * then this should be rewritten as
+ *
+ *     struct a {
+ *             kmemcheck_bitfield_begin(flags);
+ *             int x:8, y:8;
+ *             kmemcheck_bitfield_end(flags);
+ *     };
+ *
+ * Now the "flags_begin" and "flags_end" members may be used to refer to the
+ * beginning and end, respectively, of the bitfield (and things like
+ * &x.flags_begin is allowed). As soon as the struct is allocated, the bit-
+ * fields should be annotated:
+ *
+ *     struct a *a = kmalloc(sizeof(struct a), GFP_KERNEL);
+ *     kmemcheck_annotate_bitfield(a, flags);
+ */
+#define kmemcheck_bitfield_begin(name)	\
+	int name##_begin[0];
+
+#define kmemcheck_bitfield_end(name)	\
+	int name##_end[0];
+
+#define kmemcheck_annotate_bitfield(ptr, name)				\
+	do {								\
+		int _n;							\
+									\
+		if (!ptr)						\
+			break;						\
+									\
+		_n = (long) &((ptr)->name##_end)			\
+			- (long) &((ptr)->name##_begin);		\
+		MAYBE_BUILD_BUG_ON(_n < 0);				\
+									\
+		kmemcheck_mark_initialized(&((ptr)->name##_begin), _n);	\
+	} while (0)
+
+#define kmemcheck_annotate_variable(var)				\
+	do {								\
+		kmemcheck_mark_initialized(&(var), sizeof(var));	\
+	} while (0)							\
+
 #else
 #define kmemcheck_enabled 0
 
@@ -106,60 +156,16 @@ static inline bool kmemcheck_is_obj_initialized(unsigned long addr, size_t size)
 	return true;
 }
 
-#endif /* CONFIG_KMEMCHECK */
-
-/*
- * Bitfield annotations
- *
- * How to use: If you have a struct using bitfields, for example
- *
- *     struct a {
- *             int x:8, y:8;
- *     };
- *
- * then this should be rewritten as
- *
- *     struct a {
- *             kmemcheck_bitfield_begin(flags);
- *             int x:8, y:8;
- *             kmemcheck_bitfield_end(flags);
- *     };
- *
- * Now the "flags_begin" and "flags_end" members may be used to refer to the
- * beginning and end, respectively, of the bitfield (and things like
- * &x.flags_begin is allowed). As soon as the struct is allocated, the bit-
- * fields should be annotated:
- *
- *     struct a *a = kmalloc(sizeof(struct a), GFP_KERNEL);
- *     kmemcheck_annotate_bitfield(a, flags);
- *
- * Note: We provide the same definitions for both kmemcheck and non-
- * kmemcheck kernels. This makes it harder to introduce accidental errors. It
- * is also allowed to pass NULL pointers to kmemcheck_annotate_bitfield().
- */
-#define kmemcheck_bitfield_begin(name)	\
-	int name##_begin[0];
-
-#define kmemcheck_bitfield_end(name)	\
-	int name##_end[0];
+#define kmemcheck_bitfield_begin(name)
+#define kmemcheck_bitfield_end(name)
+#define kmemcheck_annotate_bitfield(ptr, name)	\
+	do {					\
+	} while (0)
 
-#define kmemcheck_annotate_bitfield(ptr, name)				\
-	do {								\
-		int _n;							\
-									\
-		if (!ptr)						\
-			break;						\
-									\
-		_n = (long) &((ptr)->name##_end)			\
-			- (long) &((ptr)->name##_begin);		\
-		MAYBE_BUILD_BUG_ON(_n < 0);				\
-									\
-		kmemcheck_mark_initialized(&((ptr)->name##_begin), _n);	\
+#define kmemcheck_annotate_variable(var)	\
+	do {					\
 	} while (0)
 
-#define kmemcheck_annotate_variable(var)				\
-	do {								\
-		kmemcheck_mark_initialized(&(var), sizeof(var));	\
-	} while (0)							\
+#endif /* CONFIG_KMEMCHECK */
 
 #endif /* LINUX_KMEMCHECK_H */
-- 
cgit v1.2.3


From 7dd65feb6c603e13eba501c34c662259ab38e70e Mon Sep 17 00:00:00 2001
From: Albin Tonnerre <albin.tonnerre@free-electrons.com>
Date: Fri, 8 Jan 2010 14:42:42 -0800
Subject: lib: add support for LZO-compressed kernels

This patch series adds generic support for creating and extracting
LZO-compressed kernel images, as well as support for using such images on
the x86 and ARM architectures, and support for creating and using
LZO-compressed initrd and initramfs images.

Russell King said:

: Testing on a Cortex A9 model:
: - lzo decompressor is 65% of the time gzip takes to decompress a kernel
: - lzo kernel is 9% larger than a gzip kernel
:
: which I'm happy to say confirms your figures when comparing the two.
:
: However, when comparing your new gzip code to the old gzip code:
: - new is 99% of the size of the old code
: - new takes 42% of the time to decompress than the old code
:
: What this means is that for a proper comparison, the results get even better:
: - lzo is 7.5% larger than the old gzip'd kernel image
: - lzo takes 28% of the time that the old gzip code took
:
: So the expense seems definitely worth the effort.  The only reason I
: can think of ever using gzip would be if you needed the additional
: compression (eg, because you have limited flash to store the image.)
:
: I would argue that the default for ARM should therefore be LZO.

This patch:

The lzo compressor is worse than gzip at compression, but faster at
extraction.  Here are some figures for an ARM board I'm working on:

Uncompressed size: 3.24Mo
gzip  1.61Mo 0.72s
lzo   1.75Mo 0.48s

So for a compression ratio that is still relatively close to gzip, it's
much faster to extract, at least in that case.

This part contains:
 - Makefile routine to support lzo compression
 - Fixes to the existing lzo compressor so that it can be used in
   compressed kernels
 - wrapper around the existing lzo1x_decompress, as it only extracts one
   block at a time, while we need to extract a whole file here
 - config dialog for kernel compression

[akpm@linux-foundation.org: coding-style fixes]
[akpm@linux-foundation.org: cleanup]
Signed-off-by: Albin Tonnerre <albin.tonnerre@free-electrons.com>
Tested-by: Wu Zhangjin <wuzhangjin@gmail.com>
Acked-by: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Russell King <rmk@arm.linux.org.uk>
Acked-by: Russell King <rmk@arm.linux.org.uk>
Cc: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/decompress/unlzo.h |  10 ++
 init/Kconfig                     |  18 +++-
 lib/decompress_unlzo.c           | 209 +++++++++++++++++++++++++++++++++++++++
 lib/lzo/lzo1x_decompress.c       |   9 +-
 scripts/Makefile.lib             |   5 +
 5 files changed, 244 insertions(+), 7 deletions(-)
 create mode 100644 include/linux/decompress/unlzo.h
 create mode 100644 lib/decompress_unlzo.c

(limited to 'include/linux')

diff --git a/include/linux/decompress/unlzo.h b/include/linux/decompress/unlzo.h
new file mode 100644
index 000000000000..987229752519
--- /dev/null
+++ b/include/linux/decompress/unlzo.h
@@ -0,0 +1,10 @@
+#ifndef DECOMPRESS_UNLZO_H
+#define DECOMPRESS_UNLZO_H
+
+int unlzo(unsigned char *inbuf, int len,
+	int(*fill)(void*, unsigned int),
+	int(*flush)(void*, unsigned int),
+	unsigned char *output,
+	int *pos,
+	void(*error)(char *x));
+#endif
diff --git a/init/Kconfig b/init/Kconfig
index a23da9f01803..d95ca7cd5d45 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -115,10 +115,13 @@ config HAVE_KERNEL_BZIP2
 config HAVE_KERNEL_LZMA
 	bool
 
+config HAVE_KERNEL_LZO
+	bool
+
 choice
 	prompt "Kernel compression mode"
 	default KERNEL_GZIP
-	depends on HAVE_KERNEL_GZIP || HAVE_KERNEL_BZIP2 || HAVE_KERNEL_LZMA
+	depends on HAVE_KERNEL_GZIP || HAVE_KERNEL_BZIP2 || HAVE_KERNEL_LZMA || HAVE_KERNEL_LZO
 	help
 	  The linux kernel is a kind of self-extracting executable.
 	  Several compression algorithms are available, which differ
@@ -141,9 +144,8 @@ config KERNEL_GZIP
 	bool "Gzip"
 	depends on HAVE_KERNEL_GZIP
 	help
-	  The old and tried gzip compression. Its compression ratio is
-	  the poorest among the 3 choices; however its speed (both
-	  compression and decompression) is the fastest.
+	  The old and tried gzip compression. It provides a good balance
+	  between compression ratio and decompression speed.
 
 config KERNEL_BZIP2
 	bool "Bzip2"
@@ -164,6 +166,14 @@ config KERNEL_LZMA
 	  two. Compression is slowest.	The kernel size is about 33%
 	  smaller with LZMA in comparison to gzip.
 
+config KERNEL_LZO
+	bool "LZO"
+	depends on HAVE_KERNEL_LZO
+	help
+	  Its compression ratio is the poorest among the 4. The kernel
+	  size is about about 10% bigger than gzip; however its speed
+	  (both compression and decompression) is the fastest.
+
 endchoice
 
 config SWAP
diff --git a/lib/decompress_unlzo.c b/lib/decompress_unlzo.c
new file mode 100644
index 000000000000..db521f45626e
--- /dev/null
+++ b/lib/decompress_unlzo.c
@@ -0,0 +1,209 @@
+/*
+ * LZO decompressor for the Linux kernel. Code borrowed from the lzo
+ * implementation by Markus Franz Xaver Johannes Oberhumer.
+ *
+ * Linux kernel adaptation:
+ * Copyright (C) 2009
+ * Albin Tonnerre, Free Electrons <albin.tonnerre@free-electrons.com>
+ *
+ * Original code:
+ * Copyright (C) 1996-2005 Markus Franz Xaver Johannes Oberhumer
+ * All Rights Reserved.
+ *
+ * lzop and the LZO library are free software; you can redistribute them
+ * and/or modify them under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; see the file COPYING.
+ * If not, write to the Free Software Foundation, Inc.,
+ * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Markus F.X.J. Oberhumer
+ * <markus@oberhumer.com>
+ * http://www.oberhumer.com/opensource/lzop/
+ */
+
+#ifdef STATIC
+#include "lzo/lzo1x_decompress.c"
+#else
+#include <linux/slab.h>
+#include <linux/decompress/unlzo.h>
+#endif
+
+#include <linux/types.h>
+#include <linux/lzo.h>
+#include <linux/decompress/mm.h>
+
+#include <linux/compiler.h>
+#include <asm/unaligned.h>
+
+static const unsigned char lzop_magic[] = {
+	0x89, 0x4c, 0x5a, 0x4f, 0x00, 0x0d, 0x0a, 0x1a, 0x0a };
+
+#define LZO_BLOCK_SIZE        (256*1024l)
+#define HEADER_HAS_FILTER      0x00000800L
+
+STATIC inline int INIT parse_header(u8 *input, u8 *skip)
+{
+	int l;
+	u8 *parse = input;
+	u8 level = 0;
+	u16 version;
+
+	/* read magic: 9 first bits */
+	for (l = 0; l < 9; l++) {
+		if (*parse++ != lzop_magic[l])
+			return 0;
+	}
+	/* get version (2bytes), skip library version (2),
+	 * 'need to be extracted' version (2) and
+	 * method (1) */
+	version = get_unaligned_be16(parse);
+	parse += 7;
+	if (version >= 0x0940)
+		level = *parse++;
+	if (get_unaligned_be32(parse) & HEADER_HAS_FILTER)
+		parse += 8; /* flags + filter info */
+	else
+		parse += 4; /* flags */
+
+	/* skip mode and mtime_low */
+	parse += 8;
+	if (version >= 0x0940)
+		parse += 4;	/* skip mtime_high */
+
+	l = *parse++;
+	/* don't care about the file name, and skip checksum */
+	parse += l + 4;
+
+	*skip = parse - input;
+	return 1;
+}
+
+STATIC inline int INIT unlzo(u8 *input, int in_len,
+				int (*fill) (void *, unsigned int),
+				int (*flush) (void *, unsigned int),
+				u8 *output, int *posp,
+				void (*error_fn) (char *x))
+{
+	u8 skip = 0, r = 0;
+	u32 src_len, dst_len;
+	size_t tmp;
+	u8 *in_buf, *in_buf_save, *out_buf;
+	int obytes_processed = 0;
+
+	set_error_fn(error_fn);
+
+	if (output) {
+		out_buf = output;
+	} else if (!flush) {
+		error("NULL output pointer and no flush function provided");
+		goto exit;
+	} else {
+		out_buf = malloc(LZO_BLOCK_SIZE);
+		if (!out_buf) {
+			error("Could not allocate output buffer");
+			goto exit;
+		}
+	}
+
+	if (input && fill) {
+		error("Both input pointer and fill function provided, don't know what to do");
+		goto exit_1;
+	} else if (input) {
+		in_buf = input;
+	} else if (!fill || !posp) {
+		error("NULL input pointer and missing position pointer or fill function");
+		goto exit_1;
+	} else {
+		in_buf = malloc(lzo1x_worst_compress(LZO_BLOCK_SIZE));
+		if (!in_buf) {
+			error("Could not allocate input buffer");
+			goto exit_1;
+		}
+	}
+	in_buf_save = in_buf;
+
+	if (posp)
+		*posp = 0;
+
+	if (fill)
+		fill(in_buf, lzo1x_worst_compress(LZO_BLOCK_SIZE));
+
+	if (!parse_header(input, &skip)) {
+		error("invalid header");
+		goto exit_2;
+	}
+	in_buf += skip;
+
+	if (posp)
+		*posp = skip;
+
+	for (;;) {
+		/* read uncompressed block size */
+		dst_len = get_unaligned_be32(in_buf);
+		in_buf += 4;
+
+		/* exit if last block */
+		if (dst_len == 0) {
+			if (posp)
+				*posp += 4;
+			break;
+		}
+
+		if (dst_len > LZO_BLOCK_SIZE) {
+			error("dest len longer than block size");
+			goto exit_2;
+		}
+
+		/* read compressed block size, and skip block checksum info */
+		src_len = get_unaligned_be32(in_buf);
+		in_buf += 8;
+
+		if (src_len <= 0 || src_len > dst_len) {
+			error("file corrupted");
+			goto exit_2;
+		}
+
+		/* decompress */
+		tmp = dst_len;
+		r = lzo1x_decompress_safe((u8 *) in_buf, src_len,
+						out_buf, &tmp);
+
+		if (r != LZO_E_OK || dst_len != tmp) {
+			error("Compressed data violation");
+			goto exit_2;
+		}
+
+		obytes_processed += dst_len;
+		if (flush)
+			flush(out_buf, dst_len);
+		if (output)
+			out_buf += dst_len;
+		if (posp)
+			*posp += src_len + 12;
+		if (fill) {
+			in_buf = in_buf_save;
+			fill(in_buf, lzo1x_worst_compress(LZO_BLOCK_SIZE));
+		} else
+			in_buf += src_len;
+	}
+
+exit_2:
+	if (!input)
+		free(in_buf);
+exit_1:
+	if (!output)
+		free(out_buf);
+exit:
+	return obytes_processed;
+}
+
+#define decompress unlzo
diff --git a/lib/lzo/lzo1x_decompress.c b/lib/lzo/lzo1x_decompress.c
index 5dc6b29c1575..f2fd09850223 100644
--- a/lib/lzo/lzo1x_decompress.c
+++ b/lib/lzo/lzo1x_decompress.c
@@ -11,11 +11,13 @@
  *  Richard Purdie <rpurdie@openedhand.com>
  */
 
+#ifndef STATIC
 #include <linux/module.h>
 #include <linux/kernel.h>
-#include <linux/lzo.h>
-#include <asm/byteorder.h>
+#endif
+
 #include <asm/unaligned.h>
+#include <linux/lzo.h>
 #include "lzodefs.h"
 
 #define HAVE_IP(x, ip_end, ip) ((size_t)(ip_end - ip) < (x))
@@ -244,9 +246,10 @@ lookbehind_overrun:
 	*out_len = op - out;
 	return LZO_E_LOOKBEHIND_OVERRUN;
 }
-
+#ifndef STATIC
 EXPORT_SYMBOL_GPL(lzo1x_decompress_safe);
 
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("LZO1X Decompressor");
 
+#endif
diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib
index cd815ac2a50b..0fe48cd91ffa 100644
--- a/scripts/Makefile.lib
+++ b/scripts/Makefile.lib
@@ -235,3 +235,8 @@ quiet_cmd_lzma = LZMA    $@
 cmd_lzma = (cat $(filter-out FORCE,$^) | \
 	lzma -9 && $(call size_append, $(filter-out FORCE,$^))) > $@ || \
 	(rm -f $@ ; false)
+
+quiet_cmd_lzo = LZO    $@
+cmd_lzo = (cat $(filter-out FORCE,$^) | \
+	lzop -9 && $(call size_append, $(filter-out FORCE,$^))) > $@ || \
+	(rm -f $@ ; false)
-- 
cgit v1.2.3


From 80884094e34456887ecdbd107d40e72c4a40f9c9 Mon Sep 17 00:00:00 2001
From: Michael Hennerich <michael.hennerich@analog.com>
Date: Fri, 8 Jan 2010 14:43:08 -0800
Subject: gpio: adp5588-gpio: new driver for ADP5588 GPIO expanders

Signed-off-by: Michael Hennerich <michael.hennerich@analog.com>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Cc: Jean Delvare <khali@linux-fr.org>
Cc: David Brownell <david-b@pacbell.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/gpio/Kconfig        |   9 ++
 drivers/gpio/Makefile       |   1 +
 drivers/gpio/adp5588-gpio.c | 266 ++++++++++++++++++++++++++++++++++++++++++++
 include/linux/i2c/adp5588.h |  12 ++
 4 files changed, 288 insertions(+)
 create mode 100644 drivers/gpio/adp5588-gpio.c

(limited to 'include/linux')

diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig
index a019b49ecc9b..1f1d88ae68d6 100644
--- a/drivers/gpio/Kconfig
+++ b/drivers/gpio/Kconfig
@@ -172,6 +172,15 @@ config GPIO_ADP5520
 	  To compile this driver as a module, choose M here: the module will
 	  be called adp5520-gpio.
 
+config GPIO_ADP5588
+	tristate "ADP5588 I2C GPIO expander"
+	depends on I2C
+	help
+	  This option enables support for 18 GPIOs found
+	  on Analog Devices ADP5588 GPIO Expanders.
+	  To compile this driver as a module, choose M here: the module will be
+	  called adp5588-gpio.
+
 comment "PCI GPIO expanders:"
 
 config GPIO_CS5535
diff --git a/drivers/gpio/Makefile b/drivers/gpio/Makefile
index 52fe4cf734c7..48687238edb1 100644
--- a/drivers/gpio/Makefile
+++ b/drivers/gpio/Makefile
@@ -5,6 +5,7 @@ ccflags-$(CONFIG_DEBUG_GPIO)	+= -DDEBUG
 obj-$(CONFIG_GPIOLIB)		+= gpiolib.o
 
 obj-$(CONFIG_GPIO_ADP5520)	+= adp5520-gpio.o
+obj-$(CONFIG_GPIO_ADP5588)	+= adp5588-gpio.o
 obj-$(CONFIG_GPIO_LANGWELL)	+= langwell_gpio.o
 obj-$(CONFIG_GPIO_MAX7301)	+= max7301.o
 obj-$(CONFIG_GPIO_MAX732X)	+= max732x.o
diff --git a/drivers/gpio/adp5588-gpio.c b/drivers/gpio/adp5588-gpio.c
new file mode 100644
index 000000000000..afc097a16b33
--- /dev/null
+++ b/drivers/gpio/adp5588-gpio.c
@@ -0,0 +1,266 @@
+/*
+ * GPIO Chip driver for Analog Devices
+ * ADP5588 I/O Expander and QWERTY Keypad Controller
+ *
+ * Copyright 2009 Analog Devices Inc.
+ *
+ * Licensed under the GPL-2 or later.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/i2c.h>
+#include <linux/gpio.h>
+
+#include <linux/i2c/adp5588.h>
+
+#define DRV_NAME		"adp5588-gpio"
+#define MAXGPIO			18
+#define ADP_BANK(offs)		((offs) >> 3)
+#define ADP_BIT(offs)		(1u << ((offs) & 0x7))
+
+struct adp5588_gpio {
+	struct i2c_client *client;
+	struct gpio_chip gpio_chip;
+	struct mutex lock;	/* protect cached dir, dat_out */
+	unsigned gpio_start;
+	uint8_t dat_out[3];
+	uint8_t dir[3];
+};
+
+static int adp5588_gpio_read(struct i2c_client *client, u8 reg)
+{
+	int ret = i2c_smbus_read_byte_data(client, reg);
+
+	if (ret < 0)
+		dev_err(&client->dev, "Read Error\n");
+
+	return ret;
+}
+
+static int adp5588_gpio_write(struct i2c_client *client, u8 reg, u8 val)
+{
+	int ret = i2c_smbus_write_byte_data(client, reg, val);
+
+	if (ret < 0)
+		dev_err(&client->dev, "Write Error\n");
+
+	return ret;
+}
+
+static int adp5588_gpio_get_value(struct gpio_chip *chip, unsigned off)
+{
+	struct adp5588_gpio *dev =
+	    container_of(chip, struct adp5588_gpio, gpio_chip);
+
+	return !!(adp5588_gpio_read(dev->client, GPIO_DAT_STAT1 + ADP_BANK(off))
+		  & ADP_BIT(off));
+}
+
+static void adp5588_gpio_set_value(struct gpio_chip *chip,
+				   unsigned off, int val)
+{
+	unsigned bank, bit;
+	struct adp5588_gpio *dev =
+	    container_of(chip, struct adp5588_gpio, gpio_chip);
+
+	bank = ADP_BANK(off);
+	bit = ADP_BIT(off);
+
+	mutex_lock(&dev->lock);
+	if (val)
+		dev->dat_out[bank] |= bit;
+	else
+		dev->dat_out[bank] &= ~bit;
+
+	adp5588_gpio_write(dev->client, GPIO_DAT_OUT1 + bank,
+			   dev->dat_out[bank]);
+	mutex_unlock(&dev->lock);
+}
+
+static int adp5588_gpio_direction_input(struct gpio_chip *chip, unsigned off)
+{
+	int ret;
+	unsigned bank;
+	struct adp5588_gpio *dev =
+	    container_of(chip, struct adp5588_gpio, gpio_chip);
+
+	bank = ADP_BANK(off);
+
+	mutex_lock(&dev->lock);
+	dev->dir[bank] &= ~ADP_BIT(off);
+	ret = adp5588_gpio_write(dev->client, GPIO_DIR1 + bank, dev->dir[bank]);
+	mutex_unlock(&dev->lock);
+
+	return ret;
+}
+
+static int adp5588_gpio_direction_output(struct gpio_chip *chip,
+					 unsigned off, int val)
+{
+	int ret;
+	unsigned bank, bit;
+	struct adp5588_gpio *dev =
+	    container_of(chip, struct adp5588_gpio, gpio_chip);
+
+	bank = ADP_BANK(off);
+	bit = ADP_BIT(off);
+
+	mutex_lock(&dev->lock);
+	dev->dir[bank] |= bit;
+
+	if (val)
+		dev->dat_out[bank] |= bit;
+	else
+		dev->dat_out[bank] &= ~bit;
+
+	ret = adp5588_gpio_write(dev->client, GPIO_DAT_OUT1 + bank,
+				 dev->dat_out[bank]);
+	ret |= adp5588_gpio_write(dev->client, GPIO_DIR1 + bank,
+				 dev->dir[bank]);
+	mutex_unlock(&dev->lock);
+
+	return ret;
+}
+
+static int __devinit adp5588_gpio_probe(struct i2c_client *client,
+					const struct i2c_device_id *id)
+{
+	struct adp5588_gpio_platform_data *pdata = client->dev.platform_data;
+	struct adp5588_gpio *dev;
+	struct gpio_chip *gc;
+	int ret, i, revid;
+
+	if (pdata == NULL) {
+		dev_err(&client->dev, "missing platform data\n");
+		return -ENODEV;
+	}
+
+	if (!i2c_check_functionality(client->adapter,
+					I2C_FUNC_SMBUS_BYTE_DATA)) {
+		dev_err(&client->dev, "SMBUS Byte Data not Supported\n");
+		return -EIO;
+	}
+
+	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+	if (dev == NULL) {
+		dev_err(&client->dev, "failed to alloc memory\n");
+		return -ENOMEM;
+	}
+
+	dev->client = client;
+
+	gc = &dev->gpio_chip;
+	gc->direction_input = adp5588_gpio_direction_input;
+	gc->direction_output = adp5588_gpio_direction_output;
+	gc->get = adp5588_gpio_get_value;
+	gc->set = adp5588_gpio_set_value;
+	gc->can_sleep = 1;
+
+	gc->base = pdata->gpio_start;
+	gc->ngpio = MAXGPIO;
+	gc->label = client->name;
+	gc->owner = THIS_MODULE;
+
+	mutex_init(&dev->lock);
+
+
+	ret = adp5588_gpio_read(dev->client, DEV_ID);
+	if (ret < 0)
+		goto err;
+
+	revid = ret & ADP5588_DEVICE_ID_MASK;
+
+	for (i = 0, ret = 0; i <= ADP_BANK(MAXGPIO); i++) {
+		dev->dat_out[i] = adp5588_gpio_read(client, GPIO_DAT_OUT1 + i);
+		dev->dir[i] = adp5588_gpio_read(client, GPIO_DIR1 + i);
+		ret |= adp5588_gpio_write(client, KP_GPIO1 + i, 0);
+		ret |= adp5588_gpio_write(client, GPIO_PULL1 + i,
+				(pdata->pullup_dis_mask >> (8 * i)) & 0xFF);
+
+		if (ret)
+			goto err;
+	}
+
+	ret = gpiochip_add(&dev->gpio_chip);
+	if (ret)
+		goto err;
+
+	dev_info(&client->dev, "gpios %d..%d on a %s Rev. %d\n",
+			gc->base, gc->base + gc->ngpio - 1,
+			client->name, revid);
+
+	if (pdata->setup) {
+		ret = pdata->setup(client, gc->base, gc->ngpio, pdata->context);
+		if (ret < 0)
+			dev_warn(&client->dev, "setup failed, %d\n", ret);
+	}
+
+	i2c_set_clientdata(client, dev);
+	return 0;
+
+err:
+	kfree(dev);
+	return ret;
+}
+
+static int __devexit adp5588_gpio_remove(struct i2c_client *client)
+{
+	struct adp5588_gpio_platform_data *pdata = client->dev.platform_data;
+	struct adp5588_gpio *dev = i2c_get_clientdata(client);
+	int ret;
+
+	if (pdata->teardown) {
+		ret = pdata->teardown(client,
+				      dev->gpio_chip.base, dev->gpio_chip.ngpio,
+				      pdata->context);
+		if (ret < 0) {
+			dev_err(&client->dev, "teardown failed %d\n", ret);
+			return ret;
+		}
+	}
+
+	ret = gpiochip_remove(&dev->gpio_chip);
+	if (ret) {
+		dev_err(&client->dev, "gpiochip_remove failed %d\n", ret);
+		return ret;
+	}
+
+	kfree(dev);
+	return 0;
+}
+
+static const struct i2c_device_id adp5588_gpio_id[] = {
+	{DRV_NAME, 0},
+	{}
+};
+
+MODULE_DEVICE_TABLE(i2c, adp5588_gpio_id);
+
+static struct i2c_driver adp5588_gpio_driver = {
+	.driver = {
+		   .name = DRV_NAME,
+		   },
+	.probe = adp5588_gpio_probe,
+	.remove = __devexit_p(adp5588_gpio_remove),
+	.id_table = adp5588_gpio_id,
+};
+
+static int __init adp5588_gpio_init(void)
+{
+	return i2c_add_driver(&adp5588_gpio_driver);
+}
+
+module_init(adp5588_gpio_init);
+
+static void __exit adp5588_gpio_exit(void)
+{
+	i2c_del_driver(&adp5588_gpio_driver);
+}
+
+module_exit(adp5588_gpio_exit);
+
+MODULE_AUTHOR("Michael Hennerich <hennerich@blackfin.uclinux.org>");
+MODULE_DESCRIPTION("GPIO ADP5588 Driver");
+MODULE_LICENSE("GPL");
diff --git a/include/linux/i2c/adp5588.h b/include/linux/i2c/adp5588.h
index fc5db826b48e..02c9af374741 100644
--- a/include/linux/i2c/adp5588.h
+++ b/include/linux/i2c/adp5588.h
@@ -89,4 +89,16 @@ struct adp5588_kpad_platform_data {
 	unsigned short unlock_key2;	/* Unlock Key 2 */
 };
 
+struct adp5588_gpio_platform_data {
+	unsigned gpio_start;		/* GPIO Chip base # */
+	unsigned pullup_dis_mask;	/* Pull-Up Disable Mask */
+	int	(*setup)(struct i2c_client *client,
+				int gpio, unsigned ngpio,
+				void *context);
+	int	(*teardown)(struct i2c_client *client,
+				int gpio, unsigned ngpio,
+				void *context);
+	void	*context;
+};
+
 #endif
-- 
cgit v1.2.3


From a29815a333c6c6e677294bbe5958e771d0aad3fd Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@qumranet.com>
Date: Sun, 10 Jan 2010 16:28:09 +0200
Subject: core, x86: make LIST_POISON less deadly

The list macros use LIST_POISON1 and LIST_POISON2 as undereferencable
pointers in order to trap erronous use of freed list_heads.  Unfortunately
userspace can arrange for those pointers to actually be dereferencable,
potentially turning an oops to an expolit.

To avoid this allow architectures (currently x86_64 only) to override
the default values for these pointers with truly-undereferencable values.
This is easy on x86_64 as the virtual address space is large and contains
areas that cannot be mapped.

Other 64-bit architectures will likely find similar unmapped ranges.

[ingo: switch to 0xdead000000000000 as the unmapped area]
[ingo: add comments, cleanup]
[jaswinder: eliminate sparse warnings]

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Avi Kivity <avi@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/Kconfig       |  5 +++++
 include/linux/poison.h | 16 ++++++++++++++--
 2 files changed, 19 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 6bf1f1ac478c..cbcbfdee3ee0 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1247,6 +1247,11 @@ config ARCH_MEMORY_PROBE
 	def_bool X86_64
 	depends on MEMORY_HOTPLUG
 
+config ILLEGAL_POINTER_VALUE
+       hex
+       default 0 if X86_32
+       default 0xdead000000000000 if X86_64
+
 source "mm/Kconfig"
 
 config HIGHPTE
diff --git a/include/linux/poison.h b/include/linux/poison.h
index 7fc194aef8c2..2110a81c5e2a 100644
--- a/include/linux/poison.h
+++ b/include/linux/poison.h
@@ -2,13 +2,25 @@
 #define _LINUX_POISON_H
 
 /********** include/linux/list.h **********/
+
+/*
+ * Architectures might want to move the poison pointer offset
+ * into some well-recognized area such as 0xdead000000000000,
+ * that is also not mappable by user-space exploits:
+ */
+#ifdef CONFIG_ILLEGAL_POINTER_VALUE
+# define POISON_POINTER_DELTA _AC(CONFIG_ILLEGAL_POINTER_VALUE, UL)
+#else
+# define POISON_POINTER_DELTA 0
+#endif
+
 /*
  * These are non-NULL pointers that will result in page faults
  * under normal circumstances, used to verify that nobody uses
  * non-initialized list entries.
  */
-#define LIST_POISON1  ((void *) 0x00100100)
-#define LIST_POISON2  ((void *) 0x00200200)
+#define LIST_POISON1  ((void *) 0x00100100 + POISON_POINTER_DELTA)
+#define LIST_POISON2  ((void *) 0x00200200 + POISON_POINTER_DELTA)
 
 /********** include/linux/timer.h **********/
 /*
-- 
cgit v1.2.3


From d218d11133d888f9745802146a50255a4781d37a Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@vyatta.com>
Date: Mon, 11 Jan 2010 16:28:01 -0800
Subject: tcp: Generalized TTL Security Mechanism

This patch adds the kernel portions needed to implement
RFC 5082 Generalized TTL Security Mechanism (GTSM).
It is a lightweight security measure against forged
packets causing DoS attacks (for BGP).

This is already implemented the same way in BSD kernels.
For the necessary Quagga patch
  http://www.gossamer-threads.com/lists/quagga/dev/17389

Description from Cisco
  http://www.cisco.com/en/US/docs/ios/12_3t/12_3t7/feature/guide/gt_btsh.html

It does add one byte to each socket structure, but I did
a little rearrangement to reuse a hole (on 64 bit), but it
does grow the structure on 32 bit

This should be documented on ip(4) man page and the Glibc in.h
file also needs update.  IPV6_MINHOPLIMIT should also be added
(although BSD doesn't support that).

Only TCP is supported, but could also be added to UDP, DCCP, SCTP
if desired.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/in.h      |  2 ++
 include/net/inet_sock.h |  4 +++-
 net/ipv4/ip_sockglue.c  | 14 +++++++++++++-
 net/ipv4/tcp_ipv4.c     |  3 +++
 4 files changed, 21 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/in.h b/include/linux/in.h
index b615649db129..583c76f9c30f 100644
--- a/include/linux/in.h
+++ b/include/linux/in.h
@@ -84,6 +84,8 @@ struct in_addr {
 #define IP_ORIGDSTADDR       20
 #define IP_RECVORIGDSTADDR   IP_ORIGDSTADDR
 
+#define IP_MINTTL       21
+
 /* IP_MTU_DISCOVER values */
 #define IP_PMTUDISC_DONT		0	/* Never send DF frames */
 #define IP_PMTUDISC_WANT		1	/* Use per route hints	*/
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index bd4c53f75ac0..83fd34437cf1 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -122,10 +122,12 @@ struct inet_sock {
 	__be32			inet_saddr;
 	__s16			uc_ttl;
 	__u16			cmsg_flags;
-	struct ip_options	*opt;
 	__be16			inet_sport;
 	__u16			inet_id;
+
+	struct ip_options	*opt;
 	__u8			tos;
+	__u8			min_ttl;
 	__u8			mc_ttl;
 	__u8			pmtudisc;
 	__u8			recverr:1,
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index cafad9baff03..644dc43a55de 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -451,7 +451,8 @@ static int do_ip_setsockopt(struct sock *sk, int level,
 			     (1<<IP_TTL) | (1<<IP_HDRINCL) |
 			     (1<<IP_MTU_DISCOVER) | (1<<IP_RECVERR) |
 			     (1<<IP_ROUTER_ALERT) | (1<<IP_FREEBIND) |
-			     (1<<IP_PASSSEC) | (1<<IP_TRANSPARENT))) ||
+			     (1<<IP_PASSSEC) | (1<<IP_TRANSPARENT) |
+			     (1<<IP_MINTTL))) ||
 	    optname == IP_MULTICAST_TTL ||
 	    optname == IP_MULTICAST_ALL ||
 	    optname == IP_MULTICAST_LOOP ||
@@ -936,6 +937,14 @@ mc_msf_out:
 		inet->transparent = !!val;
 		break;
 
+	case IP_MINTTL:
+		if (optlen < 1)
+			goto e_inval;
+		if (val < 0 || val > 255)
+			goto e_inval;
+		inet->min_ttl = val;
+		break;
+
 	default:
 		err = -ENOPROTOOPT;
 		break;
@@ -1198,6 +1207,9 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
 	case IP_TRANSPARENT:
 		val = inet->transparent;
 		break;
+	case IP_MINTTL:
+		val = inet->min_ttl;
+		break;
 	default:
 		release_sock(sk);
 		return -ENOPROTOOPT;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 65b8ebfd078a..382f667238ec 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1649,6 +1649,9 @@ int tcp_v4_rcv(struct sk_buff *skb)
 	if (!sk)
 		goto no_tcp_socket;
 
+	if (iph->ttl < inet_sk(sk)->min_ttl)
+		goto discard_and_relse;
+
 process:
 	if (sk->sk_state == TCP_TIME_WAIT)
 		goto do_time_wait;
-- 
cgit v1.2.3


From a393db6f10ef2d4f28257234cfc730e744dfb6a4 Mon Sep 17 00:00:00 2001
From: Philipp Reisner <philipp.reisner@linbit.com>
Date: Tue, 22 Dec 2009 13:35:52 +0100
Subject: drbd: Allow online resizing of DRBD devices while peer not reachable
 (needs to be explicitly forced)

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 drivers/block/drbd/drbd_int.h      |  5 ++---
 drivers/block/drbd/drbd_nl.c       | 17 +++++++++++------
 drivers/block/drbd/drbd_receiver.c |  4 ++--
 include/linux/drbd_nl.h            |  1 +
 4 files changed, 16 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index 79d8e22c4d0d..2bf3a6ef3684 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -1371,10 +1371,9 @@ extern int is_valid_ar_handle(struct drbd_request *, sector_t);
 extern void drbd_suspend_io(struct drbd_conf *mdev);
 extern void drbd_resume_io(struct drbd_conf *mdev);
 extern char *ppsize(char *buf, unsigned long long size);
-extern sector_t drbd_new_dev_size(struct drbd_conf *,
-		struct drbd_backing_dev *);
+extern sector_t drbd_new_dev_size(struct drbd_conf *, struct drbd_backing_dev *, int);
 enum determine_dev_size { dev_size_error = -1, unchanged = 0, shrunk = 1, grew = 2 };
-extern enum determine_dev_size drbd_determin_dev_size(struct drbd_conf *) __must_hold(local);
+extern enum determine_dev_size drbd_determin_dev_size(struct drbd_conf *, int force) __must_hold(local);
 extern void resync_after_online_grow(struct drbd_conf *);
 extern void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int) __must_hold(local);
 extern int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role,
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 3313901a4861..1292e0620663 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -510,7 +510,7 @@ void drbd_resume_io(struct drbd_conf *mdev)
  * Returns 0 on success, negative return values indicate errors.
  * You should call drbd_md_sync() after calling this function.
  */
-enum determine_dev_size drbd_determin_dev_size(struct drbd_conf *mdev) __must_hold(local)
+enum determine_dev_size drbd_determin_dev_size(struct drbd_conf *mdev, int force) __must_hold(local)
 {
 	sector_t prev_first_sect, prev_size; /* previous meta location */
 	sector_t la_size;
@@ -541,7 +541,7 @@ enum determine_dev_size drbd_determin_dev_size(struct drbd_conf *mdev) __must_ho
 	/* TODO: should only be some assert here, not (re)init... */
 	drbd_md_set_sector_offsets(mdev, mdev->ldev);
 
-	size = drbd_new_dev_size(mdev, mdev->ldev);
+	size = drbd_new_dev_size(mdev, mdev->ldev, force);
 
 	if (drbd_get_capacity(mdev->this_bdev) != size ||
 	    drbd_bm_capacity(mdev) != size) {
@@ -596,7 +596,7 @@ out:
 }
 
 sector_t
-drbd_new_dev_size(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
+drbd_new_dev_size(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, int assume_peer_has_space)
 {
 	sector_t p_size = mdev->p_size;   /* partner's disk size. */
 	sector_t la_size = bdev->md.la_size_sect; /* last agreed size. */
@@ -606,6 +606,11 @@ drbd_new_dev_size(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
 
 	m_size = drbd_get_max_capacity(bdev);
 
+	if (mdev->state.conn < C_CONNECTED && assume_peer_has_space) {
+		dev_warn(DEV, "Resize while not connected was forced by the user!\n");
+		p_size = m_size;
+	}
+
 	if (p_size && m_size) {
 		size = min_t(sector_t, p_size, m_size);
 	} else {
@@ -965,7 +970,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
 
 	/* Prevent shrinking of consistent devices ! */
 	if (drbd_md_test_flag(nbc, MDF_CONSISTENT) &&
-	   drbd_new_dev_size(mdev, nbc) < nbc->md.la_size_sect) {
+	    drbd_new_dev_size(mdev, nbc, 0) < nbc->md.la_size_sect) {
 		dev_warn(DEV, "refusing to truncate a consistent device\n");
 		retcode = ERR_DISK_TO_SMALL;
 		goto force_diskless_dec;
@@ -1052,7 +1057,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
 	    !drbd_md_test_flag(mdev->ldev, MDF_CONNECTED_IND))
 		set_bit(USE_DEGR_WFC_T, &mdev->flags);
 
-	dd = drbd_determin_dev_size(mdev);
+	dd = drbd_determin_dev_size(mdev, 0);
 	if (dd == dev_size_error) {
 		retcode = ERR_NOMEM_BITMAP;
 		goto force_diskless_dec;
@@ -1504,7 +1509,7 @@ static int drbd_nl_resize(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
 	}
 
 	mdev->ldev->dc.disk_size = (sector_t)rs.resize_size;
-	dd = drbd_determin_dev_size(mdev);
+	dd = drbd_determin_dev_size(mdev, rs.resize_force);
 	drbd_md_sync(mdev);
 	put_ldev(mdev);
 	if (dd == dev_size_error) {
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index e3716fadc6a5..f22a5283128a 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -2870,7 +2870,7 @@ static int receive_sizes(struct drbd_conf *mdev, struct p_header *h)
 
 		/* Never shrink a device with usable data during connect.
 		   But allow online shrinking if we are connected. */
-		if (drbd_new_dev_size(mdev, mdev->ldev) <
+		if (drbd_new_dev_size(mdev, mdev->ldev, 0) <
 		   drbd_get_capacity(mdev->this_bdev) &&
 		   mdev->state.disk >= D_OUTDATED &&
 		   mdev->state.conn < C_CONNECTED) {
@@ -2885,7 +2885,7 @@ static int receive_sizes(struct drbd_conf *mdev, struct p_header *h)
 #undef min_not_zero
 
 	if (get_ldev(mdev)) {
-		dd = drbd_determin_dev_size(mdev);
+	  dd = drbd_determin_dev_size(mdev, 0);
 		put_ldev(mdev);
 		if (dd == dev_size_error)
 			return FALSE;
diff --git a/include/linux/drbd_nl.h b/include/linux/drbd_nl.h
index db5721ad50d1..a4d82f895994 100644
--- a/include/linux/drbd_nl.h
+++ b/include/linux/drbd_nl.h
@@ -69,6 +69,7 @@ NL_PACKET(disconnect, 6, )
 
 NL_PACKET(resize, 7,
 	NL_INT64(		29,	T_MAY_IGNORE,	resize_size)
+	NL_BIT(			68,	T_MAY_IGNORE,	resize_force)
 )
 
 NL_PACKET(syncer_conf, 8,
-- 
cgit v1.2.3


From 2ebccd71a71e6078920bc65b40f120e72b71c2b6 Mon Sep 17 00:00:00 2001
From: Philipp Reisner <philipp.reisner@linbit.com>
Date: Tue, 12 Jan 2010 10:09:07 +0100
Subject: drbd: The kernel code is now equivalent to out of tree release 8.3.7

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 include/linux/drbd.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/drbd.h b/include/linux/drbd.h
index e84f4733cb55..78962272338a 100644
--- a/include/linux/drbd.h
+++ b/include/linux/drbd.h
@@ -53,7 +53,7 @@
 
 
 extern const char *drbd_buildtag(void);
-#define REL_VERSION "8.3.6"
+#define REL_VERSION "8.3.7"
 #define API_VERSION 88
 #define PRO_VERSION_MIN 86
 #define PRO_VERSION_MAX 91
-- 
cgit v1.2.3


From 3ccd4c6167d3b39d52631767ebbf8b5677c5855d Mon Sep 17 00:00:00 2001
From: Oliver Hartkopp <oliver@hartkopp.net>
Date: Tue, 12 Jan 2010 02:00:46 -0800
Subject: can: Unify droping of invalid tx skbs and netdev stats

To prevent the CAN drivers to operate on invalid socketbuffers the skbs are
now checked and silently dropped at the xmit-function consistently.

Also the netdev stats are consistently using the CAN data length code (dlc)
for [rx|tx]_bytes now.

Signed-off-by: Oliver Hartkopp <oliver@hartkopp.net>
Acked-by: Wolfgang Grandegger <wg@grandegger.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/can/at91_can.c        |  3 +++
 drivers/net/can/bfin_can.c        |  3 +++
 drivers/net/can/mcp251x.c         |  6 +-----
 drivers/net/can/mscan/mscan.c     |  5 +----
 drivers/net/can/sja1000/sja1000.c |  3 +++
 drivers/net/can/ti_hecc.c         |  4 +++-
 drivers/net/can/usb/ems_usb.c     |  3 +++
 drivers/net/can/vcan.c            | 12 +++++++++---
 include/linux/can/dev.h           | 15 +++++++++++++++
 9 files changed, 41 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/can/at91_can.c b/drivers/net/can/at91_can.c
index 166cc7e579c0..f7287497ba6e 100644
--- a/drivers/net/can/at91_can.c
+++ b/drivers/net/can/at91_can.c
@@ -342,6 +342,9 @@ static netdev_tx_t at91_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	unsigned int mb, prio;
 	u32 reg_mid, reg_mcr;
 
+	if (can_dropped_invalid_skb(dev, skb))
+		return NETDEV_TX_OK;
+
 	mb = get_tx_next_mb(priv);
 	prio = get_tx_next_prio(priv);
 
diff --git a/drivers/net/can/bfin_can.c b/drivers/net/can/bfin_can.c
index 0ec1524523cc..7e1926e79e98 100644
--- a/drivers/net/can/bfin_can.c
+++ b/drivers/net/can/bfin_can.c
@@ -318,6 +318,9 @@ static int bfin_can_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	u16 val;
 	int i;
 
+	if (can_dropped_invalid_skb(dev, skb))
+		return NETDEV_TX_OK;
+
 	netif_stop_queue(dev);
 
 	/* fill id */
diff --git a/drivers/net/can/mcp251x.c b/drivers/net/can/mcp251x.c
index 1a72ca066a17..afa2fa45fed9 100644
--- a/drivers/net/can/mcp251x.c
+++ b/drivers/net/can/mcp251x.c
@@ -494,12 +494,8 @@ static netdev_tx_t mcp251x_hard_start_xmit(struct sk_buff *skb,
 		return NETDEV_TX_BUSY;
 	}
 
-	if (skb->len != sizeof(struct can_frame)) {
-		dev_err(&spi->dev, "dropping packet - bad length\n");
-		dev_kfree_skb(skb);
-		net->stats.tx_dropped++;
+	if (can_dropped_invalid_skb(net, skb))
 		return NETDEV_TX_OK;
-	}
 
 	netif_stop_queue(net);
 	priv->tx_skb = skb;
diff --git a/drivers/net/can/mscan/mscan.c b/drivers/net/can/mscan/mscan.c
index 500d18918bd5..40827c128b65 100644
--- a/drivers/net/can/mscan/mscan.c
+++ b/drivers/net/can/mscan/mscan.c
@@ -204,11 +204,8 @@ static netdev_tx_t mscan_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	int i, rtr, buf_id;
 	u32 can_id;
 
-	if (skb->len != sizeof(*frame) || frame->can_dlc > 8) {
-		kfree_skb(skb);
-		dev->stats.tx_dropped++;
+	if (can_dropped_invalid_skb(dev, skb))
 		return NETDEV_TX_OK;
-	}
 
 	out_8(&regs->cantier, 0);
 
diff --git a/drivers/net/can/sja1000/sja1000.c b/drivers/net/can/sja1000/sja1000.c
index 542a4f7255b4..345304d779b9 100644
--- a/drivers/net/can/sja1000/sja1000.c
+++ b/drivers/net/can/sja1000/sja1000.c
@@ -249,6 +249,9 @@ static netdev_tx_t sja1000_start_xmit(struct sk_buff *skb,
 	uint8_t dreg;
 	int i;
 
+	if (can_dropped_invalid_skb(dev, skb))
+		return NETDEV_TX_OK;
+
 	netif_stop_queue(dev);
 
 	fi = dlc = cf->can_dlc;
diff --git a/drivers/net/can/ti_hecc.c b/drivers/net/can/ti_hecc.c
index 5c993c2da528..7d370e32a7a8 100644
--- a/drivers/net/can/ti_hecc.c
+++ b/drivers/net/can/ti_hecc.c
@@ -477,6 +477,9 @@ static netdev_tx_t ti_hecc_xmit(struct sk_buff *skb, struct net_device *ndev)
 	u32 mbxno, mbx_mask, data;
 	unsigned long flags;
 
+	if (can_dropped_invalid_skb(ndev, skb))
+		return NETDEV_TX_OK;
+
 	mbxno = get_tx_head_mb(priv);
 	mbx_mask = BIT(mbxno);
 	spin_lock_irqsave(&priv->mbx_lock, flags);
@@ -491,7 +494,6 @@ static netdev_tx_t ti_hecc_xmit(struct sk_buff *skb, struct net_device *ndev)
 	spin_unlock_irqrestore(&priv->mbx_lock, flags);
 
 	/* Prepare mailbox for transmission */
-	data = min_t(u8, cf->can_dlc, 8);
 	if (cf->can_id & CAN_RTR_FLAG) /* Remote transmission request */
 		data |= HECC_CANMCF_RTR;
 	data |= get_tx_head_prio(priv) << 8;
diff --git a/drivers/net/can/usb/ems_usb.c b/drivers/net/can/usb/ems_usb.c
index efbb05c71bf4..ddb17e256656 100644
--- a/drivers/net/can/usb/ems_usb.c
+++ b/drivers/net/can/usb/ems_usb.c
@@ -767,6 +767,9 @@ static netdev_tx_t ems_usb_start_xmit(struct sk_buff *skb, struct net_device *ne
 	size_t size = CPC_HEADER_SIZE + CPC_MSG_HEADER_LEN
 			+ sizeof(struct cpc_can_msg);
 
+	if (can_dropped_invalid_skb(netdev, skb))
+		return NETDEV_TX_OK;
+
 	/* create a URB, and a buffer for it, and copy the data to the URB */
 	urb = usb_alloc_urb(0, GFP_ATOMIC);
 	if (!urb) {
diff --git a/drivers/net/can/vcan.c b/drivers/net/can/vcan.c
index 80ac56313981..d124d837ae58 100644
--- a/drivers/net/can/vcan.c
+++ b/drivers/net/can/vcan.c
@@ -47,6 +47,7 @@
 #include <linux/if_arp.h>
 #include <linux/if_ether.h>
 #include <linux/can.h>
+#include <linux/can/dev.h>
 #include <net/rtnetlink.h>
 
 static __initdata const char banner[] =
@@ -70,10 +71,11 @@ MODULE_PARM_DESC(echo, "Echo sent frames (for testing). Default: 0 (Off)");
 
 static void vcan_rx(struct sk_buff *skb, struct net_device *dev)
 {
+	struct can_frame *cf = (struct can_frame *)skb->data;
 	struct net_device_stats *stats = &dev->stats;
 
 	stats->rx_packets++;
-	stats->rx_bytes += skb->len;
+	stats->rx_bytes += cf->can_dlc;
 
 	skb->protocol  = htons(ETH_P_CAN);
 	skb->pkt_type  = PACKET_BROADCAST;
@@ -85,11 +87,15 @@ static void vcan_rx(struct sk_buff *skb, struct net_device *dev)
 
 static netdev_tx_t vcan_tx(struct sk_buff *skb, struct net_device *dev)
 {
+	struct can_frame *cf = (struct can_frame *)skb->data;
 	struct net_device_stats *stats = &dev->stats;
 	int loop;
 
+	if (can_dropped_invalid_skb(dev, skb))
+		return NETDEV_TX_OK;
+
 	stats->tx_packets++;
-	stats->tx_bytes += skb->len;
+	stats->tx_bytes += cf->can_dlc;
 
 	/* set flag whether this packet has to be looped back */
 	loop = skb->pkt_type == PACKET_LOOPBACK;
@@ -103,7 +109,7 @@ static netdev_tx_t vcan_tx(struct sk_buff *skb, struct net_device *dev)
 			 * CAN core already did the echo for us
 			 */
 			stats->rx_packets++;
-			stats->rx_bytes += skb->len;
+			stats->rx_bytes += cf->can_dlc;
 		}
 		kfree_skb(skb);
 		return NETDEV_TX_OK;
diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h
index 3db7767d2a17..7e7c98a3e908 100644
--- a/include/linux/can/dev.h
+++ b/include/linux/can/dev.h
@@ -60,6 +60,21 @@ struct can_priv {
  */
 #define get_can_dlc(i)	(min_t(__u8, (i), 8))
 
+/* Drop a given socketbuffer if it does not contain a valid CAN frame. */
+static inline int can_dropped_invalid_skb(struct net_device *dev,
+					  struct sk_buff *skb)
+{
+	const struct can_frame *cf = (struct can_frame *)skb->data;
+
+	if (unlikely(skb->len != sizeof(*cf) || cf->can_dlc > 8)) {
+		kfree_skb(skb);
+		dev->stats.tx_dropped++;
+		return 1;
+	}
+
+	return 0;
+}
+
 struct net_device *alloc_candev(int sizeof_priv, unsigned int echo_skb_max);
 void free_candev(struct net_device *dev);
 
-- 
cgit v1.2.3


From 81077e82c3f591578625805dd6464a27a9ff56ec Mon Sep 17 00:00:00 2001
From: Lukáš Turek <8an@praha12.net>
Date: Mon, 21 Dec 2009 22:50:47 +0100
Subject: nl80211: Add new WIPHY attribute COVERAGE_CLASS

The new attribute NL80211_ATTR_WIPHY_COVERAGE_CLASS sets IEEE 802.11
Coverage Class, which depends on maximum distance of nodes in a
wireless network. It's required for long distance links (more than a few
hundred meters).

The attribute is now ignored by two non-mac80211 drivers, rndis and
iwmc3200wifi, together with WIPHY_PARAM_RETRY_SHORT and
WIPHY_PARAM_RETRY_LONG. If it turns out to be a problem, we could split
set_wiphy_params callback or add new capability bits.

Signed-off-by: Lukas Turek <8an@praha12.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h |  4 ++++
 include/net/cfg80211.h  |  2 ++
 net/wireless/core.c     |  1 +
 net/wireless/nl80211.c  | 15 +++++++++++++++
 4 files changed, 22 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index 2bfbe88837ef..d4c556de7170 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -430,6 +430,8 @@ enum nl80211_commands {
  * @NL80211_ATTR_WIPHY_RTS_THRESHOLD: RTS threshold (TX frames with length
  *	larger than or equal to this use RTS/CTS handshake); allowed range:
  *	0..65536, disable with (u32)-1; dot11RTSThreshold; u32
+ * @NL80211_ATTR_WIPHY_COVERAGE_CLASS: Coverage Class as defined by IEEE 802.11
+ *	section 7.3.2.9; dot11CoverageClass; u8
  *
  * @NL80211_ATTR_IFINDEX: network interface index of the device to operate on
  * @NL80211_ATTR_IFNAME: network interface name
@@ -779,6 +781,8 @@ enum nl80211_attrs {
 
 	NL80211_ATTR_COOKIE,
 
+	NL80211_ATTR_WIPHY_COVERAGE_CLASS,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index add79930f47d..a8d5d04314b9 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -837,6 +837,7 @@ enum wiphy_params_flags {
 	WIPHY_PARAM_RETRY_LONG		= 1 << 1,
 	WIPHY_PARAM_FRAG_THRESHOLD	= 1 << 2,
 	WIPHY_PARAM_RTS_THRESHOLD	= 1 << 3,
+	WIPHY_PARAM_COVERAGE_CLASS	= 1 << 4,
 };
 
 /**
@@ -1236,6 +1237,7 @@ struct wiphy {
 	u8 retry_long;
 	u32 frag_threshold;
 	u32 rts_threshold;
+	u8 coverage_class;
 
 	char fw_version[ETHTOOL_BUSINFO_LEN];
 	u32 hw_version;
diff --git a/net/wireless/core.c b/net/wireless/core.c
index c2a2c563d21a..0a545bb6ed05 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -402,6 +402,7 @@ struct wiphy *wiphy_new(const struct cfg80211_ops *ops, int sizeof_priv)
 	rdev->wiphy.retry_long = 4;
 	rdev->wiphy.frag_threshold = (u32) -1;
 	rdev->wiphy.rts_threshold = (u32) -1;
+	rdev->wiphy.coverage_class = 0;
 
 	return &rdev->wiphy;
 }
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index e3bee3cecdfa..c09fbcd278fb 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -69,6 +69,7 @@ static struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] __read_mostly = {
 	[NL80211_ATTR_WIPHY_RETRY_LONG] = { .type = NLA_U8 },
 	[NL80211_ATTR_WIPHY_FRAG_THRESHOLD] = { .type = NLA_U32 },
 	[NL80211_ATTR_WIPHY_RTS_THRESHOLD] = { .type = NLA_U32 },
+	[NL80211_ATTR_WIPHY_COVERAGE_CLASS] = { .type = NLA_U8 },
 
 	[NL80211_ATTR_IFTYPE] = { .type = NLA_U32 },
 	[NL80211_ATTR_IFINDEX] = { .type = NLA_U32 },
@@ -444,6 +445,8 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags,
 		    dev->wiphy.frag_threshold);
 	NLA_PUT_U32(msg, NL80211_ATTR_WIPHY_RTS_THRESHOLD,
 		    dev->wiphy.rts_threshold);
+	NLA_PUT_U8(msg, NL80211_ATTR_WIPHY_COVERAGE_CLASS,
+		    dev->wiphy.coverage_class);
 
 	NLA_PUT_U8(msg, NL80211_ATTR_MAX_NUM_SCAN_SSIDS,
 		   dev->wiphy.max_scan_ssids);
@@ -684,6 +687,7 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info)
 	u32 changed;
 	u8 retry_short = 0, retry_long = 0;
 	u32 frag_threshold = 0, rts_threshold = 0;
+	u8 coverage_class = 0;
 
 	rtnl_lock();
 
@@ -806,9 +810,16 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info)
 		changed |= WIPHY_PARAM_RTS_THRESHOLD;
 	}
 
+	if (info->attrs[NL80211_ATTR_WIPHY_COVERAGE_CLASS]) {
+		coverage_class = nla_get_u8(
+			info->attrs[NL80211_ATTR_WIPHY_COVERAGE_CLASS]);
+		changed |= WIPHY_PARAM_COVERAGE_CLASS;
+	}
+
 	if (changed) {
 		u8 old_retry_short, old_retry_long;
 		u32 old_frag_threshold, old_rts_threshold;
+		u8 old_coverage_class;
 
 		if (!rdev->ops->set_wiphy_params) {
 			result = -EOPNOTSUPP;
@@ -819,6 +830,7 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info)
 		old_retry_long = rdev->wiphy.retry_long;
 		old_frag_threshold = rdev->wiphy.frag_threshold;
 		old_rts_threshold = rdev->wiphy.rts_threshold;
+		old_coverage_class = rdev->wiphy.coverage_class;
 
 		if (changed & WIPHY_PARAM_RETRY_SHORT)
 			rdev->wiphy.retry_short = retry_short;
@@ -828,6 +840,8 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info)
 			rdev->wiphy.frag_threshold = frag_threshold;
 		if (changed & WIPHY_PARAM_RTS_THRESHOLD)
 			rdev->wiphy.rts_threshold = rts_threshold;
+		if (changed & WIPHY_PARAM_COVERAGE_CLASS)
+			rdev->wiphy.coverage_class = coverage_class;
 
 		result = rdev->ops->set_wiphy_params(&rdev->wiphy, changed);
 		if (result) {
@@ -835,6 +849,7 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info)
 			rdev->wiphy.retry_long = old_retry_long;
 			rdev->wiphy.frag_threshold = old_frag_threshold;
 			rdev->wiphy.rts_threshold = old_rts_threshold;
+			rdev->wiphy.coverage_class = old_coverage_class;
 		}
 	}
 
-- 
cgit v1.2.3


From 13ae75b103e07304a34ab40c9136e9f53e06475c Mon Sep 17 00:00:00 2001
From: Jouni Malinen <jouni.malinen@atheros.com>
Date: Tue, 29 Dec 2009 12:59:45 +0200
Subject: nl80211: New command for setting TX rate mask for rate control

Add a new NL80211_CMD_SET_TX_BITRATE_MASK command and related
attributes to provide support for setting TX rate mask for rate
control. This uses the existing cfg80211 set_bitrate_mask operation
that was previously used only with WEXT compat code (SIOCSIWRATE). The
nl80211 command allows more generic configuration of allowed rates as
a mask instead of fixed/max rate.

Signed-off-by: Jouni Malinen <jouni.malinen@atheros.com>
Acked-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h |  44 +++++++++++++++++++
 include/net/cfg80211.h  |   4 +-
 net/wireless/nl80211.c  | 111 ++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 157 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index d4c556de7170..7a1c8c145b22 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -295,6 +295,10 @@
  *	This command is also used as an event to notify when a requested
  *	remain-on-channel duration has expired.
  *
+ * @NL80211_CMD_SET_TX_BITRATE_MASK: Set the mask of rates to be used in TX
+ *	rate selection. %NL80211_ATTR_IFINDEX is used to specify the interface
+ *	and @NL80211_ATTR_TX_RATES the set of allowed rates.
+ *
  * @NL80211_CMD_MAX: highest used command number
  * @__NL80211_CMD_AFTER_LAST: internal use
  */
@@ -381,6 +385,8 @@ enum nl80211_commands {
 	NL80211_CMD_REMAIN_ON_CHANNEL,
 	NL80211_CMD_CANCEL_REMAIN_ON_CHANNEL,
 
+	NL80211_CMD_SET_TX_BITRATE_MASK,
+
 	/* add new commands above here */
 
 	/* used to define NL80211_CMD_MAX below */
@@ -640,6 +646,13 @@ enum nl80211_commands {
  *
  * @NL80211_ATTR_COOKIE: Generic 64-bit cookie to identify objects.
  *
+ * @NL80211_ATTR_TX_RATES: Nested set of attributes
+ *	(enum nl80211_tx_rate_attributes) describing TX rates per band. The
+ *	enum nl80211_band value is used as the index (nla_type() of the nested
+ *	data. If a band is not included, it will be configured to allow all
+ *	rates based on negotiated supported rates information. This attribute
+ *	is used with %NL80211_CMD_SET_TX_BITRATE_MASK.
+ *
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
  */
@@ -783,6 +796,8 @@ enum nl80211_attrs {
 
 	NL80211_ATTR_WIPHY_COVERAGE_CLASS,
 
+	NL80211_ATTR_TX_RATES,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
@@ -1482,4 +1497,33 @@ enum nl80211_key_attributes {
 	NL80211_KEY_MAX = __NL80211_KEY_AFTER_LAST - 1
 };
 
+/**
+ * enum nl80211_tx_rate_attributes - TX rate set attributes
+ * @__NL80211_TXRATE_INVALID: invalid
+ * @NL80211_TXRATE_LEGACY: Legacy (non-MCS) rates allowed for TX rate selection
+ *	in an array of rates as defined in IEEE 802.11 7.3.2.2 (u8 values with
+ *	1 = 500 kbps) but without the IE length restriction (at most
+ *	%NL80211_MAX_SUPP_RATES in a single array).
+ * @__NL80211_TXRATE_AFTER_LAST: internal
+ * @NL80211_TXRATE_MAX: highest TX rate attribute
+ */
+enum nl80211_tx_rate_attributes {
+	__NL80211_TXRATE_INVALID,
+	NL80211_TXRATE_LEGACY,
+
+	/* keep last */
+	__NL80211_TXRATE_AFTER_LAST,
+	NL80211_TXRATE_MAX = __NL80211_TXRATE_AFTER_LAST - 1
+};
+
+/**
+ * enum nl80211_band - Frequency band
+ * @NL80211_BAND_2GHZ - 2.4 GHz ISM band
+ * @NL80211_BAND_5GHZ - around 5 GHz band (4.9 - 5.7 GHz)
+ */
+enum nl80211_band {
+	NL80211_BAND_2GHZ,
+	NL80211_BAND_5GHZ,
+};
+
 #endif /* __LINUX_NL80211_H */
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 22e062afb5a1..0d734413b5fb 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -39,8 +39,8 @@
  * @IEEE80211_BAND_5GHZ: around 5GHz band (4.9-5.7)
  */
 enum ieee80211_band {
-	IEEE80211_BAND_2GHZ,
-	IEEE80211_BAND_5GHZ,
+	IEEE80211_BAND_2GHZ = NL80211_BAND_2GHZ,
+	IEEE80211_BAND_5GHZ = NL80211_BAND_5GHZ,
 
 	/* keep last */
 	IEEE80211_NUM_BANDS
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index c09fbcd278fb..b804062e0179 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -144,6 +144,7 @@ static struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] __read_mostly = {
 				 .len = WLAN_PMKID_LEN },
 	[NL80211_ATTR_DURATION] = { .type = NLA_U32 },
 	[NL80211_ATTR_COOKIE] = { .type = NLA_U64 },
+	[NL80211_ATTR_TX_RATES] = { .type = NLA_NESTED },
 };
 
 /* policy for the attributes */
@@ -575,6 +576,7 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags,
 	CMD(del_pmksa, DEL_PMKSA);
 	CMD(flush_pmksa, FLUSH_PMKSA);
 	CMD(remain_on_channel, REMAIN_ON_CHANNEL);
+	CMD(set_bitrate_mask, SET_TX_BITRATE_MASK);
 	if (dev->wiphy.flags & WIPHY_FLAG_NETNS_OK) {
 		i++;
 		NLA_PUT_U32(msg, i, NL80211_CMD_SET_WIPHY_NETNS);
@@ -4438,6 +4440,109 @@ static int nl80211_cancel_remain_on_channel(struct sk_buff *skb,
 	return err;
 }
 
+static u32 rateset_to_mask(struct ieee80211_supported_band *sband,
+			   u8 *rates, u8 rates_len)
+{
+	u8 i;
+	u32 mask = 0;
+
+	for (i = 0; i < rates_len; i++) {
+		int rate = (rates[i] & 0x7f) * 5;
+		int ridx;
+		for (ridx = 0; ridx < sband->n_bitrates; ridx++) {
+			struct ieee80211_rate *srate =
+				&sband->bitrates[ridx];
+			if (rate == srate->bitrate) {
+				mask |= 1 << ridx;
+				break;
+			}
+		}
+		if (ridx == sband->n_bitrates)
+			return 0; /* rate not found */
+	}
+
+	return mask;
+}
+
+static struct nla_policy
+nl80211_txattr_policy[NL80211_TXRATE_MAX + 1] __read_mostly = {
+	[NL80211_TXRATE_LEGACY] = { .type = NLA_BINARY,
+				    .len = NL80211_MAX_SUPP_RATES },
+};
+
+static int nl80211_set_tx_bitrate_mask(struct sk_buff *skb,
+				       struct genl_info *info)
+{
+	struct nlattr *tb[NL80211_TXRATE_MAX + 1];
+	struct cfg80211_registered_device *rdev;
+	struct cfg80211_bitrate_mask mask;
+	int err, rem, i;
+	struct net_device *dev;
+	struct nlattr *tx_rates;
+	struct ieee80211_supported_band *sband;
+
+	if (info->attrs[NL80211_ATTR_TX_RATES] == NULL)
+		return -EINVAL;
+
+	rtnl_lock();
+
+	err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev);
+	if (err)
+		goto unlock_rtnl;
+
+	if (!rdev->ops->set_bitrate_mask) {
+		err = -EOPNOTSUPP;
+		goto unlock;
+	}
+
+	memset(&mask, 0, sizeof(mask));
+	/* Default to all rates enabled */
+	for (i = 0; i < IEEE80211_NUM_BANDS; i++) {
+		sband = rdev->wiphy.bands[i];
+		mask.control[i].legacy =
+			sband ? (1 << sband->n_bitrates) - 1 : 0;
+	}
+
+	/*
+	 * The nested attribute uses enum nl80211_band as the index. This maps
+	 * directly to the enum ieee80211_band values used in cfg80211.
+	 */
+	nla_for_each_nested(tx_rates, info->attrs[NL80211_ATTR_TX_RATES], rem)
+	{
+		enum ieee80211_band band = nla_type(tx_rates);
+		if (band < 0 || band >= IEEE80211_NUM_BANDS) {
+			err = -EINVAL;
+			goto unlock;
+		}
+		sband = rdev->wiphy.bands[band];
+		if (sband == NULL) {
+			err = -EINVAL;
+			goto unlock;
+		}
+		nla_parse(tb, NL80211_TXRATE_MAX, nla_data(tx_rates),
+			  nla_len(tx_rates), nl80211_txattr_policy);
+		if (tb[NL80211_TXRATE_LEGACY]) {
+			mask.control[band].legacy = rateset_to_mask(
+				sband,
+				nla_data(tb[NL80211_TXRATE_LEGACY]),
+				nla_len(tb[NL80211_TXRATE_LEGACY]));
+			if (mask.control[band].legacy == 0) {
+				err = -EINVAL;
+				goto unlock;
+			}
+		}
+	}
+
+	err = rdev->ops->set_bitrate_mask(&rdev->wiphy, dev, NULL, &mask);
+
+ unlock:
+	dev_put(dev);
+	cfg80211_unlock_rdev(rdev);
+ unlock_rtnl:
+	rtnl_unlock();
+	return err;
+}
+
 static struct genl_ops nl80211_ops[] = {
 	{
 		.cmd = NL80211_CMD_GET_WIPHY,
@@ -4712,6 +4817,12 @@ static struct genl_ops nl80211_ops[] = {
 		.policy = nl80211_policy,
 		.flags = GENL_ADMIN_PERM,
 	},
+	{
+		.cmd = NL80211_CMD_SET_TX_BITRATE_MASK,
+		.doit = nl80211_set_tx_bitrate_mask,
+		.policy = nl80211_policy,
+		.flags = GENL_ADMIN_PERM,
+	},
 };
 
 static struct genl_multicast_group nl80211_mlme_mcgrp = {
-- 
cgit v1.2.3


From 7044cc565b45a898c140fb185174a66f2d68a163 Mon Sep 17 00:00:00 2001
From: Kalle Valo <kalle.valo@nokia.com>
Date: Tue, 5 Jan 2010 20:16:19 +0200
Subject: mac80211: add functions to create PS Poll and Nullfunc templates

Some hardware, for example wl1251 and wl1271, handle the transmission
of power save related frames in hardware, but the driver is responsible
for creating the templates. It's better to create the templates in mac80211,
that way all drivers can benefit from this.

Add two new functions, ieee80211_pspoll_get() and ieee80211_nullfunc_get()
which drivers need to call to get the frame. Drivers are also responsible
for updating the templates after each association.

Also new struct ieee80211_hdr_3addr is added to ieee80211.h to make it
easy to calculate length of the Nullfunc frame.

Signed-off-by: Kalle Valo <kalle.valo@nokia.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/ieee80211.h |  9 ++++++
 include/net/mac80211.h    | 30 ++++++++++++++++++
 net/mac80211/tx.c         | 78 +++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 117 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index aeea282bd2fe..602c0692c3fc 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -130,6 +130,15 @@ struct ieee80211_hdr {
 	u8 addr4[6];
 } __attribute__ ((packed));
 
+struct ieee80211_hdr_3addr {
+	__le16 frame_control;
+	__le16 duration_id;
+	u8 addr1[6];
+	u8 addr2[6];
+	u8 addr3[6];
+	__le16 seq_ctrl;
+} __attribute__ ((packed));
+
 /**
  * ieee80211_has_tods - check if IEEE80211_FCTL_TODS is set
  * @fc: frame control bytes in little-endian byteorder
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 7e5af6d90b93..75f46e26ad60 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -1874,6 +1874,36 @@ static inline struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw,
 	return ieee80211_beacon_get_tim(hw, vif, NULL, NULL);
 }
 
+/**
+ * ieee80211_pspoll_get - retrieve a PS Poll template
+ * @hw: pointer obtained from ieee80211_alloc_hw().
+ * @vif: &struct ieee80211_vif pointer from the add_interface callback.
+ *
+ * Creates a PS Poll a template which can, for example, uploaded to
+ * hardware. The template must be updated after association so that correct
+ * AID, BSSID and MAC address is used.
+ *
+ * Note: Caller (or hardware) is responsible for setting the
+ * &IEEE80211_FCTL_PM bit.
+ */
+struct sk_buff *ieee80211_pspoll_get(struct ieee80211_hw *hw,
+				     struct ieee80211_vif *vif);
+
+/**
+ * ieee80211_nullfunc_get - retrieve a nullfunc template
+ * @hw: pointer obtained from ieee80211_alloc_hw().
+ * @vif: &struct ieee80211_vif pointer from the add_interface callback.
+ *
+ * Creates a Nullfunc template which can, for example, uploaded to
+ * hardware. The template must be updated after association so that correct
+ * BSSID and address is used.
+ *
+ * Note: Caller (or hardware) is responsible for setting the
+ * &IEEE80211_FCTL_PM bit as well as Duration and Sequence Control fields.
+ */
+struct sk_buff *ieee80211_nullfunc_get(struct ieee80211_hw *hw,
+				       struct ieee80211_vif *vif);
+
 /**
  * ieee80211_rts_get - RTS frame generation function
  * @hw: pointer obtained from ieee80211_alloc_hw().
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index d3a44812f8bf..055b45b146d9 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -2200,6 +2200,84 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw,
 }
 EXPORT_SYMBOL(ieee80211_beacon_get_tim);
 
+struct sk_buff *ieee80211_pspoll_get(struct ieee80211_hw *hw,
+				     struct ieee80211_vif *vif)
+{
+	struct ieee80211_sub_if_data *sdata;
+	struct ieee80211_if_managed *ifmgd;
+	struct ieee80211_pspoll *pspoll;
+	struct ieee80211_local *local;
+	struct sk_buff *skb;
+
+	if (WARN_ON(vif->type != NL80211_IFTYPE_STATION))
+		return NULL;
+
+	sdata = vif_to_sdata(vif);
+	ifmgd = &sdata->u.mgd;
+	local = sdata->local;
+
+	skb = dev_alloc_skb(local->hw.extra_tx_headroom + sizeof(*pspoll));
+	if (!skb) {
+		printk(KERN_DEBUG "%s: failed to allocate buffer for "
+		       "pspoll template\n", sdata->name);
+		return NULL;
+	}
+	skb_reserve(skb, local->hw.extra_tx_headroom);
+
+	pspoll = (struct ieee80211_pspoll *) skb_put(skb, sizeof(*pspoll));
+	memset(pspoll, 0, sizeof(*pspoll));
+	pspoll->frame_control = cpu_to_le16(IEEE80211_FTYPE_CTL |
+					    IEEE80211_STYPE_PSPOLL);
+	pspoll->aid = cpu_to_le16(ifmgd->aid);
+
+	/* aid in PS-Poll has its two MSBs each set to 1 */
+	pspoll->aid |= cpu_to_le16(1 << 15 | 1 << 14);
+
+	memcpy(pspoll->bssid, ifmgd->bssid, ETH_ALEN);
+	memcpy(pspoll->ta, vif->addr, ETH_ALEN);
+
+	return skb;
+}
+EXPORT_SYMBOL(ieee80211_pspoll_get);
+
+struct sk_buff *ieee80211_nullfunc_get(struct ieee80211_hw *hw,
+				       struct ieee80211_vif *vif)
+{
+	struct ieee80211_hdr_3addr *nullfunc;
+	struct ieee80211_sub_if_data *sdata;
+	struct ieee80211_if_managed *ifmgd;
+	struct ieee80211_local *local;
+	struct sk_buff *skb;
+
+	if (WARN_ON(vif->type != NL80211_IFTYPE_STATION))
+		return NULL;
+
+	sdata = vif_to_sdata(vif);
+	ifmgd = &sdata->u.mgd;
+	local = sdata->local;
+
+	skb = dev_alloc_skb(local->hw.extra_tx_headroom + sizeof(*nullfunc));
+	if (!skb) {
+		printk(KERN_DEBUG "%s: failed to allocate buffer for nullfunc "
+		       "template\n", sdata->name);
+		return NULL;
+	}
+	skb_reserve(skb, local->hw.extra_tx_headroom);
+
+	nullfunc = (struct ieee80211_hdr_3addr *) skb_put(skb,
+							  sizeof(*nullfunc));
+	memset(nullfunc, 0, sizeof(*nullfunc));
+	nullfunc->frame_control = cpu_to_le16(IEEE80211_FTYPE_DATA |
+					      IEEE80211_STYPE_NULLFUNC |
+					      IEEE80211_FCTL_TODS);
+	memcpy(nullfunc->addr1, ifmgd->bssid, ETH_ALEN);
+	memcpy(nullfunc->addr2, vif->addr, ETH_ALEN);
+	memcpy(nullfunc->addr3, ifmgd->bssid, ETH_ALEN);
+
+	return skb;
+}
+EXPORT_SYMBOL(ieee80211_nullfunc_get);
+
 void ieee80211_rts_get(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
 		       const void *frame, size_t frame_len,
 		       const struct ieee80211_tx_info *frame_txctl,
-- 
cgit v1.2.3


From 34a6eddbabd704b3c7dae9362234552267573be2 Mon Sep 17 00:00:00 2001
From: Jouni Malinen <j@w1.fi>
Date: Wed, 6 Jan 2010 16:19:24 +0200
Subject: cfg80211: Store IEs from both Beacon and Probe Response frames

Store information elements from Beacon and Probe Response frames in
separate buffers to allow both sets to be made available through
nl80211. This allows user space applications to get access to IEs from
Beacon frames even if we have received Probe Response frames from the
BSS. Previously, the IEs from Probe Response frames would have
overridden the IEs from Beacon frames.

This feature is of somewhat limited use since most protocols include
the same (or extended) information in Probe Response frames. However,
there are couple of exceptions where the IEs from Beacon frames could
be of some use: TIM IE is only included in Beacon frames (and it would
be needed to figure out the DTIM period used in the BSS) and at least
some implementations of Wireless Provisioning Services seem to include
the full IE only in Beacon frames).

The new BSS attribute for scan results is added to allow both the IE
sets to be delivered. This is done in a way that maintains the
previously used behavior for applications that are not aware of the
new NL80211_BSS_BEACON_IES attribute.

Signed-off-by: Jouni Malinen <j@w1.fi>
Acked-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h |  10 +++-
 include/net/cfg80211.h  |  12 ++++-
 net/wireless/core.h     |   3 +-
 net/wireless/nl80211.c  |   4 ++
 net/wireless/scan.c     | 120 ++++++++++++++++++++++++++++++++++++------------
 5 files changed, 116 insertions(+), 33 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index 7a1c8c145b22..127a73015760 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -1378,13 +1378,20 @@ enum nl80211_channel_type {
  * @NL80211_BSS_BEACON_INTERVAL: beacon interval of the (I)BSS (u16)
  * @NL80211_BSS_CAPABILITY: capability field (CPU order, u16)
  * @NL80211_BSS_INFORMATION_ELEMENTS: binary attribute containing the
- *	raw information elements from the probe response/beacon (bin)
+ *	raw information elements from the probe response/beacon (bin);
+ *	if the %NL80211_BSS_BEACON_IES attribute is present, the IEs here are
+ *	from a Probe Response frame; otherwise they are from a Beacon frame.
+ *	However, if the driver does not indicate the source of the IEs, these
+ *	IEs may be from either frame subtype.
  * @NL80211_BSS_SIGNAL_MBM: signal strength of probe response/beacon
  *	in mBm (100 * dBm) (s32)
  * @NL80211_BSS_SIGNAL_UNSPEC: signal strength of the probe response/beacon
  *	in unspecified units, scaled to 0..100 (u8)
  * @NL80211_BSS_STATUS: status, if this BSS is "used"
  * @NL80211_BSS_SEEN_MS_AGO: age of this BSS entry in ms
+ * @NL80211_BSS_BEACON_IES: binary attribute containing the raw information
+ *	elements from a Beacon frame (bin); not present if no Beacon frame has
+ *	yet been received
  * @__NL80211_BSS_AFTER_LAST: internal
  * @NL80211_BSS_MAX: highest BSS attribute
  */
@@ -1400,6 +1407,7 @@ enum nl80211_bss {
 	NL80211_BSS_SIGNAL_UNSPEC,
 	NL80211_BSS_STATUS,
 	NL80211_BSS_SEEN_MS_AGO,
+	NL80211_BSS_BEACON_IES,
 
 	/* keep last */
 	__NL80211_BSS_AFTER_LAST,
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 0d734413b5fb..2af52704e670 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -626,8 +626,14 @@ enum cfg80211_signal_type {
  * @beacon_interval: the beacon interval as from the frame
  * @capability: the capability field in host byte order
  * @information_elements: the information elements (Note that there
- *	is no guarantee that these are well-formed!)
+ *	is no guarantee that these are well-formed!); this is a pointer to
+ *	either the beacon_ies or proberesp_ies depending on whether Probe
+ *	Response frame has been received
  * @len_information_elements: total length of the information elements
+ * @beacon_ies: the information elements from the last Beacon frame
+ * @len_beacon_ies: total length of the beacon_ies
+ * @proberesp_ies: the information elements from the last Probe Response frame
+ * @len_proberesp_ies: total length of the proberesp_ies
  * @signal: signal strength value (type depends on the wiphy's signal_type)
  * @free_priv: function pointer to free private data
  * @priv: private area for driver use, has at least wiphy->bss_priv_size bytes
@@ -641,6 +647,10 @@ struct cfg80211_bss {
 	u16 capability;
 	u8 *information_elements;
 	size_t len_information_elements;
+	u8 *beacon_ies;
+	size_t len_beacon_ies;
+	u8 *proberesp_ies;
+	size_t len_proberesp_ies;
 
 	s32 signal;
 
diff --git a/net/wireless/core.h b/net/wireless/core.h
index 30ec95f05b52..2d6a6b9c0c43 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -111,7 +111,8 @@ struct cfg80211_internal_bss {
 	unsigned long ts;
 	struct kref ref;
 	atomic_t hold;
-	bool ies_allocated;
+	bool beacon_ies_allocated;
+	bool proberesp_ies_allocated;
 
 	/* must be last because of priv member */
 	struct cfg80211_bss pub;
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index b804062e0179..4af7991a9ec8 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -3163,6 +3163,10 @@ static int nl80211_send_bss(struct sk_buff *msg, u32 pid, u32 seq, int flags,
 		NLA_PUT(msg, NL80211_BSS_INFORMATION_ELEMENTS,
 			res->len_information_elements,
 			res->information_elements);
+	if (res->beacon_ies && res->len_beacon_ies &&
+	    res->beacon_ies != res->information_elements)
+		NLA_PUT(msg, NL80211_BSS_BEACON_IES,
+			res->len_beacon_ies, res->beacon_ies);
 	if (res->tsf)
 		NLA_PUT_U64(msg, NL80211_BSS_TSF, res->tsf);
 	if (res->beacon_interval)
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index 0c2cbbebca95..06b0231ee5e3 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -100,8 +100,10 @@ static void bss_release(struct kref *ref)
 	if (bss->pub.free_priv)
 		bss->pub.free_priv(&bss->pub);
 
-	if (bss->ies_allocated)
-		kfree(bss->pub.information_elements);
+	if (bss->beacon_ies_allocated)
+		kfree(bss->pub.beacon_ies);
+	if (bss->proberesp_ies_allocated)
+		kfree(bss->pub.proberesp_ies);
 
 	BUG_ON(atomic_read(&bss->hold));
 
@@ -375,8 +377,7 @@ rb_find_bss(struct cfg80211_registered_device *dev,
 
 static struct cfg80211_internal_bss *
 cfg80211_bss_update(struct cfg80211_registered_device *dev,
-		    struct cfg80211_internal_bss *res,
-		    bool overwrite)
+		    struct cfg80211_internal_bss *res)
 {
 	struct cfg80211_internal_bss *found = NULL;
 	const u8 *meshid, *meshcfg;
@@ -418,28 +419,64 @@ cfg80211_bss_update(struct cfg80211_registered_device *dev,
 		found->pub.capability = res->pub.capability;
 		found->ts = res->ts;
 
-		/* overwrite IEs */
-		if (overwrite) {
+		/* Update IEs */
+		if (res->pub.proberesp_ies) {
 			size_t used = dev->wiphy.bss_priv_size + sizeof(*res);
-			size_t ielen = res->pub.len_information_elements;
+			size_t ielen = res->pub.len_proberesp_ies;
+
+			if (found->pub.proberesp_ies &&
+			    !found->proberesp_ies_allocated &&
+			    ksize(found) >= used + ielen) {
+				memcpy(found->pub.proberesp_ies,
+				       res->pub.proberesp_ies, ielen);
+				found->pub.len_proberesp_ies = ielen;
+			} else {
+				u8 *ies = found->pub.proberesp_ies;
+
+				if (found->proberesp_ies_allocated)
+					ies = krealloc(ies, ielen, GFP_ATOMIC);
+				else
+					ies = kmalloc(ielen, GFP_ATOMIC);
+
+				if (ies) {
+					memcpy(ies, res->pub.proberesp_ies,
+					       ielen);
+					found->proberesp_ies_allocated = true;
+					found->pub.proberesp_ies = ies;
+					found->pub.len_proberesp_ies = ielen;
+				}
+			}
 
-			if (!found->ies_allocated && ksize(found) >= used + ielen) {
-				memcpy(found->pub.information_elements,
-				       res->pub.information_elements, ielen);
-				found->pub.len_information_elements = ielen;
+			/* Override possible earlier Beacon frame IEs */
+			found->pub.information_elements =
+				found->pub.proberesp_ies;
+			found->pub.len_information_elements =
+				found->pub.len_proberesp_ies;
+		}
+		if (res->pub.beacon_ies) {
+			size_t used = dev->wiphy.bss_priv_size + sizeof(*res);
+			size_t ielen = res->pub.len_beacon_ies;
+
+			if (found->pub.beacon_ies &&
+			    !found->beacon_ies_allocated &&
+			    ksize(found) >= used + ielen) {
+				memcpy(found->pub.beacon_ies,
+				       res->pub.beacon_ies, ielen);
+				found->pub.len_beacon_ies = ielen;
 			} else {
-				u8 *ies = found->pub.information_elements;
+				u8 *ies = found->pub.beacon_ies;
 
-				if (found->ies_allocated)
+				if (found->beacon_ies_allocated)
 					ies = krealloc(ies, ielen, GFP_ATOMIC);
 				else
 					ies = kmalloc(ielen, GFP_ATOMIC);
 
 				if (ies) {
-					memcpy(ies, res->pub.information_elements, ielen);
-					found->ies_allocated = true;
-					found->pub.information_elements = ies;
-					found->pub.len_information_elements = ielen;
+					memcpy(ies, res->pub.beacon_ies,
+					       ielen);
+					found->beacon_ies_allocated = true;
+					found->pub.beacon_ies = ies;
+					found->pub.len_beacon_ies = ielen;
 				}
 			}
 		}
@@ -489,14 +526,26 @@ cfg80211_inform_bss(struct wiphy *wiphy,
 	res->pub.tsf = timestamp;
 	res->pub.beacon_interval = beacon_interval;
 	res->pub.capability = capability;
-	/* point to after the private area */
-	res->pub.information_elements = (u8 *)res + sizeof(*res) + privsz;
-	memcpy(res->pub.information_elements, ie, ielen);
-	res->pub.len_information_elements = ielen;
+	/*
+	 * Since we do not know here whether the IEs are from a Beacon or Probe
+	 * Response frame, we need to pick one of the options and only use it
+	 * with the driver that does not provide the full Beacon/Probe Response
+	 * frame. Use Beacon frame pointer to avoid indicating that this should
+	 * override the information_elements pointer should we have received an
+	 * earlier indication of Probe Response data.
+	 *
+	 * The initial buffer for the IEs is allocated with the BSS entry and
+	 * is located after the private area.
+	 */
+	res->pub.beacon_ies = (u8 *)res + sizeof(*res) + privsz;
+	memcpy(res->pub.beacon_ies, ie, ielen);
+	res->pub.len_beacon_ies = ielen;
+	res->pub.information_elements = res->pub.beacon_ies;
+	res->pub.len_information_elements = res->pub.len_beacon_ies;
 
 	kref_init(&res->ref);
 
-	res = cfg80211_bss_update(wiphy_to_dev(wiphy), res, 0);
+	res = cfg80211_bss_update(wiphy_to_dev(wiphy), res);
 	if (!res)
 		return NULL;
 
@@ -517,7 +566,6 @@ cfg80211_inform_bss_frame(struct wiphy *wiphy,
 	struct cfg80211_internal_bss *res;
 	size_t ielen = len - offsetof(struct ieee80211_mgmt,
 				      u.probe_resp.variable);
-	bool overwrite;
 	size_t privsz = wiphy->bss_priv_size;
 
 	if (WARN_ON(wiphy->signal_type == NL80211_BSS_SIGNAL_UNSPEC &&
@@ -538,16 +586,28 @@ cfg80211_inform_bss_frame(struct wiphy *wiphy,
 	res->pub.tsf = le64_to_cpu(mgmt->u.probe_resp.timestamp);
 	res->pub.beacon_interval = le16_to_cpu(mgmt->u.probe_resp.beacon_int);
 	res->pub.capability = le16_to_cpu(mgmt->u.probe_resp.capab_info);
-	/* point to after the private area */
-	res->pub.information_elements = (u8 *)res + sizeof(*res) + privsz;
-	memcpy(res->pub.information_elements, mgmt->u.probe_resp.variable, ielen);
-	res->pub.len_information_elements = ielen;
+	/*
+	 * The initial buffer for the IEs is allocated with the BSS entry and
+	 * is located after the private area.
+	 */
+	if (ieee80211_is_probe_resp(mgmt->frame_control)) {
+		res->pub.proberesp_ies = (u8 *) res + sizeof(*res) + privsz;
+		memcpy(res->pub.proberesp_ies, mgmt->u.probe_resp.variable,
+		       ielen);
+		res->pub.len_proberesp_ies = ielen;
+		res->pub.information_elements = res->pub.proberesp_ies;
+		res->pub.len_information_elements = res->pub.len_proberesp_ies;
+	} else {
+		res->pub.beacon_ies = (u8 *) res + sizeof(*res) + privsz;
+		memcpy(res->pub.beacon_ies, mgmt->u.beacon.variable, ielen);
+		res->pub.len_beacon_ies = ielen;
+		res->pub.information_elements = res->pub.beacon_ies;
+		res->pub.len_information_elements = res->pub.len_beacon_ies;
+	}
 
 	kref_init(&res->ref);
 
-	overwrite = ieee80211_is_probe_resp(mgmt->frame_control);
-
-	res = cfg80211_bss_update(wiphy_to_dev(wiphy), res, overwrite);
+	res = cfg80211_bss_update(wiphy_to_dev(wiphy), res);
 	if (!res)
 		return NULL;
 
-- 
cgit v1.2.3


From ab13315af97919fae0e014748105fdc2e30afb2d Mon Sep 17 00:00:00 2001
From: Kalle Valo <kalle.valo@nokia.com>
Date: Tue, 12 Jan 2010 10:42:31 +0200
Subject: mac80211: add U-APSD client support

Add Unscheduled Automatic Power-Save Delivery (U-APSD) client support. The
idea is that the data frames from the client trigger AP to send the buffered
frames with ACs which have U-APSD enabled. This decreases latency and makes it
possible to save even more power.

Driver needs to use IEEE80211_HW_UAPSD to enable the feature. The current
implementation assumes that firmware takes care of the wakeup and
hardware needing IEEE80211_HW_PS_NULLFUNC_STACK is not yet supported.

Tested with wl1251 on a Nokia N900 and Cisco Aironet 1231G AP and running
various test traffic with ping.

Signed-off-by: Kalle Valo <kalle.valo@nokia.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/ieee80211.h  | 18 ++++++++++++++++++
 include/net/mac80211.h     |  7 +++++++
 net/mac80211/cfg.c         |  7 +++++++
 net/mac80211/ieee80211_i.h | 13 ++++++++++++-
 net/mac80211/main.c        |  4 ++++
 net/mac80211/mlme.c        | 31 ++++++++++++++++++++++++++++---
 net/mac80211/scan.c        | 18 ++++++++++++++++++
 net/mac80211/util.c        |  2 ++
 net/mac80211/work.c        | 12 ++++++++++--
 9 files changed, 106 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 602c0692c3fc..a8c6069a0d9f 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -120,6 +120,24 @@
 #define IEEE80211_QOS_CTL_TID_MASK	0x000F
 #define IEEE80211_QOS_CTL_TAG1D_MASK	0x0007
 
+/* U-APSD queue for WMM IEs sent by AP */
+#define IEEE80211_WMM_IE_AP_QOSINFO_UAPSD	(1<<7)
+
+/* U-APSD queues for WMM IEs sent by STA */
+#define IEEE80211_WMM_IE_STA_QOSINFO_AC_VO	(1<<0)
+#define IEEE80211_WMM_IE_STA_QOSINFO_AC_VI	(1<<1)
+#define IEEE80211_WMM_IE_STA_QOSINFO_AC_BK	(1<<2)
+#define IEEE80211_WMM_IE_STA_QOSINFO_AC_BE	(1<<3)
+#define IEEE80211_WMM_IE_STA_QOSINFO_AC_MASK	0x0f
+
+/* U-APSD max SP length for WMM IEs sent by STA */
+#define IEEE80211_WMM_IE_STA_QOSINFO_SP_ALL	0x00
+#define IEEE80211_WMM_IE_STA_QOSINFO_SP_2	0x01
+#define IEEE80211_WMM_IE_STA_QOSINFO_SP_4	0x02
+#define IEEE80211_WMM_IE_STA_QOSINFO_SP_6	0x03
+#define IEEE80211_WMM_IE_STA_QOSINFO_SP_MASK	0x03
+#define IEEE80211_WMM_IE_STA_QOSINFO_SP_SHIFT	5
+
 struct ieee80211_hdr {
 	__le16 frame_control;
 	__le16 duration_id;
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index e1e73c6abeff..f313a3cbabda 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -113,6 +113,7 @@ struct ieee80211_tx_queue_params {
 	u16 cw_min;
 	u16 cw_max;
 	u8 aifs;
+	bool uapsd;
 };
 
 /**
@@ -929,6 +930,11 @@ enum ieee80211_tkip_key_type {
  *	Hardware supports dynamic spatial multiplexing powersave,
  *	ie. can turn off all but one chain and then wake the rest
  *	up as required after, for example, rts/cts handshake.
+ *
+ * @IEEE80211_HW_SUPPORTS_UAPSD:
+ *	Hardware supports Unscheduled Automatic Power Save Delivery
+ *	(U-APSD) in managed mode. The mode is configured with
+ *	conf_tx() operation.
  */
 enum ieee80211_hw_flags {
 	IEEE80211_HW_HAS_RATE_CONTROL			= 1<<0,
@@ -948,6 +954,7 @@ enum ieee80211_hw_flags {
 	IEEE80211_HW_BEACON_FILTER			= 1<<14,
 	IEEE80211_HW_SUPPORTS_STATIC_SMPS		= 1<<15,
 	IEEE80211_HW_SUPPORTS_DYNAMIC_SMPS		= 1<<16,
+	IEEE80211_HW_SUPPORTS_UAPSD			= 1<<17,
 };
 
 /**
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index dc12e9466ffd..8286df5822d5 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -1128,6 +1128,13 @@ static int ieee80211_set_txq_params(struct wiphy *wiphy,
 	p.cw_max = params->cwmax;
 	p.cw_min = params->cwmin;
 	p.txop = params->txop;
+
+	/*
+	 * Setting tx queue params disables u-apsd because it's only
+	 * called in master mode.
+	 */
+	p.uapsd = false;
+
 	if (drv_conf_tx(local, params->queue, &p)) {
 		printk(KERN_DEBUG "%s: failed to set TX queue "
 		       "parameters for queue %d\n",
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 3e4ac3f30857..3468e378509a 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -58,6 +58,15 @@ struct ieee80211_local;
 
 #define TU_TO_EXP_TIME(x)	(jiffies + usecs_to_jiffies((x) * 1024))
 
+#define IEEE80211_DEFAULT_UAPSD_QUEUES \
+	(IEEE80211_WMM_IE_STA_QOSINFO_AC_BK |	\
+	 IEEE80211_WMM_IE_STA_QOSINFO_AC_BE |	\
+	 IEEE80211_WMM_IE_STA_QOSINFO_AC_VI |	\
+	 IEEE80211_WMM_IE_STA_QOSINFO_AC_VO)
+
+#define IEEE80211_DEFAULT_MAX_SP_LEN		\
+	IEEE80211_WMM_IE_STA_QOSINFO_SP_ALL
+
 struct ieee80211_fragment_entry {
 	unsigned long first_frag_time;
 	unsigned int seq;
@@ -78,6 +87,7 @@ struct ieee80211_bss {
 	u8 dtim_period;
 
 	bool wmm_used;
+	bool uapsd_supported;
 
 	unsigned long last_probe_resp;
 
@@ -285,7 +295,7 @@ struct ieee80211_work {
 			u8 ssid[IEEE80211_MAX_SSID_LEN];
 			u8 ssid_len;
 			u8 supp_rates_len;
-			bool wmm_used, use_11n;
+			bool wmm_used, use_11n, uapsd_used;
 		} assoc;
 		struct {
 			u32 duration;
@@ -306,6 +316,7 @@ enum ieee80211_sta_flags {
 	IEEE80211_STA_DISABLE_11N	= BIT(4),
 	IEEE80211_STA_CSA_RECEIVED	= BIT(5),
 	IEEE80211_STA_MFP_ENABLED	= BIT(6),
+	IEEE80211_STA_UAPSD_ENABLED	= BIT(7),
 };
 
 struct ieee80211_if_managed {
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 468829143991..0054bba08ce1 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -491,6 +491,10 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
 	else if (local->hw.flags & IEEE80211_HW_SIGNAL_UNSPEC)
 		local->hw.wiphy->signal_type = CFG80211_SIGNAL_TYPE_UNSPEC;
 
+	WARN((local->hw.flags & IEEE80211_HW_SUPPORTS_UAPSD)
+	     && (local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK),
+	     "U-APSD not supported with HW_PS_NULLFUNC_STACK\n");
+
 	/*
 	 * Calculate scan IE length -- we need this to alloc
 	 * memory and to subtract from the driver limit. It
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 86f025bc9456..39c27d83a4f2 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -569,7 +569,7 @@ static void ieee80211_sta_wmm_params(struct ieee80211_local *local,
 	struct ieee80211_tx_queue_params params;
 	size_t left;
 	int count;
-	u8 *pos;
+	u8 *pos, uapsd_queues = 0;
 
 	if (local->hw.queues < 4)
 		return;
@@ -579,6 +579,10 @@ static void ieee80211_sta_wmm_params(struct ieee80211_local *local,
 
 	if (wmm_param_len < 8 || wmm_param[5] /* version */ != 1)
 		return;
+
+	if (ifmgd->flags & IEEE80211_STA_UAPSD_ENABLED)
+		uapsd_queues = IEEE80211_DEFAULT_UAPSD_QUEUES;
+
 	count = wmm_param[6] & 0x0f;
 	if (count == ifmgd->wmm_last_param_set)
 		return;
@@ -593,6 +597,7 @@ static void ieee80211_sta_wmm_params(struct ieee80211_local *local,
 	for (; left >= 4; left -= 4, pos += 4) {
 		int aci = (pos[0] >> 5) & 0x03;
 		int acm = (pos[0] >> 4) & 0x01;
+		bool uapsd = false;
 		int queue;
 
 		switch (aci) {
@@ -600,22 +605,30 @@ static void ieee80211_sta_wmm_params(struct ieee80211_local *local,
 			queue = 3;
 			if (acm)
 				local->wmm_acm |= BIT(1) | BIT(2); /* BK/- */
+			if (uapsd_queues & IEEE80211_WMM_IE_STA_QOSINFO_AC_BK)
+				uapsd = true;
 			break;
 		case 2: /* AC_VI */
 			queue = 1;
 			if (acm)
 				local->wmm_acm |= BIT(4) | BIT(5); /* CL/VI */
+			if (uapsd_queues & IEEE80211_WMM_IE_STA_QOSINFO_AC_VI)
+				uapsd = true;
 			break;
 		case 3: /* AC_VO */
 			queue = 0;
 			if (acm)
 				local->wmm_acm |= BIT(6) | BIT(7); /* VO/NC */
+			if (uapsd_queues & IEEE80211_WMM_IE_STA_QOSINFO_AC_VO)
+				uapsd = true;
 			break;
 		case 0: /* AC_BE */
 		default:
 			queue = 2;
 			if (acm)
 				local->wmm_acm |= BIT(0) | BIT(3); /* BE/EE */
+			if (uapsd_queues & IEEE80211_WMM_IE_STA_QOSINFO_AC_BE)
+				uapsd = true;
 			break;
 		}
 
@@ -623,11 +636,14 @@ static void ieee80211_sta_wmm_params(struct ieee80211_local *local,
 		params.cw_max = ecw2cw((pos[1] & 0xf0) >> 4);
 		params.cw_min = ecw2cw(pos[1] & 0x0f);
 		params.txop = get_unaligned_le16(pos + 2);
+		params.uapsd = uapsd;
+
 #ifdef CONFIG_MAC80211_VERBOSE_DEBUG
 		printk(KERN_DEBUG "%s: WMM queue=%d aci=%d acm=%d aifs=%d "
-		       "cWmin=%d cWmax=%d txop=%d\n",
+		       "cWmin=%d cWmax=%d txop=%d uapsd=%d\n",
 		       wiphy_name(local->hw.wiphy), queue, aci, acm,
-		       params.aifs, params.cw_min, params.cw_max, params.txop);
+		       params.aifs, params.cw_min, params.cw_max, params.txop,
+		       params.uapsd);
 #endif
 		if (drv_conf_tx(local, queue, &params) && local->ops->conf_tx)
 			printk(KERN_DEBUG "%s: failed to set TX queue "
@@ -1906,6 +1922,15 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
 	wk->assoc.ht_information_ie =
 		ieee80211_bss_get_ie(req->bss, WLAN_EID_HT_INFORMATION);
 
+	if (bss->wmm_used && bss->uapsd_supported &&
+	    (sdata->local->hw.flags & IEEE80211_HW_SUPPORTS_UAPSD)) {
+		wk->assoc.uapsd_used = true;
+		ifmgd->flags |= IEEE80211_STA_UAPSD_ENABLED;
+	} else {
+		wk->assoc.uapsd_used = false;
+		ifmgd->flags &= ~IEEE80211_STA_UAPSD_ENABLED;
+	}
+
 	ssid = ieee80211_bss_get_ie(req->bss, WLAN_EID_SSID);
 	memcpy(wk->assoc.ssid, ssid + 2, ssid[1]);
 	wk->assoc.ssid_len = ssid[1];
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index 30cb62bb45b3..9afe2f9885dc 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -54,6 +54,23 @@ void ieee80211_rx_bss_put(struct ieee80211_local *local,
 	cfg80211_put_bss(container_of((void *)bss, struct cfg80211_bss, priv));
 }
 
+static bool is_uapsd_supported(struct ieee802_11_elems *elems)
+{
+	u8 qos_info;
+
+	if (elems->wmm_info && elems->wmm_info_len == 7
+	    && elems->wmm_info[5] == 1)
+		qos_info = elems->wmm_info[6];
+	else if (elems->wmm_param && elems->wmm_param_len == 24
+		 && elems->wmm_param[5] == 1)
+		qos_info = elems->wmm_param[6];
+	else
+		/* no valid wmm information or parameter element found */
+		return false;
+
+	return qos_info & IEEE80211_WMM_IE_AP_QOSINFO_UAPSD;
+}
+
 struct ieee80211_bss *
 ieee80211_bss_info_update(struct ieee80211_local *local,
 			  struct ieee80211_rx_status *rx_status,
@@ -117,6 +134,7 @@ ieee80211_bss_info_update(struct ieee80211_local *local,
 	}
 
 	bss->wmm_used = elems->wmm_param || elems->wmm_info;
+	bss->uapsd_supported = is_uapsd_supported(elems);
 
 	if (!beacon)
 		bss->last_probe_resp = jiffies;
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index a2ba6e29bd9a..e278f97c8305 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -792,6 +792,8 @@ void ieee80211_set_wmm_default(struct ieee80211_sub_if_data *sdata)
 			break;
 		}
 
+		qparam.uapsd = false;
+
 		drv_conf_tx(local, queue, &qparam);
 	}
 }
diff --git a/net/mac80211/work.c b/net/mac80211/work.c
index 7c5d95b1bc04..a74fd6ee0083 100644
--- a/net/mac80211/work.c
+++ b/net/mac80211/work.c
@@ -202,7 +202,7 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata,
 	struct ieee80211_local *local = sdata->local;
 	struct sk_buff *skb;
 	struct ieee80211_mgmt *mgmt;
-	u8 *pos;
+	u8 *pos, qos_info;
 	const u8 *ies;
 	size_t offset = 0, noffset;
 	int i, len, count, rates_len, supp_rates_len;
@@ -375,6 +375,14 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata,
 	}
 
 	if (wk->assoc.wmm_used && local->hw.queues >= 4) {
+		if (wk->assoc.uapsd_used) {
+			qos_info = IEEE80211_DEFAULT_UAPSD_QUEUES;
+			qos_info |= (IEEE80211_DEFAULT_MAX_SP_LEN <<
+				     IEEE80211_WMM_IE_STA_QOSINFO_SP_SHIFT);
+		} else {
+			qos_info = 0;
+		}
+
 		pos = skb_put(skb, 9);
 		*pos++ = WLAN_EID_VENDOR_SPECIFIC;
 		*pos++ = 7; /* len */
@@ -384,7 +392,7 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata,
 		*pos++ = 2; /* WME */
 		*pos++ = 0; /* WME info */
 		*pos++ = 1; /* WME ver */
-		*pos++ = 0;
+		*pos++ = qos_info;
 	}
 
 	/* add any remaining custom (i.e. vendor specific here) IEs */
-- 
cgit v1.2.3


From 558a6669d7cb407fbb0b5aec184b5c3b9a893d30 Mon Sep 17 00:00:00 2001
From: Kalle Valo <kalle.valo@nokia.com>
Date: Tue, 12 Jan 2010 10:43:00 +0200
Subject: ieee80211: add struct ieee80211_hdr_qos

The header can be used to create qos nullfunc frames, for example.

Signed-off-by: Kalle Valo <kalle.valo@nokia.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/ieee80211.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index a8c6069a0d9f..842701906ae9 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -157,6 +157,16 @@ struct ieee80211_hdr_3addr {
 	__le16 seq_ctrl;
 } __attribute__ ((packed));
 
+struct ieee80211_qos_hdr {
+	__le16 frame_control;
+	__le16 duration_id;
+	u8 addr1[6];
+	u8 addr2[6];
+	u8 addr3[6];
+	__le16 seq_ctrl;
+	__le16 qos_ctrl;
+} __attribute__ ((packed));
+
 /**
  * ieee80211_has_tods - check if IEEE80211_FCTL_TODS is set
  * @fc: frame control bytes in little-endian byteorder
-- 
cgit v1.2.3


From 5040ab67a2c6d5710ba497dc52a8f7035729d7b0 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 11 Jan 2010 11:14:44 +0900
Subject: libata: retry link resume if necessary

Interestingly, when SIDPR is used in ata_piix, writes to DET in
SControl sometimes get ignored leading to detection failure.  Update
sata_link_resume() such that it reads back SControl after clearing DET
and retry if it's not clear.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: fengxiangjun <fengxiangjun@neusoft.com>
Reported-by: Jim Faulkner <jfaulkne@ccs.neu.edu>
Cc: stable@kernel.org
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
---
 drivers/ata/libata-core.c | 38 +++++++++++++++++++++++++++++++-------
 include/linux/libata.h    |  3 +++
 2 files changed, 34 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 22ff51bdbc8a..6728328f3bea 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -3790,21 +3790,45 @@ int sata_link_debounce(struct ata_link *link, const unsigned long *params,
 int sata_link_resume(struct ata_link *link, const unsigned long *params,
 		     unsigned long deadline)
 {
+	int tries = ATA_LINK_RESUME_TRIES;
 	u32 scontrol, serror;
 	int rc;
 
 	if ((rc = sata_scr_read(link, SCR_CONTROL, &scontrol)))
 		return rc;
 
-	scontrol = (scontrol & 0x0f0) | 0x300;
+	/*
+	 * Writes to SControl sometimes get ignored under certain
+	 * controllers (ata_piix SIDPR).  Make sure DET actually is
+	 * cleared.
+	 */
+	do {
+		scontrol = (scontrol & 0x0f0) | 0x300;
+		if ((rc = sata_scr_write(link, SCR_CONTROL, scontrol)))
+			return rc;
+		/*
+		 * Some PHYs react badly if SStatus is pounded
+		 * immediately after resuming.  Delay 200ms before
+		 * debouncing.
+		 */
+		msleep(200);
 
-	if ((rc = sata_scr_write(link, SCR_CONTROL, scontrol)))
-		return rc;
+		/* is SControl restored correctly? */
+		if ((rc = sata_scr_read(link, SCR_CONTROL, &scontrol)))
+			return rc;
+	} while ((scontrol & 0xf0f) != 0x300 && --tries);
 
-	/* Some PHYs react badly if SStatus is pounded immediately
-	 * after resuming.  Delay 200ms before debouncing.
-	 */
-	msleep(200);
+	if ((scontrol & 0xf0f) != 0x300) {
+		ata_link_printk(link, KERN_ERR,
+				"failed to resume link (SControl %X)\n",
+				scontrol);
+		return 0;
+	}
+
+	if (tries < ATA_LINK_RESUME_TRIES)
+		ata_link_printk(link, KERN_WARNING,
+				"link resume succeeded after %d retries\n",
+				ATA_LINK_RESUME_TRIES - tries);
 
 	if ((rc = sata_link_debounce(link, params, deadline)))
 		return rc;
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 6a9c4ddd3d95..73112250862c 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -354,6 +354,9 @@ enum {
 	/* max tries if error condition is still set after ->error_handler */
 	ATA_EH_MAX_TRIES	= 5,
 
+	/* sometimes resuming a link requires several retries */
+	ATA_LINK_RESUME_TRIES	= 5,
+
 	/* how hard are we gonna try to probe/recover devices */
 	ATA_PROBE_MAX_TRIES	= 3,
 	ATA_EH_DEV_TRIES	= 3,
-- 
cgit v1.2.3


From 2c761270d5520dd84ab0b4e47c24d99ff8503c38 Mon Sep 17 00:00:00 2001
From: Dave Chinner <david@fromorbit.com>
Date: Tue, 12 Jan 2010 17:39:16 +1100
Subject: lib: Introduce generic list_sort function

There are two copies of list_sort() in the tree already, one in the DRM
code, another in ubifs.  Now XFS needs this as well.  Create a generic
list_sort() function from the ubifs version and convert existing users
to it so we don't end up with yet another copy in the tree.

Signed-off-by: Dave Chinner <david@fromorbit.com>
Acked-by: Dave Airlie <airlied@redhat.com>
Acked-by: Artem Bityutskiy <dedekind@infradead.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/gpu/drm/drm_modes.c |  90 ++------------------------------------
 fs/ubifs/gc.c               |  96 +----------------------------------------
 include/linux/list_sort.h   |  11 +++++
 lib/Makefile                |   2 +-
 lib/list_sort.c             | 102 ++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 119 insertions(+), 182 deletions(-)
 create mode 100644 include/linux/list_sort.h
 create mode 100644 lib/list_sort.c

(limited to 'include/linux')

diff --git a/drivers/gpu/drm/drm_modes.c b/drivers/gpu/drm/drm_modes.c
index 6d81a02463a3..76d63394c776 100644
--- a/drivers/gpu/drm/drm_modes.c
+++ b/drivers/gpu/drm/drm_modes.c
@@ -1,9 +1,4 @@
 /*
- * The list_sort function is (presumably) licensed under the GPL (see the
- * top level "COPYING" file for details).
- *
- * The remainder of this file is:
- *
  * Copyright © 1997-2003 by The XFree86 Project, Inc.
  * Copyright © 2007 Dave Airlie
  * Copyright © 2007-2008 Intel Corporation
@@ -36,6 +31,7 @@
  */
 
 #include <linux/list.h>
+#include <linux/list_sort.h>
 #include "drmP.h"
 #include "drm.h"
 #include "drm_crtc.h"
@@ -855,6 +851,7 @@ EXPORT_SYMBOL(drm_mode_prune_invalid);
 
 /**
  * drm_mode_compare - compare modes for favorability
+ * @priv: unused
  * @lh_a: list_head for first mode
  * @lh_b: list_head for second mode
  *
@@ -868,7 +865,7 @@ EXPORT_SYMBOL(drm_mode_prune_invalid);
  * Negative if @lh_a is better than @lh_b, zero if they're equivalent, or
  * positive if @lh_b is better than @lh_a.
  */
-static int drm_mode_compare(struct list_head *lh_a, struct list_head *lh_b)
+static int drm_mode_compare(void *priv, struct list_head *lh_a, struct list_head *lh_b)
 {
 	struct drm_display_mode *a = list_entry(lh_a, struct drm_display_mode, head);
 	struct drm_display_mode *b = list_entry(lh_b, struct drm_display_mode, head);
@@ -885,85 +882,6 @@ static int drm_mode_compare(struct list_head *lh_a, struct list_head *lh_b)
 	return diff;
 }
 
-/* FIXME: what we don't have a list sort function? */
-/* list sort from Mark J Roberts (mjr@znex.org) */
-void list_sort(struct list_head *head,
-	       int (*cmp)(struct list_head *a, struct list_head *b))
-{
-	struct list_head *p, *q, *e, *list, *tail, *oldhead;
-	int insize, nmerges, psize, qsize, i;
-
-	list = head->next;
-	list_del(head);
-	insize = 1;
-	for (;;) {
-		p = oldhead = list;
-		list = tail = NULL;
-		nmerges = 0;
-
-		while (p) {
-			nmerges++;
-			q = p;
-			psize = 0;
-			for (i = 0; i < insize; i++) {
-				psize++;
-				q = q->next == oldhead ? NULL : q->next;
-				if (!q)
-					break;
-			}
-
-			qsize = insize;
-			while (psize > 0 || (qsize > 0 && q)) {
-				if (!psize) {
-					e = q;
-					q = q->next;
-					qsize--;
-					if (q == oldhead)
-						q = NULL;
-				} else if (!qsize || !q) {
-					e = p;
-					p = p->next;
-					psize--;
-					if (p == oldhead)
-						p = NULL;
-				} else if (cmp(p, q) <= 0) {
-					e = p;
-					p = p->next;
-					psize--;
-					if (p == oldhead)
-						p = NULL;
-				} else {
-					e = q;
-					q = q->next;
-					qsize--;
-					if (q == oldhead)
-						q = NULL;
-				}
-				if (tail)
-					tail->next = e;
-				else
-					list = e;
-				e->prev = tail;
-				tail = e;
-			}
-			p = q;
-		}
-
-		tail->next = list;
-		list->prev = tail;
-
-		if (nmerges <= 1)
-			break;
-
-		insize *= 2;
-	}
-
-	head->next = list;
-	head->prev = list->prev;
-	list->prev->next = head;
-	list->prev = head;
-}
-
 /**
  * drm_mode_sort - sort mode list
  * @mode_list: list to sort
@@ -975,7 +893,7 @@ void list_sort(struct list_head *head,
  */
 void drm_mode_sort(struct list_head *mode_list)
 {
-	list_sort(mode_list, drm_mode_compare);
+	list_sort(NULL, mode_list, drm_mode_compare);
 }
 EXPORT_SYMBOL(drm_mode_sort);
 
diff --git a/fs/ubifs/gc.c b/fs/ubifs/gc.c
index 618c2701d3a7..e5a3d8e96bb7 100644
--- a/fs/ubifs/gc.c
+++ b/fs/ubifs/gc.c
@@ -54,6 +54,7 @@
  */
 
 #include <linux/pagemap.h>
+#include <linux/list_sort.h>
 #include "ubifs.h"
 
 /*
@@ -107,101 +108,6 @@ static int switch_gc_head(struct ubifs_info *c)
 	return err;
 }
 
-/**
- * list_sort - sort a list.
- * @priv: private data, passed to @cmp
- * @head: the list to sort
- * @cmp: the elements comparison function
- *
- * This function has been implemented by Mark J Roberts <mjr@znex.org>. It
- * implements "merge sort" which has O(nlog(n)) complexity. The list is sorted
- * in ascending order.
- *
- * The comparison function @cmp is supposed to return a negative value if @a is
- * than @b, and a positive value if @a is greater than @b. If @a and @b are
- * equivalent, then it does not matter what this function returns.
- */
-static void list_sort(void *priv, struct list_head *head,
-		      int (*cmp)(void *priv, struct list_head *a,
-				 struct list_head *b))
-{
-	struct list_head *p, *q, *e, *list, *tail, *oldhead;
-	int insize, nmerges, psize, qsize, i;
-
-	if (list_empty(head))
-		return;
-
-	list = head->next;
-	list_del(head);
-	insize = 1;
-	for (;;) {
-		p = oldhead = list;
-		list = tail = NULL;
-		nmerges = 0;
-
-		while (p) {
-			nmerges++;
-			q = p;
-			psize = 0;
-			for (i = 0; i < insize; i++) {
-				psize++;
-				q = q->next == oldhead ? NULL : q->next;
-				if (!q)
-					break;
-			}
-
-			qsize = insize;
-			while (psize > 0 || (qsize > 0 && q)) {
-				if (!psize) {
-					e = q;
-					q = q->next;
-					qsize--;
-					if (q == oldhead)
-						q = NULL;
-				} else if (!qsize || !q) {
-					e = p;
-					p = p->next;
-					psize--;
-					if (p == oldhead)
-						p = NULL;
-				} else if (cmp(priv, p, q) <= 0) {
-					e = p;
-					p = p->next;
-					psize--;
-					if (p == oldhead)
-						p = NULL;
-				} else {
-					e = q;
-					q = q->next;
-					qsize--;
-					if (q == oldhead)
-						q = NULL;
-				}
-				if (tail)
-					tail->next = e;
-				else
-					list = e;
-				e->prev = tail;
-				tail = e;
-			}
-			p = q;
-		}
-
-		tail->next = list;
-		list->prev = tail;
-
-		if (nmerges <= 1)
-			break;
-
-		insize *= 2;
-	}
-
-	head->next = list;
-	head->prev = list->prev;
-	list->prev->next = head;
-	list->prev = head;
-}
-
 /**
  * data_nodes_cmp - compare 2 data nodes.
  * @priv: UBIFS file-system description object
diff --git a/include/linux/list_sort.h b/include/linux/list_sort.h
new file mode 100644
index 000000000000..1a2df2efb771
--- /dev/null
+++ b/include/linux/list_sort.h
@@ -0,0 +1,11 @@
+#ifndef _LINUX_LIST_SORT_H
+#define _LINUX_LIST_SORT_H
+
+#include <linux/types.h>
+
+struct list_head;
+
+void list_sort(void *priv, struct list_head *head,
+	       int (*cmp)(void *priv, struct list_head *a,
+			  struct list_head *b));
+#endif
diff --git a/lib/Makefile b/lib/Makefile
index 911b25aed1e7..3b0b4a696db9 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -21,7 +21,7 @@ lib-y	+= kobject.o kref.o klist.o
 
 obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \
 	 bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \
-	 string_helpers.o gcd.o
+	 string_helpers.o gcd.o list_sort.o
 
 ifeq ($(CONFIG_DEBUG_KOBJECT),y)
 CFLAGS_kobject.o += -DDEBUG
diff --git a/lib/list_sort.c b/lib/list_sort.c
new file mode 100644
index 000000000000..19d11e0bb958
--- /dev/null
+++ b/lib/list_sort.c
@@ -0,0 +1,102 @@
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/list_sort.h>
+#include <linux/slab.h>
+#include <linux/list.h>
+
+/**
+ * list_sort - sort a list.
+ * @priv: private data, passed to @cmp
+ * @head: the list to sort
+ * @cmp: the elements comparison function
+ *
+ * This function has been implemented by Mark J Roberts <mjr@znex.org>. It
+ * implements "merge sort" which has O(nlog(n)) complexity. The list is sorted
+ * in ascending order.
+ *
+ * The comparison function @cmp is supposed to return a negative value if @a is
+ * less than @b, and a positive value if @a is greater than @b. If @a and @b
+ * are equivalent, then it does not matter what this function returns.
+ */
+void list_sort(void *priv, struct list_head *head,
+	       int (*cmp)(void *priv, struct list_head *a,
+			  struct list_head *b))
+{
+	struct list_head *p, *q, *e, *list, *tail, *oldhead;
+	int insize, nmerges, psize, qsize, i;
+
+	if (list_empty(head))
+		return;
+
+	list = head->next;
+	list_del(head);
+	insize = 1;
+	for (;;) {
+		p = oldhead = list;
+		list = tail = NULL;
+		nmerges = 0;
+
+		while (p) {
+			nmerges++;
+			q = p;
+			psize = 0;
+			for (i = 0; i < insize; i++) {
+				psize++;
+				q = q->next == oldhead ? NULL : q->next;
+				if (!q)
+					break;
+			}
+
+			qsize = insize;
+			while (psize > 0 || (qsize > 0 && q)) {
+				if (!psize) {
+					e = q;
+					q = q->next;
+					qsize--;
+					if (q == oldhead)
+						q = NULL;
+				} else if (!qsize || !q) {
+					e = p;
+					p = p->next;
+					psize--;
+					if (p == oldhead)
+						p = NULL;
+				} else if (cmp(priv, p, q) <= 0) {
+					e = p;
+					p = p->next;
+					psize--;
+					if (p == oldhead)
+						p = NULL;
+				} else {
+					e = q;
+					q = q->next;
+					qsize--;
+					if (q == oldhead)
+						q = NULL;
+				}
+				if (tail)
+					tail->next = e;
+				else
+					list = e;
+				e->prev = tail;
+				tail = e;
+			}
+			p = q;
+		}
+
+		tail->next = list;
+		list->prev = tail;
+
+		if (nmerges <= 1)
+			break;
+
+		insize *= 2;
+	}
+
+	head->next = list;
+	head->prev = list->prev;
+	list->prev->next = head;
+	list->prev = head;
+}
+
+EXPORT_SYMBOL(list_sort);
-- 
cgit v1.2.3


From bf66f18e79e34c421bbd8f6511e2c556b779df2f Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Mon, 4 Jan 2010 15:09:10 -0800
Subject: rcu: Add force_quiescent_state() testing to rcutorture

Add force_quiescent_state() testing to rcutorture, with a
separate thread that repeatedly invokes force_quiescent_state()
in bursts. This can greatly increase the probability of
encountering certain types of race conditions.

Suggested-by: Josh Triplett <josh@joshtriplett.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: laijs@cn.fujitsu.com
Cc: dipankar@in.ibm.com
Cc: mathieu.desnoyers@polymtl.ca
Cc: josh@joshtriplett.org
Cc: dvhltc@us.ibm.com
Cc: niv@us.ibm.com
Cc: peterz@infradead.org
Cc: rostedt@goodmis.org
Cc: Valdis.Kletnieks@vt.edu
Cc: dhowells@redhat.com
LKML-Reference: <1262646551116-git-send-email->
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/rcutiny.h | 12 ++++++++
 include/linux/rcutree.h |  3 ++
 kernel/rcutorture.c     | 80 +++++++++++++++++++++++++++++++++++++++++++++++--
 kernel/rcutree.c        | 18 +++++++++++
 kernel/rcutree_plugin.h | 19 ++++++++++++
 5 files changed, 130 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index 96cc307ed9f4..2b70d4e37383 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -62,6 +62,18 @@ static inline long rcu_batches_completed_bh(void)
 
 extern int rcu_expedited_torture_stats(char *page);
 
+static inline void rcu_force_quiescent_state(void)
+{
+}
+
+static inline void rcu_bh_force_quiescent_state(void)
+{
+}
+
+static inline void rcu_sched_force_quiescent_state(void)
+{
+}
+
 #define synchronize_rcu synchronize_sched
 
 static inline void synchronize_rcu_expedited(void)
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index 8044b1b94333..704a010f686c 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -99,6 +99,9 @@ extern void rcu_check_callbacks(int cpu, int user);
 extern long rcu_batches_completed(void);
 extern long rcu_batches_completed_bh(void);
 extern long rcu_batches_completed_sched(void);
+extern void rcu_force_quiescent_state(void);
+extern void rcu_bh_force_quiescent_state(void);
+extern void rcu_sched_force_quiescent_state(void);
 
 #ifdef CONFIG_NO_HZ
 void rcu_enter_nohz(void);
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index 9bb52177af02..adda92bfafac 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@ -61,6 +61,9 @@ static int test_no_idle_hz;	/* Test RCU's support for tickless idle CPUs. */
 static int shuffle_interval = 3; /* Interval between shuffles (in sec)*/
 static int stutter = 5;		/* Start/stop testing interval (in sec) */
 static int irqreader = 1;	/* RCU readers from irq (timers). */
+static int fqs_duration = 0;	/* Duration of bursts (us), 0 to disable. */
+static int fqs_holdoff = 0;	/* Hold time within burst (us). */
+static int fqs_stutter = 3;	/* Wait time between bursts (s). */
 static char *torture_type = "rcu"; /* What RCU implementation to torture. */
 
 module_param(nreaders, int, 0444);
@@ -79,6 +82,12 @@ module_param(stutter, int, 0444);
 MODULE_PARM_DESC(stutter, "Number of seconds to run/halt test");
 module_param(irqreader, int, 0444);
 MODULE_PARM_DESC(irqreader, "Allow RCU readers from irq handlers");
+module_param(fqs_duration, int, 0444);
+MODULE_PARM_DESC(fqs_duration, "Duration of fqs bursts (us)");
+module_param(fqs_holdoff, int, 0444);
+MODULE_PARM_DESC(fqs_holdoff, "Holdoff time within fqs bursts (us)");
+module_param(fqs_stutter, int, 0444);
+MODULE_PARM_DESC(fqs_stutter, "Wait time between fqs bursts (s)");
 module_param(torture_type, charp, 0444);
 MODULE_PARM_DESC(torture_type, "Type of RCU to torture (rcu, rcu_bh, srcu)");
 
@@ -99,6 +108,7 @@ static struct task_struct **reader_tasks;
 static struct task_struct *stats_task;
 static struct task_struct *shuffler_task;
 static struct task_struct *stutter_task;
+static struct task_struct *fqs_task;
 
 #define RCU_TORTURE_PIPE_LEN 10
 
@@ -263,6 +273,7 @@ struct rcu_torture_ops {
 	void (*deferred_free)(struct rcu_torture *p);
 	void (*sync)(void);
 	void (*cb_barrier)(void);
+	void (*fqs)(void);
 	int (*stats)(char *page);
 	int irq_capable;
 	char *name;
@@ -347,6 +358,7 @@ static struct rcu_torture_ops rcu_ops = {
 	.deferred_free	= rcu_torture_deferred_free,
 	.sync		= synchronize_rcu,
 	.cb_barrier	= rcu_barrier,
+	.fqs		= rcu_force_quiescent_state,
 	.stats		= NULL,
 	.irq_capable	= 1,
 	.name		= "rcu"
@@ -388,6 +400,7 @@ static struct rcu_torture_ops rcu_sync_ops = {
 	.deferred_free	= rcu_sync_torture_deferred_free,
 	.sync		= synchronize_rcu,
 	.cb_barrier	= NULL,
+	.fqs		= rcu_force_quiescent_state,
 	.stats		= NULL,
 	.irq_capable	= 1,
 	.name		= "rcu_sync"
@@ -403,6 +416,7 @@ static struct rcu_torture_ops rcu_expedited_ops = {
 	.deferred_free	= rcu_sync_torture_deferred_free,
 	.sync		= synchronize_rcu_expedited,
 	.cb_barrier	= NULL,
+	.fqs		= rcu_force_quiescent_state,
 	.stats		= NULL,
 	.irq_capable	= 1,
 	.name		= "rcu_expedited"
@@ -465,6 +479,7 @@ static struct rcu_torture_ops rcu_bh_ops = {
 	.deferred_free	= rcu_bh_torture_deferred_free,
 	.sync		= rcu_bh_torture_synchronize,
 	.cb_barrier	= rcu_barrier_bh,
+	.fqs		= rcu_bh_force_quiescent_state,
 	.stats		= NULL,
 	.irq_capable	= 1,
 	.name		= "rcu_bh"
@@ -480,6 +495,7 @@ static struct rcu_torture_ops rcu_bh_sync_ops = {
 	.deferred_free	= rcu_sync_torture_deferred_free,
 	.sync		= rcu_bh_torture_synchronize,
 	.cb_barrier	= NULL,
+	.fqs		= rcu_bh_force_quiescent_state,
 	.stats		= NULL,
 	.irq_capable	= 1,
 	.name		= "rcu_bh_sync"
@@ -621,6 +637,7 @@ static struct rcu_torture_ops sched_ops = {
 	.deferred_free	= rcu_sched_torture_deferred_free,
 	.sync		= sched_torture_synchronize,
 	.cb_barrier	= rcu_barrier_sched,
+	.fqs		= rcu_sched_force_quiescent_state,
 	.stats		= NULL,
 	.irq_capable	= 1,
 	.name		= "sched"
@@ -636,6 +653,7 @@ static struct rcu_torture_ops sched_sync_ops = {
 	.deferred_free	= rcu_sync_torture_deferred_free,
 	.sync		= sched_torture_synchronize,
 	.cb_barrier	= NULL,
+	.fqs		= rcu_sched_force_quiescent_state,
 	.stats		= NULL,
 	.name		= "sched_sync"
 };
@@ -650,11 +668,44 @@ static struct rcu_torture_ops sched_expedited_ops = {
 	.deferred_free	= rcu_sync_torture_deferred_free,
 	.sync		= synchronize_sched_expedited,
 	.cb_barrier	= NULL,
+	.fqs		= rcu_sched_force_quiescent_state,
 	.stats		= rcu_expedited_torture_stats,
 	.irq_capable	= 1,
 	.name		= "sched_expedited"
 };
 
+/*
+ * RCU torture force-quiescent-state kthread.  Repeatedly induces
+ * bursts of calls to force_quiescent_state(), increasing the probability
+ * of occurrence of some important types of race conditions.
+ */
+static int
+rcu_torture_fqs(void *arg)
+{
+	unsigned long fqs_resume_time;
+	int fqs_burst_remaining;
+
+	VERBOSE_PRINTK_STRING("rcu_torture_fqs task started");
+	do {
+		fqs_resume_time = jiffies + fqs_stutter * HZ;
+		while (jiffies - fqs_resume_time > LONG_MAX) {
+			schedule_timeout_interruptible(1);
+		}
+		fqs_burst_remaining = fqs_duration;
+		while (fqs_burst_remaining > 0) {
+			cur_ops->fqs();
+			udelay(fqs_holdoff);
+			fqs_burst_remaining -= fqs_holdoff;
+		}
+		rcu_stutter_wait("rcu_torture_fqs");
+	} while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP);
+	VERBOSE_PRINTK_STRING("rcu_torture_fqs task stopping");
+	rcutorture_shutdown_absorb("rcu_torture_fqs");
+	while (!kthread_should_stop())
+		schedule_timeout_uninterruptible(1);
+	return 0;
+}
+
 /*
  * RCU torture writer kthread.  Repeatedly substitutes a new structure
  * for that pointed to by rcu_torture_current, freeing the old structure
@@ -1030,10 +1081,11 @@ rcu_torture_print_module_parms(char *tag)
 	printk(KERN_ALERT "%s" TORTURE_FLAG
 		"--- %s: nreaders=%d nfakewriters=%d "
 		"stat_interval=%d verbose=%d test_no_idle_hz=%d "
-		"shuffle_interval=%d stutter=%d irqreader=%d\n",
+		"shuffle_interval=%d stutter=%d irqreader=%d "
+		"fqs_duration=%d fqs_holdoff=%d fqs_stutter=%d\n",
 		torture_type, tag, nrealreaders, nfakewriters,
 		stat_interval, verbose, test_no_idle_hz, shuffle_interval,
-		stutter, irqreader);
+		stutter, irqreader, fqs_duration, fqs_holdoff, fqs_stutter);
 }
 
 static struct notifier_block rcutorture_nb = {
@@ -1109,6 +1161,12 @@ rcu_torture_cleanup(void)
 	}
 	stats_task = NULL;
 
+	if (fqs_task) {
+		VERBOSE_PRINTK_STRING("Stopping rcu_torture_fqs task");
+		kthread_stop(fqs_task);
+	}
+	fqs_task = NULL;
+
 	/* Wait for all RCU callbacks to fire.  */
 
 	if (cur_ops->cb_barrier != NULL)
@@ -1154,6 +1212,11 @@ rcu_torture_init(void)
 		mutex_unlock(&fullstop_mutex);
 		return -EINVAL;
 	}
+	if (cur_ops->fqs == NULL && fqs_duration != 0) {
+		printk(KERN_ALERT "rcu-torture: ->fqs NULL and non-zero "
+				  "fqs_duration, fqs disabled.\n");
+		fqs_duration = 0;
+	}
 	if (cur_ops->init)
 		cur_ops->init(); /* no "goto unwind" prior to this point!!! */
 
@@ -1282,6 +1345,19 @@ rcu_torture_init(void)
 			goto unwind;
 		}
 	}
+	if (fqs_duration < 0)
+		fqs_duration = 0;
+	if (fqs_duration) {
+		/* Create the stutter thread */
+		fqs_task = kthread_run(rcu_torture_fqs, NULL,
+				       "rcu_torture_fqs");
+		if (IS_ERR(fqs_task)) {
+			firsterr = PTR_ERR(fqs_task);
+			VERBOSE_PRINTK_ERRSTRING("Failed to create fqs");
+			fqs_task = NULL;
+			goto unwind;
+		}
+	}
 	register_reboot_notifier(&rcutorture_nb);
 	mutex_unlock(&fullstop_mutex);
 	return 0;
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 55e8f6ef8195..0a4c32879398 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -156,6 +156,24 @@ long rcu_batches_completed_bh(void)
 }
 EXPORT_SYMBOL_GPL(rcu_batches_completed_bh);
 
+/*
+ * Force a quiescent state for RCU BH.
+ */
+void rcu_bh_force_quiescent_state(void)
+{
+	force_quiescent_state(&rcu_bh_state, 0);
+}
+EXPORT_SYMBOL_GPL(rcu_bh_force_quiescent_state);
+
+/*
+ * Force a quiescent state for RCU-sched.
+ */
+void rcu_sched_force_quiescent_state(void)
+{
+	force_quiescent_state(&rcu_sched_state, 0);
+}
+EXPORT_SYMBOL_GPL(rcu_sched_force_quiescent_state);
+
 /*
  * Does the CPU have callbacks ready to be invoked?
  */
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index 37fbccdf41d5..f11ebd44b454 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -61,6 +61,15 @@ long rcu_batches_completed(void)
 }
 EXPORT_SYMBOL_GPL(rcu_batches_completed);
 
+/*
+ * Force a quiescent state for preemptible RCU.
+ */
+void rcu_force_quiescent_state(void)
+{
+	force_quiescent_state(&rcu_preempt_state, 0);
+}
+EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
+
 /*
  * Record a preemptable-RCU quiescent state for the specified CPU.  Note
  * that this just means that the task currently running on the CPU is
@@ -712,6 +721,16 @@ long rcu_batches_completed(void)
 }
 EXPORT_SYMBOL_GPL(rcu_batches_completed);
 
+/*
+ * Force a quiescent state for RCU, which, because there is no preemptible
+ * RCU, becomes the same as rcu-sched.
+ */
+void rcu_force_quiescent_state(void)
+{
+	rcu_sched_force_quiescent_state();
+}
+EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
+
 /*
  * Because preemptable RCU does not exist, we never have to check for
  * CPUs being in quiescent states.
-- 
cgit v1.2.3


From f6a8c60960bbea378142d1fa1b3d111555ee41c7 Mon Sep 17 00:00:00 2001
From: Russell King - ARM Linux <linux@arm.linux.org.uk>
Date: Sun, 29 Nov 2009 15:23:51 +0000
Subject: mtd: Really add ARM pismo support

(Commit 7cb777a3d71f9d1f7eb149c7a504d21f24219ae8 (mtd: add ARM pismo support)
intended to add this, but seems only to have patched the Makefile without
touching Kconfig or providing any code...)

The following patch adds support for PISMO modules found on ARM Ltd
development platforms.  These are MTD modules, and can have a
selection of SRAM, flash or DOC devices as described by an on-board
I2C EEPROM.

We support SRAM and NOR flash devices only by registering appropriate
conventional MTD platform devices as children of the 'pismo' device.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/maps/Kconfig  |  17 +++
 drivers/mtd/maps/pismo.c  | 320 ++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/mtd/pismo.h |  17 +++
 3 files changed, 354 insertions(+)
 create mode 100644 drivers/mtd/maps/pismo.c
 create mode 100644 include/linux/mtd/pismo.h

(limited to 'include/linux')

diff --git a/drivers/mtd/maps/Kconfig b/drivers/mtd/maps/Kconfig
index 4c364d44ad59..2de0cc823d60 100644
--- a/drivers/mtd/maps/Kconfig
+++ b/drivers/mtd/maps/Kconfig
@@ -549,4 +549,21 @@ config MTD_VMU
 	  To build this as a module select M here, the module will be called
 	  vmu-flash.
 
+config MTD_PISMO
+	tristate "MTD discovery driver for PISMO modules"
+	depends on I2C
+	depends on ARCH_VERSATILE
+	help
+	  This driver allows for discovery of PISMO modules - see
+	  <http://www.pismoworld.org/>.  These are small modules containing
+	  up to five memory devices (eg, SRAM, flash, DOC) described by an
+	  I2C EEPROM.
+
+	  This driver does not create any MTD maps itself; instead it
+	  creates MTD physmap and MTD SRAM platform devices.  If you
+	  enable this option, you should consider enabling MTD_PHYSMAP
+	  and/or MTD_PLATRAM according to the devices on your module.
+
+	  When built as a module, it will be called pismo.ko
+
 endmenu
diff --git a/drivers/mtd/maps/pismo.c b/drivers/mtd/maps/pismo.c
new file mode 100644
index 000000000000..c48cad271f5d
--- /dev/null
+++ b/drivers/mtd/maps/pismo.c
@@ -0,0 +1,320 @@
+/*
+ * PISMO memory driver - http://www.pismoworld.org/
+ *
+ * For ARM Realview and Versatile platforms
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License.
+ */
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/i2c.h>
+#include <linux/platform_device.h>
+#include <linux/spinlock.h>
+#include <linux/mutex.h>
+#include <linux/mtd/physmap.h>
+#include <linux/mtd/plat-ram.h>
+#include <linux/mtd/pismo.h>
+
+#define PISMO_NUM_CS	5
+
+struct pismo_cs_block {
+	u8	type;
+	u8	width;
+	__le16	access;
+	__le32	size;
+	u32	reserved[2];
+	char	device[32];
+} __packed;
+
+struct pismo_eeprom {
+	struct pismo_cs_block cs[PISMO_NUM_CS];
+	char	board[15];
+	u8	sum;
+} __packed;
+
+struct pismo_mem {
+	phys_addr_t base;
+	u32	size;
+	u16	access;
+	u8	width;
+	u8	type;
+};
+
+struct pismo_data {
+	struct i2c_client	*client;
+	void			(*vpp)(void *, int);
+	void			*vpp_data;
+	struct platform_device	*dev[PISMO_NUM_CS];
+};
+
+/* FIXME: set_vpp could do with a better calling convention */
+static struct pismo_data *vpp_pismo;
+static DEFINE_MUTEX(pismo_mutex);
+
+static int pismo_setvpp_probe_fix(struct pismo_data *pismo)
+{
+	mutex_lock(&pismo_mutex);
+	if (vpp_pismo) {
+		mutex_unlock(&pismo_mutex);
+		kfree(pismo);
+		return -EBUSY;
+	}
+	vpp_pismo = pismo;
+	mutex_unlock(&pismo_mutex);
+	return 0;
+}
+
+static void pismo_setvpp_remove_fix(struct pismo_data *pismo)
+{
+	mutex_lock(&pismo_mutex);
+	if (vpp_pismo == pismo)
+		vpp_pismo = NULL;
+	mutex_unlock(&pismo_mutex);
+}
+
+static void pismo_set_vpp(struct map_info *map, int on)
+{
+	struct pismo_data *pismo = vpp_pismo;
+
+	pismo->vpp(pismo->vpp_data, on);
+}
+/* end of hack */
+
+
+static unsigned int __devinit pismo_width_to_bytes(unsigned int width)
+{
+	width &= 15;
+	if (width > 2)
+		return 0;
+	return 1 << width;
+}
+
+static int __devinit pismo_eeprom_read(struct i2c_client *client, void *buf,
+	u8 addr, size_t size)
+{
+	int ret;
+	struct i2c_msg msg[] = {
+		{
+			.addr = client->addr,
+			.len = sizeof(addr),
+			.buf = &addr,
+		}, {
+			.addr = client->addr,
+			.flags = I2C_M_RD,
+			.len = size,
+			.buf = buf,
+		},
+	};
+
+	ret = i2c_transfer(client->adapter, msg, ARRAY_SIZE(msg));
+
+	return ret == ARRAY_SIZE(msg) ? size : -EIO;
+}
+
+static int __devinit pismo_add_device(struct pismo_data *pismo, int i,
+	struct pismo_mem *region, const char *name, void *pdata, size_t psize)
+{
+	struct platform_device *dev;
+	struct resource res = { };
+	phys_addr_t base = region.base;
+	int ret;
+
+	if (base == ~0)
+		return -ENXIO;
+
+	res.start = base;
+	res.end = base + region->size - 1;
+	res.flags = IORESOURCE_MEM;
+
+	dev = platform_device_alloc(name, i);
+	if (!dev)
+		return -ENOMEM;
+	dev->dev.parent = &pismo->client->dev;
+
+	do {
+		ret = platform_device_add_resources(dev, &res, 1);
+		if (ret)
+			break;
+
+		ret = platform_device_add_data(dev, pdata, psize);
+		if (ret)
+			break;
+
+		ret = platform_device_add(dev);
+		if (ret)
+			break;
+
+		pismo->dev[i] = dev;
+		return 0;
+	} while (0);
+
+	platform_device_put(dev);
+	return ret;
+}
+
+static int __devinit pismo_add_nor(struct pismo_data *pismo, int i,
+	struct pismo_mem *region)
+{
+	struct physmap_flash_data data = {
+		.width = region->width,
+	};
+
+	if (pismo->vpp)
+		data.set_vpp = pismo_set_vpp;
+
+	return pismo_add_device(pismo, i, region, "physmap-flash",
+		&data, sizeof(data));
+}
+
+static int __devinit pismo_add_sram(struct pismo_data *pismo, int i,
+	struct pismo_mem *region)
+{
+	struct platdata_mtd_ram data = {
+		.bankwidth = region->width,
+	};
+
+	return pismo_add_device(pismo, i, region, "mtd-ram",
+		&data, sizeof(data));
+}
+
+static void __devinit pismo_add_one(struct pismo_data *pismo, int i,
+	const struct pismo_cs_block *cs, phys_addr_t base)
+{
+	struct device *dev = &pismo->client->dev;
+	struct pismo_mem region;
+
+	region.base = base;
+	region.type = cs->type;
+	region.width = pismo_width_to_bytes(cs->width);
+	region.access = le16_to_cpu(cs->access);
+	region.size = le32_to_cpu(cs->size);
+
+	if (region.width == 0) {
+		dev_err(dev, "cs%u: bad width: %02x, ignoring\n", i, cs->width);
+		return;
+	}
+
+	/*
+	 * FIXME: may need to the platforms memory controller here, but at
+	 * the moment we assume that it has already been correctly setup.
+	 * The memory controller can also tell us the base address as well.
+	 */
+
+	dev_info(dev, "cs%u: %.32s: type %02x access %u00ps size %uK\n",
+		i, cs->device, region.type, region.access, region.size / 1024);
+
+	switch (region.type) {
+	case 0:
+		break;
+	case 1:
+		/* static DOC */
+		break;
+	case 2:
+		/* static NOR */
+		pismo_add_nor(pismo, i, &region);
+		break;
+	case 3:
+		/* static RAM */
+		pismo_add_sram(pismo, i, &region);
+		break;
+	}
+}
+
+static int __devexit pismo_remove(struct i2c_client *client)
+{
+	struct pismo_data *pismo = i2c_get_clientdata(client);
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(pismo->dev); i++)
+		platform_device_unregister(pismo->dev[i]);
+
+	/* FIXME: set_vpp needs saner arguments */
+	pismo_setvpp_remove_fix(pismo);
+
+	kfree(pismo);
+
+	return 0;
+}
+
+static int __devinit pismo_probe(struct i2c_client *client,
+				 const struct i2c_device_id *id)
+{
+	struct i2c_adapter *adapter = to_i2c_adapter(client->dev.parent);
+	struct pismo_pdata *pdata = client->dev.platform_data;
+	struct pismo_eeprom eeprom;
+	struct pismo_data *pismo;
+	int ret, i;
+
+	if (!i2c_check_functionality(adapter, I2C_FUNC_I2C)) {
+		dev_err(&client->dev, "functionality mismatch\n");
+		return -EIO;
+	}
+
+	pismo = kzalloc(sizeof(*pismo), GFP_KERNEL);
+	if (!pismo)
+		return -ENOMEM;
+
+	/* FIXME: set_vpp needs saner arguments */
+	ret = pismo_setvpp_probe_fix(pismo);
+	if (ret)
+		return ret;
+
+	pismo->client = client;
+	if (pdata) {
+		pismo->vpp = pdata->set_vpp;
+		pismo->vpp_data = pdata->vpp_data;
+	}
+	i2c_set_clientdata(client, pismo);
+
+	ret = pismo_eeprom_read(client, &eeprom, 0, sizeof(eeprom));
+	if (ret < 0) {
+		dev_err(&client->dev, "error reading EEPROM: %d\n", ret);
+		return ret;
+	}
+
+	dev_info(&client->dev, "%.15s board found\n", eeprom.board);
+
+	for (i = 0; i < ARRAY_SIZE(eeprom.cs); i++)
+		if (eeprom.cs[i].type != 0xff)
+			pismo_add_one(pismo, i, &eeprom.cs[i],
+				      pdata->cs_addrs[i]);
+
+	return 0;
+}
+
+static const struct i2c_device_id pismo_id[] = {
+	{ "pismo" },
+	{ },
+};
+MODULE_DEVICE_TABLE(i2c, pismo_id);
+
+static struct i2c_driver pismo_driver = {
+	.driver	= {
+		.name	= "pismo",
+		.owner	= THIS_MODULE,
+	},
+	.probe		= pismo_probe,
+	.remove		= __devexit_p(pismo_remove),
+	.id_table	= pismo_id,
+};
+
+static int __init pismo_init(void)
+{
+	BUILD_BUG_ON(sizeof(struct pismo_cs_block) != 48);
+	BUILD_BUG_ON(sizeof(struct pismo_eeprom) != 256);
+
+	return i2c_add_driver(&pismo_driver);
+}
+module_init(pismo_init);
+
+static void __exit pismo_exit(void)
+{
+	i2c_del_driver(&pismo_driver);
+}
+module_exit(pismo_exit);
+
+MODULE_AUTHOR("Russell King <linux@arm.linux.org.uk>");
+MODULE_DESCRIPTION("PISMO memory driver");
+MODULE_LICENSE("GPL");
diff --git a/include/linux/mtd/pismo.h b/include/linux/mtd/pismo.h
new file mode 100644
index 000000000000..8dfb7e1421c5
--- /dev/null
+++ b/include/linux/mtd/pismo.h
@@ -0,0 +1,17 @@
+/*
+ * PISMO memory driver - http://www.pismoworld.org/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License.
+ */
+#ifndef __LINUX_MTD_PISMO_H
+#define __LINUX_MTD_PISMO_H
+
+struct pismo_pdata {
+	void			(*set_vpp)(void *, int);
+	void			*vpp_data;
+	phys_addr_t		cs_addrs[5];
+};
+
+#endif
-- 
cgit v1.2.3


From 9ca94d7c016130f9ed77f142424ace9c19742809 Mon Sep 17 00:00:00 2001
From: John Kacur <jkacur@redhat.com>
Date: Mon, 11 Jan 2010 21:21:06 +0100
Subject: plist: Fix grammar mistake, and c-style mistake

Signed-off-by: John Kacur <jkacur@redhat.com>
Acked-by: Peter Zijlstra <peterz@infradead.org>
LKML-Reference: <1263241267-25204-2-git-send-email-jkacur@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/plist.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/plist.h b/include/linux/plist.h
index 8227f717c70f..6898985e7b38 100644
--- a/include/linux/plist.h
+++ b/include/linux/plist.h
@@ -45,7 +45,7 @@
  * the insertion of new nodes. There are no nodes with duplicate
  * priorites on the list.
  *
- * The nodes on the node_list is ordered by priority and can contain
+ * The nodes on the node_list are ordered by priority and can contain
  * entries which have the same priority. Those entries are ordered
  * FIFO
  *
@@ -265,7 +265,7 @@ static inline int plist_node_empty(const struct plist_node *node)
  *
  * Assumes the plist is _not_ empty.
  */
-static inline struct plist_node* plist_first(const struct plist_head *head)
+static inline struct plist_node *plist_first(const struct plist_head *head)
 {
 	return list_entry(head->node_list.next,
 			  struct plist_node, plist.node_list);
-- 
cgit v1.2.3


From 599faa0e264fe2e7f563f87b4aad8c83e9dc46d1 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Tue, 5 Jan 2010 13:29:58 +0000
Subject: genirq: Fix documentation of default chip disable()

The documentation says that by default disable() will be
chip->mask but in fact default_disable() is a noop.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
LKML-Reference: <1262698198-30392-1-git-send-email-broonie@opensource.wolfsonmicro.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/irq.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/irq.h b/include/linux/irq.h
index 451481c082b5..d13492df57a1 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -90,7 +90,7 @@ struct msi_desc;
  * @startup:		start up the interrupt (defaults to ->enable if NULL)
  * @shutdown:		shut down the interrupt (defaults to ->disable if NULL)
  * @enable:		enable the interrupt (defaults to chip->unmask if NULL)
- * @disable:		disable the interrupt (defaults to chip->mask if NULL)
+ * @disable:		disable the interrupt
  * @ack:		start of a new interrupt
  * @mask:		mask an interrupt source
  * @mask_ack:		ack and mask an interrupt source
-- 
cgit v1.2.3


From cd8c20b650f49354722b8cc1f03320b004815a0a Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Wed, 13 Jan 2010 16:02:14 +0100
Subject: netfilter: nfnetlink: netns support

Make nfnl socket per-petns.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/netfilter/nfnetlink.h  |  8 ++---
 include/net/net_namespace.h          |  2 ++
 net/netfilter/nf_conntrack_netlink.c | 13 ++++----
 net/netfilter/nfnetlink.c            | 65 +++++++++++++++++++++++-------------
 net/netfilter/nfnetlink_log.c        |  3 +-
 net/netfilter/nfnetlink_queue.c      |  2 +-
 6 files changed, 58 insertions(+), 35 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h
index 49d321f3ccd2..53923868c9bd 100644
--- a/include/linux/netfilter/nfnetlink.h
+++ b/include/linux/netfilter/nfnetlink.h
@@ -73,11 +73,11 @@ struct nfnetlink_subsystem {
 extern int nfnetlink_subsys_register(const struct nfnetlink_subsystem *n);
 extern int nfnetlink_subsys_unregister(const struct nfnetlink_subsystem *n);
 
-extern int nfnetlink_has_listeners(unsigned int group);
-extern int nfnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, 
+extern int nfnetlink_has_listeners(struct net *net, unsigned int group);
+extern int nfnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, unsigned group,
 			  int echo, gfp_t flags);
-extern void nfnetlink_set_err(u32 pid, u32 group, int error);
-extern int nfnetlink_unicast(struct sk_buff *skb, u_int32_t pid, int flags);
+extern void nfnetlink_set_err(struct net *net, u32 pid, u32 group, int error);
+extern int nfnetlink_unicast(struct sk_buff *skb, struct net *net, u_int32_t pid, int flags);
 
 extern void nfnl_lock(void);
 extern void nfnl_unlock(void);
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index f307e133d14c..82b7be4db89a 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -81,6 +81,8 @@ struct net {
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 	struct netns_ct		ct;
 #endif
+	struct sock		*nfnl;
+	struct sock		*nfnl_stash;
 #endif
 #ifdef CONFIG_XFRM
 	struct netns_xfrm	xfrm;
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 59d8064eb522..d4c5d06677f9 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -482,7 +482,7 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
 	} else
 		return 0;
 
-	if (!item->report && !nfnetlink_has_listeners(group))
+	if (!item->report && !nfnetlink_has_listeners(&init_net, group))
 		return 0;
 
 	skb = nlmsg_new(ctnetlink_nlmsg_size(ct), GFP_ATOMIC);
@@ -559,7 +559,8 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
 	rcu_read_unlock();
 
 	nlmsg_end(skb, nlh);
-	err = nfnetlink_send(skb, item->pid, group, item->report, GFP_ATOMIC);
+	err = nfnetlink_send(skb, &init_net, item->pid, group, item->report,
+			     GFP_ATOMIC);
 	if (err == -ENOBUFS || err == -EAGAIN)
 		return -ENOBUFS;
 
@@ -571,7 +572,7 @@ nla_put_failure:
 nlmsg_failure:
 	kfree_skb(skb);
 errout:
-	nfnetlink_set_err(0, group, -ENOBUFS);
+	nfnetlink_set_err(&init_net, 0, group, -ENOBUFS);
 	return 0;
 }
 #endif /* CONFIG_NF_CONNTRACK_EVENTS */
@@ -1539,7 +1540,7 @@ ctnetlink_expect_event(unsigned int events, struct nf_exp_event *item)
 		return 0;
 
 	if (!item->report &&
-	    !nfnetlink_has_listeners(NFNLGRP_CONNTRACK_EXP_NEW))
+	    !nfnetlink_has_listeners(&init_net, NFNLGRP_CONNTRACK_EXP_NEW))
 		return 0;
 
 	skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
@@ -1562,7 +1563,7 @@ ctnetlink_expect_event(unsigned int events, struct nf_exp_event *item)
 	rcu_read_unlock();
 
 	nlmsg_end(skb, nlh);
-	nfnetlink_send(skb, item->pid, NFNLGRP_CONNTRACK_EXP_NEW,
+	nfnetlink_send(skb, &init_net, item->pid, NFNLGRP_CONNTRACK_EXP_NEW,
 		       item->report, GFP_ATOMIC);
 	return 0;
 
@@ -1572,7 +1573,7 @@ nla_put_failure:
 nlmsg_failure:
 	kfree_skb(skb);
 errout:
-	nfnetlink_set_err(0, 0, -ENOBUFS);
+	nfnetlink_set_err(&init_net, 0, 0, -ENOBUFS);
 	return 0;
 }
 #endif
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index eedc0c1ac7a4..8eb0cc23ada3 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -40,7 +40,6 @@ MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_NETFILTER);
 
 static char __initdata nfversion[] = "0.30";
 
-static struct sock *nfnl = NULL;
 static const struct nfnetlink_subsystem *subsys_table[NFNL_SUBSYS_COUNT];
 static DEFINE_MUTEX(nfnl_mutex);
 
@@ -101,34 +100,35 @@ nfnetlink_find_client(u_int16_t type, const struct nfnetlink_subsystem *ss)
 	return &ss->cb[cb_id];
 }
 
-int nfnetlink_has_listeners(unsigned int group)
+int nfnetlink_has_listeners(struct net *net, unsigned int group)
 {
-	return netlink_has_listeners(nfnl, group);
+	return netlink_has_listeners(net->nfnl, group);
 }
 EXPORT_SYMBOL_GPL(nfnetlink_has_listeners);
 
-int nfnetlink_send(struct sk_buff *skb, u32 pid,
+int nfnetlink_send(struct sk_buff *skb, struct net *net, u32 pid,
 		   unsigned group, int echo, gfp_t flags)
 {
-	return nlmsg_notify(nfnl, skb, pid, group, echo, flags);
+	return nlmsg_notify(net->nfnl, skb, pid, group, echo, flags);
 }
 EXPORT_SYMBOL_GPL(nfnetlink_send);
 
-void nfnetlink_set_err(u32 pid, u32 group, int error)
+void nfnetlink_set_err(struct net *net, u32 pid, u32 group, int error)
 {
-	netlink_set_err(nfnl, pid, group, error);
+	netlink_set_err(net->nfnl, pid, group, error);
 }
 EXPORT_SYMBOL_GPL(nfnetlink_set_err);
 
-int nfnetlink_unicast(struct sk_buff *skb, u_int32_t pid, int flags)
+int nfnetlink_unicast(struct sk_buff *skb, struct net *net, u_int32_t pid, int flags)
 {
-	return netlink_unicast(nfnl, skb, pid, flags);
+	return netlink_unicast(net->nfnl, skb, pid, flags);
 }
 EXPORT_SYMBOL_GPL(nfnetlink_unicast);
 
 /* Process one complete nfnetlink message. */
 static int nfnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 {
+	struct net *net = sock_net(skb->sk);
 	const struct nfnl_callback *nc;
 	const struct nfnetlink_subsystem *ss;
 	int type, err;
@@ -170,7 +170,7 @@ replay:
 		if (err < 0)
 			return err;
 
-		err = nc->call(nfnl, skb, nlh, (const struct nlattr **)cda);
+		err = nc->call(net->nfnl, skb, nlh, (const struct nlattr **)cda);
 		if (err == -EAGAIN)
 			goto replay;
 		return err;
@@ -184,26 +184,45 @@ static void nfnetlink_rcv(struct sk_buff *skb)
 	nfnl_unlock();
 }
 
-static void __exit nfnetlink_exit(void)
+static int __net_init nfnetlink_net_init(struct net *net)
 {
-	printk("Removing netfilter NETLINK layer.\n");
-	netlink_kernel_release(nfnl);
-	return;
+	struct sock *nfnl;
+
+	nfnl = netlink_kernel_create(net, NETLINK_NETFILTER, NFNLGRP_MAX,
+				     nfnetlink_rcv, NULL, THIS_MODULE);
+	if (!nfnl)
+		return -ENOMEM;
+	net->nfnl_stash = nfnl;
+	rcu_assign_pointer(net->nfnl, nfnl);
+	return 0;
 }
 
-static int __init nfnetlink_init(void)
+static void __net_exit nfnetlink_net_exit_batch(struct list_head *net_exit_list)
 {
-	printk("Netfilter messages via NETLINK v%s.\n", nfversion);
+	struct net *net;
 
-	nfnl = netlink_kernel_create(&init_net, NETLINK_NETFILTER, NFNLGRP_MAX,
-				     nfnetlink_rcv, NULL, THIS_MODULE);
-	if (!nfnl) {
-		printk(KERN_ERR "cannot initialize nfnetlink!\n");
-		return -ENOMEM;
-	}
+	list_for_each_entry(net, net_exit_list, exit_list)
+		rcu_assign_pointer(net->nfnl, NULL);
+	synchronize_net();
+	list_for_each_entry(net, net_exit_list, exit_list)
+		netlink_kernel_release(net->nfnl_stash);
+}
 
-	return 0;
+static struct pernet_operations nfnetlink_net_ops = {
+	.init		= nfnetlink_net_init,
+	.exit_batch	= nfnetlink_net_exit_batch,
+};
+
+static int __init nfnetlink_init(void)
+{
+	printk("Netfilter messages via NETLINK v%s.\n", nfversion);
+	return register_pernet_subsys(&nfnetlink_net_ops);
 }
 
+static void __exit nfnetlink_exit(void)
+{
+	printk("Removing netfilter NETLINK layer.\n");
+	unregister_pernet_subsys(&nfnetlink_net_ops);
+}
 module_init(nfnetlink_init);
 module_exit(nfnetlink_exit);
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 9de0470d557e..285e9029a9ff 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -323,7 +323,8 @@ __nfulnl_send(struct nfulnl_instance *inst)
 			  NLMSG_DONE,
 			  sizeof(struct nfgenmsg));
 
-	status = nfnetlink_unicast(inst->skb, inst->peer_pid, MSG_DONTWAIT);
+	status = nfnetlink_unicast(inst->skb, &init_net, inst->peer_pid,
+				   MSG_DONTWAIT);
 
 	inst->qlen = 0;
 	inst->skb = NULL;
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 7e3fa410641e..5c589b27d6eb 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -420,7 +420,7 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
 	}
 
 	/* nfnetlink_unicast will either free the nskb or add it to a socket */
-	err = nfnetlink_unicast(nskb, queue->peer_pid, MSG_DONTWAIT);
+	err = nfnetlink_unicast(nskb, &init_net, queue->peer_pid, MSG_DONTWAIT);
 	if (err < 0) {
 		queue->queue_user_dropped++;
 		goto err_out_unlock;
-- 
cgit v1.2.3


From 508e14b4a4fb1a824a14f2c5b8d7df67b313f8e4 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <danborkmann@googlemail.com>
Date: Tue, 12 Jan 2010 14:27:30 +0000
Subject: netpoll: allow execution of multiple rx_hooks per interface

Signed-off-by: Daniel Borkmann <danborkmann@googlemail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netpoll.h |  11 +++-
 net/core/netpoll.c      | 169 ++++++++++++++++++++++++++++++------------------
 2 files changed, 114 insertions(+), 66 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h
index 2524267210d3..a765ea898549 100644
--- a/include/linux/netpoll.h
+++ b/include/linux/netpoll.h
@@ -21,15 +21,20 @@ struct netpoll {
 	__be32 local_ip, remote_ip;
 	u16 local_port, remote_port;
 	u8 remote_mac[ETH_ALEN];
+
+	struct list_head rx; /* rx_np list element */
 };
 
 struct netpoll_info {
 	atomic_t refcnt;
+
 	int rx_flags;
 	spinlock_t rx_lock;
-	struct netpoll *rx_np; /* netpoll that registered an rx_hook */
+	struct list_head rx_np; /* netpolls that registered an rx_hook */
+
 	struct sk_buff_head arp_tx; /* list of arp requests to reply to */
 	struct sk_buff_head txq;
+
 	struct delayed_work tx_work;
 };
 
@@ -51,7 +56,7 @@ static inline int netpoll_rx(struct sk_buff *skb)
 	unsigned long flags;
 	int ret = 0;
 
-	if (!npinfo || (!npinfo->rx_np && !npinfo->rx_flags))
+	if (!npinfo || (list_empty(&npinfo->rx_np) && !npinfo->rx_flags))
 		return 0;
 
 	spin_lock_irqsave(&npinfo->rx_lock, flags);
@@ -67,7 +72,7 @@ static inline int netpoll_rx_on(struct sk_buff *skb)
 {
 	struct netpoll_info *npinfo = skb->dev->npinfo;
 
-	return npinfo && (npinfo->rx_np || npinfo->rx_flags);
+	return npinfo && (!list_empty(&npinfo->rx_np) || npinfo->rx_flags);
 }
 
 static inline int netpoll_receive_skb(struct sk_buff *skb)
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 0b4d0d35ef40..7aa697253765 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -407,11 +407,24 @@ static void arp_reply(struct sk_buff *skb)
 	__be32 sip, tip;
 	unsigned char *sha;
 	struct sk_buff *send_skb;
-	struct netpoll *np = NULL;
+	struct netpoll *np, *tmp;
+	unsigned long flags;
+	int hits = 0;
+
+	if (list_empty(&npinfo->rx_np))
+		return;
+
+	/* Before checking the packet, we do some early
+	   inspection whether this is interesting at all */
+	spin_lock_irqsave(&npinfo->rx_lock, flags);
+	list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
+		if (np->dev == skb->dev)
+			hits++;
+	}
+	spin_unlock_irqrestore(&npinfo->rx_lock, flags);
 
-	if (npinfo->rx_np && npinfo->rx_np->dev == skb->dev)
-		np = npinfo->rx_np;
-	if (!np)
+	/* No netpoll struct is using this dev */
+	if (!hits)
 		return;
 
 	/* No arp on this interface */
@@ -437,77 +450,91 @@ static void arp_reply(struct sk_buff *skb)
 	arp_ptr += skb->dev->addr_len;
 	memcpy(&sip, arp_ptr, 4);
 	arp_ptr += 4;
-	/* if we actually cared about dst hw addr, it would get copied here */
+	/* If we actually cared about dst hw addr,
+	   it would get copied here */
 	arp_ptr += skb->dev->addr_len;
 	memcpy(&tip, arp_ptr, 4);
 
 	/* Should we ignore arp? */
-	if (tip != np->local_ip ||
-	    ipv4_is_loopback(tip) || ipv4_is_multicast(tip))
+	if (ipv4_is_loopback(tip) || ipv4_is_multicast(tip))
 		return;
 
 	size = arp_hdr_len(skb->dev);
-	send_skb = find_skb(np, size + LL_ALLOCATED_SPACE(np->dev),
-			    LL_RESERVED_SPACE(np->dev));
 
-	if (!send_skb)
-		return;
-
-	skb_reset_network_header(send_skb);
-	arp = (struct arphdr *) skb_put(send_skb, size);
-	send_skb->dev = skb->dev;
-	send_skb->protocol = htons(ETH_P_ARP);
+	spin_lock_irqsave(&npinfo->rx_lock, flags);
+	list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
+		if (tip != np->local_ip)
+			continue;
 
-	/* Fill the device header for the ARP frame */
-	if (dev_hard_header(send_skb, skb->dev, ptype,
-			    sha, np->dev->dev_addr,
-			    send_skb->len) < 0) {
-		kfree_skb(send_skb);
-		return;
-	}
+		send_skb = find_skb(np, size + LL_ALLOCATED_SPACE(np->dev),
+				    LL_RESERVED_SPACE(np->dev));
+		if (!send_skb)
+			continue;
 
-	/*
-	 * Fill out the arp protocol part.
-	 *
-	 * we only support ethernet device type,
-	 * which (according to RFC 1390) should always equal 1 (Ethernet).
-	 */
+		skb_reset_network_header(send_skb);
+		arp = (struct arphdr *) skb_put(send_skb, size);
+		send_skb->dev = skb->dev;
+		send_skb->protocol = htons(ETH_P_ARP);
 
-	arp->ar_hrd = htons(np->dev->type);
-	arp->ar_pro = htons(ETH_P_IP);
-	arp->ar_hln = np->dev->addr_len;
-	arp->ar_pln = 4;
-	arp->ar_op = htons(type);
+		/* Fill the device header for the ARP frame */
+		if (dev_hard_header(send_skb, skb->dev, ptype,
+				    sha, np->dev->dev_addr,
+				    send_skb->len) < 0) {
+			kfree_skb(send_skb);
+			continue;
+		}
 
-	arp_ptr=(unsigned char *)(arp + 1);
-	memcpy(arp_ptr, np->dev->dev_addr, np->dev->addr_len);
-	arp_ptr += np->dev->addr_len;
-	memcpy(arp_ptr, &tip, 4);
-	arp_ptr += 4;
-	memcpy(arp_ptr, sha, np->dev->addr_len);
-	arp_ptr += np->dev->addr_len;
-	memcpy(arp_ptr, &sip, 4);
+		/*
+		 * Fill out the arp protocol part.
+		 *
+		 * we only support ethernet device type,
+		 * which (according to RFC 1390) should
+		 * always equal 1 (Ethernet).
+		 */
 
-	netpoll_send_skb(np, send_skb);
+		arp->ar_hrd = htons(np->dev->type);
+		arp->ar_pro = htons(ETH_P_IP);
+		arp->ar_hln = np->dev->addr_len;
+		arp->ar_pln = 4;
+		arp->ar_op = htons(type);
+
+		arp_ptr = (unsigned char *)(arp + 1);
+		memcpy(arp_ptr, np->dev->dev_addr, np->dev->addr_len);
+		arp_ptr += np->dev->addr_len;
+		memcpy(arp_ptr, &tip, 4);
+		arp_ptr += 4;
+		memcpy(arp_ptr, sha, np->dev->addr_len);
+		arp_ptr += np->dev->addr_len;
+		memcpy(arp_ptr, &sip, 4);
+
+		netpoll_send_skb(np, send_skb);
+
+		/* If there are several rx_hooks for the same address,
+		   we're fine by sending a single reply */
+		break;
+	}
+	spin_unlock_irqrestore(&npinfo->rx_lock, flags);
 }
 
 int __netpoll_rx(struct sk_buff *skb)
 {
 	int proto, len, ulen;
+	int hits = 0;
 	struct iphdr *iph;
 	struct udphdr *uh;
-	struct netpoll_info *npi = skb->dev->npinfo;
-	struct netpoll *np = npi->rx_np;
+	struct netpoll_info *npinfo = skb->dev->npinfo;
+	struct netpoll *np, *tmp;
 
-	if (!np)
+	if (list_empty(&npinfo->rx_np))
 		goto out;
+
 	if (skb->dev->type != ARPHRD_ETHER)
 		goto out;
 
 	/* check if netpoll clients need ARP */
 	if (skb->protocol == htons(ETH_P_ARP) &&
 	    atomic_read(&trapped)) {
-		skb_queue_tail(&npi->arp_tx, skb);
+		skb_queue_tail(&npinfo->arp_tx, skb);
 		return 1;
 	}
 
@@ -551,16 +578,23 @@ int __netpoll_rx(struct sk_buff *skb)
 		goto out;
 	if (checksum_udp(skb, uh, ulen, iph->saddr, iph->daddr))
 		goto out;
-	if (np->local_ip && np->local_ip != iph->daddr)
-		goto out;
-	if (np->remote_ip && np->remote_ip != iph->saddr)
-		goto out;
-	if (np->local_port && np->local_port != ntohs(uh->dest))
-		goto out;
 
-	np->rx_hook(np, ntohs(uh->source),
-		    (char *)(uh+1),
-		    ulen - sizeof(struct udphdr));
+	list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
+		if (np->local_ip && np->local_ip != iph->daddr)
+			continue;
+		if (np->remote_ip && np->remote_ip != iph->saddr)
+			continue;
+		if (np->local_port && np->local_port != ntohs(uh->dest))
+			continue;
+
+		np->rx_hook(np, ntohs(uh->source),
+			       (char *)(uh+1),
+			       ulen - sizeof(struct udphdr));
+		hits++;
+	}
+
+	if (!hits)
+		goto out;
 
 	kfree_skb(skb);
 	return 1;
@@ -684,6 +718,7 @@ int netpoll_setup(struct netpoll *np)
 	struct net_device *ndev = NULL;
 	struct in_device *in_dev;
 	struct netpoll_info *npinfo;
+	struct netpoll *npe, *tmp;
 	unsigned long flags;
 	int err;
 
@@ -704,7 +739,7 @@ int netpoll_setup(struct netpoll *np)
 		}
 
 		npinfo->rx_flags = 0;
-		npinfo->rx_np = NULL;
+		INIT_LIST_HEAD(&npinfo->rx_np);
 
 		spin_lock_init(&npinfo->rx_lock);
 		skb_queue_head_init(&npinfo->arp_tx);
@@ -785,7 +820,7 @@ int netpoll_setup(struct netpoll *np)
 	if (np->rx_hook) {
 		spin_lock_irqsave(&npinfo->rx_lock, flags);
 		npinfo->rx_flags |= NETPOLL_RX_ENABLED;
-		npinfo->rx_np = np;
+		list_add_tail(&np->rx, &npinfo->rx_np);
 		spin_unlock_irqrestore(&npinfo->rx_lock, flags);
 	}
 
@@ -801,9 +836,16 @@ int netpoll_setup(struct netpoll *np)
 	return 0;
 
  release:
-	if (!ndev->npinfo)
+	if (!ndev->npinfo) {
+		spin_lock_irqsave(&npinfo->rx_lock, flags);
+		list_for_each_entry_safe(npe, tmp, &npinfo->rx_np, rx) {
+			npe->dev = NULL;
+		}
+		spin_unlock_irqrestore(&npinfo->rx_lock, flags);
+
 		kfree(npinfo);
-	np->dev = NULL;
+	}
+
 	dev_put(ndev);
 	return err;
 }
@@ -823,10 +865,11 @@ void netpoll_cleanup(struct netpoll *np)
 	if (np->dev) {
 		npinfo = np->dev->npinfo;
 		if (npinfo) {
-			if (npinfo->rx_np == np) {
+			if (!list_empty(&npinfo->rx_np)) {
 				spin_lock_irqsave(&npinfo->rx_lock, flags);
-				npinfo->rx_np = NULL;
-				npinfo->rx_flags &= ~NETPOLL_RX_ENABLED;
+				list_del(&np->rx);
+				if (list_empty(&npinfo->rx_np))
+					npinfo->rx_flags &= ~NETPOLL_RX_ENABLED;
 				spin_unlock_irqrestore(&npinfo->rx_lock, flags);
 			}
 
-- 
cgit v1.2.3


From 9a58a80a701bdb2d220cdab4914218df5b48d781 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Thu, 14 Jan 2010 03:10:54 -0800
Subject: proc_fops: convert drivers/isdn/ to seq_file

Convert code away from ->read_proc/->write_proc interfaces.  Switch to
proc_create()/proc_create_data() which make addition of proc entries
reliable wrt NULL ->proc_fops, NULL ->data and so on.

Problem with ->read_proc et al is described here commit
786d7e1612f0b0adb6046f19b906609e4fe8b1ba "Fix rmmod/read/write races in
/proc entries"

[akpm@linux-foundation.org: CONFIG_PROC_FS=n build fix]
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Tilman Schmidt <tilman@imap.cc>
Signed-off-by: Karsten Keil <keil@b1-systems.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/isdn/INTERFACE.CAPI       |   9 +-
 drivers/isdn/capi/capi.c                |  99 ++++++----------
 drivers/isdn/capi/capidrv.c             |  55 +++------
 drivers/isdn/capi/kcapi.c               |   8 +-
 drivers/isdn/gigaset/capi.c             |  75 ++++++------
 drivers/isdn/hardware/avm/avmcard.h     |   6 +-
 drivers/isdn/hardware/avm/b1.c          |  54 +++++----
 drivers/isdn/hardware/avm/b1dma.c       |  71 ++++++------
 drivers/isdn/hardware/avm/b1isa.c       |   2 +-
 drivers/isdn/hardware/avm/b1pci.c       |   4 +-
 drivers/isdn/hardware/avm/b1pcmcia.c    |   2 +-
 drivers/isdn/hardware/avm/c4.c          |  53 +++++----
 drivers/isdn/hardware/avm/t1isa.c       |   2 +-
 drivers/isdn/hardware/avm/t1pci.c       |   2 +-
 drivers/isdn/hardware/eicon/capimain.c  |  40 ++++---
 drivers/isdn/hardware/eicon/diva_didd.c |  45 ++++----
 drivers/isdn/hardware/eicon/divasi.c    |  48 ++++----
 drivers/isdn/hardware/eicon/divasproc.c | 198 ++++++++++++++------------------
 drivers/isdn/hysdn/hycapi.c             |  56 ++++-----
 include/linux/isdn/capilli.h            |   3 +-
 net/bluetooth/cmtp/capi.c               |  37 +++---
 21 files changed, 411 insertions(+), 458 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/isdn/INTERFACE.CAPI b/Documentation/isdn/INTERFACE.CAPI
index 5fe8de5cc727..f172091fb7cd 100644
--- a/Documentation/isdn/INTERFACE.CAPI
+++ b/Documentation/isdn/INTERFACE.CAPI
@@ -149,10 +149,11 @@ char *(*procinfo)(struct capi_ctr *ctrlr)
 	pointer to a callback function returning the entry for the device in
 	the CAPI controller info table, /proc/capi/controller
 
-read_proc_t *ctr_read_proc
-	pointer to the read_proc callback function for the device's proc file
-	system entry, /proc/capi/controllers/<n>; will be called with a
-	pointer to the device's capi_ctr structure as the last (data) argument
+const struct file_operations *proc_fops
+	pointers to callback functions for the device's proc file
+	system entry, /proc/capi/controllers/<n>; pointer to the device's
+	capi_ctr structure is available from struct proc_dir_entry::data
+	which is available from struct inode.
 
 Note: Callback functions except send_message() are never called in interrupt
 context.
diff --git a/drivers/isdn/capi/capi.c b/drivers/isdn/capi/capi.c
index 65bf91e16a42..79f9364aded6 100644
--- a/drivers/isdn/capi/capi.c
+++ b/drivers/isdn/capi/capi.c
@@ -33,6 +33,7 @@
 #endif /* CONFIG_ISDN_CAPI_MIDDLEWARE */
 #include <linux/skbuff.h>
 #include <linux/proc_fs.h>
+#include <linux/seq_file.h>
 #include <linux/poll.h>
 #include <linux/capi.h>
 #include <linux/kernelcapi.h>
@@ -1407,114 +1408,84 @@ static void capinc_tty_exit(void)
  * /proc/capi/capi20:
  *  minor applid nrecvctlpkt nrecvdatapkt nsendctlpkt nsenddatapkt
  */
-static int proc_capidev_read_proc(char *page, char **start, off_t off,
-                                       int count, int *eof, void *data)
+static int capi20_proc_show(struct seq_file *m, void *v)
 {
         struct capidev *cdev;
 	struct list_head *l;
-	int len = 0;
 
 	read_lock(&capidev_list_lock);
 	list_for_each(l, &capidev_list) {
 		cdev = list_entry(l, struct capidev, list);
-		len += sprintf(page+len, "0 %d %lu %lu %lu %lu\n",
+		seq_printf(m, "0 %d %lu %lu %lu %lu\n",
 			cdev->ap.applid,
 			cdev->ap.nrecvctlpkt,
 			cdev->ap.nrecvdatapkt,
 			cdev->ap.nsentctlpkt,
 			cdev->ap.nsentdatapkt);
-		if (len <= off) {
-			off -= len;
-			len = 0;
-		} else {
-			if (len-off > count)
-				goto endloop;
-		}
 	}
-
-endloop:
 	read_unlock(&capidev_list_lock);
-	if (len < count)
-		*eof = 1;
-	if (len > count) len = count;
-	if (len < 0) len = 0;
-	return len;
+	return 0;
 }
 
+static int capi20_proc_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, capi20_proc_show, NULL);
+}
+
+static const struct file_operations capi20_proc_fops = {
+	.owner		= THIS_MODULE,
+	.open		= capi20_proc_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
 /*
  * /proc/capi/capi20ncci:
  *  applid ncci
  */
-static int proc_capincci_read_proc(char *page, char **start, off_t off,
-                                       int count, int *eof, void *data)
+static int capi20ncci_proc_show(struct seq_file *m, void *v)
 {
         struct capidev *cdev;
         struct capincci *np;
 	struct list_head *l;
-	int len = 0;
 
 	read_lock(&capidev_list_lock);
 	list_for_each(l, &capidev_list) {
 		cdev = list_entry(l, struct capidev, list);
 		for (np=cdev->nccis; np; np = np->next) {
-			len += sprintf(page+len, "%d 0x%x\n",
+			seq_printf(m, "%d 0x%x\n",
 				       cdev->ap.applid,
 				       np->ncci);
-			if (len <= off) {
-				off -= len;
-				len = 0;
-			} else {
-				if (len-off > count)
-					goto endloop;
-			}
 		}
 	}
-endloop:
 	read_unlock(&capidev_list_lock);
-	*start = page+off;
-	if (len < count)
-		*eof = 1;
-	if (len>count) len = count;
-	if (len<0) len = 0;
-	return len;
+	return 0;
 }
 
-static struct procfsentries {
-  char *name;
-  mode_t mode;
-  int (*read_proc)(char *page, char **start, off_t off,
-                                       int count, int *eof, void *data);
-  struct proc_dir_entry *procent;
-} procfsentries[] = {
-   /* { "capi",		  S_IFDIR, 0 }, */
-   { "capi/capi20", 	  0	 , proc_capidev_read_proc },
-   { "capi/capi20ncci",   0	 , proc_capincci_read_proc },
+static int capi20ncci_proc_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, capi20ncci_proc_show, NULL);
+}
+
+static const struct file_operations capi20ncci_proc_fops = {
+	.owner		= THIS_MODULE,
+	.open		= capi20ncci_proc_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
 };
 
 static void __init proc_init(void)
 {
-    int nelem = ARRAY_SIZE(procfsentries);
-    int i;
-
-    for (i=0; i < nelem; i++) {
-        struct procfsentries *p = procfsentries + i;
-	p->procent = create_proc_entry(p->name, p->mode, NULL);
-	if (p->procent) p->procent->read_proc = p->read_proc;
-    }
+	proc_create("capi/capi20", 0, NULL, &capi20_proc_fops);
+	proc_create("capi/capi20ncci", 0, NULL, &capi20ncci_proc_fops);
 }
 
 static void __exit proc_exit(void)
 {
-    int nelem = ARRAY_SIZE(procfsentries);
-    int i;
-
-    for (i=nelem-1; i >= 0; i--) {
-        struct procfsentries *p = procfsentries + i;
-	if (p->procent) {
-	   remove_proc_entry(p->name, NULL);
-	   p->procent = NULL;
-	}
-    }
+	remove_proc_entry("capi/capi20", NULL);
+	remove_proc_entry("capi/capi20ncci", NULL);
 }
 
 /* -------- init function and module interface ---------------------- */
diff --git a/drivers/isdn/capi/capidrv.c b/drivers/isdn/capi/capidrv.c
index 66b7d7a86474..bb450152fb74 100644
--- a/drivers/isdn/capi/capidrv.c
+++ b/drivers/isdn/capi/capidrv.c
@@ -24,6 +24,7 @@
 #include <linux/isdn.h>
 #include <linux/isdnif.h>
 #include <linux/proc_fs.h>
+#include <linux/seq_file.h>
 #include <linux/capi.h>
 #include <linux/kernelcapi.h>
 #include <linux/ctype.h>
@@ -2229,59 +2230,37 @@ static void lower_callback(unsigned int cmd, u32 contr, void *data)
  * /proc/capi/capidrv:
  * nrecvctlpkt nrecvdatapkt nsendctlpkt nsenddatapkt
  */
-static int proc_capidrv_read_proc(char *page, char **start, off_t off,
-                                       int count, int *eof, void *data)
+static int capidrv_proc_show(struct seq_file *m, void *v)
 {
-	int len = 0;
-
-	len += sprintf(page+len, "%lu %lu %lu %lu\n",
+	seq_printf(m, "%lu %lu %lu %lu\n",
 			global.ap.nrecvctlpkt,
 			global.ap.nrecvdatapkt,
 			global.ap.nsentctlpkt,
 			global.ap.nsentdatapkt);
-	if (off+count >= len)
-	   *eof = 1;
-	if (len < off)
-           return 0;
-	*start = page + off;
-	return ((count < len-off) ? count : len-off);
+	return 0;
+}
+
+static int capidrv_proc_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, capidrv_proc_show, NULL);
 }
 
-static struct procfsentries {
-  char *name;
-  mode_t mode;
-  int (*read_proc)(char *page, char **start, off_t off,
-                                       int count, int *eof, void *data);
-  struct proc_dir_entry *procent;
-} procfsentries[] = {
-   /* { "capi",		  S_IFDIR, 0 }, */
-   { "capi/capidrv", 	  0	 , proc_capidrv_read_proc },
+static const struct file_operations capidrv_proc_fops = {
+	.owner		= THIS_MODULE,
+	.open		= capidrv_proc_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
 };
 
 static void __init proc_init(void)
 {
-    int nelem = ARRAY_SIZE(procfsentries);
-    int i;
-
-    for (i=0; i < nelem; i++) {
-        struct procfsentries *p = procfsentries + i;
-	p->procent = create_proc_entry(p->name, p->mode, NULL);
-	if (p->procent) p->procent->read_proc = p->read_proc;
-    }
+	proc_create("capi/capidrv", 0, NULL, &capidrv_proc_fops);
 }
 
 static void __exit proc_exit(void)
 {
-    int nelem = ARRAY_SIZE(procfsentries);
-    int i;
-
-    for (i=nelem-1; i >= 0; i--) {
-        struct procfsentries *p = procfsentries + i;
-	if (p->procent) {
-	   remove_proc_entry(p->name, NULL);
-	   p->procent = NULL;
-	}
-    }
+	remove_proc_entry("capi/capidrv", NULL);
 }
 
 static int __init capidrv_init(void)
diff --git a/drivers/isdn/capi/kcapi.c b/drivers/isdn/capi/kcapi.c
index dc506ab99cac..b0bacf377c18 100644
--- a/drivers/isdn/capi/kcapi.c
+++ b/drivers/isdn/capi/kcapi.c
@@ -490,13 +490,7 @@ attach_capi_ctr(struct capi_ctr *card)
 	card->traceflag = showcapimsgs;
 
 	sprintf(card->procfn, "capi/controllers/%d", card->cnr);
-	card->procent = create_proc_entry(card->procfn, 0, NULL);
-	if (card->procent) {
-	   card->procent->read_proc = 
-		(int (*)(char *,char **,off_t,int,int *,void *))
-			card->ctr_read_proc;
-	   card->procent->data = card;
-	}
+	card->procent = proc_create_data(card->procfn, 0, NULL, card->proc_fops, card);
 
 	ncards++;
 	printk(KERN_NOTICE "kcapi: Controller [%03d]: %s attached\n",
diff --git a/drivers/isdn/gigaset/capi.c b/drivers/isdn/gigaset/capi.c
index 3f5cd06af104..6f0ae32906bf 100644
--- a/drivers/isdn/gigaset/capi.c
+++ b/drivers/isdn/gigaset/capi.c
@@ -13,6 +13,8 @@
 
 #include "gigaset.h"
 #include <linux/ctype.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
 #include <linux/isdn/capilli.h>
 #include <linux/isdn/capicmd.h>
 #include <linux/isdn/capiutil.h>
@@ -2106,35 +2108,22 @@ static char *gigaset_procinfo(struct capi_ctr *ctr)
 	return ctr->name;	/* ToDo: more? */
 }
 
-/**
- * gigaset_ctr_read_proc() - build controller proc file entry
- * @page:	buffer of PAGE_SIZE bytes for receiving the entry.
- * @start:	unused.
- * @off:	unused.
- * @count:	unused.
- * @eof:	unused.
- * @ctr:	controller descriptor structure.
- *
- * Return value: length of generated entry
- */
-static int gigaset_ctr_read_proc(char *page, char **start, off_t off,
-			  int count, int *eof, struct capi_ctr *ctr)
+static int gigaset_proc_show(struct seq_file *m, void *v)
 {
+	struct capi_ctr *ctr = m->private;
 	struct cardstate *cs = ctr->driverdata;
 	char *s;
 	int i;
-	int len = 0;
-	len += sprintf(page+len, "%-16s %s\n", "name", ctr->name);
-	len += sprintf(page+len, "%-16s %s %s\n", "dev",
+
+	seq_printf(m, "%-16s %s\n", "name", ctr->name);
+	seq_printf(m, "%-16s %s %s\n", "dev",
 			dev_driver_string(cs->dev), dev_name(cs->dev));
-	len += sprintf(page+len, "%-16s %d\n", "id", cs->myid);
+	seq_printf(m, "%-16s %d\n", "id", cs->myid);
 	if (cs->gotfwver)
-		len += sprintf(page+len, "%-16s %d.%d.%d.%d\n", "firmware",
+		seq_printf(m, "%-16s %d.%d.%d.%d\n", "firmware",
 			cs->fwver[0], cs->fwver[1], cs->fwver[2], cs->fwver[3]);
-	len += sprintf(page+len, "%-16s %d\n", "channels",
-			cs->channels);
-	len += sprintf(page+len, "%-16s %s\n", "onechannel",
-			cs->onechannel ? "yes" : "no");
+	seq_printf(m, "%-16s %d\n", "channels", cs->channels);
+	seq_printf(m, "%-16s %s\n", "onechannel", cs->onechannel ? "yes" : "no");
 
 	switch (cs->mode) {
 	case M_UNKNOWN:
@@ -2152,7 +2141,7 @@ static int gigaset_ctr_read_proc(char *page, char **start, off_t off,
 	default:
 		s = "??";
 	}
-	len += sprintf(page+len, "%-16s %s\n", "mode", s);
+	seq_printf(m, "%-16s %s\n", "mode", s);
 
 	switch (cs->mstate) {
 	case MS_UNINITIALIZED:
@@ -2176,25 +2165,21 @@ static int gigaset_ctr_read_proc(char *page, char **start, off_t off,
 	default:
 		s = "??";
 	}
-	len += sprintf(page+len, "%-16s %s\n", "mstate", s);
+	seq_printf(m, "%-16s %s\n", "mstate", s);
 
-	len += sprintf(page+len, "%-16s %s\n", "running",
-			cs->running ? "yes" : "no");
-	len += sprintf(page+len, "%-16s %s\n", "connected",
-			cs->connected ? "yes" : "no");
-	len += sprintf(page+len, "%-16s %s\n", "isdn_up",
-			cs->isdn_up ? "yes" : "no");
-	len += sprintf(page+len, "%-16s %s\n", "cidmode",
-			cs->cidmode ? "yes" : "no");
+	seq_printf(m, "%-16s %s\n", "running", cs->running ? "yes" : "no");
+	seq_printf(m, "%-16s %s\n", "connected", cs->connected ? "yes" : "no");
+	seq_printf(m, "%-16s %s\n", "isdn_up", cs->isdn_up ? "yes" : "no");
+	seq_printf(m, "%-16s %s\n", "cidmode", cs->cidmode ? "yes" : "no");
 
 	for (i = 0; i < cs->channels; i++) {
-		len += sprintf(page+len, "[%d]%-13s %d\n", i, "corrupted",
+		seq_printf(m, "[%d]%-13s %d\n", i, "corrupted",
 				cs->bcs[i].corrupted);
-		len += sprintf(page+len, "[%d]%-13s %d\n", i, "trans_down",
+		seq_printf(m, "[%d]%-13s %d\n", i, "trans_down",
 				cs->bcs[i].trans_down);
-		len += sprintf(page+len, "[%d]%-13s %d\n", i, "trans_up",
+		seq_printf(m, "[%d]%-13s %d\n", i, "trans_up",
 				cs->bcs[i].trans_up);
-		len += sprintf(page+len, "[%d]%-13s %d\n", i, "chstate",
+		seq_printf(m, "[%d]%-13s %d\n", i, "chstate",
 				cs->bcs[i].chstate);
 		switch (cs->bcs[i].proto2) {
 		case L2_BITSYNC:
@@ -2209,11 +2194,23 @@ static int gigaset_ctr_read_proc(char *page, char **start, off_t off,
 		default:
 			s = "??";
 		}
-		len += sprintf(page+len, "[%d]%-13s %s\n", i, "proto2", s);
+		seq_printf(m, "[%d]%-13s %s\n", i, "proto2", s);
 	}
-	return len;
+	return 0;
 }
 
+static int gigaset_proc_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, gigaset_proc_show, PDE(inode)->data);
+}
+
+static const struct file_operations gigaset_proc_fops = {
+	.owner		= THIS_MODULE,
+	.open		= gigaset_proc_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
 
 static struct capi_driver capi_driver_gigaset = {
 	.name		= "gigaset",
@@ -2256,7 +2253,7 @@ int gigaset_isdn_register(struct cardstate *cs, const char *isdnid)
 	iif->ctr.release_appl  = gigaset_release_appl;
 	iif->ctr.send_message  = gigaset_send_message;
 	iif->ctr.procinfo      = gigaset_procinfo;
-	iif->ctr.ctr_read_proc = gigaset_ctr_read_proc;
+	iif->ctr.proc_fops = &gigaset_proc_fops;
 	INIT_LIST_HEAD(&iif->appls);
 	skb_queue_head_init(&iif->sendqueue);
 	atomic_set(&iif->sendqlen, 0);
diff --git a/drivers/isdn/hardware/avm/avmcard.h b/drivers/isdn/hardware/avm/avmcard.h
index d964f07e4a56..a70e8854461d 100644
--- a/drivers/isdn/hardware/avm/avmcard.h
+++ b/drivers/isdn/hardware/avm/avmcard.h
@@ -556,8 +556,7 @@ u16  b1_send_message(struct capi_ctr *ctrl, struct sk_buff *skb);
 void b1_parse_version(avmctrl_info *card);
 irqreturn_t b1_interrupt(int interrupt, void *devptr);
 
-int b1ctl_read_proc(char *page, char **start, off_t off,
-        		int count, int *eof, struct capi_ctr *ctrl);
+extern const struct file_operations b1ctl_proc_fops;
 
 avmcard_dmainfo *avmcard_dma_alloc(char *name, struct pci_dev *,
 				   long rsize, long ssize);
@@ -577,7 +576,6 @@ void b1dma_register_appl(struct capi_ctr *ctrl,
 				capi_register_params *rp);
 void b1dma_release_appl(struct capi_ctr *ctrl, u16 appl);
 u16  b1dma_send_message(struct capi_ctr *ctrl, struct sk_buff *skb);
-int b1dmactl_read_proc(char *page, char **start, off_t off,
-        		int count, int *eof, struct capi_ctr *ctrl);
+extern const struct file_operations b1dmactl_proc_fops;
 
 #endif /* _AVMCARD_H_ */
diff --git a/drivers/isdn/hardware/avm/b1.c b/drivers/isdn/hardware/avm/b1.c
index a7c0083e78a7..c38fa0f4c729 100644
--- a/drivers/isdn/hardware/avm/b1.c
+++ b/drivers/isdn/hardware/avm/b1.c
@@ -12,6 +12,8 @@
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/pci.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
 #include <linux/skbuff.h>
 #include <linux/delay.h>
 #include <linux/mm.h>
@@ -634,18 +636,17 @@ irqreturn_t b1_interrupt(int interrupt, void *devptr)
 }
 
 /* ------------------------------------------------------------- */
-int b1ctl_read_proc(char *page, char **start, off_t off,
-        		int count, int *eof, struct capi_ctr *ctrl)
+static int b1ctl_proc_show(struct seq_file *m, void *v)
 {
+	struct capi_ctr *ctrl = m->private;
 	avmctrl_info *cinfo = (avmctrl_info *)(ctrl->driverdata);
 	avmcard *card = cinfo->card;
 	u8 flag;
-	int len = 0;
 	char *s;
 
-	len += sprintf(page+len, "%-16s %s\n", "name", card->name);
-	len += sprintf(page+len, "%-16s 0x%x\n", "io", card->port);
-	len += sprintf(page+len, "%-16s %d\n", "irq", card->irq);
+	seq_printf(m, "%-16s %s\n", "name", card->name);
+	seq_printf(m, "%-16s 0x%x\n", "io", card->port);
+	seq_printf(m, "%-16s %d\n", "irq", card->irq);
 	switch (card->cardtype) {
 	case avm_b1isa: s = "B1 ISA"; break;
 	case avm_b1pci: s = "B1 PCI"; break;
@@ -658,20 +659,20 @@ int b1ctl_read_proc(char *page, char **start, off_t off,
 	case avm_c2: s = "C2"; break;
 	default: s = "???"; break;
 	}
-	len += sprintf(page+len, "%-16s %s\n", "type", s);
+	seq_printf(m, "%-16s %s\n", "type", s);
 	if (card->cardtype == avm_t1isa)
-	   len += sprintf(page+len, "%-16s %d\n", "cardnr", card->cardnr);
+		seq_printf(m, "%-16s %d\n", "cardnr", card->cardnr);
 	if ((s = cinfo->version[VER_DRIVER]) != NULL)
-	   len += sprintf(page+len, "%-16s %s\n", "ver_driver", s);
+		seq_printf(m, "%-16s %s\n", "ver_driver", s);
 	if ((s = cinfo->version[VER_CARDTYPE]) != NULL)
-	   len += sprintf(page+len, "%-16s %s\n", "ver_cardtype", s);
+		seq_printf(m, "%-16s %s\n", "ver_cardtype", s);
 	if ((s = cinfo->version[VER_SERIAL]) != NULL)
-	   len += sprintf(page+len, "%-16s %s\n", "ver_serial", s);
+		seq_printf(m, "%-16s %s\n", "ver_serial", s);
 
 	if (card->cardtype != avm_m1) {
         	flag = ((u8 *)(ctrl->profile.manu))[3];
         	if (flag)
-			len += sprintf(page+len, "%-16s%s%s%s%s%s%s%s\n",
+			seq_printf(m, "%-16s%s%s%s%s%s%s%s\n",
 			"protocol",
 			(flag & 0x01) ? " DSS1" : "",
 			(flag & 0x02) ? " CT1" : "",
@@ -685,7 +686,7 @@ int b1ctl_read_proc(char *page, char **start, off_t off,
 	if (card->cardtype != avm_m1) {
         	flag = ((u8 *)(ctrl->profile.manu))[5];
 		if (flag)
-			len += sprintf(page+len, "%-16s%s%s%s%s\n",
+			seq_printf(m, "%-16s%s%s%s%s\n",
 			"linetype",
 			(flag & 0x01) ? " point to point" : "",
 			(flag & 0x02) ? " point to multipoint" : "",
@@ -693,16 +694,25 @@ int b1ctl_read_proc(char *page, char **start, off_t off,
 			(flag & 0x04) ? " leased line with D-channel" : ""
 			);
 	}
-	len += sprintf(page+len, "%-16s %s\n", "cardname", cinfo->cardname);
-
-	if (off+count >= len)
-	   *eof = 1;
-	if (len < off)
-           return 0;
-	*start = page + off;
-	return ((count < len-off) ? count : len-off);
+	seq_printf(m, "%-16s %s\n", "cardname", cinfo->cardname);
+
+	return 0;
+}
+
+static int b1ctl_proc_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, b1ctl_proc_show, PDE(inode)->data);
 }
 
+const struct file_operations b1ctl_proc_fops = {
+	.owner		= THIS_MODULE,
+	.open		= b1ctl_proc_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+EXPORT_SYMBOL(b1ctl_proc_fops);
+
 /* ------------------------------------------------------------- */
 
 #ifdef CONFIG_PCI
@@ -781,8 +791,6 @@ EXPORT_SYMBOL(b1_send_message);
 EXPORT_SYMBOL(b1_parse_version);
 EXPORT_SYMBOL(b1_interrupt);
 
-EXPORT_SYMBOL(b1ctl_read_proc);
-
 static int __init b1_init(void)
 {
 	char *p;
diff --git a/drivers/isdn/hardware/avm/b1dma.c b/drivers/isdn/hardware/avm/b1dma.c
index 0e84aaae43fd..124550d0dbf3 100644
--- a/drivers/isdn/hardware/avm/b1dma.c
+++ b/drivers/isdn/hardware/avm/b1dma.c
@@ -11,6 +11,8 @@
 
 #include <linux/module.h>
 #include <linux/kernel.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
 #include <linux/skbuff.h>
 #include <linux/delay.h>
 #include <linux/mm.h>
@@ -855,21 +857,20 @@ u16 b1dma_send_message(struct capi_ctr *ctrl, struct sk_buff *skb)
 
 /* ------------------------------------------------------------- */
 
-int b1dmactl_read_proc(char *page, char **start, off_t off,
-        		int count, int *eof, struct capi_ctr *ctrl)
+static int b1dmactl_proc_show(struct seq_file *m, void *v)
 {
+	struct capi_ctr *ctrl = m->private;
 	avmctrl_info *cinfo = (avmctrl_info *)(ctrl->driverdata);
 	avmcard *card = cinfo->card;
 	u8 flag;
-	int len = 0;
 	char *s;
 	u32 txoff, txlen, rxoff, rxlen, csr;
 	unsigned long flags;
 
-	len += sprintf(page+len, "%-16s %s\n", "name", card->name);
-	len += sprintf(page+len, "%-16s 0x%x\n", "io", card->port);
-	len += sprintf(page+len, "%-16s %d\n", "irq", card->irq);
-	len += sprintf(page+len, "%-16s 0x%lx\n", "membase", card->membase);
+	seq_printf(m, "%-16s %s\n", "name", card->name);
+	seq_printf(m, "%-16s 0x%x\n", "io", card->port);
+	seq_printf(m, "%-16s %d\n", "irq", card->irq);
+	seq_printf(m, "%-16s 0x%lx\n", "membase", card->membase);
 	switch (card->cardtype) {
 	case avm_b1isa: s = "B1 ISA"; break;
 	case avm_b1pci: s = "B1 PCI"; break;
@@ -882,18 +883,18 @@ int b1dmactl_read_proc(char *page, char **start, off_t off,
 	case avm_c2: s = "C2"; break;
 	default: s = "???"; break;
 	}
-	len += sprintf(page+len, "%-16s %s\n", "type", s);
+	seq_printf(m, "%-16s %s\n", "type", s);
 	if ((s = cinfo->version[VER_DRIVER]) != NULL)
-	   len += sprintf(page+len, "%-16s %s\n", "ver_driver", s);
+		seq_printf(m, "%-16s %s\n", "ver_driver", s);
 	if ((s = cinfo->version[VER_CARDTYPE]) != NULL)
-	   len += sprintf(page+len, "%-16s %s\n", "ver_cardtype", s);
+		seq_printf(m, "%-16s %s\n", "ver_cardtype", s);
 	if ((s = cinfo->version[VER_SERIAL]) != NULL)
-	   len += sprintf(page+len, "%-16s %s\n", "ver_serial", s);
+		seq_printf(m, "%-16s %s\n", "ver_serial", s);
 
 	if (card->cardtype != avm_m1) {
         	flag = ((u8 *)(ctrl->profile.manu))[3];
         	if (flag)
-			len += sprintf(page+len, "%-16s%s%s%s%s%s%s%s\n",
+			seq_printf(m, "%-16s%s%s%s%s%s%s%s\n",
 			"protocol",
 			(flag & 0x01) ? " DSS1" : "",
 			(flag & 0x02) ? " CT1" : "",
@@ -907,7 +908,7 @@ int b1dmactl_read_proc(char *page, char **start, off_t off,
 	if (card->cardtype != avm_m1) {
         	flag = ((u8 *)(ctrl->profile.manu))[5];
 		if (flag)
-			len += sprintf(page+len, "%-16s%s%s%s%s\n",
+			seq_printf(m, "%-16s%s%s%s%s\n",
 			"linetype",
 			(flag & 0x01) ? " point to point" : "",
 			(flag & 0x02) ? " point to multipoint" : "",
@@ -915,7 +916,7 @@ int b1dmactl_read_proc(char *page, char **start, off_t off,
 			(flag & 0x04) ? " leased line with D-channel" : ""
 			);
 	}
-	len += sprintf(page+len, "%-16s %s\n", "cardname", cinfo->cardname);
+	seq_printf(m, "%-16s %s\n", "cardname", cinfo->cardname);
 
 
 	spin_lock_irqsave(&card->lock, flags);
@@ -930,27 +931,30 @@ int b1dmactl_read_proc(char *page, char **start, off_t off,
 
 	spin_unlock_irqrestore(&card->lock, flags);
 
-        len += sprintf(page+len, "%-16s 0x%lx\n",
-				"csr (cached)", (unsigned long)card->csr);
-        len += sprintf(page+len, "%-16s 0x%lx\n",
-				"csr", (unsigned long)csr);
-        len += sprintf(page+len, "%-16s %lu\n",
-				"txoff", (unsigned long)txoff);
-        len += sprintf(page+len, "%-16s %lu\n",
-				"txlen", (unsigned long)txlen);
-        len += sprintf(page+len, "%-16s %lu\n",
-				"rxoff", (unsigned long)rxoff);
-        len += sprintf(page+len, "%-16s %lu\n",
-				"rxlen", (unsigned long)rxlen);
-
-	if (off+count >= len)
-	   *eof = 1;
-	if (len < off)
-           return 0;
-	*start = page + off;
-	return ((count < len-off) ? count : len-off);
+	seq_printf(m, "%-16s 0x%lx\n", "csr (cached)", (unsigned long)card->csr);
+	seq_printf(m, "%-16s 0x%lx\n", "csr", (unsigned long)csr);
+	seq_printf(m, "%-16s %lu\n", "txoff", (unsigned long)txoff);
+	seq_printf(m, "%-16s %lu\n", "txlen", (unsigned long)txlen);
+	seq_printf(m, "%-16s %lu\n", "rxoff", (unsigned long)rxoff);
+	seq_printf(m, "%-16s %lu\n", "rxlen", (unsigned long)rxlen);
+
+	return 0;
+}
+
+static int b1dmactl_proc_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, b1dmactl_proc_show, PDE(inode)->data);
 }
 
+const struct file_operations b1dmactl_proc_fops = {
+	.owner		= THIS_MODULE,
+	.open		= b1dmactl_proc_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+EXPORT_SYMBOL(b1dmactl_proc_fops);
+
 /* ------------------------------------------------------------- */
 
 EXPORT_SYMBOL(b1dma_reset);
@@ -963,7 +967,6 @@ EXPORT_SYMBOL(b1dma_reset_ctr);
 EXPORT_SYMBOL(b1dma_register_appl);
 EXPORT_SYMBOL(b1dma_release_appl);
 EXPORT_SYMBOL(b1dma_send_message);
-EXPORT_SYMBOL(b1dmactl_read_proc);
 
 static int __init b1dma_init(void)
 {
diff --git a/drivers/isdn/hardware/avm/b1isa.c b/drivers/isdn/hardware/avm/b1isa.c
index 6461a32bc838..ff5390546f92 100644
--- a/drivers/isdn/hardware/avm/b1isa.c
+++ b/drivers/isdn/hardware/avm/b1isa.c
@@ -121,7 +121,7 @@ static int b1isa_probe(struct pci_dev *pdev)
 	cinfo->capi_ctrl.load_firmware = b1_load_firmware;
 	cinfo->capi_ctrl.reset_ctr     = b1_reset_ctr;
 	cinfo->capi_ctrl.procinfo      = b1isa_procinfo;
-	cinfo->capi_ctrl.ctr_read_proc = b1ctl_read_proc;
+	cinfo->capi_ctrl.proc_fops = &b1ctl_proc_fops;
 	strcpy(cinfo->capi_ctrl.name, card->name);
 
 	retval = attach_capi_ctr(&cinfo->capi_ctrl);
diff --git a/drivers/isdn/hardware/avm/b1pci.c b/drivers/isdn/hardware/avm/b1pci.c
index 5b314a2c4049..c97e4315079d 100644
--- a/drivers/isdn/hardware/avm/b1pci.c
+++ b/drivers/isdn/hardware/avm/b1pci.c
@@ -112,7 +112,7 @@ static int b1pci_probe(struct capicardparams *p, struct pci_dev *pdev)
 	cinfo->capi_ctrl.load_firmware = b1_load_firmware;
 	cinfo->capi_ctrl.reset_ctr     = b1_reset_ctr;
 	cinfo->capi_ctrl.procinfo      = b1pci_procinfo;
-	cinfo->capi_ctrl.ctr_read_proc = b1ctl_read_proc;
+	cinfo->capi_ctrl.proc_fops = &b1ctl_proc_fops;
 	strcpy(cinfo->capi_ctrl.name, card->name);
 	cinfo->capi_ctrl.owner         = THIS_MODULE;
 
@@ -251,7 +251,7 @@ static int b1pciv4_probe(struct capicardparams *p, struct pci_dev *pdev)
 	cinfo->capi_ctrl.load_firmware = b1dma_load_firmware;
 	cinfo->capi_ctrl.reset_ctr     = b1dma_reset_ctr;
 	cinfo->capi_ctrl.procinfo      = b1pciv4_procinfo;
-	cinfo->capi_ctrl.ctr_read_proc = b1dmactl_read_proc;
+	cinfo->capi_ctrl.proc_fops = &b1dmactl_proc_fops;
 	strcpy(cinfo->capi_ctrl.name, card->name);
 
 	retval = attach_capi_ctr(&cinfo->capi_ctrl);
diff --git a/drivers/isdn/hardware/avm/b1pcmcia.c b/drivers/isdn/hardware/avm/b1pcmcia.c
index 7740403b40e1..d6391e0afeea 100644
--- a/drivers/isdn/hardware/avm/b1pcmcia.c
+++ b/drivers/isdn/hardware/avm/b1pcmcia.c
@@ -108,7 +108,7 @@ static int b1pcmcia_add_card(unsigned int port, unsigned irq,
 	cinfo->capi_ctrl.load_firmware = b1_load_firmware;
 	cinfo->capi_ctrl.reset_ctr     = b1_reset_ctr;
 	cinfo->capi_ctrl.procinfo      = b1pcmcia_procinfo;
-	cinfo->capi_ctrl.ctr_read_proc = b1ctl_read_proc;
+	cinfo->capi_ctrl.proc_fops = &b1ctl_proc_fops;
 	strcpy(cinfo->capi_ctrl.name, card->name);
 
 	retval = attach_capi_ctr(&cinfo->capi_ctrl);
diff --git a/drivers/isdn/hardware/avm/c4.c b/drivers/isdn/hardware/avm/c4.c
index 6833301a45fc..de6e6b311819 100644
--- a/drivers/isdn/hardware/avm/c4.c
+++ b/drivers/isdn/hardware/avm/c4.c
@@ -11,6 +11,8 @@
 
 #include <linux/module.h>
 #include <linux/kernel.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
 #include <linux/skbuff.h>
 #include <linux/delay.h>
 #include <linux/mm.h>
@@ -1062,19 +1064,18 @@ static char *c4_procinfo(struct capi_ctr *ctrl)
 	return cinfo->infobuf;
 }
 
-static int c4_read_proc(char *page, char **start, off_t off,
-        		int count, int *eof, struct capi_ctr *ctrl)
+static int c4_proc_show(struct seq_file *m, void *v)
 {
+	struct capi_ctr *ctrl = m->private;
 	avmctrl_info *cinfo = (avmctrl_info *)(ctrl->driverdata);
 	avmcard *card = cinfo->card;
 	u8 flag;
-	int len = 0;
 	char *s;
 
-	len += sprintf(page+len, "%-16s %s\n", "name", card->name);
-	len += sprintf(page+len, "%-16s 0x%x\n", "io", card->port);
-	len += sprintf(page+len, "%-16s %d\n", "irq", card->irq);
-	len += sprintf(page+len, "%-16s 0x%lx\n", "membase", card->membase);
+	seq_printf(m, "%-16s %s\n", "name", card->name);
+	seq_printf(m, "%-16s 0x%x\n", "io", card->port);
+	seq_printf(m, "%-16s %d\n", "irq", card->irq);
+	seq_printf(m, "%-16s 0x%lx\n", "membase", card->membase);
 	switch (card->cardtype) {
 	case avm_b1isa: s = "B1 ISA"; break;
 	case avm_b1pci: s = "B1 PCI"; break;
@@ -1087,18 +1088,18 @@ static int c4_read_proc(char *page, char **start, off_t off,
 	case avm_c2: s = "C2"; break;
 	default: s = "???"; break;
 	}
-	len += sprintf(page+len, "%-16s %s\n", "type", s);
+	seq_printf(m, "%-16s %s\n", "type", s);
 	if ((s = cinfo->version[VER_DRIVER]) != NULL)
-	   len += sprintf(page+len, "%-16s %s\n", "ver_driver", s);
+		seq_printf(m, "%-16s %s\n", "ver_driver", s);
 	if ((s = cinfo->version[VER_CARDTYPE]) != NULL)
-	   len += sprintf(page+len, "%-16s %s\n", "ver_cardtype", s);
+		seq_printf(m, "%-16s %s\n", "ver_cardtype", s);
 	if ((s = cinfo->version[VER_SERIAL]) != NULL)
-	   len += sprintf(page+len, "%-16s %s\n", "ver_serial", s);
+		seq_printf(m, "%-16s %s\n", "ver_serial", s);
 
 	if (card->cardtype != avm_m1) {
         	flag = ((u8 *)(ctrl->profile.manu))[3];
         	if (flag)
-			len += sprintf(page+len, "%-16s%s%s%s%s%s%s%s\n",
+			seq_printf(m, "%-16s%s%s%s%s%s%s%s\n",
 			"protocol",
 			(flag & 0x01) ? " DSS1" : "",
 			(flag & 0x02) ? " CT1" : "",
@@ -1112,7 +1113,7 @@ static int c4_read_proc(char *page, char **start, off_t off,
 	if (card->cardtype != avm_m1) {
         	flag = ((u8 *)(ctrl->profile.manu))[5];
 		if (flag)
-			len += sprintf(page+len, "%-16s%s%s%s%s\n",
+			seq_printf(m, "%-16s%s%s%s%s\n",
 			"linetype",
 			(flag & 0x01) ? " point to point" : "",
 			(flag & 0x02) ? " point to multipoint" : "",
@@ -1120,16 +1121,24 @@ static int c4_read_proc(char *page, char **start, off_t off,
 			(flag & 0x04) ? " leased line with D-channel" : ""
 			);
 	}
-	len += sprintf(page+len, "%-16s %s\n", "cardname", cinfo->cardname);
-
-	if (off+count >= len)
-	   *eof = 1;
-	if (len < off)
-           return 0;
-	*start = page + off;
-	return ((count < len-off) ? count : len-off);
+	seq_printf(m, "%-16s %s\n", "cardname", cinfo->cardname);
+
+	return 0;
 }
 
+static int c4_proc_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, c4_proc_show, PDE(inode)->data);
+}
+
+static const struct file_operations c4_proc_fops = {
+	.owner		= THIS_MODULE,
+	.open		= c4_proc_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
 /* ------------------------------------------------------------- */
 
 static int c4_add_card(struct capicardparams *p, struct pci_dev *dev,
@@ -1201,7 +1210,7 @@ static int c4_add_card(struct capicardparams *p, struct pci_dev *dev,
 		cinfo->capi_ctrl.load_firmware = c4_load_firmware;
 		cinfo->capi_ctrl.reset_ctr     = c4_reset_ctr;
 		cinfo->capi_ctrl.procinfo      = c4_procinfo;
-		cinfo->capi_ctrl.ctr_read_proc = c4_read_proc;
+		cinfo->capi_ctrl.proc_fops = &c4_proc_fops;
 		strcpy(cinfo->capi_ctrl.name, card->name);
 
 		retval = attach_capi_ctr(&cinfo->capi_ctrl);
diff --git a/drivers/isdn/hardware/avm/t1isa.c b/drivers/isdn/hardware/avm/t1isa.c
index 1c53fd49adb6..baeeb3c2a3ee 100644
--- a/drivers/isdn/hardware/avm/t1isa.c
+++ b/drivers/isdn/hardware/avm/t1isa.c
@@ -429,7 +429,7 @@ static int t1isa_probe(struct pci_dev *pdev, int cardnr)
 	cinfo->capi_ctrl.load_firmware = t1isa_load_firmware;
 	cinfo->capi_ctrl.reset_ctr     = t1isa_reset_ctr;
 	cinfo->capi_ctrl.procinfo      = t1isa_procinfo;
-	cinfo->capi_ctrl.ctr_read_proc = b1ctl_read_proc;
+	cinfo->capi_ctrl.proc_fops = &b1ctl_proc_fops;
 	strcpy(cinfo->capi_ctrl.name, card->name);
 
 	retval = attach_capi_ctr(&cinfo->capi_ctrl);
diff --git a/drivers/isdn/hardware/avm/t1pci.c b/drivers/isdn/hardware/avm/t1pci.c
index e6d298d75146..5a3f83098018 100644
--- a/drivers/isdn/hardware/avm/t1pci.c
+++ b/drivers/isdn/hardware/avm/t1pci.c
@@ -119,7 +119,7 @@ static int t1pci_add_card(struct capicardparams *p, struct pci_dev *pdev)
 	cinfo->capi_ctrl.load_firmware = b1dma_load_firmware;
 	cinfo->capi_ctrl.reset_ctr     = b1dma_reset_ctr;
 	cinfo->capi_ctrl.procinfo      = t1pci_procinfo;
-	cinfo->capi_ctrl.ctr_read_proc = b1dmactl_read_proc;
+	cinfo->capi_ctrl.proc_fops = &b1dmactl_proc_fops;
 	strcpy(cinfo->capi_ctrl.name, card->name);
 
 	retval = attach_capi_ctr(&cinfo->capi_ctrl);
diff --git a/drivers/isdn/hardware/eicon/capimain.c b/drivers/isdn/hardware/eicon/capimain.c
index 98fcdfc7ca55..0f073cd73763 100644
--- a/drivers/isdn/hardware/eicon/capimain.c
+++ b/drivers/isdn/hardware/eicon/capimain.c
@@ -13,6 +13,7 @@
 #include <linux/module.h>
 #include <linux/init.h>
 #include <asm/uaccess.h>
+#include <linux/seq_file.h>
 #include <linux/skbuff.h>
 
 #include "os_capi.h"
@@ -75,25 +76,32 @@ void diva_os_free_message_buffer(diva_os_message_buffer_s * dmb)
 /*
  * proc function for controller info
  */
-static int diva_ctl_read_proc(char *page, char **start, off_t off,
-			      int count, int *eof, struct capi_ctr *ctrl)
+static int diva_ctl_proc_show(struct seq_file *m, void *v)
 {
+	struct capi_ctr *ctrl = m->private;
 	diva_card *card = (diva_card *) ctrl->driverdata;
-	int len = 0;
-
-	len += sprintf(page + len, "%s\n", ctrl->name);
-	len += sprintf(page + len, "Serial No. : %s\n", ctrl->serial);
-	len += sprintf(page + len, "Id         : %d\n", card->Id);
-	len += sprintf(page + len, "Channels   : %d\n", card->d.channels);
-
-	if (off + count >= len)
-		*eof = 1;
-	if (len < off)
-		return 0;
-	*start = page + off;
-	return ((count < len - off) ? count : len - off);
+
+	seq_printf(m, "%s\n", ctrl->name);
+	seq_printf(m, "Serial No. : %s\n", ctrl->serial);
+	seq_printf(m, "Id         : %d\n", card->Id);
+	seq_printf(m, "Channels   : %d\n", card->d.channels);
+
+	return 0;
+}
+
+static int diva_ctl_proc_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, diva_ctl_proc_show, NULL);
 }
 
+static const struct file_operations diva_ctl_proc_fops = {
+	.owner		= THIS_MODULE,
+	.open		= diva_ctl_proc_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
 /*
  * set additional os settings in capi_ctr struct
  */
@@ -102,7 +110,7 @@ void diva_os_set_controller_struct(struct capi_ctr *ctrl)
 	ctrl->driver_name = DRIVERLNAME;
 	ctrl->load_firmware = NULL;
 	ctrl->reset_ctr = NULL;
-	ctrl->ctr_read_proc = diva_ctl_read_proc;
+	ctrl->proc_fops = &diva_ctl_proc_fops;
 	ctrl->owner = THIS_MODULE;
 }
 
diff --git a/drivers/isdn/hardware/eicon/diva_didd.c b/drivers/isdn/hardware/eicon/diva_didd.c
index 993b14cf1778..5d06a7437824 100644
--- a/drivers/isdn/hardware/eicon/diva_didd.c
+++ b/drivers/isdn/hardware/eicon/diva_didd.c
@@ -15,6 +15,7 @@
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/proc_fs.h>
+#include <linux/seq_file.h>
 #include <net/net_namespace.h>
 
 #include "platform.h"
@@ -62,39 +63,41 @@ static char *getrev(const char *revision)
 	return rev;
 }
 
-static int
-proc_read(char *page, char **start, off_t off, int count, int *eof,
-	  void *data)
+static int divadidd_proc_show(struct seq_file *m, void *v)
 {
-	int len = 0;
 	char tmprev[32];
 
 	strcpy(tmprev, main_revision);
-	len += sprintf(page + len, "%s\n", DRIVERNAME);
-	len += sprintf(page + len, "name     : %s\n", DRIVERLNAME);
-	len += sprintf(page + len, "release  : %s\n", DRIVERRELEASE_DIDD);
-	len += sprintf(page + len, "build    : %s(%s)\n",
+	seq_printf(m, "%s\n", DRIVERNAME);
+	seq_printf(m, "name     : %s\n", DRIVERLNAME);
+	seq_printf(m, "release  : %s\n", DRIVERRELEASE_DIDD);
+	seq_printf(m, "build    : %s(%s)\n",
 		       diva_didd_common_code_build, DIVA_BUILD);
-	len += sprintf(page + len, "revision : %s\n", getrev(tmprev));
-
-	if (off + count >= len)
-		*eof = 1;
-	if (len < off)
-		return 0;
-	*start = page + off;
-	return ((count < len - off) ? count : len - off);
+	seq_printf(m, "revision : %s\n", getrev(tmprev));
+
+	return 0;
 }
 
+static int divadidd_proc_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, divadidd_proc_show, NULL);
+}
+
+static const struct file_operations divadidd_proc_fops = {
+	.owner		= THIS_MODULE,
+	.open		= divadidd_proc_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
 static int DIVA_INIT_FUNCTION create_proc(void)
 {
 	proc_net_eicon = proc_mkdir("eicon", init_net.proc_net);
 
 	if (proc_net_eicon) {
-		if ((proc_didd =
-		     create_proc_entry(DRIVERLNAME, S_IFREG | S_IRUGO,
-				       proc_net_eicon))) {
-			proc_didd->read_proc = proc_read;
-		}
+		proc_didd = proc_create(DRIVERLNAME, S_IRUGO, proc_net_eicon,
+					&divadidd_proc_fops);
 		return (1);
 	}
 	return (0);
diff --git a/drivers/isdn/hardware/eicon/divasi.c b/drivers/isdn/hardware/eicon/divasi.c
index 69e71ebe7841..f577719ab3fa 100644
--- a/drivers/isdn/hardware/eicon/divasi.c
+++ b/drivers/isdn/hardware/eicon/divasi.c
@@ -17,6 +17,7 @@
 #include <linux/poll.h>
 #include <linux/proc_fs.h>
 #include <linux/skbuff.h>
+#include <linux/seq_file.h>
 #include <linux/smp_lock.h>
 #include <asm/uaccess.h>
 
@@ -86,39 +87,40 @@ static void diva_um_timer_function(unsigned long data);
 extern struct proc_dir_entry *proc_net_eicon;
 static struct proc_dir_entry *um_idi_proc_entry = NULL;
 
-static int
-um_idi_proc_read(char *page, char **start, off_t off, int count, int *eof,
-		 void *data)
+static int um_idi_proc_show(struct seq_file *m, void *v)
 {
-	int len = 0;
 	char tmprev[32];
 
-	len += sprintf(page + len, "%s\n", DRIVERNAME);
-	len += sprintf(page + len, "name     : %s\n", DRIVERLNAME);
-	len += sprintf(page + len, "release  : %s\n", DRIVERRELEASE_IDI);
+	seq_printf(m, "%s\n", DRIVERNAME);
+	seq_printf(m, "name     : %s\n", DRIVERLNAME);
+	seq_printf(m, "release  : %s\n", DRIVERRELEASE_IDI);
 	strcpy(tmprev, main_revision);
-	len += sprintf(page + len, "revision : %s\n", getrev(tmprev));
-	len += sprintf(page + len, "build    : %s\n", DIVA_BUILD);
-	len += sprintf(page + len, "major    : %d\n", major);
-
-	if (off + count >= len)
-		*eof = 1;
-	if (len < off)
-		return 0;
-	*start = page + off;
-	return ((count < len - off) ? count : len - off);
+	seq_printf(m, "revision : %s\n", getrev(tmprev));
+	seq_printf(m, "build    : %s\n", DIVA_BUILD);
+	seq_printf(m, "major    : %d\n", major);
+
+	return 0;
+}
+
+static int um_idi_proc_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, um_idi_proc_show, NULL);
 }
 
+static const struct file_operations um_idi_proc_fops = {
+	.owner		= THIS_MODULE,
+	.open		= um_idi_proc_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
 static int DIVA_INIT_FUNCTION create_um_idi_proc(void)
 {
-	um_idi_proc_entry = create_proc_entry(DRIVERLNAME,
-					      S_IFREG | S_IRUGO | S_IWUSR,
-					      proc_net_eicon);
+	um_idi_proc_entry = proc_create(DRIVERLNAME, S_IRUGO, proc_net_eicon,
+					&um_idi_proc_fops);
 	if (!um_idi_proc_entry)
 		return (0);
-
-	um_idi_proc_entry->read_proc = um_idi_proc_read;
-
 	return (1);
 }
 
diff --git a/drivers/isdn/hardware/eicon/divasproc.c b/drivers/isdn/hardware/eicon/divasproc.c
index 040827288ec9..46d44a942624 100644
--- a/drivers/isdn/hardware/eicon/divasproc.c
+++ b/drivers/isdn/hardware/eicon/divasproc.c
@@ -14,6 +14,7 @@
 #include <linux/kernel.h>
 #include <linux/poll.h>
 #include <linux/proc_fs.h>
+#include <linux/seq_file.h>
 #include <linux/list.h>
 #include <asm/uaccess.h>
 
@@ -141,14 +142,10 @@ void remove_divas_proc(void)
 	}
 }
 
-/*
-** write group_optimization 
-*/
-static int
-write_grp_opt(struct file *file, const char __user *buffer, unsigned long count,
-	      void *data)
+static ssize_t grp_opt_proc_write(struct file *file, const char __user *buffer,
+				  size_t count, loff_t *pos)
 {
-	diva_os_xdi_adapter_t *a = (diva_os_xdi_adapter_t *) data;
+	diva_os_xdi_adapter_t *a = PDE(file->f_path.dentry->d_inode)->data;
 	PISDN_ADAPTER IoAdapter = IoAdapters[a->controller - 1];
 
 	if ((count == 1) || (count == 2)) {
@@ -172,14 +169,10 @@ write_grp_opt(struct file *file, const char __user *buffer, unsigned long count,
 	return (-EINVAL);
 }
 
-/*
-** write dynamic_l1_down
-*/
-static int
-write_d_l1_down(struct file *file, const char __user *buffer, unsigned long count,
-		void *data)
+static ssize_t d_l1_down_proc_write(struct file *file, const char __user *buffer,
+				    size_t count, loff_t *pos)
 {
-	diva_os_xdi_adapter_t *a = (diva_os_xdi_adapter_t *) data;
+	diva_os_xdi_adapter_t *a = PDE(file->f_path.dentry->d_inode)->data;
 	PISDN_ADAPTER IoAdapter = IoAdapters[a->controller - 1];
 
 	if ((count == 1) || (count == 2)) {
@@ -203,63 +196,62 @@ write_d_l1_down(struct file *file, const char __user *buffer, unsigned long coun
 	return (-EINVAL);
 }
 
-
-/*
-** read dynamic_l1_down 
-*/
-static int
-read_d_l1_down(char *page, char **start, off_t off, int count, int *eof,
-	       void *data)
+static int d_l1_down_proc_show(struct seq_file *m, void *v)
 {
-	int len = 0;
-	diva_os_xdi_adapter_t *a = (diva_os_xdi_adapter_t *) data;
+	diva_os_xdi_adapter_t *a = m->private;
 	PISDN_ADAPTER IoAdapter = IoAdapters[a->controller - 1];
 
-	len += sprintf(page + len, "%s\n",
+	seq_printf(m, "%s\n",
 		       (IoAdapter->capi_cfg.
 			cfg_1 & DIVA_XDI_CAPI_CFG_1_DYNAMIC_L1_ON) ? "1" :
 		       "0");
+	return 0;
+}
 
-	if (off + count >= len)
-		*eof = 1;
-	if (len < off)
-		return 0;
-	*start = page + off;
-	return ((count < len - off) ? count : len - off);
+static int d_l1_down_proc_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, d_l1_down_proc_show, PDE(inode)->data);
 }
 
-/*
-** read group_optimization
-*/
-static int
-read_grp_opt(char *page, char **start, off_t off, int count, int *eof,
-	     void *data)
+static const struct file_operations d_l1_down_proc_fops = {
+	.owner		= THIS_MODULE,
+	.open		= d_l1_down_proc_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+	.write		= d_l1_down_proc_write,
+};
+
+static int grp_opt_proc_show(struct seq_file *m, void *v)
 {
-	int len = 0;
-	diva_os_xdi_adapter_t *a = (diva_os_xdi_adapter_t *) data;
+	diva_os_xdi_adapter_t *a = m->private;
 	PISDN_ADAPTER IoAdapter = IoAdapters[a->controller - 1];
 
-	len += sprintf(page + len, "%s\n",
+	seq_printf(m, "%s\n",
 		       (IoAdapter->capi_cfg.
 			cfg_1 & DIVA_XDI_CAPI_CFG_1_GROUP_POPTIMIZATION_ON)
 		       ? "1" : "0");
+	return 0;
+}
 
-	if (off + count >= len)
-		*eof = 1;
-	if (len < off)
-		return 0;
-	*start = page + off;
-	return ((count < len - off) ? count : len - off);
+static int grp_opt_proc_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, grp_opt_proc_show, PDE(inode)->data);
 }
 
-/*
-** info write
-*/
-static int
-info_write(struct file *file, const char __user *buffer, unsigned long count,
-	   void *data)
+static const struct file_operations grp_opt_proc_fops = {
+	.owner		= THIS_MODULE,
+	.open		= grp_opt_proc_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+	.write		= grp_opt_proc_write,
+};
+
+static ssize_t info_proc_write(struct file *file, const char __user *buffer,
+			       size_t count, loff_t *pos)
 {
-	diva_os_xdi_adapter_t *a = (diva_os_xdi_adapter_t *) data;
+	diva_os_xdi_adapter_t *a = PDE(file->f_path.dentry->d_inode)->data;
 	PISDN_ADAPTER IoAdapter = IoAdapters[a->controller - 1];
 	char c[4];
 
@@ -277,63 +269,46 @@ info_write(struct file *file, const char __user *buffer, unsigned long count,
 	return (-EINVAL);
 }
 
-/*
-** info read
-*/
-static int
-info_read(char *page, char **start, off_t off, int count, int *eof,
-	  void *data)
+static int info_proc_show(struct seq_file *m, void *v)
 {
 	int i = 0;
-	int len = 0;
 	char *p;
 	char tmpser[16];
-	diva_os_xdi_adapter_t *a = (diva_os_xdi_adapter_t *) data;
+	diva_os_xdi_adapter_t *a = m->private;
 	PISDN_ADAPTER IoAdapter = IoAdapters[a->controller - 1];
 
-	len +=
-	    sprintf(page + len, "Name        : %s\n",
-		    IoAdapter->Properties.Name);
-	len += sprintf(page + len, "DSP state   : %08x\n", a->dsp_mask);
-	len += sprintf(page + len, "Channels    : %02d\n",
-		       IoAdapter->Properties.Channels);
-	len += sprintf(page + len, "E. max/used : %03d/%03d\n",
+	seq_printf(m, "Name        : %s\n", IoAdapter->Properties.Name);
+	seq_printf(m, "DSP state   : %08x\n", a->dsp_mask);
+	seq_printf(m, "Channels    : %02d\n", IoAdapter->Properties.Channels);
+	seq_printf(m, "E. max/used : %03d/%03d\n",
 		       IoAdapter->e_max, IoAdapter->e_count);
 	diva_get_vserial_number(IoAdapter, tmpser);
-	len += sprintf(page + len, "Serial      : %s\n", tmpser);
-	len +=
-	    sprintf(page + len, "IRQ         : %d\n",
-		    IoAdapter->irq_info.irq_nr);
-	len += sprintf(page + len, "CardIndex   : %d\n", a->CardIndex);
-	len += sprintf(page + len, "CardOrdinal : %d\n", a->CardOrdinal);
-	len += sprintf(page + len, "Controller  : %d\n", a->controller);
-	len += sprintf(page + len, "Bus-Type    : %s\n",
+	seq_printf(m, "Serial      : %s\n", tmpser);
+	seq_printf(m, "IRQ         : %d\n", IoAdapter->irq_info.irq_nr);
+	seq_printf(m, "CardIndex   : %d\n", a->CardIndex);
+	seq_printf(m, "CardOrdinal : %d\n", a->CardOrdinal);
+	seq_printf(m, "Controller  : %d\n", a->controller);
+	seq_printf(m, "Bus-Type    : %s\n",
 		       (a->Bus ==
 			DIVAS_XDI_ADAPTER_BUS_ISA) ? "ISA" : "PCI");
-	len += sprintf(page + len, "Port-Name   : %s\n", a->port_name);
+	seq_printf(m, "Port-Name   : %s\n", a->port_name);
 	if (a->Bus == DIVAS_XDI_ADAPTER_BUS_PCI) {
-		len +=
-		    sprintf(page + len, "PCI-bus     : %d\n",
-			    a->resources.pci.bus);
-		len +=
-		    sprintf(page + len, "PCI-func    : %d\n",
-			    a->resources.pci.func);
+		seq_printf(m, "PCI-bus     : %d\n", a->resources.pci.bus);
+		seq_printf(m, "PCI-func    : %d\n", a->resources.pci.func);
 		for (i = 0; i < 8; i++) {
 			if (a->resources.pci.bar[i]) {
-				len +=
-				    sprintf(page + len,
+				seq_printf(m,
 					    "Mem / I/O %d : 0x%x / mapped : 0x%lx",
 					    i, a->resources.pci.bar[i],
 					    (unsigned long) a->resources.
 					    pci.addr[i]);
 				if (a->resources.pci.length[i]) {
-					len +=
-					    sprintf(page + len,
+					seq_printf(m,
 						    " / length : %d",
 						    a->resources.pci.
 						    length[i]);
 				}
-				len += sprintf(page + len, "\n");
+				seq_putc(m, '\n');
 			}
 		}
 	}
@@ -353,16 +328,25 @@ info_read(char *page, char **start, off_t off, int count, int *eof,
 	} else {
 		p = "ready";
 	}
-	len += sprintf(page + len, "State       : %s\n", p);
+	seq_printf(m, "State       : %s\n", p);
 
-	if (off + count >= len)
-		*eof = 1;
-	if (len < off)
-		return 0;
-	*start = page + off;
-	return ((count < len - off) ? count : len - off);
+	return 0;
+}
+
+static int info_proc_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, info_proc_show, PDE(inode)->data);
 }
 
+static const struct file_operations info_proc_fops = {
+	.owner		= THIS_MODULE,
+	.open		= info_proc_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+	.write		= info_proc_write,
+};
+
 /*
 ** adapter proc init/de-init
 */
@@ -380,28 +364,20 @@ int create_adapter_proc(diva_os_xdi_adapter_t * a)
 		return (0);
 	a->proc_adapter_dir = (void *) de;
 
-	if (!(pe =
-	     create_proc_entry(info_proc_name, S_IFREG | S_IRUGO | S_IWUSR, de)))
+	pe = proc_create_data(info_proc_name, S_IRUGO | S_IWUSR, de,
+			      &info_proc_fops, a);
+	if (!pe)
 		return (0);
 	a->proc_info = (void *) pe;
-	pe->write_proc = info_write;
-	pe->read_proc = info_read;
-	pe->data = a;
 
-	if ((pe = create_proc_entry(grp_opt_proc_name,
-			       S_IFREG | S_IRUGO | S_IWUSR, de))) {
+	pe = proc_create_data(grp_opt_proc_name, S_IRUGO | S_IWUSR, de,
+			      &grp_opt_proc_fops, a);
+	if (pe)
 		a->proc_grp_opt = (void *) pe;
-		pe->write_proc = write_grp_opt;
-		pe->read_proc = read_grp_opt;
-		pe->data = a;
-	}
-	if ((pe = create_proc_entry(d_l1_down_proc_name,
-			       S_IFREG | S_IRUGO | S_IWUSR, de))) {
+	pe = proc_create_data(d_l1_down_proc_name, S_IRUGO | S_IWUSR, de,
+			      &d_l1_down_proc_fops, a);
+	if (pe)
 		a->proc_d_l1_down = (void *) pe;
-		pe->write_proc = write_d_l1_down;
-		pe->read_proc = read_d_l1_down;
-		pe->data = a;
-	}
 
 	DBG_TRC(("proc entry %s created", tmp));
 
diff --git a/drivers/isdn/hysdn/hycapi.c b/drivers/isdn/hysdn/hycapi.c
index 4ffaa14b9fc4..fe874afa4f81 100644
--- a/drivers/isdn/hysdn/hycapi.c
+++ b/drivers/isdn/hysdn/hycapi.c
@@ -11,6 +11,8 @@
  */
 
 #include <linux/module.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
 #include <linux/signal.h>
 #include <linux/kernel.h>
 #include <linux/skbuff.h>
@@ -432,26 +434,16 @@ static u16 hycapi_send_message(struct capi_ctr *ctrl, struct sk_buff *skb)
 	return retval;
 }
 
-/*********************************************************************
-hycapi_read_proc
-
-Informations provided in the /proc/capi-entries.
-
-*********************************************************************/
-
-static int hycapi_read_proc(char *page, char **start, off_t off,
-			    int count, int *eof, struct capi_ctr *ctrl)
+static int hycapi_proc_show(struct seq_file *m, void *v)
 {
+	struct capi_ctr *ctrl = m->private;
 	hycapictrl_info *cinfo = (hycapictrl_info *)(ctrl->driverdata);
 	hysdn_card *card = cinfo->card;
-	int len = 0;
 	char *s;
-#ifdef HYCAPI_PRINTFNAMES
-	printk(KERN_NOTICE "hycapi_read_proc\n");    
-#endif
-	len += sprintf(page+len, "%-16s %s\n", "name", cinfo->cardname);
-	len += sprintf(page+len, "%-16s 0x%x\n", "io", card->iobase);
-	len += sprintf(page+len, "%-16s %d\n", "irq", card->irq);
+
+	seq_printf(m, "%-16s %s\n", "name", cinfo->cardname);
+	seq_printf(m, "%-16s 0x%x\n", "io", card->iobase);
+	seq_printf(m, "%-16s %d\n", "irq", card->irq);
     
 	switch (card->brdtype) {
 		case BD_PCCARD:  s = "HYSDN Hycard"; break;
@@ -461,24 +453,32 @@ static int hycapi_read_proc(char *page, char **start, off_t off,
 		case BD_PLEXUS: s = "HYSDN Plexus30"; break;
 		default: s = "???"; break;
 	}
-	len += sprintf(page+len, "%-16s %s\n", "type", s);
+	seq_printf(m, "%-16s %s\n", "type", s);
 	if ((s = cinfo->version[VER_DRIVER]) != NULL)
-		len += sprintf(page+len, "%-16s %s\n", "ver_driver", s);
+		seq_printf(m, "%-16s %s\n", "ver_driver", s);
 	if ((s = cinfo->version[VER_CARDTYPE]) != NULL)
-		len += sprintf(page+len, "%-16s %s\n", "ver_cardtype", s);
+		seq_printf(m, "%-16s %s\n", "ver_cardtype", s);
 	if ((s = cinfo->version[VER_SERIAL]) != NULL)
-		len += sprintf(page+len, "%-16s %s\n", "ver_serial", s);
+		seq_printf(m, "%-16s %s\n", "ver_serial", s);
     
-	len += sprintf(page+len, "%-16s %s\n", "cardname", cinfo->cardname);
+	seq_printf(m, "%-16s %s\n", "cardname", cinfo->cardname);
     
-	if (off+count >= len)
-		*eof = 1;
-	if (len < off)
-		return 0;
-	*start = page + off;
-	return ((count < len-off) ? count : len-off);
+	return 0;
+}
+
+static int hycapi_proc_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, hycapi_proc_show, PDE(inode)->data);
 }
 
+static const struct file_operations hycapi_proc_fops = {
+	.owner		= THIS_MODULE,
+	.open		= hycapi_proc_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
 /**************************************************************
 hycapi_load_firmware
 
@@ -774,7 +774,7 @@ hycapi_capi_create(hysdn_card *card)
 		ctrl->load_firmware = hycapi_load_firmware;
 		ctrl->reset_ctr     = hycapi_reset_ctr;
 		ctrl->procinfo      = hycapi_procinfo;
-		ctrl->ctr_read_proc = hycapi_read_proc;
+		ctrl->proc_fops = &hycapi_proc_fops;
 		strcpy(ctrl->name, cinfo->cardname);
 		ctrl->owner = THIS_MODULE;
 
diff --git a/include/linux/isdn/capilli.h b/include/linux/isdn/capilli.h
index 7acb87a44872..d3e5e9da0c82 100644
--- a/include/linux/isdn/capilli.h
+++ b/include/linux/isdn/capilli.h
@@ -50,8 +50,7 @@ struct capi_ctr {
 	u16  (*send_message)(struct capi_ctr *, struct sk_buff *skb);
 	
 	char *(*procinfo)(struct capi_ctr *);
-	int (*ctr_read_proc)(char *page, char **start, off_t off,
-			     int count, int *eof, struct capi_ctr *card);
+	const struct file_operations *proc_fops;
 
 	/* filled in before calling ready callback */
 	u8 manu[CAPI_MANUFACTURER_LEN];		/* CAPI_GET_MANUFACTURER */
diff --git a/net/bluetooth/cmtp/capi.c b/net/bluetooth/cmtp/capi.c
index 97f8d68d574d..3487cfe74aec 100644
--- a/net/bluetooth/cmtp/capi.c
+++ b/net/bluetooth/cmtp/capi.c
@@ -21,7 +21,8 @@
 */
 
 #include <linux/module.h>
-
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
 #include <linux/types.h>
 #include <linux/errno.h>
 #include <linux/kernel.h>
@@ -516,33 +517,37 @@ static char *cmtp_procinfo(struct capi_ctr *ctrl)
 	return "CAPI Message Transport Protocol";
 }
 
-static int cmtp_ctr_read_proc(char *page, char **start, off_t off, int count, int *eof, struct capi_ctr *ctrl)
+static int cmtp_proc_show(struct seq_file *m, void *v)
 {
+	struct capi_ctr *ctrl = m->private;
 	struct cmtp_session *session = ctrl->driverdata;
 	struct cmtp_application *app;
 	struct list_head *p, *n;
-	int len = 0;
 
-	len += sprintf(page + len, "%s\n\n", cmtp_procinfo(ctrl));
-	len += sprintf(page + len, "addr %s\n", session->name);
-	len += sprintf(page + len, "ctrl %d\n", session->num);
+	seq_printf(m, "%s\n\n", cmtp_procinfo(ctrl));
+	seq_printf(m, "addr %s\n", session->name);
+	seq_printf(m, "ctrl %d\n", session->num);
 
 	list_for_each_safe(p, n, &session->applications) {
 		app = list_entry(p, struct cmtp_application, list);
-		len += sprintf(page + len, "appl %d -> %d\n", app->appl, app->mapping);
+		seq_printf(m, "appl %d -> %d\n", app->appl, app->mapping);
 	}
 
-	if (off + count >= len)
-		*eof = 1;
-
-	if (len < off)
-		return 0;
-
-	*start = page + off;
+	return 0;
+}
 
-	return ((count < len - off) ? count : len - off);
+static int cmtp_proc_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, cmtp_proc_show, PDE(inode)->data);
 }
 
+static const struct file_operations cmtp_proc_fops = {
+	.owner		= THIS_MODULE,
+	.open		= cmtp_proc_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
 
 int cmtp_attach_device(struct cmtp_session *session)
 {
@@ -582,7 +587,7 @@ int cmtp_attach_device(struct cmtp_session *session)
 	session->ctrl.send_message  = cmtp_send_message;
 
 	session->ctrl.procinfo      = cmtp_procinfo;
-	session->ctrl.ctr_read_proc = cmtp_ctr_read_proc;
+	session->ctrl.proc_fops = &cmtp_proc_fops;
 
 	if (attach_capi_ctr(&session->ctrl) < 0) {
 		BT_ERR("Can't attach new controller");
-- 
cgit v1.2.3


From 6d125529c6cbfe570ce3bf9a0728548f087499da Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 24 Dec 2009 06:58:56 -0500
Subject: Fix ACC_MODE() for real

commit 5300990c0370e804e49d9a59d928c5d53fb73487 had stepped on a rather
nasty mess: definitions of ACC_MODE used to be different.  Fixed the
resulting breakage, converting them to variant that takes O_... value;
all callers have that and it actually simplifies life (see tomoyo part
of changes).

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namei.c               | 2 +-
 include/linux/fs.h       | 2 +-
 security/tomoyo/tomoyo.c | 7 +------
 3 files changed, 3 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/fs/namei.c b/fs/namei.c
index 1b26b1620664..d930f1856ed2 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1620,7 +1620,7 @@ struct file *do_filp_open(int dfd, const char *pathname,
 		open_flag |= O_DSYNC;
 
 	if (!acc_mode)
-		acc_mode = MAY_OPEN | ACC_MODE(flag);
+		acc_mode = MAY_OPEN | ACC_MODE(open_flag);
 
 	/* O_TRUNC implies we need access checks for write permissions */
 	if (flag & O_TRUNC)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 9147ca88f253..b1bcb275b596 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2463,7 +2463,7 @@ int proc_nr_files(struct ctl_table *table, int write,
 
 int __init get_filesystem_list(char *buf);
 
-#define ACC_MODE(x) ("\000\004\002\006"[(x)&O_ACCMODE])
+#define ACC_MODE(x) ("\004\002\006\006"[(x)&O_ACCMODE])
 #define OPEN_FMODE(flag) ((__force fmode_t)((flag + 1) & O_ACCMODE))
 
 #endif /* __KERNEL__ */
diff --git a/security/tomoyo/tomoyo.c b/security/tomoyo/tomoyo.c
index 8a00ade85166..2aceebf5f354 100644
--- a/security/tomoyo/tomoyo.c
+++ b/security/tomoyo/tomoyo.c
@@ -80,9 +80,8 @@ static int tomoyo_bprm_check_security(struct linux_binprm *bprm)
 		return tomoyo_find_next_domain(bprm);
 	/*
 	 * Read permission is checked against interpreters using next domain.
-	 * '1' is the result of open_to_namei_flags(O_RDONLY).
 	 */
-	return tomoyo_check_open_permission(domain, &bprm->file->f_path, 1);
+	return tomoyo_check_open_permission(domain, &bprm->file->f_path, O_RDONLY);
 }
 
 static int tomoyo_path_truncate(struct path *path, loff_t length,
@@ -184,10 +183,6 @@ static int tomoyo_file_fcntl(struct file *file, unsigned int cmd,
 static int tomoyo_dentry_open(struct file *f, const struct cred *cred)
 {
 	int flags = f->f_flags;
-
-	if ((flags + 1) & O_ACCMODE)
-		flags++;
-	flags |= f->f_flags & (O_APPEND | O_TRUNC);
 	/* Don't check read permission here if called from do_execve(). */
 	if (current->in_execve)
 		return 0;
-- 
cgit v1.2.3


From d5f1fb53353edc38da326445267c1df0c9676df2 Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Thu, 14 Jan 2010 10:53:55 +0800
Subject: lib: Introduce strnstr()

It differs strstr() in that it limits the length to be searched
in the first string.

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
LKML-Reference: <4B4E8743.6030805@cn.fujitsu.com>
Acked-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/string.h |  5 ++++-
 lib/string.c           | 27 ++++++++++++++++++++++++++-
 2 files changed, 30 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/string.h b/include/linux/string.h
index 651839a2a755..a716ee2a8adb 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -72,7 +72,10 @@ static inline __must_check char *strstrip(char *str)
 }
 
 #ifndef __HAVE_ARCH_STRSTR
-extern char * strstr(const char *,const char *);
+extern char * strstr(const char *, const char *);
+#endif
+#ifndef __HAVE_ARCH_STRNSTR
+extern char * strnstr(const char *, const char *, size_t);
 #endif
 #ifndef __HAVE_ARCH_STRLEN
 extern __kernel_size_t strlen(const char *);
diff --git a/lib/string.c b/lib/string.c
index 9f75b4ec50b8..a1cdcfcc42d0 100644
--- a/lib/string.c
+++ b/lib/string.c
@@ -667,7 +667,7 @@ EXPORT_SYMBOL(memscan);
  */
 char *strstr(const char *s1, const char *s2)
 {
-	int l1, l2;
+	size_t l1, l2;
 
 	l2 = strlen(s2);
 	if (!l2)
@@ -684,6 +684,31 @@ char *strstr(const char *s1, const char *s2)
 EXPORT_SYMBOL(strstr);
 #endif
 
+#ifndef __HAVE_ARCH_STRNSTR
+/**
+ * strnstr - Find the first substring in a length-limited string
+ * @s1: The string to be searched
+ * @s2: The string to search for
+ * @len: the maximum number of characters to search
+ */
+char *strnstr(const char *s1, const char *s2, size_t len)
+{
+	size_t l1 = len, l2;
+
+	l2 = strlen(s2);
+	if (!l2)
+		return (char *)s1;
+	while (l1 >= l2) {
+		l1--;
+		if (!memcmp(s1, s2, l2))
+			return (char *)s1;
+		s1++;
+	}
+	return NULL;
+}
+EXPORT_SYMBOL(strnstr);
+#endif
+
 #ifndef __HAVE_ARCH_MEMCHR
 /**
  * memchr - Find a character in an area of memory.
-- 
cgit v1.2.3


From ad72c347e56bf3a0231b9d686e17764157d2961c Mon Sep 17 00:00:00 2001
From: Christian Pellegrin <chripell@fsfe.org>
Date: Thu, 14 Jan 2010 07:08:34 +0000
Subject: can: Proper ctrlmode handling for CAN devices

This patch adds error checking of ctrlmode values for CAN devices. As
an example all availabe bits are implemented in the mcp251x driver.

Signed-off-by: Christian Pellegrin <chripell@fsfe.org>
Acked-by: Wolfgang Grandegger <wg@grandegger.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/can/at91_can.c        |  1 +
 drivers/net/can/bfin_can.c        |  1 +
 drivers/net/can/dev.c             |  2 ++
 drivers/net/can/mcp251x.c         | 11 ++++++++++-
 drivers/net/can/mscan/mscan.c     |  1 +
 drivers/net/can/sja1000/sja1000.c |  1 +
 drivers/net/can/ti_hecc.c         |  1 +
 drivers/net/can/usb/ems_usb.c     |  1 +
 include/linux/can/dev.h           |  1 +
 9 files changed, 19 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/net/can/at91_can.c b/drivers/net/can/at91_can.c
index f7287497ba6e..a2f29a38798a 100644
--- a/drivers/net/can/at91_can.c
+++ b/drivers/net/can/at91_can.c
@@ -1073,6 +1073,7 @@ static int __init at91_can_probe(struct platform_device *pdev)
 	priv->can.bittiming_const = &at91_bittiming_const;
 	priv->can.do_set_bittiming = at91_set_bittiming;
 	priv->can.do_set_mode = at91_set_mode;
+	priv->can.ctrlmode_supported = CAN_CTRLMODE_3_SAMPLES;
 	priv->reg_base = addr;
 	priv->dev = dev;
 	priv->clk = clk;
diff --git a/drivers/net/can/bfin_can.c b/drivers/net/can/bfin_can.c
index 7e1926e79e98..bf7f9ba2d903 100644
--- a/drivers/net/can/bfin_can.c
+++ b/drivers/net/can/bfin_can.c
@@ -603,6 +603,7 @@ struct net_device *alloc_bfin_candev(void)
 	priv->can.bittiming_const = &bfin_can_bittiming_const;
 	priv->can.do_set_bittiming = bfin_can_set_bittiming;
 	priv->can.do_set_mode = bfin_can_set_mode;
+	priv->can.ctrlmode_supported = CAN_CTRLMODE_3_SAMPLES;
 
 	return dev;
 }
diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c
index c1bb29f0322b..f08f1202ff00 100644
--- a/drivers/net/can/dev.c
+++ b/drivers/net/can/dev.c
@@ -592,6 +592,8 @@ static int can_changelink(struct net_device *dev,
 		if (dev->flags & IFF_UP)
 			return -EBUSY;
 		cm = nla_data(data[IFLA_CAN_CTRLMODE]);
+		if (cm->flags & ~priv->ctrlmode_supported)
+			return -EOPNOTSUPP;
 		priv->ctrlmode &= ~cm->mask;
 		priv->ctrlmode |= cm->flags;
 	}
diff --git a/drivers/net/can/mcp251x.c b/drivers/net/can/mcp251x.c
index afa2fa45fed9..bbe186b5a0ed 100644
--- a/drivers/net/can/mcp251x.c
+++ b/drivers/net/can/mcp251x.c
@@ -539,9 +539,14 @@ static void mcp251x_set_normal_mode(struct spi_device *spi)
 	if (priv->can.ctrlmode & CAN_CTRLMODE_LOOPBACK) {
 		/* Put device into loopback mode */
 		mcp251x_write_reg(spi, CANCTRL, CANCTRL_REQOP_LOOPBACK);
+	} else if (priv->can.ctrlmode & CAN_CTRLMODE_LISTENONLY) {
+		/* Put device into listen-only mode */
+		mcp251x_write_reg(spi, CANCTRL, CANCTRL_REQOP_LISTEN_ONLY);
 	} else {
 		/* Put device into normal mode */
-		mcp251x_write_reg(spi, CANCTRL, CANCTRL_REQOP_NORMAL);
+		mcp251x_write_reg(spi, CANCTRL, CANCTRL_REQOP_NORMAL |
+				  (priv->can.ctrlmode & CAN_CTRLMODE_ONE_SHOT ?
+				   CANCTRL_OSM : 0));
 
 		/* Wait for the device to enter normal mode */
 		timeout = jiffies + HZ;
@@ -948,6 +953,10 @@ static int __devinit mcp251x_can_probe(struct spi_device *spi)
 	priv->can.bittiming_const = &mcp251x_bittiming_const;
 	priv->can.do_set_mode = mcp251x_do_set_mode;
 	priv->can.clock.freq = pdata->oscillator_frequency / 2;
+	priv->can.ctrlmode_supported = CAN_CTRLMODE_3_SAMPLES |
+		CAN_CTRLMODE_LOOPBACK | CAN_CTRLMODE_LISTENONLY;
+	if (pdata->model == CAN_MCP251X_MCP2515)
+		priv->can.ctrlmode_supported |= CAN_CTRLMODE_ONE_SHOT;
 	priv->net = net;
 	dev_set_drvdata(&spi->dev, priv);
 
diff --git a/drivers/net/can/mscan/mscan.c b/drivers/net/can/mscan/mscan.c
index 40827c128b65..6b7dd578d417 100644
--- a/drivers/net/can/mscan/mscan.c
+++ b/drivers/net/can/mscan/mscan.c
@@ -686,6 +686,7 @@ struct net_device *alloc_mscandev(void)
 	priv->can.bittiming_const = &mscan_bittiming_const;
 	priv->can.do_set_bittiming = mscan_do_set_bittiming;
 	priv->can.do_set_mode = mscan_do_set_mode;
+	priv->can.ctrlmode_supported = CAN_CTRLMODE_3_SAMPLES;
 
 	for (i = 0; i < TX_QUEUE_SIZE; i++) {
 		priv->tx_queue[i].id = i;
diff --git a/drivers/net/can/sja1000/sja1000.c b/drivers/net/can/sja1000/sja1000.c
index 345304d779b9..ace103a44833 100644
--- a/drivers/net/can/sja1000/sja1000.c
+++ b/drivers/net/can/sja1000/sja1000.c
@@ -567,6 +567,7 @@ struct net_device *alloc_sja1000dev(int sizeof_priv)
 	priv->can.bittiming_const = &sja1000_bittiming_const;
 	priv->can.do_set_bittiming = sja1000_set_bittiming;
 	priv->can.do_set_mode = sja1000_set_mode;
+	priv->can.ctrlmode_supported = CAN_CTRLMODE_3_SAMPLES;
 
 	if (sizeof_priv)
 		priv->priv = (void *)priv + sizeof(struct sja1000_priv);
diff --git a/drivers/net/can/ti_hecc.c b/drivers/net/can/ti_hecc.c
index 7d370e32a7a8..8332e242b0be 100644
--- a/drivers/net/can/ti_hecc.c
+++ b/drivers/net/can/ti_hecc.c
@@ -909,6 +909,7 @@ static int ti_hecc_probe(struct platform_device *pdev)
 	priv->can.bittiming_const = &ti_hecc_bittiming_const;
 	priv->can.do_set_mode = ti_hecc_do_set_mode;
 	priv->can.do_get_state = ti_hecc_get_state;
+	priv->can.ctrlmode_supported = CAN_CTRLMODE_3_SAMPLES;
 
 	ndev->irq = irq->start;
 	ndev->flags |= IFF_ECHO;
diff --git a/drivers/net/can/usb/ems_usb.c b/drivers/net/can/usb/ems_usb.c
index ddb17e256656..bfab283ba9b1 100644
--- a/drivers/net/can/usb/ems_usb.c
+++ b/drivers/net/can/usb/ems_usb.c
@@ -1022,6 +1022,7 @@ static int ems_usb_probe(struct usb_interface *intf,
 	dev->can.bittiming_const = &ems_usb_bittiming_const;
 	dev->can.do_set_bittiming = ems_usb_set_bittiming;
 	dev->can.do_set_mode = ems_usb_set_mode;
+	dev->can.ctrlmode_supported = CAN_CTRLMODE_3_SAMPLES;
 
 	netdev->flags |= IFF_ECHO; /* we support local echo */
 
diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h
index 7e7c98a3e908..c8c660a79f90 100644
--- a/include/linux/can/dev.h
+++ b/include/linux/can/dev.h
@@ -38,6 +38,7 @@ struct can_priv {
 
 	enum can_state state;
 	u32 ctrlmode;
+	u32 ctrlmode_supported;
 
 	int restart_ms;
 	struct timer_list restart_timer;
-- 
cgit v1.2.3


From 05c2828c72c4eabf62376adfe27bd24797621f62 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Thu, 14 Jan 2010 06:17:09 +0000
Subject: tun: export underlying socket

Tun device looks similar to a packet socket
in that both pass complete frames from/to userspace.

This patch fills in enough fields in the socket underlying tun driver
to support sendmsg/recvmsg operations, and message flags
MSG_TRUNC and MSG_DONTWAIT, and exports access to this socket
to modules.  Regular read/write behaviour is unchanged.

This way, code using raw sockets to inject packets
into a physical device, can support injecting
packets into host network stack almost without modification.

First user of this interface will be vhost virtualization
accelerator.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/tun.c      | 101 +++++++++++++++++++++++++++++++++++++++----------
 include/linux/if_tun.h |  14 +++++++
 2 files changed, 96 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 2834a01bae24..5adb3d150552 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -144,6 +144,7 @@ static int tun_attach(struct tun_struct *tun, struct file *file)
 	err = 0;
 	tfile->tun = tun;
 	tun->tfile = tfile;
+	tun->socket.file = file;
 	dev_hold(tun->dev);
 	sock_hold(tun->socket.sk);
 	atomic_inc(&tfile->count);
@@ -158,6 +159,7 @@ static void __tun_detach(struct tun_struct *tun)
 	/* Detach from net device */
 	netif_tx_lock_bh(tun->dev);
 	tun->tfile = NULL;
+	tun->socket.file = NULL;
 	netif_tx_unlock_bh(tun->dev);
 
 	/* Drop read queue */
@@ -387,7 +389,8 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
 	/* Notify and wake up reader process */
 	if (tun->flags & TUN_FASYNC)
 		kill_fasync(&tun->fasync, SIGIO, POLL_IN);
-	wake_up_interruptible(&tun->socket.wait);
+	wake_up_interruptible_poll(&tun->socket.wait, POLLIN |
+				   POLLRDNORM | POLLRDBAND);
 	return NETDEV_TX_OK;
 
 drop:
@@ -743,7 +746,7 @@ static __inline__ ssize_t tun_put_user(struct tun_struct *tun,
 	len = min_t(int, skb->len, len);
 
 	skb_copy_datagram_const_iovec(skb, 0, iv, total, len);
-	total += len;
+	total += skb->len;
 
 	tun->dev->stats.tx_packets++;
 	tun->dev->stats.tx_bytes += len;
@@ -751,34 +754,23 @@ static __inline__ ssize_t tun_put_user(struct tun_struct *tun,
 	return total;
 }
 
-static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec *iv,
-			    unsigned long count, loff_t pos)
+static ssize_t tun_do_read(struct tun_struct *tun,
+			   struct kiocb *iocb, const struct iovec *iv,
+			   ssize_t len, int noblock)
 {
-	struct file *file = iocb->ki_filp;
-	struct tun_file *tfile = file->private_data;
-	struct tun_struct *tun = __tun_get(tfile);
 	DECLARE_WAITQUEUE(wait, current);
 	struct sk_buff *skb;
-	ssize_t len, ret = 0;
-
-	if (!tun)
-		return -EBADFD;
+	ssize_t ret = 0;
 
 	DBG(KERN_INFO "%s: tun_chr_read\n", tun->dev->name);
 
-	len = iov_length(iv, count);
-	if (len < 0) {
-		ret = -EINVAL;
-		goto out;
-	}
-
 	add_wait_queue(&tun->socket.wait, &wait);
 	while (len) {
 		current->state = TASK_INTERRUPTIBLE;
 
 		/* Read frames from the queue */
 		if (!(skb=skb_dequeue(&tun->socket.sk->sk_receive_queue))) {
-			if (file->f_flags & O_NONBLOCK) {
+			if (noblock) {
 				ret = -EAGAIN;
 				break;
 			}
@@ -805,6 +797,27 @@ static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec *iv,
 	current->state = TASK_RUNNING;
 	remove_wait_queue(&tun->socket.wait, &wait);
 
+	return ret;
+}
+
+static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec *iv,
+			    unsigned long count, loff_t pos)
+{
+	struct file *file = iocb->ki_filp;
+	struct tun_file *tfile = file->private_data;
+	struct tun_struct *tun = __tun_get(tfile);
+	ssize_t len, ret;
+
+	if (!tun)
+		return -EBADFD;
+	len = iov_length(iv, count);
+	if (len < 0) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	ret = tun_do_read(tun, iocb, iv, len, file->f_flags & O_NONBLOCK);
+	ret = min_t(ssize_t, ret, len);
 out:
 	tun_put(tun);
 	return ret;
@@ -847,7 +860,8 @@ static void tun_sock_write_space(struct sock *sk)
 		return;
 
 	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
-		wake_up_interruptible_sync(sk->sk_sleep);
+		wake_up_interruptible_sync_poll(sk->sk_sleep, POLLOUT |
+						POLLWRNORM | POLLWRBAND);
 
 	tun = tun_sk(sk)->tun;
 	kill_fasync(&tun->fasync, SIGIO, POLL_OUT);
@@ -858,6 +872,37 @@ static void tun_sock_destruct(struct sock *sk)
 	free_netdev(tun_sk(sk)->tun->dev);
 }
 
+static int tun_sendmsg(struct kiocb *iocb, struct socket *sock,
+		       struct msghdr *m, size_t total_len)
+{
+	struct tun_struct *tun = container_of(sock, struct tun_struct, socket);
+	return tun_get_user(tun, m->msg_iov, total_len,
+			    m->msg_flags & MSG_DONTWAIT);
+}
+
+static int tun_recvmsg(struct kiocb *iocb, struct socket *sock,
+		       struct msghdr *m, size_t total_len,
+		       int flags)
+{
+	struct tun_struct *tun = container_of(sock, struct tun_struct, socket);
+	int ret;
+	if (flags & ~(MSG_DONTWAIT|MSG_TRUNC))
+		return -EINVAL;
+	ret = tun_do_read(tun, iocb, m->msg_iov, total_len,
+			  flags & MSG_DONTWAIT);
+	if (ret > total_len) {
+		m->msg_flags |= MSG_TRUNC;
+		ret = flags & MSG_TRUNC ? ret : total_len;
+	}
+	return ret;
+}
+
+/* Ops structure to mimic raw sockets with tun */
+static const struct proto_ops tun_socket_ops = {
+	.sendmsg = tun_sendmsg,
+	.recvmsg = tun_recvmsg,
+};
+
 static struct proto tun_proto = {
 	.name		= "tun",
 	.owner		= THIS_MODULE,
@@ -986,6 +1031,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
 			goto err_free_dev;
 
 		init_waitqueue_head(&tun->socket.wait);
+		tun->socket.ops = &tun_socket_ops;
 		sock_init_data(&tun->socket, sk);
 		sk->sk_write_space = tun_sock_write_space;
 		sk->sk_sndbuf = INT_MAX;
@@ -1525,6 +1571,23 @@ static void tun_cleanup(void)
 	rtnl_link_unregister(&tun_link_ops);
 }
 
+/* Get an underlying socket object from tun file.  Returns error unless file is
+ * attached to a device.  The returned object works like a packet socket, it
+ * can be used for sock_sendmsg/sock_recvmsg.  The caller is responsible for
+ * holding a reference to the file for as long as the socket is in use. */
+struct socket *tun_get_socket(struct file *file)
+{
+	struct tun_struct *tun;
+	if (file->f_op != &tun_fops)
+		return ERR_PTR(-EINVAL);
+	tun = tun_get(file);
+	if (!tun)
+		return ERR_PTR(-EBADFD);
+	tun_put(tun);
+	return &tun->socket;
+}
+EXPORT_SYMBOL_GPL(tun_get_socket);
+
 module_init(tun_init);
 module_exit(tun_cleanup);
 MODULE_DESCRIPTION(DRV_DESCRIPTION);
diff --git a/include/linux/if_tun.h b/include/linux/if_tun.h
index 3f5fd523b49d..404abe00162c 100644
--- a/include/linux/if_tun.h
+++ b/include/linux/if_tun.h
@@ -86,4 +86,18 @@ struct tun_filter {
 	__u8   addr[0][ETH_ALEN];
 };
 
+#ifdef __KERNEL__
+#if defined(CONFIG_TUN) || defined(CONFIG_TUN_MODULE)
+struct socket *tun_get_socket(struct file *);
+#else
+#include <linux/err.h>
+#include <linux/errno.h>
+struct file;
+struct socket;
+static inline struct socket *tun_get_socket(struct file *f)
+{
+	return ERR_PTR(-EINVAL);
+}
+#endif /* CONFIG_TUN */
+#endif /* __KERNEL__ */
 #endif /* __IF_TUN_H */
-- 
cgit v1.2.3


From 3a4d5c94e959359ece6d6b55045c3f046677f55c Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Thu, 14 Jan 2010 06:17:27 +0000
Subject: vhost_net: a kernel-level virtio server

What it is: vhost net is a character device that can be used to reduce
the number of system calls involved in virtio networking.
Existing virtio net code is used in the guest without modification.

There's similarity with vringfd, with some differences and reduced scope
- uses eventfd for signalling
- structures can be moved around in memory at any time (good for
  migration, bug work-arounds in userspace)
- write logging is supported (good for migration)
- support memory table and not just an offset (needed for kvm)

common virtio related code has been put in a separate file vhost.c and
can be made into a separate module if/when more backends appear.  I used
Rusty's lguest.c as the source for developing this part : this supplied
me with witty comments I wouldn't be able to write myself.

What it is not: vhost net is not a bus, and not a generic new system
call. No assumptions are made on how guest performs hypercalls.
Userspace hypervisors are supported as well as kvm.

How it works: Basically, we connect virtio frontend (configured by
userspace) to a backend. The backend could be a network device, or a tap
device.  Backend is also configured by userspace, including vlan/mac
etc.

Status: This works for me, and I haven't see any crashes.
Compared to userspace, people reported improved latency (as I save up to
4 system calls per packet), as well as better bandwidth and CPU
utilization.

Features that I plan to look at in the future:
- mergeable buffers
- zero copy
- scalability tuning: figure out the best threading model to use

Note on RCU usage (this is also documented in vhost.h, near
private_pointer which is the value protected by this variant of RCU):
what is happening is that the rcu_dereference() is being used in a
workqueue item.  The role of rcu_read_lock() is taken on by the start of
execution of the workqueue item, of rcu_read_unlock() by the end of
execution of the workqueue item, and of synchronize_rcu() by
flush_workqueue()/flush_work(). In the future we might need to apply
some gcc attribute or sparse annotation to the function passed to
INIT_WORK(). Paul's ack below is for this RCU usage.

(Includes fixes by Alan Cox <alan@linux.intel.com>,
David L Stevens <dlstevens@us.ibm.com>,
Chris Wright <chrisw@redhat.com>)

Acked-by: Rusty Russell <rusty@rustcorp.com.au>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 MAINTAINERS                |    9 +
 arch/ia64/kvm/Kconfig      |    1 +
 arch/powerpc/kvm/Kconfig   |    1 +
 arch/s390/kvm/Kconfig      |    1 +
 arch/x86/kvm/Kconfig       |    1 +
 drivers/Makefile           |    1 +
 drivers/vhost/Kconfig      |   11 +
 drivers/vhost/Makefile     |    2 +
 drivers/vhost/net.c        |  661 ++++++++++++++++++++++++++
 drivers/vhost/vhost.c      | 1098 ++++++++++++++++++++++++++++++++++++++++++++
 drivers/vhost/vhost.h      |  161 +++++++
 include/linux/Kbuild       |    1 +
 include/linux/miscdevice.h |    1 +
 include/linux/vhost.h      |  130 ++++++
 14 files changed, 2079 insertions(+)
 create mode 100644 drivers/vhost/Kconfig
 create mode 100644 drivers/vhost/Makefile
 create mode 100644 drivers/vhost/net.c
 create mode 100644 drivers/vhost/vhost.c
 create mode 100644 drivers/vhost/vhost.h
 create mode 100644 include/linux/vhost.h

(limited to 'include/linux')

diff --git a/MAINTAINERS b/MAINTAINERS
index 745643b8c344..337dffbe9a47 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5803,6 +5803,15 @@ S:	Maintained
 F:	Documentation/filesystems/vfat.txt
 F:	fs/fat/
 
+VIRTIO HOST (VHOST)
+M:	"Michael S. Tsirkin" <mst@redhat.com>
+L:	kvm@vger.kernel.org
+L:	virtualization@lists.osdl.org
+L:	netdev@vger.kernel.org
+S:	Maintained
+F:	drivers/vhost/
+F:	include/linux/vhost.h
+
 VIA RHINE NETWORK DRIVER
 M:	Roger Luethi <rl@hellgate.ch>
 S:	Maintained
diff --git a/arch/ia64/kvm/Kconfig b/arch/ia64/kvm/Kconfig
index ef3e7be29caf..01c75797119c 100644
--- a/arch/ia64/kvm/Kconfig
+++ b/arch/ia64/kvm/Kconfig
@@ -47,6 +47,7 @@ config KVM_INTEL
 	  Provides support for KVM on Itanium 2 processors equipped with the VT
 	  extensions.
 
+source drivers/vhost/Kconfig
 source drivers/virtio/Kconfig
 
 endif # VIRTUALIZATION
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index 07703f72330e..e28841fbfb8d 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -75,6 +75,7 @@ config KVM_E500
 
 	  If unsure, say N.
 
+source drivers/vhost/Kconfig
 source drivers/virtio/Kconfig
 
 endif # VIRTUALIZATION
diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig
index 6ee55ae84ce2..a7251580891c 100644
--- a/arch/s390/kvm/Kconfig
+++ b/arch/s390/kvm/Kconfig
@@ -35,6 +35,7 @@ config KVM
 
 # OK, it's a little counter-intuitive to do this, but it puts it neatly under
 # the virtualization menu.
+source drivers/vhost/Kconfig
 source drivers/virtio/Kconfig
 
 endif # VIRTUALIZATION
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 4cd498332466..3c4d0109ad20 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -65,6 +65,7 @@ config KVM_AMD
 
 # OK, it's a little counter-intuitive to do this, but it puts it neatly under
 # the virtualization menu.
+source drivers/vhost/Kconfig
 source drivers/lguest/Kconfig
 source drivers/virtio/Kconfig
 
diff --git a/drivers/Makefile b/drivers/Makefile
index 6ee53c7a57a1..81e36596b1e9 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -106,6 +106,7 @@ obj-$(CONFIG_HID)		+= hid/
 obj-$(CONFIG_PPC_PS3)		+= ps3/
 obj-$(CONFIG_OF)		+= of/
 obj-$(CONFIG_SSB)		+= ssb/
+obj-$(CONFIG_VHOST_NET)		+= vhost/
 obj-$(CONFIG_VIRTIO)		+= virtio/
 obj-$(CONFIG_VLYNQ)		+= vlynq/
 obj-$(CONFIG_STAGING)		+= staging/
diff --git a/drivers/vhost/Kconfig b/drivers/vhost/Kconfig
new file mode 100644
index 000000000000..9f409f447aea
--- /dev/null
+++ b/drivers/vhost/Kconfig
@@ -0,0 +1,11 @@
+config VHOST_NET
+	tristate "Host kernel accelerator for virtio net (EXPERIMENTAL)"
+	depends on NET && EVENTFD && EXPERIMENTAL
+	---help---
+	  This kernel module can be loaded in host kernel to accelerate
+	  guest networking with virtio_net. Not to be confused with virtio_net
+	  module itself which needs to be loaded in guest kernel.
+
+	  To compile this driver as a module, choose M here: the module will
+	  be called vhost_net.
+
diff --git a/drivers/vhost/Makefile b/drivers/vhost/Makefile
new file mode 100644
index 000000000000..72dd02050bb9
--- /dev/null
+++ b/drivers/vhost/Makefile
@@ -0,0 +1,2 @@
+obj-$(CONFIG_VHOST_NET) += vhost_net.o
+vhost_net-y := vhost.o net.o
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
new file mode 100644
index 000000000000..4c8928319e1d
--- /dev/null
+++ b/drivers/vhost/net.c
@@ -0,0 +1,661 @@
+/* Copyright (C) 2009 Red Hat, Inc.
+ * Author: Michael S. Tsirkin <mst@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * virtio-net server in host kernel.
+ */
+
+#include <linux/compat.h>
+#include <linux/eventfd.h>
+#include <linux/vhost.h>
+#include <linux/virtio_net.h>
+#include <linux/mmu_context.h>
+#include <linux/miscdevice.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/workqueue.h>
+#include <linux/rcupdate.h>
+#include <linux/file.h>
+
+#include <linux/net.h>
+#include <linux/if_packet.h>
+#include <linux/if_arp.h>
+#include <linux/if_tun.h>
+
+#include <net/sock.h>
+
+#include "vhost.h"
+
+/* Max number of bytes transferred before requeueing the job.
+ * Using this limit prevents one virtqueue from starving others. */
+#define VHOST_NET_WEIGHT 0x80000
+
+enum {
+	VHOST_NET_VQ_RX = 0,
+	VHOST_NET_VQ_TX = 1,
+	VHOST_NET_VQ_MAX = 2,
+};
+
+enum vhost_net_poll_state {
+	VHOST_NET_POLL_DISABLED = 0,
+	VHOST_NET_POLL_STARTED = 1,
+	VHOST_NET_POLL_STOPPED = 2,
+};
+
+struct vhost_net {
+	struct vhost_dev dev;
+	struct vhost_virtqueue vqs[VHOST_NET_VQ_MAX];
+	struct vhost_poll poll[VHOST_NET_VQ_MAX];
+	/* Tells us whether we are polling a socket for TX.
+	 * We only do this when socket buffer fills up.
+	 * Protected by tx vq lock. */
+	enum vhost_net_poll_state tx_poll_state;
+};
+
+/* Pop first len bytes from iovec. Return number of segments used. */
+static int move_iovec_hdr(struct iovec *from, struct iovec *to,
+			  size_t len, int iov_count)
+{
+	int seg = 0;
+	size_t size;
+	while (len && seg < iov_count) {
+		size = min(from->iov_len, len);
+		to->iov_base = from->iov_base;
+		to->iov_len = size;
+		from->iov_len -= size;
+		from->iov_base += size;
+		len -= size;
+		++from;
+		++to;
+		++seg;
+	}
+	return seg;
+}
+
+/* Caller must have TX VQ lock */
+static void tx_poll_stop(struct vhost_net *net)
+{
+	if (likely(net->tx_poll_state != VHOST_NET_POLL_STARTED))
+		return;
+	vhost_poll_stop(net->poll + VHOST_NET_VQ_TX);
+	net->tx_poll_state = VHOST_NET_POLL_STOPPED;
+}
+
+/* Caller must have TX VQ lock */
+static void tx_poll_start(struct vhost_net *net, struct socket *sock)
+{
+	if (unlikely(net->tx_poll_state != VHOST_NET_POLL_STOPPED))
+		return;
+	vhost_poll_start(net->poll + VHOST_NET_VQ_TX, sock->file);
+	net->tx_poll_state = VHOST_NET_POLL_STARTED;
+}
+
+/* Expects to be always run from workqueue - which acts as
+ * read-size critical section for our kind of RCU. */
+static void handle_tx(struct vhost_net *net)
+{
+	struct vhost_virtqueue *vq = &net->dev.vqs[VHOST_NET_VQ_TX];
+	unsigned head, out, in, s;
+	struct msghdr msg = {
+		.msg_name = NULL,
+		.msg_namelen = 0,
+		.msg_control = NULL,
+		.msg_controllen = 0,
+		.msg_iov = vq->iov,
+		.msg_flags = MSG_DONTWAIT,
+	};
+	size_t len, total_len = 0;
+	int err, wmem;
+	size_t hdr_size;
+	struct socket *sock = rcu_dereference(vq->private_data);
+	if (!sock)
+		return;
+
+	wmem = atomic_read(&sock->sk->sk_wmem_alloc);
+	if (wmem >= sock->sk->sk_sndbuf)
+		return;
+
+	use_mm(net->dev.mm);
+	mutex_lock(&vq->mutex);
+	vhost_disable_notify(vq);
+
+	if (wmem < sock->sk->sk_sndbuf * 2)
+		tx_poll_stop(net);
+	hdr_size = vq->hdr_size;
+
+	for (;;) {
+		head = vhost_get_vq_desc(&net->dev, vq, vq->iov,
+					 ARRAY_SIZE(vq->iov),
+					 &out, &in,
+					 NULL, NULL);
+		/* Nothing new?  Wait for eventfd to tell us they refilled. */
+		if (head == vq->num) {
+			wmem = atomic_read(&sock->sk->sk_wmem_alloc);
+			if (wmem >= sock->sk->sk_sndbuf * 3 / 4) {
+				tx_poll_start(net, sock);
+				set_bit(SOCK_ASYNC_NOSPACE, &sock->flags);
+				break;
+			}
+			if (unlikely(vhost_enable_notify(vq))) {
+				vhost_disable_notify(vq);
+				continue;
+			}
+			break;
+		}
+		if (in) {
+			vq_err(vq, "Unexpected descriptor format for TX: "
+			       "out %d, int %d\n", out, in);
+			break;
+		}
+		/* Skip header. TODO: support TSO. */
+		s = move_iovec_hdr(vq->iov, vq->hdr, hdr_size, out);
+		msg.msg_iovlen = out;
+		len = iov_length(vq->iov, out);
+		/* Sanity check */
+		if (!len) {
+			vq_err(vq, "Unexpected header len for TX: "
+			       "%zd expected %zd\n",
+			       iov_length(vq->hdr, s), hdr_size);
+			break;
+		}
+		/* TODO: Check specific error and bomb out unless ENOBUFS? */
+		err = sock->ops->sendmsg(NULL, sock, &msg, len);
+		if (unlikely(err < 0)) {
+			vhost_discard_vq_desc(vq);
+			tx_poll_start(net, sock);
+			break;
+		}
+		if (err != len)
+			pr_err("Truncated TX packet: "
+			       " len %d != %zd\n", err, len);
+		vhost_add_used_and_signal(&net->dev, vq, head, 0);
+		total_len += len;
+		if (unlikely(total_len >= VHOST_NET_WEIGHT)) {
+			vhost_poll_queue(&vq->poll);
+			break;
+		}
+	}
+
+	mutex_unlock(&vq->mutex);
+	unuse_mm(net->dev.mm);
+}
+
+/* Expects to be always run from workqueue - which acts as
+ * read-size critical section for our kind of RCU. */
+static void handle_rx(struct vhost_net *net)
+{
+	struct vhost_virtqueue *vq = &net->dev.vqs[VHOST_NET_VQ_RX];
+	unsigned head, out, in, log, s;
+	struct vhost_log *vq_log;
+	struct msghdr msg = {
+		.msg_name = NULL,
+		.msg_namelen = 0,
+		.msg_control = NULL, /* FIXME: get and handle RX aux data. */
+		.msg_controllen = 0,
+		.msg_iov = vq->iov,
+		.msg_flags = MSG_DONTWAIT,
+	};
+
+	struct virtio_net_hdr hdr = {
+		.flags = 0,
+		.gso_type = VIRTIO_NET_HDR_GSO_NONE
+	};
+
+	size_t len, total_len = 0;
+	int err;
+	size_t hdr_size;
+	struct socket *sock = rcu_dereference(vq->private_data);
+	if (!sock || skb_queue_empty(&sock->sk->sk_receive_queue))
+		return;
+
+	use_mm(net->dev.mm);
+	mutex_lock(&vq->mutex);
+	vhost_disable_notify(vq);
+	hdr_size = vq->hdr_size;
+
+	vq_log = unlikely(vhost_has_feature(&net->dev, VHOST_F_LOG_ALL)) ?
+		vq->log : NULL;
+
+	for (;;) {
+		head = vhost_get_vq_desc(&net->dev, vq, vq->iov,
+					 ARRAY_SIZE(vq->iov),
+					 &out, &in,
+					 vq_log, &log);
+		/* OK, now we need to know about added descriptors. */
+		if (head == vq->num) {
+			if (unlikely(vhost_enable_notify(vq))) {
+				/* They have slipped one in as we were
+				 * doing that: check again. */
+				vhost_disable_notify(vq);
+				continue;
+			}
+			/* Nothing new?  Wait for eventfd to tell us
+			 * they refilled. */
+			break;
+		}
+		/* We don't need to be notified again. */
+		if (out) {
+			vq_err(vq, "Unexpected descriptor format for RX: "
+			       "out %d, int %d\n",
+			       out, in);
+			break;
+		}
+		/* Skip header. TODO: support TSO/mergeable rx buffers. */
+		s = move_iovec_hdr(vq->iov, vq->hdr, hdr_size, in);
+		msg.msg_iovlen = in;
+		len = iov_length(vq->iov, in);
+		/* Sanity check */
+		if (!len) {
+			vq_err(vq, "Unexpected header len for RX: "
+			       "%zd expected %zd\n",
+			       iov_length(vq->hdr, s), hdr_size);
+			break;
+		}
+		err = sock->ops->recvmsg(NULL, sock, &msg,
+					 len, MSG_DONTWAIT | MSG_TRUNC);
+		/* TODO: Check specific error and bomb out unless EAGAIN? */
+		if (err < 0) {
+			vhost_discard_vq_desc(vq);
+			break;
+		}
+		/* TODO: Should check and handle checksum. */
+		if (err > len) {
+			pr_err("Discarded truncated rx packet: "
+			       " len %d > %zd\n", err, len);
+			vhost_discard_vq_desc(vq);
+			continue;
+		}
+		len = err;
+		err = memcpy_toiovec(vq->hdr, (unsigned char *)&hdr, hdr_size);
+		if (err) {
+			vq_err(vq, "Unable to write vnet_hdr at addr %p: %d\n",
+			       vq->iov->iov_base, err);
+			break;
+		}
+		len += hdr_size;
+		vhost_add_used_and_signal(&net->dev, vq, head, len);
+		if (unlikely(vq_log))
+			vhost_log_write(vq, vq_log, log, len);
+		total_len += len;
+		if (unlikely(total_len >= VHOST_NET_WEIGHT)) {
+			vhost_poll_queue(&vq->poll);
+			break;
+		}
+	}
+
+	mutex_unlock(&vq->mutex);
+	unuse_mm(net->dev.mm);
+}
+
+static void handle_tx_kick(struct work_struct *work)
+{
+	struct vhost_virtqueue *vq;
+	struct vhost_net *net;
+	vq = container_of(work, struct vhost_virtqueue, poll.work);
+	net = container_of(vq->dev, struct vhost_net, dev);
+	handle_tx(net);
+}
+
+static void handle_rx_kick(struct work_struct *work)
+{
+	struct vhost_virtqueue *vq;
+	struct vhost_net *net;
+	vq = container_of(work, struct vhost_virtqueue, poll.work);
+	net = container_of(vq->dev, struct vhost_net, dev);
+	handle_rx(net);
+}
+
+static void handle_tx_net(struct work_struct *work)
+{
+	struct vhost_net *net;
+	net = container_of(work, struct vhost_net, poll[VHOST_NET_VQ_TX].work);
+	handle_tx(net);
+}
+
+static void handle_rx_net(struct work_struct *work)
+{
+	struct vhost_net *net;
+	net = container_of(work, struct vhost_net, poll[VHOST_NET_VQ_RX].work);
+	handle_rx(net);
+}
+
+static int vhost_net_open(struct inode *inode, struct file *f)
+{
+	struct vhost_net *n = kmalloc(sizeof *n, GFP_KERNEL);
+	int r;
+	if (!n)
+		return -ENOMEM;
+	n->vqs[VHOST_NET_VQ_TX].handle_kick = handle_tx_kick;
+	n->vqs[VHOST_NET_VQ_RX].handle_kick = handle_rx_kick;
+	r = vhost_dev_init(&n->dev, n->vqs, VHOST_NET_VQ_MAX);
+	if (r < 0) {
+		kfree(n);
+		return r;
+	}
+
+	vhost_poll_init(n->poll + VHOST_NET_VQ_TX, handle_tx_net, POLLOUT);
+	vhost_poll_init(n->poll + VHOST_NET_VQ_RX, handle_rx_net, POLLIN);
+	n->tx_poll_state = VHOST_NET_POLL_DISABLED;
+
+	f->private_data = n;
+
+	return 0;
+}
+
+static void vhost_net_disable_vq(struct vhost_net *n,
+				 struct vhost_virtqueue *vq)
+{
+	if (!vq->private_data)
+		return;
+	if (vq == n->vqs + VHOST_NET_VQ_TX) {
+		tx_poll_stop(n);
+		n->tx_poll_state = VHOST_NET_POLL_DISABLED;
+	} else
+		vhost_poll_stop(n->poll + VHOST_NET_VQ_RX);
+}
+
+static void vhost_net_enable_vq(struct vhost_net *n,
+				struct vhost_virtqueue *vq)
+{
+	struct socket *sock = vq->private_data;
+	if (!sock)
+		return;
+	if (vq == n->vqs + VHOST_NET_VQ_TX) {
+		n->tx_poll_state = VHOST_NET_POLL_STOPPED;
+		tx_poll_start(n, sock);
+	} else
+		vhost_poll_start(n->poll + VHOST_NET_VQ_RX, sock->file);
+}
+
+static struct socket *vhost_net_stop_vq(struct vhost_net *n,
+					struct vhost_virtqueue *vq)
+{
+	struct socket *sock;
+
+	mutex_lock(&vq->mutex);
+	sock = vq->private_data;
+	vhost_net_disable_vq(n, vq);
+	rcu_assign_pointer(vq->private_data, NULL);
+	mutex_unlock(&vq->mutex);
+	return sock;
+}
+
+static void vhost_net_stop(struct vhost_net *n, struct socket **tx_sock,
+			   struct socket **rx_sock)
+{
+	*tx_sock = vhost_net_stop_vq(n, n->vqs + VHOST_NET_VQ_TX);
+	*rx_sock = vhost_net_stop_vq(n, n->vqs + VHOST_NET_VQ_RX);
+}
+
+static void vhost_net_flush_vq(struct vhost_net *n, int index)
+{
+	vhost_poll_flush(n->poll + index);
+	vhost_poll_flush(&n->dev.vqs[index].poll);
+}
+
+static void vhost_net_flush(struct vhost_net *n)
+{
+	vhost_net_flush_vq(n, VHOST_NET_VQ_TX);
+	vhost_net_flush_vq(n, VHOST_NET_VQ_RX);
+}
+
+static int vhost_net_release(struct inode *inode, struct file *f)
+{
+	struct vhost_net *n = f->private_data;
+	struct socket *tx_sock;
+	struct socket *rx_sock;
+
+	vhost_net_stop(n, &tx_sock, &rx_sock);
+	vhost_net_flush(n);
+	vhost_dev_cleanup(&n->dev);
+	if (tx_sock)
+		fput(tx_sock->file);
+	if (rx_sock)
+		fput(rx_sock->file);
+	/* We do an extra flush before freeing memory,
+	 * since jobs can re-queue themselves. */
+	vhost_net_flush(n);
+	kfree(n);
+	return 0;
+}
+
+static struct socket *get_raw_socket(int fd)
+{
+	struct {
+		struct sockaddr_ll sa;
+		char  buf[MAX_ADDR_LEN];
+	} uaddr;
+	int uaddr_len = sizeof uaddr, r;
+	struct socket *sock = sockfd_lookup(fd, &r);
+	if (!sock)
+		return ERR_PTR(-ENOTSOCK);
+
+	/* Parameter checking */
+	if (sock->sk->sk_type != SOCK_RAW) {
+		r = -ESOCKTNOSUPPORT;
+		goto err;
+	}
+
+	r = sock->ops->getname(sock, (struct sockaddr *)&uaddr.sa,
+			       &uaddr_len, 0);
+	if (r)
+		goto err;
+
+	if (uaddr.sa.sll_family != AF_PACKET) {
+		r = -EPFNOSUPPORT;
+		goto err;
+	}
+	return sock;
+err:
+	fput(sock->file);
+	return ERR_PTR(r);
+}
+
+static struct socket *get_tun_socket(int fd)
+{
+	struct file *file = fget(fd);
+	struct socket *sock;
+	if (!file)
+		return ERR_PTR(-EBADF);
+	sock = tun_get_socket(file);
+	if (IS_ERR(sock))
+		fput(file);
+	return sock;
+}
+
+static struct socket *get_socket(int fd)
+{
+	struct socket *sock;
+	/* special case to disable backend */
+	if (fd == -1)
+		return NULL;
+	sock = get_raw_socket(fd);
+	if (!IS_ERR(sock))
+		return sock;
+	sock = get_tun_socket(fd);
+	if (!IS_ERR(sock))
+		return sock;
+	return ERR_PTR(-ENOTSOCK);
+}
+
+static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd)
+{
+	struct socket *sock, *oldsock;
+	struct vhost_virtqueue *vq;
+	int r;
+
+	mutex_lock(&n->dev.mutex);
+	r = vhost_dev_check_owner(&n->dev);
+	if (r)
+		goto err;
+
+	if (index >= VHOST_NET_VQ_MAX) {
+		r = -ENOBUFS;
+		goto err;
+	}
+	vq = n->vqs + index;
+	mutex_lock(&vq->mutex);
+
+	/* Verify that ring has been setup correctly. */
+	if (!vhost_vq_access_ok(vq)) {
+		r = -EFAULT;
+		goto err;
+	}
+	sock = get_socket(fd);
+	if (IS_ERR(sock)) {
+		r = PTR_ERR(sock);
+		goto err;
+	}
+
+	/* start polling new socket */
+	oldsock = vq->private_data;
+	if (sock == oldsock)
+		goto done;
+
+	vhost_net_disable_vq(n, vq);
+	rcu_assign_pointer(vq->private_data, sock);
+	vhost_net_enable_vq(n, vq);
+	mutex_unlock(&vq->mutex);
+done:
+	if (oldsock) {
+		vhost_net_flush_vq(n, index);
+		fput(oldsock->file);
+	}
+err:
+	mutex_unlock(&n->dev.mutex);
+	return r;
+}
+
+static long vhost_net_reset_owner(struct vhost_net *n)
+{
+	struct socket *tx_sock = NULL;
+	struct socket *rx_sock = NULL;
+	long err;
+	mutex_lock(&n->dev.mutex);
+	err = vhost_dev_check_owner(&n->dev);
+	if (err)
+		goto done;
+	vhost_net_stop(n, &tx_sock, &rx_sock);
+	vhost_net_flush(n);
+	err = vhost_dev_reset_owner(&n->dev);
+done:
+	mutex_unlock(&n->dev.mutex);
+	if (tx_sock)
+		fput(tx_sock->file);
+	if (rx_sock)
+		fput(rx_sock->file);
+	return err;
+}
+
+static int vhost_net_set_features(struct vhost_net *n, u64 features)
+{
+	size_t hdr_size = features & (1 << VHOST_NET_F_VIRTIO_NET_HDR) ?
+		sizeof(struct virtio_net_hdr) : 0;
+	int i;
+	mutex_lock(&n->dev.mutex);
+	if ((features & (1 << VHOST_F_LOG_ALL)) &&
+	    !vhost_log_access_ok(&n->dev)) {
+		mutex_unlock(&n->dev.mutex);
+		return -EFAULT;
+	}
+	n->dev.acked_features = features;
+	smp_wmb();
+	for (i = 0; i < VHOST_NET_VQ_MAX; ++i) {
+		mutex_lock(&n->vqs[i].mutex);
+		n->vqs[i].hdr_size = hdr_size;
+		mutex_unlock(&n->vqs[i].mutex);
+	}
+	vhost_net_flush(n);
+	mutex_unlock(&n->dev.mutex);
+	return 0;
+}
+
+static long vhost_net_ioctl(struct file *f, unsigned int ioctl,
+			    unsigned long arg)
+{
+	struct vhost_net *n = f->private_data;
+	void __user *argp = (void __user *)arg;
+	u64 __user *featurep = argp;
+	struct vhost_vring_file backend;
+	u64 features;
+	int r;
+	switch (ioctl) {
+	case VHOST_NET_SET_BACKEND:
+		r = copy_from_user(&backend, argp, sizeof backend);
+		if (r < 0)
+			return r;
+		return vhost_net_set_backend(n, backend.index, backend.fd);
+	case VHOST_GET_FEATURES:
+		features = VHOST_FEATURES;
+		return copy_to_user(featurep, &features, sizeof features);
+	case VHOST_SET_FEATURES:
+		r = copy_from_user(&features, featurep, sizeof features);
+		if (r < 0)
+			return r;
+		if (features & ~VHOST_FEATURES)
+			return -EOPNOTSUPP;
+		return vhost_net_set_features(n, features);
+	case VHOST_RESET_OWNER:
+		return vhost_net_reset_owner(n);
+	default:
+		mutex_lock(&n->dev.mutex);
+		r = vhost_dev_ioctl(&n->dev, ioctl, arg);
+		vhost_net_flush(n);
+		mutex_unlock(&n->dev.mutex);
+		return r;
+	}
+}
+
+#ifdef CONFIG_COMPAT
+static long vhost_net_compat_ioctl(struct file *f, unsigned int ioctl,
+				   unsigned long arg)
+{
+	return vhost_net_ioctl(f, ioctl, (unsigned long)compat_ptr(arg));
+}
+#endif
+
+const static struct file_operations vhost_net_fops = {
+	.owner          = THIS_MODULE,
+	.release        = vhost_net_release,
+	.unlocked_ioctl = vhost_net_ioctl,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl   = vhost_net_compat_ioctl,
+#endif
+	.open           = vhost_net_open,
+};
+
+static struct miscdevice vhost_net_misc = {
+	VHOST_NET_MINOR,
+	"vhost-net",
+	&vhost_net_fops,
+};
+
+int vhost_net_init(void)
+{
+	int r = vhost_init();
+	if (r)
+		goto err_init;
+	r = misc_register(&vhost_net_misc);
+	if (r)
+		goto err_reg;
+	return 0;
+err_reg:
+	vhost_cleanup();
+err_init:
+	return r;
+
+}
+module_init(vhost_net_init);
+
+void vhost_net_exit(void)
+{
+	misc_deregister(&vhost_net_misc);
+	vhost_cleanup();
+}
+module_exit(vhost_net_exit);
+
+MODULE_VERSION("0.0.1");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Michael S. Tsirkin");
+MODULE_DESCRIPTION("Host kernel accelerator for virtio net");
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
new file mode 100644
index 000000000000..c8c25dbc5857
--- /dev/null
+++ b/drivers/vhost/vhost.c
@@ -0,0 +1,1098 @@
+/* Copyright (C) 2009 Red Hat, Inc.
+ * Copyright (C) 2006 Rusty Russell IBM Corporation
+ *
+ * Author: Michael S. Tsirkin <mst@redhat.com>
+ *
+ * Inspiration, some code, and most witty comments come from
+ * Documentation/lguest/lguest.c, by Rusty Russell
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * Generic code for virtio server in host kernel.
+ */
+
+#include <linux/eventfd.h>
+#include <linux/vhost.h>
+#include <linux/virtio_net.h>
+#include <linux/mm.h>
+#include <linux/miscdevice.h>
+#include <linux/mutex.h>
+#include <linux/workqueue.h>
+#include <linux/rcupdate.h>
+#include <linux/poll.h>
+#include <linux/file.h>
+#include <linux/highmem.h>
+
+#include <linux/net.h>
+#include <linux/if_packet.h>
+#include <linux/if_arp.h>
+
+#include <net/sock.h>
+
+#include "vhost.h"
+
+enum {
+	VHOST_MEMORY_MAX_NREGIONS = 64,
+	VHOST_MEMORY_F_LOG = 0x1,
+};
+
+static struct workqueue_struct *vhost_workqueue;
+
+static void vhost_poll_func(struct file *file, wait_queue_head_t *wqh,
+			    poll_table *pt)
+{
+	struct vhost_poll *poll;
+	poll = container_of(pt, struct vhost_poll, table);
+
+	poll->wqh = wqh;
+	add_wait_queue(wqh, &poll->wait);
+}
+
+static int vhost_poll_wakeup(wait_queue_t *wait, unsigned mode, int sync,
+			     void *key)
+{
+	struct vhost_poll *poll;
+	poll = container_of(wait, struct vhost_poll, wait);
+	if (!((unsigned long)key & poll->mask))
+		return 0;
+
+	queue_work(vhost_workqueue, &poll->work);
+	return 0;
+}
+
+/* Init poll structure */
+void vhost_poll_init(struct vhost_poll *poll, work_func_t func,
+		     unsigned long mask)
+{
+	INIT_WORK(&poll->work, func);
+	init_waitqueue_func_entry(&poll->wait, vhost_poll_wakeup);
+	init_poll_funcptr(&poll->table, vhost_poll_func);
+	poll->mask = mask;
+}
+
+/* Start polling a file. We add ourselves to file's wait queue. The caller must
+ * keep a reference to a file until after vhost_poll_stop is called. */
+void vhost_poll_start(struct vhost_poll *poll, struct file *file)
+{
+	unsigned long mask;
+	mask = file->f_op->poll(file, &poll->table);
+	if (mask)
+		vhost_poll_wakeup(&poll->wait, 0, 0, (void *)mask);
+}
+
+/* Stop polling a file. After this function returns, it becomes safe to drop the
+ * file reference. You must also flush afterwards. */
+void vhost_poll_stop(struct vhost_poll *poll)
+{
+	remove_wait_queue(poll->wqh, &poll->wait);
+}
+
+/* Flush any work that has been scheduled. When calling this, don't hold any
+ * locks that are also used by the callback. */
+void vhost_poll_flush(struct vhost_poll *poll)
+{
+	flush_work(&poll->work);
+}
+
+void vhost_poll_queue(struct vhost_poll *poll)
+{
+	queue_work(vhost_workqueue, &poll->work);
+}
+
+static void vhost_vq_reset(struct vhost_dev *dev,
+			   struct vhost_virtqueue *vq)
+{
+	vq->num = 1;
+	vq->desc = NULL;
+	vq->avail = NULL;
+	vq->used = NULL;
+	vq->last_avail_idx = 0;
+	vq->avail_idx = 0;
+	vq->last_used_idx = 0;
+	vq->used_flags = 0;
+	vq->used_flags = 0;
+	vq->log_used = false;
+	vq->log_addr = -1ull;
+	vq->hdr_size = 0;
+	vq->private_data = NULL;
+	vq->log_base = NULL;
+	vq->error_ctx = NULL;
+	vq->error = NULL;
+	vq->kick = NULL;
+	vq->call_ctx = NULL;
+	vq->call = NULL;
+}
+
+long vhost_dev_init(struct vhost_dev *dev,
+		    struct vhost_virtqueue *vqs, int nvqs)
+{
+	int i;
+	dev->vqs = vqs;
+	dev->nvqs = nvqs;
+	mutex_init(&dev->mutex);
+	dev->log_ctx = NULL;
+	dev->log_file = NULL;
+	dev->memory = NULL;
+	dev->mm = NULL;
+
+	for (i = 0; i < dev->nvqs; ++i) {
+		dev->vqs[i].dev = dev;
+		mutex_init(&dev->vqs[i].mutex);
+		vhost_vq_reset(dev, dev->vqs + i);
+		if (dev->vqs[i].handle_kick)
+			vhost_poll_init(&dev->vqs[i].poll,
+					dev->vqs[i].handle_kick,
+					POLLIN);
+	}
+	return 0;
+}
+
+/* Caller should have device mutex */
+long vhost_dev_check_owner(struct vhost_dev *dev)
+{
+	/* Are you the owner? If not, I don't think you mean to do that */
+	return dev->mm == current->mm ? 0 : -EPERM;
+}
+
+/* Caller should have device mutex */
+static long vhost_dev_set_owner(struct vhost_dev *dev)
+{
+	/* Is there an owner already? */
+	if (dev->mm)
+		return -EBUSY;
+	/* No owner, become one */
+	dev->mm = get_task_mm(current);
+	return 0;
+}
+
+/* Caller should have device mutex */
+long vhost_dev_reset_owner(struct vhost_dev *dev)
+{
+	struct vhost_memory *memory;
+
+	/* Restore memory to default empty mapping. */
+	memory = kmalloc(offsetof(struct vhost_memory, regions), GFP_KERNEL);
+	if (!memory)
+		return -ENOMEM;
+
+	vhost_dev_cleanup(dev);
+
+	memory->nregions = 0;
+	dev->memory = memory;
+	return 0;
+}
+
+/* Caller should have device mutex */
+void vhost_dev_cleanup(struct vhost_dev *dev)
+{
+	int i;
+	for (i = 0; i < dev->nvqs; ++i) {
+		if (dev->vqs[i].kick && dev->vqs[i].handle_kick) {
+			vhost_poll_stop(&dev->vqs[i].poll);
+			vhost_poll_flush(&dev->vqs[i].poll);
+		}
+		if (dev->vqs[i].error_ctx)
+			eventfd_ctx_put(dev->vqs[i].error_ctx);
+		if (dev->vqs[i].error)
+			fput(dev->vqs[i].error);
+		if (dev->vqs[i].kick)
+			fput(dev->vqs[i].kick);
+		if (dev->vqs[i].call_ctx)
+			eventfd_ctx_put(dev->vqs[i].call_ctx);
+		if (dev->vqs[i].call)
+			fput(dev->vqs[i].call);
+		vhost_vq_reset(dev, dev->vqs + i);
+	}
+	if (dev->log_ctx)
+		eventfd_ctx_put(dev->log_ctx);
+	dev->log_ctx = NULL;
+	if (dev->log_file)
+		fput(dev->log_file);
+	dev->log_file = NULL;
+	/* No one will access memory at this point */
+	kfree(dev->memory);
+	dev->memory = NULL;
+	if (dev->mm)
+		mmput(dev->mm);
+	dev->mm = NULL;
+}
+
+static int log_access_ok(void __user *log_base, u64 addr, unsigned long sz)
+{
+	u64 a = addr / VHOST_PAGE_SIZE / 8;
+	/* Make sure 64 bit math will not overflow. */
+	if (a > ULONG_MAX - (unsigned long)log_base ||
+	    a + (unsigned long)log_base > ULONG_MAX)
+		return -EFAULT;
+
+	return access_ok(VERIFY_WRITE, log_base + a,
+			 (sz + VHOST_PAGE_SIZE * 8 - 1) / VHOST_PAGE_SIZE / 8);
+}
+
+/* Caller should have vq mutex and device mutex. */
+static int vq_memory_access_ok(void __user *log_base, struct vhost_memory *mem,
+			       int log_all)
+{
+	int i;
+	for (i = 0; i < mem->nregions; ++i) {
+		struct vhost_memory_region *m = mem->regions + i;
+		unsigned long a = m->userspace_addr;
+		if (m->memory_size > ULONG_MAX)
+			return 0;
+		else if (!access_ok(VERIFY_WRITE, (void __user *)a,
+				    m->memory_size))
+			return 0;
+		else if (log_all && !log_access_ok(log_base,
+						   m->guest_phys_addr,
+						   m->memory_size))
+			return 0;
+	}
+	return 1;
+}
+
+/* Can we switch to this memory table? */
+/* Caller should have device mutex but not vq mutex */
+static int memory_access_ok(struct vhost_dev *d, struct vhost_memory *mem,
+			    int log_all)
+{
+	int i;
+	for (i = 0; i < d->nvqs; ++i) {
+		int ok;
+		mutex_lock(&d->vqs[i].mutex);
+		/* If ring is inactive, will check when it's enabled. */
+		if (d->vqs[i].private_data)
+			ok = vq_memory_access_ok(d->vqs[i].log_base, mem,
+						 log_all);
+		else
+			ok = 1;
+		mutex_unlock(&d->vqs[i].mutex);
+		if (!ok)
+			return 0;
+	}
+	return 1;
+}
+
+static int vq_access_ok(unsigned int num,
+			struct vring_desc __user *desc,
+			struct vring_avail __user *avail,
+			struct vring_used __user *used)
+{
+	return access_ok(VERIFY_READ, desc, num * sizeof *desc) &&
+	       access_ok(VERIFY_READ, avail,
+			 sizeof *avail + num * sizeof *avail->ring) &&
+	       access_ok(VERIFY_WRITE, used,
+			sizeof *used + num * sizeof *used->ring);
+}
+
+/* Can we log writes? */
+/* Caller should have device mutex but not vq mutex */
+int vhost_log_access_ok(struct vhost_dev *dev)
+{
+	return memory_access_ok(dev, dev->memory, 1);
+}
+
+/* Verify access for write logging. */
+/* Caller should have vq mutex and device mutex */
+static int vq_log_access_ok(struct vhost_virtqueue *vq, void __user *log_base)
+{
+	return vq_memory_access_ok(log_base, vq->dev->memory,
+			    vhost_has_feature(vq->dev, VHOST_F_LOG_ALL)) &&
+		(!vq->log_used || log_access_ok(log_base, vq->log_addr,
+					sizeof *vq->used +
+					vq->num * sizeof *vq->used->ring));
+}
+
+/* Can we start vq? */
+/* Caller should have vq mutex and device mutex */
+int vhost_vq_access_ok(struct vhost_virtqueue *vq)
+{
+	return vq_access_ok(vq->num, vq->desc, vq->avail, vq->used) &&
+		vq_log_access_ok(vq, vq->log_base);
+}
+
+static long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m)
+{
+	struct vhost_memory mem, *newmem, *oldmem;
+	unsigned long size = offsetof(struct vhost_memory, regions);
+	long r;
+	r = copy_from_user(&mem, m, size);
+	if (r)
+		return r;
+	if (mem.padding)
+		return -EOPNOTSUPP;
+	if (mem.nregions > VHOST_MEMORY_MAX_NREGIONS)
+		return -E2BIG;
+	newmem = kmalloc(size + mem.nregions * sizeof *m->regions, GFP_KERNEL);
+	if (!newmem)
+		return -ENOMEM;
+
+	memcpy(newmem, &mem, size);
+	r = copy_from_user(newmem->regions, m->regions,
+			   mem.nregions * sizeof *m->regions);
+	if (r) {
+		kfree(newmem);
+		return r;
+	}
+
+	if (!memory_access_ok(d, newmem, vhost_has_feature(d, VHOST_F_LOG_ALL)))
+		return -EFAULT;
+	oldmem = d->memory;
+	rcu_assign_pointer(d->memory, newmem);
+	synchronize_rcu();
+	kfree(oldmem);
+	return 0;
+}
+
+static int init_used(struct vhost_virtqueue *vq,
+		     struct vring_used __user *used)
+{
+	int r = put_user(vq->used_flags, &used->flags);
+	if (r)
+		return r;
+	return get_user(vq->last_used_idx, &used->idx);
+}
+
+static long vhost_set_vring(struct vhost_dev *d, int ioctl, void __user *argp)
+{
+	struct file *eventfp, *filep = NULL,
+		    *pollstart = NULL, *pollstop = NULL;
+	struct eventfd_ctx *ctx = NULL;
+	u32 __user *idxp = argp;
+	struct vhost_virtqueue *vq;
+	struct vhost_vring_state s;
+	struct vhost_vring_file f;
+	struct vhost_vring_addr a;
+	u32 idx;
+	long r;
+
+	r = get_user(idx, idxp);
+	if (r < 0)
+		return r;
+	if (idx > d->nvqs)
+		return -ENOBUFS;
+
+	vq = d->vqs + idx;
+
+	mutex_lock(&vq->mutex);
+
+	switch (ioctl) {
+	case VHOST_SET_VRING_NUM:
+		/* Resizing ring with an active backend?
+		 * You don't want to do that. */
+		if (vq->private_data) {
+			r = -EBUSY;
+			break;
+		}
+		r = copy_from_user(&s, argp, sizeof s);
+		if (r < 0)
+			break;
+		if (!s.num || s.num > 0xffff || (s.num & (s.num - 1))) {
+			r = -EINVAL;
+			break;
+		}
+		vq->num = s.num;
+		break;
+	case VHOST_SET_VRING_BASE:
+		/* Moving base with an active backend?
+		 * You don't want to do that. */
+		if (vq->private_data) {
+			r = -EBUSY;
+			break;
+		}
+		r = copy_from_user(&s, argp, sizeof s);
+		if (r < 0)
+			break;
+		if (s.num > 0xffff) {
+			r = -EINVAL;
+			break;
+		}
+		vq->last_avail_idx = s.num;
+		/* Forget the cached index value. */
+		vq->avail_idx = vq->last_avail_idx;
+		break;
+	case VHOST_GET_VRING_BASE:
+		s.index = idx;
+		s.num = vq->last_avail_idx;
+		r = copy_to_user(argp, &s, sizeof s);
+		break;
+	case VHOST_SET_VRING_ADDR:
+		r = copy_from_user(&a, argp, sizeof a);
+		if (r < 0)
+			break;
+		if (a.flags & ~(0x1 << VHOST_VRING_F_LOG)) {
+			r = -EOPNOTSUPP;
+			break;
+		}
+		/* For 32bit, verify that the top 32bits of the user
+		   data are set to zero. */
+		if ((u64)(unsigned long)a.desc_user_addr != a.desc_user_addr ||
+		    (u64)(unsigned long)a.used_user_addr != a.used_user_addr ||
+		    (u64)(unsigned long)a.avail_user_addr != a.avail_user_addr) {
+			r = -EFAULT;
+			break;
+		}
+		if ((a.avail_user_addr & (sizeof *vq->avail->ring - 1)) ||
+		    (a.used_user_addr & (sizeof *vq->used->ring - 1)) ||
+		    (a.log_guest_addr & (sizeof *vq->used->ring - 1))) {
+			r = -EINVAL;
+			break;
+		}
+
+		/* We only verify access here if backend is configured.
+		 * If it is not, we don't as size might not have been setup.
+		 * We will verify when backend is configured. */
+		if (vq->private_data) {
+			if (!vq_access_ok(vq->num,
+				(void __user *)(unsigned long)a.desc_user_addr,
+				(void __user *)(unsigned long)a.avail_user_addr,
+				(void __user *)(unsigned long)a.used_user_addr)) {
+				r = -EINVAL;
+				break;
+			}
+
+			/* Also validate log access for used ring if enabled. */
+			if ((a.flags & (0x1 << VHOST_VRING_F_LOG)) &&
+			    !log_access_ok(vq->log_base, a.log_guest_addr,
+					   sizeof *vq->used +
+					   vq->num * sizeof *vq->used->ring)) {
+				r = -EINVAL;
+				break;
+			}
+		}
+
+		r = init_used(vq, (struct vring_used __user *)(unsigned long)
+			      a.used_user_addr);
+		if (r)
+			break;
+		vq->log_used = !!(a.flags & (0x1 << VHOST_VRING_F_LOG));
+		vq->desc = (void __user *)(unsigned long)a.desc_user_addr;
+		vq->avail = (void __user *)(unsigned long)a.avail_user_addr;
+		vq->log_addr = a.log_guest_addr;
+		vq->used = (void __user *)(unsigned long)a.used_user_addr;
+		break;
+	case VHOST_SET_VRING_KICK:
+		r = copy_from_user(&f, argp, sizeof f);
+		if (r < 0)
+			break;
+		eventfp = f.fd == -1 ? NULL : eventfd_fget(f.fd);
+		if (IS_ERR(eventfp))
+			return PTR_ERR(eventfp);
+		if (eventfp != vq->kick) {
+			pollstop = filep = vq->kick;
+			pollstart = vq->kick = eventfp;
+		} else
+			filep = eventfp;
+		break;
+	case VHOST_SET_VRING_CALL:
+		r = copy_from_user(&f, argp, sizeof f);
+		if (r < 0)
+			break;
+		eventfp = f.fd == -1 ? NULL : eventfd_fget(f.fd);
+		if (IS_ERR(eventfp))
+			return PTR_ERR(eventfp);
+		if (eventfp != vq->call) {
+			filep = vq->call;
+			ctx = vq->call_ctx;
+			vq->call = eventfp;
+			vq->call_ctx = eventfp ?
+				eventfd_ctx_fileget(eventfp) : NULL;
+		} else
+			filep = eventfp;
+		break;
+	case VHOST_SET_VRING_ERR:
+		r = copy_from_user(&f, argp, sizeof f);
+		if (r < 0)
+			break;
+		eventfp = f.fd == -1 ? NULL : eventfd_fget(f.fd);
+		if (IS_ERR(eventfp))
+			return PTR_ERR(eventfp);
+		if (eventfp != vq->error) {
+			filep = vq->error;
+			vq->error = eventfp;
+			ctx = vq->error_ctx;
+			vq->error_ctx = eventfp ?
+				eventfd_ctx_fileget(eventfp) : NULL;
+		} else
+			filep = eventfp;
+		break;
+	default:
+		r = -ENOIOCTLCMD;
+	}
+
+	if (pollstop && vq->handle_kick)
+		vhost_poll_stop(&vq->poll);
+
+	if (ctx)
+		eventfd_ctx_put(ctx);
+	if (filep)
+		fput(filep);
+
+	if (pollstart && vq->handle_kick)
+		vhost_poll_start(&vq->poll, vq->kick);
+
+	mutex_unlock(&vq->mutex);
+
+	if (pollstop && vq->handle_kick)
+		vhost_poll_flush(&vq->poll);
+	return r;
+}
+
+/* Caller must have device mutex */
+long vhost_dev_ioctl(struct vhost_dev *d, unsigned int ioctl, unsigned long arg)
+{
+	void __user *argp = (void __user *)arg;
+	struct file *eventfp, *filep = NULL;
+	struct eventfd_ctx *ctx = NULL;
+	u64 p;
+	long r;
+	int i, fd;
+
+	/* If you are not the owner, you can become one */
+	if (ioctl == VHOST_SET_OWNER) {
+		r = vhost_dev_set_owner(d);
+		goto done;
+	}
+
+	/* You must be the owner to do anything else */
+	r = vhost_dev_check_owner(d);
+	if (r)
+		goto done;
+
+	switch (ioctl) {
+	case VHOST_SET_MEM_TABLE:
+		r = vhost_set_memory(d, argp);
+		break;
+	case VHOST_SET_LOG_BASE:
+		r = copy_from_user(&p, argp, sizeof p);
+		if (r < 0)
+			break;
+		if ((u64)(unsigned long)p != p) {
+			r = -EFAULT;
+			break;
+		}
+		for (i = 0; i < d->nvqs; ++i) {
+			struct vhost_virtqueue *vq;
+			void __user *base = (void __user *)(unsigned long)p;
+			vq = d->vqs + i;
+			mutex_lock(&vq->mutex);
+			/* If ring is inactive, will check when it's enabled. */
+			if (vq->private_data && !vq_log_access_ok(vq, base))
+				r = -EFAULT;
+			else
+				vq->log_base = base;
+			mutex_unlock(&vq->mutex);
+		}
+		break;
+	case VHOST_SET_LOG_FD:
+		r = get_user(fd, (int __user *)argp);
+		if (r < 0)
+			break;
+		eventfp = fd == -1 ? NULL : eventfd_fget(fd);
+		if (IS_ERR(eventfp)) {
+			r = PTR_ERR(eventfp);
+			break;
+		}
+		if (eventfp != d->log_file) {
+			filep = d->log_file;
+			ctx = d->log_ctx;
+			d->log_ctx = eventfp ?
+				eventfd_ctx_fileget(eventfp) : NULL;
+		} else
+			filep = eventfp;
+		for (i = 0; i < d->nvqs; ++i) {
+			mutex_lock(&d->vqs[i].mutex);
+			d->vqs[i].log_ctx = d->log_ctx;
+			mutex_unlock(&d->vqs[i].mutex);
+		}
+		if (ctx)
+			eventfd_ctx_put(ctx);
+		if (filep)
+			fput(filep);
+		break;
+	default:
+		r = vhost_set_vring(d, ioctl, argp);
+		break;
+	}
+done:
+	return r;
+}
+
+static const struct vhost_memory_region *find_region(struct vhost_memory *mem,
+						     __u64 addr, __u32 len)
+{
+	struct vhost_memory_region *reg;
+	int i;
+	/* linear search is not brilliant, but we really have on the order of 6
+	 * regions in practice */
+	for (i = 0; i < mem->nregions; ++i) {
+		reg = mem->regions + i;
+		if (reg->guest_phys_addr <= addr &&
+		    reg->guest_phys_addr + reg->memory_size - 1 >= addr)
+			return reg;
+	}
+	return NULL;
+}
+
+/* TODO: This is really inefficient.  We need something like get_user()
+ * (instruction directly accesses the data, with an exception table entry
+ * returning -EFAULT). See Documentation/x86/exception-tables.txt.
+ */
+static int set_bit_to_user(int nr, void __user *addr)
+{
+	unsigned long log = (unsigned long)addr;
+	struct page *page;
+	void *base;
+	int bit = nr + (log % PAGE_SIZE) * 8;
+	int r;
+	r = get_user_pages_fast(log, 1, 1, &page);
+	if (r)
+		return r;
+	base = kmap_atomic(page, KM_USER0);
+	set_bit(bit, base);
+	kunmap_atomic(base, KM_USER0);
+	set_page_dirty_lock(page);
+	put_page(page);
+	return 0;
+}
+
+static int log_write(void __user *log_base,
+		     u64 write_address, u64 write_length)
+{
+	int r;
+	if (!write_length)
+		return 0;
+	write_address /= VHOST_PAGE_SIZE;
+	for (;;) {
+		u64 base = (u64)(unsigned long)log_base;
+		u64 log = base + write_address / 8;
+		int bit = write_address % 8;
+		if ((u64)(unsigned long)log != log)
+			return -EFAULT;
+		r = set_bit_to_user(bit, (void __user *)(unsigned long)log);
+		if (r < 0)
+			return r;
+		if (write_length <= VHOST_PAGE_SIZE)
+			break;
+		write_length -= VHOST_PAGE_SIZE;
+		write_address += VHOST_PAGE_SIZE;
+	}
+	return r;
+}
+
+int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log,
+		    unsigned int log_num, u64 len)
+{
+	int i, r;
+
+	/* Make sure data written is seen before log. */
+	wmb();
+	for (i = 0; i < log_num; ++i) {
+		u64 l = min(log[i].len, len);
+		r = log_write(vq->log_base, log[i].addr, l);
+		if (r < 0)
+			return r;
+		len -= l;
+		if (!len)
+			return 0;
+	}
+	if (vq->log_ctx)
+		eventfd_signal(vq->log_ctx, 1);
+	/* Length written exceeds what we have stored. This is a bug. */
+	BUG();
+	return 0;
+}
+
+int translate_desc(struct vhost_dev *dev, u64 addr, u32 len,
+		   struct iovec iov[], int iov_size)
+{
+	const struct vhost_memory_region *reg;
+	struct vhost_memory *mem;
+	struct iovec *_iov;
+	u64 s = 0;
+	int ret = 0;
+
+	rcu_read_lock();
+
+	mem = rcu_dereference(dev->memory);
+	while ((u64)len > s) {
+		u64 size;
+		if (ret >= iov_size) {
+			ret = -ENOBUFS;
+			break;
+		}
+		reg = find_region(mem, addr, len);
+		if (!reg) {
+			ret = -EFAULT;
+			break;
+		}
+		_iov = iov + ret;
+		size = reg->memory_size - addr + reg->guest_phys_addr;
+		_iov->iov_len = min((u64)len, size);
+		_iov->iov_base = (void *)(unsigned long)
+			(reg->userspace_addr + addr - reg->guest_phys_addr);
+		s += size;
+		addr += size;
+		++ret;
+	}
+
+	rcu_read_unlock();
+	return ret;
+}
+
+/* Each buffer in the virtqueues is actually a chain of descriptors.  This
+ * function returns the next descriptor in the chain,
+ * or -1U if we're at the end. */
+static unsigned next_desc(struct vring_desc *desc)
+{
+	unsigned int next;
+
+	/* If this descriptor says it doesn't chain, we're done. */
+	if (!(desc->flags & VRING_DESC_F_NEXT))
+		return -1U;
+
+	/* Check they're not leading us off end of descriptors. */
+	next = desc->next;
+	/* Make sure compiler knows to grab that: we don't want it changing! */
+	/* We will use the result as an index in an array, so most
+	 * architectures only need a compiler barrier here. */
+	read_barrier_depends();
+
+	return next;
+}
+
+static unsigned get_indirect(struct vhost_dev *dev, struct vhost_virtqueue *vq,
+			     struct iovec iov[], unsigned int iov_size,
+			     unsigned int *out_num, unsigned int *in_num,
+			     struct vhost_log *log, unsigned int *log_num,
+			     struct vring_desc *indirect)
+{
+	struct vring_desc desc;
+	unsigned int i = 0, count, found = 0;
+	int ret;
+
+	/* Sanity check */
+	if (indirect->len % sizeof desc) {
+		vq_err(vq, "Invalid length in indirect descriptor: "
+		       "len 0x%llx not multiple of 0x%zx\n",
+		       (unsigned long long)indirect->len,
+		       sizeof desc);
+		return -EINVAL;
+	}
+
+	ret = translate_desc(dev, indirect->addr, indirect->len, vq->indirect,
+			     ARRAY_SIZE(vq->indirect));
+	if (ret < 0) {
+		vq_err(vq, "Translation failure %d in indirect.\n", ret);
+		return ret;
+	}
+
+	/* We will use the result as an address to read from, so most
+	 * architectures only need a compiler barrier here. */
+	read_barrier_depends();
+
+	count = indirect->len / sizeof desc;
+	/* Buffers are chained via a 16 bit next field, so
+	 * we can have at most 2^16 of these. */
+	if (count > USHORT_MAX + 1) {
+		vq_err(vq, "Indirect buffer length too big: %d\n",
+		       indirect->len);
+		return -E2BIG;
+	}
+
+	do {
+		unsigned iov_count = *in_num + *out_num;
+		if (++found > count) {
+			vq_err(vq, "Loop detected: last one at %u "
+			       "indirect size %u\n",
+			       i, count);
+			return -EINVAL;
+		}
+		if (memcpy_fromiovec((unsigned char *)&desc, vq->indirect,
+				     sizeof desc)) {
+			vq_err(vq, "Failed indirect descriptor: idx %d, %zx\n",
+			       i, (size_t)indirect->addr + i * sizeof desc);
+			return -EINVAL;
+		}
+		if (desc.flags & VRING_DESC_F_INDIRECT) {
+			vq_err(vq, "Nested indirect descriptor: idx %d, %zx\n",
+			       i, (size_t)indirect->addr + i * sizeof desc);
+			return -EINVAL;
+		}
+
+		ret = translate_desc(dev, desc.addr, desc.len, iov + iov_count,
+				     iov_size - iov_count);
+		if (ret < 0) {
+			vq_err(vq, "Translation failure %d indirect idx %d\n",
+			       ret, i);
+			return ret;
+		}
+		/* If this is an input descriptor, increment that count. */
+		if (desc.flags & VRING_DESC_F_WRITE) {
+			*in_num += ret;
+			if (unlikely(log)) {
+				log[*log_num].addr = desc.addr;
+				log[*log_num].len = desc.len;
+				++*log_num;
+			}
+		} else {
+			/* If it's an output descriptor, they're all supposed
+			 * to come before any input descriptors. */
+			if (*in_num) {
+				vq_err(vq, "Indirect descriptor "
+				       "has out after in: idx %d\n", i);
+				return -EINVAL;
+			}
+			*out_num += ret;
+		}
+	} while ((i = next_desc(&desc)) != -1);
+	return 0;
+}
+
+/* This looks in the virtqueue and for the first available buffer, and converts
+ * it to an iovec for convenient access.  Since descriptors consist of some
+ * number of output then some number of input descriptors, it's actually two
+ * iovecs, but we pack them into one and note how many of each there were.
+ *
+ * This function returns the descriptor number found, or vq->num (which
+ * is never a valid descriptor number) if none was found. */
+unsigned vhost_get_vq_desc(struct vhost_dev *dev, struct vhost_virtqueue *vq,
+			   struct iovec iov[], unsigned int iov_size,
+			   unsigned int *out_num, unsigned int *in_num,
+			   struct vhost_log *log, unsigned int *log_num)
+{
+	struct vring_desc desc;
+	unsigned int i, head, found = 0;
+	u16 last_avail_idx;
+	int ret;
+
+	/* Check it isn't doing very strange things with descriptor numbers. */
+	last_avail_idx = vq->last_avail_idx;
+	if (get_user(vq->avail_idx, &vq->avail->idx)) {
+		vq_err(vq, "Failed to access avail idx at %p\n",
+		       &vq->avail->idx);
+		return vq->num;
+	}
+
+	if ((u16)(vq->avail_idx - last_avail_idx) > vq->num) {
+		vq_err(vq, "Guest moved used index from %u to %u",
+		       last_avail_idx, vq->avail_idx);
+		return vq->num;
+	}
+
+	/* If there's nothing new since last we looked, return invalid. */
+	if (vq->avail_idx == last_avail_idx)
+		return vq->num;
+
+	/* Only get avail ring entries after they have been exposed by guest. */
+	rmb();
+
+	/* Grab the next descriptor number they're advertising, and increment
+	 * the index we've seen. */
+	if (get_user(head, &vq->avail->ring[last_avail_idx % vq->num])) {
+		vq_err(vq, "Failed to read head: idx %d address %p\n",
+		       last_avail_idx,
+		       &vq->avail->ring[last_avail_idx % vq->num]);
+		return vq->num;
+	}
+
+	/* If their number is silly, that's an error. */
+	if (head >= vq->num) {
+		vq_err(vq, "Guest says index %u > %u is available",
+		       head, vq->num);
+		return vq->num;
+	}
+
+	/* When we start there are none of either input nor output. */
+	*out_num = *in_num = 0;
+	if (unlikely(log))
+		*log_num = 0;
+
+	i = head;
+	do {
+		unsigned iov_count = *in_num + *out_num;
+		if (i >= vq->num) {
+			vq_err(vq, "Desc index is %u > %u, head = %u",
+			       i, vq->num, head);
+			return vq->num;
+		}
+		if (++found > vq->num) {
+			vq_err(vq, "Loop detected: last one at %u "
+			       "vq size %u head %u\n",
+			       i, vq->num, head);
+			return vq->num;
+		}
+		ret = copy_from_user(&desc, vq->desc + i, sizeof desc);
+		if (ret) {
+			vq_err(vq, "Failed to get descriptor: idx %d addr %p\n",
+			       i, vq->desc + i);
+			return vq->num;
+		}
+		if (desc.flags & VRING_DESC_F_INDIRECT) {
+			ret = get_indirect(dev, vq, iov, iov_size,
+					   out_num, in_num,
+					   log, log_num, &desc);
+			if (ret < 0) {
+				vq_err(vq, "Failure detected "
+				       "in indirect descriptor at idx %d\n", i);
+				return vq->num;
+			}
+			continue;
+		}
+
+		ret = translate_desc(dev, desc.addr, desc.len, iov + iov_count,
+				     iov_size - iov_count);
+		if (ret < 0) {
+			vq_err(vq, "Translation failure %d descriptor idx %d\n",
+			       ret, i);
+			return vq->num;
+		}
+		if (desc.flags & VRING_DESC_F_WRITE) {
+			/* If this is an input descriptor,
+			 * increment that count. */
+			*in_num += ret;
+			if (unlikely(log)) {
+				log[*log_num].addr = desc.addr;
+				log[*log_num].len = desc.len;
+				++*log_num;
+			}
+		} else {
+			/* If it's an output descriptor, they're all supposed
+			 * to come before any input descriptors. */
+			if (*in_num) {
+				vq_err(vq, "Descriptor has out after in: "
+				       "idx %d\n", i);
+				return vq->num;
+			}
+			*out_num += ret;
+		}
+	} while ((i = next_desc(&desc)) != -1);
+
+	/* On success, increment avail index. */
+	vq->last_avail_idx++;
+	return head;
+}
+
+/* Reverse the effect of vhost_get_vq_desc. Useful for error handling. */
+void vhost_discard_vq_desc(struct vhost_virtqueue *vq)
+{
+	vq->last_avail_idx--;
+}
+
+/* After we've used one of their buffers, we tell them about it.  We'll then
+ * want to notify the guest, using eventfd. */
+int vhost_add_used(struct vhost_virtqueue *vq, unsigned int head, int len)
+{
+	struct vring_used_elem *used;
+
+	/* The virtqueue contains a ring of used buffers.  Get a pointer to the
+	 * next entry in that used ring. */
+	used = &vq->used->ring[vq->last_used_idx % vq->num];
+	if (put_user(head, &used->id)) {
+		vq_err(vq, "Failed to write used id");
+		return -EFAULT;
+	}
+	if (put_user(len, &used->len)) {
+		vq_err(vq, "Failed to write used len");
+		return -EFAULT;
+	}
+	/* Make sure buffer is written before we update index. */
+	wmb();
+	if (put_user(vq->last_used_idx + 1, &vq->used->idx)) {
+		vq_err(vq, "Failed to increment used idx");
+		return -EFAULT;
+	}
+	if (unlikely(vq->log_used)) {
+		/* Make sure data is seen before log. */
+		wmb();
+		log_write(vq->log_base, vq->log_addr + sizeof *vq->used->ring *
+			  (vq->last_used_idx % vq->num),
+			  sizeof *vq->used->ring);
+		log_write(vq->log_base, vq->log_addr, sizeof *vq->used->ring);
+		if (vq->log_ctx)
+			eventfd_signal(vq->log_ctx, 1);
+	}
+	vq->last_used_idx++;
+	return 0;
+}
+
+/* This actually signals the guest, using eventfd. */
+void vhost_signal(struct vhost_dev *dev, struct vhost_virtqueue *vq)
+{
+	__u16 flags = 0;
+	if (get_user(flags, &vq->avail->flags)) {
+		vq_err(vq, "Failed to get flags");
+		return;
+	}
+
+	/* If they don't want an interrupt, don't signal, unless empty. */
+	if ((flags & VRING_AVAIL_F_NO_INTERRUPT) &&
+	    (vq->avail_idx != vq->last_avail_idx ||
+	     !vhost_has_feature(dev, VIRTIO_F_NOTIFY_ON_EMPTY)))
+		return;
+
+	/* Signal the Guest tell them we used something up. */
+	if (vq->call_ctx)
+		eventfd_signal(vq->call_ctx, 1);
+}
+
+/* And here's the combo meal deal.  Supersize me! */
+void vhost_add_used_and_signal(struct vhost_dev *dev,
+			       struct vhost_virtqueue *vq,
+			       unsigned int head, int len)
+{
+	vhost_add_used(vq, head, len);
+	vhost_signal(dev, vq);
+}
+
+/* OK, now we need to know about added descriptors. */
+bool vhost_enable_notify(struct vhost_virtqueue *vq)
+{
+	u16 avail_idx;
+	int r;
+	if (!(vq->used_flags & VRING_USED_F_NO_NOTIFY))
+		return false;
+	vq->used_flags &= ~VRING_USED_F_NO_NOTIFY;
+	r = put_user(vq->used_flags, &vq->used->flags);
+	if (r) {
+		vq_err(vq, "Failed to enable notification at %p: %d\n",
+		       &vq->used->flags, r);
+		return false;
+	}
+	/* They could have slipped one in as we were doing that: make
+	 * sure it's written, then check again. */
+	mb();
+	r = get_user(avail_idx, &vq->avail->idx);
+	if (r) {
+		vq_err(vq, "Failed to check avail idx at %p: %d\n",
+		       &vq->avail->idx, r);
+		return false;
+	}
+
+	return avail_idx != vq->last_avail_idx;
+}
+
+/* We don't need to be notified again. */
+void vhost_disable_notify(struct vhost_virtqueue *vq)
+{
+	int r;
+	if (vq->used_flags & VRING_USED_F_NO_NOTIFY)
+		return;
+	vq->used_flags |= VRING_USED_F_NO_NOTIFY;
+	r = put_user(vq->used_flags, &vq->used->flags);
+	if (r)
+		vq_err(vq, "Failed to enable notification at %p: %d\n",
+		       &vq->used->flags, r);
+}
+
+int vhost_init(void)
+{
+	vhost_workqueue = create_singlethread_workqueue("vhost");
+	if (!vhost_workqueue)
+		return -ENOMEM;
+	return 0;
+}
+
+void vhost_cleanup(void)
+{
+	destroy_workqueue(vhost_workqueue);
+}
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
new file mode 100644
index 000000000000..44591ba9b07a
--- /dev/null
+++ b/drivers/vhost/vhost.h
@@ -0,0 +1,161 @@
+#ifndef _VHOST_H
+#define _VHOST_H
+
+#include <linux/eventfd.h>
+#include <linux/vhost.h>
+#include <linux/mm.h>
+#include <linux/mutex.h>
+#include <linux/workqueue.h>
+#include <linux/poll.h>
+#include <linux/file.h>
+#include <linux/skbuff.h>
+#include <linux/uio.h>
+#include <linux/virtio_config.h>
+#include <linux/virtio_ring.h>
+
+struct vhost_device;
+
+enum {
+	/* Enough place for all fragments, head, and virtio net header. */
+	VHOST_NET_MAX_SG = MAX_SKB_FRAGS + 2,
+};
+
+/* Poll a file (eventfd or socket) */
+/* Note: there's nothing vhost specific about this structure. */
+struct vhost_poll {
+	poll_table                table;
+	wait_queue_head_t        *wqh;
+	wait_queue_t              wait;
+	/* struct which will handle all actual work. */
+	struct work_struct        work;
+	unsigned long		  mask;
+};
+
+void vhost_poll_init(struct vhost_poll *poll, work_func_t func,
+		     unsigned long mask);
+void vhost_poll_start(struct vhost_poll *poll, struct file *file);
+void vhost_poll_stop(struct vhost_poll *poll);
+void vhost_poll_flush(struct vhost_poll *poll);
+void vhost_poll_queue(struct vhost_poll *poll);
+
+struct vhost_log {
+	u64 addr;
+	u64 len;
+};
+
+/* The virtqueue structure describes a queue attached to a device. */
+struct vhost_virtqueue {
+	struct vhost_dev *dev;
+
+	/* The actual ring of buffers. */
+	struct mutex mutex;
+	unsigned int num;
+	struct vring_desc __user *desc;
+	struct vring_avail __user *avail;
+	struct vring_used __user *used;
+	struct file *kick;
+	struct file *call;
+	struct file *error;
+	struct eventfd_ctx *call_ctx;
+	struct eventfd_ctx *error_ctx;
+	struct eventfd_ctx *log_ctx;
+
+	struct vhost_poll poll;
+
+	/* The routine to call when the Guest pings us, or timeout. */
+	work_func_t handle_kick;
+
+	/* Last available index we saw. */
+	u16 last_avail_idx;
+
+	/* Caches available index value from user. */
+	u16 avail_idx;
+
+	/* Last index we used. */
+	u16 last_used_idx;
+
+	/* Used flags */
+	u16 used_flags;
+
+	/* Log writes to used structure. */
+	bool log_used;
+	u64 log_addr;
+
+	struct iovec indirect[VHOST_NET_MAX_SG];
+	struct iovec iov[VHOST_NET_MAX_SG];
+	struct iovec hdr[VHOST_NET_MAX_SG];
+	size_t hdr_size;
+	/* We use a kind of RCU to access private pointer.
+	 * All readers access it from workqueue, which makes it possible to
+	 * flush the workqueue instead of synchronize_rcu. Therefore readers do
+	 * not need to call rcu_read_lock/rcu_read_unlock: the beginning of
+	 * work item execution acts instead of rcu_read_lock() and the end of
+	 * work item execution acts instead of rcu_read_lock().
+	 * Writers use virtqueue mutex. */
+	void *private_data;
+	/* Log write descriptors */
+	void __user *log_base;
+	struct vhost_log log[VHOST_NET_MAX_SG];
+};
+
+struct vhost_dev {
+	/* Readers use RCU to access memory table pointer
+	 * log base pointer and features.
+	 * Writers use mutex below.*/
+	struct vhost_memory *memory;
+	struct mm_struct *mm;
+	struct mutex mutex;
+	unsigned acked_features;
+	struct vhost_virtqueue *vqs;
+	int nvqs;
+	struct file *log_file;
+	struct eventfd_ctx *log_ctx;
+};
+
+long vhost_dev_init(struct vhost_dev *, struct vhost_virtqueue *vqs, int nvqs);
+long vhost_dev_check_owner(struct vhost_dev *);
+long vhost_dev_reset_owner(struct vhost_dev *);
+void vhost_dev_cleanup(struct vhost_dev *);
+long vhost_dev_ioctl(struct vhost_dev *, unsigned int ioctl, unsigned long arg);
+int vhost_vq_access_ok(struct vhost_virtqueue *vq);
+int vhost_log_access_ok(struct vhost_dev *);
+
+unsigned vhost_get_vq_desc(struct vhost_dev *, struct vhost_virtqueue *,
+			   struct iovec iov[], unsigned int iov_count,
+			   unsigned int *out_num, unsigned int *in_num,
+			   struct vhost_log *log, unsigned int *log_num);
+void vhost_discard_vq_desc(struct vhost_virtqueue *);
+
+int vhost_add_used(struct vhost_virtqueue *, unsigned int head, int len);
+void vhost_signal(struct vhost_dev *, struct vhost_virtqueue *);
+void vhost_add_used_and_signal(struct vhost_dev *, struct vhost_virtqueue *,
+			       unsigned int head, int len);
+void vhost_disable_notify(struct vhost_virtqueue *);
+bool vhost_enable_notify(struct vhost_virtqueue *);
+
+int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log,
+		    unsigned int log_num, u64 len);
+
+int vhost_init(void);
+void vhost_cleanup(void);
+
+#define vq_err(vq, fmt, ...) do {                                  \
+		pr_debug(pr_fmt(fmt), ##__VA_ARGS__);       \
+		if ((vq)->error_ctx)                               \
+				eventfd_signal((vq)->error_ctx, 1);\
+	} while (0)
+
+enum {
+	VHOST_FEATURES = (1 << VIRTIO_F_NOTIFY_ON_EMPTY) |
+			 (1 << VIRTIO_RING_F_INDIRECT_DESC) |
+			 (1 << VHOST_F_LOG_ALL) |
+			 (1 << VHOST_NET_F_VIRTIO_NET_HDR),
+};
+
+static inline int vhost_has_feature(struct vhost_dev *dev, int bit)
+{
+	unsigned acked_features = rcu_dereference(dev->acked_features);
+	return acked_features & (1 << bit);
+}
+
+#endif
diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index 756f831cbdd5..d93080748a91 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -362,6 +362,7 @@ unifdef-y += uio.h
 unifdef-y += unistd.h
 unifdef-y += usbdevice_fs.h
 unifdef-y += utsname.h
+unifdef-y += vhost.h
 unifdef-y += videodev2.h
 unifdef-y += videodev.h
 unifdef-y += virtio_config.h
diff --git a/include/linux/miscdevice.h b/include/linux/miscdevice.h
index adaf3c15e449..8b5f7cc0fba6 100644
--- a/include/linux/miscdevice.h
+++ b/include/linux/miscdevice.h
@@ -30,6 +30,7 @@
 #define HPET_MINOR		228
 #define FUSE_MINOR		229
 #define KVM_MINOR		232
+#define VHOST_NET_MINOR		233
 #define MISC_DYNAMIC_MINOR	255
 
 struct device;
diff --git a/include/linux/vhost.h b/include/linux/vhost.h
new file mode 100644
index 000000000000..e847f1e30756
--- /dev/null
+++ b/include/linux/vhost.h
@@ -0,0 +1,130 @@
+#ifndef _LINUX_VHOST_H
+#define _LINUX_VHOST_H
+/* Userspace interface for in-kernel virtio accelerators. */
+
+/* vhost is used to reduce the number of system calls involved in virtio.
+ *
+ * Existing virtio net code is used in the guest without modification.
+ *
+ * This header includes interface used by userspace hypervisor for
+ * device configuration.
+ */
+
+#include <linux/types.h>
+#include <linux/compiler.h>
+#include <linux/ioctl.h>
+#include <linux/virtio_config.h>
+#include <linux/virtio_ring.h>
+
+struct vhost_vring_state {
+	unsigned int index;
+	unsigned int num;
+};
+
+struct vhost_vring_file {
+	unsigned int index;
+	int fd; /* Pass -1 to unbind from file. */
+
+};
+
+struct vhost_vring_addr {
+	unsigned int index;
+	/* Option flags. */
+	unsigned int flags;
+	/* Flag values: */
+	/* Whether log address is valid. If set enables logging. */
+#define VHOST_VRING_F_LOG 0
+
+	/* Start of array of descriptors (virtually contiguous) */
+	__u64 desc_user_addr;
+	/* Used structure address. Must be 32 bit aligned */
+	__u64 used_user_addr;
+	/* Available structure address. Must be 16 bit aligned */
+	__u64 avail_user_addr;
+	/* Logging support. */
+	/* Log writes to used structure, at offset calculated from specified
+	 * address. Address must be 32 bit aligned. */
+	__u64 log_guest_addr;
+};
+
+struct vhost_memory_region {
+	__u64 guest_phys_addr;
+	__u64 memory_size; /* bytes */
+	__u64 userspace_addr;
+	__u64 flags_padding; /* No flags are currently specified. */
+};
+
+/* All region addresses and sizes must be 4K aligned. */
+#define VHOST_PAGE_SIZE 0x1000
+
+struct vhost_memory {
+	__u32 nregions;
+	__u32 padding;
+	struct vhost_memory_region regions[0];
+};
+
+/* ioctls */
+
+#define VHOST_VIRTIO 0xAF
+
+/* Features bitmask for forward compatibility.  Transport bits are used for
+ * vhost specific features. */
+#define VHOST_GET_FEATURES	_IOR(VHOST_VIRTIO, 0x00, __u64)
+#define VHOST_SET_FEATURES	_IOW(VHOST_VIRTIO, 0x00, __u64)
+
+/* Set current process as the (exclusive) owner of this file descriptor.  This
+ * must be called before any other vhost command.  Further calls to
+ * VHOST_OWNER_SET fail until VHOST_OWNER_RESET is called. */
+#define VHOST_SET_OWNER _IO(VHOST_VIRTIO, 0x01)
+/* Give up ownership, and reset the device to default values.
+ * Allows subsequent call to VHOST_OWNER_SET to succeed. */
+#define VHOST_RESET_OWNER _IO(VHOST_VIRTIO, 0x02)
+
+/* Set up/modify memory layout */
+#define VHOST_SET_MEM_TABLE	_IOW(VHOST_VIRTIO, 0x03, struct vhost_memory)
+
+/* Write logging setup. */
+/* Memory writes can optionally be logged by setting bit at an offset
+ * (calculated from the physical address) from specified log base.
+ * The bit is set using an atomic 32 bit operation. */
+/* Set base address for logging. */
+#define VHOST_SET_LOG_BASE _IOW(VHOST_VIRTIO, 0x04, __u64)
+/* Specify an eventfd file descriptor to signal on log write. */
+#define VHOST_SET_LOG_FD _IOW(VHOST_VIRTIO, 0x07, int)
+
+/* Ring setup. */
+/* Set number of descriptors in ring. This parameter can not
+ * be modified while ring is running (bound to a device). */
+#define VHOST_SET_VRING_NUM _IOW(VHOST_VIRTIO, 0x10, struct vhost_vring_state)
+/* Set addresses for the ring. */
+#define VHOST_SET_VRING_ADDR _IOW(VHOST_VIRTIO, 0x11, struct vhost_vring_addr)
+/* Base value where queue looks for available descriptors */
+#define VHOST_SET_VRING_BASE _IOW(VHOST_VIRTIO, 0x12, struct vhost_vring_state)
+/* Get accessor: reads index, writes value in num */
+#define VHOST_GET_VRING_BASE _IOWR(VHOST_VIRTIO, 0x12, struct vhost_vring_state)
+
+/* The following ioctls use eventfd file descriptors to signal and poll
+ * for events. */
+
+/* Set eventfd to poll for added buffers */
+#define VHOST_SET_VRING_KICK _IOW(VHOST_VIRTIO, 0x20, struct vhost_vring_file)
+/* Set eventfd to signal when buffers have beed used */
+#define VHOST_SET_VRING_CALL _IOW(VHOST_VIRTIO, 0x21, struct vhost_vring_file)
+/* Set eventfd to signal an error */
+#define VHOST_SET_VRING_ERR _IOW(VHOST_VIRTIO, 0x22, struct vhost_vring_file)
+
+/* VHOST_NET specific defines */
+
+/* Attach virtio net ring to a raw socket, or tap device.
+ * The socket must be already bound to an ethernet device, this device will be
+ * used for transmit.  Pass fd -1 to unbind from the socket and the transmit
+ * device.  This can be used to stop the ring (e.g. for migration). */
+#define VHOST_NET_SET_BACKEND _IOW(VHOST_VIRTIO, 0x30, struct vhost_vring_file)
+
+/* Feature bits */
+/* Log all write descriptors. Can be changed while device is active. */
+#define VHOST_F_LOG_ALL 26
+/* vhost-net should add virtio_net_hdr for RX, and strip for TX packets. */
+#define VHOST_NET_F_VIRTIO_NET_HDR 27
+
+#endif
-- 
cgit v1.2.3


From c955fe8e0bdd7be7a6bc2d49245d570a816f7cc5 Mon Sep 17 00:00:00 2001
From: Alexey Starikovskiy <astarikovskiy@suse.de>
Date: Thu, 15 Oct 2009 14:31:30 +0400
Subject: POWER: Add support for cycle_count

Signed-off-by: Alexey Starikovskiy <astarikovskiy@suse.de>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 drivers/power/power_supply_sysfs.c | 1 +
 include/linux/power_supply.h       | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/power/power_supply_sysfs.c b/drivers/power/power_supply_sysfs.c
index c790e0c77d4b..ff05e6189768 100644
--- a/drivers/power/power_supply_sysfs.c
+++ b/drivers/power/power_supply_sysfs.c
@@ -99,6 +99,7 @@ static struct device_attribute power_supply_attrs[] = {
 	POWER_SUPPLY_ATTR(present),
 	POWER_SUPPLY_ATTR(online),
 	POWER_SUPPLY_ATTR(technology),
+	POWER_SUPPLY_ATTR(cycle_count),
 	POWER_SUPPLY_ATTR(voltage_max),
 	POWER_SUPPLY_ATTR(voltage_min),
 	POWER_SUPPLY_ATTR(voltage_max_design),
diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h
index b5d096d3a9be..ebd2b8fb00d0 100644
--- a/include/linux/power_supply.h
+++ b/include/linux/power_supply.h
@@ -82,6 +82,7 @@ enum power_supply_property {
 	POWER_SUPPLY_PROP_PRESENT,
 	POWER_SUPPLY_PROP_ONLINE,
 	POWER_SUPPLY_PROP_TECHNOLOGY,
+	POWER_SUPPLY_PROP_CYCLE_COUNT,
 	POWER_SUPPLY_PROP_VOLTAGE_MAX,
 	POWER_SUPPLY_PROP_VOLTAGE_MIN,
 	POWER_SUPPLY_PROP_VOLTAGE_MAX_DESIGN,
-- 
cgit v1.2.3


From 889ff0150661512d79484219612b7e2e024b6c07 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Sat, 9 Jan 2010 20:04:47 +0100
Subject: perf/core: Split context's event group list into pinned and
 non-pinned lists

Split-up struct perf_event_context::group_list into pinned_groups
and flexible_groups (non-pinned).

This first appears to be useless as it duplicates various loops around
the group list handlings.

But it scales better in the fast-path in perf_sched_in(). We don't
anymore iterate twice through the entire list to separate pinned and
non-pinned scheduling. Instead we interate through two distinct lists.

The another desired effect is that it makes easier to define distinct
scheduling rules on both.

Changes in v2:
- Respectively rename pinned_grp_list and
  volatile_grp_list into pinned_groups and flexible_groups as per
  Ingo suggestion.
- Various cleanups

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
---
 include/linux/perf_event.h |   3 +-
 kernel/perf_event.c        | 227 ++++++++++++++++++++++++++++++---------------
 2 files changed, 153 insertions(+), 77 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 9a1d276db754..cdbc2aa64a0b 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -683,7 +683,8 @@ struct perf_event_context {
 	 */
 	struct mutex			mutex;
 
-	struct list_head		group_list;
+	struct list_head		pinned_groups;
+	struct list_head		flexible_groups;
 	struct list_head		event_list;
 	int				nr_events;
 	int				nr_active;
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 27f69a04541d..c9f8a757649d 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -289,6 +289,15 @@ static void update_event_times(struct perf_event *event)
 	event->total_time_running = run_end - event->tstamp_running;
 }
 
+static struct list_head *
+ctx_group_list(struct perf_event *event, struct perf_event_context *ctx)
+{
+	if (event->attr.pinned)
+		return &ctx->pinned_groups;
+	else
+		return &ctx->flexible_groups;
+}
+
 /*
  * Add a event from the lists for its context.
  * Must be called with ctx->mutex and ctx->lock held.
@@ -303,9 +312,12 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
 	 * add it straight to the context's event list, or to the group
 	 * leader's sibling list:
 	 */
-	if (group_leader == event)
-		list_add_tail(&event->group_entry, &ctx->group_list);
-	else {
+	if (group_leader == event) {
+		struct list_head *list;
+
+		list = ctx_group_list(event, ctx);
+		list_add_tail(&event->group_entry, list);
+	} else {
 		list_add_tail(&event->group_entry, &group_leader->sibling_list);
 		group_leader->nr_siblings++;
 	}
@@ -355,8 +367,10 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx)
 	 * to the context list directly:
 	 */
 	list_for_each_entry_safe(sibling, tmp, &event->sibling_list, group_entry) {
+		struct list_head *list;
 
-		list_move_tail(&sibling->group_entry, &ctx->group_list);
+		list = ctx_group_list(event, ctx);
+		list_move_tail(&sibling->group_entry, list);
 		sibling->group_leader = sibling;
 	}
 }
@@ -1056,7 +1070,10 @@ void __perf_event_sched_out(struct perf_event_context *ctx,
 
 	perf_disable();
 	if (ctx->nr_active) {
-		list_for_each_entry(event, &ctx->group_list, group_entry)
+		list_for_each_entry(event, &ctx->pinned_groups, group_entry)
+			group_sched_out(event, cpuctx, ctx);
+
+		list_for_each_entry(event, &ctx->flexible_groups, group_entry)
 			group_sched_out(event, cpuctx, ctx);
 	}
 	perf_enable();
@@ -1271,9 +1288,8 @@ __perf_event_sched_in(struct perf_event_context *ctx,
 	 * First go through the list and put on any pinned groups
 	 * in order to give them the best chance of going on.
 	 */
-	list_for_each_entry(event, &ctx->group_list, group_entry) {
-		if (event->state <= PERF_EVENT_STATE_OFF ||
-		    !event->attr.pinned)
+	list_for_each_entry(event, &ctx->pinned_groups, group_entry) {
+		if (event->state <= PERF_EVENT_STATE_OFF)
 			continue;
 		if (event->cpu != -1 && event->cpu != cpu)
 			continue;
@@ -1291,15 +1307,10 @@ __perf_event_sched_in(struct perf_event_context *ctx,
 		}
 	}
 
-	list_for_each_entry(event, &ctx->group_list, group_entry) {
-		/*
-		 * Ignore events in OFF or ERROR state, and
-		 * ignore pinned events since we did them already.
-		 */
-		if (event->state <= PERF_EVENT_STATE_OFF ||
-		    event->attr.pinned)
+	list_for_each_entry(event, &ctx->flexible_groups, group_entry) {
+		/* Ignore events in OFF or ERROR state */
+		if (event->state <= PERF_EVENT_STATE_OFF)
 			continue;
-
 		/*
 		 * Listen to the 'cpu' scheduling filter constraint
 		 * of events:
@@ -1453,8 +1464,13 @@ static void rotate_ctx(struct perf_event_context *ctx)
 	 * Rotate the first entry last (works just fine for group events too):
 	 */
 	perf_disable();
-	list_for_each_entry(event, &ctx->group_list, group_entry) {
-		list_move_tail(&event->group_entry, &ctx->group_list);
+	list_for_each_entry(event, &ctx->pinned_groups, group_entry) {
+		list_move_tail(&event->group_entry, &ctx->pinned_groups);
+		break;
+	}
+
+	list_for_each_entry(event, &ctx->flexible_groups, group_entry) {
+		list_move_tail(&event->group_entry, &ctx->flexible_groups);
 		break;
 	}
 	perf_enable();
@@ -1490,6 +1506,21 @@ void perf_event_task_tick(struct task_struct *curr)
 		perf_event_task_sched_in(curr);
 }
 
+static int event_enable_on_exec(struct perf_event *event,
+				struct perf_event_context *ctx)
+{
+	if (!event->attr.enable_on_exec)
+		return 0;
+
+	event->attr.enable_on_exec = 0;
+	if (event->state >= PERF_EVENT_STATE_INACTIVE)
+		return 0;
+
+	__perf_event_mark_enabled(event, ctx);
+
+	return 1;
+}
+
 /*
  * Enable all of a task's events that have been marked enable-on-exec.
  * This expects task == current.
@@ -1500,6 +1531,7 @@ static void perf_event_enable_on_exec(struct task_struct *task)
 	struct perf_event *event;
 	unsigned long flags;
 	int enabled = 0;
+	int ret;
 
 	local_irq_save(flags);
 	ctx = task->perf_event_ctxp;
@@ -1510,14 +1542,16 @@ static void perf_event_enable_on_exec(struct task_struct *task)
 
 	raw_spin_lock(&ctx->lock);
 
-	list_for_each_entry(event, &ctx->group_list, group_entry) {
-		if (!event->attr.enable_on_exec)
-			continue;
-		event->attr.enable_on_exec = 0;
-		if (event->state >= PERF_EVENT_STATE_INACTIVE)
-			continue;
-		__perf_event_mark_enabled(event, ctx);
-		enabled = 1;
+	list_for_each_entry(event, &ctx->pinned_groups, group_entry) {
+		ret = event_enable_on_exec(event, ctx);
+		if (ret)
+			enabled = 1;
+	}
+
+	list_for_each_entry(event, &ctx->flexible_groups, group_entry) {
+		ret = event_enable_on_exec(event, ctx);
+		if (ret)
+			enabled = 1;
 	}
 
 	/*
@@ -1591,7 +1625,8 @@ __perf_event_init_context(struct perf_event_context *ctx,
 {
 	raw_spin_lock_init(&ctx->lock);
 	mutex_init(&ctx->mutex);
-	INIT_LIST_HEAD(&ctx->group_list);
+	INIT_LIST_HEAD(&ctx->pinned_groups);
+	INIT_LIST_HEAD(&ctx->flexible_groups);
 	INIT_LIST_HEAD(&ctx->event_list);
 	atomic_set(&ctx->refcount, 1);
 	ctx->task = task;
@@ -5032,7 +5067,11 @@ void perf_event_exit_task(struct task_struct *child)
 	mutex_lock_nested(&child_ctx->mutex, SINGLE_DEPTH_NESTING);
 
 again:
-	list_for_each_entry_safe(child_event, tmp, &child_ctx->group_list,
+	list_for_each_entry_safe(child_event, tmp, &child_ctx->pinned_groups,
+				 group_entry)
+		__perf_event_exit_task(child_event, child_ctx, child);
+
+	list_for_each_entry_safe(child_event, tmp, &child_ctx->flexible_groups,
 				 group_entry)
 		__perf_event_exit_task(child_event, child_ctx, child);
 
@@ -5041,7 +5080,8 @@ again:
 	 * its siblings to the list, but we obtained 'tmp' before that which
 	 * will still point to the list head terminating the iteration.
 	 */
-	if (!list_empty(&child_ctx->group_list))
+	if (!list_empty(&child_ctx->pinned_groups) ||
+	    !list_empty(&child_ctx->flexible_groups))
 		goto again;
 
 	mutex_unlock(&child_ctx->mutex);
@@ -5049,6 +5089,24 @@ again:
 	put_ctx(child_ctx);
 }
 
+static void perf_free_event(struct perf_event *event,
+			    struct perf_event_context *ctx)
+{
+	struct perf_event *parent = event->parent;
+
+	if (WARN_ON_ONCE(!parent))
+		return;
+
+	mutex_lock(&parent->child_mutex);
+	list_del_init(&event->child_list);
+	mutex_unlock(&parent->child_mutex);
+
+	fput(parent->filp);
+
+	list_del_event(event, ctx);
+	free_event(event);
+}
+
 /*
  * free an unexposed, unused context as created by inheritance by
  * init_task below, used by fork() in case of fail.
@@ -5063,36 +5121,70 @@ void perf_event_free_task(struct task_struct *task)
 
 	mutex_lock(&ctx->mutex);
 again:
-	list_for_each_entry_safe(event, tmp, &ctx->group_list, group_entry) {
-		struct perf_event *parent = event->parent;
+	list_for_each_entry_safe(event, tmp, &ctx->pinned_groups, group_entry)
+		perf_free_event(event, ctx);
 
-		if (WARN_ON_ONCE(!parent))
-			continue;
+	list_for_each_entry_safe(event, tmp, &ctx->flexible_groups,
+				 group_entry)
+		perf_free_event(event, ctx);
 
-		mutex_lock(&parent->child_mutex);
-		list_del_init(&event->child_list);
-		mutex_unlock(&parent->child_mutex);
+	if (!list_empty(&ctx->pinned_groups) ||
+	    !list_empty(&ctx->flexible_groups))
+		goto again;
 
-		fput(parent->filp);
+	mutex_unlock(&ctx->mutex);
 
-		list_del_event(event, ctx);
-		free_event(event);
+	put_ctx(ctx);
+}
+
+static int
+inherit_task_group(struct perf_event *event, struct task_struct *parent,
+		   struct perf_event_context *parent_ctx,
+		   struct task_struct *child,
+		   int *inherited_all)
+{
+	int ret;
+	struct perf_event_context *child_ctx = child->perf_event_ctxp;
+
+	if (!event->attr.inherit) {
+		*inherited_all = 0;
+		return 0;
 	}
 
-	if (!list_empty(&ctx->group_list))
-		goto again;
+	if (!child_ctx) {
+		/*
+		 * This is executed from the parent task context, so
+		 * inherit events that have been marked for cloning.
+		 * First allocate and initialize a context for the
+		 * child.
+		 */
 
-	mutex_unlock(&ctx->mutex);
+		child_ctx = kzalloc(sizeof(struct perf_event_context),
+				    GFP_KERNEL);
+		if (!child_ctx)
+			return -ENOMEM;
 
-	put_ctx(ctx);
+		__perf_event_init_context(child_ctx, child);
+		child->perf_event_ctxp = child_ctx;
+		get_task_struct(child);
+	}
+
+	ret = inherit_group(event, parent, parent_ctx,
+			    child, child_ctx);
+
+	if (ret)
+		*inherited_all = 0;
+
+	return ret;
 }
 
+
 /*
  * Initialize the perf_event context in task_struct
  */
 int perf_event_init_task(struct task_struct *child)
 {
-	struct perf_event_context *child_ctx = NULL, *parent_ctx;
+	struct perf_event_context *child_ctx, *parent_ctx;
 	struct perf_event_context *cloned_ctx;
 	struct perf_event *event;
 	struct task_struct *parent = current;
@@ -5130,41 +5222,22 @@ int perf_event_init_task(struct task_struct *child)
 	 * We dont have to disable NMIs - we are only looking at
 	 * the list, not manipulating it:
 	 */
-	list_for_each_entry(event, &parent_ctx->group_list, group_entry) {
-
-		if (!event->attr.inherit) {
-			inherited_all = 0;
-			continue;
-		}
-
-		if (!child->perf_event_ctxp) {
-			/*
-			 * This is executed from the parent task context, so
-			 * inherit events that have been marked for cloning.
-			 * First allocate and initialize a context for the
-			 * child.
-			 */
-
-			child_ctx = kzalloc(sizeof(struct perf_event_context),
-					    GFP_KERNEL);
-			if (!child_ctx) {
-				ret = -ENOMEM;
-				break;
-			}
-
-			__perf_event_init_context(child_ctx, child);
-			child->perf_event_ctxp = child_ctx;
-			get_task_struct(child);
-		}
+	list_for_each_entry(event, &parent_ctx->pinned_groups, group_entry) {
+		ret = inherit_task_group(event, parent, parent_ctx, child,
+					 &inherited_all);
+		if (ret)
+			break;
+	}
 
-		ret = inherit_group(event, parent, parent_ctx,
-					     child, child_ctx);
-		if (ret) {
-			inherited_all = 0;
+	list_for_each_entry(event, &parent_ctx->flexible_groups, group_entry) {
+		ret = inherit_task_group(event, parent, parent_ctx, child,
+					 &inherited_all);
+		if (ret)
 			break;
-		}
 	}
 
+	child_ctx = child->perf_event_ctxp;
+
 	if (child_ctx && inherited_all) {
 		/*
 		 * Mark the child context as a clone of the parent
@@ -5213,7 +5286,9 @@ static void __perf_event_exit_cpu(void *info)
 	struct perf_event_context *ctx = &cpuctx->ctx;
 	struct perf_event *event, *tmp;
 
-	list_for_each_entry_safe(event, tmp, &ctx->group_list, group_entry)
+	list_for_each_entry_safe(event, tmp, &ctx->pinned_groups, group_entry)
+		__perf_event_remove_from_context(event);
+	list_for_each_entry_safe(event, tmp, &ctx->flexible_groups, group_entry)
 		__perf_event_remove_from_context(event);
 }
 static void perf_event_exit_cpu(int cpu)
-- 
cgit v1.2.3


From 5908cdc85eb30f8d07f2cb11d4a62334d7229048 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Sat, 9 Jan 2010 20:53:14 +0100
Subject: list: Introduce list_rotate_left()

Bring a new list_rotate_left() helper that rotates a list to
the left. This is useful for codes that need to round roubin
elements which queue priority increases from tail to head.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
---
 include/linux/list.h | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/list.h b/include/linux/list.h
index 969f6e92d089..5d9c6558e8ab 100644
--- a/include/linux/list.h
+++ b/include/linux/list.h
@@ -205,6 +205,20 @@ static inline int list_empty_careful(const struct list_head *head)
 	return (next == head) && (next == head->prev);
 }
 
+/**
+ * list_rotate_left - rotate the list to the left
+ * @head: the head of the list
+ */
+static inline void list_rotate_left(struct list_head *head)
+{
+	struct list_head *first;
+
+	if (!list_empty(head)) {
+		first = head->next;
+		list_move_tail(first, head);
+	}
+}
+
 /**
  * list_is_singular - tests whether a list has just one entry.
  * @head: the list to test.
-- 
cgit v1.2.3


From d6f962b57bfaab62891c7abbf1469212a56d6103 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Sun, 10 Jan 2010 01:25:51 +0100
Subject: perf: Export software-only event group characteristic as a flag

Before scheduling an event group, we first check if a group can go
on. We first check if the group is made of software only events
first, in which case it is enough to know if the group can be
scheduled in.

For that purpose, we iterate through the whole group, which is
wasteful as we could do this check when we add/delete an event to
a group.

So we create a group_flags field in perf event that can host
characteristics from a group of events, starting with a first
PERF_GROUP_SOFTWARE flag that reduces the check on the fast path.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
---
 include/linux/perf_event.h |  5 +++++
 kernel/perf_event.c        | 30 +++++++++++-------------------
 2 files changed, 16 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index cdbc2aa64a0b..c6f812e4d058 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -565,6 +565,10 @@ typedef void (*perf_overflow_handler_t)(struct perf_event *, int,
 					struct perf_sample_data *,
 					struct pt_regs *regs);
 
+enum perf_group_flag {
+	PERF_GROUP_SOFTWARE = 0x1,
+};
+
 /**
  * struct perf_event - performance event kernel representation:
  */
@@ -574,6 +578,7 @@ struct perf_event {
 	struct list_head		event_entry;
 	struct list_head		sibling_list;
 	int				nr_siblings;
+	int				group_flags;
 	struct perf_event		*group_leader;
 	struct perf_event		*output;
 	const struct pmu		*pmu;
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index bbebe2832639..eae6ff693604 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -315,9 +315,16 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
 	if (group_leader == event) {
 		struct list_head *list;
 
+		if (is_software_event(event))
+			event->group_flags |= PERF_GROUP_SOFTWARE;
+
 		list = ctx_group_list(event, ctx);
 		list_add_tail(&event->group_entry, list);
 	} else {
+		if (group_leader->group_flags & PERF_GROUP_SOFTWARE &&
+		    !is_software_event(event))
+			group_leader->group_flags &= ~PERF_GROUP_SOFTWARE;
+
 		list_add_tail(&event->group_entry, &group_leader->sibling_list);
 		group_leader->nr_siblings++;
 	}
@@ -372,6 +379,9 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx)
 		list = ctx_group_list(event, ctx);
 		list_move_tail(&sibling->group_entry, list);
 		sibling->group_leader = sibling;
+
+		/* Inherit group flags from the previous leader */
+		sibling->group_flags = event->group_flags;
 	}
 }
 
@@ -699,24 +709,6 @@ group_error:
 	return -EAGAIN;
 }
 
-/*
- * Return 1 for a group consisting entirely of software events,
- * 0 if the group contains any hardware events.
- */
-static int is_software_only_group(struct perf_event *leader)
-{
-	struct perf_event *event;
-
-	if (!is_software_event(leader))
-		return 0;
-
-	list_for_each_entry(event, &leader->sibling_list, group_entry)
-		if (!is_software_event(event))
-			return 0;
-
-	return 1;
-}
-
 /*
  * Work out whether we can put this event group on the CPU now.
  */
@@ -727,7 +719,7 @@ static int group_can_go_on(struct perf_event *event,
 	/*
 	 * Groups consisting entirely of software events can always go on.
 	 */
-	if (is_software_only_group(event))
+	if (event->group_flags & PERF_GROUP_SOFTWARE)
 		return 1;
 	/*
 	 * If an exclusive group is already on, no other hardware
-- 
cgit v1.2.3


From 7e105057a34c83cea542dacc55ff0528bce67afa Mon Sep 17 00:00:00 2001
From: Stefani Seibold <stefani@seibold.net>
Date: Fri, 15 Jan 2010 17:01:02 -0800
Subject: kfifo: fix kfifo_out_locked race bug

Fix a wrong optimization in include/linux/kfifo.h which could cause a race
in kfifo_out_locked.

Signed-off-by: Stefani Seibold <stefani@seibold.net>
Reported-by: Johan Hovold <jhovold@gmail.com>
Cc: Pete Zaitcev <zaitcev@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kfifo.h | 7 -------
 1 file changed, 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h
index 7c6b32a1421c..c4ac88b3c302 100644
--- a/include/linux/kfifo.h
+++ b/include/linux/kfifo.h
@@ -228,13 +228,6 @@ static inline __must_check unsigned int kfifo_out_locked(struct kfifo *fifo,
 
 	ret = kfifo_out(fifo, to, n);
 
-	/*
-	 * optimization: if the FIFO is empty, set the indices to 0
-	 * so we don't wrap the next time
-	 */
-	if (kfifo_is_empty(fifo))
-		kfifo_reset(fifo);
-
 	spin_unlock_irqrestore(lock, flags);
 
 	return ret;
-- 
cgit v1.2.3


From 2427b8e3eaea3719e53bbed7b3375382c3aa6f13 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <randy.dunlap@oracle.com>
Date: Fri, 15 Jan 2010 17:01:11 -0800
Subject: tty.h: make tty_port_get() static inline

I get a few dozen of these warnings when using
  gcc (GCC) 4.4.1 20090725 (Red Hat 4.4.1-2):

In file included from mmotm-2010-0113-1217/init/do_mounts.c:5:
mmotm-2010-0113-1217/include/linux/tty.h: In function 'tty_port_get':
mmotm-2010-0113-1217/include/linux/tty.h:469: warning: '______f' is static but declared in inline function 'tty_port_get' which is not static

so make the function static inline.

[akpm@linux-foundation.org: may as well convert tty_port_users() also]
Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/tty.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/tty.h b/include/linux/tty.h
index ef3a2947b102..6abfcf5b5887 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -464,7 +464,7 @@ extern int tty_port_alloc_xmit_buf(struct tty_port *port);
 extern void tty_port_free_xmit_buf(struct tty_port *port);
 extern void tty_port_put(struct tty_port *port);
 
-extern inline struct tty_port *tty_port_get(struct tty_port *port)
+static inline struct tty_port *tty_port_get(struct tty_port *port)
 {
 	if (port)
 		kref_get(&port->kref);
@@ -486,7 +486,7 @@ extern void tty_port_close(struct tty_port *port,
 				struct tty_struct *tty, struct file *filp);
 extern int tty_port_open(struct tty_port *port,
 				struct tty_struct *tty, struct file *filp);
-extern inline int tty_port_users(struct tty_port *port)
+static inline int tty_port_users(struct tty_port *port)
 {
 	return port->count + port->blocked_open;
 }
-- 
cgit v1.2.3


From 8ecc2951534af10e04ddb5e5ff5c6d217b79f5c2 Mon Sep 17 00:00:00 2001
From: Andi Kleen <andi@firstfloor.org>
Date: Fri, 15 Jan 2010 17:01:12 -0800
Subject: kfifo: use void * pointers for user buffers

The pointers to user buffers are currently unsigned char *, which requires
a lot of casting in the caller for any non-char typed buffers.  Use void *
instead.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Stefani Seibold <stefani@seibold.net>
Cc: Roland Dreier <rdreier@cisco.com>
Cc: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Cc: Andy Walls <awalls@radix.net>
Cc: Vikram Dhillon <dhillonv10@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kfifo.h | 10 +++++-----
 kernel/kfifo.c        |  8 ++++----
 2 files changed, 9 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h
index c4ac88b3c302..6fb495ea956a 100644
--- a/include/linux/kfifo.h
+++ b/include/linux/kfifo.h
@@ -104,15 +104,15 @@ union { \
 
 #undef __kfifo_initializer
 
-extern void kfifo_init(struct kfifo *fifo, unsigned char *buffer,
+extern void kfifo_init(struct kfifo *fifo, void *buffer,
 			unsigned int size);
 extern __must_check int kfifo_alloc(struct kfifo *fifo, unsigned int size,
 			gfp_t gfp_mask);
 extern void kfifo_free(struct kfifo *fifo);
 extern unsigned int kfifo_in(struct kfifo *fifo,
-				const unsigned char *from, unsigned int len);
+				const void *from, unsigned int len);
 extern __must_check unsigned int kfifo_out(struct kfifo *fifo,
-				unsigned char *to, unsigned int len);
+				void *to, unsigned int len);
 
 /**
  * kfifo_reset - removes the entire FIFO contents
@@ -194,7 +194,7 @@ static inline __must_check unsigned int kfifo_avail(struct kfifo *fifo)
  * bytes copied.
  */
 static inline unsigned int kfifo_in_locked(struct kfifo *fifo,
-		const unsigned char *from, unsigned int n, spinlock_t *lock)
+		const void *from, unsigned int n, spinlock_t *lock)
 {
 	unsigned long flags;
 	unsigned int ret;
@@ -219,7 +219,7 @@ static inline unsigned int kfifo_in_locked(struct kfifo *fifo,
  * @to buffer and returns the number of copied bytes.
  */
 static inline __must_check unsigned int kfifo_out_locked(struct kfifo *fifo,
-	unsigned char *to, unsigned int n, spinlock_t *lock)
+	void *to, unsigned int n, spinlock_t *lock)
 {
 	unsigned long flags;
 	unsigned int ret;
diff --git a/kernel/kfifo.c b/kernel/kfifo.c
index e92d519f93b1..ab615e695052 100644
--- a/kernel/kfifo.c
+++ b/kernel/kfifo.c
@@ -28,7 +28,7 @@
 #include <linux/log2.h>
 #include <linux/uaccess.h>
 
-static void _kfifo_init(struct kfifo *fifo, unsigned char *buffer,
+static void _kfifo_init(struct kfifo *fifo, void *buffer,
 		unsigned int size)
 {
 	fifo->buffer = buffer;
@@ -44,7 +44,7 @@ static void _kfifo_init(struct kfifo *fifo, unsigned char *buffer,
  * @size: the size of the internal buffer, this have to be a power of 2.
  *
  */
-void kfifo_init(struct kfifo *fifo, unsigned char *buffer, unsigned int size)
+void kfifo_init(struct kfifo *fifo, void *buffer, unsigned int size)
 {
 	/* size must be a power of 2 */
 	BUG_ON(!is_power_of_2(size));
@@ -235,7 +235,7 @@ EXPORT_SYMBOL(__kfifo_in_n);
  * Note that with only one concurrent reader and one concurrent
  * writer, you don't need extra locking to use these functions.
  */
-unsigned int kfifo_in(struct kfifo *fifo, const unsigned char *from,
+unsigned int kfifo_in(struct kfifo *fifo, const void *from,
 				unsigned int len)
 {
 	len = min(kfifo_avail(fifo), len);
@@ -277,7 +277,7 @@ EXPORT_SYMBOL(__kfifo_out_n);
  * Note that with only one concurrent reader and one concurrent
  * writer, you don't need extra locking to use these functions.
  */
-unsigned int kfifo_out(struct kfifo *fifo, unsigned char *to, unsigned int len)
+unsigned int kfifo_out(struct kfifo *fifo, void *to, unsigned int len)
 {
 	len = min(kfifo_len(fifo), len);
 
-- 
cgit v1.2.3


From 64ce1037c5434b1d036cd99ecaee6e00496bc2e9 Mon Sep 17 00:00:00 2001
From: Andi Kleen <andi@firstfloor.org>
Date: Fri, 15 Jan 2010 17:01:15 -0800
Subject: kfifo: sanitize *_user error handling

Right now for kfifo_*_user it's not easily possible to distingush between
a user copy failing and the FIFO not containing enough data.  The problem
is that both conditions are multiplexed into the same return code.

Avoid this by moving the "copy length" into a separate output parameter
and only return 0/-EFAULT in the main return value.

I didn't fully adapt the weird "record" variants, those seem
to be unused anyways and were rather messy (should they be just removed?)

I would appreciate some double checking if I did all the conversions
correctly.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Cc: Stefani Seibold <stefani@seibold.net>
Cc: Roland Dreier <rdreier@cisco.com>
Cc: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Cc: Andy Walls <awalls@radix.net>
Cc: Vikram Dhillon <dhillonv10@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kfifo.h |  8 +++---
 kernel/kfifo.c        | 76 +++++++++++++++++++++++++++++++++------------------
 2 files changed, 53 insertions(+), 31 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h
index 6fb495ea956a..86ad50a900c8 100644
--- a/include/linux/kfifo.h
+++ b/include/linux/kfifo.h
@@ -235,11 +235,11 @@ static inline __must_check unsigned int kfifo_out_locked(struct kfifo *fifo,
 
 extern void kfifo_skip(struct kfifo *fifo, unsigned int len);
 
-extern __must_check unsigned int kfifo_from_user(struct kfifo *fifo,
-	const void __user *from, unsigned int n);
+extern __must_check int kfifo_from_user(struct kfifo *fifo,
+	const void __user *from, unsigned int n, unsigned *lenout);
 
-extern __must_check unsigned int kfifo_to_user(struct kfifo *fifo,
-	void __user *to, unsigned int n);
+extern __must_check int kfifo_to_user(struct kfifo *fifo,
+	void __user *to, unsigned int n, unsigned *lenout);
 
 /*
  * __kfifo_add_out internal helper function for updating the out offset
diff --git a/kernel/kfifo.c b/kernel/kfifo.c
index ab615e695052..b50bb622e8b0 100644
--- a/kernel/kfifo.c
+++ b/kernel/kfifo.c
@@ -159,8 +159,9 @@ static inline void __kfifo_out_data(struct kfifo *fifo,
 	memcpy(to + l, fifo->buffer, len - l);
 }
 
-static inline unsigned int __kfifo_from_user_data(struct kfifo *fifo,
-	 const void __user *from, unsigned int len, unsigned int off)
+static inline int __kfifo_from_user_data(struct kfifo *fifo,
+	 const void __user *from, unsigned int len, unsigned int off,
+	 unsigned *lenout)
 {
 	unsigned int l;
 	int ret;
@@ -177,16 +178,20 @@ static inline unsigned int __kfifo_from_user_data(struct kfifo *fifo,
 	/* first put the data starting from fifo->in to buffer end */
 	l = min(len, fifo->size - off);
 	ret = copy_from_user(fifo->buffer + off, from, l);
-
-	if (unlikely(ret))
-		return ret + len - l;
+	if (unlikely(ret)) {
+		*lenout = ret;
+		return -EFAULT;
+	}
+	*lenout = l;
 
 	/* then put the rest (if any) at the beginning of the buffer */
-	return copy_from_user(fifo->buffer, from + l, len - l);
+	ret = copy_from_user(fifo->buffer, from + l, len - l);
+	*lenout += ret ? ret : len - l;
+	return ret ? -EFAULT : 0;
 }
 
-static inline unsigned int __kfifo_to_user_data(struct kfifo *fifo,
-		void __user *to, unsigned int len, unsigned int off)
+static inline int __kfifo_to_user_data(struct kfifo *fifo,
+		void __user *to, unsigned int len, unsigned int off, unsigned *lenout)
 {
 	unsigned int l;
 	int ret;
@@ -203,12 +208,21 @@ static inline unsigned int __kfifo_to_user_data(struct kfifo *fifo,
 	/* first get the data from fifo->out until the end of the buffer */
 	l = min(len, fifo->size - off);
 	ret = copy_to_user(to, fifo->buffer + off, l);
-
-	if (unlikely(ret))
-		return ret + len - l;
+	*lenout = l;
+	if (unlikely(ret)) {
+		*lenout -= ret;
+		return -EFAULT;
+	}
 
 	/* then get the rest (if any) from the beginning of the buffer */
-	return copy_to_user(to + l, fifo->buffer, len - l);
+	len -= l;
+	ret = copy_to_user(to + l, fifo->buffer, len);
+	if (unlikely(ret)) {
+		*lenout += len - ret;
+		return -EFAULT;
+	}
+	*lenout += len;
+	return 0;
 }
 
 unsigned int __kfifo_in_n(struct kfifo *fifo,
@@ -299,10 +313,13 @@ EXPORT_SYMBOL(__kfifo_out_generic);
 unsigned int __kfifo_from_user_n(struct kfifo *fifo,
 	const void __user *from, unsigned int len, unsigned int recsize)
 {
+	unsigned total;
+
 	if (kfifo_avail(fifo) < len + recsize)
 		return len + 1;
 
-	return __kfifo_from_user_data(fifo, from, len, recsize);
+	__kfifo_from_user_data(fifo, from, len, recsize, &total);
+	return total;
 }
 EXPORT_SYMBOL(__kfifo_from_user_n);
 
@@ -313,18 +330,21 @@ EXPORT_SYMBOL(__kfifo_from_user_n);
  * @len: the length of the data to be added.
  *
  * This function copies at most @len bytes from the @from into the
- * FIFO depending and returns the number of copied bytes.
+ * FIFO depending and returns -EFAULT/0.
  *
  * Note that with only one concurrent reader and one concurrent
  * writer, you don't need extra locking to use these functions.
  */
-unsigned int kfifo_from_user(struct kfifo *fifo,
-	const void __user *from, unsigned int len)
+int kfifo_from_user(struct kfifo *fifo,
+        const void __user *from, unsigned int len, unsigned *total)
 {
+	int ret;
 	len = min(kfifo_avail(fifo), len);
-	len -= __kfifo_from_user_data(fifo, from, len, 0);
+	ret = __kfifo_from_user_data(fifo, from, len, 0, total);
+	if (ret)
+		return ret;
 	__kfifo_add_in(fifo, len);
-	return len;
+	return 0;
 }
 EXPORT_SYMBOL(kfifo_from_user);
 
@@ -339,17 +359,17 @@ unsigned int __kfifo_to_user_n(struct kfifo *fifo,
 	void __user *to, unsigned int len, unsigned int reclen,
 	unsigned int recsize)
 {
-	unsigned int ret;
+	unsigned int ret, total;
 
 	if (kfifo_len(fifo) < reclen + recsize)
 		return len;
 
-	ret = __kfifo_to_user_data(fifo, to, reclen, recsize);
+	ret = __kfifo_to_user_data(fifo, to, reclen, recsize, &total);
 
 	if (likely(ret == 0))
 		__kfifo_add_out(fifo, reclen + recsize);
 
-	return ret;
+	return total;
 }
 EXPORT_SYMBOL(__kfifo_to_user_n);
 
@@ -358,20 +378,22 @@ EXPORT_SYMBOL(__kfifo_to_user_n);
  * @fifo: the fifo to be used.
  * @to: where the data must be copied.
  * @len: the size of the destination buffer.
+ @ @lenout: pointer to output variable with copied data
  *
  * This function copies at most @len bytes from the FIFO into the
- * @to buffer and returns the number of copied bytes.
+ * @to buffer and 0 or -EFAULT.
  *
  * Note that with only one concurrent reader and one concurrent
  * writer, you don't need extra locking to use these functions.
  */
-unsigned int kfifo_to_user(struct kfifo *fifo,
-	void __user *to, unsigned int len)
+int kfifo_to_user(struct kfifo *fifo,
+	void __user *to, unsigned int len, unsigned *lenout)
 {
+	int ret;
 	len = min(kfifo_len(fifo), len);
-	len -= __kfifo_to_user_data(fifo, to, len, 0);
-	__kfifo_add_out(fifo, len);
-	return len;
+	ret = __kfifo_to_user_data(fifo, to, len, 0, lenout);
+	__kfifo_add_out(fifo, *lenout);
+	return ret;
 }
 EXPORT_SYMBOL(kfifo_to_user);
 
-- 
cgit v1.2.3


From a5b9e2c1063046421ce01dcf5ddd7ec12567f3e1 Mon Sep 17 00:00:00 2001
From: Andi Kleen <andi@firstfloor.org>
Date: Fri, 15 Jan 2010 17:01:16 -0800
Subject: kfifo: add kfifo_out_peek

In some upcoming code it's useful to peek into a FIFO without permanentely
removing data.  This patch implements a new kfifo_out_peek() to do this.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Stefani Seibold <stefani@seibold.net>
Cc: Roland Dreier <rdreier@cisco.com>
Cc: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Cc: Andy Walls <awalls@radix.net>
Cc: Vikram Dhillon <dhillonv10@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kfifo.h |  3 +++
 kernel/kfifo.c        | 21 +++++++++++++++++++++
 2 files changed, 24 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h
index 86ad50a900c8..7ad6d32dd673 100644
--- a/include/linux/kfifo.h
+++ b/include/linux/kfifo.h
@@ -113,6 +113,9 @@ extern unsigned int kfifo_in(struct kfifo *fifo,
 				const void *from, unsigned int len);
 extern __must_check unsigned int kfifo_out(struct kfifo *fifo,
 				void *to, unsigned int len);
+extern __must_check unsigned int kfifo_out_peek(struct kfifo *fifo,
+				void *to, unsigned int len, unsigned offset);
+
 
 /**
  * kfifo_reset - removes the entire FIFO contents
diff --git a/kernel/kfifo.c b/kernel/kfifo.c
index b50bb622e8b0..7384f120be87 100644
--- a/kernel/kfifo.c
+++ b/kernel/kfifo.c
@@ -302,6 +302,27 @@ unsigned int kfifo_out(struct kfifo *fifo, void *to, unsigned int len)
 }
 EXPORT_SYMBOL(kfifo_out);
 
+/**
+ * kfifo_out_peek - copy some data from the FIFO, but do not remove it
+ * @fifo: the fifo to be used.
+ * @to: where the data must be copied.
+ * @len: the size of the destination buffer.
+ * @offset: offset into the fifo
+ *
+ * This function copies at most @len bytes at @offset from the FIFO
+ * into the @to buffer and returns the number of copied bytes.
+ * The data is not removed from the FIFO.
+ */
+unsigned int kfifo_out_peek(struct kfifo *fifo, void *to, unsigned int len,
+			    unsigned offset)
+{
+	len = min(kfifo_len(fifo), len + offset);
+
+	__kfifo_out_data(fifo, to, len, offset);
+	return len;
+}
+EXPORT_SYMBOL(kfifo_out_peek);
+
 unsigned int __kfifo_out_generic(struct kfifo *fifo,
 	void *to, unsigned int len, unsigned int recsize,
 	unsigned int *total)
-- 
cgit v1.2.3


From d994ffc247f7c4a48b848f10c4c01c9b06411ada Mon Sep 17 00:00:00 2001
From: Andi Kleen <andi@firstfloor.org>
Date: Fri, 15 Jan 2010 17:01:17 -0800
Subject: kfifo: add kfifo_initialized

Simple inline that checks if kfifo_init() has been executed on a fifo.

This is useful for walking all per CPU fifos, when some of them might not
have been brought up yet.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Stefani Seibold <stefani@seibold.net>
Cc: Roland Dreier <rdreier@cisco.com>
Cc: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Cc: Andy Walls <awalls@radix.net>
Cc: Vikram Dhillon <dhillonv10@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kfifo.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h
index 7ad6d32dd673..c8618243ca5a 100644
--- a/include/linux/kfifo.h
+++ b/include/linux/kfifo.h
@@ -116,6 +116,16 @@ extern __must_check unsigned int kfifo_out(struct kfifo *fifo,
 extern __must_check unsigned int kfifo_out_peek(struct kfifo *fifo,
 				void *to, unsigned int len, unsigned offset);
 
+/**
+ * kfifo_initialized - Check if kfifo is initialized.
+ * @fifo: fifo to check
+ * Return %true if FIFO is initialized, otherwise %false.
+ * Assumes the fifo was 0 before.
+ */
+static inline bool kfifo_initialized(struct kfifo *fifo)
+{
+	return fifo->buffer != 0;
+}
 
 /**
  * kfifo_reset - removes the entire FIFO contents
-- 
cgit v1.2.3


From 5dab600e6a153ceb64832f608069e6c08185411a Mon Sep 17 00:00:00 2001
From: Andi Kleen <andi@firstfloor.org>
Date: Fri, 15 Jan 2010 17:01:17 -0800
Subject: kfifo: document everywhere that size has to be power of two

On my first try using them I missed that the fifos need to be power of
two, resulting in a runtime bug.  Document that requirement everywhere
(and fix one grammar bug)

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Stefani Seibold <stefani@seibold.net>
Cc: Roland Dreier <rdreier@cisco.com>
Cc: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Cc: Andy Walls <awalls@radix.net>
Cc: Vikram Dhillon <dhillonv10@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kfifo.h | 4 ++--
 kernel/kfifo.c        | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h
index c8618243ca5a..6f6c5f300af6 100644
--- a/include/linux/kfifo.h
+++ b/include/linux/kfifo.h
@@ -67,7 +67,7 @@ struct kfifo {
 /**
  * DECLARE_KFIFO - macro to declare a kfifo and the associated buffer
  * @name: name of the declared kfifo datatype
- * @size: size of the fifo buffer
+ * @size: size of the fifo buffer. Must be a power of two.
  *
  * Note1: the macro can be used inside struct or union declaration
  * Note2: the macro creates two objects:
@@ -91,7 +91,7 @@ union { \
 /**
  * DEFINE_KFIFO - macro to define and initialize a kfifo
  * @name: name of the declared kfifo datatype
- * @size: size of the fifo buffer
+ * @size: size of the fifo buffer. Must be a power of two.
  *
  * Note1: the macro can be used for global and local kfifo data type variables
  * Note2: the macro creates two objects:
diff --git a/kernel/kfifo.c b/kernel/kfifo.c
index 7384f120be87..32c5c15d750d 100644
--- a/kernel/kfifo.c
+++ b/kernel/kfifo.c
@@ -41,7 +41,7 @@ static void _kfifo_init(struct kfifo *fifo, void *buffer,
  * kfifo_init - initialize a FIFO using a preallocated buffer
  * @fifo: the fifo to assign the buffer
  * @buffer: the preallocated buffer to be used.
- * @size: the size of the internal buffer, this have to be a power of 2.
+ * @size: the size of the internal buffer, this has to be a power of 2.
  *
  */
 void kfifo_init(struct kfifo *fifo, void *buffer, unsigned int size)
-- 
cgit v1.2.3


From cc8ef6eb21e964b1c5eb97b2d0e8ac9893e1bf86 Mon Sep 17 00:00:00 2001
From: Roland Dreier <rdreier@cisco.com>
Date: Fri, 15 Jan 2010 17:01:22 -0800
Subject: kernel.h: add BUILD_BUG_ON_NOT_POWER_OF_2()

Add BUILD_BUG_ON_NOT_POWER_OF_2()

When code relies on a constant being a power of 2:

	#define FOO	512	/* must be a power of 2 */

it would be nice to be able to do:

	BUILD_BUG_ON(!is_power_of_2(FOO));

However applying an inline function does not result in a compile-time
constant that can be used with BUILD_BUG_ON(), so trying that gives
results in:

	error: bit-field '<anonymous>' width not an integer constant

As suggested by akpm, rather than monkeying around with is_power_of_2()
and risking gcc warts about constant expressions, just create a macro
BUILD_BUG_ON_NOT_POWER_OF_2() to encapsulate this common requirement.

Signed-off-by: Roland Dreier <rolandd@cisco.com>
Cc: Bart Van Assche <bvanassche@acm.org>
Cc: David Dillow <dave@thedillows.org>
Cc: "Robert P. J. Day" <rpjday@crashcourse.ca>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kernel.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 3fc9f5aab5f8..328bca609b9b 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -734,6 +734,10 @@ struct sysinfo {
 /* Force a compilation error if condition is constant and true */
 #define MAYBE_BUILD_BUG_ON(cond) ((void)sizeof(char[1 - 2 * !!(cond)]))
 
+/* Force a compilation error if a constant expression is not a power of 2 */
+#define BUILD_BUG_ON_NOT_POWER_OF_2(n)			\
+	BUILD_BUG_ON((n) == 0 || (((n) & ((n) - 1)) != 0))
+
 /* Force a compilation error if condition is true, but also produce a
    result (of value 0 and type size_t), so the expression can be used
    e.g. in a structure initializer (or where-ever else comma expressions
-- 
cgit v1.2.3


From 1e2ae599d37e60958c03ca5e46b1f657619a30cd Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 15 Jan 2010 17:01:33 -0800
Subject: nommu: struct vm_region's vm_usage count need not be atomic

The vm_usage count field in struct vm_region does not need to be atomic as
it's only even modified whilst nommu_region_sem is write locked.

Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Al Viro <viro@zeniv.linux.org.uk>
Cc: Greg Ungerer <gerg@snapgear.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm_types.h |  2 +-
 mm/nommu.c               | 14 +++++++-------
 2 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 84d020bed083..80cfa78a8cf6 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -122,7 +122,7 @@ struct vm_region {
 	unsigned long	vm_pgoff;	/* the offset in vm_file corresponding to vm_start */
 	struct file	*vm_file;	/* the backing file or NULL */
 
-	atomic_t	vm_usage;	/* region usage count */
+	int		vm_usage;	/* region usage count (access under nommu_region_sem) */
 	bool		vm_icache_flushed : 1; /* true if the icache has been flushed for
 						* this region */
 };
diff --git a/mm/nommu.c b/mm/nommu.c
index 17773862619b..5e39294f8ea8 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -552,11 +552,11 @@ static void free_page_series(unsigned long from, unsigned long to)
 static void __put_nommu_region(struct vm_region *region)
 	__releases(nommu_region_sem)
 {
-	kenter("%p{%d}", region, atomic_read(&region->vm_usage));
+	kenter("%p{%d}", region, region->vm_usage);
 
 	BUG_ON(!nommu_region_tree.rb_node);
 
-	if (atomic_dec_and_test(&region->vm_usage)) {
+	if (--region->vm_usage == 0) {
 		if (region->vm_top > region->vm_start)
 			delete_nommu_region(region);
 		up_write(&nommu_region_sem);
@@ -1205,7 +1205,7 @@ unsigned long do_mmap_pgoff(struct file *file,
 	if (!vma)
 		goto error_getting_vma;
 
-	atomic_set(&region->vm_usage, 1);
+	region->vm_usage = 1;
 	region->vm_flags = vm_flags;
 	region->vm_pgoff = pgoff;
 
@@ -1272,7 +1272,7 @@ unsigned long do_mmap_pgoff(struct file *file,
 			}
 
 			/* we've found a region we can share */
-			atomic_inc(&pregion->vm_usage);
+			pregion->vm_usage++;
 			vma->vm_region = pregion;
 			start = pregion->vm_start;
 			start += (pgoff - pregion->vm_pgoff) << PAGE_SHIFT;
@@ -1289,7 +1289,7 @@ unsigned long do_mmap_pgoff(struct file *file,
 					vma->vm_region = NULL;
 					vma->vm_start = 0;
 					vma->vm_end = 0;
-					atomic_dec(&pregion->vm_usage);
+					pregion->vm_usage--;
 					pregion = NULL;
 					goto error_just_free;
 				}
@@ -1444,7 +1444,7 @@ int split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
 	/* we're only permitted to split anonymous regions that have a single
 	 * owner */
 	if (vma->vm_file ||
-	    atomic_read(&vma->vm_region->vm_usage) != 1)
+	    vma->vm_region->vm_usage != 1)
 		return -ENOMEM;
 
 	if (mm->map_count >= sysctl_max_map_count)
@@ -1518,7 +1518,7 @@ static int shrink_vma(struct mm_struct *mm,
 
 	/* cut the backing region down to size */
 	region = vma->vm_region;
-	BUG_ON(atomic_read(&region->vm_usage) != 1);
+	BUG_ON(region->vm_usage != 1);
 
 	down_write(&nommu_region_sem);
 	delete_nommu_region(region);
-- 
cgit v1.2.3


From efc1a3b16930c41d64ffefde16b87d82f603a8a0 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 15 Jan 2010 17:01:35 -0800
Subject: nommu: don't need get_unmapped_area() for NOMMU

get_unmapped_area() is unnecessary for NOMMU as no-one calls it.

Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Al Viro <viro@zeniv.linux.org.uk>
Cc: Greg Ungerer <gerg@snapgear.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm_types.h |  2 ++
 include/linux/sched.h    |  7 +++++--
 mm/nommu.c               | 21 ---------------------
 mm/util.c                |  2 +-
 4 files changed, 8 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 80cfa78a8cf6..36f96271306c 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -205,10 +205,12 @@ struct mm_struct {
 	struct vm_area_struct * mmap;		/* list of VMAs */
 	struct rb_root mm_rb;
 	struct vm_area_struct * mmap_cache;	/* last find_vma result */
+#ifdef CONFIG_MMU
 	unsigned long (*get_unmapped_area) (struct file *filp,
 				unsigned long addr, unsigned long len,
 				unsigned long pgoff, unsigned long flags);
 	void (*unmap_area) (struct mm_struct *mm, unsigned long addr);
+#endif
 	unsigned long mmap_base;		/* base of mmap area */
 	unsigned long task_size;		/* size of task vm space */
 	unsigned long cached_hole_size; 	/* if non-zero, the largest hole below free_area_cache */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 8d4991be9d53..6f7bba93929b 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -377,6 +377,8 @@ extern int sysctl_max_map_count;
 
 #include <linux/aio.h>
 
+#ifdef CONFIG_MMU
+extern void arch_pick_mmap_layout(struct mm_struct *mm);
 extern unsigned long
 arch_get_unmapped_area(struct file *, unsigned long, unsigned long,
 		       unsigned long, unsigned long);
@@ -386,6 +388,9 @@ arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr,
 			  unsigned long flags);
 extern void arch_unmap_area(struct mm_struct *, unsigned long);
 extern void arch_unmap_area_topdown(struct mm_struct *, unsigned long);
+#else
+static inline void arch_pick_mmap_layout(struct mm_struct *mm) {}
+#endif
 
 #if USE_SPLIT_PTLOCKS
 /*
@@ -2491,8 +2496,6 @@ static inline void set_task_cpu(struct task_struct *p, unsigned int cpu)
 
 #endif /* CONFIG_SMP */
 
-extern void arch_pick_mmap_layout(struct mm_struct *mm);
-
 #ifdef CONFIG_TRACING
 extern void
 __trace_special(void *__tr, void *__data,
diff --git a/mm/nommu.c b/mm/nommu.c
index d6dd656264a2..32be0cf51ba6 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1760,27 +1760,6 @@ void unmap_mapping_range(struct address_space *mapping,
 }
 EXPORT_SYMBOL(unmap_mapping_range);
 
-/*
- * ask for an unmapped area at which to create a mapping on a file
- */
-unsigned long get_unmapped_area(struct file *file, unsigned long addr,
-				unsigned long len, unsigned long pgoff,
-				unsigned long flags)
-{
-	unsigned long (*get_area)(struct file *, unsigned long, unsigned long,
-				  unsigned long, unsigned long);
-
-	get_area = current->mm->get_unmapped_area;
-	if (file && file->f_op && file->f_op->get_unmapped_area)
-		get_area = file->f_op->get_unmapped_area;
-
-	if (!get_area)
-		return -ENOSYS;
-
-	return get_area(file, addr, len, pgoff, flags);
-}
-EXPORT_SYMBOL(get_unmapped_area);
-
 /*
  * Check that a process has enough memory to allocate a new virtual
  * mapping. 0 means there is enough memory for the allocation to
diff --git a/mm/util.c b/mm/util.c
index 7c35ad95f927..834db7be240f 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -220,7 +220,7 @@ char *strndup_user(const char __user *s, long n)
 }
 EXPORT_SYMBOL(strndup_user);
 
-#ifndef HAVE_ARCH_PICK_MMAP_LAYOUT
+#if defined(CONFIG_MMU) && !defined(HAVE_ARCH_PICK_MMAP_LAYOUT)
 void arch_pick_mmap_layout(struct mm_struct *mm)
 {
 	mm->mmap_base = TASK_UNMAPPED_BASE;
-- 
cgit v1.2.3


From 7e6608724c640924aad1d556d17df33ebaa6124d Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 15 Jan 2010 17:01:39 -0800
Subject: nommu: fix shared mmap after truncate shrinkage problems

Fix a problem in NOMMU mmap with ramfs whereby a shared mmap can happen
over the end of a truncation.  The problem is that
ramfs_nommu_check_mappings() checks that the reduced file size against the
VMA tree, but not the vm_region tree.

The following sequence of events can cause the problem:

	fd = open("/tmp/x", O_RDWR|O_TRUNC|O_CREAT, 0600);
	ftruncate(fd, 32 * 1024);
	a = mmap(NULL, 32 * 1024, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
	b = mmap(NULL, 16 * 1024, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
	munmap(a, 32 * 1024);
	ftruncate(fd, 16 * 1024);
	c = mmap(NULL, 32 * 1024, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);

Mapping 'a' creates a vm_region covering 32KB of the file.  Mapping 'b'
sees that the vm_region from 'a' is covering the region it wants and so
shares it, pinning it in memory.

Mapping 'a' then goes away and the file is truncated to the end of VMA
'b'.  However, the region allocated by 'a' is still in effect, and has
_not_ been reduced.

Mapping 'c' is then created, and because there's a vm_region covering the
desired region, get_unmapped_area() is _not_ called to repeat the check,
and the mapping is granted, even though the pages from the latter half of
the mapping have been discarded.

However:

	d = mmap(NULL, 16 * 1024, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);

Mapping 'd' should work, and should end up sharing the region allocated by
'a'.

To deal with this, we shrink the vm_region struct during the truncation,
lest do_mmap_pgoff() take it as licence to share the full region
automatically without calling the get_unmapped_area() file op again.

Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Al Viro <viro@zeniv.linux.org.uk>
Cc: Greg Ungerer <gerg@snapgear.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ramfs/file-nommu.c | 31 +-------------------------
 include/linux/mm.h    |  1 +
 mm/nommu.c            | 62 +++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 64 insertions(+), 30 deletions(-)

(limited to 'include/linux')

diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index 266531343aae..1739a4aba25f 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -121,35 +121,6 @@ add_error:
 	return ret;
 }
 
-/*****************************************************************************/
-/*
- * check that file shrinkage doesn't leave any VMAs dangling in midair
- */
-static int ramfs_nommu_check_mappings(struct inode *inode,
-				      size_t newsize, size_t size)
-{
-	struct vm_area_struct *vma;
-	struct prio_tree_iter iter;
-
-	down_write(&nommu_region_sem);
-
-	/* search for VMAs that fall within the dead zone */
-	vma_prio_tree_foreach(vma, &iter, &inode->i_mapping->i_mmap,
-			      newsize >> PAGE_SHIFT,
-			      (size + PAGE_SIZE - 1) >> PAGE_SHIFT
-			      ) {
-		/* found one - only interested if it's shared out of the page
-		 * cache */
-		if (vma->vm_flags & VM_SHARED) {
-			up_write(&nommu_region_sem);
-			return -ETXTBSY; /* not quite true, but near enough */
-		}
-	}
-
-	up_write(&nommu_region_sem);
-	return 0;
-}
-
 /*****************************************************************************/
 /*
  *
@@ -169,7 +140,7 @@ static int ramfs_nommu_resize(struct inode *inode, loff_t newsize, loff_t size)
 
 	/* check that a decrease in size doesn't cut off any shared mappings */
 	if (newsize < size) {
-		ret = ramfs_nommu_check_mappings(inode, newsize, size);
+		ret = nommu_shrink_inode_mappings(inode, size, newsize);
 		if (ret < 0)
 			return ret;
 	}
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 2265f28eb47a..60c467bfbabd 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1089,6 +1089,7 @@ extern void zone_pcp_update(struct zone *zone);
 
 /* nommu.c */
 extern atomic_long_t mmap_pages_allocated;
+extern int nommu_shrink_inode_mappings(struct inode *, size_t, size_t);
 
 /* prio_tree.c */
 void vma_prio_tree_add(struct vm_area_struct *, struct vm_area_struct *old);
diff --git a/mm/nommu.c b/mm/nommu.c
index 32be0cf51ba6..48a2ecfaf059 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1914,3 +1914,65 @@ int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, in
 	mmput(mm);
 	return len;
 }
+
+/**
+ * nommu_shrink_inode_mappings - Shrink the shared mappings on an inode
+ * @inode: The inode to check
+ * @size: The current filesize of the inode
+ * @newsize: The proposed filesize of the inode
+ *
+ * Check the shared mappings on an inode on behalf of a shrinking truncate to
+ * make sure that that any outstanding VMAs aren't broken and then shrink the
+ * vm_regions that extend that beyond so that do_mmap_pgoff() doesn't
+ * automatically grant mappings that are too large.
+ */
+int nommu_shrink_inode_mappings(struct inode *inode, size_t size,
+				size_t newsize)
+{
+	struct vm_area_struct *vma;
+	struct prio_tree_iter iter;
+	struct vm_region *region;
+	pgoff_t low, high;
+	size_t r_size, r_top;
+
+	low = newsize >> PAGE_SHIFT;
+	high = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
+
+	down_write(&nommu_region_sem);
+
+	/* search for VMAs that fall within the dead zone */
+	vma_prio_tree_foreach(vma, &iter, &inode->i_mapping->i_mmap,
+			      low, high) {
+		/* found one - only interested if it's shared out of the page
+		 * cache */
+		if (vma->vm_flags & VM_SHARED) {
+			up_write(&nommu_region_sem);
+			return -ETXTBSY; /* not quite true, but near enough */
+		}
+	}
+
+	/* reduce any regions that overlap the dead zone - if in existence,
+	 * these will be pointed to by VMAs that don't overlap the dead zone
+	 *
+	 * we don't check for any regions that start beyond the EOF as there
+	 * shouldn't be any
+	 */
+	vma_prio_tree_foreach(vma, &iter, &inode->i_mapping->i_mmap,
+			      0, ULONG_MAX) {
+		if (!(vma->vm_flags & VM_SHARED))
+			continue;
+
+		region = vma->vm_region;
+		r_size = region->vm_top - region->vm_start;
+		r_top = (region->vm_pgoff << PAGE_SHIFT) + r_size;
+
+		if (r_top > newsize) {
+			region->vm_top -= r_top - newsize;
+			if (region->vm_end > region->vm_top)
+				region->vm_end = region->vm_top;
+		}
+	}
+
+	up_write(&nommu_region_sem);
+	return 0;
+}
-- 
cgit v1.2.3


From 73c89c15b959adf06366722c4be8d2eddec0a529 Mon Sep 17 00:00:00 2001
From: Tobias Brunner <tobias@strongswan.org>
Date: Sun, 17 Jan 2010 21:52:11 +1100
Subject: crypto: gcm - Add RFC4543 wrapper for GCM

This patch adds the RFC4543 (GMAC) wrapper for GCM similar to the
existing RFC4106 wrapper. The main differences between GCM and GMAC are
the contents of the AAD and that the plaintext is empty for the latter.

Signed-off-by: Tobias Brunner <tobias@strongswan.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/gcm.c            | 287 ++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/pfkeyv2.h |   1 +
 net/xfrm/xfrm_algo.c    |  16 +++
 3 files changed, 304 insertions(+)

(limited to 'include/linux')

diff --git a/crypto/gcm.c b/crypto/gcm.c
index c6547130624c..2f5fbba6576c 100644
--- a/crypto/gcm.c
+++ b/crypto/gcm.c
@@ -37,6 +37,19 @@ struct crypto_rfc4106_ctx {
 	u8 nonce[4];
 };
 
+struct crypto_rfc4543_ctx {
+	struct crypto_aead *child;
+	u8 nonce[4];
+};
+
+struct crypto_rfc4543_req_ctx {
+	u8 auth_tag[16];
+	struct scatterlist cipher[1];
+	struct scatterlist payload[2];
+	struct scatterlist assoc[2];
+	struct aead_request subreq;
+};
+
 struct crypto_gcm_ghash_ctx {
 	unsigned int cryptlen;
 	struct scatterlist *src;
@@ -1047,6 +1060,272 @@ static struct crypto_template crypto_rfc4106_tmpl = {
 	.module = THIS_MODULE,
 };
 
+static inline struct crypto_rfc4543_req_ctx *crypto_rfc4543_reqctx(
+	struct aead_request *req)
+{
+	unsigned long align = crypto_aead_alignmask(crypto_aead_reqtfm(req));
+
+	return (void *)PTR_ALIGN((u8 *)aead_request_ctx(req), align + 1);
+}
+
+static int crypto_rfc4543_setkey(struct crypto_aead *parent, const u8 *key,
+				 unsigned int keylen)
+{
+	struct crypto_rfc4543_ctx *ctx = crypto_aead_ctx(parent);
+	struct crypto_aead *child = ctx->child;
+	int err;
+
+	if (keylen < 4)
+		return -EINVAL;
+
+	keylen -= 4;
+	memcpy(ctx->nonce, key + keylen, 4);
+
+	crypto_aead_clear_flags(child, CRYPTO_TFM_REQ_MASK);
+	crypto_aead_set_flags(child, crypto_aead_get_flags(parent) &
+				     CRYPTO_TFM_REQ_MASK);
+	err = crypto_aead_setkey(child, key, keylen);
+	crypto_aead_set_flags(parent, crypto_aead_get_flags(child) &
+				      CRYPTO_TFM_RES_MASK);
+
+	return err;
+}
+
+static int crypto_rfc4543_setauthsize(struct crypto_aead *parent,
+				      unsigned int authsize)
+{
+	struct crypto_rfc4543_ctx *ctx = crypto_aead_ctx(parent);
+
+	if (authsize != 16)
+		return -EINVAL;
+
+	return crypto_aead_setauthsize(ctx->child, authsize);
+}
+
+/* this is the same as crypto_authenc_chain */
+static void crypto_rfc4543_chain(struct scatterlist *head,
+				 struct scatterlist *sg, int chain)
+{
+	if (chain) {
+		head->length += sg->length;
+		sg = scatterwalk_sg_next(sg);
+	}
+
+	if (sg)
+		scatterwalk_sg_chain(head, 2, sg);
+	else
+		sg_mark_end(head);
+}
+
+static struct aead_request *crypto_rfc4543_crypt(struct aead_request *req,
+						 int enc)
+{
+	struct crypto_aead *aead = crypto_aead_reqtfm(req);
+	struct crypto_rfc4543_ctx *ctx = crypto_aead_ctx(aead);
+	struct crypto_rfc4543_req_ctx *rctx = crypto_rfc4543_reqctx(req);
+	struct aead_request *subreq = &rctx->subreq;
+	struct scatterlist *dst = req->dst;
+	struct scatterlist *cipher = rctx->cipher;
+	struct scatterlist *payload = rctx->payload;
+	struct scatterlist *assoc = rctx->assoc;
+	unsigned int authsize = crypto_aead_authsize(aead);
+	unsigned int assoclen = req->assoclen;
+	struct page *dstp;
+	u8 *vdst;
+	u8 *iv = PTR_ALIGN((u8 *)(rctx + 1) + crypto_aead_reqsize(ctx->child),
+			   crypto_aead_alignmask(ctx->child) + 1);
+
+	memcpy(iv, ctx->nonce, 4);
+	memcpy(iv + 4, req->iv, 8);
+
+	/* construct cipher/plaintext */
+	if (enc)
+		memset(rctx->auth_tag, 0, authsize);
+	else
+		scatterwalk_map_and_copy(rctx->auth_tag, dst,
+					 req->cryptlen - authsize,
+					 authsize, 0);
+
+	sg_init_one(cipher, rctx->auth_tag, authsize);
+
+	/* construct the aad */
+	dstp = sg_page(dst);
+	vdst = PageHighMem(dstp) ? NULL : page_address(dstp) + dst->offset;
+
+	sg_init_table(payload, 2);
+	sg_set_buf(payload, req->iv, 8);
+	crypto_rfc4543_chain(payload, dst, vdst == req->iv + 8);
+	assoclen += 8 + req->cryptlen - (enc ? 0 : authsize);
+
+	sg_init_table(assoc, 2);
+	sg_set_page(assoc, sg_page(req->assoc), req->assoc->length,
+		    req->assoc->offset);
+	crypto_rfc4543_chain(assoc, payload, 0);
+
+	aead_request_set_tfm(subreq, ctx->child);
+	aead_request_set_callback(subreq, req->base.flags, req->base.complete,
+				  req->base.data);
+	aead_request_set_crypt(subreq, cipher, cipher, enc ? 0 : authsize, iv);
+	aead_request_set_assoc(subreq, assoc, assoclen);
+
+	return subreq;
+}
+
+static int crypto_rfc4543_encrypt(struct aead_request *req)
+{
+	struct crypto_aead *aead = crypto_aead_reqtfm(req);
+	struct crypto_rfc4543_req_ctx *rctx = crypto_rfc4543_reqctx(req);
+	struct aead_request *subreq;
+	int err;
+
+	subreq = crypto_rfc4543_crypt(req, 1);
+	err = crypto_aead_encrypt(subreq);
+	if (err)
+		return err;
+
+	scatterwalk_map_and_copy(rctx->auth_tag, req->dst, req->cryptlen,
+				 crypto_aead_authsize(aead), 1);
+
+	return 0;
+}
+
+static int crypto_rfc4543_decrypt(struct aead_request *req)
+{
+	req = crypto_rfc4543_crypt(req, 0);
+
+	return crypto_aead_decrypt(req);
+}
+
+static int crypto_rfc4543_init_tfm(struct crypto_tfm *tfm)
+{
+	struct crypto_instance *inst = (void *)tfm->__crt_alg;
+	struct crypto_aead_spawn *spawn = crypto_instance_ctx(inst);
+	struct crypto_rfc4543_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct crypto_aead *aead;
+	unsigned long align;
+
+	aead = crypto_spawn_aead(spawn);
+	if (IS_ERR(aead))
+		return PTR_ERR(aead);
+
+	ctx->child = aead;
+
+	align = crypto_aead_alignmask(aead);
+	align &= ~(crypto_tfm_ctx_alignment() - 1);
+	tfm->crt_aead.reqsize = sizeof(struct crypto_rfc4543_req_ctx) +
+				ALIGN(crypto_aead_reqsize(aead),
+				      crypto_tfm_ctx_alignment()) +
+				align + 16;
+
+	return 0;
+}
+
+static void crypto_rfc4543_exit_tfm(struct crypto_tfm *tfm)
+{
+	struct crypto_rfc4543_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	crypto_free_aead(ctx->child);
+}
+
+static struct crypto_instance *crypto_rfc4543_alloc(struct rtattr **tb)
+{
+	struct crypto_attr_type *algt;
+	struct crypto_instance *inst;
+	struct crypto_aead_spawn *spawn;
+	struct crypto_alg *alg;
+	const char *ccm_name;
+	int err;
+
+	algt = crypto_get_attr_type(tb);
+	err = PTR_ERR(algt);
+	if (IS_ERR(algt))
+		return ERR_PTR(err);
+
+	if ((algt->type ^ CRYPTO_ALG_TYPE_AEAD) & algt->mask)
+		return ERR_PTR(-EINVAL);
+
+	ccm_name = crypto_attr_alg_name(tb[1]);
+	err = PTR_ERR(ccm_name);
+	if (IS_ERR(ccm_name))
+		return ERR_PTR(err);
+
+	inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL);
+	if (!inst)
+		return ERR_PTR(-ENOMEM);
+
+	spawn = crypto_instance_ctx(inst);
+	crypto_set_aead_spawn(spawn, inst);
+	err = crypto_grab_aead(spawn, ccm_name, 0,
+			       crypto_requires_sync(algt->type, algt->mask));
+	if (err)
+		goto out_free_inst;
+
+	alg = crypto_aead_spawn_alg(spawn);
+
+	err = -EINVAL;
+
+	/* We only support 16-byte blocks. */
+	if (alg->cra_aead.ivsize != 16)
+		goto out_drop_alg;
+
+	/* Not a stream cipher? */
+	if (alg->cra_blocksize != 1)
+		goto out_drop_alg;
+
+	err = -ENAMETOOLONG;
+	if (snprintf(inst->alg.cra_name, CRYPTO_MAX_ALG_NAME,
+		     "rfc4543(%s)", alg->cra_name) >= CRYPTO_MAX_ALG_NAME ||
+	    snprintf(inst->alg.cra_driver_name, CRYPTO_MAX_ALG_NAME,
+		     "rfc4543(%s)", alg->cra_driver_name) >=
+	    CRYPTO_MAX_ALG_NAME)
+		goto out_drop_alg;
+
+	inst->alg.cra_flags = CRYPTO_ALG_TYPE_AEAD;
+	inst->alg.cra_flags |= alg->cra_flags & CRYPTO_ALG_ASYNC;
+	inst->alg.cra_priority = alg->cra_priority;
+	inst->alg.cra_blocksize = 1;
+	inst->alg.cra_alignmask = alg->cra_alignmask;
+	inst->alg.cra_type = &crypto_nivaead_type;
+
+	inst->alg.cra_aead.ivsize = 8;
+	inst->alg.cra_aead.maxauthsize = 16;
+
+	inst->alg.cra_ctxsize = sizeof(struct crypto_rfc4543_ctx);
+
+	inst->alg.cra_init = crypto_rfc4543_init_tfm;
+	inst->alg.cra_exit = crypto_rfc4543_exit_tfm;
+
+	inst->alg.cra_aead.setkey = crypto_rfc4543_setkey;
+	inst->alg.cra_aead.setauthsize = crypto_rfc4543_setauthsize;
+	inst->alg.cra_aead.encrypt = crypto_rfc4543_encrypt;
+	inst->alg.cra_aead.decrypt = crypto_rfc4543_decrypt;
+
+	inst->alg.cra_aead.geniv = "seqiv";
+
+out:
+	return inst;
+
+out_drop_alg:
+	crypto_drop_aead(spawn);
+out_free_inst:
+	kfree(inst);
+	inst = ERR_PTR(err);
+	goto out;
+}
+
+static void crypto_rfc4543_free(struct crypto_instance *inst)
+{
+	crypto_drop_spawn(crypto_instance_ctx(inst));
+	kfree(inst);
+}
+
+static struct crypto_template crypto_rfc4543_tmpl = {
+	.name = "rfc4543",
+	.alloc = crypto_rfc4543_alloc,
+	.free = crypto_rfc4543_free,
+	.module = THIS_MODULE,
+};
+
 static int __init crypto_gcm_module_init(void)
 {
 	int err;
@@ -1067,8 +1346,14 @@ static int __init crypto_gcm_module_init(void)
 	if (err)
 		goto out_undo_gcm;
 
+	err = crypto_register_template(&crypto_rfc4543_tmpl);
+	if (err)
+		goto out_undo_rfc4106;
+
 	return 0;
 
+out_undo_rfc4106:
+	crypto_unregister_template(&crypto_rfc4106_tmpl);
 out_undo_gcm:
 	crypto_unregister_template(&crypto_gcm_tmpl);
 out_undo_base:
@@ -1081,6 +1366,7 @@ out:
 static void __exit crypto_gcm_module_exit(void)
 {
 	kfree(gcm_zeroes);
+	crypto_unregister_template(&crypto_rfc4543_tmpl);
 	crypto_unregister_template(&crypto_rfc4106_tmpl);
 	crypto_unregister_template(&crypto_gcm_tmpl);
 	crypto_unregister_template(&crypto_gcm_base_tmpl);
@@ -1094,3 +1380,4 @@ MODULE_DESCRIPTION("Galois/Counter Mode");
 MODULE_AUTHOR("Mikko Herranen <mh1@iki.fi>");
 MODULE_ALIAS("gcm_base");
 MODULE_ALIAS("rfc4106");
+MODULE_ALIAS("rfc4543");
diff --git a/include/linux/pfkeyv2.h b/include/linux/pfkeyv2.h
index 228b0b6306b0..0b80c806631f 100644
--- a/include/linux/pfkeyv2.h
+++ b/include/linux/pfkeyv2.h
@@ -315,6 +315,7 @@ struct sadb_x_kmaddress {
 #define SADB_X_EALG_AES_GCM_ICV12	19
 #define SADB_X_EALG_AES_GCM_ICV16	20
 #define SADB_X_EALG_CAMELLIACBC		22
+#define SADB_X_EALG_NULL_AES_GMAC	23
 #define SADB_EALG_MAX                   253 /* last EALG */
 /* private allocations should use 249-255 (RFC2407) */
 #define SADB_X_EALG_SERPENTCBC  252     /* draft-ietf-ipsec-ciph-aes-cbc-00 */
diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c
index 743c0134a6a9..8b4d6e3246e5 100644
--- a/net/xfrm/xfrm_algo.c
+++ b/net/xfrm/xfrm_algo.c
@@ -125,6 +125,22 @@ static struct xfrm_algo_desc aead_list[] = {
 		.sadb_alg_maxbits = 256
 	}
 },
+{
+	.name = "rfc4543(gcm(aes))",
+
+	.uinfo = {
+		.aead = {
+			.icv_truncbits = 128,
+		}
+	},
+
+	.desc = {
+		.sadb_alg_id = SADB_X_EALG_NULL_AES_GMAC,
+		.sadb_alg_ivlen = 8,
+		.sadb_alg_minbits = 128,
+		.sadb_alg_maxbits = 256
+	}
+},
 };
 
 static struct xfrm_algo_desc aalg_list[] = {
-- 
cgit v1.2.3


From 8b0e58a70a7a41443c779de074288035b014cb94 Mon Sep 17 00:00:00 2001
From: Stephane Chatty <chatty@lii-enac.fr>
Date: Wed, 13 Jan 2010 21:52:34 +0100
Subject: HID: let hid-input accept digitizers

Extended IS_INPUT_APPLICATION to accept digitzers that are actual input
devices (touchscreens, light pens, touch pads, white boards)

Signed-off-by: Stephane Chatty <chatty@enac.fr>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 include/linux/hid.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/hid.h b/include/linux/hid.h
index 87093652dda8..b978c1e2e74d 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -663,7 +663,7 @@ struct hid_ll_driver {
 
 /* Applications from HID Usage Tables 4/8/99 Version 1.1 */
 /* We ignore a few input applications that are not widely used */
-#define IS_INPUT_APPLICATION(a) (((a >= 0x00010000) && (a <= 0x00010008)) || (a == 0x00010080) || (a == 0x000c0001) || (a == 0x000d0002))
+#define IS_INPUT_APPLICATION(a) (((a >= 0x00010000) && (a <= 0x00010008)) || (a == 0x00010080) || (a == 0x000c0001) || ((a >= 0x000d0002) && (a <= 0x000d0006)))
 
 /* HID core API */
 
-- 
cgit v1.2.3


From a83d8e8d099fc373a5ca7112ad08c553bb2c180f Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Mon, 18 Jan 2010 08:21:13 +0100
Subject: netfilter: xtables: add struct xt_mtchk_param::net

Some complex match modules (like xt_hashlimit/xt_recent) want netns
information at constructor and destructor time. We propably can play
games at match destruction time, because netns can be passed in object,
but I think it's cleaner to explicitly pass netns.

Add ->net, make sure it's set from ebtables/iptables/ip6tables code.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/netfilter/x_tables.h |  1 +
 net/bridge/netfilter/ebtables.c    | 14 +++++++++-----
 net/ipv4/netfilter/ip_tables.c     | 24 ++++++++++++++----------
 net/ipv6/netfilter/ip6_tables.c    | 14 ++++++++------
 4 files changed, 32 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index 378f27ae7772..88261b9829a7 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -205,6 +205,7 @@ struct xt_match_param {
  * @hook_mask:	via which hooks the new rule is reachable
  */
 struct xt_mtchk_param {
+	struct net *net;
 	const char *table;
 	const void *entryinfo;
 	const struct xt_match *match;
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index bd1c65425d4f..c77bab986696 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -619,7 +619,9 @@ ebt_cleanup_entry(struct ebt_entry *e, unsigned int *cnt)
 }
 
 static inline int
-ebt_check_entry(struct ebt_entry *e, struct ebt_table_info *newinfo,
+ebt_check_entry(struct ebt_entry *e,
+   struct net *net,
+   struct ebt_table_info *newinfo,
    const char *name, unsigned int *cnt,
    struct ebt_cl_stack *cl_s, unsigned int udc_cnt)
 {
@@ -671,6 +673,7 @@ ebt_check_entry(struct ebt_entry *e, struct ebt_table_info *newinfo,
 	}
 	i = 0;
 
+	mtpar.net	= net;
 	mtpar.table     = tgpar.table     = name;
 	mtpar.entryinfo = tgpar.entryinfo = e;
 	mtpar.hook_mask = tgpar.hook_mask = hookmask;
@@ -808,7 +811,8 @@ letscontinue:
 }
 
 /* do the parsing of the table/chains/entries/matches/watchers/targets, heh */
-static int translate_table(char *name, struct ebt_table_info *newinfo)
+static int translate_table(struct net *net, char *name,
+			   struct ebt_table_info *newinfo)
 {
 	unsigned int i, j, k, udc_cnt;
 	int ret;
@@ -917,7 +921,7 @@ static int translate_table(char *name, struct ebt_table_info *newinfo)
 	/* used to know what we need to clean up if something goes wrong */
 	i = 0;
 	ret = EBT_ENTRY_ITERATE(newinfo->entries, newinfo->entries_size,
-	   ebt_check_entry, newinfo, name, &i, cl_s, udc_cnt);
+	   ebt_check_entry, net, newinfo, name, &i, cl_s, udc_cnt);
 	if (ret != 0) {
 		EBT_ENTRY_ITERATE(newinfo->entries, newinfo->entries_size,
 		   ebt_cleanup_entry, &i);
@@ -1017,7 +1021,7 @@ static int do_replace(struct net *net, void __user *user, unsigned int len)
 	if (ret != 0)
 		goto free_counterstmp;
 
-	ret = translate_table(tmp.name, newinfo);
+	ret = translate_table(net, tmp.name, newinfo);
 
 	if (ret != 0)
 		goto free_counterstmp;
@@ -1154,7 +1158,7 @@ ebt_register_table(struct net *net, const struct ebt_table *input_table)
 			newinfo->hook_entry[i] = p +
 				((char *)repl->hook_entry[i] - repl->entries);
 	}
-	ret = translate_table(repl->name, newinfo);
+	ret = translate_table(net, repl->name, newinfo);
 	if (ret != 0) {
 		BUGPRINT("Translate_table failed\n");
 		goto free_chainstack;
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 572330a552ef..a069d72d9482 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -661,8 +661,8 @@ static int check_target(struct ipt_entry *e, const char *name)
 }
 
 static int
-find_check_entry(struct ipt_entry *e, const char *name, unsigned int size,
-		 unsigned int *i)
+find_check_entry(struct ipt_entry *e, struct net *net, const char *name,
+		 unsigned int size, unsigned int *i)
 {
 	struct ipt_entry_target *t;
 	struct xt_target *target;
@@ -675,6 +675,7 @@ find_check_entry(struct ipt_entry *e, const char *name, unsigned int size,
 		return ret;
 
 	j = 0;
+	mtpar.net	= net;
 	mtpar.table     = name;
 	mtpar.entryinfo = &e->ip;
 	mtpar.hook_mask = e->comefrom;
@@ -798,7 +799,8 @@ cleanup_entry(struct ipt_entry *e, unsigned int *i)
 /* Checks and translates the user-supplied table segment (held in
    newinfo) */
 static int
-translate_table(const char *name,
+translate_table(struct net *net,
+		const char *name,
 		unsigned int valid_hooks,
 		struct xt_table_info *newinfo,
 		void *entry0,
@@ -860,7 +862,7 @@ translate_table(const char *name,
 	/* Finally, each sanity check must pass */
 	i = 0;
 	ret = IPT_ENTRY_ITERATE(entry0, newinfo->size,
-				find_check_entry, name, size, &i);
+				find_check_entry, net, name, size, &i);
 
 	if (ret != 0) {
 		IPT_ENTRY_ITERATE(entry0, newinfo->size,
@@ -1303,7 +1305,7 @@ do_replace(struct net *net, void __user *user, unsigned int len)
 		goto free_newinfo;
 	}
 
-	ret = translate_table(tmp.name, tmp.valid_hooks,
+	ret = translate_table(net, tmp.name, tmp.valid_hooks,
 			      newinfo, loc_cpu_entry, tmp.size, tmp.num_entries,
 			      tmp.hook_entry, tmp.underflow);
 	if (ret != 0)
@@ -1655,7 +1657,7 @@ compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr,
 }
 
 static int
-compat_check_entry(struct ipt_entry *e, const char *name,
+compat_check_entry(struct ipt_entry *e, struct net *net, const char *name,
 				     unsigned int *i)
 {
 	struct xt_mtchk_param mtpar;
@@ -1663,6 +1665,7 @@ compat_check_entry(struct ipt_entry *e, const char *name,
 	int ret;
 
 	j = 0;
+	mtpar.net	= net;
 	mtpar.table     = name;
 	mtpar.entryinfo = &e->ip;
 	mtpar.hook_mask = e->comefrom;
@@ -1684,7 +1687,8 @@ compat_check_entry(struct ipt_entry *e, const char *name,
 }
 
 static int
-translate_compat_table(const char *name,
+translate_compat_table(struct net *net,
+		       const char *name,
 		       unsigned int valid_hooks,
 		       struct xt_table_info **pinfo,
 		       void **pentry0,
@@ -1773,7 +1777,7 @@ translate_compat_table(const char *name,
 
 	i = 0;
 	ret = IPT_ENTRY_ITERATE(entry1, newinfo->size, compat_check_entry,
-				name, &i);
+				net, name, &i);
 	if (ret) {
 		j -= i;
 		COMPAT_IPT_ENTRY_ITERATE_CONTINUE(entry0, newinfo->size, i,
@@ -1833,7 +1837,7 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len)
 		goto free_newinfo;
 	}
 
-	ret = translate_compat_table(tmp.name, tmp.valid_hooks,
+	ret = translate_compat_table(net, tmp.name, tmp.valid_hooks,
 				     &newinfo, &loc_cpu_entry, tmp.size,
 				     tmp.num_entries, tmp.hook_entry,
 				     tmp.underflow);
@@ -2086,7 +2090,7 @@ struct xt_table *ipt_register_table(struct net *net,
 	loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
 	memcpy(loc_cpu_entry, repl->entries, repl->size);
 
-	ret = translate_table(table->name, table->valid_hooks,
+	ret = translate_table(net, table->name, table->valid_hooks,
 			      newinfo, loc_cpu_entry, repl->size,
 			      repl->num_entries,
 			      repl->hook_entry,
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 480d7f8c9802..a825940a92ef 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -693,8 +693,8 @@ static int check_target(struct ip6t_entry *e, const char *name)
 }
 
 static int
-find_check_entry(struct ip6t_entry *e, const char *name, unsigned int size,
-		 unsigned int *i)
+find_check_entry(struct ip6t_entry *e, struct net *net, const char *name,
+		 unsigned int size, unsigned int *i)
 {
 	struct ip6t_entry_target *t;
 	struct xt_target *target;
@@ -707,6 +707,7 @@ find_check_entry(struct ip6t_entry *e, const char *name, unsigned int size,
 		return ret;
 
 	j = 0;
+	mtpar.net	= net;
 	mtpar.table     = name;
 	mtpar.entryinfo = &e->ipv6;
 	mtpar.hook_mask = e->comefrom;
@@ -830,7 +831,8 @@ cleanup_entry(struct ip6t_entry *e, unsigned int *i)
 /* Checks and translates the user-supplied table segment (held in
    newinfo) */
 static int
-translate_table(const char *name,
+translate_table(struct net *net,
+		const char *name,
 		unsigned int valid_hooks,
 		struct xt_table_info *newinfo,
 		void *entry0,
@@ -892,7 +894,7 @@ translate_table(const char *name,
 	/* Finally, each sanity check must pass */
 	i = 0;
 	ret = IP6T_ENTRY_ITERATE(entry0, newinfo->size,
-				find_check_entry, name, size, &i);
+				find_check_entry, net, name, size, &i);
 
 	if (ret != 0) {
 		IP6T_ENTRY_ITERATE(entry0, newinfo->size,
@@ -1336,7 +1338,7 @@ do_replace(struct net *net, void __user *user, unsigned int len)
 		goto free_newinfo;
 	}
 
-	ret = translate_table(tmp.name, tmp.valid_hooks,
+	ret = translate_table(net, tmp.name, tmp.valid_hooks,
 			      newinfo, loc_cpu_entry, tmp.size, tmp.num_entries,
 			      tmp.hook_entry, tmp.underflow);
 	if (ret != 0)
@@ -2121,7 +2123,7 @@ struct xt_table *ip6t_register_table(struct net *net,
 	loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
 	memcpy(loc_cpu_entry, repl->entries, repl->size);
 
-	ret = translate_table(table->name, table->valid_hooks,
+	ret = translate_table(net, table->name, table->valid_hooks,
 			      newinfo, loc_cpu_entry, repl->size,
 			      repl->num_entries,
 			      repl->hook_entry,
-- 
cgit v1.2.3


From f54e9367f8499a9bf6b2afbc0dce63e1d53c525a Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Mon, 18 Jan 2010 08:25:47 +0100
Subject: netfilter: xtables: add struct xt_mtdtor_param::net

Add ->net to match destructor list like ->net in constructor list.

Make sure it's set in ebtables/iptables/ip6tables, this requires to
propagate netns up to *_unregister_table().

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/netfilter/x_tables.h        |  1 +
 include/linux/netfilter_bridge/ebtables.h |  2 +-
 include/linux/netfilter_ipv4/ip_tables.h  |  2 +-
 include/linux/netfilter_ipv6/ip6_tables.h |  2 +-
 net/bridge/netfilter/ebtable_broute.c     |  2 +-
 net/bridge/netfilter/ebtable_filter.c     |  2 +-
 net/bridge/netfilter/ebtable_nat.c        |  2 +-
 net/bridge/netfilter/ebtables.c           | 19 ++++++++--------
 net/ipv4/netfilter/ip_tables.c            | 25 +++++++++++----------
 net/ipv4/netfilter/iptable_filter.c       |  2 +-
 net/ipv4/netfilter/iptable_mangle.c       |  2 +-
 net/ipv4/netfilter/iptable_raw.c          |  2 +-
 net/ipv4/netfilter/iptable_security.c     |  2 +-
 net/ipv4/netfilter/nf_nat_rule.c          |  2 +-
 net/ipv6/netfilter/ip6_tables.c           | 37 +++++++++++++++++--------------
 net/ipv6/netfilter/ip6table_filter.c      |  2 +-
 net/ipv6/netfilter/ip6table_mangle.c      |  2 +-
 net/ipv6/netfilter/ip6table_raw.c         |  2 +-
 net/ipv6/netfilter/ip6table_security.c    |  2 +-
 19 files changed, 59 insertions(+), 53 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index 88261b9829a7..3caf5e151102 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -216,6 +216,7 @@ struct xt_mtchk_param {
 
 /* Match destructor parameters */
 struct xt_mtdtor_param {
+	struct net *net;
 	const struct xt_match *match;
 	void *matchinfo;
 	u_int8_t family;
diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h
index 3cc40c131cc3..1c6f0c5f530e 100644
--- a/include/linux/netfilter_bridge/ebtables.h
+++ b/include/linux/netfilter_bridge/ebtables.h
@@ -289,7 +289,7 @@ struct ebt_table {
 		     ~(__alignof__(struct ebt_replace)-1))
 extern struct ebt_table *ebt_register_table(struct net *net,
 					    const struct ebt_table *table);
-extern void ebt_unregister_table(struct ebt_table *table);
+extern void ebt_unregister_table(struct net *net, struct ebt_table *table);
 extern unsigned int ebt_do_table(unsigned int hook, struct sk_buff *skb,
    const struct net_device *in, const struct net_device *out,
    struct ebt_table *table);
diff --git a/include/linux/netfilter_ipv4/ip_tables.h b/include/linux/netfilter_ipv4/ip_tables.h
index 27b3f5807305..8d1f273d350b 100644
--- a/include/linux/netfilter_ipv4/ip_tables.h
+++ b/include/linux/netfilter_ipv4/ip_tables.h
@@ -242,7 +242,7 @@ extern void ipt_init(void) __init;
 extern struct xt_table *ipt_register_table(struct net *net,
 					   const struct xt_table *table,
 					   const struct ipt_replace *repl);
-extern void ipt_unregister_table(struct xt_table *table);
+extern void ipt_unregister_table(struct net *net, struct xt_table *table);
 
 /* Standard entry. */
 struct ipt_standard {
diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/linux/netfilter_ipv6/ip6_tables.h
index b31050d20ae4..d2952d2fa658 100644
--- a/include/linux/netfilter_ipv6/ip6_tables.h
+++ b/include/linux/netfilter_ipv6/ip6_tables.h
@@ -300,7 +300,7 @@ extern void ip6t_init(void) __init;
 extern struct xt_table *ip6t_register_table(struct net *net,
 					    const struct xt_table *table,
 					    const struct ip6t_replace *repl);
-extern void ip6t_unregister_table(struct xt_table *table);
+extern void ip6t_unregister_table(struct net *net, struct xt_table *table);
 extern unsigned int ip6t_do_table(struct sk_buff *skb,
 				  unsigned int hook,
 				  const struct net_device *in,
diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c
index d32ab13e728c..ae3f106c3908 100644
--- a/net/bridge/netfilter/ebtable_broute.c
+++ b/net/bridge/netfilter/ebtable_broute.c
@@ -71,7 +71,7 @@ static int __net_init broute_net_init(struct net *net)
 
 static void __net_exit broute_net_exit(struct net *net)
 {
-	ebt_unregister_table(net->xt.broute_table);
+	ebt_unregister_table(net, net->xt.broute_table);
 }
 
 static struct pernet_operations broute_net_ops = {
diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c
index 60b1a6ca7185..42e6bd094574 100644
--- a/net/bridge/netfilter/ebtable_filter.c
+++ b/net/bridge/netfilter/ebtable_filter.c
@@ -107,7 +107,7 @@ static int __net_init frame_filter_net_init(struct net *net)
 
 static void __net_exit frame_filter_net_exit(struct net *net)
 {
-	ebt_unregister_table(net->xt.frame_filter);
+	ebt_unregister_table(net, net->xt.frame_filter);
 }
 
 static struct pernet_operations frame_filter_net_ops = {
diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c
index 4a98804203b0..6dc2f878ae05 100644
--- a/net/bridge/netfilter/ebtable_nat.c
+++ b/net/bridge/netfilter/ebtable_nat.c
@@ -107,7 +107,7 @@ static int __net_init frame_nat_net_init(struct net *net)
 
 static void __net_exit frame_nat_net_exit(struct net *net)
 {
-	ebt_unregister_table(net->xt.frame_nat);
+	ebt_unregister_table(net, net->xt.frame_nat);
 }
 
 static struct pernet_operations frame_nat_net_ops = {
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index c77bab986696..1aa0e4c1f52d 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -561,13 +561,14 @@ ebt_get_udc_positions(struct ebt_entry *e, struct ebt_table_info *newinfo,
 }
 
 static inline int
-ebt_cleanup_match(struct ebt_entry_match *m, unsigned int *i)
+ebt_cleanup_match(struct ebt_entry_match *m, struct net *net, unsigned int *i)
 {
 	struct xt_mtdtor_param par;
 
 	if (i && (*i)-- == 0)
 		return 1;
 
+	par.net       = net;
 	par.match     = m->u.match;
 	par.matchinfo = m->data;
 	par.family    = NFPROTO_BRIDGE;
@@ -595,7 +596,7 @@ ebt_cleanup_watcher(struct ebt_entry_watcher *w, unsigned int *i)
 }
 
 static inline int
-ebt_cleanup_entry(struct ebt_entry *e, unsigned int *cnt)
+ebt_cleanup_entry(struct ebt_entry *e, struct net *net, unsigned int *cnt)
 {
 	struct xt_tgdtor_param par;
 	struct ebt_entry_target *t;
@@ -606,7 +607,7 @@ ebt_cleanup_entry(struct ebt_entry *e, unsigned int *cnt)
 	if (cnt && (*cnt)-- == 0)
 		return 1;
 	EBT_WATCHER_ITERATE(e, ebt_cleanup_watcher, NULL);
-	EBT_MATCH_ITERATE(e, ebt_cleanup_match, NULL);
+	EBT_MATCH_ITERATE(e, ebt_cleanup_match, net, NULL);
 	t = (struct ebt_entry_target *)(((char *)e) + e->target_offset);
 
 	par.target   = t->u.target;
@@ -731,7 +732,7 @@ ebt_check_entry(struct ebt_entry *e,
 cleanup_watchers:
 	EBT_WATCHER_ITERATE(e, ebt_cleanup_watcher, &j);
 cleanup_matches:
-	EBT_MATCH_ITERATE(e, ebt_cleanup_match, &i);
+	EBT_MATCH_ITERATE(e, ebt_cleanup_match, net, &i);
 	return ret;
 }
 
@@ -924,7 +925,7 @@ static int translate_table(struct net *net, char *name,
 	   ebt_check_entry, net, newinfo, name, &i, cl_s, udc_cnt);
 	if (ret != 0) {
 		EBT_ENTRY_ITERATE(newinfo->entries, newinfo->entries_size,
-		   ebt_cleanup_entry, &i);
+				  ebt_cleanup_entry, net, &i);
 	}
 	vfree(cl_s);
 	return ret;
@@ -1074,7 +1075,7 @@ static int do_replace(struct net *net, void __user *user, unsigned int len)
 
 	/* decrease module count and free resources */
 	EBT_ENTRY_ITERATE(table->entries, table->entries_size,
-	   ebt_cleanup_entry, NULL);
+			  ebt_cleanup_entry, net, NULL);
 
 	vfree(table->entries);
 	if (table->chainstack) {
@@ -1091,7 +1092,7 @@ free_unlock:
 	mutex_unlock(&ebt_mutex);
 free_iterate:
 	EBT_ENTRY_ITERATE(newinfo->entries, newinfo->entries_size,
-	   ebt_cleanup_entry, NULL);
+			  ebt_cleanup_entry, net, NULL);
 free_counterstmp:
 	vfree(counterstmp);
 	/* can be initialized in translate_table() */
@@ -1208,7 +1209,7 @@ out:
 	return ERR_PTR(ret);
 }
 
-void ebt_unregister_table(struct ebt_table *table)
+void ebt_unregister_table(struct net *net, struct ebt_table *table)
 {
 	int i;
 
@@ -1220,7 +1221,7 @@ void ebt_unregister_table(struct ebt_table *table)
 	list_del(&table->list);
 	mutex_unlock(&ebt_mutex);
 	EBT_ENTRY_ITERATE(table->private->entries, table->private->entries_size,
-			  ebt_cleanup_entry, NULL);
+			  ebt_cleanup_entry, net, NULL);
 	if (table->private->nentries)
 		module_put(table->me);
 	vfree(table->private->entries);
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index a069d72d9482..cfaba0e2e6fc 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -553,13 +553,14 @@ mark_source_chains(struct xt_table_info *newinfo,
 }
 
 static int
-cleanup_match(struct ipt_entry_match *m, unsigned int *i)
+cleanup_match(struct ipt_entry_match *m, struct net *net, unsigned int *i)
 {
 	struct xt_mtdtor_param par;
 
 	if (i && (*i)-- == 0)
 		return 1;
 
+	par.net       = net;
 	par.match     = m->u.kernel.match;
 	par.matchinfo = m->data;
 	par.family    = NFPROTO_IPV4;
@@ -705,7 +706,7 @@ find_check_entry(struct ipt_entry *e, struct net *net, const char *name,
  err:
 	module_put(t->u.kernel.target->me);
  cleanup_matches:
-	IPT_MATCH_ITERATE(e, cleanup_match, &j);
+	IPT_MATCH_ITERATE(e, cleanup_match, net, &j);
 	return ret;
 }
 
@@ -775,7 +776,7 @@ check_entry_size_and_hooks(struct ipt_entry *e,
 }
 
 static int
-cleanup_entry(struct ipt_entry *e, unsigned int *i)
+cleanup_entry(struct ipt_entry *e, struct net *net, unsigned int *i)
 {
 	struct xt_tgdtor_param par;
 	struct ipt_entry_target *t;
@@ -784,7 +785,7 @@ cleanup_entry(struct ipt_entry *e, unsigned int *i)
 		return 1;
 
 	/* Cleanup all matches */
-	IPT_MATCH_ITERATE(e, cleanup_match, NULL);
+	IPT_MATCH_ITERATE(e, cleanup_match, net, NULL);
 	t = ipt_get_target(e);
 
 	par.target   = t->u.kernel.target;
@@ -866,7 +867,7 @@ translate_table(struct net *net,
 
 	if (ret != 0) {
 		IPT_ENTRY_ITERATE(entry0, newinfo->size,
-				cleanup_entry, &i);
+				cleanup_entry, net, &i);
 		return ret;
 	}
 
@@ -1260,7 +1261,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
 	/* Decrease module usage counts and free resource */
 	loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
 	IPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,
-			  NULL);
+			  net, NULL);
 	xt_free_table_info(oldinfo);
 	if (copy_to_user(counters_ptr, counters,
 			 sizeof(struct xt_counters) * num_counters) != 0)
@@ -1320,7 +1321,7 @@ do_replace(struct net *net, void __user *user, unsigned int len)
 	return 0;
 
  free_newinfo_untrans:
-	IPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, NULL);
+	IPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, net, NULL);
  free_newinfo:
 	xt_free_table_info(newinfo);
 	return ret;
@@ -1682,7 +1683,7 @@ compat_check_entry(struct ipt_entry *e, struct net *net, const char *name,
 	return 0;
 
  cleanup_matches:
-	IPT_MATCH_ITERATE(e, cleanup_match, &j);
+	IPT_MATCH_ITERATE(e, cleanup_match, net, &j);
 	return ret;
 }
 
@@ -1782,7 +1783,7 @@ translate_compat_table(struct net *net,
 		j -= i;
 		COMPAT_IPT_ENTRY_ITERATE_CONTINUE(entry0, newinfo->size, i,
 						  compat_release_entry, &j);
-		IPT_ENTRY_ITERATE(entry1, newinfo->size, cleanup_entry, &i);
+		IPT_ENTRY_ITERATE(entry1, newinfo->size, cleanup_entry, net, &i);
 		xt_free_table_info(newinfo);
 		return ret;
 	}
@@ -1853,7 +1854,7 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len)
 	return 0;
 
  free_newinfo_untrans:
-	IPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, NULL);
+	IPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, net, NULL);
  free_newinfo:
 	xt_free_table_info(newinfo);
 	return ret;
@@ -2112,7 +2113,7 @@ out:
 	return ERR_PTR(ret);
 }
 
-void ipt_unregister_table(struct xt_table *table)
+void ipt_unregister_table(struct net *net, struct xt_table *table)
 {
 	struct xt_table_info *private;
 	void *loc_cpu_entry;
@@ -2122,7 +2123,7 @@ void ipt_unregister_table(struct xt_table *table)
 
 	/* Decrease module usage counts and free resources */
 	loc_cpu_entry = private->entries[raw_smp_processor_id()];
-	IPT_ENTRY_ITERATE(loc_cpu_entry, private->size, cleanup_entry, NULL);
+	IPT_ENTRY_ITERATE(loc_cpu_entry, private->size, cleanup_entry, net, NULL);
 	if (private->number > private->initial_entries)
 		module_put(table_owner);
 	xt_free_table_info(private);
diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c
index df566cbd68e5..dee90eb8aa47 100644
--- a/net/ipv4/netfilter/iptable_filter.c
+++ b/net/ipv4/netfilter/iptable_filter.c
@@ -138,7 +138,7 @@ static int __net_init iptable_filter_net_init(struct net *net)
 
 static void __net_exit iptable_filter_net_exit(struct net *net)
 {
-	ipt_unregister_table(net->ipv4.iptable_filter);
+	ipt_unregister_table(net, net->ipv4.iptable_filter);
 }
 
 static struct pernet_operations iptable_filter_net_ops = {
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c
index fae78c3076c4..e07bf242343a 100644
--- a/net/ipv4/netfilter/iptable_mangle.c
+++ b/net/ipv4/netfilter/iptable_mangle.c
@@ -208,7 +208,7 @@ static int __net_init iptable_mangle_net_init(struct net *net)
 
 static void __net_exit iptable_mangle_net_exit(struct net *net)
 {
-	ipt_unregister_table(net->ipv4.iptable_mangle);
+	ipt_unregister_table(net, net->ipv4.iptable_mangle);
 }
 
 static struct pernet_operations iptable_mangle_net_ops = {
diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c
index 993edc23be09..40f2b9f611a2 100644
--- a/net/ipv4/netfilter/iptable_raw.c
+++ b/net/ipv4/netfilter/iptable_raw.c
@@ -100,7 +100,7 @@ static int __net_init iptable_raw_net_init(struct net *net)
 
 static void __net_exit iptable_raw_net_exit(struct net *net)
 {
-	ipt_unregister_table(net->ipv4.iptable_raw);
+	ipt_unregister_table(net, net->ipv4.iptable_raw);
 }
 
 static struct pernet_operations iptable_raw_net_ops = {
diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c
index 3bd3d6388da5..7ce2366e4305 100644
--- a/net/ipv4/netfilter/iptable_security.c
+++ b/net/ipv4/netfilter/iptable_security.c
@@ -138,7 +138,7 @@ static int __net_init iptable_security_net_init(struct net *net)
 
 static void __net_exit iptable_security_net_exit(struct net *net)
 {
-	ipt_unregister_table(net->ipv4.iptable_security);
+	ipt_unregister_table(net, net->ipv4.iptable_security);
 }
 
 static struct pernet_operations iptable_security_net_ops = {
diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c
index 9e81e0dfb4ec..85da34fdc755 100644
--- a/net/ipv4/netfilter/nf_nat_rule.c
+++ b/net/ipv4/netfilter/nf_nat_rule.c
@@ -195,7 +195,7 @@ static int __net_init nf_nat_rule_net_init(struct net *net)
 
 static void __net_exit nf_nat_rule_net_exit(struct net *net)
 {
-	ipt_unregister_table(net->ipv4.nat_table);
+	ipt_unregister_table(net, net->ipv4.nat_table);
 }
 
 static struct pernet_operations nf_nat_rule_net_ops = {
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index a825940a92ef..9f1d45f2ba8f 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -585,13 +585,14 @@ mark_source_chains(struct xt_table_info *newinfo,
 }
 
 static int
-cleanup_match(struct ip6t_entry_match *m, unsigned int *i)
+cleanup_match(struct ip6t_entry_match *m, struct net *net, unsigned int *i)
 {
 	struct xt_mtdtor_param par;
 
 	if (i && (*i)-- == 0)
 		return 1;
 
+	par.net       = net;
 	par.match     = m->u.kernel.match;
 	par.matchinfo = m->data;
 	par.family    = NFPROTO_IPV6;
@@ -737,7 +738,7 @@ find_check_entry(struct ip6t_entry *e, struct net *net, const char *name,
  err:
 	module_put(t->u.kernel.target->me);
  cleanup_matches:
-	IP6T_MATCH_ITERATE(e, cleanup_match, &j);
+	IP6T_MATCH_ITERATE(e, cleanup_match, net, &j);
 	return ret;
 }
 
@@ -807,7 +808,7 @@ check_entry_size_and_hooks(struct ip6t_entry *e,
 }
 
 static int
-cleanup_entry(struct ip6t_entry *e, unsigned int *i)
+cleanup_entry(struct ip6t_entry *e, struct net *net, unsigned int *i)
 {
 	struct xt_tgdtor_param par;
 	struct ip6t_entry_target *t;
@@ -816,7 +817,7 @@ cleanup_entry(struct ip6t_entry *e, unsigned int *i)
 		return 1;
 
 	/* Cleanup all matches */
-	IP6T_MATCH_ITERATE(e, cleanup_match, NULL);
+	IP6T_MATCH_ITERATE(e, cleanup_match, net, NULL);
 	t = ip6t_get_target(e);
 
 	par.target   = t->u.kernel.target;
@@ -898,7 +899,7 @@ translate_table(struct net *net,
 
 	if (ret != 0) {
 		IP6T_ENTRY_ITERATE(entry0, newinfo->size,
-				   cleanup_entry, &i);
+				   cleanup_entry, net, &i);
 		return ret;
 	}
 
@@ -1293,7 +1294,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
 	/* Decrease module usage counts and free resource */
 	loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
 	IP6T_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,
-			   NULL);
+			   net, NULL);
 	xt_free_table_info(oldinfo);
 	if (copy_to_user(counters_ptr, counters,
 			 sizeof(struct xt_counters) * num_counters) != 0)
@@ -1353,7 +1354,7 @@ do_replace(struct net *net, void __user *user, unsigned int len)
 	return 0;
 
  free_newinfo_untrans:
-	IP6T_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, NULL);
+	IP6T_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, net, NULL);
  free_newinfo:
 	xt_free_table_info(newinfo);
 	return ret;
@@ -1692,14 +1693,15 @@ compat_copy_entry_from_user(struct compat_ip6t_entry *e, void **dstptr,
 	return ret;
 }
 
-static int compat_check_entry(struct ip6t_entry *e, const char *name,
-				     unsigned int *i)
+static int compat_check_entry(struct ip6t_entry *e, struct net *net,
+			      const char *name, unsigned int *i)
 {
 	unsigned int j;
 	int ret;
 	struct xt_mtchk_param mtpar;
 
 	j = 0;
+	mtpar.net	= net;
 	mtpar.table     = name;
 	mtpar.entryinfo = &e->ipv6;
 	mtpar.hook_mask = e->comefrom;
@@ -1716,12 +1718,13 @@ static int compat_check_entry(struct ip6t_entry *e, const char *name,
 	return 0;
 
  cleanup_matches:
-	IP6T_MATCH_ITERATE(e, cleanup_match, &j);
+	IP6T_MATCH_ITERATE(e, cleanup_match, net, &j);
 	return ret;
 }
 
 static int
-translate_compat_table(const char *name,
+translate_compat_table(struct net *net,
+		       const char *name,
 		       unsigned int valid_hooks,
 		       struct xt_table_info **pinfo,
 		       void **pentry0,
@@ -1810,12 +1813,12 @@ translate_compat_table(const char *name,
 
 	i = 0;
 	ret = IP6T_ENTRY_ITERATE(entry1, newinfo->size, compat_check_entry,
-				 name, &i);
+				 net, name, &i);
 	if (ret) {
 		j -= i;
 		COMPAT_IP6T_ENTRY_ITERATE_CONTINUE(entry0, newinfo->size, i,
 						   compat_release_entry, &j);
-		IP6T_ENTRY_ITERATE(entry1, newinfo->size, cleanup_entry, &i);
+		IP6T_ENTRY_ITERATE(entry1, newinfo->size, cleanup_entry, net, &i);
 		xt_free_table_info(newinfo);
 		return ret;
 	}
@@ -1870,7 +1873,7 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len)
 		goto free_newinfo;
 	}
 
-	ret = translate_compat_table(tmp.name, tmp.valid_hooks,
+	ret = translate_compat_table(net, tmp.name, tmp.valid_hooks,
 				     &newinfo, &loc_cpu_entry, tmp.size,
 				     tmp.num_entries, tmp.hook_entry,
 				     tmp.underflow);
@@ -1886,7 +1889,7 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len)
 	return 0;
 
  free_newinfo_untrans:
-	IP6T_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, NULL);
+	IP6T_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, net, NULL);
  free_newinfo:
 	xt_free_table_info(newinfo);
 	return ret;
@@ -2144,7 +2147,7 @@ out:
 	return ERR_PTR(ret);
 }
 
-void ip6t_unregister_table(struct xt_table *table)
+void ip6t_unregister_table(struct net *net, struct xt_table *table)
 {
 	struct xt_table_info *private;
 	void *loc_cpu_entry;
@@ -2154,7 +2157,7 @@ void ip6t_unregister_table(struct xt_table *table)
 
 	/* Decrease module usage counts and free resources */
 	loc_cpu_entry = private->entries[raw_smp_processor_id()];
-	IP6T_ENTRY_ITERATE(loc_cpu_entry, private->size, cleanup_entry, NULL);
+	IP6T_ENTRY_ITERATE(loc_cpu_entry, private->size, cleanup_entry, net, NULL);
 	if (private->number > private->initial_entries)
 		module_put(table_owner);
 	xt_free_table_info(private);
diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c
index ad378efd0eb8..33ddfe53e18d 100644
--- a/net/ipv6/netfilter/ip6table_filter.c
+++ b/net/ipv6/netfilter/ip6table_filter.c
@@ -131,7 +131,7 @@ static int __net_init ip6table_filter_net_init(struct net *net)
 
 static void __net_exit ip6table_filter_net_exit(struct net *net)
 {
-	ip6t_unregister_table(net->ipv6.ip6table_filter);
+	ip6t_unregister_table(net, net->ipv6.ip6table_filter);
 }
 
 static struct pernet_operations ip6table_filter_net_ops = {
diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c
index a929c19d30e3..9bc483f000e5 100644
--- a/net/ipv6/netfilter/ip6table_mangle.c
+++ b/net/ipv6/netfilter/ip6table_mangle.c
@@ -182,7 +182,7 @@ static int __net_init ip6table_mangle_net_init(struct net *net)
 
 static void __net_exit ip6table_mangle_net_exit(struct net *net)
 {
-	ip6t_unregister_table(net->ipv6.ip6table_mangle);
+	ip6t_unregister_table(net, net->ipv6.ip6table_mangle);
 }
 
 static struct pernet_operations ip6table_mangle_net_ops = {
diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c
index ed1a1180f3b3..4c90b552e433 100644
--- a/net/ipv6/netfilter/ip6table_raw.c
+++ b/net/ipv6/netfilter/ip6table_raw.c
@@ -94,7 +94,7 @@ static int __net_init ip6table_raw_net_init(struct net *net)
 
 static void __net_exit ip6table_raw_net_exit(struct net *net)
 {
-	ip6t_unregister_table(net->ipv6.ip6table_raw);
+	ip6t_unregister_table(net, net->ipv6.ip6table_raw);
 }
 
 static struct pernet_operations ip6table_raw_net_ops = {
diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c
index 41b444c60934..baa8d4ef3b0a 100644
--- a/net/ipv6/netfilter/ip6table_security.c
+++ b/net/ipv6/netfilter/ip6table_security.c
@@ -134,7 +134,7 @@ static int __net_init ip6table_security_net_init(struct net *net)
 
 static void __net_exit ip6table_security_net_exit(struct net *net)
 {
-	ip6t_unregister_table(net->ipv6.ip6table_security);
+	ip6t_unregister_table(net, net->ipv6.ip6table_security);
 }
 
 static struct pernet_operations ip6table_security_net_ops = {
-- 
cgit v1.2.3


From 9dffe2a32b0deef52605d50527c0d240b15cabf7 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Mon, 4 Jan 2010 18:05:00 +0000
Subject: mfd: Correct WM835x ISINK ramp time defines

The constants used to specify ISINK ramp times for WM835x had the
wrong shifts so that the on times applied to the off ramp and vice
versa. The masks for the bitfields are correct.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Cc: stable@kernel.org
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 include/linux/mfd/wm8350/pmic.h | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mfd/wm8350/pmic.h b/include/linux/mfd/wm8350/pmic.h
index be3264e286e0..e786fe9841ef 100644
--- a/include/linux/mfd/wm8350/pmic.h
+++ b/include/linux/mfd/wm8350/pmic.h
@@ -666,20 +666,20 @@
 #define WM8350_ISINK_FLASH_DUR_64MS		(1 << 8)
 #define WM8350_ISINK_FLASH_DUR_96MS		(2 << 8)
 #define WM8350_ISINK_FLASH_DUR_1024MS		(3 << 8)
-#define WM8350_ISINK_FLASH_ON_INSTANT		(0 << 4)
-#define WM8350_ISINK_FLASH_ON_0_25S		(1 << 4)
-#define WM8350_ISINK_FLASH_ON_0_50S		(2 << 4)
-#define WM8350_ISINK_FLASH_ON_1_00S		(3 << 4)
-#define WM8350_ISINK_FLASH_ON_1_95S		(1 << 4)
-#define WM8350_ISINK_FLASH_ON_3_91S		(2 << 4)
-#define WM8350_ISINK_FLASH_ON_7_80S		(3 << 4)
-#define WM8350_ISINK_FLASH_OFF_INSTANT		(0 << 0)
-#define WM8350_ISINK_FLASH_OFF_0_25S		(1 << 0)
-#define WM8350_ISINK_FLASH_OFF_0_50S		(2 << 0)
-#define WM8350_ISINK_FLASH_OFF_1_00S		(3 << 0)
-#define WM8350_ISINK_FLASH_OFF_1_95S		(1 << 0)
-#define WM8350_ISINK_FLASH_OFF_3_91S		(2 << 0)
-#define WM8350_ISINK_FLASH_OFF_7_80S		(3 << 0)
+#define WM8350_ISINK_FLASH_ON_INSTANT		(0 << 0)
+#define WM8350_ISINK_FLASH_ON_0_25S		(1 << 0)
+#define WM8350_ISINK_FLASH_ON_0_50S		(2 << 0)
+#define WM8350_ISINK_FLASH_ON_1_00S		(3 << 0)
+#define WM8350_ISINK_FLASH_ON_1_95S		(1 << 0)
+#define WM8350_ISINK_FLASH_ON_3_91S		(2 << 0)
+#define WM8350_ISINK_FLASH_ON_7_80S		(3 << 0)
+#define WM8350_ISINK_FLASH_OFF_INSTANT		(0 << 4)
+#define WM8350_ISINK_FLASH_OFF_0_25S		(1 << 4)
+#define WM8350_ISINK_FLASH_OFF_0_50S		(2 << 4)
+#define WM8350_ISINK_FLASH_OFF_1_00S		(3 << 4)
+#define WM8350_ISINK_FLASH_OFF_1_95S		(1 << 4)
+#define WM8350_ISINK_FLASH_OFF_3_91S		(2 << 4)
+#define WM8350_ISINK_FLASH_OFF_7_80S		(3 << 4)
 
 /*
  * Regulator Interrupts.
-- 
cgit v1.2.3


From 64e8867ba8098b69889c1af94997a5ba2348fb26 Mon Sep 17 00:00:00 2001
From: Ian Molton <ian@mnementh.co.uk>
Date: Wed, 6 Jan 2010 13:51:48 +0100
Subject: mfd: tmio_mmc hardware abstraction for CNF area

This patch abstracts out the CNF area code from tmio_mmc which
is not present in all hardware that can use this driver. This
is required so that we can support non-toshiba based hardware.

ASIC3 support by Philipp Zabel

Signed-off-by: Ian Molton <ian@mnementh.co.uk>
Signed-off-by: Magnus Damm <damm@opensource.se>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/Makefile        |   6 +--
 drivers/mfd/asic3.c         |  40 ++++++++++++---
 drivers/mfd/t7l66xb.c       |  55 +++++++++++++-------
 drivers/mfd/tc6387xb.c      | 119 ++++++++++++++++++++++++++++++++------------
 drivers/mfd/tc6393xb.c      |  56 +++++++++++++++++----
 drivers/mfd/tmio_core.c     |  52 +++++++++++++++++++
 drivers/mmc/host/tmio_mmc.c |  59 +++++++---------------
 drivers/mmc/host/tmio_mmc.h |  46 +++--------------
 include/linux/mfd/tmio.h    |  39 +++++++++++++++
 9 files changed, 323 insertions(+), 149 deletions(-)
 create mode 100644 drivers/mfd/tmio_core.c

(limited to 'include/linux')

diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
index ca2f2c4ff05e..8f0d18409ede 100644
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile
@@ -11,9 +11,9 @@ obj-$(CONFIG_HTC_PASIC3)	+= htc-pasic3.o
 
 obj-$(CONFIG_MFD_DM355EVM_MSP)	+= dm355evm_msp.o
 
-obj-$(CONFIG_MFD_T7L66XB)	+= t7l66xb.o
-obj-$(CONFIG_MFD_TC6387XB)	+= tc6387xb.o
-obj-$(CONFIG_MFD_TC6393XB)	+= tc6393xb.o
+obj-$(CONFIG_MFD_T7L66XB)	+= t7l66xb.o tmio_core.o
+obj-$(CONFIG_MFD_TC6387XB)	+= tc6387xb.o tmio_core.o
+obj-$(CONFIG_MFD_TC6393XB)	+= tc6393xb.o tmio_core.o
 
 obj-$(CONFIG_MFD_WM8400)	+= wm8400-core.o
 wm831x-objs			:= wm831x-core.o wm831x-irq.o wm831x-otp.o
diff --git a/drivers/mfd/asic3.c b/drivers/mfd/asic3.c
index e22128c3e9a8..95c1e6bd1729 100644
--- a/drivers/mfd/asic3.c
+++ b/drivers/mfd/asic3.c
@@ -80,6 +80,7 @@ struct asic3 {
 	u16 irq_bothedge[4];
 	struct gpio_chip gpio;
 	struct device *dev;
+	void __iomem *tmio_cnf;
 
 	struct asic3_clk clocks[ARRAY_SIZE(asic3_clk_init)];
 };
@@ -685,8 +686,24 @@ static struct mfd_cell asic3_cell_ds1wm = {
 	.resources     = ds1wm_resources,
 };
 
+static void asic3_mmc_pwr(struct platform_device *pdev, int state)
+{
+	struct asic3 *asic = dev_get_drvdata(pdev->dev.parent);
+
+	tmio_core_mmc_pwr(asic->tmio_cnf, 1 - asic->bus_shift, state);
+}
+
+static void asic3_mmc_clk_div(struct platform_device *pdev, int state)
+{
+	struct asic3 *asic = dev_get_drvdata(pdev->dev.parent);
+
+	tmio_core_mmc_clk_div(asic->tmio_cnf, 1 - asic->bus_shift, state);
+}
+
 static struct tmio_mmc_data asic3_mmc_data = {
-	.hclk = 24576000,
+	.hclk           = 24576000,
+	.set_pwr        = asic3_mmc_pwr,
+	.set_clk_div    = asic3_mmc_clk_div,
 };
 
 static struct resource asic3_mmc_resources[] = {
@@ -695,11 +712,6 @@ static struct resource asic3_mmc_resources[] = {
 		.end   = ASIC3_SD_CTRL_BASE + 0x3ff,
 		.flags = IORESOURCE_MEM,
 	},
-	{
-		.start = ASIC3_SD_CONFIG_BASE,
-		.end   = ASIC3_SD_CONFIG_BASE + 0x1ff,
-		.flags = IORESOURCE_MEM,
-	},
 	{
 		.start = 0,
 		.end   = 0,
@@ -743,6 +755,10 @@ static int asic3_mmc_enable(struct platform_device *pdev)
 	asic3_set_register(asic, ASIC3_OFFSET(SDHWCTRL, SDCONF),
 			   ASIC3_SDHWCTRL_SDPWR, 1);
 
+	/* ASIC3_SD_CTRL_BASE assumes 32-bit addressing, TMIO is 16-bit */
+	tmio_core_mmc_enable(asic->tmio_cnf, 1 - asic->bus_shift,
+			     ASIC3_SD_CTRL_BASE >> 1);
+
 	return 0;
 }
 
@@ -797,10 +813,15 @@ static int __init asic3_mfd_probe(struct platform_device *pdev,
 	asic3_cell_ds1wm.data_size = sizeof(asic3_cell_ds1wm);
 
 	/* MMC */
+	asic->tmio_cnf = ioremap((ASIC3_SD_CONFIG_BASE >> asic->bus_shift) +
+				 mem_sdio->start, 0x400 >> asic->bus_shift);
+	if (!asic->tmio_cnf) {
+		ret = -ENOMEM;
+		dev_dbg(asic->dev, "Couldn't ioremap SD_CONFIG\n");
+		goto out;
+	}
 	asic3_mmc_resources[0].start >>= asic->bus_shift;
 	asic3_mmc_resources[0].end   >>= asic->bus_shift;
-	asic3_mmc_resources[1].start >>= asic->bus_shift;
-	asic3_mmc_resources[1].end   >>= asic->bus_shift;
 
 	asic3_cell_mmc.platform_data = &asic3_cell_mmc;
 	asic3_cell_mmc.data_size = sizeof(asic3_cell_mmc);
@@ -820,7 +841,10 @@ static int __init asic3_mfd_probe(struct platform_device *pdev,
 
 static void asic3_mfd_remove(struct platform_device *pdev)
 {
+	struct asic3 *asic = platform_get_drvdata(pdev);
+
 	mfd_remove_devices(&pdev->dev);
+	iounmap(asic->tmio_cnf);
 }
 
 /* Core */
diff --git a/drivers/mfd/t7l66xb.c b/drivers/mfd/t7l66xb.c
index 0a255c1f1ce7..bcf4687d4af5 100644
--- a/drivers/mfd/t7l66xb.c
+++ b/drivers/mfd/t7l66xb.c
@@ -38,6 +38,19 @@ enum {
 	T7L66XB_CELL_MMC,
 };
 
+static const struct resource t7l66xb_mmc_resources[] = {
+	{
+		.start = 0x800,
+		.end	= 0x9ff,
+		.flags = IORESOURCE_MEM,
+	},
+	{
+		.start = IRQ_T7L66XB_MMC,
+		.end	= IRQ_T7L66XB_MMC,
+		.flags = IORESOURCE_IRQ,
+	},
+};
+
 #define SCR_REVID	0x08		/* b Revision ID	*/
 #define SCR_IMR		0x42		/* b Interrupt Mask	*/
 #define SCR_DEV_CTL	0xe0		/* b Device control	*/
@@ -83,6 +96,9 @@ static int t7l66xb_mmc_enable(struct platform_device *mmc)
 
 	spin_unlock_irqrestore(&t7l66xb->lock, flags);
 
+	tmio_core_mmc_enable(t7l66xb->scr + 0x200, 0,
+		t7l66xb_mmc_resources[0].start & 0xfffe);
+
 	return 0;
 }
 
@@ -106,28 +122,28 @@ static int t7l66xb_mmc_disable(struct platform_device *mmc)
 	return 0;
 }
 
+static void t7l66xb_mmc_pwr(struct platform_device *mmc, int state)
+{
+	struct platform_device *dev = to_platform_device(mmc->dev.parent);
+	struct t7l66xb *t7l66xb = platform_get_drvdata(dev);
+
+	tmio_core_mmc_pwr(t7l66xb->scr + 0x200, 0, state);
+}
+
+static void t7l66xb_mmc_clk_div(struct platform_device *mmc, int state)
+{
+	struct platform_device *dev = to_platform_device(mmc->dev.parent);
+	struct t7l66xb *t7l66xb = platform_get_drvdata(dev);
+
+	tmio_core_mmc_clk_div(t7l66xb->scr + 0x200, 0, state);
+}
+
 /*--------------------------------------------------------------------------*/
 
 static struct tmio_mmc_data t7166xb_mmc_data = {
 	.hclk = 24000000,
-};
-
-static const struct resource t7l66xb_mmc_resources[] = {
-	{
-		.start = 0x800,
-		.end	= 0x9ff,
-		.flags = IORESOURCE_MEM,
-	},
-	{
-		.start = 0x200,
-		.end	= 0x2ff,
-		.flags = IORESOURCE_MEM,
-	},
-	{
-		.start = IRQ_T7L66XB_MMC,
-		.end	= IRQ_T7L66XB_MMC,
-		.flags = IORESOURCE_IRQ,
-	},
+	.set_pwr = t7l66xb_mmc_pwr,
+	.set_clk_div = t7l66xb_mmc_clk_div,
 };
 
 static const struct resource t7l66xb_nand_resources[] = {
@@ -282,6 +298,9 @@ static int t7l66xb_resume(struct platform_device *dev)
 	if (pdata && pdata->resume)
 		pdata->resume(dev);
 
+	tmio_core_mmc_enable(t7l66xb->scr + 0x200, 0,
+		t7l66xb_mmc_resources[0].start & 0xfffe);
+
 	return 0;
 }
 #else
diff --git a/drivers/mfd/tc6387xb.c b/drivers/mfd/tc6387xb.c
index 3280ab33f88a..5c7f04343d5c 100644
--- a/drivers/mfd/tc6387xb.c
+++ b/drivers/mfd/tc6387xb.c
@@ -22,28 +22,52 @@ enum {
 	TC6387XB_CELL_MMC,
 };
 
+struct tc6387xb {
+	void __iomem *scr;
+	struct clk *clk32k;
+	struct resource rscr;
+};
+
+static struct resource tc6387xb_mmc_resources[] = {
+	{
+		.start = 0x800,
+		.end   = 0x9ff,
+		.flags = IORESOURCE_MEM,
+	},
+	{
+		.start = 0,
+		.end   = 0,
+		.flags = IORESOURCE_IRQ,
+	},
+};
+
+/*--------------------------------------------------------------------------*/
+
 #ifdef CONFIG_PM
 static int tc6387xb_suspend(struct platform_device *dev, pm_message_t state)
 {
-	struct clk *clk32k = platform_get_drvdata(dev);
+	struct tc6387xb *tc6387xb = platform_get_drvdata(dev);
 	struct tc6387xb_platform_data *pdata = dev->dev.platform_data;
 
 	if (pdata && pdata->suspend)
 		pdata->suspend(dev);
-	clk_disable(clk32k);
+	clk_disable(tc6387xb->clk32k);
 
 	return 0;
 }
 
 static int tc6387xb_resume(struct platform_device *dev)
 {
-	struct clk *clk32k = platform_get_drvdata(dev);
+	struct tc6387xb *tc6387xb = platform_get_drvdata(dev);
 	struct tc6387xb_platform_data *pdata = dev->dev.platform_data;
 
-	clk_enable(clk32k);
+	clk_enable(tc6387xb->clk32k);
 	if (pdata && pdata->resume)
 		pdata->resume(dev);
 
+	tmio_core_mmc_resume(tc6387xb->scr + 0x200, 0,
+		tc6387xb_mmc_resources[0].start & 0xfffe);
+
 	return 0;
 }
 #else
@@ -53,12 +77,32 @@ static int tc6387xb_resume(struct platform_device *dev)
 
 /*--------------------------------------------------------------------------*/
 
+static void tc6387xb_mmc_pwr(struct platform_device *mmc, int state)
+{
+	struct platform_device *dev = to_platform_device(mmc->dev.parent);
+	struct tc6387xb *tc6387xb = platform_get_drvdata(dev);
+
+	tmio_core_mmc_pwr(tc6387xb->scr + 0x200, 0, state);
+}
+
+static void tc6387xb_mmc_clk_div(struct platform_device *mmc, int state)
+{
+	struct platform_device *dev = to_platform_device(mmc->dev.parent);
+	struct tc6387xb *tc6387xb = platform_get_drvdata(dev);
+
+	tmio_core_mmc_clk_div(tc6387xb->scr + 0x200, 0, state);
+}
+
+
 static int tc6387xb_mmc_enable(struct platform_device *mmc)
 {
 	struct platform_device *dev      = to_platform_device(mmc->dev.parent);
-	struct clk *clk32k = platform_get_drvdata(dev);
+	struct tc6387xb *tc6387xb = platform_get_drvdata(dev);
 
-	clk_enable(clk32k);
+	clk_enable(tc6387xb->clk32k);
+
+	tmio_core_mmc_enable(tc6387xb->scr + 0x200, 0,
+		tc6387xb_mmc_resources[0].start & 0xfffe);
 
 	return 0;
 }
@@ -66,36 +110,20 @@ static int tc6387xb_mmc_enable(struct platform_device *mmc)
 static int tc6387xb_mmc_disable(struct platform_device *mmc)
 {
 	struct platform_device *dev      = to_platform_device(mmc->dev.parent);
-	struct clk *clk32k = platform_get_drvdata(dev);
+	struct tc6387xb *tc6387xb = platform_get_drvdata(dev);
 
-	clk_disable(clk32k);
+	clk_disable(tc6387xb->clk32k);
 
 	return 0;
 }
 
-/*--------------------------------------------------------------------------*/
-
 static struct tmio_mmc_data tc6387xb_mmc_data = {
 	.hclk = 24000000,
+	.set_pwr = tc6387xb_mmc_pwr,
+	.set_clk_div = tc6387xb_mmc_clk_div,
 };
 
-static struct resource tc6387xb_mmc_resources[] = {
-	{
-		.start = 0x800,
-		.end   = 0x9ff,
-		.flags = IORESOURCE_MEM,
-	},
-	{
-		.start = 0x200,
-		.end   = 0x2ff,
-		.flags = IORESOURCE_MEM,
-	},
-	{
-		.start = 0,
-		.end   = 0,
-		.flags = IORESOURCE_IRQ,
-	},
-};
+/*--------------------------------------------------------------------------*/
 
 static struct mfd_cell tc6387xb_cells[] = {
 	[TC6387XB_CELL_MMC] = {
@@ -111,8 +139,9 @@ static struct mfd_cell tc6387xb_cells[] = {
 static int tc6387xb_probe(struct platform_device *dev)
 {
 	struct tc6387xb_platform_data *pdata = dev->dev.platform_data;
-	struct resource *iomem;
+	struct resource *iomem, *rscr;
 	struct clk *clk32k;
+	struct tc6387xb *tc6387xb;
 	int irq, ret;
 
 	iomem = platform_get_resource(dev, IORESOURCE_MEM, 0);
@@ -120,18 +149,40 @@ static int tc6387xb_probe(struct platform_device *dev)
 		return -EINVAL;
 	}
 
+	tc6387xb = kzalloc(sizeof *tc6387xb, GFP_KERNEL);
+	if (!tc6387xb)
+		return -ENOMEM;
+
 	ret  = platform_get_irq(dev, 0);
 	if (ret >= 0)
 		irq = ret;
 	else
-		goto err_resource;
+		goto err_no_irq;
 
 	clk32k = clk_get(&dev->dev, "CLK_CK32K");
 	if (IS_ERR(clk32k)) {
 		ret = PTR_ERR(clk32k);
+		goto err_no_clk;
+	}
+
+	rscr = &tc6387xb->rscr;
+	rscr->name = "tc6387xb-core";
+	rscr->start = iomem->start;
+	rscr->end = iomem->start + 0xff;
+	rscr->flags = IORESOURCE_MEM;
+
+	ret = request_resource(iomem, rscr);
+	if (ret)
 		goto err_resource;
+
+	tc6387xb->scr = ioremap(rscr->start, rscr->end - rscr->start + 1);
+	if (!tc6387xb->scr) {
+		ret = -ENOMEM;
+		goto err_ioremap;
 	}
-	platform_set_drvdata(dev, clk32k);
+
+	tc6387xb->clk32k = clk32k;
+	platform_set_drvdata(dev, tc6387xb);
 
 	if (pdata && pdata->enable)
 		pdata->enable(dev);
@@ -149,8 +200,13 @@ static int tc6387xb_probe(struct platform_device *dev)
 	if (!ret)
 		return 0;
 
-	clk_put(clk32k);
+err_ioremap:
+	release_resource(&tc6387xb->rscr);
 err_resource:
+	clk_put(clk32k);
+err_no_clk:
+err_no_irq:
+	kfree(tc6387xb);
 	return ret;
 }
 
@@ -195,3 +251,4 @@ MODULE_DESCRIPTION("Toshiba TC6387XB core driver");
 MODULE_LICENSE("GPL v2");
 MODULE_AUTHOR("Ian Molton");
 MODULE_ALIAS("platform:tc6387xb");
+
diff --git a/drivers/mfd/tc6393xb.c b/drivers/mfd/tc6393xb.c
index 1429a7341a9a..4bc5a08a2b09 100644
--- a/drivers/mfd/tc6393xb.c
+++ b/drivers/mfd/tc6393xb.c
@@ -136,10 +136,6 @@ static int tc6393xb_nand_enable(struct platform_device *nand)
 	return 0;
 }
 
-static struct tmio_mmc_data tc6393xb_mmc_data = {
-	.hclk = 24000000,
-};
-
 static struct resource __devinitdata tc6393xb_nand_resources[] = {
 	{
 		.start	= 0x1000,
@@ -164,11 +160,6 @@ static struct resource __devinitdata tc6393xb_mmc_resources[] = {
 		.end	= 0x9ff,
 		.flags	= IORESOURCE_MEM,
 	},
-	{
-		.start	= 0x200,
-		.end	= 0x2ff,
-		.flags	= IORESOURCE_MEM,
-	},
 	{
 		.start	= IRQ_TC6393_MMC,
 		.end	= IRQ_TC6393_MMC,
@@ -346,6 +337,50 @@ int tc6393xb_lcd_mode(struct platform_device *fb,
 }
 EXPORT_SYMBOL(tc6393xb_lcd_mode);
 
+static int tc6393xb_mmc_enable(struct platform_device *mmc)
+{
+	struct platform_device *dev = to_platform_device(mmc->dev.parent);
+	struct tc6393xb *tc6393xb = platform_get_drvdata(dev);
+
+	tmio_core_mmc_enable(tc6393xb->scr + 0x200, 0,
+		tc6393xb_mmc_resources[0].start & 0xfffe);
+
+	return 0;
+}
+
+static int tc6393xb_mmc_resume(struct platform_device *mmc)
+{
+	struct platform_device *dev = to_platform_device(mmc->dev.parent);
+	struct tc6393xb *tc6393xb = platform_get_drvdata(dev);
+
+	tmio_core_mmc_resume(tc6393xb->scr + 0x200, 0,
+		tc6393xb_mmc_resources[0].start & 0xfffe);
+
+	return 0;
+}
+
+static void tc6393xb_mmc_pwr(struct platform_device *mmc, int state)
+{
+	struct platform_device *dev = to_platform_device(mmc->dev.parent);
+	struct tc6393xb *tc6393xb = platform_get_drvdata(dev);
+
+	tmio_core_mmc_pwr(tc6393xb->scr + 0x200, 0, state);
+}
+
+static void tc6393xb_mmc_clk_div(struct platform_device *mmc, int state)
+{
+	struct platform_device *dev = to_platform_device(mmc->dev.parent);
+	struct tc6393xb *tc6393xb = platform_get_drvdata(dev);
+
+	tmio_core_mmc_clk_div(tc6393xb->scr + 0x200, 0, state);
+}
+
+static struct tmio_mmc_data tc6393xb_mmc_data = {
+	.hclk = 24000000,
+	.set_pwr = tc6393xb_mmc_pwr,
+	.set_clk_div = tc6393xb_mmc_clk_div,
+};
+
 static struct mfd_cell __devinitdata tc6393xb_cells[] = {
 	[TC6393XB_CELL_NAND] = {
 		.name = "tmio-nand",
@@ -355,6 +390,8 @@ static struct mfd_cell __devinitdata tc6393xb_cells[] = {
 	},
 	[TC6393XB_CELL_MMC] = {
 		.name = "tmio-mmc",
+		.enable = tc6393xb_mmc_enable,
+		.resume = tc6393xb_mmc_resume,
 		.driver_data = &tc6393xb_mmc_data,
 		.num_resources = ARRAY_SIZE(tc6393xb_mmc_resources),
 		.resources = tc6393xb_mmc_resources,
@@ -836,3 +873,4 @@ MODULE_LICENSE("GPL v2");
 MODULE_AUTHOR("Ian Molton, Dmitry Baryshkov and Dirk Opfer");
 MODULE_DESCRIPTION("tc6393xb Toshiba Mobile IO Controller");
 MODULE_ALIAS("platform:tc6393xb");
+
diff --git a/drivers/mfd/tmio_core.c b/drivers/mfd/tmio_core.c
new file mode 100644
index 000000000000..eddc19ae464b
--- /dev/null
+++ b/drivers/mfd/tmio_core.c
@@ -0,0 +1,52 @@
+/*
+ * Copyright(c) 2009 Ian Molton <spyro@f2s.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/mfd/tmio.h>
+
+int tmio_core_mmc_enable(void __iomem *cnf, int shift, unsigned long base)
+{
+	/* Enable the MMC/SD Control registers */
+	sd_config_write16(cnf, shift, CNF_CMD, SDCREN);
+	sd_config_write32(cnf, shift, CNF_CTL_BASE, base & 0xfffe);
+
+	/* Disable SD power during suspend */
+	sd_config_write8(cnf, shift, CNF_PWR_CTL_3, 0x01);
+
+	/* The below is required but why? FIXME */
+	sd_config_write8(cnf, shift, CNF_STOP_CLK_CTL, 0x1f);
+
+	/* Power down SD bus */
+	sd_config_write8(cnf, shift, CNF_PWR_CTL_2, 0x00);
+
+	return 0;
+}
+EXPORT_SYMBOL(tmio_core_mmc_enable);
+
+int tmio_core_mmc_resume(void __iomem *cnf, int shift, unsigned long base)
+{
+
+	/* Enable the MMC/SD Control registers */
+	sd_config_write16(cnf, shift, CNF_CMD, SDCREN);
+	sd_config_write32(cnf, shift, CNF_CTL_BASE, base & 0xfffe);
+
+	return 0;
+}
+EXPORT_SYMBOL(tmio_core_mmc_resume);
+
+void tmio_core_mmc_pwr(void __iomem *cnf, int shift, int state)
+{
+	sd_config_write8(cnf, shift, CNF_PWR_CTL_2, state ? 0x02 : 0x00);
+}
+EXPORT_SYMBOL(tmio_core_mmc_pwr);
+
+void tmio_core_mmc_clk_div(void __iomem *cnf, int shift, int state)
+{
+	sd_config_write8(cnf, shift, CNF_SD_CLK_MODE, state ? 1 : 0);
+}
+EXPORT_SYMBOL(tmio_core_mmc_clk_div);
+
diff --git a/drivers/mmc/host/tmio_mmc.c b/drivers/mmc/host/tmio_mmc.c
index 7cccc8523747..e22c3fa3516a 100644
--- a/drivers/mmc/host/tmio_mmc.c
+++ b/drivers/mmc/host/tmio_mmc.c
@@ -46,7 +46,9 @@ static void tmio_mmc_set_clock(struct tmio_mmc_host *host, int new_clock)
 		clk |= 0x100;
 	}
 
-	sd_config_write8(host, CNF_SD_CLK_MODE, clk >> 22);
+	if (host->set_clk_div)
+		host->set_clk_div(host->pdev, (clk>>22) & 1);
+
 	sd_ctrl_write16(host, CTL_SD_CARD_CLK_CTL, clk & 0x1ff);
 }
 
@@ -427,12 +429,13 @@ static void tmio_mmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
 	/* Power sequence - OFF -> ON -> UP */
 	switch (ios->power_mode) {
 	case MMC_POWER_OFF: /* power down SD bus */
-		sd_config_write8(host, CNF_PWR_CTL_2, 0x00);
+		if (host->set_pwr)
+			host->set_pwr(host->pdev, 0);
 		tmio_mmc_clk_stop(host);
 		break;
 	case MMC_POWER_ON: /* power up SD bus */
-
-		sd_config_write8(host, CNF_PWR_CTL_2, 0x02);
+		if (host->set_pwr)
+			host->set_pwr(host->pdev, 1);
 		break;
 	case MMC_POWER_UP: /* start bus clock */
 		tmio_mmc_clk_start(host);
@@ -485,21 +488,15 @@ static int tmio_mmc_resume(struct platform_device *dev)
 {
 	struct mfd_cell	*cell = (struct mfd_cell *)dev->dev.platform_data;
 	struct mmc_host *mmc = platform_get_drvdata(dev);
-	struct tmio_mmc_host *host = mmc_priv(mmc);
 	int ret = 0;
 
 	/* Tell the MFD core we are ready to be enabled */
-	if (cell->enable) {
-		ret = cell->enable(dev);
+	if (cell->resume) {
+		ret = cell->resume(dev);
 		if (ret)
 			goto out;
 	}
 
-	/* Enable the MMC/SD Control registers */
-	sd_config_write16(host, CNF_CMD, SDCREN);
-	sd_config_write32(host, CNF_CTL_BASE,
-		(dev->resource[0].start >> host->bus_shift) & 0xfffe);
-
 	mmc_resume_host(mmc);
 
 out:
@@ -514,17 +511,16 @@ static int __devinit tmio_mmc_probe(struct platform_device *dev)
 {
 	struct mfd_cell	*cell = (struct mfd_cell *)dev->dev.platform_data;
 	struct tmio_mmc_data *pdata;
-	struct resource *res_ctl, *res_cnf;
+	struct resource *res_ctl;
 	struct tmio_mmc_host *host;
 	struct mmc_host *mmc;
 	int ret = -EINVAL;
 
-	if (dev->num_resources != 3)
+	if (dev->num_resources != 2)
 		goto out;
 
 	res_ctl = platform_get_resource(dev, IORESOURCE_MEM, 0);
-	res_cnf = platform_get_resource(dev, IORESOURCE_MEM, 1);
-	if (!res_ctl || !res_cnf)
+	if (!res_ctl)
 		goto out;
 
 	pdata = cell->driver_data;
@@ -539,8 +535,12 @@ static int __devinit tmio_mmc_probe(struct platform_device *dev)
 
 	host = mmc_priv(mmc);
 	host->mmc = mmc;
+	host->pdev = dev;
 	platform_set_drvdata(dev, mmc);
 
+	host->set_pwr = pdata->set_pwr;
+	host->set_clk_div = pdata->set_clk_div;
+
 	/* SD control register space size is 0x200, 0x400 for bus_shift=1 */
 	host->bus_shift = resource_size(res_ctl) >> 10;
 
@@ -548,10 +548,6 @@ static int __devinit tmio_mmc_probe(struct platform_device *dev)
 	if (!host->ctl)
 		goto host_free;
 
-	host->cnf = ioremap(res_cnf->start, resource_size(res_cnf));
-	if (!host->cnf)
-		goto unmap_ctl;
-
 	mmc->ops = &tmio_mmc_ops;
 	mmc->caps = MMC_CAP_4_BIT_DATA;
 	mmc->f_max = pdata->hclk;
@@ -562,23 +558,9 @@ static int __devinit tmio_mmc_probe(struct platform_device *dev)
 	if (cell->enable) {
 		ret = cell->enable(dev);
 		if (ret)
-			goto unmap_cnf;
+			goto unmap_ctl;
 	}
 
-	/* Enable the MMC/SD Control registers */
-	sd_config_write16(host, CNF_CMD, SDCREN);
-	sd_config_write32(host, CNF_CTL_BASE,
-		(dev->resource[0].start >> host->bus_shift) & 0xfffe);
-
-	/* Disable SD power during suspend */
-	sd_config_write8(host, CNF_PWR_CTL_3, 0x01);
-
-	/* The below is required but why? FIXME */
-	sd_config_write8(host, CNF_STOP_CLK_CTL, 0x1f);
-
-	/* Power down SD bus*/
-	sd_config_write8(host, CNF_PWR_CTL_2, 0x00);
-
 	tmio_mmc_clk_stop(host);
 	reset(host);
 
@@ -586,14 +568,14 @@ static int __devinit tmio_mmc_probe(struct platform_device *dev)
 	if (ret >= 0)
 		host->irq = ret;
 	else
-		goto unmap_cnf;
+		goto unmap_ctl;
 
 	disable_mmc_irqs(host, TMIO_MASK_ALL);
 
 	ret = request_irq(host->irq, tmio_mmc_irq, IRQF_DISABLED |
 		IRQF_TRIGGER_FALLING, dev_name(&dev->dev), host);
 	if (ret)
-		goto unmap_cnf;
+		goto unmap_ctl;
 
 	mmc_add_host(mmc);
 
@@ -605,8 +587,6 @@ static int __devinit tmio_mmc_probe(struct platform_device *dev)
 
 	return 0;
 
-unmap_cnf:
-	iounmap(host->cnf);
 unmap_ctl:
 	iounmap(host->ctl);
 host_free:
@@ -626,7 +606,6 @@ static int __devexit tmio_mmc_remove(struct platform_device *dev)
 		mmc_remove_host(mmc);
 		free_irq(host->irq, host);
 		iounmap(host->ctl);
-		iounmap(host->cnf);
 		mmc_free_host(mmc);
 	}
 
diff --git a/drivers/mmc/host/tmio_mmc.h b/drivers/mmc/host/tmio_mmc.h
index 9fa998594974..692dc23363b9 100644
--- a/drivers/mmc/host/tmio_mmc.h
+++ b/drivers/mmc/host/tmio_mmc.h
@@ -11,26 +11,6 @@
 
 #include <linux/highmem.h>
 
-#define CNF_CMD     0x04
-#define CNF_CTL_BASE   0x10
-#define CNF_INT_PIN  0x3d
-#define CNF_STOP_CLK_CTL 0x40
-#define CNF_GCLK_CTL 0x41
-#define CNF_SD_CLK_MODE 0x42
-#define CNF_PIN_STATUS 0x44
-#define CNF_PWR_CTL_1 0x48
-#define CNF_PWR_CTL_2 0x49
-#define CNF_PWR_CTL_3 0x4a
-#define CNF_CARD_DETECT_MODE 0x4c
-#define CNF_SD_SLOT 0x50
-#define CNF_EXT_GCLK_CTL_1 0xf0
-#define CNF_EXT_GCLK_CTL_2 0xf1
-#define CNF_EXT_GCLK_CTL_3 0xf9
-#define CNF_SD_LED_EN_1 0xfa
-#define CNF_SD_LED_EN_2 0xfe
-
-#define   SDCREN 0x2   /* Enable access to MMC CTL regs. (flag in COMMAND_REG)*/
-
 #define CTL_SD_CMD 0x00
 #define CTL_ARG_REG 0x04
 #define CTL_STOP_INTERNAL_ACTION 0x08
@@ -110,7 +90,6 @@
 
 
 struct tmio_mmc_host {
-	void __iomem *cnf;
 	void __iomem *ctl;
 	unsigned long bus_shift;
 	struct mmc_command      *cmd;
@@ -119,10 +98,16 @@ struct tmio_mmc_host {
 	struct mmc_host         *mmc;
 	int                     irq;
 
+	/* Callbacks for clock / power control */
+	void (*set_pwr)(struct platform_device *host, int state);
+	void (*set_clk_div)(struct platform_device *host, int state);
+
 	/* pio related stuff */
 	struct scatterlist      *sg_ptr;
 	unsigned int            sg_len;
 	unsigned int            sg_off;
+
+	struct platform_device *pdev;
 };
 
 #include <linux/io.h>
@@ -163,25 +148,6 @@ static inline void sd_ctrl_write32(struct tmio_mmc_host *host, int addr,
 	writew(val >> 16, host->ctl + ((addr + 2) << host->bus_shift));
 }
 
-static inline void sd_config_write8(struct tmio_mmc_host *host, int addr,
-		u8 val)
-{
-	writeb(val, host->cnf + (addr << host->bus_shift));
-}
-
-static inline void sd_config_write16(struct tmio_mmc_host *host, int addr,
-		u16 val)
-{
-	writew(val, host->cnf + (addr << host->bus_shift));
-}
-
-static inline void sd_config_write32(struct tmio_mmc_host *host, int addr,
-		u32 val)
-{
-	writew(val, host->cnf + (addr << host->bus_shift));
-	writew(val >> 16, host->cnf + ((addr + 2) << host->bus_shift));
-}
-
 #include <linux/scatterlist.h>
 #include <linux/blkdev.h>
 
diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h
index 6b9c5d06690c..9cb1834deffa 100644
--- a/include/linux/mfd/tmio.h
+++ b/include/linux/mfd/tmio.h
@@ -2,6 +2,8 @@
 #define MFD_TMIO_H
 
 #include <linux/fb.h>
+#include <linux/io.h>
+#include <linux/platform_device.h>
 
 #define tmio_ioread8(addr) readb(addr)
 #define tmio_ioread16(addr) readw(addr)
@@ -18,11 +20,48 @@
 	writew((val) >> 16, (addr) + 2); \
 	} while (0)
 
+#define CNF_CMD     0x04
+#define CNF_CTL_BASE   0x10
+#define CNF_INT_PIN  0x3d
+#define CNF_STOP_CLK_CTL 0x40
+#define CNF_GCLK_CTL 0x41
+#define CNF_SD_CLK_MODE 0x42
+#define CNF_PIN_STATUS 0x44
+#define CNF_PWR_CTL_1 0x48
+#define CNF_PWR_CTL_2 0x49
+#define CNF_PWR_CTL_3 0x4a
+#define CNF_CARD_DETECT_MODE 0x4c
+#define CNF_SD_SLOT 0x50
+#define CNF_EXT_GCLK_CTL_1 0xf0
+#define CNF_EXT_GCLK_CTL_2 0xf1
+#define CNF_EXT_GCLK_CTL_3 0xf9
+#define CNF_SD_LED_EN_1 0xfa
+#define CNF_SD_LED_EN_2 0xfe
+
+#define   SDCREN 0x2   /* Enable access to MMC CTL regs. (flag in COMMAND_REG)*/
+
+#define sd_config_write8(base, shift, reg, val) \
+	tmio_iowrite8((val), (base) + ((reg) << (shift)))
+#define sd_config_write16(base, shift, reg, val) \
+	tmio_iowrite16((val), (base) + ((reg) << (shift)))
+#define sd_config_write32(base, shift, reg, val) \
+	do { \
+		tmio_iowrite16((val), (base) + ((reg) << (shift)));   \
+		tmio_iowrite16((val) >> 16, (base) + ((reg + 2) << (shift))); \
+	} while (0)
+
+int tmio_core_mmc_enable(void __iomem *cnf, int shift, unsigned long base);
+int tmio_core_mmc_resume(void __iomem *cnf, int shift, unsigned long base);
+void tmio_core_mmc_pwr(void __iomem *cnf, int shift, int state);
+void tmio_core_mmc_clk_div(void __iomem *cnf, int shift, int state);
+
 /*
  * data for the MMC controller
  */
 struct tmio_mmc_data {
 	const unsigned int		hclk;
+	void (*set_pwr)(struct platform_device *host, int state);
+	void (*set_clk_div)(struct platform_device *host, int state);
 };
 
 /*
-- 
cgit v1.2.3


From ec51b7f538c440bfa5a4d538133c659071c02155 Mon Sep 17 00:00:00 2001
From: Michael Hennerich <michael.hennerich@analog.com>
Date: Tue, 19 Jan 2010 00:27:58 -0800
Subject: Input: ad7879 - support auxiliary GPIOs via gpiolib

Drop the simple fancy sysfs hooks for the aux GPIOs and expose these via
the gpiolib interface so that other drivers can use them.

Signed-off-by: Michael Hennerich <michael.hennerich@analog.com>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/input/touchscreen/ad7879.c | 197 ++++++++++++++++++++++++++-----------
 include/linux/spi/ad7879.h         |  12 ++-
 2 files changed, 149 insertions(+), 60 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/input/touchscreen/ad7879.c b/drivers/input/touchscreen/ad7879.c
index c21e6d3a8844..794d070c6900 100644
--- a/drivers/input/touchscreen/ad7879.c
+++ b/drivers/input/touchscreen/ad7879.c
@@ -47,6 +47,7 @@
 #include <linux/workqueue.h>
 #include <linux/spi/spi.h>
 #include <linux/i2c.h>
+#include <linux/gpio.h>
 
 #include <linux/spi/ad7879.h>
 
@@ -132,7 +133,9 @@ struct ad7879 {
 	struct input_dev	*input;
 	struct work_struct	work;
 	struct timer_list	timer;
-
+#ifdef CONFIG_GPIOLIB
+	struct gpio_chip	gc;
+#endif
 	struct mutex		mutex;
 	unsigned		disabled:1;	/* P: mutex */
 
@@ -150,11 +153,9 @@ struct ad7879 {
 	u8			median;
 	u16			x_plate_ohms;
 	u16			pressure_max;
-	u16			gpio_init;
 	u16			cmd_crtl1;
 	u16			cmd_crtl2;
 	u16			cmd_crtl3;
-	unsigned		gpio:1;
 };
 
 static int ad7879_read(bus_device *, u8);
@@ -237,24 +238,6 @@ static irqreturn_t ad7879_irq(int irq, void *handle)
 
 static void ad7879_setup(struct ad7879 *ts)
 {
-	ts->cmd_crtl3 = AD7879_YPLUS_BIT |
-			AD7879_XPLUS_BIT |
-			AD7879_Z2_BIT |
-			AD7879_Z1_BIT |
-			AD7879_TEMPMASK_BIT |
-			AD7879_AUXVBATMASK_BIT |
-			AD7879_GPIOALERTMASK_BIT;
-
-	ts->cmd_crtl2 = AD7879_PM(AD7879_PM_DYN) | AD7879_DFR |
-			AD7879_AVG(ts->averaging) |
-			AD7879_MFS(ts->median) |
-			AD7879_FCD(ts->first_conversion_delay) |
-			ts->gpio_init;
-
-	ts->cmd_crtl1 = AD7879_MODE_INT | AD7879_MODE_SEQ1 |
-			AD7879_ACQ(ts->acquisition_time) |
-			AD7879_TMR(ts->pen_down_acc_interval);
-
 	ad7879_write(ts->bus, AD7879_REG_CTRL2, ts->cmd_crtl2);
 	ad7879_write(ts->bus, AD7879_REG_CTRL3, ts->cmd_crtl3);
 	ad7879_write(ts->bus, AD7879_REG_CTRL1, ts->cmd_crtl1);
@@ -324,48 +307,132 @@ static ssize_t ad7879_disable_store(struct device *dev,
 
 static DEVICE_ATTR(disable, 0664, ad7879_disable_show, ad7879_disable_store);
 
-static ssize_t ad7879_gpio_show(struct device *dev,
-				     struct device_attribute *attr, char *buf)
+static struct attribute *ad7879_attributes[] = {
+	&dev_attr_disable.attr,
+	NULL
+};
+
+static const struct attribute_group ad7879_attr_group = {
+	.attrs = ad7879_attributes,
+};
+
+#ifdef CONFIG_GPIOLIB
+static int ad7879_gpio_direction_input(struct gpio_chip *chip,
+					unsigned gpio)
 {
-	struct ad7879 *ts = dev_get_drvdata(dev);
+	struct ad7879 *ts = container_of(chip, struct ad7879, gc);
+	int err;
 
-	return sprintf(buf, "%u\n", ts->gpio);
+	mutex_lock(&ts->mutex);
+	ts->cmd_crtl2 |= AD7879_GPIO_EN | AD7879_GPIODIR | AD7879_GPIOPOL;
+	err = ad7879_write(ts->bus, AD7879_REG_CTRL2, ts->cmd_crtl2);
+	mutex_unlock(&ts->mutex);
+
+	return err;
 }
 
-static ssize_t ad7879_gpio_store(struct device *dev,
-				     struct device_attribute *attr,
-				     const char *buf, size_t count)
+static int ad7879_gpio_direction_output(struct gpio_chip *chip,
+					unsigned gpio, int level)
 {
-	struct ad7879 *ts = dev_get_drvdata(dev);
-	unsigned long val;
-	int error;
+	struct ad7879 *ts = container_of(chip, struct ad7879, gc);
+	int err;
 
-	error = strict_strtoul(buf, 10, &val);
-	if (error)
-		return error;
+	mutex_lock(&ts->mutex);
+	ts->cmd_crtl2 &= ~AD7879_GPIODIR;
+	ts->cmd_crtl2 |= AD7879_GPIO_EN | AD7879_GPIOPOL;
+	if (level)
+		ts->cmd_crtl2 |= AD7879_GPIO_DATA;
+	else
+		ts->cmd_crtl2 &= ~AD7879_GPIO_DATA;
+
+	err = ad7879_write(ts->bus, AD7879_REG_CTRL2, ts->cmd_crtl2);
+	mutex_unlock(&ts->mutex);
+
+	return err;
+}
+
+static int ad7879_gpio_get_value(struct gpio_chip *chip, unsigned gpio)
+{
+	struct ad7879 *ts = container_of(chip, struct ad7879, gc);
+	u16 val;
 
 	mutex_lock(&ts->mutex);
-	ts->gpio = !!val;
-	error = ad7879_write(ts->bus, AD7879_REG_CTRL2,
-			   ts->gpio ?
-				ts->cmd_crtl2 & ~AD7879_GPIO_DATA :
-				ts->cmd_crtl2 | AD7879_GPIO_DATA);
+	val = ad7879_read(ts->bus, AD7879_REG_CTRL2);
 	mutex_unlock(&ts->mutex);
 
-	return error ? : count;
+	return !!(val & AD7879_GPIO_DATA);
 }
 
-static DEVICE_ATTR(gpio, 0664, ad7879_gpio_show, ad7879_gpio_store);
+static void ad7879_gpio_set_value(struct gpio_chip *chip,
+				  unsigned gpio, int value)
+{
+	struct ad7879 *ts = container_of(chip, struct ad7879, gc);
 
-static struct attribute *ad7879_attributes[] = {
-	&dev_attr_disable.attr,
-	&dev_attr_gpio.attr,
-	NULL
-};
+	mutex_lock(&ts->mutex);
+	if (value)
+		ts->cmd_crtl2 |= AD7879_GPIO_DATA;
+	else
+		ts->cmd_crtl2 &= ~AD7879_GPIO_DATA;
 
-static const struct attribute_group ad7879_attr_group = {
-	.attrs = ad7879_attributes,
-};
+	ad7879_write(ts->bus, AD7879_REG_CTRL2, ts->cmd_crtl2);
+	mutex_unlock(&ts->mutex);
+}
+
+static int __devinit ad7879_gpio_add(struct device *dev)
+{
+	struct ad7879 *ts = dev_get_drvdata(dev);
+	struct ad7879_platform_data *pdata = dev->platform_data;
+	int ret = 0;
+
+	if (pdata->gpio_export) {
+		ts->gc.direction_input = ad7879_gpio_direction_input;
+		ts->gc.direction_output = ad7879_gpio_direction_output;
+		ts->gc.get = ad7879_gpio_get_value;
+		ts->gc.set = ad7879_gpio_set_value;
+		ts->gc.can_sleep = 1;
+		ts->gc.base = pdata->gpio_base;
+		ts->gc.ngpio = 1;
+		ts->gc.label = "AD7879-GPIO";
+		ts->gc.owner = THIS_MODULE;
+		ts->gc.dev = dev;
+
+		ret = gpiochip_add(&ts->gc);
+		if (ret)
+			dev_err(dev, "failed to register gpio %d\n",
+				ts->gc.base);
+	}
+
+	return ret;
+}
+
+/*
+ * We mark ad7879_gpio_remove inline so there is a chance the code
+ * gets discarded when not needed. We can't do __devinit/__devexit
+ * markup since it is used in both probe and remove methods.
+ */
+static inline void ad7879_gpio_remove(struct device *dev)
+{
+	struct ad7879 *ts = dev_get_drvdata(dev);
+	struct ad7879_platform_data *pdata = dev->platform_data;
+	int ret;
+
+	if (pdata->gpio_export) {
+		ret = gpiochip_remove(&ts->gc);
+		if (ret)
+			dev_err(dev, "failed to remove gpio %d\n",
+				ts->gc.base);
+	}
+}
+#else
+static inline int ad7879_gpio_add(struct device *dev)
+{
+	return 0;
+}
+
+static inline void ad7879_gpio_remove(struct device *dev)
+{
+}
+#endif
 
 static int __devinit ad7879_construct(bus_device *bus, struct ad7879 *ts)
 {
@@ -403,12 +470,6 @@ static int __devinit ad7879_construct(bus_device *bus, struct ad7879 *ts)
 	ts->pen_down_acc_interval = pdata->pen_down_acc_interval;
 	ts->median = pdata->median;
 
-	if (pdata->gpio_output)
-		ts->gpio_init = AD7879_GPIO_EN |
-				(pdata->gpio_default ? 0 : AD7879_GPIO_DATA);
-	else
-		ts->gpio_init = AD7879_GPIO_EN | AD7879_GPIODIR;
-
 	snprintf(ts->phys, sizeof(ts->phys), "%s/input0", dev_name(&bus->dev));
 
 	input_dev->name = "AD7879 Touchscreen";
@@ -446,6 +507,23 @@ static int __devinit ad7879_construct(bus_device *bus, struct ad7879 *ts)
 		goto err_free_mem;
 	}
 
+	ts->cmd_crtl3 = AD7879_YPLUS_BIT |
+			AD7879_XPLUS_BIT |
+			AD7879_Z2_BIT |
+			AD7879_Z1_BIT |
+			AD7879_TEMPMASK_BIT |
+			AD7879_AUXVBATMASK_BIT |
+			AD7879_GPIOALERTMASK_BIT;
+
+	ts->cmd_crtl2 = AD7879_PM(AD7879_PM_DYN) | AD7879_DFR |
+			AD7879_AVG(ts->averaging) |
+			AD7879_MFS(ts->median) |
+			AD7879_FCD(ts->first_conversion_delay);
+
+	ts->cmd_crtl1 = AD7879_MODE_INT | AD7879_MODE_SEQ1 |
+			AD7879_ACQ(ts->acquisition_time) |
+			AD7879_TMR(ts->pen_down_acc_interval);
+
 	ad7879_setup(ts);
 
 	err = request_irq(bus->irq, ad7879_irq,
@@ -460,15 +538,21 @@ static int __devinit ad7879_construct(bus_device *bus, struct ad7879 *ts)
 	if (err)
 		goto err_free_irq;
 
-	err = input_register_device(input_dev);
+	err = ad7879_gpio_add(&bus->dev);
 	if (err)
 		goto err_remove_attr;
 
+	err = input_register_device(input_dev);
+	if (err)
+		goto err_remove_gpio;
+
 	dev_info(&bus->dev, "Rev.%d touchscreen, irq %d\n",
 		 revid >> 8, bus->irq);
 
 	return 0;
 
+err_remove_gpio:
+	ad7879_gpio_remove(&bus->dev);
 err_remove_attr:
 	sysfs_remove_group(&bus->dev.kobj, &ad7879_attr_group);
 err_free_irq:
@@ -481,6 +565,7 @@ err_free_mem:
 
 static int __devexit ad7879_destroy(bus_device *bus, struct ad7879 *ts)
 {
+	ad7879_gpio_remove(&bus->dev);
 	ad7879_disable(ts);
 	sysfs_remove_group(&ts->bus->dev.kobj, &ad7879_attr_group);
 	free_irq(ts->bus->irq, ts);
diff --git a/include/linux/spi/ad7879.h b/include/linux/spi/ad7879.h
index 4231104c9afa..6334cee1a3be 100644
--- a/include/linux/spi/ad7879.h
+++ b/include/linux/spi/ad7879.h
@@ -28,8 +28,12 @@ struct ad7879_platform_data {
 	 * 1 = 4, 2 = 8, 3 = 16 (median > averaging)
 	 */
 	u8	median;
-	/* 1 = AUX/VBAT/GPIO set to GPIO Output */
-	u8	gpio_output;
-	/* Initial GPIO pin state (valid if gpio_output = 1) */
-	u8	gpio_default;
+	/* 1 = AUX/VBAT/GPIO export GPIO to gpiolib
+	 * requires CONFIG_GPIOLIB
+	 */
+	bool	gpio_export;
+	/* identifies the first GPIO number handled by this chip;
+	 * or, if negative, requests dynamic ID allocation.
+	 */
+	s32	gpio_base;
 };
-- 
cgit v1.2.3


From d2d4e780aff2fab46a792ebc89f80d1a6872b325 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Mon, 18 Jan 2010 07:20:28 +0000
Subject: ide: add drive->pio_mode field

Add pio_mode field to ide_drive_t matching pio_mode field used in
struct ata_device.

The validity of the field is restricted to ->set_pio_mode method
only currently in IDE subsystem.

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/ide/ide-devsets.c   | 2 ++
 drivers/ide/ide-probe.c     | 2 ++
 drivers/ide/ide-xfer-mode.c | 3 +++
 include/linux/ide.h         | 1 +
 4 files changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/ide/ide-devsets.c b/drivers/ide/ide-devsets.c
index 1099bf7cf968..cb3341ce655c 100644
--- a/drivers/ide/ide-devsets.c
+++ b/drivers/ide/ide-devsets.c
@@ -105,6 +105,8 @@ static int set_pio_mode(ide_drive_t *drive, int arg)
 		return -ENOSYS;
 
 	if (set_pio_mode_abuse(drive->hwif, arg)) {
+		drive->pio_mode = arg + XFER_PIO_0;
+
 		if (arg == 8 || arg == 9) {
 			unsigned long flags;
 
diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index 4d76ba473097..9a9f10f4cf9f 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -1043,6 +1043,8 @@ static void ide_port_init_devices(ide_hwif_t *hwif)
 		if (hwif->host_flags & IDE_HFLAG_NO_UNMASK_IRQS)
 			drive->dev_flags |= IDE_DFLAG_NO_UNMASK;
 
+		drive->pio_mode = XFER_PIO_0;
+
 		if (port_ops && port_ops->init_dev)
 			port_ops->init_dev(drive);
 	}
diff --git a/drivers/ide/ide-xfer-mode.c b/drivers/ide/ide-xfer-mode.c
index 46d203ce60cc..cdae463f6b41 100644
--- a/drivers/ide/ide-xfer-mode.c
+++ b/drivers/ide/ide-xfer-mode.c
@@ -135,6 +135,7 @@ int ide_set_pio_mode(ide_drive_t *drive, const u8 mode)
 	 * set transfer mode on the device in ->set_pio_mode method...
 	 */
 	if (port_ops->set_dma_mode == NULL) {
+		drive->pio_mode = mode;
 		port_ops->set_pio_mode(drive, mode - XFER_PIO_0);
 		return 0;
 	}
@@ -142,9 +143,11 @@ int ide_set_pio_mode(ide_drive_t *drive, const u8 mode)
 	if (hwif->host_flags & IDE_HFLAG_POST_SET_MODE) {
 		if (ide_config_drive_speed(drive, mode))
 			return -1;
+		drive->pio_mode = mode;
 		port_ops->set_pio_mode(drive, mode - XFER_PIO_0);
 		return 0;
 	} else {
+		drive->pio_mode = mode;
 		port_ops->set_pio_mode(drive, mode - XFER_PIO_0);
 		return ide_config_drive_speed(drive, mode);
 	}
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 0ec612959042..b5d2e9655059 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -515,6 +515,7 @@ struct ide_drive_s {
         u8	init_speed;	/* transfer rate set at boot */
         u8	current_speed;	/* current transfer rate set */
 	u8	desired_speed;	/* desired transfer rate set */
+	u8	pio_mode;	/* for ->set_pio_mode _only_ */
         u8	dn;		/* now wide spread use */
 	u8	acoustic;	/* acoustic management */
 	u8	media;		/* disk, cdrom, tape, floppy, ... */
-- 
cgit v1.2.3


From 3fccaa192b9501e79a57e02e62b6bf420d2b461e Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Mon, 18 Jan 2010 07:20:35 +0000
Subject: ide: add drive->dma_mode field

Add dma_mode field to ide_drive_t matching dma_mode field used in
struct ata_device.

The validity of the field is restricted to ->dma_pio_mode method
only currently in IDE subsystem.

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/ide/aec62xx.c       | 1 +
 drivers/ide/ide-xfer-mode.c | 2 ++
 include/linux/ide.h         | 1 +
 3 files changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/ide/aec62xx.c b/drivers/ide/aec62xx.c
index 878f8ec6dbe1..4c869872eb9a 100644
--- a/drivers/ide/aec62xx.c
+++ b/drivers/ide/aec62xx.c
@@ -136,6 +136,7 @@ static void aec6260_set_mode(ide_drive_t *drive, const u8 speed)
 
 static void aec_set_pio_mode(ide_drive_t *drive, const u8 pio)
 {
+	drive->dma_mode = pio + XFER_PIO_0;
 	drive->hwif->port_ops->set_dma_mode(drive, pio + XFER_PIO_0);
 }
 
diff --git a/drivers/ide/ide-xfer-mode.c b/drivers/ide/ide-xfer-mode.c
index cdae463f6b41..c2323869d92a 100644
--- a/drivers/ide/ide-xfer-mode.c
+++ b/drivers/ide/ide-xfer-mode.c
@@ -167,9 +167,11 @@ int ide_set_dma_mode(ide_drive_t *drive, const u8 mode)
 	if (hwif->host_flags & IDE_HFLAG_POST_SET_MODE) {
 		if (ide_config_drive_speed(drive, mode))
 			return -1;
+		drive->dma_mode = mode;
 		port_ops->set_dma_mode(drive, mode);
 		return 0;
 	} else {
+		drive->dma_mode = mode;
 		port_ops->set_dma_mode(drive, mode);
 		return ide_config_drive_speed(drive, mode);
 	}
diff --git a/include/linux/ide.h b/include/linux/ide.h
index b5d2e9655059..746ef9fdabcb 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -516,6 +516,7 @@ struct ide_drive_s {
         u8	current_speed;	/* current transfer rate set */
 	u8	desired_speed;	/* desired transfer rate set */
 	u8	pio_mode;	/* for ->set_pio_mode _only_ */
+	u8	dma_mode;	/* for ->dma_pio_mode _only_ */
         u8	dn;		/* now wide spread use */
 	u8	acoustic;	/* acoustic management */
 	u8	media;		/* disk, cdrom, tape, floppy, ... */
-- 
cgit v1.2.3


From e085b3cae85af47eb0a3eda3186bd898310fb322 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Tue, 19 Jan 2010 01:44:41 -0800
Subject: ide: change ->set_pio_mode method parameters

Change ->set_pio_mode method parameters to match ->set_piomode method
used in struct ata_port_operations.

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/ide/aec62xx.c       |  6 +++---
 drivers/ide/ali14xx.c       |  3 ++-
 drivers/ide/alim15x3.c      |  7 +++----
 drivers/ide/amd74xx.c       |  4 ++--
 drivers/ide/at91_ide.c      |  5 +++--
 drivers/ide/atiixp.c        |  7 ++++---
 drivers/ide/au1xxx-ide.c    |  5 ++---
 drivers/ide/cmd640.c        |  3 ++-
 drivers/ide/cmd64x.c        |  4 +++-
 drivers/ide/cs5520.c        |  7 ++++---
 drivers/ide/cs5530.c        |  7 ++++---
 drivers/ide/cs5535.c        |  6 +++---
 drivers/ide/cs5536.c        |  7 ++++---
 drivers/ide/cy82c693.c      |  5 ++---
 drivers/ide/dtc2278.c       |  4 ++--
 drivers/ide/hpt366.c        |  4 ++--
 drivers/ide/ht6560b.c       |  3 ++-
 drivers/ide/ide-devsets.c   |  4 ++--
 drivers/ide/ide-xfer-mode.c |  6 +++---
 drivers/ide/it8172.c        | 10 +++++-----
 drivers/ide/it8213.c        | 14 +++++++-------
 drivers/ide/it821x.c        |  6 +++---
 drivers/ide/jmicron.c       |  2 +-
 drivers/ide/opti621.c       |  6 +++---
 drivers/ide/palm_bk3710.c   |  5 +++--
 drivers/ide/pdc202xx_new.c  |  4 ++--
 drivers/ide/pdc202xx_old.c  |  4 ++--
 drivers/ide/piix.c          | 14 +++++++-------
 drivers/ide/pmac.c          |  5 ++---
 drivers/ide/qd65xx.c        | 10 ++++------
 drivers/ide/sc1200.c        |  4 ++--
 drivers/ide/scc_pata.c      |  6 +++---
 drivers/ide/serverworks.c   |  5 +++--
 drivers/ide/siimage.c       |  6 +++---
 drivers/ide/sis5513.c       |  4 ++--
 drivers/ide/sl82c105.c      |  5 +++--
 drivers/ide/slc90e66.c      | 13 +++++++------
 drivers/ide/tc86c001.c      |  4 ++--
 drivers/ide/triflex.c       |  4 ++--
 drivers/ide/tx4938ide.c     |  5 ++---
 drivers/ide/tx4939ide.c     |  4 ++--
 drivers/ide/umc8672.c       |  5 +++--
 drivers/ide/via82cxxx.c     |  6 +++---
 include/linux/ide.h         |  2 +-
 44 files changed, 129 insertions(+), 121 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ide/aec62xx.c b/drivers/ide/aec62xx.c
index 4c869872eb9a..3790847361c3 100644
--- a/drivers/ide/aec62xx.c
+++ b/drivers/ide/aec62xx.c
@@ -134,10 +134,10 @@ static void aec6260_set_mode(ide_drive_t *drive, const u8 speed)
 	local_irq_restore(flags);
 }
 
-static void aec_set_pio_mode(ide_drive_t *drive, const u8 pio)
+static void aec_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
-	drive->dma_mode = pio + XFER_PIO_0;
-	drive->hwif->port_ops->set_dma_mode(drive, pio + XFER_PIO_0);
+	drive->dma_mode = drive->pio_mode;
+	hwif->port_ops->set_dma_mode(drive, drive->dma_mode);
 }
 
 static int init_chipset_aec62xx(struct pci_dev *dev)
diff --git a/drivers/ide/ali14xx.c b/drivers/ide/ali14xx.c
index 90da1f953ed0..25b9fe3a9f8e 100644
--- a/drivers/ide/ali14xx.c
+++ b/drivers/ide/ali14xx.c
@@ -109,13 +109,14 @@ static DEFINE_SPINLOCK(ali14xx_lock);
  * This function computes timing parameters
  * and sets controller registers accordingly.
  */
-static void ali14xx_set_pio_mode(ide_drive_t *drive, const u8 pio)
+static void ali14xx_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
 	int driveNum;
 	int time1, time2;
 	u8 param1, param2, param3, param4;
 	unsigned long flags;
 	int bus_speed = ide_vlb_clk ? ide_vlb_clk : 50;
+	const u8 pio = drive->pio_mode - XFER_PIO_0;
 	struct ide_timing *t = ide_timing_find_mode(XFER_PIO_0 + pio);
 
 	/* calculate timing, according to PIO mode */
diff --git a/drivers/ide/alim15x3.c b/drivers/ide/alim15x3.c
index 8f03cce055fa..28cee1055f76 100644
--- a/drivers/ide/alim15x3.c
+++ b/drivers/ide/alim15x3.c
@@ -63,15 +63,14 @@ static void ali_fifo_control(ide_hwif_t *hwif, ide_drive_t *drive, int on)
 
 /**
  *	ali_set_pio_mode	-	set host controller for PIO mode
+ *	@hwif: port
  *	@drive: drive
- *	@pio: PIO mode number
  *
  *	Program the controller for the given PIO mode.
  */
 
-static void ali_set_pio_mode(ide_drive_t *drive, const u8 pio)
+static void ali_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
-	ide_hwif_t *hwif = drive->hwif;
 	struct pci_dev *dev = to_pci_dev(hwif->dev);
 	int bus_speed = ide_pci_clk ? ide_pci_clk : 33;
 	unsigned long T =  1000000 / bus_speed; /* PCI clock based */
@@ -79,7 +78,7 @@ static void ali_set_pio_mode(ide_drive_t *drive, const u8 pio)
 	u8 unit = drive->dn & 1;
 	struct ide_timing t;
 
-	ide_timing_compute(drive, XFER_PIO_0 + pio, &t, T, 1);
+	ide_timing_compute(drive, drive->pio_mode, &t, T, 1);
 
 	t.setup = clamp_val(t.setup, 1, 8) & 7;
 	t.active = clamp_val(t.active, 1, 8) & 7;
diff --git a/drivers/ide/amd74xx.c b/drivers/ide/amd74xx.c
index 108e9b676859..3eee7be7ca6f 100644
--- a/drivers/ide/amd74xx.c
+++ b/drivers/ide/amd74xx.c
@@ -108,9 +108,9 @@ static void amd_set_drive(ide_drive_t *drive, const u8 speed)
  * amd_set_pio_mode() is a callback from upper layers for PIO-only tuning.
  */
 
-static void amd_set_pio_mode(ide_drive_t *drive, const u8 pio)
+static void amd_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
-	amd_set_drive(drive, XFER_PIO_0 + pio);
+	amd_set_drive(drive, drive->pio_mode);
 }
 
 static void amd7409_cable_detect(struct pci_dev *dev)
diff --git a/drivers/ide/at91_ide.c b/drivers/ide/at91_ide.c
index 248219a89a68..000a78e5246c 100644
--- a/drivers/ide/at91_ide.c
+++ b/drivers/ide/at91_ide.c
@@ -172,11 +172,12 @@ static void at91_ide_output_data(ide_drive_t *drive, struct ide_cmd *cmd,
 	leave_16bit(chipselect, mode);
 }
 
-static void at91_ide_set_pio_mode(ide_drive_t *drive, const u8 pio)
+static void at91_ide_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
 	struct ide_timing *timing;
-	u8 chipselect = drive->hwif->select_data;
+	u8 chipselect = hwif->select_data;
 	int use_iordy = 0;
+	const u8 pio = drive->pio_mode - XFER_PIO_0;
 
 	pdbg("chipselect %u pio %u\n", chipselect, pio);
 
diff --git a/drivers/ide/atiixp.c b/drivers/ide/atiixp.c
index 837322b10a4c..b6848dfb93b0 100644
--- a/drivers/ide/atiixp.c
+++ b/drivers/ide/atiixp.c
@@ -42,19 +42,20 @@ static DEFINE_SPINLOCK(atiixp_lock);
 
 /**
  *	atiixp_set_pio_mode	-	set host controller for PIO mode
+ *	@hwif: port
  *	@drive: drive
- *	@pio: PIO mode number
  *
  *	Set the interface PIO mode.
  */
 
-static void atiixp_set_pio_mode(ide_drive_t *drive, const u8 pio)
+static void atiixp_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
-	struct pci_dev *dev = to_pci_dev(drive->hwif->dev);
+	struct pci_dev *dev = to_pci_dev(hwif->dev);
 	unsigned long flags;
 	int timing_shift = (drive->dn ^ 1) * 8;
 	u32 pio_timing_data;
 	u16 pio_mode_data;
+	const u8 pio = drive->pio_mode - XFER_PIO_0;
 
 	spin_lock_irqsave(&atiixp_lock, flags);
 
diff --git a/drivers/ide/au1xxx-ide.c b/drivers/ide/au1xxx-ide.c
index 87cef0c440ad..c90e9b0a9f6e 100644
--- a/drivers/ide/au1xxx-ide.c
+++ b/drivers/ide/au1xxx-ide.c
@@ -99,12 +99,11 @@ static void au1xxx_output_data(ide_drive_t *drive, struct ide_cmd *cmd,
 }
 #endif
 
-static void au1xxx_set_pio_mode(ide_drive_t *drive, const u8 pio)
+static void au1xxx_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
 	int mem_sttime = 0, mem_stcfg = au_readl(MEM_STCFG2);
 
-	/* set pio mode! */
-	switch(pio) {
+	switch (drive->pio_mode - XFER_PIO_0) {
 	case 0:
 		mem_sttime = SBC_IDE_TIMING(PIO0);
 
diff --git a/drivers/ide/cmd640.c b/drivers/ide/cmd640.c
index 1a32d62ed86b..c7d46a3d347a 100644
--- a/drivers/ide/cmd640.c
+++ b/drivers/ide/cmd640.c
@@ -572,9 +572,10 @@ static void cmd640_set_mode(ide_drive_t *drive, unsigned int index,
 	program_drive_counts(drive, index);
 }
 
-static void cmd640_set_pio_mode(ide_drive_t *drive, const u8 pio)
+static void cmd640_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
 	unsigned int index = 0, cycle_time;
+	const u8 pio = drive->pio_mode - XFER_PIO_0;
 	u8 b;
 
 	switch (pio) {
diff --git a/drivers/ide/cmd64x.c b/drivers/ide/cmd64x.c
index 9f89f3116df0..0b11745937e7 100644
--- a/drivers/ide/cmd64x.c
+++ b/drivers/ide/cmd64x.c
@@ -127,8 +127,10 @@ static void cmd64x_program_timings(ide_drive_t *drive, u8 mode)
  * Special cases are 8: prefetch off, 9: prefetch on (both never worked)
  */
 
-static void cmd64x_set_pio_mode(ide_drive_t *drive, const u8 pio)
+static void cmd64x_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
+	const u8 pio = drive->pio_mode - XFER_PIO_0;
+
 	/*
 	 * Filter out the prefetch control values
 	 * to prevent PIO5 from being programmed
diff --git a/drivers/ide/cs5520.c b/drivers/ide/cs5520.c
index 09f98ed0731f..b8094f049f3e 100644
--- a/drivers/ide/cs5520.c
+++ b/drivers/ide/cs5520.c
@@ -57,11 +57,11 @@ static struct pio_clocks cs5520_pio_clocks[]={
 	{1, 2, 1}
 };
 
-static void cs5520_set_pio_mode(ide_drive_t *drive, const u8 pio)
+static void cs5520_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
-	ide_hwif_t *hwif = drive->hwif;
 	struct pci_dev *pdev = to_pci_dev(hwif->dev);
 	int controller = drive->dn > 1 ? 1 : 0;
+	const u8 pio = drive->pio_mode - XFER_PIO_0;
 
 	/* 8bit CAT/CRT - 8bit command timing for channel */
 	pci_write_config_byte(pdev, 0x62 + controller, 
@@ -85,7 +85,8 @@ static void cs5520_set_dma_mode(ide_drive_t *drive, const u8 speed)
 {
 	printk(KERN_ERR "cs55x0: bad ide timing.\n");
 
-	cs5520_set_pio_mode(drive, 0);
+	drive->pio_mode = XFER_PIO_0 + 0;
+	cs5520_set_pio_mode(drive->hwif, drive);
 }
 
 static const struct ide_port_ops cs5520_port_ops = {
diff --git a/drivers/ide/cs5530.c b/drivers/ide/cs5530.c
index 40bf05eddf6e..4ced40255ad6 100644
--- a/drivers/ide/cs5530.c
+++ b/drivers/ide/cs5530.c
@@ -41,8 +41,8 @@ static unsigned int cs5530_pio_timings[2][5] = {
 
 /**
  *	cs5530_set_pio_mode	-	set host controller for PIO mode
+ *	@hwif: port
  *	@drive: drive
- *	@pio: PIO mode number
  *
  *	Handles setting of PIO mode for the chipset.
  *
@@ -50,10 +50,11 @@ static unsigned int cs5530_pio_timings[2][5] = {
  *	will have valid default PIO timings set up before we get here.
  */
 
-static void cs5530_set_pio_mode(ide_drive_t *drive, const u8 pio)
+static void cs5530_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
-	unsigned long basereg = CS5530_BASEREG(drive->hwif);
+	unsigned long basereg = CS5530_BASEREG(hwif);
 	unsigned int format = (inl(basereg + 4) >> 31) & 1;
+	const u8 pio = drive->pio_mode - XFER_PIO_0;
 
 	outl(cs5530_pio_timings[format][pio], basereg + ((drive->dn & 1)<<3));
 }
diff --git a/drivers/ide/cs5535.c b/drivers/ide/cs5535.c
index b883838adc24..7974415ea89f 100644
--- a/drivers/ide/cs5535.c
+++ b/drivers/ide/cs5535.c
@@ -142,15 +142,15 @@ static void cs5535_set_dma_mode(ide_drive_t *drive, const u8 speed)
 
 /**
  *	cs5535_set_pio_mode	-	set host controller for PIO mode
+ *	@hwif: port
  *	@drive: drive
- *	@pio: PIO mode number
  *
  *	A callback from the upper layers for PIO-only tuning.
  */
 
-static void cs5535_set_pio_mode(ide_drive_t *drive, const u8 pio)
+static void cs5535_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
-	cs5535_set_speed(drive, XFER_PIO_0 + pio);
+	cs5535_set_speed(drive, drive->pio_mode);
 }
 
 static u8 cs5535_cable_detect(ide_hwif_t *hwif)
diff --git a/drivers/ide/cs5536.c b/drivers/ide/cs5536.c
index 9623b852c616..b518ef0e9a35 100644
--- a/drivers/ide/cs5536.c
+++ b/drivers/ide/cs5536.c
@@ -125,11 +125,11 @@ static u8 cs5536_cable_detect(ide_hwif_t *hwif)
 
 /**
  *	cs5536_set_pio_mode		-	PIO timing setup
+ *	@hwif: ATA port
  *	@drive: ATA device
- *	@pio: PIO mode number
  */
 
-static void cs5536_set_pio_mode(ide_drive_t *drive, const u8 pio)
+static void cs5536_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
 	static const u8 drv_timings[5] = {
 		0x98, 0x55, 0x32, 0x21, 0x20,
@@ -143,11 +143,12 @@ static void cs5536_set_pio_mode(ide_drive_t *drive, const u8 pio)
 		0x99, 0x92, 0x90, 0x22, 0x20,
 	};
 
-	struct pci_dev *pdev = to_pci_dev(drive->hwif->dev);
+	struct pci_dev *pdev = to_pci_dev(hwif->dev);
 	ide_drive_t *pair = ide_get_pair_dev(drive);
 	int cshift = (drive->dn & 1) ? IDE_CAST_D1_SHIFT : IDE_CAST_D0_SHIFT;
 	unsigned long timings = (unsigned long)ide_get_drivedata(drive);
 	u32 cast;
+	const u8 pio = drive->pio_mode - XFER_PIO_0;
 	u8 cmd_pio = pio;
 
 	if (pair)
diff --git a/drivers/ide/cy82c693.c b/drivers/ide/cy82c693.c
index fbf3dcc26577..ead65c394f00 100644
--- a/drivers/ide/cy82c693.c
+++ b/drivers/ide/cy82c693.c
@@ -80,9 +80,8 @@ static void cy82c693_set_dma_mode(ide_drive_t *drive, const u8 mode)
 	outb(data, CY82_DATA_PORT);
 }
 
-static void cy82c693_set_pio_mode(ide_drive_t *drive, const u8 pio)
+static void cy82c693_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
-	ide_hwif_t *hwif = drive->hwif;
 	struct pci_dev *dev = to_pci_dev(hwif->dev);
 	int bus_speed = ide_pci_clk ? ide_pci_clk : 33;
 	const unsigned long T = 1000000 / bus_speed;
@@ -101,7 +100,7 @@ static void cy82c693_set_pio_mode(ide_drive_t *drive, const u8 pio)
 		}
 	}
 
-	ide_timing_compute(drive, XFER_PIO_0 + pio, &t, T, 1);
+	ide_timing_compute(drive, drive->pio_mode, &t, T, 1);
 
 	time_16 = clamp_val(t.recover - 1, 0, 15) |
 		  (clamp_val(t.active - 1, 0, 15) << 4);
diff --git a/drivers/ide/dtc2278.c b/drivers/ide/dtc2278.c
index c6b138122981..6929f7fce93a 100644
--- a/drivers/ide/dtc2278.c
+++ b/drivers/ide/dtc2278.c
@@ -68,11 +68,11 @@ static void sub22 (char b, char c)
 
 static DEFINE_SPINLOCK(dtc2278_lock);
 
-static void dtc2278_set_pio_mode(ide_drive_t *drive, const u8 pio)
+static void dtc2278_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
 	unsigned long flags;
 
-	if (pio >= 3) {
+	if (drive->pio_mode >= XFER_PIO_3) {
 		spin_lock_irqsave(&dtc2278_lock, flags);
 		/*
 		 * This enables PIO mode4 (3?) on the first interface
diff --git a/drivers/ide/hpt366.c b/drivers/ide/hpt366.c
index 4d90ac2dbb1b..f1dec519a9e6 100644
--- a/drivers/ide/hpt366.c
+++ b/drivers/ide/hpt366.c
@@ -651,9 +651,9 @@ static void hpt3xx_set_mode(ide_drive_t *drive, const u8 speed)
 	pci_write_config_dword(dev, itr_addr, new_itr);
 }
 
-static void hpt3xx_set_pio_mode(ide_drive_t *drive, const u8 pio)
+static void hpt3xx_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
-	hpt3xx_set_mode(drive, XFER_PIO_0 + pio);
+	hpt3xx_set_mode(drive, drive->pio_mode);
 }
 
 static void hpt3xx_maskproc(ide_drive_t *drive, int mask)
diff --git a/drivers/ide/ht6560b.c b/drivers/ide/ht6560b.c
index aafed8060e17..d81e49680c3f 100644
--- a/drivers/ide/ht6560b.c
+++ b/drivers/ide/ht6560b.c
@@ -279,9 +279,10 @@ static void ht_set_prefetch(ide_drive_t *drive, u8 state)
 #endif
 }
 
-static void ht6560b_set_pio_mode(ide_drive_t *drive, const u8 pio)
+static void ht6560b_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
 	unsigned long flags, config;
+	const u8 pio = drive->pio_mode - XFER_PIO_0;
 	u8 timing;
 	
 	switch (pio) {
diff --git a/drivers/ide/ide-devsets.c b/drivers/ide/ide-devsets.c
index cb3341ce655c..c6935c78757c 100644
--- a/drivers/ide/ide-devsets.c
+++ b/drivers/ide/ide-devsets.c
@@ -112,10 +112,10 @@ static int set_pio_mode(ide_drive_t *drive, int arg)
 
 			/* take lock for IDE_DFLAG_[NO_]UNMASK/[NO_]IO_32BIT */
 			spin_lock_irqsave(&hwif->lock, flags);
-			port_ops->set_pio_mode(drive, arg);
+			port_ops->set_pio_mode(hwif, drive);
 			spin_unlock_irqrestore(&hwif->lock, flags);
 		} else
-			port_ops->set_pio_mode(drive, arg);
+			port_ops->set_pio_mode(hwif, drive);
 	} else {
 		int keep_dma = !!(drive->dev_flags & IDE_DFLAG_USING_DMA);
 
diff --git a/drivers/ide/ide-xfer-mode.c b/drivers/ide/ide-xfer-mode.c
index c2323869d92a..a62fb03fc1cc 100644
--- a/drivers/ide/ide-xfer-mode.c
+++ b/drivers/ide/ide-xfer-mode.c
@@ -136,7 +136,7 @@ int ide_set_pio_mode(ide_drive_t *drive, const u8 mode)
 	 */
 	if (port_ops->set_dma_mode == NULL) {
 		drive->pio_mode = mode;
-		port_ops->set_pio_mode(drive, mode - XFER_PIO_0);
+		port_ops->set_pio_mode(hwif, drive);
 		return 0;
 	}
 
@@ -144,11 +144,11 @@ int ide_set_pio_mode(ide_drive_t *drive, const u8 mode)
 		if (ide_config_drive_speed(drive, mode))
 			return -1;
 		drive->pio_mode = mode;
-		port_ops->set_pio_mode(drive, mode - XFER_PIO_0);
+		port_ops->set_pio_mode(hwif, drive);
 		return 0;
 	} else {
 		drive->pio_mode = mode;
-		port_ops->set_pio_mode(drive, mode - XFER_PIO_0);
+		port_ops->set_pio_mode(hwif, drive);
 		return ide_config_drive_speed(drive, mode);
 	}
 }
diff --git a/drivers/ide/it8172.c b/drivers/ide/it8172.c
index 0d266a5b524d..9dfdc8741a7b 100644
--- a/drivers/ide/it8172.c
+++ b/drivers/ide/it8172.c
@@ -37,12 +37,12 @@
 
 #define DRV_NAME "IT8172"
 
-static void it8172_set_pio_mode(ide_drive_t *drive, const u8 pio)
+static void it8172_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
-	ide_hwif_t *hwif	= drive->hwif;
 	struct pci_dev *dev	= to_pci_dev(hwif->dev);
 	u16 drive_enables;
 	u32 drive_timing;
+	const u8 pio = drive->pio_mode - XFER_PIO_0;
 
 	/*
 	 * The highest value of DIOR/DIOW pulse width and recovery time
@@ -98,14 +98,14 @@ static void it8172_set_dma_mode(ide_drive_t *drive, const u8 speed)
 		pci_write_config_byte(dev, 0x4a, reg4a | u_speed);
 	} else {
 		const u8 mwdma_to_pio[] = { 0, 3, 4 };
-		u8 pio;
 
 		pci_write_config_byte(dev, 0x48, reg48 & ~u_flag);
 		pci_write_config_byte(dev, 0x4a, reg4a & ~a_speed);
 
-		pio = mwdma_to_pio[speed - XFER_MW_DMA_0];
+		drive->pio_mode =
+			mwdma_to_pio[speed - XFER_MW_DMA_0] + XFER_PIO_0;
 
-		it8172_set_pio_mode(drive, pio);
+		it8172_set_pio_mode(hwif, drive);
 	}
 }
 
diff --git a/drivers/ide/it8213.c b/drivers/ide/it8213.c
index 47976167796a..492c07d5f4f3 100644
--- a/drivers/ide/it8213.c
+++ b/drivers/ide/it8213.c
@@ -17,15 +17,14 @@
 
 /**
  *	it8213_set_pio_mode	-	set host controller for PIO mode
+ *	@hwif: port
  *	@drive: drive
- *	@pio: PIO mode number
  *
  *	Set the interface PIO mode.
  */
 
-static void it8213_set_pio_mode(ide_drive_t *drive, const u8 pio)
+static void it8213_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
-	ide_hwif_t *hwif	= drive->hwif;
 	struct pci_dev *dev	= to_pci_dev(hwif->dev);
 	int is_slave		= drive->dn & 1;
 	int master_port		= 0x40;
@@ -35,6 +34,7 @@ static void it8213_set_pio_mode(ide_drive_t *drive, const u8 pio)
 	u8 slave_data;
 	static DEFINE_SPINLOCK(tune_lock);
 	int control = 0;
+	const u8 pio = drive->pio_mode - XFER_PIO_0;
 
 	static const u8 timings[][2] = {
 					{ 0, 0 },
@@ -120,7 +120,6 @@ static void it8213_set_dma_mode(ide_drive_t *drive, const u8 speed)
 			pci_write_config_byte(dev, 0x54, reg54 & ~v_flag);
 	} else {
 		const u8 mwdma_to_pio[] = { 0, 3, 4 };
-		u8 pio;
 
 		if (reg48 & u_flag)
 			pci_write_config_byte(dev, 0x48, reg48 & ~u_flag);
@@ -132,11 +131,12 @@ static void it8213_set_dma_mode(ide_drive_t *drive, const u8 speed)
 			pci_write_config_byte(dev, 0x55, (u8) reg55 & ~w_flag);
 
 		if (speed >= XFER_MW_DMA_0)
-			pio = mwdma_to_pio[speed - XFER_MW_DMA_0];
+			drive->pio_mode =
+				mwdma_to_pio[speed - XFER_MW_DMA_0] + XFER_PIO_0;
 		else
-			pio = 2; /* only SWDMA2 is allowed */
+			drive->pio_mode = XFER_PIO_2; /* for SWDMA2 */
 
-		it8213_set_pio_mode(drive, pio);
+		it8213_set_pio_mode(hwif, drive);
 	}
 }
 
diff --git a/drivers/ide/it821x.c b/drivers/ide/it821x.c
index 51aa745246dc..69becb7b9656 100644
--- a/drivers/ide/it821x.c
+++ b/drivers/ide/it821x.c
@@ -228,18 +228,18 @@ static void it821x_clock_strategy(ide_drive_t *drive)
 
 /**
  *	it821x_set_pio_mode	-	set host controller for PIO mode
+ *	@hwif: port
  *	@drive: drive
- *	@pio: PIO mode number
  *
  *	Tune the host to the desired PIO mode taking into the consideration
  *	the maximum PIO mode supported by the other device on the cable.
  */
 
-static void it821x_set_pio_mode(ide_drive_t *drive, const u8 pio)
+static void it821x_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
-	ide_hwif_t *hwif = drive->hwif;
 	struct it821x_dev *itdev = ide_get_hwifdata(hwif);
 	ide_drive_t *pair = ide_get_pair_dev(drive);
+	const u8 pio = drive->pio_mode - XFER_PIO_0;
 	u8 unit = drive->dn & 1, set_pio = pio;
 
 	/* Spec says 89 ref driver uses 88 */
diff --git a/drivers/ide/jmicron.c b/drivers/ide/jmicron.c
index bf2be6431b20..ebffb904ed24 100644
--- a/drivers/ide/jmicron.c
+++ b/drivers/ide/jmicron.c
@@ -80,7 +80,7 @@ static u8 jmicron_cable_detect(ide_hwif_t *hwif)
 	return ATA_CBL_PATA80;
 }
 
-static void jmicron_set_pio_mode(ide_drive_t *drive, const u8 pio)
+static void jmicron_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
 }
 
diff --git a/drivers/ide/opti621.c b/drivers/ide/opti621.c
index 2052788fab7a..1a53a4c375ed 100644
--- a/drivers/ide/opti621.c
+++ b/drivers/ide/opti621.c
@@ -62,12 +62,12 @@ static u8 read_reg(int reg)
 	return ret;
 }
 
-static void opti621_set_pio_mode(ide_drive_t *drive, const u8 pio)
+static void opti621_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
-	ide_hwif_t *hwif = drive->hwif;
 	ide_drive_t *pair = ide_get_pair_dev(drive);
 	unsigned long flags;
-	unsigned long mode = XFER_PIO_0 + pio, pair_mode;
+	unsigned long mode = drive->pio_mode, pair_mode;
+	const u8 pio = mode - XFER_PIO_0;
 	u8 tim, misc, addr_pio = pio, clk;
 
 	/* DRDY is default 2 (by OPTi Databook) */
diff --git a/drivers/ide/palm_bk3710.c b/drivers/ide/palm_bk3710.c
index f8eddf05ecb8..0f262d07c378 100644
--- a/drivers/ide/palm_bk3710.c
+++ b/drivers/ide/palm_bk3710.c
@@ -203,12 +203,13 @@ static void palm_bk3710_set_dma_mode(ide_drive_t *drive, u8 xferspeed)
 	}
 }
 
-static void palm_bk3710_set_pio_mode(ide_drive_t *drive, u8 pio)
+static void palm_bk3710_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
 	unsigned int cycle_time;
 	int is_slave = drive->dn & 1;
 	ide_drive_t *mate;
-	void __iomem *base = (void *)drive->hwif->dma_base;
+	void __iomem *base = (void *)hwif->dma_base;
+	const u8 pio = drive->pio_mode - XFER_PIO_0;
 
 	/*
 	 * Obtain the drive PIO data for tuning the Palm Chip registers
diff --git a/drivers/ide/pdc202xx_new.c b/drivers/ide/pdc202xx_new.c
index 65ba8239e7b5..874acd2bb6e6 100644
--- a/drivers/ide/pdc202xx_new.c
+++ b/drivers/ide/pdc202xx_new.c
@@ -167,11 +167,11 @@ static void pdcnew_set_dma_mode(ide_drive_t *drive, const u8 speed)
  	}
 }
 
-static void pdcnew_set_pio_mode(ide_drive_t *drive, const u8 pio)
+static void pdcnew_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
-	ide_hwif_t *hwif = drive->hwif;
 	struct pci_dev *dev = to_pci_dev(hwif->dev);
 	u8 adj = (drive->dn & 1) ? 0x08 : 0x00;
+	const u8 pio = drive->pio_mode - XFER_PIO_0;
 
 	if (max_dma_rate(dev) == 4) {
 		set_indexed_reg(hwif, 0x0c + adj, pio_timings[pio].reg0c);
diff --git a/drivers/ide/pdc202xx_old.c b/drivers/ide/pdc202xx_old.c
index 1d20594ee420..402aab7f3baa 100644
--- a/drivers/ide/pdc202xx_old.c
+++ b/drivers/ide/pdc202xx_old.c
@@ -76,9 +76,9 @@ static void pdc202xx_set_mode(ide_drive_t *drive, const u8 speed)
 	}
 }
 
-static void pdc202xx_set_pio_mode(ide_drive_t *drive, const u8 pio)
+static void pdc202xx_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
-	pdc202xx_set_mode(drive, XFER_PIO_0 + pio);
+	pdc202xx_set_mode(drive, drive->pio_mode);
 }
 
 static int pdc202xx_test_irq(ide_hwif_t *hwif)
diff --git a/drivers/ide/piix.c b/drivers/ide/piix.c
index bf14f39bd3a7..64b3041daa60 100644
--- a/drivers/ide/piix.c
+++ b/drivers/ide/piix.c
@@ -59,15 +59,14 @@ static int no_piix_dma;
 
 /**
  *	piix_set_pio_mode	-	set host controller for PIO mode
+ *	@port: port
  *	@drive: drive
- *	@pio: PIO mode number
  *
  *	Set the interface PIO mode based upon the settings done by AMI BIOS.
  */
 
-static void piix_set_pio_mode(ide_drive_t *drive, const u8 pio)
+static void piix_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
-	ide_hwif_t *hwif	= drive->hwif;
 	struct pci_dev *dev	= to_pci_dev(hwif->dev);
 	int is_slave		= drive->dn & 1;
 	int master_port		= hwif->channel ? 0x42 : 0x40;
@@ -77,6 +76,7 @@ static void piix_set_pio_mode(ide_drive_t *drive, const u8 pio)
 	u8 slave_data;
 	static DEFINE_SPINLOCK(tune_lock);
 	int control = 0;
+	const u8 pio = drive->pio_mode - XFER_PIO_0;
 
 				     /* ISP  RTC */
 	static const u8 timings[][2]= {
@@ -176,7 +176,6 @@ static void piix_set_dma_mode(ide_drive_t *drive, const u8 speed)
 			pci_write_config_byte(dev, 0x54, reg54 & ~v_flag);
 	} else {
 		const u8 mwdma_to_pio[] = { 0, 3, 4 };
-		u8 pio;
 
 		if (reg48 & u_flag)
 			pci_write_config_byte(dev, 0x48, reg48 & ~u_flag);
@@ -188,11 +187,12 @@ static void piix_set_dma_mode(ide_drive_t *drive, const u8 speed)
 			pci_write_config_byte(dev, 0x55, (u8) reg55 & ~w_flag);
 
 		if (speed >= XFER_MW_DMA_0)
-			pio = mwdma_to_pio[speed - XFER_MW_DMA_0];
+			drive->pio_mode =
+				mwdma_to_pio[speed - XFER_MW_DMA_0] + XFER_PIO_0;
 		else
-			pio = 2; /* only SWDMA2 is allowed */
+			drive->pio_mode = XFER_PIO_2; /* for SWDMA2 */
 
-		piix_set_pio_mode(drive, pio);
+		piix_set_pio_mode(hwif, drive);
 	}
 }
 
diff --git a/drivers/ide/pmac.c b/drivers/ide/pmac.c
index 7a4e788cab2f..a167968a2d42 100644
--- a/drivers/ide/pmac.c
+++ b/drivers/ide/pmac.c
@@ -496,12 +496,11 @@ static void pmac_write_devctl(ide_hwif_t *hwif, u8 ctl)
 /*
  * Old tuning functions (called on hdparm -p), sets up drive PIO timings
  */
-static void
-pmac_ide_set_pio_mode(ide_drive_t *drive, const u8 pio)
+static void pmac_ide_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
-	ide_hwif_t *hwif = drive->hwif;
 	pmac_ide_hwif_t *pmif =
 		(pmac_ide_hwif_t *)dev_get_drvdata(hwif->gendev.parent);
+	const u8 pio = drive->pio_mode - XFER_PIO_0;
 	struct ide_timing *tim = ide_timing_find_mode(XFER_PIO_0 + pio);
 	u32 *timings, t;
 	unsigned accessTicks, recTicks;
diff --git a/drivers/ide/qd65xx.c b/drivers/ide/qd65xx.c
index 74696edc8d1d..3f0244fd8e62 100644
--- a/drivers/ide/qd65xx.c
+++ b/drivers/ide/qd65xx.c
@@ -189,15 +189,13 @@ static void qd_set_timing (ide_drive_t *drive, u8 timing)
 	printk(KERN_DEBUG "%s: %#x\n", drive->name, timing);
 }
 
-static void qd6500_set_pio_mode(ide_drive_t *drive, const u8 pio)
+static void qd6500_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
 	u16 *id = drive->id;
 	int active_time   = 175;
 	int recovery_time = 415; /* worst case values from the dos driver */
 
-	/*
-	 * FIXME: use "pio" value
-	 */
+	/* FIXME: use drive->pio_mode value */
 	if (!qd_find_disk_type(drive, &active_time, &recovery_time) &&
 	    (id[ATA_ID_OLD_PIO_MODES] & 0xff) && (id[ATA_ID_FIELD_VALID] & 2) &&
 	    id[ATA_ID_EIDE_PIO] >= 240) {
@@ -211,9 +209,9 @@ static void qd6500_set_pio_mode(ide_drive_t *drive, const u8 pio)
 				active_time, recovery_time));
 }
 
-static void qd6580_set_pio_mode(ide_drive_t *drive, const u8 pio)
+static void qd6580_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
-	ide_hwif_t *hwif = drive->hwif;
+	const u8 pio = drive->pio_mode - XFER_PIO_0;
 	struct ide_timing *t = ide_timing_find_mode(XFER_PIO_0 + pio);
 	unsigned int cycle_time;
 	int active_time   = 175;
diff --git a/drivers/ide/sc1200.c b/drivers/ide/sc1200.c
index d467478d68da..bb0166e460ab 100644
--- a/drivers/ide/sc1200.c
+++ b/drivers/ide/sc1200.c
@@ -193,10 +193,10 @@ static int sc1200_dma_end(ide_drive_t *drive)
  * will have valid default PIO timings set up before we get here.
  */
 
-static void sc1200_set_pio_mode(ide_drive_t *drive, const u8 pio)
+static void sc1200_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
-	ide_hwif_t	*hwif = drive->hwif;
 	int		mode = -1;
+	const u8	pio = drive->pio_mode - XFER_PIO_0;
 
 	/*
 	 * bad abuse of ->set_pio_mode interface
diff --git a/drivers/ide/scc_pata.c b/drivers/ide/scc_pata.c
index 1104bb301eb9..23e16e4460ee 100644
--- a/drivers/ide/scc_pata.c
+++ b/drivers/ide/scc_pata.c
@@ -199,16 +199,15 @@ scc_ide_outsl(unsigned long port, void *addr, u32 count)
 
 /**
  *	scc_set_pio_mode	-	set host controller for PIO mode
+ *	@hwif: port
  *	@drive: drive
- *	@pio: PIO mode number
  *
  *	Load the timing settings for this device mode into the
  *	controller.
  */
 
-static void scc_set_pio_mode(ide_drive_t *drive, const u8 pio)
+static void scc_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
-	ide_hwif_t *hwif = drive->hwif;
 	struct scc_ports *ports = ide_get_hwifdata(hwif);
 	unsigned long ctl_base = ports->ctl;
 	unsigned long cckctrl_port = ctl_base + 0xff0;
@@ -216,6 +215,7 @@ static void scc_set_pio_mode(ide_drive_t *drive, const u8 pio)
 	unsigned long pioct_port = ctl_base + 0x004;
 	unsigned long reg;
 	int offset;
+	const u8 pio = drive->pio_mode - XFER_PIO_0;
 
 	reg = in_be32((void __iomem *)cckctrl_port);
 	if (reg & CCKCTRL_ATACLKOEN) {
diff --git a/drivers/ide/serverworks.c b/drivers/ide/serverworks.c
index 657f0433ec50..a56bc51ae032 100644
--- a/drivers/ide/serverworks.c
+++ b/drivers/ide/serverworks.c
@@ -106,12 +106,13 @@ static u8 svwks_csb_check (struct pci_dev *dev)
 	return 0;
 }
 
-static void svwks_set_pio_mode(ide_drive_t *drive, const u8 pio)
+static void svwks_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
 	static const u8 pio_modes[] = { 0x5d, 0x47, 0x34, 0x22, 0x20 };
 	static const u8 drive_pci[] = { 0x41, 0x40, 0x43, 0x42 };
 
-	struct pci_dev *dev = to_pci_dev(drive->hwif->dev);
+	struct pci_dev *dev = to_pci_dev(hwif->dev);
+	const u8 pio = drive->pio_mode - XFER_PIO_0;
 
 	pci_write_config_byte(dev, drive_pci[drive->dn], pio_modes[pio]);
 
diff --git a/drivers/ide/siimage.c b/drivers/ide/siimage.c
index d95df528562f..97266958f744 100644
--- a/drivers/ide/siimage.c
+++ b/drivers/ide/siimage.c
@@ -229,19 +229,18 @@ static u8 sil_sata_udma_filter(ide_drive_t *drive)
 
 /**
  *	sil_set_pio_mode	-	set host controller for PIO mode
+ *	@hwif: port
  *	@drive: drive
- *	@pio: PIO mode number
  *
  *	Load the timing settings for this device mode into the
  *	controller.
  */
 
-static void sil_set_pio_mode(ide_drive_t *drive, u8 pio)
+static void sil_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
 	static const u16 tf_speed[]   = { 0x328a, 0x2283, 0x1281, 0x10c3, 0x10c1 };
 	static const u16 data_speed[] = { 0x328a, 0x2283, 0x1104, 0x10c3, 0x10c1 };
 
-	ide_hwif_t *hwif	= drive->hwif;
 	struct pci_dev *dev	= to_pci_dev(hwif->dev);
 	ide_drive_t *pair	= ide_get_pair_dev(drive);
 	u32 speedt		= 0;
@@ -249,6 +248,7 @@ static void sil_set_pio_mode(ide_drive_t *drive, u8 pio)
 	unsigned long addr	= siimage_seldev(drive, 0x04);
 	unsigned long tfaddr	= siimage_selreg(hwif,	0x02);
 	unsigned long base	= (unsigned long)hwif->hwif_data;
+	const u8 pio		= drive->pio_mode - XFER_PIO_0;
 	u8 tf_pio		= pio;
 	u8 mmio			= (hwif->host_flags & IDE_HFLAG_MMIO) ? 1 : 0;
 	u8 addr_mask		= hwif->channel ? (mmio ? 0xF4 : 0x84)
diff --git a/drivers/ide/sis5513.c b/drivers/ide/sis5513.c
index 468706082fb5..5a0192060531 100644
--- a/drivers/ide/sis5513.c
+++ b/drivers/ide/sis5513.c
@@ -290,10 +290,10 @@ static void config_drive_art_rwp(ide_drive_t *drive)
 		pci_write_config_byte(dev, 0x4b, rw_prefetch);
 }
 
-static void sis_set_pio_mode(ide_drive_t *drive, const u8 pio)
+static void sis_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
 	config_drive_art_rwp(drive);
-	sis_program_timings(drive, XFER_PIO_0 + pio);
+	sis_program_timings(drive, drive->pio_mode);
 }
 
 static void sis_ata133_program_udma_timings(ide_drive_t *drive, const u8 mode)
diff --git a/drivers/ide/sl82c105.c b/drivers/ide/sl82c105.c
index 3c2bbf0057ea..419cd3bc6c84 100644
--- a/drivers/ide/sl82c105.c
+++ b/drivers/ide/sl82c105.c
@@ -63,12 +63,13 @@ static unsigned int get_pio_timings(ide_drive_t *drive, u8 pio)
 /*
  * Configure the chipset for PIO mode.
  */
-static void sl82c105_set_pio_mode(ide_drive_t *drive, const u8 pio)
+static void sl82c105_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
-	struct pci_dev *dev	= to_pci_dev(drive->hwif->dev);
+	struct pci_dev *dev	= to_pci_dev(hwif->dev);
 	unsigned long timings	= (unsigned long)ide_get_drivedata(drive);
 	int reg			= 0x44 + drive->dn * 4;
 	u16 drv_ctrl;
+	const u8 pio		= drive->pio_mode - XFER_PIO_0;
 
 	drv_ctrl = get_pio_timings(drive, pio);
 
diff --git a/drivers/ide/slc90e66.c b/drivers/ide/slc90e66.c
index 1ccfb40e7215..019777522cd2 100644
--- a/drivers/ide/slc90e66.c
+++ b/drivers/ide/slc90e66.c
@@ -18,9 +18,8 @@
 
 static DEFINE_SPINLOCK(slc90e66_lock);
 
-static void slc90e66_set_pio_mode(ide_drive_t *drive, const u8 pio)
+static void slc90e66_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
-	ide_hwif_t *hwif	= drive->hwif;
 	struct pci_dev *dev	= to_pci_dev(hwif->dev);
 	int is_slave		= drive->dn & 1;
 	int master_port		= hwif->channel ? 0x42 : 0x40;
@@ -29,6 +28,8 @@ static void slc90e66_set_pio_mode(ide_drive_t *drive, const u8 pio)
 	u16 master_data;
 	u8 slave_data;
 	int control = 0;
+	const u8 pio = drive->pio_mode - XFER_PIO_0;
+
 				     /* ISP  RTC */
 	static const u8 timings[][2] = {
 					{ 0, 0 },
@@ -98,7 +99,6 @@ static void slc90e66_set_dma_mode(ide_drive_t *drive, const u8 speed)
 		}
 	} else {
 		const u8 mwdma_to_pio[] = { 0, 3, 4 };
-		u8 pio;
 
 		if (reg48 & u_flag)
 			pci_write_config_word(dev, 0x48, reg48 & ~u_flag);
@@ -106,11 +106,12 @@ static void slc90e66_set_dma_mode(ide_drive_t *drive, const u8 speed)
 			pci_write_config_word(dev, 0x4a, reg4a & ~a_speed);
 
 		if (speed >= XFER_MW_DMA_0)
-			pio = mwdma_to_pio[speed - XFER_MW_DMA_0];
+			drive->pio_mode =
+				mwdma_to_pio[speed - XFER_MW_DMA_0] + XFER_PIO_0;
 		else
-			pio = 2; /* only SWDMA2 is allowed */
+			drive->pio_mode = XFER_PIO_2; /* for SWDMA2 */
 
-		slc90e66_set_pio_mode(drive, pio);
+		slc90e66_set_pio_mode(hwif, drive);
 	}
 }
 
diff --git a/drivers/ide/tc86c001.c b/drivers/ide/tc86c001.c
index 05a93d6baecc..f2cb62bf3f22 100644
--- a/drivers/ide/tc86c001.c
+++ b/drivers/ide/tc86c001.c
@@ -41,9 +41,9 @@ static void tc86c001_set_mode(ide_drive_t *drive, const u8 speed)
 	outw(scr, scr_port);
 }
 
-static void tc86c001_set_pio_mode(ide_drive_t *drive, const u8 pio)
+static void tc86c001_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
-	tc86c001_set_mode(drive, XFER_PIO_0 + pio);
+	tc86c001_set_mode(drive, drive->pio_mode);
 }
 
 /*
diff --git a/drivers/ide/triflex.c b/drivers/ide/triflex.c
index 8773c3ba7462..d34a7eecdea5 100644
--- a/drivers/ide/triflex.c
+++ b/drivers/ide/triflex.c
@@ -82,9 +82,9 @@ static void triflex_set_mode(ide_drive_t *drive, const u8 speed)
 	pci_write_config_dword(dev, channel_offset, triflex_timings);
 }
 
-static void triflex_set_pio_mode(ide_drive_t *drive, const u8 pio)
+static void triflex_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
-	triflex_set_mode(drive, XFER_PIO_0 + pio);
+	triflex_set_mode(drive, drive->pio_mode);
 }
 
 static const struct ide_port_ops triflex_port_ops = {
diff --git a/drivers/ide/tx4938ide.c b/drivers/ide/tx4938ide.c
index fd59c0d235b5..326d4683488b 100644
--- a/drivers/ide/tx4938ide.c
+++ b/drivers/ide/tx4938ide.c
@@ -56,11 +56,10 @@ static void tx4938ide_tune_ebusc(unsigned int ebus_ch,
 		     &tx4938_ebuscptr->cr[ebus_ch]);
 }
 
-static void tx4938ide_set_pio_mode(ide_drive_t *drive, const u8 pio)
+static void tx4938ide_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
-	ide_hwif_t *hwif = drive->hwif;
 	struct tx4938ide_platform_info *pdata = hwif->dev->platform_data;
-	u8 safe = pio;
+	u8 safe = drive->pio_mode - XFER_PIO_0;
 	ide_drive_t *pair;
 
 	pair = ide_get_pair_dev(drive);
diff --git a/drivers/ide/tx4939ide.c b/drivers/ide/tx4939ide.c
index 64b58ecc3f0e..5228a4786de5 100644
--- a/drivers/ide/tx4939ide.c
+++ b/drivers/ide/tx4939ide.c
@@ -104,11 +104,11 @@ static void tx4939ide_writeb(u8 val, void __iomem *base, u32 reg)
 
 #define TX4939IDE_BASE(hwif)	((void __iomem *)(hwif)->extra_base)
 
-static void tx4939ide_set_pio_mode(ide_drive_t *drive, const u8 pio)
+static void tx4939ide_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
-	ide_hwif_t *hwif = drive->hwif;
 	int is_slave = drive->dn;
 	u32 mask, val;
+	const u8 pio = drive->pio_mode - XFER_PIO_0;
 	u8 safe = pio;
 	ide_drive_t *pair;
 
diff --git a/drivers/ide/umc8672.c b/drivers/ide/umc8672.c
index 60f936e2319c..47adcd09cb26 100644
--- a/drivers/ide/umc8672.c
+++ b/drivers/ide/umc8672.c
@@ -104,10 +104,11 @@ static void umc_set_speeds(u8 speeds[])
 		speeds[0], speeds[1], speeds[2], speeds[3]);
 }
 
-static void umc_set_pio_mode(ide_drive_t *drive, const u8 pio)
+static void umc_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
-	ide_hwif_t *hwif = drive->hwif, *mate = hwif->mate;
+	ide_hwif_t *mate = hwif->mate;
 	unsigned long uninitialized_var(flags);
+	const u8 pio = drive->pio_mode - XFER_PIO_0;
 
 	printk("%s: setting umc8672 to PIO mode%d (speed %d)\n",
 		drive->name, pio, pio_to_umc[pio]);
diff --git a/drivers/ide/via82cxxx.c b/drivers/ide/via82cxxx.c
index fbecf8ea8207..6d995fc9d4f5 100644
--- a/drivers/ide/via82cxxx.c
+++ b/drivers/ide/via82cxxx.c
@@ -208,15 +208,15 @@ static void via_set_drive(ide_drive_t *drive, const u8 speed)
 
 /**
  *	via_set_pio_mode	-	set host controller for PIO mode
+ *	@hwif: port
  *	@drive: drive
- *	@pio: PIO mode number
  *
  *	A callback from the upper layers for PIO-only tuning.
  */
 
-static void via_set_pio_mode(ide_drive_t *drive, const u8 pio)
+static void via_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
-	via_set_drive(drive, XFER_PIO_0 + pio);
+	via_set_drive(drive, drive->pio_mode);
 }
 
 static struct via_isa_bridge *via_config_find(struct pci_dev **isa)
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 746ef9fdabcb..803ec306883c 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -624,7 +624,7 @@ extern const struct ide_tp_ops default_tp_ops;
  */
 struct ide_port_ops {
 	void	(*init_dev)(ide_drive_t *);
-	void	(*set_pio_mode)(ide_drive_t *, const u8);
+	void	(*set_pio_mode)(struct hwif_s *, ide_drive_t *);
 	void	(*set_dma_mode)(ide_drive_t *, const u8);
 	int	(*reset_poll)(ide_drive_t *);
 	void	(*pre_reset)(ide_drive_t *);
-- 
cgit v1.2.3


From 8776168ca2151850164af1de5565d01f7b8b2c53 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Tue, 19 Jan 2010 01:45:29 -0800
Subject: ide: change ->set_dma_mode method parameters

Change ->set_dma_mode method parameters to match ->set_dmamode method
used in struct ata_port_operations.

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/aec62xx.c       | 10 +++++-----
 drivers/ide/alim15x3.c      |  6 +++---
 drivers/ide/amd74xx.c       |  7 ++++---
 drivers/ide/atiixp.c        |  7 ++++---
 drivers/ide/au1xxx-ide.c    |  4 ++--
 drivers/ide/cmd64x.c        |  4 ++--
 drivers/ide/cs5520.c        |  4 ++--
 drivers/ide/cs5530.c        |  6 +++---
 drivers/ide/cs5535.c        |  6 +++---
 drivers/ide/cs5536.c        |  7 ++++---
 drivers/ide/cy82c693.c      |  4 ++--
 drivers/ide/hpt366.c        |  7 ++++---
 drivers/ide/icside.c        |  3 ++-
 drivers/ide/ide-xfer-mode.c |  4 ++--
 drivers/ide/it8172.c        |  4 ++--
 drivers/ide/it8213.c        |  6 +++---
 drivers/ide/it821x.c        |  6 ++++--
 drivers/ide/jmicron.c       |  4 ++--
 drivers/ide/palm_bk3710.c   |  5 +++--
 drivers/ide/pdc202xx_new.c  |  4 ++--
 drivers/ide/pdc202xx_old.c  |  7 ++++---
 drivers/ide/piix.c          |  6 +++---
 drivers/ide/pmac.c          |  4 ++--
 drivers/ide/sc1200.c        |  4 ++--
 drivers/ide/scc_pata.c      |  6 +++---
 drivers/ide/serverworks.c   |  4 ++--
 drivers/ide/sgiioc4.c       |  2 +-
 drivers/ide/siimage.c       |  6 +++---
 drivers/ide/sis5513.c       |  4 +++-
 drivers/ide/sl82c105.c      |  3 ++-
 drivers/ide/slc90e66.c      |  4 ++--
 drivers/ide/tc86c001.c      |  7 ++++---
 drivers/ide/triflex.c       |  8 ++++----
 drivers/ide/tx4939ide.c     |  4 ++--
 drivers/ide/via82cxxx.c     |  9 +++++----
 include/linux/ide.h         |  2 +-
 36 files changed, 101 insertions(+), 87 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ide/aec62xx.c b/drivers/ide/aec62xx.c
index 3790847361c3..57d00caefc86 100644
--- a/drivers/ide/aec62xx.c
+++ b/drivers/ide/aec62xx.c
@@ -81,15 +81,15 @@ static u8 pci_bus_clock_list_ultra (u8 speed, struct chipset_bus_clock_list_entr
 	return chipset_table->ultra_settings;
 }
 
-static void aec6210_set_mode(ide_drive_t *drive, const u8 speed)
+static void aec6210_set_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
-	ide_hwif_t *hwif	= drive->hwif;
 	struct pci_dev *dev	= to_pci_dev(hwif->dev);
 	struct ide_host *host	= pci_get_drvdata(dev);
 	struct chipset_bus_clock_list_entry *bus_clock = host->host_priv;
 	u16 d_conf		= 0;
 	u8 ultra = 0, ultra_conf = 0;
 	u8 tmp0 = 0, tmp1 = 0, tmp2 = 0;
+	const u8 speed = drive->dma_mode;
 	unsigned long flags;
 
 	local_irq_save(flags);
@@ -109,15 +109,15 @@ static void aec6210_set_mode(ide_drive_t *drive, const u8 speed)
 	local_irq_restore(flags);
 }
 
-static void aec6260_set_mode(ide_drive_t *drive, const u8 speed)
+static void aec6260_set_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
-	ide_hwif_t *hwif	= drive->hwif;
 	struct pci_dev *dev	= to_pci_dev(hwif->dev);
 	struct ide_host *host	= pci_get_drvdata(dev);
 	struct chipset_bus_clock_list_entry *bus_clock = host->host_priv;
 	u8 unit			= drive->dn & 1;
 	u8 tmp1 = 0, tmp2 = 0;
 	u8 ultra = 0, drive_conf = 0, ultra_conf = 0;
+	const u8 speed = drive->dma_mode;
 	unsigned long flags;
 
 	local_irq_save(flags);
@@ -137,7 +137,7 @@ static void aec6260_set_mode(ide_drive_t *drive, const u8 speed)
 static void aec_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
 	drive->dma_mode = drive->pio_mode;
-	hwif->port_ops->set_dma_mode(drive, drive->dma_mode);
+	hwif->port_ops->set_dma_mode(hwif, drive);
 }
 
 static int init_chipset_aec62xx(struct pci_dev *dev)
diff --git a/drivers/ide/alim15x3.c b/drivers/ide/alim15x3.c
index 28cee1055f76..6f0debae4e27 100644
--- a/drivers/ide/alim15x3.c
+++ b/drivers/ide/alim15x3.c
@@ -121,16 +121,16 @@ static u8 ali_udma_filter(ide_drive_t *drive)
 
 /**
  *	ali_set_dma_mode	-	set host controller for DMA mode
+ *	@hwif: port
  *	@drive: drive
- *	@speed: DMA mode
  *
  *	Configure the hardware for the desired IDE transfer mode.
  */
 
-static void ali_set_dma_mode(ide_drive_t *drive, const u8 speed)
+static void ali_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
-	ide_hwif_t *hwif	= drive->hwif;
 	struct pci_dev *dev	= to_pci_dev(hwif->dev);
+	const u8 speed		= drive->dma_mode;
 	u8 speed1		= speed;
 	u8 unit			= drive->dn & 1;
 	u8 tmpbyte		= 0x00;
diff --git a/drivers/ide/amd74xx.c b/drivers/ide/amd74xx.c
index 3eee7be7ca6f..b7e105338205 100644
--- a/drivers/ide/amd74xx.c
+++ b/drivers/ide/amd74xx.c
@@ -79,14 +79,14 @@ static void amd_set_speed(struct pci_dev *dev, u8 dn, u8 udma_mask,
  * to a desired transfer mode.  It also can be called by upper layers.
  */
 
-static void amd_set_drive(ide_drive_t *drive, const u8 speed)
+static void amd_set_drive(ide_hwif_t *hwif, ide_drive_t *drive)
 {
-	ide_hwif_t *hwif = drive->hwif;
 	struct pci_dev *dev = to_pci_dev(hwif->dev);
 	ide_drive_t *peer = ide_get_pair_dev(drive);
 	struct ide_timing t, p;
 	int T, UT;
 	u8 udma_mask = hwif->ultra_mask;
+	const u8 speed = drive->dma_mode;
 
 	T = 1000000000 / amd_clock;
 	UT = (udma_mask == ATA_UDMA2) ? T : (T / 2);
@@ -110,7 +110,8 @@ static void amd_set_drive(ide_drive_t *drive, const u8 speed)
 
 static void amd_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
-	amd_set_drive(drive, drive->pio_mode);
+	drive->dma_mode = drive->pio_mode;
+	amd_set_drive(hwif, drive);
 }
 
 static void amd7409_cable_detect(struct pci_dev *dev)
diff --git a/drivers/ide/atiixp.c b/drivers/ide/atiixp.c
index b6848dfb93b0..15f0ead89f5c 100644
--- a/drivers/ide/atiixp.c
+++ b/drivers/ide/atiixp.c
@@ -75,21 +75,22 @@ static void atiixp_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 
 /**
  *	atiixp_set_dma_mode	-	set host controller for DMA mode
+ *	@hwif: port
  *	@drive: drive
- *	@speed: DMA mode
  *
  *	Set a ATIIXP host controller to the desired DMA mode.  This involves
  *	programming the right timing data into the PCI configuration space.
  */
 
-static void atiixp_set_dma_mode(ide_drive_t *drive, const u8 speed)
+static void atiixp_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
-	struct pci_dev *dev = to_pci_dev(drive->hwif->dev);
+	struct pci_dev *dev = to_pci_dev(hwif->dev);
 	unsigned long flags;
 	int timing_shift = (drive->dn ^ 1) * 8;
 	u32 tmp32;
 	u16 tmp16;
 	u16 udma_ctl = 0;
+	const u8 speed = drive->dma_mode;
 
 	spin_lock_irqsave(&atiixp_lock, flags);
 
diff --git a/drivers/ide/au1xxx-ide.c b/drivers/ide/au1xxx-ide.c
index c90e9b0a9f6e..e2fd378ba9de 100644
--- a/drivers/ide/au1xxx-ide.c
+++ b/drivers/ide/au1xxx-ide.c
@@ -160,11 +160,11 @@ static void au1xxx_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 	au_writel(mem_stcfg,MEM_STCFG2);
 }
 
-static void auide_set_dma_mode(ide_drive_t *drive, const u8 speed)
+static void auide_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
 	int mem_sttime = 0, mem_stcfg = au_readl(MEM_STCFG2);
 
-	switch(speed) {
+	switch (drive->dma_mode) {
 #ifdef CONFIG_BLK_DEV_IDE_AU1XXX_MDMA2_DBDMA
 	case XFER_MW_DMA_2:
 		mem_sttime = SBC_IDE_TIMING(MDMA2);
diff --git a/drivers/ide/cmd64x.c b/drivers/ide/cmd64x.c
index 0b11745937e7..a65a69171250 100644
--- a/drivers/ide/cmd64x.c
+++ b/drivers/ide/cmd64x.c
@@ -141,12 +141,12 @@ static void cmd64x_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 	cmd64x_program_timings(drive, XFER_PIO_0 + pio);
 }
 
-static void cmd64x_set_dma_mode(ide_drive_t *drive, const u8 speed)
+static void cmd64x_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
-	ide_hwif_t *hwif	= drive->hwif;
 	struct pci_dev *dev	= to_pci_dev(hwif->dev);
 	u8 unit			= drive->dn & 0x01;
 	u8 regU = 0, pciU	= hwif->channel ? UDIDETCR1 : UDIDETCR0;
+	const u8 speed		= drive->dma_mode;
 
 	pci_read_config_byte(dev, pciU, &regU);
 	regU &= ~(unit ? 0xCA : 0x35);
diff --git a/drivers/ide/cs5520.c b/drivers/ide/cs5520.c
index b8094f049f3e..2c1e5f7cd261 100644
--- a/drivers/ide/cs5520.c
+++ b/drivers/ide/cs5520.c
@@ -81,12 +81,12 @@ static void cs5520_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 		(cs5520_pio_clocks[pio].assert));
 }
 
-static void cs5520_set_dma_mode(ide_drive_t *drive, const u8 speed)
+static void cs5520_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
 	printk(KERN_ERR "cs55x0: bad ide timing.\n");
 
 	drive->pio_mode = XFER_PIO_0 + 0;
-	cs5520_set_pio_mode(drive->hwif, drive);
+	cs5520_set_pio_mode(hwif, drive);
 }
 
 static const struct ide_port_ops cs5520_port_ops = {
diff --git a/drivers/ide/cs5530.c b/drivers/ide/cs5530.c
index 4ced40255ad6..4dc4eb92b076 100644
--- a/drivers/ide/cs5530.c
+++ b/drivers/ide/cs5530.c
@@ -100,12 +100,12 @@ out:
 	return mask;
 }
 
-static void cs5530_set_dma_mode(ide_drive_t *drive, const u8 mode)
+static void cs5530_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
 	unsigned long basereg;
 	unsigned int reg, timings = 0;
 
-	switch (mode) {
+	switch (drive->dma_mode) {
 		case XFER_UDMA_0:	timings = 0x00921250; break;
 		case XFER_UDMA_1:	timings = 0x00911140; break;
 		case XFER_UDMA_2:	timings = 0x00911030; break;
@@ -113,7 +113,7 @@ static void cs5530_set_dma_mode(ide_drive_t *drive, const u8 mode)
 		case XFER_MW_DMA_1:	timings = 0x00012121; break;
 		case XFER_MW_DMA_2:	timings = 0x00002020; break;
 	}
-	basereg = CS5530_BASEREG(drive->hwif);
+	basereg = CS5530_BASEREG(hwif);
 	reg = inl(basereg + 4);			/* get drive0 config register */
 	timings |= reg & 0x80000000;		/* preserve PIO format bit */
 	if ((drive-> dn & 1) == 0) {		/* are we configuring drive0? */
diff --git a/drivers/ide/cs5535.c b/drivers/ide/cs5535.c
index 7974415ea89f..740002b2f3e8 100644
--- a/drivers/ide/cs5535.c
+++ b/drivers/ide/cs5535.c
@@ -129,15 +129,15 @@ static void cs5535_set_speed(ide_drive_t *drive, const u8 speed)
 
 /**
  *	cs5535_set_dma_mode	-	set host controller for DMA mode
+ *	@hwif: port
  *	@drive: drive
- *	@speed: DMA mode
  *
  *	Programs the chipset for DMA mode.
  */
 
-static void cs5535_set_dma_mode(ide_drive_t *drive, const u8 speed)
+static void cs5535_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
-	cs5535_set_speed(drive, speed);
+	cs5535_set_speed(drive, drive->dma_mode);
 }
 
 /**
diff --git a/drivers/ide/cs5536.c b/drivers/ide/cs5536.c
index b518ef0e9a35..70871fbc3c0a 100644
--- a/drivers/ide/cs5536.c
+++ b/drivers/ide/cs5536.c
@@ -173,11 +173,11 @@ static void cs5536_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 
 /**
  *	cs5536_set_dma_mode		-	DMA timing setup
+ *	@hwif: ATA port
  *	@drive: ATA device
- *	@mode: DMA mode
  */
 
-static void cs5536_set_dma_mode(ide_drive_t *drive, const u8 mode)
+static void cs5536_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
 	static const u8 udma_timings[6] = {
 		0xc2, 0xc1, 0xc0, 0xc4, 0xc5, 0xc6,
@@ -187,10 +187,11 @@ static void cs5536_set_dma_mode(ide_drive_t *drive, const u8 mode)
 		0x67, 0x21, 0x20,
 	};
 
-	struct pci_dev *pdev = to_pci_dev(drive->hwif->dev);
+	struct pci_dev *pdev = to_pci_dev(hwif->dev);
 	int dshift = (drive->dn & 1) ? IDE_D1_SHIFT : IDE_D0_SHIFT;
 	unsigned long timings = (unsigned long)ide_get_drivedata(drive);
 	u32 etc;
+	const u8 mode = drive->dma_mode;
 
 	cs5536_read(pdev, ETC, &etc);
 
diff --git a/drivers/ide/cy82c693.c b/drivers/ide/cy82c693.c
index ead65c394f00..9383f67deae1 100644
--- a/drivers/ide/cy82c693.c
+++ b/drivers/ide/cy82c693.c
@@ -53,9 +53,9 @@
  * set DMA mode a specific channel for CY82C693
  */
 
-static void cy82c693_set_dma_mode(ide_drive_t *drive, const u8 mode)
+static void cy82c693_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
-	ide_hwif_t *hwif = drive->hwif;
+	const u8 mode = drive->dma_mode;
 	u8 single = (mode & 0x10) >> 4, index = 0, data = 0;
 
 	index = hwif->channel ? CY82_INDEX_CHANNEL1 : CY82_INDEX_CHANNEL0;
diff --git a/drivers/ide/hpt366.c b/drivers/ide/hpt366.c
index f1dec519a9e6..b885c1d548f5 100644
--- a/drivers/ide/hpt366.c
+++ b/drivers/ide/hpt366.c
@@ -627,14 +627,14 @@ static u32 get_speed_setting(u8 speed, struct hpt_info *info)
 	return info->timings->clock_table[info->clock][i];
 }
 
-static void hpt3xx_set_mode(ide_drive_t *drive, const u8 speed)
+static void hpt3xx_set_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
-	ide_hwif_t *hwif	= drive->hwif;
 	struct pci_dev *dev	= to_pci_dev(hwif->dev);
 	struct hpt_info *info	= hpt3xx_get_info(hwif->dev);
 	struct hpt_timings *t	= info->timings;
 	u8  itr_addr		= 0x40 + (drive->dn * 4);
 	u32 old_itr		= 0;
+	const u8 speed		= drive->dma_mode;
 	u32 new_itr		= get_speed_setting(speed, info);
 	u32 itr_mask		= speed < XFER_MW_DMA_0 ? t->pio_mask :
 				 (speed < XFER_UDMA_0   ? t->dma_mask :
@@ -653,7 +653,8 @@ static void hpt3xx_set_mode(ide_drive_t *drive, const u8 speed)
 
 static void hpt3xx_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
-	hpt3xx_set_mode(drive, drive->pio_mode);
+	drive->dma_mode = drive->pio_mode;
+	hpt3xx_set_mode(hwif, drive);
 }
 
 static void hpt3xx_maskproc(ide_drive_t *drive, int mask)
diff --git a/drivers/ide/icside.c b/drivers/ide/icside.c
index 0f67f1abbbd3..26b6c0a1f772 100644
--- a/drivers/ide/icside.c
+++ b/drivers/ide/icside.c
@@ -185,10 +185,11 @@ static const expansioncard_ops_t icside_ops_arcin_v6 = {
  *	MW1	80	50	50	150	C
  *	MW2	70	25	25	120	C
  */
-static void icside_set_dma_mode(ide_drive_t *drive, const u8 xfer_mode)
+static void icside_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
 	unsigned long cycle_time;
 	int use_dma_info = 0;
+	const u8 xfer_mode = drive->dma_mode;
 
 	switch (xfer_mode) {
 	case XFER_MW_DMA_2:
diff --git a/drivers/ide/ide-xfer-mode.c b/drivers/ide/ide-xfer-mode.c
index a62fb03fc1cc..9b549e4d1848 100644
--- a/drivers/ide/ide-xfer-mode.c
+++ b/drivers/ide/ide-xfer-mode.c
@@ -168,11 +168,11 @@ int ide_set_dma_mode(ide_drive_t *drive, const u8 mode)
 		if (ide_config_drive_speed(drive, mode))
 			return -1;
 		drive->dma_mode = mode;
-		port_ops->set_dma_mode(drive, mode);
+		port_ops->set_dma_mode(hwif, drive);
 		return 0;
 	} else {
 		drive->dma_mode = mode;
-		port_ops->set_dma_mode(drive, mode);
+		port_ops->set_dma_mode(hwif, drive);
 		return ide_config_drive_speed(drive, mode);
 	}
 }
diff --git a/drivers/ide/it8172.c b/drivers/ide/it8172.c
index 9dfdc8741a7b..560e66d07659 100644
--- a/drivers/ide/it8172.c
+++ b/drivers/ide/it8172.c
@@ -77,14 +77,14 @@ static void it8172_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 	pci_write_config_dword(dev, 0x44, drive_timing);
 }
 
-static void it8172_set_dma_mode(ide_drive_t *drive, const u8 speed)
+static void it8172_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
-	ide_hwif_t *hwif	= drive->hwif;
 	struct pci_dev *dev	= to_pci_dev(hwif->dev);
 	int a_speed		= 3 << (drive->dn * 4);
 	int u_flag		= 1 << drive->dn;
 	int u_speed		= 0;
 	u8 reg48, reg4a;
+	const u8 speed		= drive->dma_mode;
 
 	pci_read_config_byte(dev, 0x48, &reg48);
 	pci_read_config_byte(dev, 0x4a, &reg4a);
diff --git a/drivers/ide/it8213.c b/drivers/ide/it8213.c
index 492c07d5f4f3..46816ba26416 100644
--- a/drivers/ide/it8213.c
+++ b/drivers/ide/it8213.c
@@ -74,15 +74,14 @@ static void it8213_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 
 /**
  *	it8213_set_dma_mode	-	set host controller for DMA mode
+ *	@hwif: port
  *	@drive: drive
- *	@speed: DMA mode
  *
  *	Tune the ITE chipset for the DMA mode.
  */
 
-static void it8213_set_dma_mode(ide_drive_t *drive, const u8 speed)
+static void it8213_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
-	ide_hwif_t *hwif	= drive->hwif;
 	struct pci_dev *dev	= to_pci_dev(hwif->dev);
 	u8 maslave		= 0x40;
 	int a_speed		= 3 << (drive->dn * 4);
@@ -92,6 +91,7 @@ static void it8213_set_dma_mode(ide_drive_t *drive, const u8 speed)
 	int u_speed		= 0;
 	u16			reg4042, reg4a;
 	u8			reg48, reg54, reg55;
+	const u8 speed		= drive->dma_mode;
 
 	pci_read_config_word(dev, maslave, &reg4042);
 	pci_read_config_byte(dev, 0x48, &reg48);
diff --git a/drivers/ide/it821x.c b/drivers/ide/it821x.c
index 69becb7b9656..56b79194156b 100644
--- a/drivers/ide/it821x.c
+++ b/drivers/ide/it821x.c
@@ -393,14 +393,16 @@ static int it821x_dma_end(ide_drive_t *drive)
 
 /**
  *	it821x_set_dma_mode	-	set host controller for DMA mode
+ *	@hwif: port
  *	@drive: drive
- *	@speed: DMA mode
  *
  *	Tune the ITE chipset for the desired DMA mode.
  */
 
-static void it821x_set_dma_mode(ide_drive_t *drive, const u8 speed)
+static void it821x_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
+	const u8 speed = drive->dma_mode;
+
 	/*
 	 * MWDMA tuning is really hard because our MWDMA and PIO
 	 * timings are kept in the same place.  We can switch in the
diff --git a/drivers/ide/jmicron.c b/drivers/ide/jmicron.c
index ebffb904ed24..74c2c4a6d909 100644
--- a/drivers/ide/jmicron.c
+++ b/drivers/ide/jmicron.c
@@ -86,13 +86,13 @@ static void jmicron_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 
 /**
  *	jmicron_set_dma_mode	-	set host controller for DMA mode
+ *	@hwif: port
  *	@drive: drive
- *	@mode: DMA mode
  *
  *	As the JMicron snoops for timings we don't need to do anything here.
  */
 
-static void jmicron_set_dma_mode(ide_drive_t *drive, const u8 mode)
+static void jmicron_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
 }
 
diff --git a/drivers/ide/palm_bk3710.c b/drivers/ide/palm_bk3710.c
index 0f262d07c378..35448c91b8c8 100644
--- a/drivers/ide/palm_bk3710.c
+++ b/drivers/ide/palm_bk3710.c
@@ -188,10 +188,11 @@ static void palm_bk3710_setpiomode(void __iomem *base, ide_drive_t *mate,
 	writel(val32, base + BK3710_REGRCVR);
 }
 
-static void palm_bk3710_set_dma_mode(ide_drive_t *drive, u8 xferspeed)
+static void palm_bk3710_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
 	int is_slave = drive->dn & 1;
-	void __iomem *base = (void *)drive->hwif->dma_base;
+	void __iomem *base = (void *)hwif->dma_base;
+	const u8 xferspeed = drive->dma_mode;
 
 	if (xferspeed >= XFER_UDMA_0) {
 		palm_bk3710_setudmamode(base, is_slave,
diff --git a/drivers/ide/pdc202xx_new.c b/drivers/ide/pdc202xx_new.c
index 874acd2bb6e6..9546fe2a93f7 100644
--- a/drivers/ide/pdc202xx_new.c
+++ b/drivers/ide/pdc202xx_new.c
@@ -129,11 +129,11 @@ static struct udma_timing {
 	{ 0x1a, 0x01, 0xcb },	/* UDMA mode 6 */
 };
 
-static void pdcnew_set_dma_mode(ide_drive_t *drive, const u8 speed)
+static void pdcnew_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
-	ide_hwif_t *hwif	= drive->hwif;
 	struct pci_dev *dev	= to_pci_dev(hwif->dev);
 	u8 adj			= (drive->dn & 1) ? 0x08 : 0x00;
+	const u8 speed		= drive->dma_mode;
 
 	/*
 	 * IDE core issues SETFEATURES_XFER to the drive first (thanks to
diff --git a/drivers/ide/pdc202xx_old.c b/drivers/ide/pdc202xx_old.c
index 402aab7f3baa..07cd37516ba6 100644
--- a/drivers/ide/pdc202xx_old.c
+++ b/drivers/ide/pdc202xx_old.c
@@ -21,11 +21,11 @@
 
 #define DRV_NAME "pdc202xx_old"
 
-static void pdc202xx_set_mode(ide_drive_t *drive, const u8 speed)
+static void pdc202xx_set_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
-	ide_hwif_t *hwif	= drive->hwif;
 	struct pci_dev *dev	= to_pci_dev(hwif->dev);
 	u8 drive_pci		= 0x60 + (drive->dn << 2);
+	const u8 speed		= drive->dma_mode;
 
 	u8			AP = 0, BP = 0, CP = 0;
 	u8			TA = 0, TB = 0, TC = 0;
@@ -78,7 +78,8 @@ static void pdc202xx_set_mode(ide_drive_t *drive, const u8 speed)
 
 static void pdc202xx_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
-	pdc202xx_set_mode(drive, drive->pio_mode);
+	drive->dma_mode = drive->pio_mode;
+	pdc202xx_set_mode(hwif, drive);
 }
 
 static int pdc202xx_test_irq(ide_hwif_t *hwif)
diff --git a/drivers/ide/piix.c b/drivers/ide/piix.c
index 64b3041daa60..1bdca49e5a03 100644
--- a/drivers/ide/piix.c
+++ b/drivers/ide/piix.c
@@ -127,16 +127,15 @@ static void piix_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 
 /**
  *	piix_set_dma_mode	-	set host controller for DMA mode
+ *	@hwif: port
  *	@drive: drive
- *	@speed: DMA mode
  *
  *	Set a PIIX host controller to the desired DMA mode.  This involves
  *	programming the right timing data into the PCI configuration space.
  */
 
-static void piix_set_dma_mode(ide_drive_t *drive, const u8 speed)
+static void piix_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
-	ide_hwif_t *hwif	= drive->hwif;
 	struct pci_dev *dev	= to_pci_dev(hwif->dev);
 	u8 maslave		= hwif->channel ? 0x42 : 0x40;
 	int a_speed		= 3 << (drive->dn * 4);
@@ -147,6 +146,7 @@ static void piix_set_dma_mode(ide_drive_t *drive, const u8 speed)
 	int			sitre;
 	u16			reg4042, reg4a;
 	u8			reg48, reg54, reg55;
+	const u8 speed		= drive->dma_mode;
 
 	pci_read_config_word(dev, maslave, &reg4042);
 	sitre = (reg4042 & 0x4000) ? 1 : 0;
diff --git a/drivers/ide/pmac.c b/drivers/ide/pmac.c
index a167968a2d42..9fae1fb1468b 100644
--- a/drivers/ide/pmac.c
+++ b/drivers/ide/pmac.c
@@ -777,14 +777,14 @@ set_timings_mdma(ide_drive_t *drive, int intf_type, u32 *timings, u32 *timings2,
 #endif	
 }
 
-static void pmac_ide_set_dma_mode(ide_drive_t *drive, const u8 speed)
+static void pmac_ide_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
-	ide_hwif_t *hwif = drive->hwif;
 	pmac_ide_hwif_t *pmif =
 		(pmac_ide_hwif_t *)dev_get_drvdata(hwif->gendev.parent);
 	int ret = 0;
 	u32 *timings, *timings2, tl[2];
 	u8 unit = drive->dn & 1;
+	const u8 speed = drive->dma_mode;
 
 	timings = &pmif->timings[unit];
 	timings2 = &pmif->timings[unit+2];
diff --git a/drivers/ide/sc1200.c b/drivers/ide/sc1200.c
index bb0166e460ab..134f1fd13866 100644
--- a/drivers/ide/sc1200.c
+++ b/drivers/ide/sc1200.c
@@ -122,13 +122,13 @@ out:
 	return mask;
 }
 
-static void sc1200_set_dma_mode(ide_drive_t *drive, const u8 mode)
+static void sc1200_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
-	ide_hwif_t		*hwif = drive->hwif;
 	struct pci_dev		*dev = to_pci_dev(hwif->dev);
 	unsigned int		reg, timings;
 	unsigned short		pci_clock;
 	unsigned int		basereg = hwif->channel ? 0x50 : 0x40;
+	const u8		mode = drive->dma_mode;
 
 	static const u32 udma_timing[3][3] = {
 		{ 0x00921250, 0x00911140, 0x00911030 },
diff --git a/drivers/ide/scc_pata.c b/drivers/ide/scc_pata.c
index 23e16e4460ee..e9d4b441d1c3 100644
--- a/drivers/ide/scc_pata.c
+++ b/drivers/ide/scc_pata.c
@@ -231,16 +231,15 @@ static void scc_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 
 /**
  *	scc_set_dma_mode	-	set host controller for DMA mode
+ *	@hwif: port
  *	@drive: drive
- *	@speed: DMA mode
  *
  *	Load the timing settings for this device mode into the
  *	controller.
  */
 
-static void scc_set_dma_mode(ide_drive_t *drive, const u8 speed)
+static void scc_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
-	ide_hwif_t *hwif = drive->hwif;
 	struct scc_ports *ports = ide_get_hwifdata(hwif);
 	unsigned long ctl_base = ports->ctl;
 	unsigned long cckctrl_port = ctl_base + 0xff0;
@@ -254,6 +253,7 @@ static void scc_set_dma_mode(ide_drive_t *drive, const u8 speed)
 	int offset, idx;
 	unsigned long reg;
 	unsigned long jcactsel;
+	const u8 speed = drive->dma_mode;
 
 	reg = in_be32((void __iomem *)cckctrl_port);
 	if (reg & CCKCTRL_ATACLKOEN) {
diff --git a/drivers/ide/serverworks.c b/drivers/ide/serverworks.c
index a56bc51ae032..35fb8dabb55d 100644
--- a/drivers/ide/serverworks.c
+++ b/drivers/ide/serverworks.c
@@ -128,14 +128,14 @@ static void svwks_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 	}
 }
 
-static void svwks_set_dma_mode(ide_drive_t *drive, const u8 speed)
+static void svwks_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
 	static const u8 udma_modes[]		= { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05 };
 	static const u8 dma_modes[]		= { 0x77, 0x21, 0x20 };
 	static const u8 drive_pci2[]		= { 0x45, 0x44, 0x47, 0x46 };
 
-	ide_hwif_t *hwif	= drive->hwif;
 	struct pci_dev *dev	= to_pci_dev(hwif->dev);
+	const u8 speed		= drive->dma_mode;
 	u8 unit			= drive->dn & 1;
 
 	u8 ultra_enable	 = 0, ultra_timing = 0, dma_timing = 0;
diff --git a/drivers/ide/sgiioc4.c b/drivers/ide/sgiioc4.c
index b7d61dc64096..e3ea591f66d3 100644
--- a/drivers/ide/sgiioc4.c
+++ b/drivers/ide/sgiioc4.c
@@ -255,7 +255,7 @@ static int sgiioc4_dma_end(ide_drive_t *drive)
 	return dma_stat;
 }
 
-static void sgiioc4_set_dma_mode(ide_drive_t *drive, const u8 speed)
+static void sgiioc4_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
 }
 
diff --git a/drivers/ide/siimage.c b/drivers/ide/siimage.c
index 97266958f744..2009ac2ff658 100644
--- a/drivers/ide/siimage.c
+++ b/drivers/ide/siimage.c
@@ -289,19 +289,18 @@ static void sil_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 
 /**
  *	sil_set_dma_mode	-	set host controller for DMA mode
+ *	@hwif: port
  *	@drive: drive
- *	@speed: DMA mode
  *
  *	Tune the SiI chipset for the desired DMA mode.
  */
 
-static void sil_set_dma_mode(ide_drive_t *drive, const u8 speed)
+static void sil_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
 	static const u8 ultra6[] = { 0x0F, 0x0B, 0x07, 0x05, 0x03, 0x02, 0x01 };
 	static const u8 ultra5[] = { 0x0C, 0x07, 0x05, 0x04, 0x02, 0x01 };
 	static const u16 dma[]	 = { 0x2208, 0x10C2, 0x10C1 };
 
-	ide_hwif_t *hwif	= drive->hwif;
 	struct pci_dev *dev	= to_pci_dev(hwif->dev);
 	unsigned long base	= (unsigned long)hwif->hwif_data;
 	u16 ultra = 0, multi	= 0;
@@ -311,6 +310,7 @@ static void sil_set_dma_mode(ide_drive_t *drive, const u8 speed)
 						: (mmio ? 0xB4 : 0x80);
 	unsigned long ma	= siimage_seldev(drive, 0x08);
 	unsigned long ua	= siimage_seldev(drive, 0x0C);
+	const u8 speed		= drive->dma_mode;
 
 	scsc  = sil_ioread8 (dev, base + (mmio ? 0x4A : 0x8A));
 	mode  = sil_ioread8 (dev, base + addr_mask);
diff --git a/drivers/ide/sis5513.c b/drivers/ide/sis5513.c
index 5a0192060531..db7f4e761dbc 100644
--- a/drivers/ide/sis5513.c
+++ b/drivers/ide/sis5513.c
@@ -340,8 +340,10 @@ static void sis_program_udma_timings(ide_drive_t *drive, const u8 mode)
 		sis_ata33_program_udma_timings(drive, mode);
 }
 
-static void sis_set_dma_mode(ide_drive_t *drive, const u8 speed)
+static void sis_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
+	const u8 speed = drive->dma_mode;
+
 	if (speed >= XFER_UDMA_0)
 		sis_program_udma_timings(drive, speed);
 	else
diff --git a/drivers/ide/sl82c105.c b/drivers/ide/sl82c105.c
index 419cd3bc6c84..f21dc2ad7682 100644
--- a/drivers/ide/sl82c105.c
+++ b/drivers/ide/sl82c105.c
@@ -92,11 +92,12 @@ static void sl82c105_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 /*
  * Configure the chipset for DMA mode.
  */
-static void sl82c105_set_dma_mode(ide_drive_t *drive, const u8 speed)
+static void sl82c105_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
 	static u16 mwdma_timings[] = {0x0707, 0x0201, 0x0200};
 	unsigned long timings = (unsigned long)ide_get_drivedata(drive);
 	u16 drv_ctrl;
+	const u8 speed = drive->dma_mode;
 
 	drv_ctrl = mwdma_timings[speed - XFER_MW_DMA_0];
 
diff --git a/drivers/ide/slc90e66.c b/drivers/ide/slc90e66.c
index 019777522cd2..864ffe0e26d9 100644
--- a/drivers/ide/slc90e66.c
+++ b/drivers/ide/slc90e66.c
@@ -72,14 +72,14 @@ static void slc90e66_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 	spin_unlock_irqrestore(&slc90e66_lock, flags);
 }
 
-static void slc90e66_set_dma_mode(ide_drive_t *drive, const u8 speed)
+static void slc90e66_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
-	ide_hwif_t *hwif	= drive->hwif;
 	struct pci_dev *dev	= to_pci_dev(hwif->dev);
 	u8 maslave		= hwif->channel ? 0x42 : 0x40;
 	int sitre = 0, a_speed	= 7 << (drive->dn * 4);
 	int u_speed = 0, u_flag = 1 << drive->dn;
 	u16			reg4042, reg44, reg48, reg4a;
+	const u8 speed		= drive->dma_mode;
 
 	pci_read_config_word(dev, maslave, &reg4042);
 	sitre = (reg4042 & 0x4000) ? 1 : 0;
diff --git a/drivers/ide/tc86c001.c b/drivers/ide/tc86c001.c
index f2cb62bf3f22..e444d24934b3 100644
--- a/drivers/ide/tc86c001.c
+++ b/drivers/ide/tc86c001.c
@@ -13,11 +13,11 @@
 
 #define DRV_NAME "tc86c001"
 
-static void tc86c001_set_mode(ide_drive_t *drive, const u8 speed)
+static void tc86c001_set_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
-	ide_hwif_t *hwif	= drive->hwif;
 	unsigned long scr_port	= hwif->config_data + (drive->dn ? 0x02 : 0x00);
 	u16 mode, scr		= inw(scr_port);
+	const u8 speed		= drive->dma_mode;
 
 	switch (speed) {
 	case XFER_UDMA_4:	mode = 0x00c0; break;
@@ -43,7 +43,8 @@ static void tc86c001_set_mode(ide_drive_t *drive, const u8 speed)
 
 static void tc86c001_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
-	tc86c001_set_mode(drive, drive->pio_mode);
+	drive->dma_mode = drive->pio_mode;
+	tc86c001_set_mode(hwif, drive);
 }
 
 /*
diff --git a/drivers/ide/triflex.c b/drivers/ide/triflex.c
index d34a7eecdea5..7953447eae0f 100644
--- a/drivers/ide/triflex.c
+++ b/drivers/ide/triflex.c
@@ -34,9 +34,8 @@
 
 #define DRV_NAME "triflex"
 
-static void triflex_set_mode(ide_drive_t *drive, const u8 speed)
+static void triflex_set_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
-	ide_hwif_t *hwif = drive->hwif;
 	struct pci_dev *dev = to_pci_dev(hwif->dev);
 	u32 triflex_timings = 0;
 	u16 timing = 0;
@@ -44,7 +43,7 @@ static void triflex_set_mode(ide_drive_t *drive, const u8 speed)
 
 	pci_read_config_dword(dev, channel_offset, &triflex_timings);
 
-	switch(speed) {
+	switch (drive->dma_mode) {
 		case XFER_MW_DMA_2:
 			timing = 0x0103; 
 			break;
@@ -84,7 +83,8 @@ static void triflex_set_mode(ide_drive_t *drive, const u8 speed)
 
 static void triflex_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
-	triflex_set_mode(drive, drive->pio_mode);
+	drive->dma_mode = drive->pio_mode;
+	triflex_set_mode(hwif, drive);
 }
 
 static const struct ide_port_ops triflex_port_ops = {
diff --git a/drivers/ide/tx4939ide.c b/drivers/ide/tx4939ide.c
index 5228a4786de5..f210633a3d57 100644
--- a/drivers/ide/tx4939ide.c
+++ b/drivers/ide/tx4939ide.c
@@ -125,10 +125,10 @@ static void tx4939ide_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 	/* tx4939ide_tf_load_fixup() will set the Sys_Ctl register */
 }
 
-static void tx4939ide_set_dma_mode(ide_drive_t *drive, const u8 mode)
+static void tx4939ide_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
-	ide_hwif_t *hwif = drive->hwif;
 	u32 mask, val;
+	const u8 mode = drive->dma_mode;
 
 	/* Update Data Transfer Mode for this drive. */
 	if (mode >= XFER_UDMA_0)
diff --git a/drivers/ide/via82cxxx.c b/drivers/ide/via82cxxx.c
index 6d995fc9d4f5..6769fe252b07 100644
--- a/drivers/ide/via82cxxx.c
+++ b/drivers/ide/via82cxxx.c
@@ -169,22 +169,22 @@ static void via_set_speed(ide_hwif_t *hwif, u8 dn, struct ide_timing *timing)
 
 /**
  *	via_set_drive		-	configure transfer mode
+ *	@hwif: port
  *	@drive: Drive to set up
- *	@speed: desired speed
  *
  *	via_set_drive() computes timing values configures the chipset to
  *	a desired transfer mode.  It also can be called by upper layers.
  */
 
-static void via_set_drive(ide_drive_t *drive, const u8 speed)
+static void via_set_drive(ide_hwif_t *hwif, ide_drive_t *drive)
 {
-	ide_hwif_t *hwif = drive->hwif;
 	ide_drive_t *peer = ide_get_pair_dev(drive);
 	struct pci_dev *dev = to_pci_dev(hwif->dev);
 	struct ide_host *host = pci_get_drvdata(dev);
 	struct via82cxxx_dev *vdev = host->host_priv;
 	struct ide_timing t, p;
 	unsigned int T, UT;
+	const u8 speed = drive->dma_mode;
 
 	T = 1000000000 / via_clock;
 
@@ -216,7 +216,8 @@ static void via_set_drive(ide_drive_t *drive, const u8 speed)
 
 static void via_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
-	via_set_drive(drive, drive->pio_mode);
+	drive->dma_mode = drive->pio_mode;
+	via_set_drive(hwif, drive);
 }
 
 static struct via_isa_bridge *via_config_find(struct pci_dev **isa)
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 803ec306883c..53ecdba82d72 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -625,7 +625,7 @@ extern const struct ide_tp_ops default_tp_ops;
 struct ide_port_ops {
 	void	(*init_dev)(ide_drive_t *);
 	void	(*set_pio_mode)(struct hwif_s *, ide_drive_t *);
-	void	(*set_dma_mode)(ide_drive_t *, const u8);
+	void	(*set_dma_mode)(struct hwif_s *, ide_drive_t *);
 	int	(*reset_poll)(ide_drive_t *);
 	void	(*pre_reset)(ide_drive_t *);
 	void	(*resetproc)(ide_drive_t *);
-- 
cgit v1.2.3


From 220c58bc6d1198c4c4e69a385d364602c38b6b1c Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Mon, 18 Jan 2010 07:22:38 +0000
Subject: ide: make ide_get_best_pio_mode() static

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/ide/ide-xfer-mode.c | 3 +--
 include/linux/ide.h         | 1 -
 2 files changed, 1 insertion(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ide/ide-xfer-mode.c b/drivers/ide/ide-xfer-mode.c
index 9b549e4d1848..5fc8d5c17de9 100644
--- a/drivers/ide/ide-xfer-mode.c
+++ b/drivers/ide/ide-xfer-mode.c
@@ -58,7 +58,7 @@ EXPORT_SYMBOL(ide_xfer_verbose);
  *	This is used by most chipset support modules when "auto-tuning".
  */
 
-u8 ide_get_best_pio_mode(ide_drive_t *drive, u8 mode_wanted, u8 max_mode)
+static u8 ide_get_best_pio_mode(ide_drive_t *drive, u8 mode_wanted, u8 max_mode)
 {
 	u16 *id = drive->id;
 	int pio_mode = -1, overridden = 0;
@@ -105,7 +105,6 @@ u8 ide_get_best_pio_mode(ide_drive_t *drive, u8 mode_wanted, u8 max_mode)
 
 	return pio_mode;
 }
-EXPORT_SYMBOL_GPL(ide_get_best_pio_mode);
 
 int ide_pio_need_iordy(ide_drive_t *drive, const u8 pio)
 {
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 53ecdba82d72..97e6ab435184 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -1496,7 +1496,6 @@ int ide_timing_compute(ide_drive_t *, u8, struct ide_timing *, int, int);
 #ifdef CONFIG_IDE_XFER_MODE
 int ide_scan_pio_blacklist(char *);
 const char *ide_xfer_verbose(u8);
-u8 ide_get_best_pio_mode(ide_drive_t *, u8, u8);
 int ide_pio_need_iordy(ide_drive_t *, const u8);
 int ide_set_pio_mode(ide_drive_t *, u8);
 int ide_set_dma_mode(ide_drive_t *, u8);
-- 
cgit v1.2.3


From 4f9c85a1b03bfa5c0a0d8488a3a7766f3c9fb756 Mon Sep 17 00:00:00 2001
From: Anton Vorontsov <avorontsov@ru.mvista.com>
Date: Mon, 18 Jan 2010 05:37:16 +0000
Subject: phylib: Move workqueue initialization to a proper place

commit 541cd3ee00a4fe975b22fac6a3bc846bacef37f7 ("phylib: Fix deadlock
on resume") caused TI DaVinci EMAC ethernet driver to oops upon resume:

 PM: resume of devices complete after 237.098 msecs
 Restarting tasks ... done.
 kernel BUG at kernel/workqueue.c:354!
 Unable to handle kernel NULL pointer dereference at virtual address 00000000
 [...]
 Backtrace:
 [<c002c598>] (__bug+0x0/0x2c) from [<c0052a54>] (queue_delayed_work_on+0x74/0xf8)
 [<c00529e0>] (queue_delayed_work_on+0x0/0xf8) from [<c0052b30>] (queue_delayed_work+0x2c/0x30)

The oops pops up because TI DaVinci EMAC driver detaches PHY on
suspend and attaches it back on resume. Attaching makes phylib call
phy_start_machine() that initializes a workqueue. On the other hand,
PHY's resume routine will call phy_start_machine() again, and that
will cause the oops since we just destroyed the already scheduled
workqueue.

This patch fixes the issue by moving workqueue initialization to
phy_device_create().

p.s. We don't see this oops with ucc_geth and gianfar drivers because
they perform a fine-grained suspend, i.e. they just stop the PHYs
without detaching.

Reported-by: Sekhar Nori <nsekhar@ti.com>
Signed-off-by: Anton Vorontsov <avorontsov@ru.mvista.com>
Tested-by: Sekhar Nori <nsekhar@ti.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/phy.c        | 4 +---
 drivers/net/phy/phy_device.c | 1 +
 include/linux/phy.h          | 1 +
 3 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index b0e9f9c51721..0295097d6c44 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -410,7 +410,6 @@ EXPORT_SYMBOL(phy_start_aneg);
 
 
 static void phy_change(struct work_struct *work);
-static void phy_state_machine(struct work_struct *work);
 
 /**
  * phy_start_machine - start PHY state machine tracking
@@ -430,7 +429,6 @@ void phy_start_machine(struct phy_device *phydev,
 {
 	phydev->adjust_state = handler;
 
-	INIT_DELAYED_WORK(&phydev->state_queue, phy_state_machine);
 	schedule_delayed_work(&phydev->state_queue, HZ);
 }
 
@@ -761,7 +759,7 @@ EXPORT_SYMBOL(phy_start);
  * phy_state_machine - Handle the state machine
  * @work: work_struct that describes the work to be done
  */
-static void phy_state_machine(struct work_struct *work)
+void phy_state_machine(struct work_struct *work)
 {
 	struct delayed_work *dwork = to_delayed_work(work);
 	struct phy_device *phydev =
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index 8212b2b93422..adbc0fded130 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -177,6 +177,7 @@ struct phy_device* phy_device_create(struct mii_bus *bus, int addr, int phy_id)
 	dev->state = PHY_DOWN;
 
 	mutex_init(&dev->lock);
+	INIT_DELAYED_WORK(&dev->state_queue, phy_state_machine);
 
 	return dev;
 }
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 7968defd2fa7..6a7eb402165d 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -485,6 +485,7 @@ void phy_driver_unregister(struct phy_driver *drv);
 int phy_driver_register(struct phy_driver *new_driver);
 void phy_prepare_link(struct phy_device *phydev,
 		void (*adjust_link)(struct net_device *));
+void phy_state_machine(struct work_struct *work);
 void phy_start_machine(struct phy_device *phydev,
 		void (*handler)(struct net_device *));
 void phy_stop_machine(struct phy_device *phydev);
-- 
cgit v1.2.3


From 11380a4b2d86fae9a6bce75c9373668cc323fe57 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 19 Jan 2010 13:46:10 -0800
Subject: net: Unexport napi_gro_flush().

Nothing outside of net/core/dev.c uses it.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 1 -
 net/core/dev.c            | 3 +--
 2 files changed, 1 insertion(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index a3fccc85b1a0..468a11dea58c 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1527,7 +1527,6 @@ extern int		netif_rx(struct sk_buff *skb);
 extern int		netif_rx_ni(struct sk_buff *skb);
 #define HAVE_NETIF_RECEIVE_SKB 1
 extern int		netif_receive_skb(struct sk_buff *skb);
-extern void		napi_gro_flush(struct napi_struct *napi);
 extern gro_result_t	dev_gro_receive(struct napi_struct *napi,
 					struct sk_buff *skb);
 extern gro_result_t	napi_skb_finish(gro_result_t ret, struct sk_buff *skb);
diff --git a/net/core/dev.c b/net/core/dev.c
index a008f6987a95..5747b9edc1bb 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2582,7 +2582,7 @@ out:
 	return netif_receive_skb(skb);
 }
 
-void napi_gro_flush(struct napi_struct *napi)
+static void napi_gro_flush(struct napi_struct *napi)
 {
 	struct sk_buff *skb, *next;
 
@@ -2595,7 +2595,6 @@ void napi_gro_flush(struct napi_struct *napi)
 	napi->gro_count = 0;
 	napi->gro_list = NULL;
 }
-EXPORT_SYMBOL(napi_gro_flush);
 
 enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 {
-- 
cgit v1.2.3


From 552e450929a7298cc8834fd2824a60b2e914f70e Mon Sep 17 00:00:00 2001
From: Feng Tang <feng.tang@intel.com>
Date: Wed, 20 Jan 2010 13:49:45 -0700
Subject: spi/dw_spi: refine the IRQ mode working flow

Now dw_spi core fully supports 3 transfer modes: pure polling,
DMA and IRQ mode. IRQ mode will use the FIFO half empty as
the IRQ trigger, so each interface driver need set the fifo_len,
so that core driver can handle it properly

Signed-off-by: Feng Tang <feng.tang@intel.com>
Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
---
 drivers/spi/dw_spi.c       | 64 ++++++++++++++++++++++++++++------------------
 drivers/spi/dw_spi_pci.c   |  1 +
 include/linux/spi/dw_spi.h |  1 +
 3 files changed, 41 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/spi/dw_spi.c b/drivers/spi/dw_spi.c
index 521d680af289..1bb709b3920f 100644
--- a/drivers/spi/dw_spi.c
+++ b/drivers/spi/dw_spi.c
@@ -358,6 +358,8 @@ static void transfer_complete(struct dw_spi *dws)
 static irqreturn_t interrupt_transfer(struct dw_spi *dws)
 {
 	u16 irq_status, irq_mask = 0x3f;
+	u32 int_level = dws->fifo_len / 2;
+	u32 left;
 
 	irq_status = dw_readw(dws, isr) & irq_mask;
 	/* Error handling */
@@ -369,22 +371,23 @@ static irqreturn_t interrupt_transfer(struct dw_spi *dws)
 		return IRQ_HANDLED;
 	}
 
-	/* INT comes from tx */
-	if (dws->tx && (irq_status & SPI_INT_TXEI)) {
-		while (dws->tx < dws->tx_end)
+	if (irq_status & SPI_INT_TXEI) {
+		spi_mask_intr(dws, SPI_INT_TXEI);
+
+		left = (dws->tx_end - dws->tx) / dws->n_bytes;
+		left = (left > int_level) ? int_level : left;
+
+		while (left--)
 			dws->write(dws);
+		dws->read(dws);
 
-		if (dws->tx == dws->tx_end) {
-			spi_mask_intr(dws, SPI_INT_TXEI);
+		/* Re-enable the IRQ if there is still data left to tx */
+		if (dws->tx_end > dws->tx)
+			spi_umask_intr(dws, SPI_INT_TXEI);
+		else
 			transfer_complete(dws);
-		}
 	}
 
-	/* INT comes from rx */
-	if (dws->rx && (irq_status & SPI_INT_RXFI)) {
-		if (dws->read(dws))
-			transfer_complete(dws);
-	}
 	return IRQ_HANDLED;
 }
 
@@ -428,6 +431,7 @@ static void pump_transfers(unsigned long data)
 	u8 bits = 0;
 	u8 imask = 0;
 	u8 cs_change = 0;
+	u16 txint_level = 0;
 	u16 clk_div = 0;
 	u32 speed = 0;
 	u32 cr0 = 0;
@@ -438,6 +442,9 @@ static void pump_transfers(unsigned long data)
 	chip = dws->cur_chip;
 	spi = message->spi;
 
+	if (unlikely(!chip->clk_div))
+		chip->clk_div = dws->max_freq / chip->speed_hz;
+
 	if (message->state == ERROR_STATE) {
 		message->status = -EIO;
 		goto early_exit;
@@ -492,7 +499,7 @@ static void pump_transfers(unsigned long data)
 
 			/* clk_div doesn't support odd number */
 			clk_div = dws->max_freq / speed;
-			clk_div = (clk_div >> 1) << 1;
+			clk_div = (clk_div + 1) & 0xfffe;
 
 			chip->speed_hz = speed;
 			chip->clk_div = clk_div;
@@ -535,11 +542,16 @@ static void pump_transfers(unsigned long data)
 	/* Check if current transfer is a DMA transaction */
 	dws->dma_mapped = map_dma_buffers(dws);
 
+	/*
+	 * Interrupt mode
+	 * we only need set the TXEI IRQ, as TX/RX always happen syncronizely
+	 */
 	if (!dws->dma_mapped && !chip->poll_mode) {
-		if (dws->rx)
-			imask |= SPI_INT_RXFI;
-		if (dws->tx)
-			imask |= SPI_INT_TXEI;
+		int templen = dws->len / dws->n_bytes;
+		txint_level = dws->fifo_len / 2;
+		txint_level = (templen > txint_level) ? txint_level : templen;
+
+		imask |= SPI_INT_TXEI;
 		dws->transfer_handler = interrupt_transfer;
 	}
 
@@ -549,21 +561,23 @@ static void pump_transfers(unsigned long data)
 	 *	2. clk_div is changed
 	 *	3. control value changes
 	 */
-	if (dw_readw(dws, ctrl0) != cr0 || cs_change || clk_div) {
+	if (dw_readw(dws, ctrl0) != cr0 || cs_change || clk_div || imask) {
 		spi_enable_chip(dws, 0);
 
 		if (dw_readw(dws, ctrl0) != cr0)
 			dw_writew(dws, ctrl0, cr0);
 
+		spi_set_clk(dws, clk_div ? clk_div : chip->clk_div);
+		spi_chip_sel(dws, spi->chip_select);
+
 		/* Set the interrupt mask, for poll mode just diable all int */
 		spi_mask_intr(dws, 0xff);
-		if (!chip->poll_mode)
+		if (imask)
 			spi_umask_intr(dws, imask);
+		if (txint_level)
+			dw_writew(dws, txfltr, txint_level);
 
-		spi_set_clk(dws, clk_div ? clk_div : chip->clk_div);
-		spi_chip_sel(dws, spi->chip_select);
 		spi_enable_chip(dws, 1);
-
 		if (cs_change)
 			dws->prev_chip = chip;
 	}
@@ -712,11 +726,11 @@ static int dw_spi_setup(struct spi_device *spi)
 	}
 	chip->bits_per_word = spi->bits_per_word;
 
+	if (!spi->max_speed_hz) {
+		dev_err(&spi->dev, "No max speed HZ parameter\n");
+		return -EINVAL;
+	}
 	chip->speed_hz = spi->max_speed_hz;
-	if (chip->speed_hz)
-		chip->clk_div = 25000000 / chip->speed_hz;
-	else
-		chip->clk_div = 8;	/* default value */
 
 	chip->tmode = 0; /* Tx & Rx */
 	/* Default SPI mode is SCPOL = 0, SCPH = 0 */
diff --git a/drivers/spi/dw_spi_pci.c b/drivers/spi/dw_spi_pci.c
index 7980f1443ce1..1f0735f9cc76 100644
--- a/drivers/spi/dw_spi_pci.c
+++ b/drivers/spi/dw_spi_pci.c
@@ -73,6 +73,7 @@ static int __devinit spi_pci_probe(struct pci_dev *pdev,
 	dws->num_cs = 4;
 	dws->max_freq = 25000000;	/* for Moorestwon */
 	dws->irq = pdev->irq;
+	dws->fifo_len = 40;		/* FIFO has 40 words buffer */
 
 	ret = dw_spi_add_host(dws);
 	if (ret)
diff --git a/include/linux/spi/dw_spi.h b/include/linux/spi/dw_spi.h
index 51b3e771a9a3..1a127a31e017 100644
--- a/include/linux/spi/dw_spi.h
+++ b/include/linux/spi/dw_spi.h
@@ -90,6 +90,7 @@ struct dw_spi {
 	unsigned long		paddr;
 	u32			iolen;
 	int			irq;
+	u32			fifo_len;	/* depth of the FIFO buffer */
 	u32			max_freq;	/* max bus freq supported */
 
 	u16			bus_num;
-- 
cgit v1.2.3


From 04a723ea9c53ba608b0411aa36948bb57c51a08e Mon Sep 17 00:00:00 2001
From: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Date: Wed, 6 Jan 2010 10:16:51 -0800
Subject: USB: Fix duplicate sysfs problem after device reset.

Borislav Petkov reports issues with duplicate sysfs endpoint files after a
resume from a hibernate.  It turns out that the code to support alternate
settings under xHCI has issues when a device with a non-default alternate
setting is reset during the hibernate:

[  427.681810] Restarting tasks ...
[  427.681995] hub 1-0:1.0: state 7 ports 6 chg 0004 evt 0000
[  427.682019] usb usb3: usb resume
[  427.682030] ohci_hcd 0000:00:12.0: wakeup root hub
[  427.682191] hub 1-0:1.0: port 2, status 0501, change 0000, 480 Mb/s
[  427.682205] usb 1-2: usb wakeup-resume
[  427.682226] usb 1-2: finish reset-resume
[  427.682886] done.
[  427.734658] ehci_hcd 0000:00:12.2: port 2 high speed
[  427.734663] ehci_hcd 0000:00:12.2: GetStatus port 2 status 001005 POWER sig=se0 PE CONNECT
[  427.746682] hub 3-0:1.0: hub_reset_resume
[  427.746693] hub 3-0:1.0: trying to enable port power on non-switchable hub
[  427.786715] usb 1-2: reset high speed USB device using ehci_hcd and address 2
[  427.839653] ehci_hcd 0000:00:12.2: port 2 high speed
[  427.839666] ehci_hcd 0000:00:12.2: GetStatus port 2 status 001005 POWER sig=se0 PE CONNECT
[  427.847717] ohci_hcd 0000:00:12.0: GetStatus roothub.portstatus [1] = 0x00010100 CSC PPS
[  427.915497] hub 1-2:1.0: remove_intf_ep_devs: if: ffff88022f9e8800 ->ep_devs_created: 1
[  427.915774] hub 1-2:1.0: remove_intf_ep_devs: bNumEndpoints: 1
[  427.915934] hub 1-2:1.0: if: ffff88022f9e8800: endpoint devs removed.
[  427.916158] hub 1-2:1.0: create_intf_ep_devs: if: ffff88022f9e8800 ->ep_devs_created: 0, ->unregistering: 0
[  427.916434] hub 1-2:1.0: create_intf_ep_devs: bNumEndpoints: 1
[  427.916609]  ep_81: create, parent hub
[  427.916632] ------------[ cut here ]------------
[  427.916644] WARNING: at fs/sysfs/dir.c:477 sysfs_add_one+0x82/0x96()
[  427.916649] Hardware name: System Product Name
[  427.916653] sysfs: cannot create duplicate filename '/devices/pci0000:00/0000:00:12.2/usb1/1-2/1-2:1.0/ep_81'
[  427.916658] Modules linked in: binfmt_misc kvm_amd kvm powernow_k8 cpufreq_ondemand cpufreq_powersave cpufreq_userspace freq_table cpufreq_conservative ipv6 vfat fat
+8250_pnp 8250 pcspkr ohci_hcd serial_core k10temp edac_core
[  427.916694] Pid: 278, comm: khubd Not tainted 2.6.33-rc2-00187-g08d869a-dirty #13
[  427.916699] Call Trace:

The problem is caused by a mismatch between the USB core's view of the
device state and the USB device and xHCI host's view of the device state.

After the device reset and re-configuration, the device and the xHCI host
think they are using alternate setting 0 of all interfaces.  However, the
USB core keeps track of the old state, which may include non-zero
alternate settings.  It uses intf->cur_altsetting to keep the endpoint
sysfs files for the old state across the reset.

The bandwidth allocation functions need to know what the xHCI host thinks
the current alternate settings are, so original patch set
intf->cur_altsetting to the alternate setting 0.  This caused duplicate
endpoint files to be created.

The solution is to not set intf->cur_altsetting before calling
usb_set_interface() in usb_reset_and_verify_device().  Instead, we add a
new flag to struct usb_interface to tell usb_hcd_alloc_bandwidth() to use
alternate setting 0 as the currently installed alternate setting.

Signed-off-by: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Tested-by: Borislav Petkov <petkovbb@googlemail.com>
Cc: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/core/hcd.c | 18 ++++++++++++++++++
 drivers/usb/core/hub.c | 15 +++++----------
 include/linux/usb.h    |  1 +
 3 files changed, 24 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c
index 0495fa651225..80995ef0868c 100644
--- a/drivers/usb/core/hcd.c
+++ b/drivers/usb/core/hcd.c
@@ -1684,6 +1684,24 @@ int usb_hcd_alloc_bandwidth(struct usb_device *udev,
 		}
 	}
 	if (cur_alt && new_alt) {
+		struct usb_interface *iface = usb_ifnum_to_if(udev,
+				cur_alt->desc.bInterfaceNumber);
+
+		if (iface->resetting_device) {
+			/*
+			 * The USB core just reset the device, so the xHCI host
+			 * and the device will think alt setting 0 is installed.
+			 * However, the USB core will pass in the alternate
+			 * setting installed before the reset as cur_alt.  Dig
+			 * out the alternate setting 0 structure, or the first
+			 * alternate setting if a broken device doesn't have alt
+			 * setting 0.
+			 */
+			cur_alt = usb_altnum_to_altsetting(iface, 0);
+			if (!cur_alt)
+				cur_alt = &iface->altsetting[0];
+		}
+
 		/* Drop all the endpoints in the current alt setting */
 		for (i = 0; i < cur_alt->desc.bNumEndpoints; i++) {
 			ret = hcd->driver->drop_endpoint(hcd, udev,
diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index b9f5fcd713e2..35cc8b9ba1f5 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -3695,19 +3695,14 @@ static int usb_reset_and_verify_device(struct usb_device *udev)
 			usb_enable_interface(udev, intf, true);
 			ret = 0;
 		} else {
-			/* We've just reset the device, so it will think alt
-			 * setting 0 is installed.  For usb_set_interface() to
-			 * work properly, we need to set the current alternate
-			 * interface setting to 0 (or the first alt setting, if
-			 * the device doesn't have alt setting 0).
+			/* Let the bandwidth allocation function know that this
+			 * device has been reset, and it will have to use
+			 * alternate setting 0 as the current alternate setting.
 			 */
-			intf->cur_altsetting =
-				usb_find_alt_setting(config, i, 0);
-			if (!intf->cur_altsetting)
-				intf->cur_altsetting =
-					&config->intf_cache[i]->altsetting[0];
+			intf->resetting_device = 1;
 			ret = usb_set_interface(udev, desc->bInterfaceNumber,
 					desc->bAlternateSetting);
+			intf->resetting_device = 0;
 		}
 		if (ret < 0) {
 			dev_err(&udev->dev, "failed to restore interface %d "
diff --git a/include/linux/usb.h b/include/linux/usb.h
index e101a2d04d75..d7ace1b80f09 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -192,6 +192,7 @@ struct usb_interface {
 	unsigned needs_altsetting0:1;	/* switch to altsetting 0 is pending */
 	unsigned needs_binding:1;	/* needs delayed unbind/rebind */
 	unsigned reset_running:1;
+	unsigned resetting_device:1;	/* true: bandwidth alloc after reset */
 
 	struct device dev;		/* interface specific device info */
 	struct device *usb_dev;
-- 
cgit v1.2.3


From 3bf127637e22ddf95e67e10a23c339cee3d52429 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@opensource.se>
Date: Thu, 21 Jan 2010 00:02:36 -0800
Subject: Input: sh_keysc - add mode 4 and mode 5 support

Add Mode 4 and Mode 5 support to the SH_KEYSC driver. These modes allow
slightly larger key pad matrixes.

While at it, make use of resource_size().

Signed-off-by: Magnus Damm <damm@opensource.se>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/input/keyboard/sh_keysc.c | 6 +++---
 include/linux/input/sh_keysc.h    | 5 +++--
 2 files changed, 6 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/input/keyboard/sh_keysc.c b/drivers/input/keyboard/sh_keysc.c
index 076111fc72d2..25706f802258 100644
--- a/drivers/input/keyboard/sh_keysc.c
+++ b/drivers/input/keyboard/sh_keysc.c
@@ -36,6 +36,8 @@ static const struct {
 	[SH_KEYSC_MODE_1] = { 0, 6, 5 },
 	[SH_KEYSC_MODE_2] = { 1, 5, 6 },
 	[SH_KEYSC_MODE_3] = { 2, 4, 7 },
+	[SH_KEYSC_MODE_4] = { 3, 6, 6 },
+	[SH_KEYSC_MODE_5] = { 4, 6, 7 },
 };
 
 struct sh_keysc_priv {
@@ -122,8 +124,6 @@ static irqreturn_t sh_keysc_isr(int irq, void *dev_id)
 	return IRQ_HANDLED;
 }
 
-#define res_size(res) ((res)->end - (res)->start + 1)
-
 static int __devinit sh_keysc_probe(struct platform_device *pdev)
 {
 	struct sh_keysc_priv *priv;
@@ -164,7 +164,7 @@ static int __devinit sh_keysc_probe(struct platform_device *pdev)
 	memcpy(&priv->pdata, pdev->dev.platform_data, sizeof(priv->pdata));
 	pdata = &priv->pdata;
 
-	priv->iomem_base = ioremap_nocache(res->start, res_size(res));
+	priv->iomem_base = ioremap_nocache(res->start, resource_size(res));
 	if (priv->iomem_base == NULL) {
 		dev_err(&pdev->dev, "failed to remap I/O memory\n");
 		error = -ENXIO;
diff --git a/include/linux/input/sh_keysc.h b/include/linux/input/sh_keysc.h
index c211b5cf08e6..2aff38bcf2ba 100644
--- a/include/linux/input/sh_keysc.h
+++ b/include/linux/input/sh_keysc.h
@@ -1,10 +1,11 @@
 #ifndef __SH_KEYSC_H__
 #define __SH_KEYSC_H__
 
-#define SH_KEYSC_MAXKEYS 30
+#define SH_KEYSC_MAXKEYS 42
 
 struct sh_keysc_info {
-	enum { SH_KEYSC_MODE_1, SH_KEYSC_MODE_2, SH_KEYSC_MODE_3 } mode;
+	enum { SH_KEYSC_MODE_1, SH_KEYSC_MODE_2, SH_KEYSC_MODE_3,
+	       SH_KEYSC_MODE_4, SH_KEYSC_MODE_5 } mode;
 	int scan_timing; /* 0 -> 7, see KYCR1, SCN[2:0] */
 	int delay;
 	int kycr2_delay;
-- 
cgit v1.2.3


From 50b926e439620c469565e8be0f28be78f5fca1ce Mon Sep 17 00:00:00 2001
From: Mike Galbraith <efault@gmx.de>
Date: Mon, 4 Jan 2010 14:44:56 +0100
Subject: sched: Fix vmark regression on big machines

SD_PREFER_SIBLING is set at the CPU domain level if power saving isn't
enabled, leading to many cache misses on large machines as we traverse
looking for an idle shared cache to wake to.  Change the enabler of
select_idle_sibling() to SD_SHARE_PKG_RESOURCES, and enable same at the
sibling domain level.

Reported-by: Lin Ming <ming.m.lin@intel.com>
Signed-off-by: Mike Galbraith <efault@gmx.de>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1262612696.15495.15.camel@marge.simson.net>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/topology.h | 2 +-
 kernel/sched_fair.c      | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/topology.h b/include/linux/topology.h
index 57e63579bfdd..5b81156780b1 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -99,7 +99,7 @@ int arch_update_cpu_topology(void);
 				| 1*SD_WAKE_AFFINE			\
 				| 1*SD_SHARE_CPUPOWER			\
 				| 0*SD_POWERSAVINGS_BALANCE		\
-				| 0*SD_SHARE_PKG_RESOURCES		\
+				| 1*SD_SHARE_PKG_RESOURCES		\
 				| 0*SD_SERIALIZE			\
 				| 0*SD_PREFER_SIBLING			\
 				,					\
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 42ac3c9f66f6..8fe7ee81c552 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1508,7 +1508,7 @@ static int select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flag
 			 * If there's an idle sibling in this domain, make that
 			 * the wake_affine target instead of the current cpu.
 			 */
-			if (tmp->flags & SD_PREFER_SIBLING)
+			if (tmp->flags & SD_SHARE_PKG_RESOURCES)
 				target = select_idle_sibling(p, tmp, target);
 
 			if (target >= 0) {
-- 
cgit v1.2.3


From 3d45fd804a95055ecab5b3eed81f5ab2dbb047a2 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Thu, 17 Dec 2009 17:12:46 +0100
Subject: sched: Remove the sched_class load_balance methods

Take out the sched_class methods for load-balancing.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h   |  8 ------
 kernel/sched.c          | 26 -------------------
 kernel/sched_fair.c     | 66 +++++++++++++++++++++++++++----------------------
 kernel/sched_idletask.c | 21 ----------------
 kernel/sched_rt.c       | 20 ---------------
 5 files changed, 37 insertions(+), 104 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index f2f842db03ce..50d685cde70e 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1087,14 +1087,6 @@ struct sched_class {
 #ifdef CONFIG_SMP
 	int  (*select_task_rq)(struct task_struct *p, int sd_flag, int flags);
 
-	unsigned long (*load_balance) (struct rq *this_rq, int this_cpu,
-			struct rq *busiest, unsigned long max_load_move,
-			struct sched_domain *sd, enum cpu_idle_type idle,
-			int *all_pinned, int *this_best_prio);
-
-	int (*move_one_task) (struct rq *this_rq, int this_cpu,
-			      struct rq *busiest, struct sched_domain *sd,
-			      enum cpu_idle_type idle);
 	void (*pre_schedule) (struct rq *this_rq, struct task_struct *task);
 	void (*post_schedule) (struct rq *this_rq);
 	void (*task_waking) (struct rq *this_rq, struct task_struct *task);
diff --git a/kernel/sched.c b/kernel/sched.c
index 13a2acf18b2d..c0be07932a8d 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1390,32 +1390,6 @@ static const u32 prio_to_wmult[40] = {
  /*  15 */ 119304647, 148102320, 186737708, 238609294, 286331153,
 };
 
-static void activate_task(struct rq *rq, struct task_struct *p, int wakeup);
-
-/*
- * runqueue iterator, to support SMP load-balancing between different
- * scheduling classes, without having to expose their internal data
- * structures to the load-balancing proper:
- */
-struct rq_iterator {
-	void *arg;
-	struct task_struct *(*start)(void *);
-	struct task_struct *(*next)(void *);
-};
-
-#ifdef CONFIG_SMP
-static unsigned long
-balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
-	      unsigned long max_load_move, struct sched_domain *sd,
-	      enum cpu_idle_type idle, int *all_pinned,
-	      int *this_best_prio, struct rq_iterator *iterator);
-
-static int
-iter_move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest,
-		   struct sched_domain *sd, enum cpu_idle_type idle,
-		   struct rq_iterator *iterator);
-#endif
-
 /* Time spent by the tasks of the cpu accounting group executing in ... */
 enum cpuacct_stat_index {
 	CPUACCT_STAT_USER,	/* ... user mode */
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 5116b81d7727..faf9a2f099ab 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1851,6 +1851,24 @@ static struct task_struct *load_balance_next_fair(void *arg)
 	return __load_balance_iterator(cfs_rq, cfs_rq->balance_iterator);
 }
 
+/*
+ * runqueue iterator, to support SMP load-balancing between different
+ * scheduling classes, without having to expose their internal data
+ * structures to the load-balancing proper:
+ */
+struct rq_iterator {
+	void *arg;
+	struct task_struct *(*start)(void *);
+	struct task_struct *(*next)(void *);
+};
+
+static unsigned long
+balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
+		unsigned long max_load_move, struct sched_domain *sd,
+		enum cpu_idle_type idle, int *all_pinned,
+		int *this_best_prio, struct rq_iterator *iterator);
+
+
 static unsigned long
 __load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
 		unsigned long max_load_move, struct sched_domain *sd,
@@ -1929,8 +1947,20 @@ load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
 #endif
 
 static int
-move_one_task_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
-		   struct sched_domain *sd, enum cpu_idle_type idle)
+iter_move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest,
+		struct sched_domain *sd, enum cpu_idle_type idle,
+		struct rq_iterator *iterator);
+
+/*
+ * move_one_task tries to move exactly one task from busiest to this_rq, as
+ * part of active balancing operations within "domain".
+ * Returns 1 if successful and 0 otherwise.
+ *
+ * Called with both runqueues locked.
+ */
+static int
+move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest,
+	      struct sched_domain *sd, enum cpu_idle_type idle)
 {
 	struct cfs_rq *busy_cfs_rq;
 	struct rq_iterator cfs_rq_iterator;
@@ -2094,16 +2124,15 @@ static int move_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
 		      struct sched_domain *sd, enum cpu_idle_type idle,
 		      int *all_pinned)
 {
-	const struct sched_class *class = sched_class_highest;
-	unsigned long total_load_moved = 0;
+	unsigned long total_load_moved = 0, load_moved;
 	int this_best_prio = this_rq->curr->prio;
 
 	do {
-		total_load_moved +=
-			class->load_balance(this_rq, this_cpu, busiest,
+		load_moved = load_balance_fair(this_rq, this_cpu, busiest,
 				max_load_move - total_load_moved,
 				sd, idle, all_pinned, &this_best_prio);
-		class = class->next;
+
+		total_load_moved += load_moved;
 
 #ifdef CONFIG_PREEMPT
 		/*
@@ -2114,7 +2143,7 @@ static int move_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
 		if (idle == CPU_NEWLY_IDLE && this_rq->nr_running)
 			break;
 #endif
-	} while (class && max_load_move > total_load_moved);
+	} while (load_moved && max_load_move > total_load_moved);
 
 	return total_load_moved > 0;
 }
@@ -2145,25 +2174,6 @@ iter_move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest,
 	return 0;
 }
 
-/*
- * move_one_task tries to move exactly one task from busiest to this_rq, as
- * part of active balancing operations within "domain".
- * Returns 1 if successful and 0 otherwise.
- *
- * Called with both runqueues locked.
- */
-static int move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest,
-			 struct sched_domain *sd, enum cpu_idle_type idle)
-{
-	const struct sched_class *class;
-
-	for_each_class(class) {
-		if (class->move_one_task(this_rq, this_cpu, busiest, sd, idle))
-			return 1;
-	}
-
-	return 0;
-}
 /********** Helpers for find_busiest_group ************************/
 /*
  * sd_lb_stats - Structure to store the statistics of a sched_domain
@@ -3873,8 +3883,6 @@ static const struct sched_class fair_sched_class = {
 #ifdef CONFIG_SMP
 	.select_task_rq		= select_task_rq_fair,
 
-	.load_balance		= load_balance_fair,
-	.move_one_task		= move_one_task_fair,
 	.rq_online		= rq_online_fair,
 	.rq_offline		= rq_offline_fair,
 
diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c
index 01332bfc61a7..a8a6d8a50947 100644
--- a/kernel/sched_idletask.c
+++ b/kernel/sched_idletask.c
@@ -44,24 +44,6 @@ static void put_prev_task_idle(struct rq *rq, struct task_struct *prev)
 {
 }
 
-#ifdef CONFIG_SMP
-static unsigned long
-load_balance_idle(struct rq *this_rq, int this_cpu, struct rq *busiest,
-		  unsigned long max_load_move,
-		  struct sched_domain *sd, enum cpu_idle_type idle,
-		  int *all_pinned, int *this_best_prio)
-{
-	return 0;
-}
-
-static int
-move_one_task_idle(struct rq *this_rq, int this_cpu, struct rq *busiest,
-		   struct sched_domain *sd, enum cpu_idle_type idle)
-{
-	return 0;
-}
-#endif
-
 static void task_tick_idle(struct rq *rq, struct task_struct *curr, int queued)
 {
 }
@@ -119,9 +101,6 @@ static const struct sched_class idle_sched_class = {
 
 #ifdef CONFIG_SMP
 	.select_task_rq		= select_task_rq_idle,
-
-	.load_balance		= load_balance_idle,
-	.move_one_task		= move_one_task_idle,
 #endif
 
 	.set_curr_task          = set_curr_task_idle,
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 072b3fcee8d8..502bb614e40a 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -1481,24 +1481,6 @@ static void task_woken_rt(struct rq *rq, struct task_struct *p)
 		push_rt_tasks(rq);
 }
 
-static unsigned long
-load_balance_rt(struct rq *this_rq, int this_cpu, struct rq *busiest,
-		unsigned long max_load_move,
-		struct sched_domain *sd, enum cpu_idle_type idle,
-		int *all_pinned, int *this_best_prio)
-{
-	/* don't touch RT tasks */
-	return 0;
-}
-
-static int
-move_one_task_rt(struct rq *this_rq, int this_cpu, struct rq *busiest,
-		 struct sched_domain *sd, enum cpu_idle_type idle)
-{
-	/* don't touch RT tasks */
-	return 0;
-}
-
 static void set_cpus_allowed_rt(struct task_struct *p,
 				const struct cpumask *new_mask)
 {
@@ -1746,8 +1728,6 @@ static const struct sched_class rt_sched_class = {
 #ifdef CONFIG_SMP
 	.select_task_rq		= select_task_rq_rt,
 
-	.load_balance		= load_balance_rt,
-	.move_one_task		= move_one_task_rt,
 	.set_cpus_allowed       = set_cpus_allowed_rt,
 	.rq_online              = rq_online_rt,
 	.rq_offline             = rq_offline_rt,
-- 
cgit v1.2.3


From 7c9414385ebfdd87cc542d4e7e3bb0dbb2d3ce25 Mon Sep 17 00:00:00 2001
From: Dhaval Giani <dhaval.giani@gmail.com>
Date: Wed, 20 Jan 2010 13:26:18 +0100
Subject: sched: Remove USER_SCHED

Remove the USER_SCHED feature. It has been scheduled to be removed in
2.6.34 as per http://marc.info/?l=linux-kernel&m=125728479022976&w=2

Signed-off-by: Dhaval Giani <dhaval.giani@gmail.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1263990378.24844.3.camel@localhost>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 Documentation/feature-removal-schedule.txt |  15 --
 include/linux/sched.h                      |  14 +-
 init/Kconfig                               |  81 +++-----
 kernel/ksysfs.c                            |   8 -
 kernel/sched.c                             | 114 +----------
 kernel/sys.c                               |   5 -
 kernel/user.c                              | 305 -----------------------------
 7 files changed, 38 insertions(+), 504 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index 870d190fe617..04a3fc3d139b 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -6,21 +6,6 @@ be removed from this file.
 
 ---------------------------
 
-What:	USER_SCHED
-When:	2.6.34
-
-Why:	USER_SCHED was implemented as a proof of concept for group scheduling.
-	The effect of USER_SCHED can already be achieved from userspace with
-	the help of libcgroup. The removal of USER_SCHED will also simplify
-	the scheduler code with the removal of one major ifdef. There are also
-	issues USER_SCHED has with USER_NS. A decision was taken not to fix
-	those and instead remove USER_SCHED. Also new group scheduling
-	features will not be implemented for USER_SCHED.
-
-Who:	Dhaval Giani <dhaval@linux.vnet.ibm.com>
-
----------------------------
-
 What:	PRISM54
 When:	2.6.34
 
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 50d685cde70e..8b079735ae5f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -731,14 +731,6 @@ struct user_struct {
 	uid_t uid;
 	struct user_namespace *user_ns;
 
-#ifdef CONFIG_USER_SCHED
-	struct task_group *tg;
-#ifdef CONFIG_SYSFS
-	struct kobject kobj;
-	struct delayed_work work;
-#endif
-#endif
-
 #ifdef CONFIG_PERF_EVENTS
 	atomic_long_t locked_vm;
 #endif
@@ -2502,13 +2494,9 @@ extern long sched_getaffinity(pid_t pid, struct cpumask *mask);
 
 extern void normalize_rt_tasks(void);
 
-#ifdef CONFIG_GROUP_SCHED
+#ifdef CONFIG_CGROUP_SCHED
 
 extern struct task_group init_task_group;
-#ifdef CONFIG_USER_SCHED
-extern struct task_group root_task_group;
-extern void set_tg_uid(struct user_struct *user);
-#endif
 
 extern struct task_group *sched_create_group(struct task_group *parent);
 extern void sched_destroy_group(struct task_group *tg);
diff --git a/init/Kconfig b/init/Kconfig
index a23da9f01803..e9fa3007a6fc 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -435,57 +435,6 @@ config LOG_BUF_SHIFT
 config HAVE_UNSTABLE_SCHED_CLOCK
 	bool
 
-config GROUP_SCHED
-	bool "Group CPU scheduler"
-	depends on EXPERIMENTAL
-	default n
-	help
-	  This feature lets CPU scheduler recognize task groups and control CPU
-	  bandwidth allocation to such task groups.
-	  In order to create a group from arbitrary set of processes, use
-	  CONFIG_CGROUPS. (See Control Group support.)
-
-config FAIR_GROUP_SCHED
-	bool "Group scheduling for SCHED_OTHER"
-	depends on GROUP_SCHED
-	default GROUP_SCHED
-
-config RT_GROUP_SCHED
-	bool "Group scheduling for SCHED_RR/FIFO"
-	depends on EXPERIMENTAL
-	depends on GROUP_SCHED
-	default n
-	help
-	  This feature lets you explicitly allocate real CPU bandwidth
-	  to users or control groups (depending on the "Basis for grouping tasks"
-	  setting below. If enabled, it will also make it impossible to
-	  schedule realtime tasks for non-root users until you allocate
-	  realtime bandwidth for them.
-	  See Documentation/scheduler/sched-rt-group.txt for more information.
-
-choice
-	depends on GROUP_SCHED
-	prompt "Basis for grouping tasks"
-	default USER_SCHED
-
-config USER_SCHED
-	bool "user id"
-	help
-	  This option will choose userid as the basis for grouping
-	  tasks, thus providing equal CPU bandwidth to each user.
-
-config CGROUP_SCHED
-	bool "Control groups"
- 	depends on CGROUPS
- 	help
-	  This option allows you to create arbitrary task groups
-	  using the "cgroup" pseudo filesystem and control
-	  the cpu bandwidth allocated to each such task group.
-	  Refer to Documentation/cgroups/cgroups.txt for more
-	  information on "cgroup" pseudo filesystem.
-
-endchoice
-
 menuconfig CGROUPS
 	boolean "Control Group support"
 	help
@@ -606,6 +555,36 @@ config CGROUP_MEM_RES_CTLR_SWAP
 	  Now, memory usage of swap_cgroup is 2 bytes per entry. If swap page
 	  size is 4096bytes, 512k per 1Gbytes of swap.
 
+menuconfig CGROUP_SCHED
+	bool "Group CPU scheduler"
+	depends on EXPERIMENTAL && CGROUPS
+	default n
+	help
+	  This feature lets CPU scheduler recognize task groups and control CPU
+	  bandwidth allocation to such task groups. It uses cgroups to group
+	  tasks.
+
+if CGROUP_SCHED
+config FAIR_GROUP_SCHED
+	bool "Group scheduling for SCHED_OTHER"
+	depends on CGROUP_SCHED
+	default CGROUP_SCHED
+
+config RT_GROUP_SCHED
+	bool "Group scheduling for SCHED_RR/FIFO"
+	depends on EXPERIMENTAL
+	depends on CGROUP_SCHED
+	default n
+	help
+	  This feature lets you explicitly allocate real CPU bandwidth
+	  to users or control groups (depending on the "Basis for grouping tasks"
+	  setting below. If enabled, it will also make it impossible to
+	  schedule realtime tasks for non-root users until you allocate
+	  realtime bandwidth for them.
+	  See Documentation/scheduler/sched-rt-group.txt for more information.
+
+endif #CGROUP_SCHED
+
 endif # CGROUPS
 
 config MM_OWNER
diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c
index 3feaf5a74514..6b1ccc3f0205 100644
--- a/kernel/ksysfs.c
+++ b/kernel/ksysfs.c
@@ -197,16 +197,8 @@ static int __init ksysfs_init(void)
 			goto group_exit;
 	}
 
-	/* create the /sys/kernel/uids/ directory */
-	error = uids_sysfs_init();
-	if (error)
-		goto notes_exit;
-
 	return 0;
 
-notes_exit:
-	if (notes_size > 0)
-		sysfs_remove_bin_file(kernel_kobj, &notes_attr);
 group_exit:
 	sysfs_remove_group(kernel_kobj, &kernel_attr_group);
 kset_exit:
diff --git a/kernel/sched.c b/kernel/sched.c
index c0be07932a8d..41e76d325648 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -233,7 +233,7 @@ static void destroy_rt_bandwidth(struct rt_bandwidth *rt_b)
  */
 static DEFINE_MUTEX(sched_domains_mutex);
 
-#ifdef CONFIG_GROUP_SCHED
+#ifdef CONFIG_CGROUP_SCHED
 
 #include <linux/cgroup.h>
 
@@ -243,13 +243,7 @@ static LIST_HEAD(task_groups);
 
 /* task group related information */
 struct task_group {
-#ifdef CONFIG_CGROUP_SCHED
 	struct cgroup_subsys_state css;
-#endif
-
-#ifdef CONFIG_USER_SCHED
-	uid_t uid;
-#endif
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
 	/* schedulable entities of this group on each cpu */
@@ -274,35 +268,7 @@ struct task_group {
 	struct list_head children;
 };
 
-#ifdef CONFIG_USER_SCHED
-
-/* Helper function to pass uid information to create_sched_user() */
-void set_tg_uid(struct user_struct *user)
-{
-	user->tg->uid = user->uid;
-}
-
-/*
- * Root task group.
- *	Every UID task group (including init_task_group aka UID-0) will
- *	be a child to this group.
- */
-struct task_group root_task_group;
-
-#ifdef CONFIG_FAIR_GROUP_SCHED
-/* Default task group's sched entity on each cpu */
-static DEFINE_PER_CPU(struct sched_entity, init_sched_entity);
-/* Default task group's cfs_rq on each cpu */
-static DEFINE_PER_CPU_SHARED_ALIGNED(struct cfs_rq, init_tg_cfs_rq);
-#endif /* CONFIG_FAIR_GROUP_SCHED */
-
-#ifdef CONFIG_RT_GROUP_SCHED
-static DEFINE_PER_CPU(struct sched_rt_entity, init_sched_rt_entity);
-static DEFINE_PER_CPU_SHARED_ALIGNED(struct rt_rq, init_rt_rq_var);
-#endif /* CONFIG_RT_GROUP_SCHED */
-#else /* !CONFIG_USER_SCHED */
 #define root_task_group init_task_group
-#endif /* CONFIG_USER_SCHED */
 
 /* task_group_lock serializes add/remove of task groups and also changes to
  * a task group's cpu shares.
@@ -318,11 +284,7 @@ static int root_task_group_empty(void)
 }
 #endif
 
-#ifdef CONFIG_USER_SCHED
-# define INIT_TASK_GROUP_LOAD	(2*NICE_0_LOAD)
-#else /* !CONFIG_USER_SCHED */
 # define INIT_TASK_GROUP_LOAD	NICE_0_LOAD
-#endif /* CONFIG_USER_SCHED */
 
 /*
  * A weight of 0 or 1 can cause arithmetics problems.
@@ -348,11 +310,7 @@ static inline struct task_group *task_group(struct task_struct *p)
 {
 	struct task_group *tg;
 
-#ifdef CONFIG_USER_SCHED
-	rcu_read_lock();
-	tg = __task_cred(p)->user->tg;
-	rcu_read_unlock();
-#elif defined(CONFIG_CGROUP_SCHED)
+#ifdef CONFIG_CGROUP_SCHED
 	tg = container_of(task_subsys_state(p, cpu_cgroup_subsys_id),
 				struct task_group, css);
 #else
@@ -383,7 +341,7 @@ static inline struct task_group *task_group(struct task_struct *p)
 	return NULL;
 }
 
-#endif	/* CONFIG_GROUP_SCHED */
+#endif	/* CONFIG_CGROUP_SCHED */
 
 /* CFS-related fields in a runqueue */
 struct cfs_rq {
@@ -7678,9 +7636,6 @@ void __init sched_init(void)
 #ifdef CONFIG_RT_GROUP_SCHED
 	alloc_size += 2 * nr_cpu_ids * sizeof(void **);
 #endif
-#ifdef CONFIG_USER_SCHED
-	alloc_size *= 2;
-#endif
 #ifdef CONFIG_CPUMASK_OFFSTACK
 	alloc_size += num_possible_cpus() * cpumask_size();
 #endif
@@ -7694,13 +7649,6 @@ void __init sched_init(void)
 		init_task_group.cfs_rq = (struct cfs_rq **)ptr;
 		ptr += nr_cpu_ids * sizeof(void **);
 
-#ifdef CONFIG_USER_SCHED
-		root_task_group.se = (struct sched_entity **)ptr;
-		ptr += nr_cpu_ids * sizeof(void **);
-
-		root_task_group.cfs_rq = (struct cfs_rq **)ptr;
-		ptr += nr_cpu_ids * sizeof(void **);
-#endif /* CONFIG_USER_SCHED */
 #endif /* CONFIG_FAIR_GROUP_SCHED */
 #ifdef CONFIG_RT_GROUP_SCHED
 		init_task_group.rt_se = (struct sched_rt_entity **)ptr;
@@ -7709,13 +7657,6 @@ void __init sched_init(void)
 		init_task_group.rt_rq = (struct rt_rq **)ptr;
 		ptr += nr_cpu_ids * sizeof(void **);
 
-#ifdef CONFIG_USER_SCHED
-		root_task_group.rt_se = (struct sched_rt_entity **)ptr;
-		ptr += nr_cpu_ids * sizeof(void **);
-
-		root_task_group.rt_rq = (struct rt_rq **)ptr;
-		ptr += nr_cpu_ids * sizeof(void **);
-#endif /* CONFIG_USER_SCHED */
 #endif /* CONFIG_RT_GROUP_SCHED */
 #ifdef CONFIG_CPUMASK_OFFSTACK
 		for_each_possible_cpu(i) {
@@ -7735,22 +7676,13 @@ void __init sched_init(void)
 #ifdef CONFIG_RT_GROUP_SCHED
 	init_rt_bandwidth(&init_task_group.rt_bandwidth,
 			global_rt_period(), global_rt_runtime());
-#ifdef CONFIG_USER_SCHED
-	init_rt_bandwidth(&root_task_group.rt_bandwidth,
-			global_rt_period(), RUNTIME_INF);
-#endif /* CONFIG_USER_SCHED */
 #endif /* CONFIG_RT_GROUP_SCHED */
 
-#ifdef CONFIG_GROUP_SCHED
+#ifdef CONFIG_CGROUP_SCHED
 	list_add(&init_task_group.list, &task_groups);
 	INIT_LIST_HEAD(&init_task_group.children);
 
-#ifdef CONFIG_USER_SCHED
-	INIT_LIST_HEAD(&root_task_group.children);
-	init_task_group.parent = &root_task_group;
-	list_add(&init_task_group.siblings, &root_task_group.children);
-#endif /* CONFIG_USER_SCHED */
-#endif /* CONFIG_GROUP_SCHED */
+#endif /* CONFIG_CGROUP_SCHED */
 
 #if defined CONFIG_FAIR_GROUP_SCHED && defined CONFIG_SMP
 	update_shares_data = __alloc_percpu(nr_cpu_ids * sizeof(unsigned long),
@@ -7790,25 +7722,6 @@ void __init sched_init(void)
 		 * directly in rq->cfs (i.e init_task_group->se[] = NULL).
 		 */
 		init_tg_cfs_entry(&init_task_group, &rq->cfs, NULL, i, 1, NULL);
-#elif defined CONFIG_USER_SCHED
-		root_task_group.shares = NICE_0_LOAD;
-		init_tg_cfs_entry(&root_task_group, &rq->cfs, NULL, i, 0, NULL);
-		/*
-		 * In case of task-groups formed thr' the user id of tasks,
-		 * init_task_group represents tasks belonging to root user.
-		 * Hence it forms a sibling of all subsequent groups formed.
-		 * In this case, init_task_group gets only a fraction of overall
-		 * system cpu resource, based on the weight assigned to root
-		 * user's cpu share (INIT_TASK_GROUP_LOAD). This is accomplished
-		 * by letting tasks of init_task_group sit in a separate cfs_rq
-		 * (init_tg_cfs_rq) and having one entity represent this group of
-		 * tasks in rq->cfs (i.e init_task_group->se[] != NULL).
-		 */
-		init_tg_cfs_entry(&init_task_group,
-				&per_cpu(init_tg_cfs_rq, i),
-				&per_cpu(init_sched_entity, i), i, 1,
-				root_task_group.se[i]);
-
 #endif
 #endif /* CONFIG_FAIR_GROUP_SCHED */
 
@@ -7817,12 +7730,6 @@ void __init sched_init(void)
 		INIT_LIST_HEAD(&rq->leaf_rt_rq_list);
 #ifdef CONFIG_CGROUP_SCHED
 		init_tg_rt_entry(&init_task_group, &rq->rt, NULL, i, 1, NULL);
-#elif defined CONFIG_USER_SCHED
-		init_tg_rt_entry(&root_task_group, &rq->rt, NULL, i, 0, NULL);
-		init_tg_rt_entry(&init_task_group,
-				&per_cpu(init_rt_rq_var, i),
-				&per_cpu(init_sched_rt_entity, i), i, 1,
-				root_task_group.rt_se[i]);
 #endif
 #endif
 
@@ -8218,7 +8125,7 @@ static inline void unregister_rt_sched_group(struct task_group *tg, int cpu)
 }
 #endif /* CONFIG_RT_GROUP_SCHED */
 
-#ifdef CONFIG_GROUP_SCHED
+#ifdef CONFIG_CGROUP_SCHED
 static void free_sched_group(struct task_group *tg)
 {
 	free_fair_sched_group(tg);
@@ -8327,7 +8234,7 @@ void sched_move_task(struct task_struct *tsk)
 
 	task_rq_unlock(rq, &flags);
 }
-#endif /* CONFIG_GROUP_SCHED */
+#endif /* CONFIG_CGROUP_SCHED */
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
 static void __set_se_shares(struct sched_entity *se, unsigned long shares)
@@ -8469,13 +8376,6 @@ static int tg_schedulable(struct task_group *tg, void *data)
 		runtime = d->rt_runtime;
 	}
 
-#ifdef CONFIG_USER_SCHED
-	if (tg == &root_task_group) {
-		period = global_rt_period();
-		runtime = global_rt_runtime();
-	}
-#endif
-
 	/*
 	 * Cannot have more runtime than the period.
 	 */
diff --git a/kernel/sys.c b/kernel/sys.c
index 26a6b73a6b85..f75bf0936f47 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -569,11 +569,6 @@ static int set_user(struct cred *new)
 	if (!new_user)
 		return -EAGAIN;
 
-	if (!task_can_switch_user(new_user, current)) {
-		free_uid(new_user);
-		return -EINVAL;
-	}
-
 	if (atomic_read(&new_user->processes) >=
 				current->signal->rlim[RLIMIT_NPROC].rlim_cur &&
 			new_user != INIT_USER) {
diff --git a/kernel/user.c b/kernel/user.c
index 46d0165ca70c..766467b3bcb7 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -56,9 +56,6 @@ struct user_struct root_user = {
 	.sigpending	= ATOMIC_INIT(0),
 	.locked_shm     = 0,
 	.user_ns	= &init_user_ns,
-#ifdef CONFIG_USER_SCHED
-	.tg		= &init_task_group,
-#endif
 };
 
 /*
@@ -75,268 +72,6 @@ static void uid_hash_remove(struct user_struct *up)
 	put_user_ns(up->user_ns);
 }
 
-#ifdef CONFIG_USER_SCHED
-
-static void sched_destroy_user(struct user_struct *up)
-{
-	sched_destroy_group(up->tg);
-}
-
-static int sched_create_user(struct user_struct *up)
-{
-	int rc = 0;
-
-	up->tg = sched_create_group(&root_task_group);
-	if (IS_ERR(up->tg))
-		rc = -ENOMEM;
-
-	set_tg_uid(up);
-
-	return rc;
-}
-
-#else	/* CONFIG_USER_SCHED */
-
-static void sched_destroy_user(struct user_struct *up) { }
-static int sched_create_user(struct user_struct *up) { return 0; }
-
-#endif	/* CONFIG_USER_SCHED */
-
-#if defined(CONFIG_USER_SCHED) && defined(CONFIG_SYSFS)
-
-static struct user_struct *uid_hash_find(uid_t uid, struct hlist_head *hashent)
-{
-	struct user_struct *user;
-	struct hlist_node *h;
-
-	hlist_for_each_entry(user, h, hashent, uidhash_node) {
-		if (user->uid == uid) {
-			/* possibly resurrect an "almost deleted" object */
-			if (atomic_inc_return(&user->__count) == 1)
-				cancel_delayed_work(&user->work);
-			return user;
-		}
-	}
-
-	return NULL;
-}
-
-static struct kset *uids_kset; /* represents the /sys/kernel/uids/ directory */
-static DEFINE_MUTEX(uids_mutex);
-
-static inline void uids_mutex_lock(void)
-{
-	mutex_lock(&uids_mutex);
-}
-
-static inline void uids_mutex_unlock(void)
-{
-	mutex_unlock(&uids_mutex);
-}
-
-/* uid directory attributes */
-#ifdef CONFIG_FAIR_GROUP_SCHED
-static ssize_t cpu_shares_show(struct kobject *kobj,
-			       struct kobj_attribute *attr,
-			       char *buf)
-{
-	struct user_struct *up = container_of(kobj, struct user_struct, kobj);
-
-	return sprintf(buf, "%lu\n", sched_group_shares(up->tg));
-}
-
-static ssize_t cpu_shares_store(struct kobject *kobj,
-				struct kobj_attribute *attr,
-				const char *buf, size_t size)
-{
-	struct user_struct *up = container_of(kobj, struct user_struct, kobj);
-	unsigned long shares;
-	int rc;
-
-	sscanf(buf, "%lu", &shares);
-
-	rc = sched_group_set_shares(up->tg, shares);
-
-	return (rc ? rc : size);
-}
-
-static struct kobj_attribute cpu_share_attr =
-	__ATTR(cpu_share, 0644, cpu_shares_show, cpu_shares_store);
-#endif
-
-#ifdef CONFIG_RT_GROUP_SCHED
-static ssize_t cpu_rt_runtime_show(struct kobject *kobj,
-				   struct kobj_attribute *attr,
-				   char *buf)
-{
-	struct user_struct *up = container_of(kobj, struct user_struct, kobj);
-
-	return sprintf(buf, "%ld\n", sched_group_rt_runtime(up->tg));
-}
-
-static ssize_t cpu_rt_runtime_store(struct kobject *kobj,
-				    struct kobj_attribute *attr,
-				    const char *buf, size_t size)
-{
-	struct user_struct *up = container_of(kobj, struct user_struct, kobj);
-	unsigned long rt_runtime;
-	int rc;
-
-	sscanf(buf, "%ld", &rt_runtime);
-
-	rc = sched_group_set_rt_runtime(up->tg, rt_runtime);
-
-	return (rc ? rc : size);
-}
-
-static struct kobj_attribute cpu_rt_runtime_attr =
-	__ATTR(cpu_rt_runtime, 0644, cpu_rt_runtime_show, cpu_rt_runtime_store);
-
-static ssize_t cpu_rt_period_show(struct kobject *kobj,
-				   struct kobj_attribute *attr,
-				   char *buf)
-{
-	struct user_struct *up = container_of(kobj, struct user_struct, kobj);
-
-	return sprintf(buf, "%lu\n", sched_group_rt_period(up->tg));
-}
-
-static ssize_t cpu_rt_period_store(struct kobject *kobj,
-				    struct kobj_attribute *attr,
-				    const char *buf, size_t size)
-{
-	struct user_struct *up = container_of(kobj, struct user_struct, kobj);
-	unsigned long rt_period;
-	int rc;
-
-	sscanf(buf, "%lu", &rt_period);
-
-	rc = sched_group_set_rt_period(up->tg, rt_period);
-
-	return (rc ? rc : size);
-}
-
-static struct kobj_attribute cpu_rt_period_attr =
-	__ATTR(cpu_rt_period, 0644, cpu_rt_period_show, cpu_rt_period_store);
-#endif
-
-/* default attributes per uid directory */
-static struct attribute *uids_attributes[] = {
-#ifdef CONFIG_FAIR_GROUP_SCHED
-	&cpu_share_attr.attr,
-#endif
-#ifdef CONFIG_RT_GROUP_SCHED
-	&cpu_rt_runtime_attr.attr,
-	&cpu_rt_period_attr.attr,
-#endif
-	NULL
-};
-
-/* the lifetime of user_struct is not managed by the core (now) */
-static void uids_release(struct kobject *kobj)
-{
-	return;
-}
-
-static struct kobj_type uids_ktype = {
-	.sysfs_ops = &kobj_sysfs_ops,
-	.default_attrs = uids_attributes,
-	.release = uids_release,
-};
-
-/*
- * Create /sys/kernel/uids/<uid>/cpu_share file for this user
- * We do not create this file for users in a user namespace (until
- * sysfs tagging is implemented).
- *
- * See Documentation/scheduler/sched-design-CFS.txt for ramifications.
- */
-static int uids_user_create(struct user_struct *up)
-{
-	struct kobject *kobj = &up->kobj;
-	int error;
-
-	memset(kobj, 0, sizeof(struct kobject));
-	if (up->user_ns != &init_user_ns)
-		return 0;
-	kobj->kset = uids_kset;
-	error = kobject_init_and_add(kobj, &uids_ktype, NULL, "%d", up->uid);
-	if (error) {
-		kobject_put(kobj);
-		goto done;
-	}
-
-	kobject_uevent(kobj, KOBJ_ADD);
-done:
-	return error;
-}
-
-/* create these entries in sysfs:
- * 	"/sys/kernel/uids" directory
- * 	"/sys/kernel/uids/0" directory (for root user)
- * 	"/sys/kernel/uids/0/cpu_share" file (for root user)
- */
-int __init uids_sysfs_init(void)
-{
-	uids_kset = kset_create_and_add("uids", NULL, kernel_kobj);
-	if (!uids_kset)
-		return -ENOMEM;
-
-	return uids_user_create(&root_user);
-}
-
-/* delayed work function to remove sysfs directory for a user and free up
- * corresponding structures.
- */
-static void cleanup_user_struct(struct work_struct *w)
-{
-	struct user_struct *up = container_of(w, struct user_struct, work.work);
-	unsigned long flags;
-	int remove_user = 0;
-
-	/* Make uid_hash_remove() + sysfs_remove_file() + kobject_del()
-	 * atomic.
-	 */
-	uids_mutex_lock();
-
-	spin_lock_irqsave(&uidhash_lock, flags);
-	if (atomic_read(&up->__count) == 0) {
-		uid_hash_remove(up);
-		remove_user = 1;
-	}
-	spin_unlock_irqrestore(&uidhash_lock, flags);
-
-	if (!remove_user)
-		goto done;
-
-	if (up->user_ns == &init_user_ns) {
-		kobject_uevent(&up->kobj, KOBJ_REMOVE);
-		kobject_del(&up->kobj);
-		kobject_put(&up->kobj);
-	}
-
-	sched_destroy_user(up);
-	key_put(up->uid_keyring);
-	key_put(up->session_keyring);
-	kmem_cache_free(uid_cachep, up);
-
-done:
-	uids_mutex_unlock();
-}
-
-/* IRQs are disabled and uidhash_lock is held upon function entry.
- * IRQ state (as stored in flags) is restored and uidhash_lock released
- * upon function exit.
- */
-static void free_user(struct user_struct *up, unsigned long flags)
-{
-	INIT_DELAYED_WORK(&up->work, cleanup_user_struct);
-	schedule_delayed_work(&up->work, msecs_to_jiffies(1000));
-	spin_unlock_irqrestore(&uidhash_lock, flags);
-}
-
-#else	/* CONFIG_USER_SCHED && CONFIG_SYSFS */
-
 static struct user_struct *uid_hash_find(uid_t uid, struct hlist_head *hashent)
 {
 	struct user_struct *user;
@@ -352,11 +87,6 @@ static struct user_struct *uid_hash_find(uid_t uid, struct hlist_head *hashent)
 	return NULL;
 }
 
-int uids_sysfs_init(void) { return 0; }
-static inline int uids_user_create(struct user_struct *up) { return 0; }
-static inline void uids_mutex_lock(void) { }
-static inline void uids_mutex_unlock(void) { }
-
 /* IRQs are disabled and uidhash_lock is held upon function entry.
  * IRQ state (as stored in flags) is restored and uidhash_lock released
  * upon function exit.
@@ -365,32 +95,11 @@ static void free_user(struct user_struct *up, unsigned long flags)
 {
 	uid_hash_remove(up);
 	spin_unlock_irqrestore(&uidhash_lock, flags);
-	sched_destroy_user(up);
 	key_put(up->uid_keyring);
 	key_put(up->session_keyring);
 	kmem_cache_free(uid_cachep, up);
 }
 
-#endif
-
-#if defined(CONFIG_RT_GROUP_SCHED) && defined(CONFIG_USER_SCHED)
-/*
- * We need to check if a setuid can take place. This function should be called
- * before successfully completing the setuid.
- */
-int task_can_switch_user(struct user_struct *up, struct task_struct *tsk)
-{
-
-	return sched_rt_can_attach(up->tg, tsk);
-
-}
-#else
-int task_can_switch_user(struct user_struct *up, struct task_struct *tsk)
-{
-	return 1;
-}
-#endif
-
 /*
  * Locate the user_struct for the passed UID.  If found, take a ref on it.  The
  * caller must undo that ref with free_uid().
@@ -431,8 +140,6 @@ struct user_struct *alloc_uid(struct user_namespace *ns, uid_t uid)
 	/* Make uid_hash_find() + uids_user_create() + uid_hash_insert()
 	 * atomic.
 	 */
-	uids_mutex_lock();
-
 	spin_lock_irq(&uidhash_lock);
 	up = uid_hash_find(uid, hashent);
 	spin_unlock_irq(&uidhash_lock);
@@ -445,14 +152,8 @@ struct user_struct *alloc_uid(struct user_namespace *ns, uid_t uid)
 		new->uid = uid;
 		atomic_set(&new->__count, 1);
 
-		if (sched_create_user(new) < 0)
-			goto out_free_user;
-
 		new->user_ns = get_user_ns(ns);
 
-		if (uids_user_create(new))
-			goto out_destoy_sched;
-
 		/*
 		 * Before adding this, check whether we raced
 		 * on adding the same user already..
@@ -475,17 +176,11 @@ struct user_struct *alloc_uid(struct user_namespace *ns, uid_t uid)
 		spin_unlock_irq(&uidhash_lock);
 	}
 
-	uids_mutex_unlock();
-
 	return up;
 
-out_destoy_sched:
-	sched_destroy_user(new);
 	put_user_ns(new->user_ns);
-out_free_user:
 	kmem_cache_free(uid_cachep, new);
 out_unlock:
-	uids_mutex_unlock();
 	return NULL;
 }
 
-- 
cgit v1.2.3


From 92b6759857ea3ad19bc6871044e373f6251841d3 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Mon, 18 Jan 2010 14:02:16 +0100
Subject: perf: Change the is_software_event() definition

The is_software_event() definition always confuses me because its an
exclusive expression, make it an inclusive one.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_event.h | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index c66b34f75eea..8fa71874113f 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -814,9 +814,14 @@ extern int perf_event_overflow(struct perf_event *event, int nmi,
  */
 static inline int is_software_event(struct perf_event *event)
 {
-	return (event->attr.type != PERF_TYPE_RAW) &&
-		(event->attr.type != PERF_TYPE_HARDWARE) &&
-		(event->attr.type != PERF_TYPE_HW_CACHE);
+	switch (event->attr.type) {
+	case PERF_TYPE_SOFTWARE:
+	case PERF_TYPE_TRACEPOINT:
+	/* for now the breakpoint stuff also works as software event */
+	case PERF_TYPE_BREAKPOINT:
+		return 1;
+	}
+	return 0;
 }
 
 extern atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX];
-- 
cgit v1.2.3


From 83fe518a839e317480e50a138ef4acd73510d7ce Mon Sep 17 00:00:00 2001
From: George Shore <george@georgeshore.com>
Date: Thu, 21 Jan 2010 11:40:48 +0000
Subject: spi/dw_spi: enable platform specific chipselect.

The driver core allows for a platform-specific chipselect assert/deassert
function, however the chipselect function in the core doesn't take advantage
of this fact.

This enables the use of a custom function, should it be defined.

Signed-off-by: George Shore <george@georgeshore.com>
Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
---
 include/linux/spi/dw_spi.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/spi/dw_spi.h b/include/linux/spi/dw_spi.h
index 1a127a31e017..cc813f95a2f2 100644
--- a/include/linux/spi/dw_spi.h
+++ b/include/linux/spi/dw_spi.h
@@ -172,6 +172,10 @@ static inline void spi_chip_sel(struct dw_spi *dws, u16 cs)
 {
 	if (cs > dws->num_cs)
 		return;
+
+	if (dws->cs_control)
+		dws->cs_control(1);
+
 	dw_writel(dws, ser, 1 << cs);
 }
 
-- 
cgit v1.2.3


From ea87bb7853168434f4a82426dd1ea8421f9e604d Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 20 Jan 2010 20:58:57 +0000
Subject: sched: Extend enqueue_task to allow head queueing

The ability of enqueueing a task to the head of a SCHED_FIFO priority
list is required to fix some violations of POSIX scheduling policy.

Extend the related functions with a "head" argument.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Tested-by: Carsten Emde <cbe@osadl.org>
Tested-by: Mathias Weber <mathias.weber.mw1@roche.com>
LKML-Reference: <20100120171629.734886007@linutronix.de>
---
 include/linux/sched.h |  3 ++-
 kernel/sched.c        | 13 +++++++------
 kernel/sched_fair.c   |  3 ++-
 kernel/sched_rt.c     |  3 ++-
 4 files changed, 13 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 8b079735ae5f..b35c0c7130c8 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1067,7 +1067,8 @@ struct sched_domain;
 struct sched_class {
 	const struct sched_class *next;
 
-	void (*enqueue_task) (struct rq *rq, struct task_struct *p, int wakeup);
+	void (*enqueue_task) (struct rq *rq, struct task_struct *p, int wakeup,
+			      bool head);
 	void (*dequeue_task) (struct rq *rq, struct task_struct *p, int sleep);
 	void (*yield_task) (struct rq *rq);
 
diff --git a/kernel/sched.c b/kernel/sched.c
index 41e76d325648..f47560ff3346 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1856,13 +1856,14 @@ static void update_avg(u64 *avg, u64 sample)
 	*avg += diff >> 3;
 }
 
-static void enqueue_task(struct rq *rq, struct task_struct *p, int wakeup)
+static void
+enqueue_task(struct rq *rq, struct task_struct *p, int wakeup, bool head)
 {
 	if (wakeup)
 		p->se.start_runtime = p->se.sum_exec_runtime;
 
 	sched_info_queued(p);
-	p->sched_class->enqueue_task(rq, p, wakeup);
+	p->sched_class->enqueue_task(rq, p, wakeup, head);
 	p->se.on_rq = 1;
 }
 
@@ -1892,7 +1893,7 @@ static void activate_task(struct rq *rq, struct task_struct *p, int wakeup)
 	if (task_contributes_to_load(p))
 		rq->nr_uninterruptible--;
 
-	enqueue_task(rq, p, wakeup);
+	enqueue_task(rq, p, wakeup, false);
 	inc_nr_running(rq);
 }
 
@@ -4236,7 +4237,7 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
 	if (running)
 		p->sched_class->set_curr_task(rq);
 	if (on_rq) {
-		enqueue_task(rq, p, 0);
+		enqueue_task(rq, p, 0, false);
 
 		check_class_changed(rq, p, prev_class, oldprio, running);
 	}
@@ -4280,7 +4281,7 @@ void set_user_nice(struct task_struct *p, long nice)
 	delta = p->prio - old_prio;
 
 	if (on_rq) {
-		enqueue_task(rq, p, 0);
+		enqueue_task(rq, p, 0, false);
 		/*
 		 * If the task increased its priority or is running and
 		 * lowered its priority, then reschedule its CPU:
@@ -8230,7 +8231,7 @@ void sched_move_task(struct task_struct *tsk)
 	if (unlikely(running))
 		tsk->sched_class->set_curr_task(rq);
 	if (on_rq)
-		enqueue_task(rq, tsk, 0);
+		enqueue_task(rq, tsk, 0, false);
 
 	task_rq_unlock(rq, &flags);
 }
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 22231ccb2f98..0e7a7af9cf8b 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1053,7 +1053,8 @@ static inline void hrtick_update(struct rq *rq)
  * increased. Here we update the fair scheduling stats and
  * then put the task into the rbtree:
  */
-static void enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup)
+static void
+enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup, bool head)
 {
 	struct cfs_rq *cfs_rq;
 	struct sched_entity *se = &p->se;
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 502bb614e40a..38076dabb44a 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -878,7 +878,8 @@ static void dequeue_rt_entity(struct sched_rt_entity *rt_se)
 /*
  * Adding/removing a task to/from a priority array:
  */
-static void enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup)
+static void
+enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup, bool head)
 {
 	struct sched_rt_entity *rt_se = &p->rt;
 
-- 
cgit v1.2.3


From d0dd2de0d055f0ffb1e2ecdc21380de9d12a85e2 Mon Sep 17 00:00:00 2001
From: Andriy Tkachuk <andrit@ukr.net>
Date: Wed, 20 Jan 2010 13:55:06 +0200
Subject: mac80211: Account HT Control field in Data frame hdrlen according to
 802.11n-2009

ieee80211_hdrlen() should account account new HT Control field in 802.11
data frame header introduced by IEEE 802.11n standard.

According to 802.11n-2009 HT Control field is present in data frames
when both of following are met:

   1. It is QoS data frame.
   2. Order bit is set in Frame Control field.

The change might be totally compatible with legacy non-11n aware frames,
because 802.11-2007 standard states that "all QoS STAs set this subfield
to 0".

Signed-off-by: Andriy V. Tkachuk <andrit@ukr.net>
Acked-by : Benoit Papillault <benoit.papillault@free.fr>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/ieee80211.h | 2 ++
 net/wireless/util.c       | 5 ++++-
 2 files changed, 6 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 842701906ae9..19984958ab7b 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -138,6 +138,8 @@
 #define IEEE80211_WMM_IE_STA_QOSINFO_SP_MASK	0x03
 #define IEEE80211_WMM_IE_STA_QOSINFO_SP_SHIFT	5
 
+#define IEEE80211_HT_CTL_LEN		4
+
 struct ieee80211_hdr {
 	__le16 frame_control;
 	__le16 duration_id;
diff --git a/net/wireless/util.c b/net/wireless/util.c
index 23557c1d0a9c..be2ab8c59e3a 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -227,8 +227,11 @@ unsigned int ieee80211_hdrlen(__le16 fc)
 	if (ieee80211_is_data(fc)) {
 		if (ieee80211_has_a4(fc))
 			hdrlen = 30;
-		if (ieee80211_is_data_qos(fc))
+		if (ieee80211_is_data_qos(fc)) {
 			hdrlen += IEEE80211_QOS_CTL_LEN;
+			if (ieee80211_has_order(fc))
+				hdrlen += IEEE80211_HT_CTL_LEN;
+		}
 		goto out;
 	}
 
-- 
cgit v1.2.3


From a271623f871dda970319ca15dfad3a8c8c36249f Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Fri, 22 Jan 2010 10:13:10 +0000
Subject: netdev: remove certain HAVE_ macros

After netdev_ops compat code HAVE_* macros aren't needed, in fact
they _will_ result in compile breakage for out of tree drivers.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 10 ----------
 1 file changed, 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 468a11dea58c..b5fb51d0b8b1 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -621,30 +621,21 @@ struct net_device_ops {
 						   struct net_device *dev);
 	u16			(*ndo_select_queue)(struct net_device *dev,
 						    struct sk_buff *skb);
-#define HAVE_CHANGE_RX_FLAGS
 	void			(*ndo_change_rx_flags)(struct net_device *dev,
 						       int flags);
-#define HAVE_SET_RX_MODE
 	void			(*ndo_set_rx_mode)(struct net_device *dev);
-#define HAVE_MULTICAST
 	void			(*ndo_set_multicast_list)(struct net_device *dev);
-#define HAVE_SET_MAC_ADDR
 	int			(*ndo_set_mac_address)(struct net_device *dev,
 						       void *addr);
-#define HAVE_VALIDATE_ADDR
 	int			(*ndo_validate_addr)(struct net_device *dev);
-#define HAVE_PRIVATE_IOCTL
 	int			(*ndo_do_ioctl)(struct net_device *dev,
 					        struct ifreq *ifr, int cmd);
-#define HAVE_SET_CONFIG
 	int			(*ndo_set_config)(struct net_device *dev,
 					          struct ifmap *map);
-#define HAVE_CHANGE_MTU
 	int			(*ndo_change_mtu)(struct net_device *dev,
 						  int new_mtu);
 	int			(*ndo_neigh_setup)(struct net_device *dev,
 						   struct neigh_parms *);
-#define HAVE_TX_TIMEOUT
 	void			(*ndo_tx_timeout) (struct net_device *dev);
 
 	struct net_device_stats* (*ndo_get_stats)(struct net_device *dev);
@@ -656,7 +647,6 @@ struct net_device_ops {
 	void			(*ndo_vlan_rx_kill_vid)(struct net_device *dev,
 						        unsigned short vid);
 #ifdef CONFIG_NET_POLL_CONTROLLER
-#define HAVE_NETDEV_POLL
 	void                    (*ndo_poll_controller)(struct net_device *dev);
 #endif
 #if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
-- 
cgit v1.2.3


From cb289d6244a37cf932c571d6deb0daa8030f931b Mon Sep 17 00:00:00 2001
From: Davide Libenzi <davidel@xmailserver.org>
Date: Wed, 13 Jan 2010 09:34:36 -0800
Subject: eventfd - allow atomic read and waitqueue remove

KVM needs a wait to atomically remove themselves from the eventfd ->poll()
wait queue head, in order to handle correctly their IRQfd deassign
operation.

This patch introduces such API, plus a way to read an eventfd from its
context.

Signed-off-by: Davide Libenzi <davidel@xmailserver.org>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 fs/eventfd.c            | 89 ++++++++++++++++++++++++++++++++++++++++---------
 include/linux/eventfd.h | 16 +++++++++
 2 files changed, 90 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/fs/eventfd.c b/fs/eventfd.c
index d26402ff06ea..7758cc382ef0 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -135,26 +135,71 @@ static unsigned int eventfd_poll(struct file *file, poll_table *wait)
 	return events;
 }
 
-static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count,
-			    loff_t *ppos)
+static void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt)
+{
+	*cnt = (ctx->flags & EFD_SEMAPHORE) ? 1 : ctx->count;
+	ctx->count -= *cnt;
+}
+
+/**
+ * eventfd_ctx_remove_wait_queue - Read the current counter and removes wait queue.
+ * @ctx: [in] Pointer to eventfd context.
+ * @wait: [in] Wait queue to be removed.
+ * @cnt: [out] Pointer to the 64bit conter value.
+ *
+ * Returns zero if successful, or the following error codes:
+ *
+ * -EAGAIN      : The operation would have blocked.
+ *
+ * This is used to atomically remove a wait queue entry from the eventfd wait
+ * queue head, and read/reset the counter value.
+ */
+int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_t *wait,
+				  __u64 *cnt)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&ctx->wqh.lock, flags);
+	eventfd_ctx_do_read(ctx, cnt);
+	__remove_wait_queue(&ctx->wqh, wait);
+	if (*cnt != 0 && waitqueue_active(&ctx->wqh))
+		wake_up_locked_poll(&ctx->wqh, POLLOUT);
+	spin_unlock_irqrestore(&ctx->wqh.lock, flags);
+
+	return *cnt != 0 ? 0 : -EAGAIN;
+}
+EXPORT_SYMBOL_GPL(eventfd_ctx_remove_wait_queue);
+
+/**
+ * eventfd_ctx_read - Reads the eventfd counter or wait if it is zero.
+ * @ctx: [in] Pointer to eventfd context.
+ * @no_wait: [in] Different from zero if the operation should not block.
+ * @cnt: [out] Pointer to the 64bit conter value.
+ *
+ * Returns zero if successful, or the following error codes:
+ *
+ * -EAGAIN      : The operation would have blocked but @no_wait was nonzero.
+ * -ERESTARTSYS : A signal interrupted the wait operation.
+ *
+ * If @no_wait is zero, the function might sleep until the eventfd internal
+ * counter becomes greater than zero.
+ */
+ssize_t eventfd_ctx_read(struct eventfd_ctx *ctx, int no_wait, __u64 *cnt)
 {
-	struct eventfd_ctx *ctx = file->private_data;
 	ssize_t res;
-	__u64 ucnt = 0;
 	DECLARE_WAITQUEUE(wait, current);
 
-	if (count < sizeof(ucnt))
-		return -EINVAL;
 	spin_lock_irq(&ctx->wqh.lock);
+	*cnt = 0;
 	res = -EAGAIN;
 	if (ctx->count > 0)
-		res = sizeof(ucnt);
-	else if (!(file->f_flags & O_NONBLOCK)) {
+		res = 0;
+	else if (!no_wait) {
 		__add_wait_queue(&ctx->wqh, &wait);
-		for (res = 0;;) {
+		for (;;) {
 			set_current_state(TASK_INTERRUPTIBLE);
 			if (ctx->count > 0) {
-				res = sizeof(ucnt);
+				res = 0;
 				break;
 			}
 			if (signal_pending(current)) {
@@ -168,18 +213,32 @@ static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count,
 		__remove_wait_queue(&ctx->wqh, &wait);
 		__set_current_state(TASK_RUNNING);
 	}
-	if (likely(res > 0)) {
-		ucnt = (ctx->flags & EFD_SEMAPHORE) ? 1 : ctx->count;
-		ctx->count -= ucnt;
+	if (likely(res == 0)) {
+		eventfd_ctx_do_read(ctx, cnt);
 		if (waitqueue_active(&ctx->wqh))
 			wake_up_locked_poll(&ctx->wqh, POLLOUT);
 	}
 	spin_unlock_irq(&ctx->wqh.lock);
-	if (res > 0 && put_user(ucnt, (__u64 __user *) buf))
-		return -EFAULT;
 
 	return res;
 }
+EXPORT_SYMBOL_GPL(eventfd_ctx_read);
+
+static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count,
+			    loff_t *ppos)
+{
+	struct eventfd_ctx *ctx = file->private_data;
+	ssize_t res;
+	__u64 cnt;
+
+	if (count < sizeof(cnt))
+		return -EINVAL;
+	res = eventfd_ctx_read(ctx, file->f_flags & O_NONBLOCK, &cnt);
+	if (res < 0)
+		return res;
+
+	return put_user(cnt, (__u64 __user *) buf) ? -EFAULT : sizeof(cnt);
+}
 
 static ssize_t eventfd_write(struct file *file, const char __user *buf, size_t count,
 			     loff_t *ppos)
diff --git a/include/linux/eventfd.h b/include/linux/eventfd.h
index 94dd10366a78..91bb4f27238c 100644
--- a/include/linux/eventfd.h
+++ b/include/linux/eventfd.h
@@ -10,6 +10,7 @@
 
 #include <linux/fcntl.h>
 #include <linux/file.h>
+#include <linux/wait.h>
 
 /*
  * CAREFUL: Check include/asm-generic/fcntl.h when defining
@@ -34,6 +35,9 @@ struct file *eventfd_fget(int fd);
 struct eventfd_ctx *eventfd_ctx_fdget(int fd);
 struct eventfd_ctx *eventfd_ctx_fileget(struct file *file);
 int eventfd_signal(struct eventfd_ctx *ctx, int n);
+ssize_t eventfd_ctx_read(struct eventfd_ctx *ctx, int no_wait, __u64 *cnt);
+int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_t *wait,
+				  __u64 *cnt);
 
 #else /* CONFIG_EVENTFD */
 
@@ -61,6 +65,18 @@ static inline void eventfd_ctx_put(struct eventfd_ctx *ctx)
 
 }
 
+static inline ssize_t eventfd_ctx_read(struct eventfd_ctx *ctx, int no_wait,
+				       __u64 *cnt)
+{
+	return -ENOSYS;
+}
+
+static inline int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx,
+						wait_queue_t *wait, __u64 *cnt)
+{
+	return -ENOSYS;
+}
+
 #endif
 
 #endif /* _LINUX_EVENTFD_H */
-- 
cgit v1.2.3


From 9df5f74194871ebd0e51ef5ad2eca5084acaaaba Mon Sep 17 00:00:00 2001
From: James Bottomley <James.Bottomley@suse.de>
Date: Mon, 25 Jan 2010 11:42:20 -0600
Subject: mm: add coherence API for DMA to vmalloc/vmap areas

On Virtually Indexed architectures (which don't do automatic alias
resolution in their caches), we have to flush via the correct
virtual address to prepare pages for DMA.  On some architectures
(like arm) we cannot prevent the CPU from doing data movein along
the alias (and thus giving stale read data), so we not only have to
introduce a flush API to push dirty cache lines out, but also an invalidate
API to kill inconsistent cache lines that may have moved in before
DMA changed the data

Signed-off-by: James Bottomley <James.Bottomley@suse.de>
---
 Documentation/cachetlb.txt | 24 ++++++++++++++++++++++++
 include/linux/highmem.h    |  6 ++++++
 2 files changed, 30 insertions(+)

(limited to 'include/linux')

diff --git a/Documentation/cachetlb.txt b/Documentation/cachetlb.txt
index da42ab414c48..b231414bb8bc 100644
--- a/Documentation/cachetlb.txt
+++ b/Documentation/cachetlb.txt
@@ -377,3 +377,27 @@ maps this page at its virtual address.
 	All the functionality of flush_icache_page can be implemented in
 	flush_dcache_page and update_mmu_cache. In 2.7 the hope is to
 	remove this interface completely.
+
+The final category of APIs is for I/O to deliberately aliased address
+ranges inside the kernel.  Such aliases are set up by use of the
+vmap/vmalloc API.  Since kernel I/O goes via physical pages, the I/O
+subsystem assumes that the user mapping and kernel offset mapping are
+the only aliases.  This isn't true for vmap aliases, so anything in
+the kernel trying to do I/O to vmap areas must manually manage
+coherency.  It must do this by flushing the vmap range before doing
+I/O and invalidating it after the I/O returns.
+
+  void flush_kernel_vmap_range(void *vaddr, int size)
+       flushes the kernel cache for a given virtual address range in
+       the vmap area.  This is to make sure that any data the kernel
+       modified in the vmap range is made visible to the physical
+       page.  The design is to make this area safe to perform I/O on.
+       Note that this API does *not* also flush the offset map alias
+       of the area.
+
+  void invalidate_kernel_vmap_range(void *vaddr, int size) invalidates
+       the cache for a given virtual address range in the vmap area
+       which prevents the processor from making the cache stale by
+       speculatively reading data while the I/O was occurring to the
+       physical pages.  This is only necessary for data reads into the
+       vmap area.
diff --git a/include/linux/highmem.h b/include/linux/highmem.h
index 211ff4497269..adfe1013b2bd 100644
--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -17,6 +17,12 @@ static inline void flush_anon_page(struct vm_area_struct *vma, struct page *page
 static inline void flush_kernel_dcache_page(struct page *page)
 {
 }
+static inline void flush_kernel_vmap_range(void *vaddr, int size)
+{
+}
+static inline void invalidate_kernel_vmap_range(void *vaddr, int size)
+{
+}
 #endif
 
 #include <asm/kmap_types.h>
-- 
cgit v1.2.3


From 32e7bfc41110bc8f29ec0f293c3bcee6645fef34 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jpirko@redhat.com>
Date: Mon, 25 Jan 2010 13:36:10 -0800
Subject: net: use helpers to access uc list V2

This patch introduces three macros to work with uc list from net drivers.

Signed-off-by: Jiri Pirko <jpirko@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bnx2.c                  |  5 ++---
 drivers/net/e1000/e1000_main.c      |  4 ++--
 drivers/net/igb/igb_main.c          |  7 ++++---
 drivers/net/ixgbe/ixgbe_common.c    |  7 +++----
 drivers/net/ixgbe/ixgbe_common.h    |  2 +-
 drivers/net/ixgbe/ixgbe_main.c      |  2 +-
 drivers/net/ixgbe/ixgbe_type.h      |  4 ++--
 drivers/net/mv643xx_eth.c           |  3 +--
 drivers/net/niu.c                   |  4 ++--
 drivers/net/stmmac/dwmac1000_core.c | 10 +++++-----
 drivers/net/virtio_net.c            | 12 +++++++-----
 drivers/s390/net/qeth_l2_main.c     |  2 +-
 include/linux/netdevice.h           |  5 +++++
 net/core/dev.c                      |  4 ++--
 14 files changed, 38 insertions(+), 33 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c
index d83512d3e02f..a7b6b12c1c05 100644
--- a/drivers/net/bnx2.c
+++ b/drivers/net/bnx2.c
@@ -48,7 +48,6 @@
 #include <linux/cache.h>
 #include <linux/firmware.h>
 #include <linux/log2.h>
-#include <linux/list.h>
 
 #if defined(CONFIG_CNIC) || defined(CONFIG_CNIC_MODULE)
 #define BCM_CNIC 1
@@ -3579,14 +3578,14 @@ bnx2_set_rx_mode(struct net_device *dev)
 		sort_mode |= BNX2_RPM_SORT_USER0_MC_HSH_EN;
 	}
 
-	if (dev->uc.count > BNX2_MAX_UNICAST_ADDRESSES) {
+	if (netdev_uc_count(dev) > BNX2_MAX_UNICAST_ADDRESSES) {
 		rx_mode |= BNX2_EMAC_RX_MODE_PROMISCUOUS;
 		sort_mode |= BNX2_RPM_SORT_USER0_PROM_EN |
 			     BNX2_RPM_SORT_USER0_PROM_VLAN;
 	} else if (!(dev->flags & IFF_PROMISC)) {
 		/* Add all entries into to the match filter list */
 		i = 0;
-		list_for_each_entry(ha, &dev->uc.list, list) {
+		netdev_for_each_uc_addr(ha, dev) {
 			bnx2_set_mac_addr(bp, ha->addr,
 					  i + BNX2_START_UNICAST_ADDRESS_INDEX);
 			sort_mode |= (1 <<
diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c
index 87f575ca427d..2ce88c5f75c5 100644
--- a/drivers/net/e1000/e1000_main.c
+++ b/drivers/net/e1000/e1000_main.c
@@ -2139,7 +2139,7 @@ static void e1000_set_rx_mode(struct net_device *netdev)
 			rctl |= E1000_RCTL_VFE;
 	}
 
-	if (netdev->uc.count > rar_entries - 1) {
+	if (netdev_uc_count(netdev) > rar_entries - 1) {
 		rctl |= E1000_RCTL_UPE;
 	} else if (!(netdev->flags & IFF_PROMISC)) {
 		rctl &= ~E1000_RCTL_UPE;
@@ -2162,7 +2162,7 @@ static void e1000_set_rx_mode(struct net_device *netdev)
 	 */
 	i = 1;
 	if (use_uc)
-		list_for_each_entry(ha, &netdev->uc.list, list) {
+		netdev_for_each_uc_addr(ha, netdev) {
 			if (i == rar_entries)
 				break;
 			e1000_rar_set(hw, ha->addr, i++);
diff --git a/drivers/net/igb/igb_main.c b/drivers/net/igb/igb_main.c
index d9679493c635..01cc29483e2f 100644
--- a/drivers/net/igb/igb_main.c
+++ b/drivers/net/igb/igb_main.c
@@ -2905,12 +2905,13 @@ static int igb_write_uc_addr_list(struct net_device *netdev)
 	int count = 0;
 
 	/* return ENOMEM indicating insufficient memory for addresses */
-	if (netdev->uc.count > rar_entries)
+	if (netdev_uc_count(netdev) > rar_entries)
 		return -ENOMEM;
 
-	if (netdev->uc.count && rar_entries) {
+	if (!netdev_uc_empty(netdev) && rar_entries) {
 		struct netdev_hw_addr *ha;
-		list_for_each_entry(ha, &netdev->uc.list, list) {
+
+		netdev_for_each_uc_addr(ha, netdev) {
 			if (!rar_entries)
 				break;
 			igb_rar_set_qsel(adapter, ha->addr,
diff --git a/drivers/net/ixgbe/ixgbe_common.c b/drivers/net/ixgbe/ixgbe_common.c
index 276c2aaa800b..eb49020903c1 100644
--- a/drivers/net/ixgbe/ixgbe_common.c
+++ b/drivers/net/ixgbe/ixgbe_common.c
@@ -28,7 +28,6 @@
 #include <linux/pci.h>
 #include <linux/delay.h>
 #include <linux/sched.h>
-#include <linux/list.h>
 #include <linux/netdevice.h>
 
 #include "ixgbe.h"
@@ -1347,7 +1346,7 @@ static void ixgbe_add_uc_addr(struct ixgbe_hw *hw, u8 *addr, u32 vmdq)
 /**
  *  ixgbe_update_uc_addr_list_generic - Updates MAC list of secondary addresses
  *  @hw: pointer to hardware structure
- *  @uc_list: the list of new addresses
+ *  @netdev: pointer to net device structure
  *
  *  The given list replaces any existing list.  Clears the secondary addrs from
  *  receive address registers.  Uses unused receive address registers for the
@@ -1357,7 +1356,7 @@ static void ixgbe_add_uc_addr(struct ixgbe_hw *hw, u8 *addr, u32 vmdq)
  *  manually putting the device into promiscuous mode.
  **/
 s32 ixgbe_update_uc_addr_list_generic(struct ixgbe_hw *hw,
-				      struct list_head *uc_list)
+				      struct net_device *netdev)
 {
 	u32 i;
 	u32 old_promisc_setting = hw->addr_ctrl.overflow_promisc;
@@ -1381,7 +1380,7 @@ s32 ixgbe_update_uc_addr_list_generic(struct ixgbe_hw *hw,
 	}
 
 	/* Add the new addresses */
-	list_for_each_entry(ha, uc_list, list) {
+	netdev_for_each_uc_addr(ha, netdev) {
 		hw_dbg(hw, " Adding the secondary addresses:\n");
 		ixgbe_add_uc_addr(hw, ha->addr, 0);
 	}
diff --git a/drivers/net/ixgbe/ixgbe_common.h b/drivers/net/ixgbe/ixgbe_common.h
index dfff0ffaa502..13606d4809c9 100644
--- a/drivers/net/ixgbe/ixgbe_common.h
+++ b/drivers/net/ixgbe/ixgbe_common.h
@@ -60,7 +60,7 @@ s32 ixgbe_update_mc_addr_list_generic(struct ixgbe_hw *hw, u8 *mc_addr_list,
                                       u32 mc_addr_count,
                                       ixgbe_mc_addr_itr func);
 s32 ixgbe_update_uc_addr_list_generic(struct ixgbe_hw *hw,
-				      struct list_head *uc_list);
+				      struct net_device *netdev);
 s32 ixgbe_enable_mc_generic(struct ixgbe_hw *hw);
 s32 ixgbe_disable_mc_generic(struct ixgbe_hw *hw);
 s32 ixgbe_enable_rx_dma_generic(struct ixgbe_hw *hw, u32 regval);
diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c
index ee41d331a35f..439645d2aeef 100644
--- a/drivers/net/ixgbe/ixgbe_main.c
+++ b/drivers/net/ixgbe/ixgbe_main.c
@@ -2568,7 +2568,7 @@ void ixgbe_set_rx_mode(struct net_device *netdev)
 	IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl);
 
 	/* reprogram secondary unicast list */
-	hw->mac.ops.update_uc_addr_list(hw, &netdev->uc.list);
+	hw->mac.ops.update_uc_addr_list(hw, netdev);
 
 	/* reprogram multicast list */
 	addr_count = netdev->mc_count;
diff --git a/drivers/net/ixgbe/ixgbe_type.h b/drivers/net/ixgbe/ixgbe_type.h
index b4caa7011a2b..0db67c19b2c4 100644
--- a/drivers/net/ixgbe/ixgbe_type.h
+++ b/drivers/net/ixgbe/ixgbe_type.h
@@ -30,7 +30,7 @@
 
 #include <linux/types.h>
 #include <linux/mdio.h>
-#include <linux/list.h>
+#include <linux/netdevice.h>
 
 /* Vendor ID */
 #define IXGBE_INTEL_VENDOR_ID   0x8086
@@ -2405,7 +2405,7 @@ struct ixgbe_mac_operations {
 	s32 (*set_vmdq)(struct ixgbe_hw *, u32, u32);
 	s32 (*clear_vmdq)(struct ixgbe_hw *, u32, u32);
 	s32 (*init_rx_addrs)(struct ixgbe_hw *);
-	s32 (*update_uc_addr_list)(struct ixgbe_hw *, struct list_head *);
+	s32 (*update_uc_addr_list)(struct ixgbe_hw *, struct net_device *);
 	s32 (*update_mc_addr_list)(struct ixgbe_hw *, u8 *, u32,
 	                           ixgbe_mc_addr_itr);
 	s32 (*enable_mc)(struct ixgbe_hw *);
diff --git a/drivers/net/mv643xx_eth.c b/drivers/net/mv643xx_eth.c
index af67af55efe7..e24072a9a979 100644
--- a/drivers/net/mv643xx_eth.c
+++ b/drivers/net/mv643xx_eth.c
@@ -55,7 +55,6 @@
 #include <linux/types.h>
 #include <linux/inet_lro.h>
 #include <asm/system.h>
-#include <linux/list.h>
 
 static char mv643xx_eth_driver_name[] = "mv643xx_eth";
 static char mv643xx_eth_driver_version[] = "1.4";
@@ -1697,7 +1696,7 @@ static u32 uc_addr_filter_mask(struct net_device *dev)
 		return 0;
 
 	nibbles = 1 << (dev->dev_addr[5] & 0x0f);
-	list_for_each_entry(ha, &dev->uc.list, list) {
+	netdev_for_each_uc_addr(ha, dev) {
 		if (memcmp(dev->dev_addr, ha->addr, 5))
 			return 0;
 		if ((dev->dev_addr[5] ^ ha->addr[5]) & 0xf0)
diff --git a/drivers/net/niu.c b/drivers/net/niu.c
index 0e260cfbff7b..af9a8647c7e8 100644
--- a/drivers/net/niu.c
+++ b/drivers/net/niu.c
@@ -6372,7 +6372,7 @@ static void niu_set_rx_mode(struct net_device *dev)
 	if ((dev->flags & IFF_ALLMULTI) || (dev->mc_count > 0))
 		np->flags |= NIU_FLAGS_MCAST;
 
-	alt_cnt = dev->uc.count;
+	alt_cnt = netdev_uc_count(dev);
 	if (alt_cnt > niu_num_alt_addr(np)) {
 		alt_cnt = 0;
 		np->flags |= NIU_FLAGS_PROMISC;
@@ -6381,7 +6381,7 @@ static void niu_set_rx_mode(struct net_device *dev)
 	if (alt_cnt) {
 		int index = 0;
 
-		list_for_each_entry(ha, &dev->uc.list, list) {
+		netdev_for_each_uc_addr(ha, dev) {
 			err = niu_set_alt_mac(np, index, ha->addr);
 			if (err)
 				printk(KERN_WARNING PFX "%s: Error %d "
diff --git a/drivers/net/stmmac/dwmac1000_core.c b/drivers/net/stmmac/dwmac1000_core.c
index 928eac05b912..d812e9cdb3db 100644
--- a/drivers/net/stmmac/dwmac1000_core.c
+++ b/drivers/net/stmmac/dwmac1000_core.c
@@ -83,7 +83,7 @@ static void dwmac1000_set_filter(struct net_device *dev)
 	unsigned int value = 0;
 
 	DBG(KERN_INFO "%s: # mcasts %d, # unicast %d\n",
-	    __func__, dev->mc_count, dev->uc.count);
+	    __func__, dev->mc_count, netdev_uc_count(dev));
 
 	if (dev->flags & IFF_PROMISC)
 		value = GMAC_FRAME_FILTER_PR;
@@ -117,7 +117,7 @@ static void dwmac1000_set_filter(struct net_device *dev)
 	}
 
 	/* Handle multiple unicast addresses (perfect filtering)*/
-	if (dev->uc.count > GMAC_MAX_UNICAST_ADDRESSES)
+	if (netdev_uc_count(dev) > GMAC_MAX_UNICAST_ADDRESSES)
 		/* Switch to promiscuous mode is more than 16 addrs
 		   are required */
 		value |= GMAC_FRAME_FILTER_PR;
@@ -125,9 +125,9 @@ static void dwmac1000_set_filter(struct net_device *dev)
 		int reg = 1;
 		struct netdev_hw_addr *ha;
 
-			list_for_each_entry(ha, &dev->uc.list, list) {
-				dwmac1000_set_umac_addr(ioaddr, ha->addr, reg);
-				reg++;
+		netdev_for_each_uc_addr(ha, dev) {
+			dwmac1000_set_umac_addr(ioaddr, ha->addr, reg);
+			reg++;
 		}
 	}
 
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index c708ecc3cb2e..088332a943f7 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -675,6 +675,7 @@ static void virtnet_set_rx_mode(struct net_device *dev)
 	struct virtio_net_ctrl_mac *mac_data;
 	struct dev_addr_list *addr;
 	struct netdev_hw_addr *ha;
+	int uc_count;
 	void *buf;
 	int i;
 
@@ -701,8 +702,9 @@ static void virtnet_set_rx_mode(struct net_device *dev)
 		dev_warn(&dev->dev, "Failed to %sable allmulti mode.\n",
 			 allmulti ? "en" : "dis");
 
+	uc_count = netdev_uc_count(dev);
 	/* MAC filter - use one buffer for both lists */
-	mac_data = buf = kzalloc(((dev->uc.count + dev->mc_count) * ETH_ALEN) +
+	mac_data = buf = kzalloc(((uc_count + dev->mc_count) * ETH_ALEN) +
 				 (2 * sizeof(mac_data->entries)), GFP_ATOMIC);
 	if (!buf) {
 		dev_warn(&dev->dev, "No memory for MAC address buffer\n");
@@ -712,16 +714,16 @@ static void virtnet_set_rx_mode(struct net_device *dev)
 	sg_init_table(sg, 2);
 
 	/* Store the unicast list and count in the front of the buffer */
-	mac_data->entries = dev->uc.count;
+	mac_data->entries = uc_count;
 	i = 0;
-	list_for_each_entry(ha, &dev->uc.list, list)
+	netdev_for_each_uc_addr(ha, dev)
 		memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN);
 
 	sg_set_buf(&sg[0], mac_data,
-		   sizeof(mac_data->entries) + (dev->uc.count * ETH_ALEN));
+		   sizeof(mac_data->entries) + (uc_count * ETH_ALEN));
 
 	/* multicast list and count fill the end */
-	mac_data = (void *)&mac_data->macs[dev->uc.count][0];
+	mac_data = (void *)&mac_data->macs[uc_count][0];
 
 	mac_data->entries = dev->mc_count;
 	addr = dev->mc_list;
diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c
index c3258b0dd649..51fde6f2e0b8 100644
--- a/drivers/s390/net/qeth_l2_main.c
+++ b/drivers/s390/net/qeth_l2_main.c
@@ -622,7 +622,7 @@ static void qeth_l2_set_multicast_list(struct net_device *dev)
 	for (dm = dev->mc_list; dm; dm = dm->next)
 		qeth_l2_add_mc(card, dm->da_addr, 0);
 
-	list_for_each_entry(ha, &dev->uc.list, list)
+	netdev_for_each_uc_addr(ha, dev)
 		qeth_l2_add_mc(card, ha->addr, 1);
 
 	spin_unlock_bh(&card->mclock);
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index b5fb51d0b8b1..93a32a5ca74f 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -263,6 +263,11 @@ struct netdev_hw_addr_list {
 	int			count;
 };
 
+#define netdev_uc_count(dev) ((dev)->uc.count)
+#define netdev_uc_empty(dev) ((dev)->uc.count == 0)
+#define netdev_for_each_uc_addr(ha, dev) \
+	list_for_each_entry(ha, &dev->uc.list, list)
+
 struct hh_cache {
 	struct hh_cache *hh_next;	/* Next entry			     */
 	atomic_t	hh_refcnt;	/* number of users                   */
diff --git a/net/core/dev.c b/net/core/dev.c
index 4fad9db417b1..2cba5c521e56 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3665,10 +3665,10 @@ void __dev_set_rx_mode(struct net_device *dev)
 		/* Unicast addresses changes may only happen under the rtnl,
 		 * therefore calling __dev_set_promiscuity here is safe.
 		 */
-		if (dev->uc.count > 0 && !dev->uc_promisc) {
+		if (!netdev_uc_empty(dev) && !dev->uc_promisc) {
 			__dev_set_promiscuity(dev, 1);
 			dev->uc_promisc = 1;
-		} else if (dev->uc.count == 0 && dev->uc_promisc) {
+		} else if (netdev_uc_empty(dev) && dev->uc_promisc) {
 			__dev_set_promiscuity(dev, -1);
 			dev->uc_promisc = 0;
 		}
-- 
cgit v1.2.3


From 6d3faf6f431bafb25f4b9926c50a7e5c267738c6 Mon Sep 17 00:00:00 2001
From: Stefan Richter <stefanr@s5r6.in-berlin.de>
Date: Sun, 24 Jan 2010 14:48:00 +0100
Subject: firewire: cdev: add_descriptor documentation fix

struct fw_cdev_add_descriptor.length is in quadlets, not in bytes.
Also remove any doubts about the endianess of descriptor data.

Signed-off-by: Stefan Richter <stefanr@s5r6.in-berlin.de>
---
 include/linux/firewire-cdev.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/firewire-cdev.h b/include/linux/firewire-cdev.h
index 1f716d9f714b..520ecf86cbb3 100644
--- a/include/linux/firewire-cdev.h
+++ b/include/linux/firewire-cdev.h
@@ -380,7 +380,7 @@ struct fw_cdev_initiate_bus_reset {
  * @immediate:	If non-zero, immediate key to insert before pointer
  * @key:	Upper 8 bits of root directory pointer
  * @data:	Userspace pointer to contents of descriptor block
- * @length:	Length of descriptor block data, in bytes
+ * @length:	Length of descriptor block data, in quadlets
  * @handle:	Handle to the descriptor, written by the kernel
  *
  * Add a descriptor block and optionally a preceding immediate key to the local
@@ -394,6 +394,8 @@ struct fw_cdev_initiate_bus_reset {
  * If not 0, the @immediate field specifies an immediate key which will be
  * inserted before the root directory pointer.
  *
+ * @immediate, @key, and @data array elements are CPU-endian quadlets.
+ *
  * If successful, the kernel adds the descriptor and writes back a handle to the
  * kernel-side object to be used for later removal of the descriptor block and
  * immediate key.
-- 
cgit v1.2.3


From abd50713944c8ea9e0af5b7bffa0aacae21cc91a Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Tue, 26 Jan 2010 18:50:16 +0100
Subject: perf: Reimplement frequency driven sampling

There was a bug in the old period code that caused intel_pmu_enable_all()
or native_write_msr_safe() to show up quite high in the profiles.

In staring at that code it made my head hurt, so I rewrote it in a
hopefully simpler fashion. Its now fully symetric between tick and
overflow driven adjustments and uses less data to boot.

The only complication is that it basically wants to do a u128 division.
The code approximates that in a rather simple truncate until it fits
fashion, taking care to balance the terms while truncating.

This version does not generate that sampling artefact.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <new-submission>
Cc: <stable@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_event.h |   5 +-
 kernel/perf_event.c        | 132 +++++++++++++++++++++++++++++++--------------
 2 files changed, 94 insertions(+), 43 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index c6f812e4d058..72b2615600d8 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -498,9 +498,8 @@ struct hw_perf_event {
 	atomic64_t			period_left;
 	u64				interrupts;
 
-	u64				freq_count;
-	u64				freq_interrupts;
-	u64				freq_stamp;
+	u64				freq_time_stamp;
+	u64				freq_count_stamp;
 #endif
 };
 
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index edc46b92b508..251fb9552492 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -1423,14 +1423,83 @@ void perf_event_task_sched_in(struct task_struct *task)
 
 static void perf_log_throttle(struct perf_event *event, int enable);
 
-static void perf_adjust_period(struct perf_event *event, u64 events)
+static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count)
+{
+	u64 frequency = event->attr.sample_freq;
+	u64 sec = NSEC_PER_SEC;
+	u64 divisor, dividend;
+
+	int count_fls, nsec_fls, frequency_fls, sec_fls;
+
+	count_fls = fls64(count);
+	nsec_fls = fls64(nsec);
+	frequency_fls = fls64(frequency);
+	sec_fls = 30;
+
+	/*
+	 * We got @count in @nsec, with a target of sample_freq HZ
+	 * the target period becomes:
+	 *
+	 *             @count * 10^9
+	 * period = -------------------
+	 *          @nsec * sample_freq
+	 *
+	 */
+
+	/*
+	 * Reduce accuracy by one bit such that @a and @b converge
+	 * to a similar magnitude.
+	 */
+#define REDUCE_FLS(a, b) 		\
+do {					\
+	if (a##_fls > b##_fls) {	\
+		a >>= 1;		\
+		a##_fls--;		\
+	} else {			\
+		b >>= 1;		\
+		b##_fls--;		\
+	}				\
+} while (0)
+
+	/*
+	 * Reduce accuracy until either term fits in a u64, then proceed with
+	 * the other, so that finally we can do a u64/u64 division.
+	 */
+	while (count_fls + sec_fls > 64 && nsec_fls + frequency_fls > 64) {
+		REDUCE_FLS(nsec, frequency);
+		REDUCE_FLS(sec, count);
+	}
+
+	if (count_fls + sec_fls > 64) {
+		divisor = nsec * frequency;
+
+		while (count_fls + sec_fls > 64) {
+			REDUCE_FLS(count, sec);
+			divisor >>= 1;
+		}
+
+		dividend = count * sec;
+	} else {
+		dividend = count * sec;
+
+		while (nsec_fls + frequency_fls > 64) {
+			REDUCE_FLS(nsec, frequency);
+			dividend >>= 1;
+		}
+
+		divisor = nsec * frequency;
+	}
+
+	return div64_u64(dividend, divisor);
+}
+
+static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count)
 {
 	struct hw_perf_event *hwc = &event->hw;
 	u64 period, sample_period;
 	s64 delta;
 
-	events *= hwc->sample_period;
-	period = div64_u64(events, event->attr.sample_freq);
+	period = perf_calculate_period(event, nsec, count);
 
 	delta = (s64)(period - hwc->sample_period);
 	delta = (delta + 7) / 8; /* low pass filter */
@@ -1441,13 +1510,22 @@ static void perf_adjust_period(struct perf_event *event, u64 events)
 		sample_period = 1;
 
 	hwc->sample_period = sample_period;
+
+	if (atomic64_read(&hwc->period_left) > 8*sample_period) {
+		perf_disable();
+		event->pmu->disable(event);
+		atomic64_set(&hwc->period_left, 0);
+		event->pmu->enable(event);
+		perf_enable();
+	}
 }
 
 static void perf_ctx_adjust_freq(struct perf_event_context *ctx)
 {
 	struct perf_event *event;
 	struct hw_perf_event *hwc;
-	u64 interrupts, freq;
+	u64 interrupts, now;
+	s64 delta;
 
 	raw_spin_lock(&ctx->lock);
 	list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
@@ -1468,44 +1546,18 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx)
 		if (interrupts == MAX_INTERRUPTS) {
 			perf_log_throttle(event, 1);
 			event->pmu->unthrottle(event);
-			interrupts = 2*sysctl_perf_event_sample_rate/HZ;
 		}
 
 		if (!event->attr.freq || !event->attr.sample_freq)
 			continue;
 
-		/*
-		 * if the specified freq < HZ then we need to skip ticks
-		 */
-		if (event->attr.sample_freq < HZ) {
-			freq = event->attr.sample_freq;
-
-			hwc->freq_count += freq;
-			hwc->freq_interrupts += interrupts;
-
-			if (hwc->freq_count < HZ)
-				continue;
-
-			interrupts = hwc->freq_interrupts;
-			hwc->freq_interrupts = 0;
-			hwc->freq_count -= HZ;
-		} else
-			freq = HZ;
-
-		perf_adjust_period(event, freq * interrupts);
+		event->pmu->read(event);
+		now = atomic64_read(&event->count);
+		delta = now - hwc->freq_count_stamp;
+		hwc->freq_count_stamp = now;
 
-		/*
-		 * In order to avoid being stalled by an (accidental) huge
-		 * sample period, force reset the sample period if we didn't
-		 * get any events in this freq period.
-		 */
-		if (!interrupts) {
-			perf_disable();
-			event->pmu->disable(event);
-			atomic64_set(&hwc->period_left, 0);
-			event->pmu->enable(event);
-			perf_enable();
-		}
+		if (delta > 0)
+			perf_adjust_period(event, TICK_NSEC, delta);
 	}
 	raw_spin_unlock(&ctx->lock);
 }
@@ -3768,12 +3820,12 @@ static int __perf_event_overflow(struct perf_event *event, int nmi,
 
 	if (event->attr.freq) {
 		u64 now = perf_clock();
-		s64 delta = now - hwc->freq_stamp;
+		s64 delta = now - hwc->freq_time_stamp;
 
-		hwc->freq_stamp = now;
+		hwc->freq_time_stamp = now;
 
-		if (delta > 0 && delta < TICK_NSEC)
-			perf_adjust_period(event, NSEC_PER_SEC / (int)delta);
+		if (delta > 0 && delta < 2*TICK_NSEC)
+			perf_adjust_period(event, delta, hwc->last_period);
 	}
 
 	/*
-- 
cgit v1.2.3


From 0531b2aac59c2296570ac52bfc032ef2ace7d5e1 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Wed, 27 Jan 2010 09:20:03 -0800
Subject: mm: add new 'read_cache_page_gfp()' helper function

It's a simplified 'read_cache_page()' which takes a page allocation
flag, so that different paths can control how aggressive the memory
allocations are that populate a address space.

In particular, the intel GPU object mapping code wants to be able to do
a certain amount of own internal memory management by automatically
shrinking the address space when memory starts getting tight.  This
allows it to dynamically use different memory allocation policies on a
per-allocation basis, rather than depend on the (static) address space
gfp policy.

The actual new function is a one-liner, but re-organizing the helper
functions to the point where you can do this with a single line of code
is what most of the patch is all about.

Tested-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/pagemap.h |   2 +
 mm/filemap.c            | 100 ++++++++++++++++++++++++++++++++----------------
 2 files changed, 70 insertions(+), 32 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index ed5d7501e181..3c62ed408492 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -253,6 +253,8 @@ extern struct page * read_cache_page_async(struct address_space *mapping,
 extern struct page * read_cache_page(struct address_space *mapping,
 				pgoff_t index, filler_t *filler,
 				void *data);
+extern struct page * read_cache_page_gfp(struct address_space *mapping,
+				pgoff_t index, gfp_t gfp_mask);
 extern int read_cache_pages(struct address_space *mapping,
 		struct list_head *pages, filler_t *filler, void *data);
 
diff --git a/mm/filemap.c b/mm/filemap.c
index 96ac6b0eb6cb..e3736923220e 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1634,14 +1634,15 @@ EXPORT_SYMBOL(generic_file_readonly_mmap);
 static struct page *__read_cache_page(struct address_space *mapping,
 				pgoff_t index,
 				int (*filler)(void *,struct page*),
-				void *data)
+				void *data,
+				gfp_t gfp)
 {
 	struct page *page;
 	int err;
 repeat:
 	page = find_get_page(mapping, index);
 	if (!page) {
-		page = page_cache_alloc_cold(mapping);
+		page = __page_cache_alloc(gfp | __GFP_COLD);
 		if (!page)
 			return ERR_PTR(-ENOMEM);
 		err = add_to_page_cache_lru(page, mapping, index, GFP_KERNEL);
@@ -1661,31 +1662,18 @@ repeat:
 	return page;
 }
 
-/**
- * read_cache_page_async - read into page cache, fill it if needed
- * @mapping:	the page's address_space
- * @index:	the page index
- * @filler:	function to perform the read
- * @data:	destination for read data
- *
- * Same as read_cache_page, but don't wait for page to become unlocked
- * after submitting it to the filler.
- *
- * Read into the page cache. If a page already exists, and PageUptodate() is
- * not set, try to fill the page but don't wait for it to become unlocked.
- *
- * If the page does not get brought uptodate, return -EIO.
- */
-struct page *read_cache_page_async(struct address_space *mapping,
+static struct page *do_read_cache_page(struct address_space *mapping,
 				pgoff_t index,
 				int (*filler)(void *,struct page*),
-				void *data)
+				void *data,
+				gfp_t gfp)
+
 {
 	struct page *page;
 	int err;
 
 retry:
-	page = __read_cache_page(mapping, index, filler, data);
+	page = __read_cache_page(mapping, index, filler, data, gfp);
 	if (IS_ERR(page))
 		return page;
 	if (PageUptodate(page))
@@ -1710,8 +1698,67 @@ out:
 	mark_page_accessed(page);
 	return page;
 }
+
+/**
+ * read_cache_page_async - read into page cache, fill it if needed
+ * @mapping:	the page's address_space
+ * @index:	the page index
+ * @filler:	function to perform the read
+ * @data:	destination for read data
+ *
+ * Same as read_cache_page, but don't wait for page to become unlocked
+ * after submitting it to the filler.
+ *
+ * Read into the page cache. If a page already exists, and PageUptodate() is
+ * not set, try to fill the page but don't wait for it to become unlocked.
+ *
+ * If the page does not get brought uptodate, return -EIO.
+ */
+struct page *read_cache_page_async(struct address_space *mapping,
+				pgoff_t index,
+				int (*filler)(void *,struct page*),
+				void *data)
+{
+	return do_read_cache_page(mapping, index, filler, data, mapping_gfp_mask(mapping));
+}
 EXPORT_SYMBOL(read_cache_page_async);
 
+static struct page *wait_on_page_read(struct page *page)
+{
+	if (!IS_ERR(page)) {
+		wait_on_page_locked(page);
+		if (!PageUptodate(page)) {
+			page_cache_release(page);
+			page = ERR_PTR(-EIO);
+		}
+	}
+	return page;
+}
+
+/**
+ * read_cache_page_gfp - read into page cache, using specified page allocation flags.
+ * @mapping:	the page's address_space
+ * @index:	the page index
+ * @gfp:	the page allocator flags to use if allocating
+ *
+ * This is the same as "read_mapping_page(mapping, index, NULL)", but with
+ * any new page allocations done using the specified allocation flags. Note
+ * that the Radix tree operations will still use GFP_KERNEL, so you can't
+ * expect to do this atomically or anything like that - but you can pass in
+ * other page requirements.
+ *
+ * If the page does not get brought uptodate, return -EIO.
+ */
+struct page *read_cache_page_gfp(struct address_space *mapping,
+				pgoff_t index,
+				gfp_t gfp)
+{
+	filler_t *filler = (filler_t *)mapping->a_ops->readpage;
+
+	return wait_on_page_read(do_read_cache_page(mapping, index, filler, NULL, gfp));
+}
+EXPORT_SYMBOL(read_cache_page_gfp);
+
 /**
  * read_cache_page - read into page cache, fill it if needed
  * @mapping:	the page's address_space
@@ -1729,18 +1776,7 @@ struct page *read_cache_page(struct address_space *mapping,
 				int (*filler)(void *,struct page*),
 				void *data)
 {
-	struct page *page;
-
-	page = read_cache_page_async(mapping, index, filler, data);
-	if (IS_ERR(page))
-		goto out;
-	wait_on_page_locked(page);
-	if (!PageUptodate(page)) {
-		page_cache_release(page);
-		page = ERR_PTR(-EIO);
-	}
- out:
-	return page;
+	return wait_on_page_read(read_cache_page_async(mapping, index, filler, data));
 }
 EXPORT_SYMBOL(read_cache_page);
 
-- 
cgit v1.2.3


From 6016a363f6b56b46b24655bcfc0499b715851cf3 Mon Sep 17 00:00:00 2001
From: Grant Likely <grant.likely@secretlab.ca>
Date: Thu, 28 Jan 2010 14:06:53 -0700
Subject: of: unify phandle name in struct device_node

In struct device_node, the phandle is named 'linux_phandle' for PowerPC
and MicroBlaze, and 'node' for SPARC.  There is no good reason for the
difference, it is just an artifact of the code diverging over a couple
of years.  This patch renames both to simply .phandle.

Note: the .node also existed in PowerPC/MicroBlaze, but the only user
seems to be arch/powerpc/platforms/powermac/pfunc_core.c.  It doesn't
look like the assignment between .linux_phandle and .node is
significantly different enough to warrant the separate code paths
unless ibm,phandle properties actually appear in Apple device trees.

I think it is safe to eliminate the old .node property and use
phandle everywhere.

Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
Acked-by: David S. Miller <davem@davemloft.net>
Tested-by: Wolfram Sang <w.sang@pengutronix.de>
Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/microblaze/kernel/of_platform.c         |  2 +-
 arch/microblaze/kernel/prom.c                |  2 +-
 arch/powerpc/kernel/of_platform.c            |  2 +-
 arch/powerpc/kernel/prom.c                   |  6 +++---
 arch/powerpc/platforms/cell/spu_manage.c     |  6 +++---
 arch/powerpc/platforms/powermac/pfunc_core.c |  2 +-
 arch/sparc/kernel/devices.c                  |  2 +-
 arch/sparc/kernel/of_device_32.c             |  2 +-
 arch/sparc/kernel/of_device_64.c             |  2 +-
 arch/sparc/kernel/prom_common.c              |  8 ++++----
 arch/sparc/kernel/smp_64.c                   |  2 +-
 drivers/of/fdt.c                             |  7 +++----
 drivers/sbus/char/openprom.c                 | 10 +++++-----
 drivers/video/aty/atyfb_base.c               |  2 +-
 include/linux/of.h                           |  5 +----
 sound/aoa/fabrics/layout.c                   |  2 +-
 16 files changed, 29 insertions(+), 33 deletions(-)

(limited to 'include/linux')

diff --git a/arch/microblaze/kernel/of_platform.c b/arch/microblaze/kernel/of_platform.c
index acf4574d0f18..1c6d684996d7 100644
--- a/arch/microblaze/kernel/of_platform.c
+++ b/arch/microblaze/kernel/of_platform.c
@@ -185,7 +185,7 @@ EXPORT_SYMBOL(of_find_device_by_node);
 static int of_dev_phandle_match(struct device *dev, void *data)
 {
 	phandle *ph = data;
-	return to_of_device(dev)->node->linux_phandle == *ph;
+	return to_of_device(dev)->node->phandle == *ph;
 }
 
 struct of_device *of_find_device_by_phandle(phandle ph)
diff --git a/arch/microblaze/kernel/prom.c b/arch/microblaze/kernel/prom.c
index 46407e643926..6eff83a71218 100644
--- a/arch/microblaze/kernel/prom.c
+++ b/arch/microblaze/kernel/prom.c
@@ -342,7 +342,7 @@ struct device_node *of_find_node_by_phandle(phandle handle)
 
 	read_lock(&devtree_lock);
 	for (np = allnodes; np != NULL; np = np->allnext)
-		if (np->linux_phandle == handle)
+		if (np->phandle == handle)
 			break;
 	of_node_get(np);
 	read_unlock(&devtree_lock);
diff --git a/arch/powerpc/kernel/of_platform.c b/arch/powerpc/kernel/of_platform.c
index 1a4fc0d11a03..666d08db319e 100644
--- a/arch/powerpc/kernel/of_platform.c
+++ b/arch/powerpc/kernel/of_platform.c
@@ -214,7 +214,7 @@ EXPORT_SYMBOL(of_find_device_by_node);
 static int of_dev_phandle_match(struct device *dev, void *data)
 {
 	phandle *ph = data;
-	return to_of_device(dev)->node->linux_phandle == *ph;
+	return to_of_device(dev)->node->phandle == *ph;
 }
 
 struct of_device *of_find_device_by_phandle(phandle ph)
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index deccd91d7e81..1ed2ec2ea05b 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -778,7 +778,7 @@ struct device_node *of_find_node_by_phandle(phandle handle)
 
 	read_lock(&devtree_lock);
 	for (np = allnodes; np != 0; np = np->allnext)
-		if (np->linux_phandle == handle)
+		if (np->phandle == handle)
 			break;
 	of_node_get(np);
 	read_unlock(&devtree_lock);
@@ -907,9 +907,9 @@ static int of_finish_dynamic_node(struct device_node *node)
 	if (machine_is(powermac))
 		return -ENODEV;
 
-	/* fix up new node's linux_phandle field */
+	/* fix up new node's phandle field */
 	if ((ibm_phandle = of_get_property(node, "ibm,phandle", NULL)))
-		node->linux_phandle = *ibm_phandle;
+		node->phandle = *ibm_phandle;
 
 out:
 	of_node_put(parent);
diff --git a/arch/powerpc/platforms/cell/spu_manage.c b/arch/powerpc/platforms/cell/spu_manage.c
index 4c506c1463cd..891f18e337a2 100644
--- a/arch/powerpc/platforms/cell/spu_manage.c
+++ b/arch/powerpc/platforms/cell/spu_manage.c
@@ -457,7 +457,7 @@ neighbour_spu(int cbe, struct device_node *target, struct device_node *avoid)
 			continue;
 		vic_handles = of_get_property(spu_dn, "vicinity", &lenp);
 		for (i=0; i < (lenp / sizeof(phandle)); i++) {
-			if (vic_handles[i] == target->linux_phandle)
+			if (vic_handles[i] == target->phandle)
 				return spu;
 		}
 	}
@@ -499,7 +499,7 @@ static void init_affinity_node(int cbe)
 
 			if (strcmp(name, "spe") == 0) {
 				spu = devnode_spu(cbe, vic_dn);
-				avoid_ph = last_spu_dn->linux_phandle;
+				avoid_ph = last_spu_dn->phandle;
 			} else {
 				/*
 				 * "mic-tm" and "bif0" nodes do not have
@@ -514,7 +514,7 @@ static void init_affinity_node(int cbe)
 					last_spu->has_mem_affinity = 1;
 					spu->has_mem_affinity = 1;
 				}
-				avoid_ph = vic_dn->linux_phandle;
+				avoid_ph = vic_dn->phandle;
 			}
 
 			list_add_tail(&spu->aff_list, &last_spu->aff_list);
diff --git a/arch/powerpc/platforms/powermac/pfunc_core.c b/arch/powerpc/platforms/powermac/pfunc_core.c
index 96d5ce50364e..ede49e78a8da 100644
--- a/arch/powerpc/platforms/powermac/pfunc_core.c
+++ b/arch/powerpc/platforms/powermac/pfunc_core.c
@@ -842,7 +842,7 @@ struct pmf_function *__pmf_find_function(struct device_node *target,
 	list_for_each_entry(func, &dev->functions, link) {
 		if (name && strcmp(name, func->name))
 			continue;
-		if (func->phandle && target->node != func->phandle)
+		if (func->phandle && target->phandle != func->phandle)
 			continue;
 		if ((func->flags & flags) == 0)
 			continue;
diff --git a/arch/sparc/kernel/devices.c b/arch/sparc/kernel/devices.c
index b171ae8de90d..b062de9424a4 100644
--- a/arch/sparc/kernel/devices.c
+++ b/arch/sparc/kernel/devices.c
@@ -59,7 +59,7 @@ static int __cpu_find_by(int (*compare)(int, int, void *), void *compare_arg,
 
 	cur_inst = 0;
 	for_each_node_by_type(dp, "cpu") {
-		int err = check_cpu_node(dp->node, &cur_inst,
+		int err = check_cpu_node(dp->phandle, &cur_inst,
 					 compare, compare_arg,
 					 prom_node, mid);
 		if (!err) {
diff --git a/arch/sparc/kernel/of_device_32.c b/arch/sparc/kernel/of_device_32.c
index 4c26eb59e742..09138d403c7f 100644
--- a/arch/sparc/kernel/of_device_32.c
+++ b/arch/sparc/kernel/of_device_32.c
@@ -433,7 +433,7 @@ build_resources:
 	if (!parent)
 		dev_set_name(&op->dev, "root");
 	else
-		dev_set_name(&op->dev, "%08x", dp->node);
+		dev_set_name(&op->dev, "%08x", dp->phandle);
 
 	if (of_device_register(op)) {
 		printk("%s: Could not register of device.\n",
diff --git a/arch/sparc/kernel/of_device_64.c b/arch/sparc/kernel/of_device_64.c
index 881947e59e95..036f18ae59a6 100644
--- a/arch/sparc/kernel/of_device_64.c
+++ b/arch/sparc/kernel/of_device_64.c
@@ -666,7 +666,7 @@ static struct of_device * __init scan_one_device(struct device_node *dp,
 	if (!parent)
 		dev_set_name(&op->dev, "root");
 	else
-		dev_set_name(&op->dev, "%08x", dp->node);
+		dev_set_name(&op->dev, "%08x", dp->phandle);
 
 	if (of_device_register(op)) {
 		printk("%s: Could not register of device.\n",
diff --git a/arch/sparc/kernel/prom_common.c b/arch/sparc/kernel/prom_common.c
index d80a65d9e893..5832e13dfeeb 100644
--- a/arch/sparc/kernel/prom_common.c
+++ b/arch/sparc/kernel/prom_common.c
@@ -42,7 +42,7 @@ struct device_node *of_find_node_by_phandle(phandle handle)
 	struct device_node *np;
 
 	for (np = allnodes; np; np = np->allnext)
-		if (np->node == handle)
+		if (np->phandle == handle)
 			break;
 
 	return np;
@@ -89,7 +89,7 @@ int of_set_property(struct device_node *dp, const char *name, void *val, int len
 			void *old_val = prop->value;
 			int ret;
 
-			ret = prom_setprop(dp->node, name, val, len);
+			ret = prom_setprop(dp->phandle, name, val, len);
 
 			err = -EINVAL;
 			if (ret >= 0) {
@@ -236,7 +236,7 @@ static struct device_node * __init prom_create_node(phandle node,
 
 	dp->name = get_one_property(node, "name");
 	dp->type = get_one_property(node, "device_type");
-	dp->node = node;
+	dp->phandle = node;
 
 	dp->properties = build_prop_list(node);
 
@@ -313,7 +313,7 @@ void __init prom_build_devicetree(void)
 
 	nextp = &allnodes->allnext;
 	allnodes->child = prom_build_tree(allnodes,
-					  prom_getchild(allnodes->node),
+					  prom_getchild(allnodes->phandle),
 					  &nextp);
 	of_console_init();
 
diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
index aa36223497b9..eb14844a0021 100644
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -370,7 +370,7 @@ static int __cpuinit smp_boot_one_cpu(unsigned int cpu)
 	} else {
 		struct device_node *dp = of_find_node_by_cpuid(cpu);
 
-		prom_startcpu(dp->node, entry, cookie);
+		prom_startcpu(dp->phandle, entry, cookie);
 	}
 
 	for (timeout = 0; timeout < 50000; timeout++) {
diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
index 616a4767a950..7f8861121a31 100644
--- a/drivers/of/fdt.c
+++ b/drivers/of/fdt.c
@@ -310,12 +310,11 @@ unsigned long __init unflatten_dt_node(unsigned long mem,
 					__alignof__(struct property));
 		if (allnextpp) {
 			if (strcmp(pname, "linux,phandle") == 0) {
-				np->node = *((u32 *)*p);
-				if (np->linux_phandle == 0)
-					np->linux_phandle = np->node;
+				if (np->phandle == 0)
+					np->phandle = *((u32 *)*p);
 			}
 			if (strcmp(pname, "ibm,phandle") == 0)
-				np->linux_phandle = *((u32 *)*p);
+				np->phandle = *((u32 *)*p);
 			pp->name = pname;
 			pp->length = sz;
 			pp->value = (void *)*p;
diff --git a/drivers/sbus/char/openprom.c b/drivers/sbus/char/openprom.c
index 75ac19b1192f..fc2f676e984d 100644
--- a/drivers/sbus/char/openprom.c
+++ b/drivers/sbus/char/openprom.c
@@ -233,7 +233,7 @@ static int opromnext(void __user *argp, unsigned int cmd, struct device_node *dp
 
 	ph = 0;
 	if (dp)
-		ph = dp->node;
+		ph = dp->phandle;
 
 	data->current_node = dp;
 	*((int *) op->oprom_array) = ph;
@@ -256,7 +256,7 @@ static int oprompci2node(void __user *argp, struct device_node *dp, struct openp
 
 		dp = pci_device_to_OF_node(pdev);
 		data->current_node = dp;
-		*((int *)op->oprom_array) = dp->node;
+		*((int *)op->oprom_array) = dp->phandle;
 		op->oprom_size = sizeof(int);
 		err = copyout(argp, op, bufsize + sizeof(int));
 
@@ -273,7 +273,7 @@ static int oprompath2node(void __user *argp, struct device_node *dp, struct open
 
 	dp = of_find_node_by_path(op->oprom_array);
 	if (dp)
-		ph = dp->node;
+		ph = dp->phandle;
 	data->current_node = dp;
 	*((int *)op->oprom_array) = ph;
 	op->oprom_size = sizeof(int);
@@ -540,7 +540,7 @@ static int opiocgetnext(unsigned int cmd, void __user *argp)
 		}
 	}
 	if (dp)
-		nd = dp->node;
+		nd = dp->phandle;
 	if (copy_to_user(argp, &nd, sizeof(phandle)))
 		return -EFAULT;
 
@@ -570,7 +570,7 @@ static int openprom_bsd_ioctl(struct inode * inode, struct file * file,
 	case OPIOCGETOPTNODE:
 		BUILD_BUG_ON(sizeof(phandle) != sizeof(int));
 
-		if (copy_to_user(argp, &options_node->node, sizeof(phandle)))
+		if (copy_to_user(argp, &options_node->phandle, sizeof(phandle)))
 			return -EFAULT;
 
 		return 0;
diff --git a/drivers/video/aty/atyfb_base.c b/drivers/video/aty/atyfb_base.c
index 913b4a47ae52..bb20987d58af 100644
--- a/drivers/video/aty/atyfb_base.c
+++ b/drivers/video/aty/atyfb_base.c
@@ -3104,7 +3104,7 @@ static int __devinit atyfb_setup_sparc(struct pci_dev *pdev,
 	}
 
 	dp = pci_device_to_OF_node(pdev);
-	if (node == dp->node) {
+	if (node == dp->phandle) {
 		struct fb_var_screeninfo *var = &default_var;
 		unsigned int N, P, Q, M, T, R;
 		u32 v_total, h_total;
diff --git a/include/linux/of.h b/include/linux/of.h
index d4c014a35ea5..dbabf86e0b7a 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -39,10 +39,7 @@ struct of_irq_controller;
 struct device_node {
 	const char *name;
 	const char *type;
-	phandle	node;
-#if !defined(CONFIG_SPARC)
-	phandle linux_phandle;
-#endif
+	phandle phandle;
 	char	*full_name;
 
 	struct	property *properties;
diff --git a/sound/aoa/fabrics/layout.c b/sound/aoa/fabrics/layout.c
index 586965f9605f..7a437da05646 100644
--- a/sound/aoa/fabrics/layout.c
+++ b/sound/aoa/fabrics/layout.c
@@ -768,7 +768,7 @@ static int check_codec(struct aoa_codec *codec,
 				"required property %s not present\n", propname);
 			return -ENODEV;
 		}
-		if (*ref != codec->node->linux_phandle) {
+		if (*ref != codec->node->phandle) {
 			printk(KERN_INFO "snd-aoa-fabric-layout: "
 				"%s doesn't match!\n", propname);
 			return -ENODEV;
-- 
cgit v1.2.3


From 430ad5a600a83956749307b13257c464c3826b55 Mon Sep 17 00:00:00 2001
From: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
Date: Thu, 28 Jan 2010 09:32:29 +0800
Subject: perf: Factorize trace events raw sample buffer operations

Introduce ftrace_perf_buf_prepare() and ftrace_perf_buf_submit() to
gather the common code that operates on raw events sampling buffer.
This cleans up redundant code between regular trace events, syscall
events and kprobe events.

Changelog v1->v2:
- Rename function name as per Masami and Frederic's suggestion
- Add __kprobes for ftrace_perf_buf_prepare() and make
  ftrace_perf_buf_submit() inline as per Masami's suggestion
- Export ftrace_perf_buf_prepare since modules will use it

Signed-off-by: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
Acked-by: Masami Hiramatsu <mhiramat@redhat.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Jason Baron <jbaron@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
LKML-Reference: <4B60E92D.9000808@cn.fujitsu.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
---
 include/linux/ftrace_event.h       | 18 ++++++--
 include/trace/ftrace.h             | 48 +++------------------
 kernel/trace/trace_event_profile.c | 52 ++++++++++++++++++++---
 kernel/trace/trace_kprobe.c        | 86 +++++---------------------------------
 kernel/trace/trace_syscalls.c      | 71 +++++--------------------------
 5 files changed, 88 insertions(+), 187 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 0a09e758c7d3..cd95919d9ff3 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -5,6 +5,7 @@
 #include <linux/trace_seq.h>
 #include <linux/percpu.h>
 #include <linux/hardirq.h>
+#include <linux/perf_event.h>
 
 struct trace_array;
 struct tracer;
@@ -138,9 +139,6 @@ struct ftrace_event_call {
 
 #define FTRACE_MAX_PROFILE_SIZE	2048
 
-extern char *perf_trace_buf;
-extern char *perf_trace_buf_nmi;
-
 #define MAX_FILTER_PRED		32
 #define MAX_FILTER_STR_VAL	256	/* Should handle KSYM_SYMBOL_LEN */
 
@@ -195,6 +193,20 @@ extern void ftrace_profile_disable(int event_id);
 extern int ftrace_profile_set_filter(struct perf_event *event, int event_id,
 				     char *filter_str);
 extern void ftrace_profile_free_filter(struct perf_event *event);
+extern void *
+ftrace_perf_buf_prepare(int size, unsigned short type, int *rctxp,
+			 unsigned long *irq_flags);
+
+static inline void
+ftrace_perf_buf_submit(void *raw_data, int size, int rctx, u64 addr,
+		       u64 count, unsigned long irq_flags)
+{
+	struct trace_entry *entry = raw_data;
+
+	perf_tp_event(entry->type, addr, count, raw_data, size);
+	perf_swevent_put_recursion_context(rctx);
+	local_irq_restore(irq_flags);
+}
 #endif
 
 #endif /* _LINUX_FTRACE_EVENT_H */
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index 4a46a60c2077..f2c09e4d656c 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -850,22 +850,12 @@ ftrace_profile_templ_##call(struct ftrace_event_call *event_call,	\
 			    proto)					\
 {									\
 	struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\
-	extern int perf_swevent_get_recursion_context(void);		\
-	extern void perf_swevent_put_recursion_context(int rctx);	\
-	extern void perf_tp_event(int, u64, u64, void *, int);		\
 	struct ftrace_raw_##call *entry;				\
 	u64 __addr = 0, __count = 1;					\
 	unsigned long irq_flags;					\
-	struct trace_entry *ent;					\
 	int __entry_size;						\
 	int __data_size;						\
-	char *trace_buf;						\
-	char *raw_data;							\
-	int __cpu;							\
 	int rctx;							\
-	int pc;								\
-									\
-	pc = preempt_count();						\
 									\
 	__data_size = ftrace_get_offsets_##call(&__data_offsets, args); \
 	__entry_size = ALIGN(__data_size + sizeof(*entry) + sizeof(u32),\
@@ -875,42 +865,16 @@ ftrace_profile_templ_##call(struct ftrace_event_call *event_call,	\
 	if (WARN_ONCE(__entry_size > FTRACE_MAX_PROFILE_SIZE,		\
 		      "profile buffer not large enough"))		\
 		return;							\
-									\
-	local_irq_save(irq_flags);					\
-									\
-	rctx = perf_swevent_get_recursion_context();			\
-	if (rctx < 0)							\
-		goto end_recursion;					\
-									\
-	__cpu = smp_processor_id();					\
-									\
-	if (in_nmi())							\
-		trace_buf = rcu_dereference(perf_trace_buf_nmi);	\
-	else								\
-		trace_buf = rcu_dereference(perf_trace_buf);		\
-									\
-	if (!trace_buf)							\
-		goto end;						\
-									\
-	raw_data = per_cpu_ptr(trace_buf, __cpu);			\
-									\
-	*(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL;		\
-	entry = (struct ftrace_raw_##call *)raw_data;			\
-	ent = &entry->ent;						\
-	tracing_generic_entry_update(ent, irq_flags, pc);		\
-	ent->type = event_call->id;					\
-									\
+	entry = (struct ftrace_raw_##call *)ftrace_perf_buf_prepare(	\
+		__entry_size, event_call->id, &rctx, &irq_flags);	\
+	if (!entry)							\
+		return;							\
 	tstruct								\
 									\
 	{ assign; }							\
 									\
-	perf_tp_event(event_call->id, __addr, __count, entry,		\
-			     __entry_size);				\
-									\
-end:									\
-	perf_swevent_put_recursion_context(rctx);			\
-end_recursion:								\
-	local_irq_restore(irq_flags);					\
+	ftrace_perf_buf_submit(entry, __entry_size, rctx, __addr,	\
+			       __count, irq_flags);			\
 }
 
 #undef DEFINE_EVENT
diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c
index 9e25573242cf..f0d693005075 100644
--- a/kernel/trace/trace_event_profile.c
+++ b/kernel/trace/trace_event_profile.c
@@ -6,14 +6,12 @@
  */
 
 #include <linux/module.h>
+#include <linux/kprobes.h>
 #include "trace.h"
 
 
-char *perf_trace_buf;
-EXPORT_SYMBOL_GPL(perf_trace_buf);
-
-char *perf_trace_buf_nmi;
-EXPORT_SYMBOL_GPL(perf_trace_buf_nmi);
+static char *perf_trace_buf;
+static char *perf_trace_buf_nmi;
 
 typedef typeof(char [FTRACE_MAX_PROFILE_SIZE]) perf_trace_t ;
 
@@ -120,3 +118,47 @@ void ftrace_profile_disable(int event_id)
 	}
 	mutex_unlock(&event_mutex);
 }
+
+__kprobes void *ftrace_perf_buf_prepare(int size, unsigned short type,
+					int *rctxp, unsigned long *irq_flags)
+{
+	struct trace_entry *entry;
+	char *trace_buf, *raw_data;
+	int pc, cpu;
+
+	pc = preempt_count();
+
+	/* Protect the per cpu buffer, begin the rcu read side */
+	local_irq_save(*irq_flags);
+
+	*rctxp = perf_swevent_get_recursion_context();
+	if (*rctxp < 0)
+		goto err_recursion;
+
+	cpu = smp_processor_id();
+
+	if (in_nmi())
+		trace_buf = rcu_dereference(perf_trace_buf_nmi);
+	else
+		trace_buf = rcu_dereference(perf_trace_buf);
+
+	if (!trace_buf)
+		goto err;
+
+	raw_data = per_cpu_ptr(trace_buf, cpu);
+
+	/* zero the dead bytes from align to not leak stack to user */
+	*(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
+
+	entry = (struct trace_entry *)raw_data;
+	tracing_generic_entry_update(entry, *irq_flags, pc);
+	entry->type = type;
+
+	return raw_data;
+err:
+	perf_swevent_put_recursion_context(*rctxp);
+err_recursion:
+	local_irq_restore(*irq_flags);
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(ftrace_perf_buf_prepare);
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index d6266cad6953..2e28ee36646f 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -1243,14 +1243,10 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp,
 	struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
 	struct ftrace_event_call *call = &tp->call;
 	struct kprobe_trace_entry *entry;
-	struct trace_entry *ent;
-	int size, __size, i, pc, __cpu;
+	int size, __size, i;
 	unsigned long irq_flags;
-	char *trace_buf;
-	char *raw_data;
 	int rctx;
 
-	pc = preempt_count();
 	__size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args);
 	size = ALIGN(__size + sizeof(u32), sizeof(u64));
 	size -= sizeof(u32);
@@ -1258,45 +1254,16 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp,
 		     "profile buffer not large enough"))
 		return 0;
 
-	/*
-	 * Protect the non nmi buffer
-	 * This also protects the rcu read side
-	 */
-	local_irq_save(irq_flags);
-
-	rctx = perf_swevent_get_recursion_context();
-	if (rctx < 0)
-		goto end_recursion;
-
-	__cpu = smp_processor_id();
-
-	if (in_nmi())
-		trace_buf = rcu_dereference(perf_trace_buf_nmi);
-	else
-		trace_buf = rcu_dereference(perf_trace_buf);
-
-	if (!trace_buf)
-		goto end;
-
-	raw_data = per_cpu_ptr(trace_buf, __cpu);
-
-	/* Zero dead bytes from alignment to avoid buffer leak to userspace */
-	*(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
-	entry = (struct kprobe_trace_entry *)raw_data;
-	ent = &entry->ent;
+	entry = ftrace_perf_buf_prepare(size, call->id, &rctx, &irq_flags);
+	if (!entry)
+		return 0;
 
-	tracing_generic_entry_update(ent, irq_flags, pc);
-	ent->type = call->id;
 	entry->nargs = tp->nr_args;
 	entry->ip = (unsigned long)kp->addr;
 	for (i = 0; i < tp->nr_args; i++)
 		entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
-	perf_tp_event(call->id, entry->ip, 1, entry, size);
 
-end:
-	perf_swevent_put_recursion_context(rctx);
-end_recursion:
-	local_irq_restore(irq_flags);
+	ftrace_perf_buf_submit(entry, size, rctx, entry->ip, 1, irq_flags);
 
 	return 0;
 }
@@ -1308,14 +1275,10 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri,
 	struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
 	struct ftrace_event_call *call = &tp->call;
 	struct kretprobe_trace_entry *entry;
-	struct trace_entry *ent;
-	int size, __size, i, pc, __cpu;
+	int size, __size, i;
 	unsigned long irq_flags;
-	char *trace_buf;
-	char *raw_data;
 	int rctx;
 
-	pc = preempt_count();
 	__size = SIZEOF_KRETPROBE_TRACE_ENTRY(tp->nr_args);
 	size = ALIGN(__size + sizeof(u32), sizeof(u64));
 	size -= sizeof(u32);
@@ -1323,46 +1286,17 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri,
 		     "profile buffer not large enough"))
 		return 0;
 
-	/*
-	 * Protect the non nmi buffer
-	 * This also protects the rcu read side
-	 */
-	local_irq_save(irq_flags);
-
-	rctx = perf_swevent_get_recursion_context();
-	if (rctx < 0)
-		goto end_recursion;
-
-	__cpu = smp_processor_id();
-
-	if (in_nmi())
-		trace_buf = rcu_dereference(perf_trace_buf_nmi);
-	else
-		trace_buf = rcu_dereference(perf_trace_buf);
-
-	if (!trace_buf)
-		goto end;
-
-	raw_data = per_cpu_ptr(trace_buf, __cpu);
-
-	/* Zero dead bytes from alignment to avoid buffer leak to userspace */
-	*(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
-	entry = (struct kretprobe_trace_entry *)raw_data;
-	ent = &entry->ent;
+	entry = ftrace_perf_buf_prepare(size, call->id, &rctx, &irq_flags);
+	if (!entry)
+		return 0;
 
-	tracing_generic_entry_update(ent, irq_flags, pc);
-	ent->type = call->id;
 	entry->nargs = tp->nr_args;
 	entry->func = (unsigned long)tp->rp.kp.addr;
 	entry->ret_ip = (unsigned long)ri->ret_addr;
 	for (i = 0; i < tp->nr_args; i++)
 		entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
-	perf_tp_event(call->id, entry->ret_ip, 1, entry, size);
 
-end:
-	perf_swevent_put_recursion_context(rctx);
-end_recursion:
-	local_irq_restore(irq_flags);
+	ftrace_perf_buf_submit(entry, size, rctx, entry->ret_ip, 1, irq_flags);
 
 	return 0;
 }
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index f694f66d75b0..4e332b9e449c 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -433,12 +433,9 @@ static void prof_syscall_enter(struct pt_regs *regs, long id)
 	struct syscall_metadata *sys_data;
 	struct syscall_trace_enter *rec;
 	unsigned long flags;
-	char *trace_buf;
-	char *raw_data;
 	int syscall_nr;
 	int rctx;
 	int size;
-	int cpu;
 
 	syscall_nr = syscall_get_nr(current, regs);
 	if (!test_bit(syscall_nr, enabled_prof_enter_syscalls))
@@ -457,37 +454,15 @@ static void prof_syscall_enter(struct pt_regs *regs, long id)
 		      "profile buffer not large enough"))
 		return;
 
-	/* Protect the per cpu buffer, begin the rcu read side */
-	local_irq_save(flags);
-
-	rctx = perf_swevent_get_recursion_context();
-	if (rctx < 0)
-		goto end_recursion;
-
-	cpu = smp_processor_id();
-
-	trace_buf = rcu_dereference(perf_trace_buf);
-
-	if (!trace_buf)
-		goto end;
-
-	raw_data = per_cpu_ptr(trace_buf, cpu);
-
-	/* zero the dead bytes from align to not leak stack to user */
-	*(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
+	rec = (struct syscall_trace_enter *)ftrace_perf_buf_prepare(size,
+				sys_data->enter_event->id, &rctx, &flags);
+	if (!rec)
+		return;
 
-	rec = (struct syscall_trace_enter *) raw_data;
-	tracing_generic_entry_update(&rec->ent, 0, 0);
-	rec->ent.type = sys_data->enter_event->id;
 	rec->nr = syscall_nr;
 	syscall_get_arguments(current, regs, 0, sys_data->nb_args,
 			       (unsigned long *)&rec->args);
-	perf_tp_event(sys_data->enter_event->id, 0, 1, rec, size);
-
-end:
-	perf_swevent_put_recursion_context(rctx);
-end_recursion:
-	local_irq_restore(flags);
+	ftrace_perf_buf_submit(rec, size, rctx, 0, 1, flags);
 }
 
 int prof_sysenter_enable(struct ftrace_event_call *call)
@@ -531,11 +506,8 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
 	struct syscall_trace_exit *rec;
 	unsigned long flags;
 	int syscall_nr;
-	char *trace_buf;
-	char *raw_data;
 	int rctx;
 	int size;
-	int cpu;
 
 	syscall_nr = syscall_get_nr(current, regs);
 	if (!test_bit(syscall_nr, enabled_prof_exit_syscalls))
@@ -557,38 +529,15 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
 		"exit event has grown above profile buffer size"))
 		return;
 
-	/* Protect the per cpu buffer, begin the rcu read side */
-	local_irq_save(flags);
-
-	rctx = perf_swevent_get_recursion_context();
-	if (rctx < 0)
-		goto end_recursion;
-
-	cpu = smp_processor_id();
-
-	trace_buf = rcu_dereference(perf_trace_buf);
-
-	if (!trace_buf)
-		goto end;
-
-	raw_data = per_cpu_ptr(trace_buf, cpu);
-
-	/* zero the dead bytes from align to not leak stack to user */
-	*(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
-
-	rec = (struct syscall_trace_exit *)raw_data;
+	rec = (struct syscall_trace_exit *)ftrace_perf_buf_prepare(size,
+				sys_data->exit_event->id, &rctx, &flags);
+	if (!rec)
+		return;
 
-	tracing_generic_entry_update(&rec->ent, 0, 0);
-	rec->ent.type = sys_data->exit_event->id;
 	rec->nr = syscall_nr;
 	rec->ret = syscall_get_return_value(current, regs);
 
-	perf_tp_event(sys_data->exit_event->id, 0, 1, rec, size);
-
-end:
-	perf_swevent_put_recursion_context(rctx);
-end_recursion:
-	local_irq_restore(flags);
+	ftrace_perf_buf_submit(rec, size, rctx, 0, 1, flags);
 }
 
 int prof_sysexit_enable(struct ftrace_event_call *call)
-- 
cgit v1.2.3


From bb209c8287d2d55ec4a67e3933346e0a3ee0da76 Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Tue, 26 Jan 2010 17:10:03 +0000
Subject: powerpc/pci: Add calls to set_pcie_port_type() and
 set_pcie_hotplug_bridge()

We are missing these when building the pci_dev from scratch off
the Open Firmware device-tree

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Acked-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 arch/powerpc/kernel/pci_of_scan.c | 2 ++
 drivers/pci/probe.c               | 4 ++--
 include/linux/pci.h               | 4 ++++
 3 files changed, 8 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/kernel/pci_of_scan.c b/arch/powerpc/kernel/pci_of_scan.c
index 7311fdfb9bf8..693eb9a25bfa 100644
--- a/arch/powerpc/kernel/pci_of_scan.c
+++ b/arch/powerpc/kernel/pci_of_scan.c
@@ -140,6 +140,7 @@ struct pci_dev *of_create_pci_dev(struct device_node *node,
 	dev->devfn = devfn;
 	dev->multifunction = 0;		/* maybe a lie? */
 	dev->needs_freset = 0;		/* pcie fundamental reset required */
+	set_pcie_port_type(dev);
 
 	dev->vendor = get_int_prop(node, "vendor-id", 0xffff);
 	dev->device = get_int_prop(node, "device-id", 0xffff);
@@ -164,6 +165,7 @@ struct pci_dev *of_create_pci_dev(struct device_node *node,
 		/* a PCI-PCI bridge */
 		dev->hdr_type = PCI_HEADER_TYPE_BRIDGE;
 		dev->rom_base_reg = PCI_ROM_ADDRESS1;
+		set_pcie_hotplug_bridge(dev);
 	} else if (!strcmp(type, "cardbus")) {
 		dev->hdr_type = PCI_HEADER_TYPE_CARDBUS;
 	} else {
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 98ffb2de22e9..446e4a94d7d3 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -681,7 +681,7 @@ static void pci_read_irq(struct pci_dev *dev)
 	dev->irq = irq;
 }
 
-static void set_pcie_port_type(struct pci_dev *pdev)
+void set_pcie_port_type(struct pci_dev *pdev)
 {
 	int pos;
 	u16 reg16;
@@ -695,7 +695,7 @@ static void set_pcie_port_type(struct pci_dev *pdev)
 	pdev->pcie_type = (reg16 & PCI_EXP_FLAGS_TYPE) >> 4;
 }
 
-static void set_pcie_hotplug_bridge(struct pci_dev *pdev)
+void set_pcie_hotplug_bridge(struct pci_dev *pdev)
 {
 	int pos;
 	u16 reg16;
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 174e5392e51e..c1968f464c38 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -756,6 +756,10 @@ pci_power_t pci_target_state(struct pci_dev *dev);
 int pci_prepare_to_sleep(struct pci_dev *dev);
 int pci_back_from_sleep(struct pci_dev *dev);
 
+/* For use by arch with custom probe code */
+void set_pcie_port_type(struct pci_dev *pdev);
+void set_pcie_hotplug_bridge(struct pci_dev *pdev);
+
 /* Functions for PCI Hotplug drivers to use */
 int pci_bus_find_capability(struct pci_bus *bus, unsigned int devfn, int cap);
 #ifdef CONFIG_HOTPLUG
-- 
cgit v1.2.3


From cb6ecf6f7afece066265e243657b0ac28150a7b2 Mon Sep 17 00:00:00 2001
From: Henrik Rydberg <rydberg@euromail.se>
Date: Thu, 28 Jan 2010 22:28:27 -0800
Subject: Input: add the ABS_MT_PRESSURE event

For pressure-based multi-touch devices, a direct way to send sensor
intensity data per finger is needed. This patch adds the ABS_MT_PRESSURE
event to the MT protocol.

Requested-by: Yoonyoung Shim <jy0922.shim@samsung.com>
Requested-by: Mika Kuoppala <mika.kuoppala@nokia.com>
Requested-by: Peter Hutterer <peter.hutterer@who-t.net>
Signed-off-by: Henrik Rydberg <rydberg@euromail.se>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/input/input.c | 1 +
 include/linux/input.h | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/input/input.c b/drivers/input/input.c
index 30b503b8d67b..86cb2d2196ff 100644
--- a/drivers/input/input.c
+++ b/drivers/input/input.c
@@ -46,6 +46,7 @@ static unsigned int input_abs_bypass_init_data[] __initdata = {
 	ABS_MT_TOOL_TYPE,
 	ABS_MT_BLOB_ID,
 	ABS_MT_TRACKING_ID,
+	ABS_MT_PRESSURE,
 	0
 };
 static unsigned long input_abs_bypass[BITS_TO_LONGS(ABS_CNT)];
diff --git a/include/linux/input.h b/include/linux/input.h
index 7be8a6537b57..735ceaf1bc2d 100644
--- a/include/linux/input.h
+++ b/include/linux/input.h
@@ -660,6 +660,7 @@ struct input_absinfo {
 #define ABS_MT_TOOL_TYPE	0x37	/* Type of touching device */
 #define ABS_MT_BLOB_ID		0x38	/* Group a set of packets as a blob */
 #define ABS_MT_TRACKING_ID	0x39	/* Unique ID of initiated contact */
+#define ABS_MT_PRESSURE		0x3a	/* Pressure on contact area */
 
 #define ABS_MAX			0x3f
 #define ABS_CNT			(ABS_MAX+1)
-- 
cgit v1.2.3


From 9f41699ed067fa695faff8e2e9981b2550abec62 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 22 Jan 2010 15:59:29 +0100
Subject: bitops: Provide compile time HWEIGHT{8,16,32,64}

Provide compile time versions of hweight.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Stephane Eranian <eranian@google.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
LKML-Reference: <20100122155535.797688466@chello.nl>
[ Remove some whitespace damage while we are at it ]
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/bitops.h | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bitops.h b/include/linux/bitops.h
index c05a29cb9bb2..ba0fd1eb4af7 100644
--- a/include/linux/bitops.h
+++ b/include/linux/bitops.h
@@ -25,7 +25,7 @@
 static __inline__ int get_bitmask_order(unsigned int count)
 {
 	int order;
-	
+
 	order = fls(count);
 	return order;	/* We could be slightly more clever with -1 here... */
 }
@@ -33,7 +33,7 @@ static __inline__ int get_bitmask_order(unsigned int count)
 static __inline__ int get_count_order(unsigned int count)
 {
 	int order;
-	
+
 	order = fls(count) - 1;
 	if (count & (count - 1))
 		order++;
@@ -45,6 +45,20 @@ static inline unsigned long hweight_long(unsigned long w)
 	return sizeof(w) == 4 ? hweight32(w) : hweight64(w);
 }
 
+#define HWEIGHT8(w)			\
+      (	(!!((w) & (1ULL << 0))) +	\
+	(!!((w) & (1ULL << 1))) +	\
+	(!!((w) & (1ULL << 2))) +	\
+	(!!((w) & (1ULL << 3))) +	\
+	(!!((w) & (1ULL << 4))) +	\
+	(!!((w) & (1ULL << 5))) +	\
+	(!!((w) & (1ULL << 6))) +	\
+	(!!((w) & (1ULL << 7)))	)
+
+#define HWEIGHT16(w) (HWEIGHT8(w)  + HWEIGHT8(w >> 8))
+#define HWEIGHT32(w) (HWEIGHT16(w) + HWEIGHT16(w >> 16))
+#define HWEIGHT64(w) (HWEIGHT32(w) + HWEIGHT32(w >> 32))
+
 /**
  * rol32 - rotate a 32-bit value left
  * @word: value to rotate
-- 
cgit v1.2.3


From 184f412c3341cd24fbd26604634a5800b83dbdc3 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 27 Jan 2010 08:39:39 +0100
Subject: perf, x86: Clean up event constraints code a bit

- Remove stray debug code
 - Improve ugly macros a bit
 - Remove some whitespace damage
 - (Also fix up some accumulated damage in perf_event.h)

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Cc: Stephane Eranian <eranian@google.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <new-submission>
---
 arch/x86/kernel/cpu/perf_event.c | 37 ++++++++-----------------------------
 include/linux/perf_event.h       | 24 +++++++++++-------------
 2 files changed, 19 insertions(+), 42 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 66de282ad2fb..fdbe24842271 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -93,24 +93,19 @@ struct cpu_hw_events {
 	struct perf_event	*event_list[X86_PMC_IDX_MAX]; /* in enabled order */
 };
 
-#define EVENT_CONSTRAINT(c, n, m) { 	\
+#define EVENT_CONSTRAINT(c, n, m) {	\
 	{ .idxmsk64[0] = (n) },		\
 	.code = (c),			\
 	.cmask = (m),			\
 	.weight = HWEIGHT64((u64)(n)),	\
 }
 
-#define INTEL_EVENT_CONSTRAINT(c, n)	\
-	EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)
+#define INTEL_EVENT_CONSTRAINT(c, n)		EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)
+#define FIXED_EVENT_CONSTRAINT(c, n)		EVENT_CONSTRAINT(c, n, INTEL_ARCH_FIXED_MASK)
 
-#define FIXED_EVENT_CONSTRAINT(c, n)	\
-	EVENT_CONSTRAINT(c, n, INTEL_ARCH_FIXED_MASK)
+#define EVENT_CONSTRAINT_END			EVENT_CONSTRAINT(0, 0, 0)
 
-#define EVENT_CONSTRAINT_END \
-	EVENT_CONSTRAINT(0, 0, 0)
-
-#define for_each_event_constraint(e, c) \
-	for ((e) = (c); (e)->cmask; (e)++)
+#define for_each_event_constraint(e, c)		for ((e) = (c); (e)->cmask; (e)++)
 
 /*
  * struct x86_pmu - generic x86 pmu
@@ -1276,14 +1271,6 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 		if (test_bit(hwc->idx, used_mask))
 			break;
 
-#if 0
-		pr_debug("CPU%d fast config=0x%llx idx=%d assign=%c\n",
-			 smp_processor_id(),
-			 hwc->config,
-			 hwc->idx,
-			 assign ? 'y' : 'n');
-#endif
-
 		set_bit(hwc->idx, used_mask);
 		if (assign)
 			assign[i] = hwc->idx;
@@ -1333,14 +1320,6 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 			if (j == X86_PMC_IDX_MAX)
 				break;
 
-#if 0
-			pr_debug("CPU%d slow config=0x%llx idx=%d assign=%c\n",
-				smp_processor_id(),
-				hwc->config,
-				j,
-				assign ? 'y' : 'n');
-#endif
-
 			set_bit(j, used_mask);
 
 			if (assign)
@@ -2596,9 +2575,9 @@ static const struct pmu pmu = {
  * validate a single event group
  *
  * validation include:
- * 	- check events are compatible which each other
- * 	- events do not compete for the same counter
- * 	- number of events <= number of counters
+ *	- check events are compatible which each other
+ *	- events do not compete for the same counter
+ *	- number of events <= number of counters
  *
  * validation ensures the group can be loaded onto the
  * PMU if it was the only group available.
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 72b2615600d8..953c17731e0d 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -290,7 +290,7 @@ struct perf_event_mmap_page {
 };
 
 #define PERF_RECORD_MISC_CPUMODE_MASK		(3 << 0)
-#define PERF_RECORD_MISC_CPUMODE_UNKNOWN		(0 << 0)
+#define PERF_RECORD_MISC_CPUMODE_UNKNOWN	(0 << 0)
 #define PERF_RECORD_MISC_KERNEL			(1 << 0)
 #define PERF_RECORD_MISC_USER			(2 << 0)
 #define PERF_RECORD_MISC_HYPERVISOR		(3 << 0)
@@ -356,8 +356,8 @@ enum perf_event_type {
 	 *	u64				stream_id;
 	 * };
 	 */
-	PERF_RECORD_THROTTLE		= 5,
-	PERF_RECORD_UNTHROTTLE		= 6,
+	PERF_RECORD_THROTTLE			= 5,
+	PERF_RECORD_UNTHROTTLE			= 6,
 
 	/*
 	 * struct {
@@ -371,10 +371,10 @@ enum perf_event_type {
 
 	/*
 	 * struct {
-	 * 	struct perf_event_header	header;
-	 * 	u32				pid, tid;
+	 *	struct perf_event_header	header;
+	 *	u32				pid, tid;
 	 *
-	 * 	struct read_format		values;
+	 *	struct read_format		values;
 	 * };
 	 */
 	PERF_RECORD_READ			= 8,
@@ -412,7 +412,7 @@ enum perf_event_type {
 	 *	  char                  data[size];}&& PERF_SAMPLE_RAW
 	 * };
 	 */
-	PERF_RECORD_SAMPLE		= 9,
+	PERF_RECORD_SAMPLE			= 9,
 
 	PERF_RECORD_MAX,			/* non-ABI */
 };
@@ -752,8 +752,7 @@ extern int perf_max_events;
 extern const struct pmu *hw_perf_event_init(struct perf_event *event);
 
 extern void perf_event_task_sched_in(struct task_struct *task);
-extern void perf_event_task_sched_out(struct task_struct *task,
-					struct task_struct *next);
+extern void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next);
 extern void perf_event_task_tick(struct task_struct *task);
 extern int perf_event_init_task(struct task_struct *child);
 extern void perf_event_exit_task(struct task_struct *child);
@@ -853,8 +852,7 @@ extern int sysctl_perf_event_mlock;
 extern int sysctl_perf_event_sample_rate;
 
 extern void perf_event_init(void);
-extern void perf_tp_event(int event_id, u64 addr, u64 count,
-				 void *record, int entry_size);
+extern void perf_tp_event(int event_id, u64 addr, u64 count, void *record, int entry_size);
 extern void perf_bp_event(struct perf_event *event, void *data);
 
 #ifndef perf_misc_flags
@@ -895,13 +893,13 @@ static inline void
 perf_sw_event(u32 event_id, u64 nr, int nmi,
 		     struct pt_regs *regs, u64 addr)			{ }
 static inline void
-perf_bp_event(struct perf_event *event, void *data)		{ }
+perf_bp_event(struct perf_event *event, void *data)			{ }
 
 static inline void perf_event_mmap(struct vm_area_struct *vma)		{ }
 static inline void perf_event_comm(struct task_struct *tsk)		{ }
 static inline void perf_event_fork(struct task_struct *tsk)		{ }
 static inline void perf_event_init(void)				{ }
-static inline int  perf_swevent_get_recursion_context(void)  { return -1; }
+static inline int  perf_swevent_get_recursion_context(void)		{ return -1; }
 static inline void perf_swevent_put_recursion_context(int rctx)		{ }
 static inline void perf_event_enable(struct perf_event *event)		{ }
 static inline void perf_event_disable(struct perf_event *event)		{ }
-- 
cgit v1.2.3


From 488991e28e55b4fbca8067edf0259f69d1a6f92c Mon Sep 17 00:00:00 2001
From: "Alan D. Brunelle" <Alan.Brunelle@hp.com>
Date: Fri, 29 Jan 2010 09:04:08 +0100
Subject: block: Added in stricter no merge semantics for block I/O

Updated 'nomerges' tunable to accept a value of '2' - indicating that _no_
merges at all are to be attempted (not even the simple one-hit cache).

The following table illustrates the additional benefit - 5 minute runs of
a random I/O load were applied to a dozen devices on a 16-way x86_64 system.

nomerges        Throughput      %System         Improvement (tput / %sys)
--------        ------------    -----------     -------------------------
0               12.45 MB/sec    0.669365609
1               12.50 MB/sec    0.641519199     0.40% / 2.71%
2               12.52 MB/sec    0.639849750     0.56% / 2.96%

Signed-off-by: Alan D. Brunelle <alan.brunelle@hp.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 Documentation/ABI/testing/sysfs-block | 14 ++++++++++++++
 Documentation/block/queue-sysfs.txt   | 10 +++++-----
 block/blk-sysfs.c                     | 11 +++++++----
 block/elevator.c                      | 11 ++++++++++-
 include/linux/blkdev.h                |  3 +++
 5 files changed, 39 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/ABI/testing/sysfs-block b/Documentation/ABI/testing/sysfs-block
index d2f90334bb93..4873c759d535 100644
--- a/Documentation/ABI/testing/sysfs-block
+++ b/Documentation/ABI/testing/sysfs-block
@@ -128,3 +128,17 @@ Description:
 		preferred request size for workloads where sustained
 		throughput is desired.  If no optimal I/O size is
 		reported this file contains 0.
+
+What:		/sys/block/<disk>/queue/nomerges
+Date:		January 2010
+Contact:
+Description:
+		Standard I/O elevator operations include attempts to
+		merge contiguous I/Os. For known random I/O loads these
+		attempts will always fail and result in extra cycles
+		being spent in the kernel. This allows one to turn off
+		this behavior on one of two ways: When set to 1, complex
+		merge checks are disabled, but the simple one-shot merges
+		with the previous I/O request are enabled. When set to 2,
+		all merge tries are disabled. The default value is 0 -
+		which enables all types of merge tries.
diff --git a/Documentation/block/queue-sysfs.txt b/Documentation/block/queue-sysfs.txt
index e164403f60e1..f65274081c8d 100644
--- a/Documentation/block/queue-sysfs.txt
+++ b/Documentation/block/queue-sysfs.txt
@@ -25,11 +25,11 @@ size allowed by the hardware.
 
 nomerges (RW)
 -------------
-This enables the user to disable the lookup logic involved with IO merging
-requests in the block layer. Merging may still occur through a direct
-1-hit cache, since that comes for (almost) free. The IO scheduler will not
-waste cycles doing tree/hash lookups for merges if nomerges is 1. Defaults
-to 0, enabling all merges.
+This enables the user to disable the lookup logic involved with IO
+merging requests in the block layer. By default (0) all merges are
+enabled. When set to 1 only simple one-hit merges will be tried. When
+set to 2 no merge algorithms will be tried (including one-hit or more
+complex tree/hash lookups).
 
 nr_requests (RW)
 ----------------
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 8606c9543fdd..e85442415db3 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -189,7 +189,8 @@ static ssize_t queue_nonrot_store(struct request_queue *q, const char *page,
 
 static ssize_t queue_nomerges_show(struct request_queue *q, char *page)
 {
-	return queue_var_show(blk_queue_nomerges(q), page);
+	return queue_var_show((blk_queue_nomerges(q) << 1) |
+			       blk_queue_noxmerges(q), page);
 }
 
 static ssize_t queue_nomerges_store(struct request_queue *q, const char *page,
@@ -199,10 +200,12 @@ static ssize_t queue_nomerges_store(struct request_queue *q, const char *page,
 	ssize_t ret = queue_var_store(&nm, page, count);
 
 	spin_lock_irq(q->queue_lock);
-	if (nm)
+	queue_flag_clear(QUEUE_FLAG_NOMERGES, q);
+	queue_flag_clear(QUEUE_FLAG_NOXMERGES, q);
+	if (nm == 2)
 		queue_flag_set(QUEUE_FLAG_NOMERGES, q);
-	else
-		queue_flag_clear(QUEUE_FLAG_NOMERGES, q);
+	else if (nm)
+		queue_flag_set(QUEUE_FLAG_NOXMERGES, q);
 	spin_unlock_irq(q->queue_lock);
 
 	return ret;
diff --git a/block/elevator.c b/block/elevator.c
index 9ad5ccc4c5ee..ee3a883840f2 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -473,6 +473,15 @@ int elv_merge(struct request_queue *q, struct request **req, struct bio *bio)
 	struct request *__rq;
 	int ret;
 
+	/*
+	 * Levels of merges:
+	 * 	nomerges:  No merges at all attempted
+	 * 	noxmerges: Only simple one-hit cache try
+	 * 	merges:	   All merge tries attempted
+	 */
+	if (blk_queue_nomerges(q))
+		return ELEVATOR_NO_MERGE;
+
 	/*
 	 * First try one-hit cache.
 	 */
@@ -484,7 +493,7 @@ int elv_merge(struct request_queue *q, struct request **req, struct bio *bio)
 		}
 	}
 
-	if (blk_queue_nomerges(q))
+	if (blk_queue_noxmerges(q))
 		return ELEVATOR_NO_MERGE;
 
 	/*
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index ffb13ad35716..f71f5c58620c 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -463,6 +463,7 @@ struct request_queue
 #define QUEUE_FLAG_IO_STAT     15	/* do IO stats */
 #define QUEUE_FLAG_CQ	       16	/* hardware does queuing */
 #define QUEUE_FLAG_DISCARD     17	/* supports DISCARD */
+#define QUEUE_FLAG_NOXMERGES   18	/* No extended merges */
 
 #define QUEUE_FLAG_DEFAULT	((1 << QUEUE_FLAG_IO_STAT) |		\
 				 (1 << QUEUE_FLAG_CLUSTER) |		\
@@ -589,6 +590,8 @@ enum {
 #define blk_queue_queuing(q)	test_bit(QUEUE_FLAG_CQ, &(q)->queue_flags)
 #define blk_queue_stopped(q)	test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags)
 #define blk_queue_nomerges(q)	test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags)
+#define blk_queue_noxmerges(q)	\
+	test_bit(QUEUE_FLAG_NOXMERGES, &(q)->queue_flags)
 #define blk_queue_nonrot(q)	test_bit(QUEUE_FLAG_NONROT, &(q)->queue_flags)
 #define blk_queue_io_stat(q)	test_bit(QUEUE_FLAG_IO_STAT, &(q)->queue_flags)
 #define blk_queue_flushing(q)	((q)->ordseq)
-- 
cgit v1.2.3


From 1f5b8f8a2031ae9507eb67743cad4d424739bfff Mon Sep 17 00:00:00 2001
From: john stultz <johnstul@us.ibm.com>
Date: Thu, 28 Jan 2010 15:02:41 -0800
Subject: ntp: Make time_esterror and time_maxerror static

Make time_esterror and time_maxerror static as no one uses them
outside of ntp.c

Signed-off-by: John Stultz <johnstul@us.ibm.com>
Cc: richard@rsk.demon.co.uk
LKML-Reference: <1264719761.3437.47.camel@localhost.localdomain>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/timex.h | 3 ---
 kernel/time/ntp.c     | 4 ++--
 2 files changed, 2 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/timex.h b/include/linux/timex.h
index 94f8faecdcbc..7a082b32d8e1 100644
--- a/include/linux/timex.h
+++ b/include/linux/timex.h
@@ -238,9 +238,6 @@ extern int tickadj;			/* amount of adjustment per tick */
  * phase-lock loop variables
  */
 extern int time_status;		/* clock synchronization status bits */
-extern long time_maxerror;	/* maximum error */
-extern long time_esterror;	/* estimated error */
-
 extern long time_adjust;	/* The amount of adjtime left */
 
 extern void ntp_init(void);
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index 4800f933910e..74b1b37b1595 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -58,10 +58,10 @@ static s64			time_offset;
 static long			time_constant = 2;
 
 /* maximum error (usecs):						*/
-long				time_maxerror = NTP_PHASE_LIMIT;
+static long			time_maxerror = NTP_PHASE_LIMIT;
 
 /* estimated error (usecs):						*/
-long				time_esterror = NTP_PHASE_LIMIT;
+static long			time_esterror = NTP_PHASE_LIMIT;
 
 /* frequency offset (scaled nsecs/secs):				*/
 static s64			time_freq;
-- 
cgit v1.2.3


From 221af7f87b97431e3ee21ce4b0e77d5411cf1549 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Thu, 28 Jan 2010 22:14:42 -0800
Subject: Split 'flush_old_exec' into two functions

'flush_old_exec()' is the point of no return when doing an execve(), and
it is pretty badly misnamed.  It doesn't just flush the old executable
environment, it also starts up the new one.

Which is very inconvenient for things like setting up the new
personality, because we want the new personality to affect the starting
of the new environment, but at the same time we do _not_ want the new
personality to take effect if flushing the old one fails.

As a result, the x86-64 '32-bit' personality is actually done using this
insane "I'm going to change the ABI, but I haven't done it yet" bit
(TIF_ABI_PENDING), with SET_PERSONALITY() not actually setting the
personality, but just the "pending" bit, so that "flush_thread()" can do
the actual personality magic.

This patch in no way changes any of that insanity, but it does split the
'flush_old_exec()' function up into a preparatory part that can fail
(still called flush_old_exec()), and a new part that will actually set
up the new exec environment (setup_new_exec()).  All callers are changed
to trivially comply with the new world order.

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Cc: stable@kernel.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/sh/kernel/process_64.c |  2 +-
 arch/x86/ia32/ia32_aout.c   | 10 ++++++----
 fs/binfmt_aout.c            |  1 +
 fs/binfmt_elf.c             | 27 ++-------------------------
 fs/binfmt_elf_fdpic.c       |  3 +++
 fs/binfmt_flat.c            |  1 +
 fs/binfmt_som.c             |  1 +
 fs/exec.c                   | 26 ++++++++++++++++----------
 include/linux/binfmts.h     |  1 +
 include/linux/sched.h       |  2 +-
 10 files changed, 33 insertions(+), 41 deletions(-)

(limited to 'include/linux')

diff --git a/arch/sh/kernel/process_64.c b/arch/sh/kernel/process_64.c
index 31f80c61b031..ec79faf6f021 100644
--- a/arch/sh/kernel/process_64.c
+++ b/arch/sh/kernel/process_64.c
@@ -368,7 +368,7 @@ void exit_thread(void)
 void flush_thread(void)
 {
 
-	/* Called by fs/exec.c (flush_old_exec) to remove traces of a
+	/* Called by fs/exec.c (setup_new_exec) to remove traces of a
 	 * previously running executable. */
 #ifdef CONFIG_SH_FPU
 	if (last_task_used_math == current) {
diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c
index 2a4d073d2cf1..435d2a5323da 100644
--- a/arch/x86/ia32/ia32_aout.c
+++ b/arch/x86/ia32/ia32_aout.c
@@ -308,15 +308,17 @@ static int load_aout_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 	if (retval)
 		return retval;
 
-	regs->cs = __USER32_CS;
-	regs->r8 = regs->r9 = regs->r10 = regs->r11 = regs->r12 =
-		regs->r13 = regs->r14 = regs->r15 = 0;
-
 	/* OK, This is the point of no return */
 	set_personality(PER_LINUX);
 	set_thread_flag(TIF_IA32);
 	clear_thread_flag(TIF_ABI_PENDING);
 
+	setup_new_exec(bprm);
+
+	regs->cs = __USER32_CS;
+	regs->r8 = regs->r9 = regs->r10 = regs->r11 = regs->r12 =
+		regs->r13 = regs->r14 = regs->r15 = 0;
+
 	current->mm->end_code = ex.a_text +
 		(current->mm->start_code = N_TXTADDR(ex));
 	current->mm->end_data = ex.a_data +
diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
index 346b69405363..fdd397099172 100644
--- a/fs/binfmt_aout.c
+++ b/fs/binfmt_aout.c
@@ -264,6 +264,7 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs)
 #else
 	set_personality(PER_LINUX);
 #endif
+	setup_new_exec(bprm);
 
 	current->mm->end_code = ex.a_text +
 		(current->mm->start_code = N_TXTADDR(ex));
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index edd90c49003c..fd5b2ea5d299 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -662,27 +662,6 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 			if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
 				goto out_free_interp;
 
-			/*
-			 * The early SET_PERSONALITY here is so that the lookup
-			 * for the interpreter happens in the namespace of the 
-			 * to-be-execed image.  SET_PERSONALITY can select an
-			 * alternate root.
-			 *
-			 * However, SET_PERSONALITY is NOT allowed to switch
-			 * this task into the new images's memory mapping
-			 * policy - that is, TASK_SIZE must still evaluate to
-			 * that which is appropriate to the execing application.
-			 * This is because exit_mmap() needs to have TASK_SIZE
-			 * evaluate to the size of the old image.
-			 *
-			 * So if (say) a 64-bit application is execing a 32-bit
-			 * application it is the architecture's responsibility
-			 * to defer changing the value of TASK_SIZE until the
-			 * switch really is going to happen - do this in
-			 * flush_thread().	- akpm
-			 */
-			SET_PERSONALITY(loc->elf_ex);
-
 			interpreter = open_exec(elf_interpreter);
 			retval = PTR_ERR(interpreter);
 			if (IS_ERR(interpreter))
@@ -730,9 +709,6 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 		/* Verify the interpreter has a valid arch */
 		if (!elf_check_arch(&loc->interp_elf_ex))
 			goto out_free_dentry;
-	} else {
-		/* Executables without an interpreter also need a personality  */
-		SET_PERSONALITY(loc->elf_ex);
 	}
 
 	/* Flush all traces of the currently running executable */
@@ -752,7 +728,8 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 
 	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
 		current->flags |= PF_RANDOMIZE;
-	arch_pick_mmap_layout(current->mm);
+
+	setup_new_exec(bprm);
 
 	/* Do this so that we can load the interpreter, if need be.  We will
 	   change some of these later */
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index c57d9ce5ff7e..18d77297ccc8 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -321,6 +321,9 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm,
 	set_personality(PER_LINUX_FDPIC);
 	if (elf_read_implies_exec(&exec_params.hdr, executable_stack))
 		current->personality |= READ_IMPLIES_EXEC;
+
+	setup_new_exec(bprm);
+
 	set_binfmt(&elf_fdpic_format);
 
 	current->mm->start_code = 0;
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index d4a00ea1054c..42c6b4a54445 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -519,6 +519,7 @@ static int load_flat_file(struct linux_binprm * bprm,
 
 		/* OK, This is the point of no return */
 		set_personality(PER_LINUX_32BIT);
+		setup_new_exec(bprm);
 	}
 
 	/*
diff --git a/fs/binfmt_som.c b/fs/binfmt_som.c
index 2a9b5330cc5e..cc8560f6c9b0 100644
--- a/fs/binfmt_som.c
+++ b/fs/binfmt_som.c
@@ -227,6 +227,7 @@ load_som_binary(struct linux_binprm * bprm, struct pt_regs * regs)
 	/* OK, This is the point of no return */
 	current->flags &= ~PF_FORKNOEXEC;
 	current->personality = PER_HPUX;
+	setup_new_exec(bprm);
 
 	/* Set the task size for HP-UX processes such that
 	 * the gateway page is outside the address space.
diff --git a/fs/exec.c b/fs/exec.c
index 632b02e34ec7..675c3f44c2ea 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -941,9 +941,7 @@ void set_task_comm(struct task_struct *tsk, char *buf)
 
 int flush_old_exec(struct linux_binprm * bprm)
 {
-	char * name;
-	int i, ch, retval;
-	char tcomm[sizeof(current->comm)];
+	int retval;
 
 	/*
 	 * Make sure we have a private signal table and that
@@ -963,6 +961,20 @@ int flush_old_exec(struct linux_binprm * bprm)
 		goto out;
 
 	bprm->mm = NULL;		/* We're using it now */
+	return 0;
+
+out:
+	return retval;
+}
+EXPORT_SYMBOL(flush_old_exec);
+
+void setup_new_exec(struct linux_binprm * bprm)
+{
+	int i, ch;
+	char * name;
+	char tcomm[sizeof(current->comm)];
+
+	arch_pick_mmap_layout(current->mm);
 
 	/* This is the point of no return */
 	current->sas_ss_sp = current->sas_ss_size = 0;
@@ -1019,14 +1031,8 @@ int flush_old_exec(struct linux_binprm * bprm)
 			
 	flush_signal_handlers(current, 0);
 	flush_old_files(current->files);
-
-	return 0;
-
-out:
-	return retval;
 }
-
-EXPORT_SYMBOL(flush_old_exec);
+EXPORT_SYMBOL(setup_new_exec);
 
 /*
  * Prepare credentials and lock ->cred_guard_mutex.
diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index cd4349bdc34e..89c6249fc561 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -109,6 +109,7 @@ extern int prepare_binprm(struct linux_binprm *);
 extern int __must_check remove_arg_zero(struct linux_binprm *);
 extern int search_binary_handler(struct linux_binprm *,struct pt_regs *);
 extern int flush_old_exec(struct linux_binprm * bprm);
+extern void setup_new_exec(struct linux_binprm * bprm);
 
 extern int suid_dumpable;
 #define SUID_DUMP_DISABLE	0	/* No setuid dumping */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 6f7bba93929b..abdfacc58653 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1369,7 +1369,7 @@ struct task_struct {
 	char comm[TASK_COMM_LEN]; /* executable name excluding path
 				     - access with [gs]et_task_comm (which lock
 				       it with task_lock())
-				     - initialized normally by flush_old_exec */
+				     - initialized normally by setup_new_exec */
 /* file system info */
 	int link_count, total_link_count;
 #ifdef CONFIG_SYSVIPC
-- 
cgit v1.2.3


From 5352ae638e2d7d5c9b2e4d528676bbf2af6fd6f3 Mon Sep 17 00:00:00 2001
From: Jason Wessel <jason.wessel@windriver.com>
Date: Thu, 28 Jan 2010 17:04:43 -0600
Subject: perf, hw_breakpoint, kgdb: Do not take mutex for kernel debugger

This patch fixes the regression in functionality where the
kernel debugger and the perf API do not nicely share hw
breakpoint reservations.

The kernel debugger cannot use any mutex_lock() calls because it
can start the kernel running from an invalid context.

A mutex free version of the reservation API needed to get
created for the kernel debugger to safely update hw breakpoint
reservations.

The possibility for a breakpoint reservation to be concurrently
processed at the time that kgdb interrupts the system is
improbable. Should this corner case occur the end user is
warned, and the kernel debugger will prohibit updating the
hardware breakpoint reservations.

Any time the kernel debugger reserves a hardware breakpoint it
will be a system wide reservation.

Signed-off-by: Jason Wessel <jason.wessel@windriver.com>
Acked-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: kgdb-bugreport@lists.sourceforge.net
Cc: K.Prasad <prasad@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Alan Stern <stern@rowland.harvard.edu>
Cc: torvalds@linux-foundation.org
LKML-Reference: <1264719883-7285-3-git-send-email-jason.wessel@windriver.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/kgdb.c        | 51 ++++++++++++++++++++++++++++++++++++++++++
 include/linux/hw_breakpoint.h |  2 ++
 kernel/hw_breakpoint.c        | 52 ++++++++++++++++++++++++++++++++++---------
 3 files changed, 95 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index 62bea7307eaa..bfba6019d762 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -239,6 +239,49 @@ static void kgdb_correct_hw_break(void)
 	hw_breakpoint_restore();
 }
 
+static int hw_break_reserve_slot(int breakno)
+{
+	int cpu;
+	int cnt = 0;
+	struct perf_event **pevent;
+
+	for_each_online_cpu(cpu) {
+		cnt++;
+		pevent = per_cpu_ptr(breakinfo[breakno].pev, cpu);
+		if (dbg_reserve_bp_slot(*pevent))
+			goto fail;
+	}
+
+	return 0;
+
+fail:
+	for_each_online_cpu(cpu) {
+		cnt--;
+		if (!cnt)
+			break;
+		pevent = per_cpu_ptr(breakinfo[breakno].pev, cpu);
+		dbg_release_bp_slot(*pevent);
+	}
+	return -1;
+}
+
+static int hw_break_release_slot(int breakno)
+{
+	struct perf_event **pevent;
+	int cpu;
+
+	for_each_online_cpu(cpu) {
+		pevent = per_cpu_ptr(breakinfo[breakno].pev, cpu);
+		if (dbg_release_bp_slot(*pevent))
+			/*
+			 * The debugger is responisble for handing the retry on
+			 * remove failure.
+			 */
+			return -1;
+	}
+	return 0;
+}
+
 static int
 kgdb_remove_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype)
 {
@@ -250,6 +293,10 @@ kgdb_remove_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype)
 	if (i == 4)
 		return -1;
 
+	if (hw_break_release_slot(i)) {
+		printk(KERN_ERR "Cannot remove hw breakpoint at %lx\n", addr);
+		return -1;
+	}
 	breakinfo[i].enabled = 0;
 
 	return 0;
@@ -316,6 +363,10 @@ kgdb_set_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype)
 		return -1;
 	}
 	breakinfo[i].addr = addr;
+	if (hw_break_reserve_slot(i)) {
+		breakinfo[i].addr = 0;
+		return -1;
+	}
 	breakinfo[i].enabled = 1;
 
 	return 0;
diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h
index 41235c93e4e9..070ba0621738 100644
--- a/include/linux/hw_breakpoint.h
+++ b/include/linux/hw_breakpoint.h
@@ -75,6 +75,8 @@ extern int __register_perf_hw_breakpoint(struct perf_event *bp);
 extern void unregister_hw_breakpoint(struct perf_event *bp);
 extern void unregister_wide_hw_breakpoint(struct perf_event **cpu_events);
 
+extern int dbg_reserve_bp_slot(struct perf_event *bp);
+extern int dbg_release_bp_slot(struct perf_event *bp);
 extern int reserve_bp_slot(struct perf_event *bp);
 extern void release_bp_slot(struct perf_event *bp);
 
diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c
index c030ae657f20..8a5c7d55ac9f 100644
--- a/kernel/hw_breakpoint.c
+++ b/kernel/hw_breakpoint.c
@@ -243,38 +243,70 @@ static void toggle_bp_slot(struct perf_event *bp, bool enable)
  *       ((per_cpu(nr_bp_flexible, *) > 1) + max(per_cpu(nr_cpu_bp_pinned, *))
  *            + max(per_cpu(nr_task_bp_pinned, *))) < HBP_NUM
  */
-int reserve_bp_slot(struct perf_event *bp)
+static int __reserve_bp_slot(struct perf_event *bp)
 {
 	struct bp_busy_slots slots = {0};
-	int ret = 0;
-
-	mutex_lock(&nr_bp_mutex);
 
 	fetch_bp_busy_slots(&slots, bp);
 
 	/* Flexible counters need to keep at least one slot */
-	if (slots.pinned + (!!slots.flexible) == HBP_NUM) {
-		ret = -ENOSPC;
-		goto end;
-	}
+	if (slots.pinned + (!!slots.flexible) == HBP_NUM)
+		return -ENOSPC;
 
 	toggle_bp_slot(bp, true);
 
-end:
+	return 0;
+}
+
+int reserve_bp_slot(struct perf_event *bp)
+{
+	int ret;
+
+	mutex_lock(&nr_bp_mutex);
+
+	ret = __reserve_bp_slot(bp);
+
 	mutex_unlock(&nr_bp_mutex);
 
 	return ret;
 }
 
+static void __release_bp_slot(struct perf_event *bp)
+{
+	toggle_bp_slot(bp, false);
+}
+
 void release_bp_slot(struct perf_event *bp)
 {
 	mutex_lock(&nr_bp_mutex);
 
-	toggle_bp_slot(bp, false);
+	__release_bp_slot(bp);
 
 	mutex_unlock(&nr_bp_mutex);
 }
 
+/*
+ * Allow the kernel debugger to reserve breakpoint slots without
+ * taking a lock using the dbg_* variant of for the reserve and
+ * release breakpoint slots.
+ */
+int dbg_reserve_bp_slot(struct perf_event *bp)
+{
+	if (mutex_is_locked(&nr_bp_mutex))
+		return -1;
+
+	return __reserve_bp_slot(bp);
+}
+
+int dbg_release_bp_slot(struct perf_event *bp)
+{
+	if (mutex_is_locked(&nr_bp_mutex))
+		return -1;
+
+	__release_bp_slot(bp);
+
+	return 0;
+}
 
 int register_perf_hw_breakpoint(struct perf_event *bp)
 {
-- 
cgit v1.2.3


From ef7995f4e46b1677f3eaaf547316e1a910b38dcb Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Fri, 29 Jan 2010 23:59:12 -0800
Subject: Input: implement input filters

Sometimes it is desirable to suppress certain events from reaching
input handlers and thus user space. One such example is Mac mouse
button emulation code which catches certain key presses and converts
them into button clicks as if they were emitted by a virtual mouse.
The original key press events should be completely suppressed,
otherwise user space will be confused, and while keyboard driver
does it on its own evdev is blissfully unaware of this arrangement.

This patch adds notion of 'filter' to the standard input handlers,
which may flag event as filtered thus preventing it from reaching
other input handlers. Filters don't (nor will they ever) have a
notion of priority relative to each other, input core will run all
of them first and any one of them may mark event as filtered.

This patch is inspired by similar patch by Matthew Garret but the
implementation and intended usage are quite different.

Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/input/input.c | 41 ++++++++++++++++++++++++++++++++++-------
 include/linux/input.h |  8 ++++++++
 2 files changed, 42 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/input/input.c b/drivers/input/input.c
index 6c161e220868..7080a9d4b840 100644
--- a/drivers/input/input.c
+++ b/drivers/input/input.c
@@ -86,12 +86,14 @@ static int input_defuzz_abs_event(int value, int old_val, int fuzz)
 }
 
 /*
- * Pass event through all open handles. This function is called with
+ * Pass event first through all filters and then, if event has not been
+ * filtered out, through all open handles. This function is called with
  * dev->event_lock held and interrupts disabled.
  */
 static void input_pass_event(struct input_dev *dev,
 			     unsigned int type, unsigned int code, int value)
 {
+	struct input_handler *handler;
 	struct input_handle *handle;
 
 	rcu_read_lock();
@@ -99,11 +101,25 @@ static void input_pass_event(struct input_dev *dev,
 	handle = rcu_dereference(dev->grab);
 	if (handle)
 		handle->handler->event(handle, type, code, value);
-	else
-		list_for_each_entry_rcu(handle, &dev->h_list, d_node)
-			if (handle->open)
-				handle->handler->event(handle,
-							type, code, value);
+	else {
+		bool filtered = false;
+
+		list_for_each_entry_rcu(handle, &dev->h_list, d_node) {
+			if (!handle->open)
+				continue;
+
+			handler = handle->handler;
+			if (!handler->filter) {
+				if (filtered)
+					break;
+
+				handler->event(handle, type, code, value);
+
+			} else if (handler->filter(handle, type, code, value))
+				filtered = true;
+		}
+	}
+
 	rcu_read_unlock();
 }
 
@@ -990,6 +1006,8 @@ static int input_handlers_seq_show(struct seq_file *seq, void *v)
 	union input_seq_state *state = (union input_seq_state *)&seq->private;
 
 	seq_printf(seq, "N: Number=%u Name=%s", state->pos, handler->name);
+	if (handler->filter)
+		seq_puts(seq, " (filter)");
 	if (handler->fops)
 		seq_printf(seq, " Minor=%d", handler->minor);
 	seq_putc(seq, '\n');
@@ -1803,7 +1821,16 @@ int input_register_handle(struct input_handle *handle)
 	error = mutex_lock_interruptible(&dev->mutex);
 	if (error)
 		return error;
-	list_add_tail_rcu(&handle->d_node, &dev->h_list);
+
+	/*
+	 * Filters go to the head of the list, normal handlers
+	 * to the tail.
+	 */
+	if (handler->filter)
+		list_add_rcu(&handle->d_node, &dev->h_list);
+	else
+		list_add_tail_rcu(&handle->d_node, &dev->h_list);
+
 	mutex_unlock(&dev->mutex);
 
 	/*
diff --git a/include/linux/input.h b/include/linux/input.h
index 7be8a6537b57..6c9d3d49fa91 100644
--- a/include/linux/input.h
+++ b/include/linux/input.h
@@ -1198,6 +1198,8 @@ struct input_handle;
  * @event: event handler. This method is being called by input core with
  *	interrupts disabled and dev->event_lock spinlock held and so
  *	it may not sleep
+ * @filter: similar to @event; separates normal event handlers from
+ *	"filters".
  * @connect: called when attaching a handler to an input device
  * @disconnect: disconnects a handler from input device
  * @start: starts handler for given handle. This function is called by
@@ -1219,6 +1221,11 @@ struct input_handle;
  * same time. All of them will get their copy of input event generated by
  * the device.
  *
+ * The very same structure is used to implement input filters. Input core
+ * allows filters to run first and will not pass event to regular handlers
+ * if any of the filters indicate that the event should be filtered (by
+ * returning %true from their filter() method).
+ *
  * Note that input core serializes calls to connect() and disconnect()
  * methods.
  */
@@ -1227,6 +1234,7 @@ struct input_handler {
 	void *private;
 
 	void (*event)(struct input_handle *handle, unsigned int type, unsigned int code, int value);
+	bool (*filter)(struct input_handle *handle, unsigned int type, unsigned int code, int value);
 	int (*connect)(struct input_handler *handler, struct input_dev *dev, const struct input_device_id *id);
 	void (*disconnect)(struct input_handle *handle);
 	void (*start)(struct input_handle *handle);
-- 
cgit v1.2.3


From 99b089c3c38a83ebaeb1cc4584ddcde841626467 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Sat, 30 Jan 2010 00:53:29 -0800
Subject: Input: Mac button emulation - implement as an input filter

Current implementation of Mac mouse button emulation plugs into legacy
keyboard driver, converts certain keys into button events on a separate
device, and suppresses the real events from reaching tty. This worked
well enough until user space started using evdev which was completely
unaware of this arrangement and kept sending original key presses to
its users. Change the implementation to use newly added input filter
framework so that original key presses are not transmitted to any
handlers.

As a bonus remove SYSCTL dependencies from the code and use Kconfig
instead; also do not create the emulated mouse device until user
activates emulation.

Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/char/keyboard.c     |   5 -
 drivers/macintosh/Kconfig   |   1 +
 drivers/macintosh/mac_hid.c | 257 ++++++++++++++++++++++++++++++++------------
 include/linux/kbd_kern.h    |   3 -
 4 files changed, 188 insertions(+), 78 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/char/keyboard.c b/drivers/char/keyboard.c
index f706b1dffdb3..cbf64b985ef4 100644
--- a/drivers/char/keyboard.c
+++ b/drivers/char/keyboard.c
@@ -1185,11 +1185,6 @@ static void kbd_keycode(unsigned int keycode, int down, int hw_raw)
 
 	rep = (down == 2);
 
-#ifdef CONFIG_MAC_EMUMOUSEBTN
-	if (mac_hid_mouse_emulate_buttons(1, keycode, down))
-		return;
-#endif /* CONFIG_MAC_EMUMOUSEBTN */
-
 	if ((raw_mode = (kbd->kbdmode == VC_RAW)) && !hw_raw)
 		if (emulate_raw(vc, keycode, !down << 7))
 			if (keycode < BTN_MISC && printk_ratelimit())
diff --git a/drivers/macintosh/Kconfig b/drivers/macintosh/Kconfig
index 3d906833948d..aa3c27e5255d 100644
--- a/drivers/macintosh/Kconfig
+++ b/drivers/macintosh/Kconfig
@@ -172,6 +172,7 @@ config INPUT_ADBHID
 
 config MAC_EMUMOUSEBTN
 	bool "Support for mouse button 2+3 emulation"
+	depends on SYSCTL
 	select INPUT
 	help
 	  This provides generic support for emulating the 2nd and 3rd mouse
diff --git a/drivers/macintosh/mac_hid.c b/drivers/macintosh/mac_hid.c
index 7b4ef5bb556b..0b210a90aef5 100644
--- a/drivers/macintosh/mac_hid.c
+++ b/drivers/macintosh/mac_hid.c
@@ -13,17 +13,195 @@
 #include <linux/sysctl.h>
 #include <linux/input.h>
 #include <linux/module.h>
-#include <linux/kbd_kern.h>
 
-
-static struct input_dev *emumousebtn;
-static int emumousebtn_input_register(void);
 static int mouse_emulate_buttons;
 static int mouse_button2_keycode = KEY_RIGHTCTRL;	/* right control key */
 static int mouse_button3_keycode = KEY_RIGHTALT;	/* right option key */
-static int mouse_last_keycode;
 
-#if defined(CONFIG_SYSCTL)
+static struct input_dev *mac_hid_emumouse_dev;
+
+static int mac_hid_create_emumouse(void)
+{
+	static struct lock_class_key mac_hid_emumouse_dev_event_class;
+	static struct lock_class_key mac_hid_emumouse_dev_mutex_class;
+	int err;
+
+	mac_hid_emumouse_dev = input_allocate_device();
+	if (!mac_hid_emumouse_dev)
+		return -ENOMEM;
+
+	lockdep_set_class(&mac_hid_emumouse_dev->event_lock,
+			  &mac_hid_emumouse_dev_event_class);
+	lockdep_set_class(&mac_hid_emumouse_dev->mutex,
+			  &mac_hid_emumouse_dev_mutex_class);
+
+	mac_hid_emumouse_dev->name = "Macintosh mouse button emulation";
+	mac_hid_emumouse_dev->id.bustype = BUS_ADB;
+	mac_hid_emumouse_dev->id.vendor = 0x0001;
+	mac_hid_emumouse_dev->id.product = 0x0001;
+	mac_hid_emumouse_dev->id.version = 0x0100;
+
+	mac_hid_emumouse_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REL);
+	mac_hid_emumouse_dev->keybit[BIT_WORD(BTN_MOUSE)] =
+		BIT_MASK(BTN_LEFT) | BIT_MASK(BTN_MIDDLE) | BIT_MASK(BTN_RIGHT);
+	mac_hid_emumouse_dev->relbit[0] = BIT_MASK(REL_X) | BIT_MASK(REL_Y);
+
+	err = input_register_device(mac_hid_emumouse_dev);
+	if (err) {
+		input_free_device(mac_hid_emumouse_dev);
+		mac_hid_emumouse_dev = NULL;
+		return err;
+	}
+
+	return 0;
+}
+
+static void mac_hid_destroy_emumouse(void)
+{
+	input_unregister_device(mac_hid_emumouse_dev);
+	mac_hid_emumouse_dev = NULL;
+}
+
+static bool mac_hid_emumouse_filter(struct input_handle *handle,
+				    unsigned int type, unsigned int code,
+				    int value)
+{
+	unsigned int btn;
+
+	if (type != EV_KEY)
+		return false;
+
+	if (code == mouse_button2_keycode)
+		btn = BTN_MIDDLE;
+	else if (code == mouse_button3_keycode)
+		btn = BTN_RIGHT;
+	else
+		return false;
+
+	input_report_key(mac_hid_emumouse_dev, btn, value);
+	input_sync(mac_hid_emumouse_dev);
+
+	return true;
+}
+
+static int mac_hid_emumouse_connect(struct input_handler *handler,
+				    struct input_dev *dev,
+				    const struct input_device_id *id)
+{
+	struct input_handle *handle;
+	int error;
+
+	/* Don't bind to ourselves */
+	if (dev == mac_hid_emumouse_dev)
+		return -ENODEV;
+
+	handle = kzalloc(sizeof(struct input_handle), GFP_KERNEL);
+	if (!handle)
+		return -ENOMEM;
+
+	handle->dev = dev;
+	handle->handler = handler;
+	handle->name = "mac-button-emul";
+
+	error = input_register_handle(handle);
+	if (error) {
+		printk(KERN_ERR
+			"mac_hid: Failed to register button emulation handle, "
+			"error %d\n", error);
+		goto err_free;
+	}
+
+	error = input_open_device(handle);
+	if (error) {
+		printk(KERN_ERR
+			"mac_hid: Failed to open input device, error %d\n",
+			error);
+		goto err_unregister;
+	}
+
+	return 0;
+
+ err_unregister:
+	input_unregister_handle(handle);
+ err_free:
+	kfree(handle);
+	return error;
+}
+
+static void mac_hid_emumouse_disconnect(struct input_handle *handle)
+{
+	input_close_device(handle);
+	input_unregister_handle(handle);
+	kfree(handle);
+}
+
+static const struct input_device_id mac_hid_emumouse_ids[] = {
+	{
+		.flags = INPUT_DEVICE_ID_MATCH_EVBIT,
+		.evbit = { BIT_MASK(EV_KEY) },
+	},
+	{ },
+};
+
+MODULE_DEVICE_TABLE(input, mac_hid_emumouse_ids);
+
+static struct input_handler mac_hid_emumouse_handler = {
+	.filter		= mac_hid_emumouse_filter,
+	.connect	= mac_hid_emumouse_connect,
+	.disconnect	= mac_hid_emumouse_disconnect,
+	.name		= "mac-button-emul",
+	.id_table	= mac_hid_emumouse_ids,
+};
+
+static int mac_hid_start_emulation(void)
+{
+	int err;
+
+	err = mac_hid_create_emumouse();
+	if (err)
+		return err;
+
+	err = input_register_handler(&mac_hid_emumouse_handler);
+	if (err) {
+		mac_hid_destroy_emumouse();
+		return err;
+	}
+
+	return 0;
+}
+
+static void mac_hid_stop_emulation(void)
+{
+	input_unregister_handler(&mac_hid_emumouse_handler);
+	mac_hid_destroy_emumouse();
+}
+
+static int mac_hid_toggle_emumouse(ctl_table *table, int write,
+				   void __user *buffer, size_t *lenp,
+				   loff_t *ppos)
+{
+	int *valp = table->data;
+	int old_val = *valp;
+	int rc;
+
+	rc = proc_dointvec(table, write, buffer, lenp, ppos);
+
+	if (rc == 0 && write && *valp != old_val) {
+		if (*valp == 1)
+			rc = mac_hid_start_emulation();
+		else if (*valp == 0)
+			mac_hid_stop_emulation();
+		else
+			rc = -EINVAL;
+	}
+
+	/* Restore the old value in case of error */
+	if (rc)
+		*valp = old_val;
+
+	return rc;
+}
+
 /* file(s) in /proc/sys/dev/mac_hid */
 static ctl_table mac_hid_files[] = {
 	{
@@ -31,7 +209,7 @@ static ctl_table mac_hid_files[] = {
 		.data		= &mouse_emulate_buttons,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
+		.proc_handler	= mac_hid_toggle_emumouse,
 	},
 	{
 		.procname	= "mouse_button2_keycode",
@@ -74,73 +252,12 @@ static ctl_table mac_hid_root_dir[] = {
 
 static struct ctl_table_header *mac_hid_sysctl_header;
 
-#endif /* endif CONFIG_SYSCTL */
-
-int mac_hid_mouse_emulate_buttons(int caller, unsigned int keycode, int down)
-{
-	switch (caller) {
-	case 1:
-		/* Called from keyboard.c */
-		if (mouse_emulate_buttons
-		    && (keycode == mouse_button2_keycode
-			|| keycode == mouse_button3_keycode)) {
-			if (mouse_emulate_buttons == 1) {
-				input_report_key(emumousebtn,
-						 keycode == mouse_button2_keycode ? BTN_MIDDLE : BTN_RIGHT,
-						 down);
-				input_sync(emumousebtn);
-				return 1;
-			}
-			mouse_last_keycode = down ? keycode : 0;
-		}
-		break;
-	}
-	return 0;
-}
-
-static struct lock_class_key emumousebtn_event_class;
-static struct lock_class_key emumousebtn_mutex_class;
-
-static int emumousebtn_input_register(void)
-{
-	int ret;
-
-	emumousebtn = input_allocate_device();
-	if (!emumousebtn)
-		return -ENOMEM;
-
-	lockdep_set_class(&emumousebtn->event_lock, &emumousebtn_event_class);
-	lockdep_set_class(&emumousebtn->mutex, &emumousebtn_mutex_class);
-
-	emumousebtn->name = "Macintosh mouse button emulation";
-	emumousebtn->id.bustype = BUS_ADB;
-	emumousebtn->id.vendor = 0x0001;
-	emumousebtn->id.product = 0x0001;
-	emumousebtn->id.version = 0x0100;
-
-	emumousebtn->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REL);
-	emumousebtn->keybit[BIT_WORD(BTN_MOUSE)] = BIT_MASK(BTN_LEFT) |
-		BIT_MASK(BTN_MIDDLE) | BIT_MASK(BTN_RIGHT);
-	emumousebtn->relbit[0] = BIT_MASK(REL_X) | BIT_MASK(REL_Y);
-
-	ret = input_register_device(emumousebtn);
-	if (ret)
-		input_free_device(emumousebtn);
-
-	return ret;
-}
 
 static int __init mac_hid_init(void)
 {
-	int err;
-
-	err = emumousebtn_input_register();
-	if (err)
-		return err;
-
-#if defined(CONFIG_SYSCTL)
 	mac_hid_sysctl_header = register_sysctl_table(mac_hid_root_dir);
-#endif /* CONFIG_SYSCTL */
+	if (!mac_hid_sysctl_header)
+		return -ENOMEM;
 
 	return 0;
 }
diff --git a/include/linux/kbd_kern.h b/include/linux/kbd_kern.h
index 8bdb16bfe5fb..506ad20c18f8 100644
--- a/include/linux/kbd_kern.h
+++ b/include/linux/kbd_kern.h
@@ -161,7 +161,4 @@ static inline void con_schedule_flip(struct tty_struct *t)
 	schedule_delayed_work(&t->buf.work, 0);
 }
 
-/* mac_hid.c */
-extern int mac_hid_mouse_emulate_buttons(int, unsigned int, int);
-
 #endif
-- 
cgit v1.2.3


From d6ad3e286d2c075a60b9f11075a2c55aeeeca2ad Mon Sep 17 00:00:00 2001
From: Jason Wessel <jason.wessel@windriver.com>
Date: Wed, 27 Jan 2010 16:25:22 -0600
Subject: softlockup: Add sched_clock_tick() to avoid kernel warning on kgdb
 resume

When CONFIG_HAVE_UNSTABLE_SCHED_CLOCK is set, sched_clock() gets
the time from hardware such as the TSC on x86. In this
configuration kgdb will report a softlock warning message on
resuming or detaching from a debug session.

Sequence of events in the problem case:

 1) "cpu sched clock" and "hardware time" are at 100 sec prior
    to a call to kgdb_handle_exception()

 2) Debugger waits in kgdb_handle_exception() for 80 sec and on
    exit the following is called ...  touch_softlockup_watchdog() -->
    __raw_get_cpu_var(touch_timestamp) = 0;

 3) "cpu sched clock" = 100s (it was not updated, because the
    interrupt was disabled in kgdb) but the "hardware time" = 180 sec

 4) The first timer interrupt after resuming from
    kgdb_handle_exception updates the watchdog from the "cpu sched clock"

update_process_times() { ...  run_local_timers() -->
softlockup_tick() --> check (touch_timestamp == 0) (it is "YES"
here, we have set "touch_timestamp = 0" at kgdb) -->
__touch_softlockup_watchdog() ***(A)--> reset "touch_timestamp"
to "get_timestamp()" (Here, the "touch_timestamp" will still be
set to 100s.)  ...

    scheduler_tick() ***(B)--> sched_clock_tick() (update "cpu sched
    clock" to "hardware time" = 180s) ...  }

 5) The Second timer interrupt handler appears to have a large
    jump and trips the softlockup warning.

update_process_times() { ...  run_local_timers() -->
softlockup_tick() --> "cpu sched clock" - "touch_timestamp" =
180s-100s > 60s --> printk "soft lockup error messages" ...  }

note: ***(A) reset "touch_timestamp" to
"get_timestamp(this_cpu)"

Why is "touch_timestamp" 100 sec, instead of 180 sec?

When CONFIG_HAVE_UNSTABLE_SCHED_CLOCK is set, the call trace of
get_timestamp() is:

get_timestamp(this_cpu)
 -->cpu_clock(this_cpu)
 -->sched_clock_cpu(this_cpu)
 -->__update_sched_clock(sched_clock_data, now)

The __update_sched_clock() function uses the GTOD tick value to
create a window to normalize the "now" values.  So if "now"
value is too big for sched_clock_data, it will be ignored.

The fix is to invoke sched_clock_tick() to update "cpu sched
clock" in order to recover from this state.  This is done by
introducing the function touch_softlockup_watchdog_sync(). This
allows kgdb to request that the sched clock is updated when the
watchdog thread runs the first time after a resume from kgdb.

[yong.zhang0@gmail.com: Use per cpu instead of an array]
Signed-off-by: Jason Wessel <jason.wessel@windriver.com>
Signed-off-by: Dongdong Deng <Dongdong.Deng@windriver.com>
Cc: kgdb-bugreport@lists.sourceforge.net
Cc: peterz@infradead.org
LKML-Reference: <1264631124-4837-2-git-send-email-jason.wessel@windriver.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h |  4 ++++
 kernel/kgdb.c         |  6 +++---
 kernel/softlockup.c   | 15 +++++++++++++++
 3 files changed, 22 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 6f7bba93929b..89232151a9d0 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -310,6 +310,7 @@ extern void sched_show_task(struct task_struct *p);
 #ifdef CONFIG_DETECT_SOFTLOCKUP
 extern void softlockup_tick(void);
 extern void touch_softlockup_watchdog(void);
+extern void touch_softlockup_watchdog_sync(void);
 extern void touch_all_softlockup_watchdogs(void);
 extern int proc_dosoftlockup_thresh(struct ctl_table *table, int write,
 				    void __user *buffer,
@@ -323,6 +324,9 @@ static inline void softlockup_tick(void)
 static inline void touch_softlockup_watchdog(void)
 {
 }
+static inline void touch_softlockup_watchdog_sync(void)
+{
+}
 static inline void touch_all_softlockup_watchdogs(void)
 {
 }
diff --git a/kernel/kgdb.c b/kernel/kgdb.c
index 2eb517e23514..87f2cc557553 100644
--- a/kernel/kgdb.c
+++ b/kernel/kgdb.c
@@ -596,7 +596,7 @@ static void kgdb_wait(struct pt_regs *regs)
 
 	/* Signal the primary CPU that we are done: */
 	atomic_set(&cpu_in_kgdb[cpu], 0);
-	touch_softlockup_watchdog();
+	touch_softlockup_watchdog_sync();
 	clocksource_touch_watchdog();
 	local_irq_restore(flags);
 }
@@ -1450,7 +1450,7 @@ acquirelock:
 	    (kgdb_info[cpu].task &&
 	     kgdb_info[cpu].task->pid != kgdb_sstep_pid) && --sstep_tries) {
 		atomic_set(&kgdb_active, -1);
-		touch_softlockup_watchdog();
+		touch_softlockup_watchdog_sync();
 		clocksource_touch_watchdog();
 		local_irq_restore(flags);
 
@@ -1550,7 +1550,7 @@ kgdb_restore:
 	}
 	/* Free kgdb_active */
 	atomic_set(&kgdb_active, -1);
-	touch_softlockup_watchdog();
+	touch_softlockup_watchdog_sync();
 	clocksource_touch_watchdog();
 	local_irq_restore(flags);
 
diff --git a/kernel/softlockup.c b/kernel/softlockup.c
index d22579087e27..0d4c7898ab80 100644
--- a/kernel/softlockup.c
+++ b/kernel/softlockup.c
@@ -25,6 +25,7 @@ static DEFINE_SPINLOCK(print_lock);
 static DEFINE_PER_CPU(unsigned long, softlockup_touch_ts); /* touch timestamp */
 static DEFINE_PER_CPU(unsigned long, softlockup_print_ts); /* print timestamp */
 static DEFINE_PER_CPU(struct task_struct *, softlockup_watchdog);
+static DEFINE_PER_CPU(bool, softlock_touch_sync);
 
 static int __read_mostly did_panic;
 int __read_mostly softlockup_thresh = 60;
@@ -79,6 +80,12 @@ void touch_softlockup_watchdog(void)
 }
 EXPORT_SYMBOL(touch_softlockup_watchdog);
 
+void touch_softlockup_watchdog_sync(void)
+{
+	__raw_get_cpu_var(softlock_touch_sync) = true;
+	__raw_get_cpu_var(softlockup_touch_ts) = 0;
+}
+
 void touch_all_softlockup_watchdogs(void)
 {
 	int cpu;
@@ -118,6 +125,14 @@ void softlockup_tick(void)
 	}
 
 	if (touch_ts == 0) {
+		if (unlikely(per_cpu(softlock_touch_sync, this_cpu))) {
+			/*
+			 * If the time stamp was touched atomically
+			 * make sure the scheduler tick is up to date.
+			 */
+			per_cpu(softlock_touch_sync, this_cpu) = false;
+			sched_clock_tick();
+		}
 		__touch_softlockup_watchdog();
 		return;
 	}
-- 
cgit v1.2.3


From 61ef2489dbf587258526cfd4ebf4bba3b079f401 Mon Sep 17 00:00:00 2001
From: Wu Fengguang <fengguang.wu@intel.com>
Date: Fri, 22 Jan 2010 16:16:19 +0800
Subject: resources: introduce generic page_is_ram()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

It's based on walk_system_ram_range(), for archs that don't have
their own page_is_ram().

The static verions in MIPS and SCORE are also made global.

v4: prefer plain 1 instead of PAGE_IS_RAM (H. Peter Anvin)
v3: add comment (KAMEZAWA Hiroyuki)
    "AFAIK, this "System RAM" information has been used for kdump to
    grab valid memory area and seems good for the kernel itself."
v2: add PAGE_IS_RAM macro (Américo Wang)

Cc: Chen Liqin <liqin.chen@sunplusct.com>
Cc: Lennox Wu <lennox.wu@gmail.com>
Cc: Américo Wang <xiyou.wangcong@gmail.com>
Cc: linux-mips@linux-mips.org
Cc: Yinghai Lu <yinghai@kernel.org>
Acked-by: Ralf Baechle <ralf@linux-mips.org>
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
LKML-Reference: <20100122081619.GA6431@localhost>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/mips/mm/init.c    |  2 +-
 arch/score/mm/init.c   |  2 +-
 include/linux/ioport.h |  2 ++
 kernel/resource.c      | 13 +++++++++++++
 4 files changed, 17 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c
index 15aa1902a788..4d72aabe8352 100644
--- a/arch/mips/mm/init.c
+++ b/arch/mips/mm/init.c
@@ -294,7 +294,7 @@ void __init fixrange_init(unsigned long start, unsigned long end,
 }
 
 #ifndef CONFIG_NEED_MULTIPLE_NODES
-static int __init page_is_ram(unsigned long pagenr)
+int page_is_ram(unsigned long pagenr)
 {
 	int i;
 
diff --git a/arch/score/mm/init.c b/arch/score/mm/init.c
index 4e3dcd0c4716..f684a590c21d 100644
--- a/arch/score/mm/init.c
+++ b/arch/score/mm/init.c
@@ -59,7 +59,7 @@ static unsigned long setup_zero_page(void)
 }
 
 #ifndef CONFIG_NEED_MULTIPLE_NODES
-static int __init page_is_ram(unsigned long pagenr)
+int page_is_ram(unsigned long pagenr)
 {
 	if (pagenr >= min_low_pfn && pagenr < max_low_pfn)
 		return 1;
diff --git a/include/linux/ioport.h b/include/linux/ioport.h
index 83aa81297ea3..11ef7952b63a 100644
--- a/include/linux/ioport.h
+++ b/include/linux/ioport.h
@@ -188,5 +188,7 @@ extern int
 walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages,
 		void *arg, int (*func)(unsigned long, unsigned long, void *));
 
+extern int page_is_ram(unsigned long pfn);
+
 #endif /* __ASSEMBLY__ */
 #endif	/* _LINUX_IOPORT_H */
diff --git a/kernel/resource.c b/kernel/resource.c
index fb11a58b9594..b4d637a55256 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -297,6 +297,19 @@ int walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages,
 
 #endif
 
+static int __is_ram(unsigned long pfn, unsigned long nr_pages, void *arg)
+{
+	return 1;
+}
+/*
+ * This generic page_is_ram() returns true if specified address is
+ * registered as "System RAM" in iomem_resource list.
+ */
+int __attribute__((weak)) page_is_ram(unsigned long pfn)
+{
+	return walk_system_ram_range(pfn, 1, NULL, __is_ram) == 1;
+}
+
 /*
  * Find empty slot in the resource tree given range and alignment.
  */
-- 
cgit v1.2.3


From 53df8fdc15fb646b0219e43c989c2cdab1ab100c Mon Sep 17 00:00:00 2001
From: Wu Fengguang <fengguang.wu@intel.com>
Date: Wed, 27 Jan 2010 11:06:39 +0800
Subject: Move page_is_ram() declaration to mm.h

Move page_is_ram() declaration to mm.h, it makes no sense in <linux/ioport.h>.

Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
LKML-Reference: <20100127030639.GD8132@localhost>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 include/linux/ioport.h | 2 --
 include/linux/mm.h     | 2 ++
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ioport.h b/include/linux/ioport.h
index 11ef7952b63a..83aa81297ea3 100644
--- a/include/linux/ioport.h
+++ b/include/linux/ioport.h
@@ -188,7 +188,5 @@ extern int
 walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages,
 		void *arg, int (*func)(unsigned long, unsigned long, void *));
 
-extern int page_is_ram(unsigned long pfn);
-
 #endif /* __ASSEMBLY__ */
 #endif	/* _LINUX_IOPORT_H */
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 24c395694f4d..bad433fdbfce 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -265,6 +265,8 @@ static inline int get_page_unless_zero(struct page *page)
 	return atomic_inc_not_zero(&page->_count);
 }
 
+extern int page_is_ram(unsigned long pfn);
+
 /* Support for virtually mapped pages */
 struct page *vmalloc_to_page(const void *addr);
 unsigned long vmalloc_to_pfn(const void *addr);
-- 
cgit v1.2.3


From b79c7adf82e8b8a6d6ad1dadf7e687a4a030cf8c Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@opensource.se>
Date: Tue, 2 Feb 2010 13:01:25 +0900
Subject: mtd: trivial sh_flctl changes

This patch contains a few changes for the sh_flctl driver:
 - not sh7723-only driver - get rid of kconfig dependency
 - use dev_err() instead of printk()
 - use __devinit and __devexit for probe()/remove()
 - fix probe() return values

Signed-off-by: Magnus Damm <damm@opensource.se>
Acked-by: Yoshihiro Shimoda <shimoda.yoshihiro@renesas.com>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 drivers/mtd/nand/Kconfig     |  4 ++--
 drivers/mtd/nand/sh_flctl.c  | 42 +++++++++++++++++++++++-------------------
 include/linux/mtd/sh_flctl.h |  1 +
 3 files changed, 26 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/nand/Kconfig b/drivers/mtd/nand/Kconfig
index 677cd53f18c3..bb6465604235 100644
--- a/drivers/mtd/nand/Kconfig
+++ b/drivers/mtd/nand/Kconfig
@@ -457,10 +457,10 @@ config MTD_NAND_NOMADIK
 
 config MTD_NAND_SH_FLCTL
 	tristate "Support for NAND on Renesas SuperH FLCTL"
-	depends on MTD_NAND && SUPERH && CPU_SUBTYPE_SH7723
+	depends on MTD_NAND && SUPERH
 	help
 	  Several Renesas SuperH CPU has FLCTL. This option enables support
-	  for NAND Flash using FLCTL. This driver support SH7723.
+	  for NAND Flash using FLCTL.
 
 config MTD_NAND_DAVINCI
         tristate "Support NAND on DaVinci SoC"
diff --git a/drivers/mtd/nand/sh_flctl.c b/drivers/mtd/nand/sh_flctl.c
index 02bef21f2e4b..ab068a503b29 100644
--- a/drivers/mtd/nand/sh_flctl.c
+++ b/drivers/mtd/nand/sh_flctl.c
@@ -1,10 +1,10 @@
 /*
  * SuperH FLCTL nand controller
  *
- * Copyright © 2008 Renesas Solutions Corp.
- * Copyright © 2008 Atom Create Engineering Co., Ltd.
+ * Copyright (c) 2008 Renesas Solutions Corp.
+ * Copyright (c) 2008 Atom Create Engineering Co., Ltd.
  *
- * Based on fsl_elbc_nand.c, Copyright © 2006-2007 Freescale Semiconductor
+ * Based on fsl_elbc_nand.c, Copyright (c) 2006-2007 Freescale Semiconductor
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -75,6 +75,11 @@ static void start_translation(struct sh_flctl *flctl)
 	writeb(TRSTRT, FLTRCR(flctl));
 }
 
+static void timeout_error(struct sh_flctl *flctl, const char *str)
+{
+	dev_err(&flctl->pdev->dev, "Timeout occured in %s\n", str);
+}
+
 static void wait_completion(struct sh_flctl *flctl)
 {
 	uint32_t timeout = LOOP_TIMEOUT_MAX;
@@ -87,7 +92,7 @@ static void wait_completion(struct sh_flctl *flctl)
 		udelay(1);
 	}
 
-	printk(KERN_ERR "wait_completion(): Timeout occured \n");
+	timeout_error(flctl, __func__);
 	writeb(0x0, FLTRCR(flctl));
 }
 
@@ -132,7 +137,7 @@ static void wait_rfifo_ready(struct sh_flctl *flctl)
 			return;
 		udelay(1);
 	}
-	printk(KERN_ERR "wait_rfifo_ready(): Timeout occured \n");
+	timeout_error(flctl, __func__);
 }
 
 static void wait_wfifo_ready(struct sh_flctl *flctl)
@@ -146,7 +151,7 @@ static void wait_wfifo_ready(struct sh_flctl *flctl)
 			return;
 		udelay(1);
 	}
-	printk(KERN_ERR "wait_wfifo_ready(): Timeout occured \n");
+	timeout_error(flctl, __func__);
 }
 
 static int wait_recfifo_ready(struct sh_flctl *flctl, int sector_number)
@@ -198,7 +203,7 @@ static int wait_recfifo_ready(struct sh_flctl *flctl, int sector_number)
 		writel(0, FL4ECCCR(flctl));
 	}
 
-	printk(KERN_ERR "wait_recfifo_ready(): Timeout occured \n");
+	timeout_error(flctl, __func__);
 	return 1;	/* timeout */
 }
 
@@ -214,7 +219,7 @@ static void wait_wecfifo_ready(struct sh_flctl *flctl)
 			return;
 		udelay(1);
 	}
-	printk(KERN_ERR "wait_wecfifo_ready(): Timeout occured \n");
+	timeout_error(flctl, __func__);
 }
 
 static void read_datareg(struct sh_flctl *flctl, int offset)
@@ -769,38 +774,36 @@ static int flctl_chip_init_tail(struct mtd_info *mtd)
 	return 0;
 }
 
-static int __init flctl_probe(struct platform_device *pdev)
+static int __devinit flctl_probe(struct platform_device *pdev)
 {
 	struct resource *res;
 	struct sh_flctl *flctl;
 	struct mtd_info *flctl_mtd;
 	struct nand_chip *nand;
 	struct sh_flctl_platform_data *pdata;
-	int ret;
+	int ret = -ENXIO;
 
 	pdata = pdev->dev.platform_data;
 	if (pdata == NULL) {
-		printk(KERN_ERR "sh_flctl platform_data not found.\n");
-		return -ENODEV;
+		dev_err(&pdev->dev, "no platform data defined\n");
+		return -EINVAL;
 	}
 
 	flctl = kzalloc(sizeof(struct sh_flctl), GFP_KERNEL);
 	if (!flctl) {
-		printk(KERN_ERR "Unable to allocate NAND MTD dev structure.\n");
+		dev_err(&pdev->dev, "failed to allocate driver data\n");
 		return -ENOMEM;
 	}
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	if (!res) {
-		printk(KERN_ERR "%s: resource not found.\n", __func__);
-		ret = -ENODEV;
+		dev_err(&pdev->dev, "failed to get I/O memory\n");
 		goto err;
 	}
 
-	flctl->reg = ioremap(res->start, res->end - res->start + 1);
+	flctl->reg = ioremap(res->start, resource_size(res));
 	if (flctl->reg == NULL) {
-		printk(KERN_ERR "%s: ioremap error.\n", __func__);
-		ret = -ENOMEM;
+		dev_err(&pdev->dev, "failed to remap I/O memory\n");
 		goto err;
 	}
 
@@ -808,6 +811,7 @@ static int __init flctl_probe(struct platform_device *pdev)
 	flctl_mtd = &flctl->mtd;
 	nand = &flctl->chip;
 	flctl_mtd->priv = nand;
+	flctl->pdev = pdev;
 	flctl->hwecc = pdata->has_hwecc;
 
 	flctl_register_init(flctl, pdata->flcmncr_val);
@@ -846,7 +850,7 @@ err:
 	return ret;
 }
 
-static int __exit flctl_remove(struct platform_device *pdev)
+static int __devexit flctl_remove(struct platform_device *pdev)
 {
 	struct sh_flctl *flctl = platform_get_drvdata(pdev);
 
diff --git a/include/linux/mtd/sh_flctl.h b/include/linux/mtd/sh_flctl.h
index e77c1cea404d..164c9d4013c0 100644
--- a/include/linux/mtd/sh_flctl.h
+++ b/include/linux/mtd/sh_flctl.h
@@ -96,6 +96,7 @@
 struct sh_flctl {
 	struct mtd_info		mtd;
 	struct nand_chip	chip;
+	struct platform_device	*pdev;
 	void __iomem		*reg;
 
 	uint8_t	done_buff[2048 + 64];	/* max size 2048 + 64 */
-- 
cgit v1.2.3


From 010ab820582d03bcd3648416b5837107e8a9c5f3 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@opensource.se>
Date: Wed, 27 Jan 2010 09:17:21 +0000
Subject: mtd: sh_flctl SHBUSSEL and SEL_16BIT support

This patch extends the sh_flctl driver with support
for 16-bit bus configuration using SEL_16BIT and
support for multiplexed pins using SHBUSSEL.

Signed-off-by: Magnus Damm <damm@opensource.se>
Acked-by: Yoshihiro Shimoda <shimoda.yoshihiro@renesas.com>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 drivers/mtd/nand/sh_flctl.c  | 27 ++++++++++++++++++++++++++-
 include/linux/mtd/sh_flctl.h |  2 ++
 2 files changed, 28 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/nand/sh_flctl.c b/drivers/mtd/nand/sh_flctl.c
index ab068a503b29..1842df8bdd93 100644
--- a/drivers/mtd/nand/sh_flctl.c
+++ b/drivers/mtd/nand/sh_flctl.c
@@ -105,6 +105,8 @@ static void set_addr(struct mtd_info *mtd, int column, int page_addr)
 		addr = page_addr;	/* ERASE1 */
 	} else if (page_addr != -1) {
 		/* SEQIN, READ0, etc.. */
+		if (flctl->chip.options & NAND_BUSWIDTH_16)
+			column >>= 1;
 		if (flctl->page_size) {
 			addr = column & 0x0FFF;
 			addr |= (page_addr & 0xff) << 16;
@@ -280,7 +282,7 @@ static void write_fiforeg(struct sh_flctl *flctl, int rlen, int offset)
 static void set_cmd_regs(struct mtd_info *mtd, uint32_t cmd, uint32_t flcmcdr_val)
 {
 	struct sh_flctl *flctl = mtd_to_flctl(mtd);
-	uint32_t flcmncr_val = readl(FLCMNCR(flctl));
+	uint32_t flcmncr_val = readl(FLCMNCR(flctl)) & ~SEL_16BIT;
 	uint32_t flcmdcr_val, addr_len_bytes = 0;
 
 	/* Set SNAND bit if page size is 2048byte */
@@ -302,6 +304,8 @@ static void set_cmd_regs(struct mtd_info *mtd, uint32_t cmd, uint32_t flcmcdr_va
 	case NAND_CMD_READOOB:
 		addr_len_bytes = flctl->rw_ADRCNT;
 		flcmdcr_val |= CDSRC_E;
+		if (flctl->chip.options & NAND_BUSWIDTH_16)
+			flcmncr_val |= SEL_16BIT;
 		break;
 	case NAND_CMD_SEQIN:
 		/* This case is that cmd is READ0 or READ1 or READ00 */
@@ -310,6 +314,8 @@ static void set_cmd_regs(struct mtd_info *mtd, uint32_t cmd, uint32_t flcmcdr_va
 	case NAND_CMD_PAGEPROG:
 		addr_len_bytes = flctl->rw_ADRCNT;
 		flcmdcr_val |= DOCMD2_E | CDSRC_E | SELRW;
+		if (flctl->chip.options & NAND_BUSWIDTH_16)
+			flcmncr_val |= SEL_16BIT;
 		break;
 	case NAND_CMD_READID:
 		flcmncr_val &= ~SNAND_E;
@@ -528,6 +534,8 @@ static void flctl_cmdfunc(struct mtd_info *mtd, unsigned int command,
 		set_addr(mtd, 0, page_addr);
 
 		flctl->read_bytes = mtd->writesize + mtd->oobsize;
+		if (flctl->chip.options & NAND_BUSWIDTH_16)
+			column >>= 1;
 		flctl->index += column;
 		goto read_normal_exit;
 
@@ -691,6 +699,18 @@ static uint8_t flctl_read_byte(struct mtd_info *mtd)
 	return data;
 }
 
+static uint16_t flctl_read_word(struct mtd_info *mtd)
+{
+       struct sh_flctl *flctl = mtd_to_flctl(mtd);
+       int index = flctl->index;
+       uint16_t data;
+       uint16_t *buf = (uint16_t *)&flctl->done_buff[index];
+
+       data = *buf;
+       flctl->index += 2;
+       return data;
+}
+
 static void flctl_read_buf(struct mtd_info *mtd, uint8_t *buf, int len)
 {
 	int i;
@@ -829,6 +849,11 @@ static int __devinit flctl_probe(struct platform_device *pdev)
 	nand->select_chip = flctl_select_chip;
 	nand->cmdfunc = flctl_cmdfunc;
 
+	if (pdata->flcmncr_val & SEL_16BIT) {
+		nand->options |= NAND_BUSWIDTH_16;
+		nand->read_word = flctl_read_word;
+	}
+
 	ret = nand_scan_ident(flctl_mtd, 1);
 	if (ret)
 		goto err;
diff --git a/include/linux/mtd/sh_flctl.h b/include/linux/mtd/sh_flctl.h
index 164c9d4013c0..ab77609ec337 100644
--- a/include/linux/mtd/sh_flctl.h
+++ b/include/linux/mtd/sh_flctl.h
@@ -51,6 +51,8 @@
 #define _4ECCCNTEN	(0x1 << 24)
 #define _4ECCEN		(0x1 << 23)
 #define _4ECCCORRECT	(0x1 << 22)
+#define SHBUSSEL	(0x1 << 20)
+#define SEL_16BIT	(0x1 << 19)
 #define SNAND_E		(0x1 << 18)	/* SNAND (0=512 1=2048)*/
 #define QTSEL_E		(0x1 << 17)
 #define ENDIAN		(0x1 << 16)	/* 1 = little endian */
-- 
cgit v1.2.3


From c30f540b63047437ffa894b5353216410c480d1a Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Tue, 2 Feb 2010 15:03:24 +0100
Subject: netfilter: xtables: CONFIG_COMPAT redux

Ifdef out
	struct nf_sockopt_ops::compat_set
	struct nf_sockopt_ops::compat_get
	struct xt_match::compat_from_user
	struct xt_match::compat_to_user
	struct xt_match::compatsize
to make structures smaller on COMPAT=n kernels.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/netfilter.h          |  9 ++++++---
 include/linux/netfilter/x_tables.h | 12 ++++++++----
 2 files changed, 14 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index 48c54960773c..78f33d223680 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -114,15 +114,17 @@ struct nf_sockopt_ops {
 	int set_optmin;
 	int set_optmax;
 	int (*set)(struct sock *sk, int optval, void __user *user, unsigned int len);
+#ifdef CONFIG_COMPAT
 	int (*compat_set)(struct sock *sk, int optval,
 			void __user *user, unsigned int len);
-
+#endif
 	int get_optmin;
 	int get_optmax;
 	int (*get)(struct sock *sk, int optval, void __user *user, int *len);
+#ifdef CONFIG_COMPAT
 	int (*compat_get)(struct sock *sk, int optval,
 			void __user *user, int *len);
-
+#endif
 	/* Use the module struct to lock set/get code in place */
 	struct module *owner;
 };
@@ -222,11 +224,12 @@ int nf_setsockopt(struct sock *sk, u_int8_t pf, int optval, char __user *opt,
 		  unsigned int len);
 int nf_getsockopt(struct sock *sk, u_int8_t pf, int optval, char __user *opt,
 		  int *len);
-
+#ifdef CONFIG_COMPAT
 int compat_nf_setsockopt(struct sock *sk, u_int8_t pf, int optval,
 		char __user *opt, unsigned int len);
 int compat_nf_getsockopt(struct sock *sk, u_int8_t pf, int optval,
 		char __user *opt, int *len);
+#endif
 
 /* Call this before modifying an existing packet: ensures it is
    modifiable and linear to the point you care about (writable_len).
diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index 3caf5e151102..026eb78ee83c 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -283,11 +283,11 @@ struct xt_match {
 
 	/* Called when entry of this type deleted. */
 	void (*destroy)(const struct xt_mtdtor_param *);
-
+#ifdef CONFIG_COMPAT
 	/* Called when userspace align differs from kernel space one */
 	void (*compat_from_user)(void *dst, void *src);
 	int (*compat_to_user)(void __user *dst, void *src);
-
+#endif
 	/* Set this to THIS_MODULE if you are a module, otherwise NULL */
 	struct module *me;
 
@@ -296,7 +296,9 @@ struct xt_match {
 
 	const char *table;
 	unsigned int matchsize;
+#ifdef CONFIG_COMPAT
 	unsigned int compatsize;
+#endif
 	unsigned int hooks;
 	unsigned short proto;
 
@@ -323,17 +325,19 @@ struct xt_target {
 
 	/* Called when entry of this type deleted. */
 	void (*destroy)(const struct xt_tgdtor_param *);
-
+#ifdef CONFIG_COMPAT
 	/* Called when userspace align differs from kernel space one */
 	void (*compat_from_user)(void *dst, void *src);
 	int (*compat_to_user)(void __user *dst, void *src);
-
+#endif
 	/* Set this to THIS_MODULE if you are a module, otherwise NULL */
 	struct module *me;
 
 	const char *table;
 	unsigned int targetsize;
+#ifdef CONFIG_COMPAT
 	unsigned int compatsize;
+#endif
 	unsigned int hooks;
 	unsigned short proto;
 
-- 
cgit v1.2.3


From c85bb41e93184bf5494dde6d8fe5a81b564c84c8 Mon Sep 17 00:00:00 2001
From: Flavio Leitner <fleitner@redhat.com>
Date: Tue, 2 Feb 2010 07:32:29 -0800
Subject: igmp: fix ip_mc_sf_allow race [v5]

Almost all igmp functions accessing inet->mc_list are protected by
rtnl_lock(), but there is one exception which is ip_mc_sf_allow(),
so there is a chance of either ip_mc_drop_socket or ip_mc_leave_group
remove an entry while ip_mc_sf_allow is running causing a crash.

Signed-off-by: Flavio Leitner <fleitner@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/igmp.h |  2 ++
 net/ipv4/igmp.c      | 83 +++++++++++++++++++++++++++++++++++++++-------------
 2 files changed, 64 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/igmp.h b/include/linux/igmp.h
index 724c27e5d173..93fc2449af10 100644
--- a/include/linux/igmp.h
+++ b/include/linux/igmp.h
@@ -153,6 +153,7 @@ extern int sysctl_igmp_max_msf;
 struct ip_sf_socklist {
 	unsigned int		sl_max;
 	unsigned int		sl_count;
+	struct rcu_head		rcu;
 	__be32			sl_addr[0];
 };
 
@@ -170,6 +171,7 @@ struct ip_mc_socklist {
 	struct ip_mreqn		multi;
 	unsigned int		sfmode;		/* MCAST_{INCLUDE,EXCLUDE} */
 	struct ip_sf_socklist	*sflist;
+	struct rcu_head		rcu;
 };
 
 struct ip_sf_list {
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 8f5468393f01..d28363998743 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -1799,7 +1799,7 @@ int ip_mc_join_group(struct sock *sk , struct ip_mreqn *imr)
 	iml->next = inet->mc_list;
 	iml->sflist = NULL;
 	iml->sfmode = MCAST_EXCLUDE;
-	inet->mc_list = iml;
+	rcu_assign_pointer(inet->mc_list, iml);
 	ip_mc_inc_group(in_dev, addr);
 	err = 0;
 done:
@@ -1807,24 +1807,46 @@ done:
 	return err;
 }
 
+static void ip_sf_socklist_reclaim(struct rcu_head *rp)
+{
+	struct ip_sf_socklist *psf;
+
+	psf = container_of(rp, struct ip_sf_socklist, rcu);
+	/* sk_omem_alloc should have been decreased by the caller*/
+	kfree(psf);
+}
+
 static int ip_mc_leave_src(struct sock *sk, struct ip_mc_socklist *iml,
 			   struct in_device *in_dev)
 {
+	struct ip_sf_socklist *psf = iml->sflist;
 	int err;
 
-	if (iml->sflist == NULL) {
+	if (psf == NULL) {
 		/* any-source empty exclude case */
 		return ip_mc_del_src(in_dev, &iml->multi.imr_multiaddr.s_addr,
 			iml->sfmode, 0, NULL, 0);
 	}
 	err = ip_mc_del_src(in_dev, &iml->multi.imr_multiaddr.s_addr,
-			iml->sfmode, iml->sflist->sl_count,
-			iml->sflist->sl_addr, 0);
-	sock_kfree_s(sk, iml->sflist, IP_SFLSIZE(iml->sflist->sl_max));
-	iml->sflist = NULL;
+			iml->sfmode, psf->sl_count, psf->sl_addr, 0);
+	rcu_assign_pointer(iml->sflist, NULL);
+	/* decrease mem now to avoid the memleak warning */
+	atomic_sub(IP_SFLSIZE(psf->sl_max), &sk->sk_omem_alloc);
+	call_rcu(&psf->rcu, ip_sf_socklist_reclaim);
 	return err;
 }
 
+
+static void ip_mc_socklist_reclaim(struct rcu_head *rp)
+{
+	struct ip_mc_socklist *iml;
+
+	iml = container_of(rp, struct ip_mc_socklist, rcu);
+	/* sk_omem_alloc should have been decreased by the caller*/
+	kfree(iml);
+}
+
+
 /*
  *	Ask a socket to leave a group.
  */
@@ -1854,12 +1876,14 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr)
 
 		(void) ip_mc_leave_src(sk, iml, in_dev);
 
-		*imlp = iml->next;
+		rcu_assign_pointer(*imlp, iml->next);
 
 		if (in_dev)
 			ip_mc_dec_group(in_dev, group);
 		rtnl_unlock();
-		sock_kfree_s(sk, iml, sizeof(*iml));
+		/* decrease mem now to avoid the memleak warning */
+		atomic_sub(sizeof(*iml), &sk->sk_omem_alloc);
+		call_rcu(&iml->rcu, ip_mc_socklist_reclaim);
 		return 0;
 	}
 	if (!in_dev)
@@ -1974,9 +1998,12 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct
 		if (psl) {
 			for (i=0; i<psl->sl_count; i++)
 				newpsl->sl_addr[i] = psl->sl_addr[i];
-			sock_kfree_s(sk, psl, IP_SFLSIZE(psl->sl_max));
+			/* decrease mem now to avoid the memleak warning */
+			atomic_sub(IP_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc);
+			call_rcu(&psl->rcu, ip_sf_socklist_reclaim);
 		}
-		pmc->sflist = psl = newpsl;
+		rcu_assign_pointer(pmc->sflist, newpsl);
+		psl = newpsl;
 	}
 	rv = 1;	/* > 0 for insert logic below if sl_count is 0 */
 	for (i=0; i<psl->sl_count; i++) {
@@ -2072,11 +2099,13 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex)
 	if (psl) {
 		(void) ip_mc_del_src(in_dev, &msf->imsf_multiaddr, pmc->sfmode,
 			psl->sl_count, psl->sl_addr, 0);
-		sock_kfree_s(sk, psl, IP_SFLSIZE(psl->sl_max));
+		/* decrease mem now to avoid the memleak warning */
+		atomic_sub(IP_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc);
+		call_rcu(&psl->rcu, ip_sf_socklist_reclaim);
 	} else
 		(void) ip_mc_del_src(in_dev, &msf->imsf_multiaddr, pmc->sfmode,
 			0, NULL, 0);
-	pmc->sflist = newpsl;
+	rcu_assign_pointer(pmc->sflist, newpsl);
 	pmc->sfmode = msf->imsf_fmode;
 	err = 0;
 done:
@@ -2209,30 +2238,40 @@ int ip_mc_sf_allow(struct sock *sk, __be32 loc_addr, __be32 rmt_addr, int dif)
 	struct ip_mc_socklist *pmc;
 	struct ip_sf_socklist *psl;
 	int i;
+	int ret;
 
+	ret = 1;
 	if (!ipv4_is_multicast(loc_addr))
-		return 1;
+		goto out;
 
-	for (pmc=inet->mc_list; pmc; pmc=pmc->next) {
+	rcu_read_lock();
+	for (pmc=rcu_dereference(inet->mc_list); pmc; pmc=rcu_dereference(pmc->next)) {
 		if (pmc->multi.imr_multiaddr.s_addr == loc_addr &&
 		    pmc->multi.imr_ifindex == dif)
 			break;
 	}
+	ret = inet->mc_all;
 	if (!pmc)
-		return inet->mc_all;
+		goto unlock;
 	psl = pmc->sflist;
+	ret = (pmc->sfmode == MCAST_EXCLUDE);
 	if (!psl)
-		return pmc->sfmode == MCAST_EXCLUDE;
+		goto unlock;
 
 	for (i=0; i<psl->sl_count; i++) {
 		if (psl->sl_addr[i] == rmt_addr)
 			break;
 	}
+	ret = 0;
 	if (pmc->sfmode == MCAST_INCLUDE && i >= psl->sl_count)
-		return 0;
+		goto unlock;
 	if (pmc->sfmode == MCAST_EXCLUDE && i < psl->sl_count)
-		return 0;
-	return 1;
+		goto unlock;
+	ret = 1;
+unlock:
+	rcu_read_unlock();
+out:
+	return ret;
 }
 
 /*
@@ -2251,7 +2290,7 @@ void ip_mc_drop_socket(struct sock *sk)
 	rtnl_lock();
 	while ((iml = inet->mc_list) != NULL) {
 		struct in_device *in_dev;
-		inet->mc_list = iml->next;
+		rcu_assign_pointer(inet->mc_list, iml->next);
 
 		in_dev = inetdev_by_index(net, iml->multi.imr_ifindex);
 		(void) ip_mc_leave_src(sk, iml, in_dev);
@@ -2259,7 +2298,9 @@ void ip_mc_drop_socket(struct sock *sk)
 			ip_mc_dec_group(in_dev, iml->multi.imr_multiaddr.s_addr);
 			in_dev_put(in_dev);
 		}
-		sock_kfree_s(sk, iml, sizeof(*iml));
+		/* decrease mem now to avoid the memleak warning */
+		atomic_sub(sizeof(*iml), &sk->sk_omem_alloc);
+		call_rcu(&iml->rcu, ip_mc_socklist_reclaim);
 	}
 	rtnl_unlock();
 }
-- 
cgit v1.2.3


From f9bfbebf34eab707b065116cdc9699d25ba4252a Mon Sep 17 00:00:00 2001
From: Shirley Ma <mashirle@us.ibm.com>
Date: Fri, 29 Jan 2010 03:19:05 +0000
Subject: virtio: Add ability to detach unused buffers from vrings

There's currently no way for a virtio driver to ask for unused
buffers, so it has to keep a list itself to reclaim them at shutdown.
This is redundant, since virtio_ring stores that information.  So
add a new hook to do this.

Signed-off-by: Shirley Ma <xma@us.ibm.com>
Signed-off-by: Amit Shah <amit.shah@redhat.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/virtio/virtio_ring.c | 25 +++++++++++++++++++++++++
 include/linux/virtio.h       |  4 ++++
 2 files changed, 29 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index fbd2ecde93e4..71929ee00d69 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -334,6 +334,30 @@ static bool vring_enable_cb(struct virtqueue *_vq)
 	return true;
 }
 
+static void *vring_detach_unused_buf(struct virtqueue *_vq)
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+	unsigned int i;
+	void *buf;
+
+	START_USE(vq);
+
+	for (i = 0; i < vq->vring.num; i++) {
+		if (!vq->data[i])
+			continue;
+		/* detach_buf clears data, so grab it now. */
+		buf = vq->data[i];
+		detach_buf(vq, i);
+		END_USE(vq);
+		return buf;
+	}
+	/* That should have freed everything. */
+	BUG_ON(vq->num_free != vq->vring.num);
+
+	END_USE(vq);
+	return NULL;
+}
+
 irqreturn_t vring_interrupt(int irq, void *_vq)
 {
 	struct vring_virtqueue *vq = to_vvq(_vq);
@@ -360,6 +384,7 @@ static struct virtqueue_ops vring_vq_ops = {
 	.kick = vring_kick,
 	.disable_cb = vring_disable_cb,
 	.enable_cb = vring_enable_cb,
+	.detach_unused_buf = vring_detach_unused_buf,
 };
 
 struct virtqueue *vring_new_virtqueue(unsigned int num,
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index 057a2e010758..f508c651e53d 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -51,6 +51,9 @@ struct virtqueue {
  *	This re-enables callbacks; it returns "false" if there are pending
  *	buffers in the queue, to detect a possible race between the driver
  *	checking for more work, and enabling callbacks.
+ * @detach_unused_buf: detach first unused buffer
+ * 	vq: the struct virtqueue we're talking about.
+ * 	Returns NULL or the "data" token handed to add_buf
  *
  * Locking rules are straightforward: the driver is responsible for
  * locking.  No two operations may be invoked simultaneously, with the exception
@@ -71,6 +74,7 @@ struct virtqueue_ops {
 
 	void (*disable_cb)(struct virtqueue *vq);
 	bool (*enable_cb)(struct virtqueue *vq);
+	void *(*detach_unused_buf)(struct virtqueue *vq);
 };
 
 /**
-- 
cgit v1.2.3


From f98bfbd78c37c5946cc53089da32a5f741efdeb7 Mon Sep 17 00:00:00 2001
From: Evgeniy Polyakov <zbr@ioremap.net>
Date: Tue, 2 Feb 2010 15:58:48 -0800
Subject: connector: Delete buggy notification code.

On Tue, Feb 02, 2010 at 02:57:14PM -0800, Greg KH (gregkh@suse.de) wrote:
> > There are at least two ways to fix it: using a big cannon and a small
> > one. The former way is to disable notification registration, since it is
> > not used by anyone at all. Second way is to check whether calling
> > process is root and its destination group is -1 (kind of priveledged
> > one) before command is dispatched to workqueue.
>
> Well if no one is using it, removing it makes the most sense, right?
>
> No objection from me, care to make up a patch either way for this?

Getting it is not used, let's drop support for notifications about
(un)registered events from connector.
Another option was to check credentials on receiving, but we can always
restore it without bugs if needed, but genetlink has a wider code base
and none complained, that userspace can not get notification when some
other clients were (un)registered.

Kudos for Sebastian Krahmer <krahmer@suse.de>, who found a bug in the
code.

Signed-off-by: Evgeniy Polyakov <zbr@ioremap.net>
Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/connector/connector.c | 175 ------------------------------------------
 include/linux/connector.h     |  32 --------
 2 files changed, 207 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/connector/connector.c b/drivers/connector/connector.c
index f06024668f99..537c29ac4487 100644
--- a/drivers/connector/connector.c
+++ b/drivers/connector/connector.c
@@ -36,17 +36,6 @@ MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Evgeniy Polyakov <zbr@ioremap.net>");
 MODULE_DESCRIPTION("Generic userspace <-> kernelspace connector.");
 
-static u32 cn_idx = CN_IDX_CONNECTOR;
-static u32 cn_val = CN_VAL_CONNECTOR;
-
-module_param(cn_idx, uint, 0);
-module_param(cn_val, uint, 0);
-MODULE_PARM_DESC(cn_idx, "Connector's main device idx.");
-MODULE_PARM_DESC(cn_val, "Connector's main device val.");
-
-static DEFINE_MUTEX(notify_lock);
-static LIST_HEAD(notify_list);
-
 static struct cn_dev cdev;
 
 static int cn_already_initialized;
@@ -209,54 +198,6 @@ static void cn_rx_skb(struct sk_buff *__skb)
 	}
 }
 
-/*
- * Notification routing.
- *
- * Gets id and checks if there are notification request for it's idx
- * and val.  If there are such requests notify the listeners with the
- * given notify event.
- *
- */
-static void cn_notify(struct cb_id *id, u32 notify_event)
-{
-	struct cn_ctl_entry *ent;
-
-	mutex_lock(&notify_lock);
-	list_for_each_entry(ent, &notify_list, notify_entry) {
-		int i;
-		struct cn_notify_req *req;
-		struct cn_ctl_msg *ctl = ent->msg;
-		int idx_found, val_found;
-
-		idx_found = val_found = 0;
-
-		req = (struct cn_notify_req *)ctl->data;
-		for (i = 0; i < ctl->idx_notify_num; ++i, ++req) {
-			if (id->idx >= req->first &&
-					id->idx < req->first + req->range) {
-				idx_found = 1;
-				break;
-			}
-		}
-
-		for (i = 0; i < ctl->val_notify_num; ++i, ++req) {
-			if (id->val >= req->first &&
-					id->val < req->first + req->range) {
-				val_found = 1;
-				break;
-			}
-		}
-
-		if (idx_found && val_found) {
-			struct cn_msg m = { .ack = notify_event, };
-
-			memcpy(&m.id, id, sizeof(m.id));
-			cn_netlink_send(&m, ctl->group, GFP_KERNEL);
-		}
-	}
-	mutex_unlock(&notify_lock);
-}
-
 /*
  * Callback add routing - adds callback with given ID and name.
  * If there is registered callback with the same ID it will not be added.
@@ -276,8 +217,6 @@ int cn_add_callback(struct cb_id *id, char *name,
 	if (err)
 		return err;
 
-	cn_notify(id, 0);
-
 	return 0;
 }
 EXPORT_SYMBOL_GPL(cn_add_callback);
@@ -295,111 +234,9 @@ void cn_del_callback(struct cb_id *id)
 	struct cn_dev *dev = &cdev;
 
 	cn_queue_del_callback(dev->cbdev, id);
-	cn_notify(id, 1);
 }
 EXPORT_SYMBOL_GPL(cn_del_callback);
 
-/*
- * Checks two connector's control messages to be the same.
- * Returns 1 if they are the same or if the first one is corrupted.
- */
-static int cn_ctl_msg_equals(struct cn_ctl_msg *m1, struct cn_ctl_msg *m2)
-{
-	int i;
-	struct cn_notify_req *req1, *req2;
-
-	if (m1->idx_notify_num != m2->idx_notify_num)
-		return 0;
-
-	if (m1->val_notify_num != m2->val_notify_num)
-		return 0;
-
-	if (m1->len != m2->len)
-		return 0;
-
-	if ((m1->idx_notify_num + m1->val_notify_num) * sizeof(*req1) !=
-	    m1->len)
-		return 1;
-
-	req1 = (struct cn_notify_req *)m1->data;
-	req2 = (struct cn_notify_req *)m2->data;
-
-	for (i = 0; i < m1->idx_notify_num; ++i) {
-		if (req1->first != req2->first || req1->range != req2->range)
-			return 0;
-		req1++;
-		req2++;
-	}
-
-	for (i = 0; i < m1->val_notify_num; ++i) {
-		if (req1->first != req2->first || req1->range != req2->range)
-			return 0;
-		req1++;
-		req2++;
-	}
-
-	return 1;
-}
-
-/*
- * Main connector device's callback.
- *
- * Used for notification of a request's processing.
- */
-static void cn_callback(struct cn_msg *msg, struct netlink_skb_parms *nsp)
-{
-	struct cn_ctl_msg *ctl;
-	struct cn_ctl_entry *ent;
-	u32 size;
-
-	if (msg->len < sizeof(*ctl))
-		return;
-
-	ctl = (struct cn_ctl_msg *)msg->data;
-
-	size = (sizeof(*ctl) + ((ctl->idx_notify_num +
-				 ctl->val_notify_num) *
-				sizeof(struct cn_notify_req)));
-
-	if (msg->len != size)
-		return;
-
-	if (ctl->len + sizeof(*ctl) != msg->len)
-		return;
-
-	/*
-	 * Remove notification.
-	 */
-	if (ctl->group == 0) {
-		struct cn_ctl_entry *n;
-
-		mutex_lock(&notify_lock);
-		list_for_each_entry_safe(ent, n, &notify_list, notify_entry) {
-			if (cn_ctl_msg_equals(ent->msg, ctl)) {
-				list_del(&ent->notify_entry);
-				kfree(ent);
-			}
-		}
-		mutex_unlock(&notify_lock);
-
-		return;
-	}
-
-	size += sizeof(*ent);
-
-	ent = kzalloc(size, GFP_KERNEL);
-	if (!ent)
-		return;
-
-	ent->msg = (struct cn_ctl_msg *)(ent + 1);
-
-	memcpy(ent->msg, ctl, size - sizeof(*ent));
-
-	mutex_lock(&notify_lock);
-	list_add(&ent->notify_entry, &notify_list);
-	mutex_unlock(&notify_lock);
-}
-
 static int cn_proc_show(struct seq_file *m, void *v)
 {
 	struct cn_queue_dev *dev = cdev.cbdev;
@@ -437,11 +274,8 @@ static const struct file_operations cn_file_ops = {
 static int __devinit cn_init(void)
 {
 	struct cn_dev *dev = &cdev;
-	int err;
 
 	dev->input = cn_rx_skb;
-	dev->id.idx = cn_idx;
-	dev->id.val = cn_val;
 
 	dev->nls = netlink_kernel_create(&init_net, NETLINK_CONNECTOR,
 					 CN_NETLINK_USERS + 0xf,
@@ -457,14 +291,6 @@ static int __devinit cn_init(void)
 
 	cn_already_initialized = 1;
 
-	err = cn_add_callback(&dev->id, "connector", &cn_callback);
-	if (err) {
-		cn_already_initialized = 0;
-		cn_queue_free_dev(dev->cbdev);
-		netlink_kernel_release(dev->nls);
-		return -EINVAL;
-	}
-
 	proc_net_fops_create(&init_net, "connector", S_IRUGO, &cn_file_ops);
 
 	return 0;
@@ -478,7 +304,6 @@ static void __devexit cn_fini(void)
 
 	proc_net_remove(&init_net, "connector");
 
-	cn_del_callback(&dev->id);
 	cn_queue_free_dev(dev->cbdev);
 	netlink_kernel_release(dev->nls);
 }
diff --git a/include/linux/connector.h b/include/linux/connector.h
index 72ba63eb83c5..3a779ffba60b 100644
--- a/include/linux/connector.h
+++ b/include/linux/connector.h
@@ -24,9 +24,6 @@
 
 #include <linux/types.h>
 
-#define CN_IDX_CONNECTOR		0xffffffff
-#define CN_VAL_CONNECTOR		0xffffffff
-
 /*
  * Process Events connector unique ids -- used for message routing
  */
@@ -75,30 +72,6 @@ struct cn_msg {
 	__u8 data[0];
 };
 
-/*
- * Notify structure - requests notification about
- * registering/unregistering idx/val in range [first, first+range].
- */
-struct cn_notify_req {
-	__u32 first;
-	__u32 range;
-};
-
-/*
- * Main notification control message
- * *_notify_num 	- number of appropriate cn_notify_req structures after 
- *				this struct.
- * group 		- notification receiver's idx.
- * len 			- total length of the attached data.
- */
-struct cn_ctl_msg {
-	__u32 idx_notify_num;
-	__u32 val_notify_num;
-	__u32 group;
-	__u32 len;
-	__u8 data[0];
-};
-
 #ifdef __KERNEL__
 
 #include <asm/atomic.h>
@@ -151,11 +124,6 @@ struct cn_callback_entry {
 	u32 seq, group;
 };
 
-struct cn_ctl_entry {
-	struct list_head notify_entry;
-	struct cn_ctl_msg *msg;
-};
-
 struct cn_dev {
 	struct cb_id id;
 
-- 
cgit v1.2.3


From 24551f64d47af9539a7f324343bffeea09d9dcfa Mon Sep 17 00:00:00 2001
From: Michael Ellerman <michael@ellerman.id.au>
Date: Tue, 12 Jan 2010 21:25:24 +0000
Subject: lmb: Add lmb_free()

We can free memory allocated with lmb_alloc() by removing it from the
list of reserved LMBs. Rework lmb_remove() to allow that possibility
and add lmb_free() which exploits it.

BenH: Removed some useless parenthesis

Signed-off-by: Michael Ellerman <michael@ellerman.id.au>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 include/linux/lmb.h |  1 +
 lib/lmb.c           | 13 +++++++++++--
 2 files changed, 12 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/lmb.h b/include/linux/lmb.h
index ef82b8fcbddb..f3d14333ebed 100644
--- a/include/linux/lmb.h
+++ b/include/linux/lmb.h
@@ -42,6 +42,7 @@ extern void __init lmb_init(void);
 extern void __init lmb_analyze(void);
 extern long lmb_add(u64 base, u64 size);
 extern long lmb_remove(u64 base, u64 size);
+extern long __init lmb_free(u64 base, u64 size);
 extern long __init lmb_reserve(u64 base, u64 size);
 extern u64 __init lmb_alloc_nid(u64 size, u64 align, int nid,
 				u64 (*nid_range)(u64, u64, int *));
diff --git a/lib/lmb.c b/lib/lmb.c
index 9cee17142b2c..b1fc52606524 100644
--- a/lib/lmb.c
+++ b/lib/lmb.c
@@ -205,9 +205,8 @@ long lmb_add(u64 base, u64 size)
 
 }
 
-long lmb_remove(u64 base, u64 size)
+static long __lmb_remove(struct lmb_region *rgn, u64 base, u64 size)
 {
-	struct lmb_region *rgn = &(lmb.memory);
 	u64 rgnbegin, rgnend;
 	u64 end = base + size;
 	int i;
@@ -254,6 +253,16 @@ long lmb_remove(u64 base, u64 size)
 	return lmb_add_region(rgn, end, rgnend - end);
 }
 
+long lmb_remove(u64 base, u64 size)
+{
+	return __lmb_remove(&lmb.memory, base, size);
+}
+
+long __init lmb_free(u64 base, u64 size)
+{
+	return __lmb_remove(&lmb.reserved, base, size);
+}
+
 long __init lmb_reserve(u64 base, u64 size)
 {
 	struct lmb_region *_rgn = &lmb.reserved;
-- 
cgit v1.2.3


From add67461240c1dadc7c8d97e66f8f92b556ca523 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 3 Feb 2010 13:45:12 +0100
Subject: netfilter: add struct net * to target parameters

Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/netfilter/x_tables.h |  2 ++
 net/bridge/netfilter/ebtables.c    | 10 ++++++----
 net/ipv4/netfilter/ip_tables.c     |  8 +++++---
 net/ipv6/netfilter/ip6_tables.c    |  8 +++++---
 4 files changed, 18 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index 026eb78ee83c..365fabe1b16e 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -249,6 +249,7 @@ struct xt_target_param {
  * Other fields see above.
  */
 struct xt_tgchk_param {
+	struct net *net;
 	const char *table;
 	const void *entryinfo;
 	const struct xt_target *target;
@@ -259,6 +260,7 @@ struct xt_tgchk_param {
 
 /* Target destructor parameters */
 struct xt_tgdtor_param {
+	struct net *net;
 	const struct xt_target *target;
 	void *targinfo;
 	u_int8_t family;
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 1aa0e4c1f52d..12beb580aa21 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -579,13 +579,14 @@ ebt_cleanup_match(struct ebt_entry_match *m, struct net *net, unsigned int *i)
 }
 
 static inline int
-ebt_cleanup_watcher(struct ebt_entry_watcher *w, unsigned int *i)
+ebt_cleanup_watcher(struct ebt_entry_watcher *w, struct net *net, unsigned int *i)
 {
 	struct xt_tgdtor_param par;
 
 	if (i && (*i)-- == 0)
 		return 1;
 
+	par.net      = net;
 	par.target   = w->u.watcher;
 	par.targinfo = w->data;
 	par.family   = NFPROTO_BRIDGE;
@@ -606,10 +607,11 @@ ebt_cleanup_entry(struct ebt_entry *e, struct net *net, unsigned int *cnt)
 	/* we're done */
 	if (cnt && (*cnt)-- == 0)
 		return 1;
-	EBT_WATCHER_ITERATE(e, ebt_cleanup_watcher, NULL);
+	EBT_WATCHER_ITERATE(e, ebt_cleanup_watcher, net, NULL);
 	EBT_MATCH_ITERATE(e, ebt_cleanup_match, net, NULL);
 	t = (struct ebt_entry_target *)(((char *)e) + e->target_offset);
 
+	par.net      = net;
 	par.target   = t->u.target;
 	par.targinfo = t->data;
 	par.family   = NFPROTO_BRIDGE;
@@ -674,7 +676,7 @@ ebt_check_entry(struct ebt_entry *e,
 	}
 	i = 0;
 
-	mtpar.net	= net;
+	mtpar.net	= tgpar.net       = net;
 	mtpar.table     = tgpar.table     = name;
 	mtpar.entryinfo = tgpar.entryinfo = e;
 	mtpar.hook_mask = tgpar.hook_mask = hookmask;
@@ -730,7 +732,7 @@ ebt_check_entry(struct ebt_entry *e,
 	(*cnt)++;
 	return 0;
 cleanup_watchers:
-	EBT_WATCHER_ITERATE(e, ebt_cleanup_watcher, &j);
+	EBT_WATCHER_ITERATE(e, ebt_cleanup_watcher, net, &j);
 cleanup_matches:
 	EBT_MATCH_ITERATE(e, ebt_cleanup_match, net, &i);
 	return ret;
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index cfaba0e2e6fc..7fde8f6950d8 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -638,10 +638,11 @@ err:
 	return ret;
 }
 
-static int check_target(struct ipt_entry *e, const char *name)
+static int check_target(struct ipt_entry *e, struct net *net, const char *name)
 {
 	struct ipt_entry_target *t = ipt_get_target(e);
 	struct xt_tgchk_param par = {
+		.net       = net,
 		.table     = name,
 		.entryinfo = e,
 		.target    = t->u.kernel.target,
@@ -697,7 +698,7 @@ find_check_entry(struct ipt_entry *e, struct net *net, const char *name,
 	}
 	t->u.kernel.target = target;
 
-	ret = check_target(e, name);
+	ret = check_target(e, net, name);
 	if (ret)
 		goto err;
 
@@ -788,6 +789,7 @@ cleanup_entry(struct ipt_entry *e, struct net *net, unsigned int *i)
 	IPT_MATCH_ITERATE(e, cleanup_match, net, NULL);
 	t = ipt_get_target(e);
 
+	par.net      = net;
 	par.target   = t->u.kernel.target;
 	par.targinfo = t->data;
 	par.family   = NFPROTO_IPV4;
@@ -1675,7 +1677,7 @@ compat_check_entry(struct ipt_entry *e, struct net *net, const char *name,
 	if (ret)
 		goto cleanup_matches;
 
-	ret = check_target(e, name);
+	ret = check_target(e, net, name);
 	if (ret)
 		goto cleanup_matches;
 
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 9f1d45f2ba8f..0376ed6d5594 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -669,10 +669,11 @@ err:
 	return ret;
 }
 
-static int check_target(struct ip6t_entry *e, const char *name)
+static int check_target(struct ip6t_entry *e, struct net *net, const char *name)
 {
 	struct ip6t_entry_target *t = ip6t_get_target(e);
 	struct xt_tgchk_param par = {
+		.net       = net,
 		.table     = name,
 		.entryinfo = e,
 		.target    = t->u.kernel.target,
@@ -729,7 +730,7 @@ find_check_entry(struct ip6t_entry *e, struct net *net, const char *name,
 	}
 	t->u.kernel.target = target;
 
-	ret = check_target(e, name);
+	ret = check_target(e, net, name);
 	if (ret)
 		goto err;
 
@@ -820,6 +821,7 @@ cleanup_entry(struct ip6t_entry *e, struct net *net, unsigned int *i)
 	IP6T_MATCH_ITERATE(e, cleanup_match, net, NULL);
 	t = ip6t_get_target(e);
 
+	par.net      = net;
 	par.target   = t->u.kernel.target;
 	par.targinfo = t->data;
 	par.family   = NFPROTO_IPV6;
@@ -1710,7 +1712,7 @@ static int compat_check_entry(struct ip6t_entry *e, struct net *net,
 	if (ret)
 		goto cleanup_matches;
 
-	ret = check_target(e, name);
+	ret = check_target(e, net, name);
 	if (ret)
 		goto cleanup_matches;
 
-- 
cgit v1.2.3


From 0cebe4b4163b6373c9d24c1a192939777bc27e55 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 3 Feb 2010 13:51:51 +0100
Subject: netfilter: ctnetlink: support selective event delivery

Add two masks for conntrack end expectation events to struct nf_conntrack_ecache
and use them to filter events. Their default value is "all events" when the
event sysctl is on and "no events" when it is off. A following patch will add
specific initializations. Expectation events depend on the ecache struct of
their master conntrack.

Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/netfilter/nf_conntrack_common.h | 18 ++++++++
 include/net/netfilter/nf_conntrack_ecache.h   | 59 +++++++++++++--------------
 net/netfilter/nf_conntrack_core.c             |  2 +-
 net/netfilter/nf_conntrack_netlink.c          |  2 +-
 4 files changed, 48 insertions(+), 33 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/nf_conntrack_common.h b/include/linux/netfilter/nf_conntrack_common.h
index a374787ed9b0..ebfed90733f7 100644
--- a/include/linux/netfilter/nf_conntrack_common.h
+++ b/include/linux/netfilter/nf_conntrack_common.h
@@ -74,6 +74,24 @@ enum ip_conntrack_status {
 	IPS_FIXED_TIMEOUT = (1 << IPS_FIXED_TIMEOUT_BIT),
 };
 
+/* Connection tracking event types */
+enum ip_conntrack_events {
+	IPCT_NEW,		/* new conntrack */
+	IPCT_RELATED,		/* related conntrack */
+	IPCT_DESTROY,		/* destroyed conntrack */
+	IPCT_REPLY,		/* connection has seen two-way traffic */
+	IPCT_ASSURED,		/* connection status has changed to assured */
+	IPCT_PROTOINFO,		/* protocol information has changed */
+	IPCT_HELPER,		/* new helper has been set */
+	IPCT_MARK,		/* new mark has been set */
+	IPCT_NATSEQADJ,		/* NAT is doing sequence adjustment */
+	IPCT_SECMARK,		/* new security mark has been set */
+};
+
+enum ip_conntrack_expect_events {
+	IPEXP_NEW,		/* new expectation */
+};
+
 #ifdef __KERNEL__
 struct ip_conntrack_stat {
 	unsigned int searched;
diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h
index 5e05fb883ab1..96ba5f7dcab6 100644
--- a/include/net/netfilter/nf_conntrack_ecache.h
+++ b/include/net/netfilter/nf_conntrack_ecache.h
@@ -12,28 +12,12 @@
 #include <linux/netfilter/nf_conntrack_tuple_common.h>
 #include <net/netfilter/nf_conntrack_extend.h>
 
-/* Connection tracking event types */
-enum ip_conntrack_events {
-	IPCT_NEW,		/* new conntrack */
-	IPCT_RELATED,		/* related conntrack */
-	IPCT_DESTROY,		/* destroyed conntrack */
-	IPCT_REPLY,		/* connection has seen two-way traffic */
-	IPCT_ASSURED,		/* connection status has changed to assured */
-	IPCT_PROTOINFO,		/* protocol information has changed */
-	IPCT_HELPER,		/* new helper has been set */
-	IPCT_MARK,		/* new mark has been set */
-	IPCT_NATSEQADJ,		/* NAT is doing sequence adjustment */
-	IPCT_SECMARK,		/* new security mark has been set */
-};
-
-enum ip_conntrack_expect_events {
-	IPEXP_NEW,		/* new expectation */
-};
-
 struct nf_conntrack_ecache {
-	unsigned long cache;		/* bitops want long */
-	unsigned long missed;		/* missed events */
-	u32 pid;			/* netlink pid of destroyer */
+	unsigned long cache;	/* bitops want long */
+	unsigned long missed;	/* missed events */
+	u16 ctmask;		/* bitmask of ct events to be delivered */
+	u16 expmask;		/* bitmask of expect events to be delivered */
+	u32 pid;		/* netlink pid of destroyer */
 };
 
 static inline struct nf_conntrack_ecache *
@@ -43,14 +27,24 @@ nf_ct_ecache_find(const struct nf_conn *ct)
 }
 
 static inline struct nf_conntrack_ecache *
-nf_ct_ecache_ext_add(struct nf_conn *ct, gfp_t gfp)
+nf_ct_ecache_ext_add(struct nf_conn *ct, u16 ctmask, u16 expmask, gfp_t gfp)
 {
 	struct net *net = nf_ct_net(ct);
+	struct nf_conntrack_ecache *e;
 
-	if (!net->ct.sysctl_events)
+	if (!ctmask && !expmask && net->ct.sysctl_events) {
+		ctmask = ~0;
+		expmask = ~0;
+	}
+	if (!ctmask && !expmask)
 		return NULL;
 
-	return nf_ct_ext_add(ct, NF_CT_EXT_ECACHE, gfp);
+	e = nf_ct_ext_add(ct, NF_CT_EXT_ECACHE, gfp);
+	if (e) {
+		e->ctmask  = ctmask;
+		e->expmask = expmask;
+	}
+	return e;
 };
 
 #ifdef CONFIG_NF_CONNTRACK_EVENTS
@@ -83,6 +77,9 @@ nf_conntrack_event_cache(enum ip_conntrack_events event, struct nf_conn *ct)
 	if (e == NULL)
 		return;
 
+	if (!(e->ctmask & (1 << event)))
+		return;
+
 	set_bit(event, &e->cache);
 }
 
@@ -93,7 +90,6 @@ nf_conntrack_eventmask_report(unsigned int eventmask,
 			      int report)
 {
 	int ret = 0;
-	struct net *net = nf_ct_net(ct);
 	struct nf_ct_event_notifier *notify;
 	struct nf_conntrack_ecache *e;
 
@@ -102,9 +98,6 @@ nf_conntrack_eventmask_report(unsigned int eventmask,
 	if (notify == NULL)
 		goto out_unlock;
 
-	if (!net->ct.sysctl_events)
-		goto out_unlock;
-
 	e = nf_ct_ecache_find(ct);
 	if (e == NULL)
 		goto out_unlock;
@@ -118,6 +111,9 @@ nf_conntrack_eventmask_report(unsigned int eventmask,
 		/* This is a resent of a destroy event? If so, skip missed */
 		unsigned long missed = e->pid ? 0 : e->missed;
 
+		if (!((eventmask | missed) & e->ctmask))
+			goto out_unlock;
+
 		ret = notify->fcn(eventmask | missed, &item);
 		if (unlikely(ret < 0 || missed)) {
 			spin_lock_bh(&ct->lock);
@@ -173,18 +169,19 @@ nf_ct_expect_event_report(enum ip_conntrack_expect_events event,
 			  u32 pid,
 			  int report)
 {
-	struct net *net = nf_ct_exp_net(exp);
 	struct nf_exp_event_notifier *notify;
+	struct nf_conntrack_ecache *e;
 
 	rcu_read_lock();
 	notify = rcu_dereference(nf_expect_event_cb);
 	if (notify == NULL)
 		goto out_unlock;
 
-	if (!net->ct.sysctl_events)
+	e = nf_ct_ecache_find(exp->master);
+	if (e == NULL)
 		goto out_unlock;
 
-	{
+	if (e->expmask & (1 << event)) {
 		struct nf_exp_event item = {
 			.exp	= exp,
 			.pid	= pid,
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 091ff770eb7b..53b8da6ad6b7 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -648,7 +648,7 @@ init_conntrack(struct net *net,
 	}
 
 	nf_ct_acct_ext_add(ct, GFP_ATOMIC);
-	nf_ct_ecache_ext_add(ct, GFP_ATOMIC);
+	nf_ct_ecache_ext_add(ct, 0, 0, GFP_ATOMIC);
 
 	spin_lock_bh(&nf_conntrack_lock);
 	exp = nf_ct_find_expectation(net, tuple);
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index ff594eb138c1..f5c0b09e12f1 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1281,7 +1281,7 @@ ctnetlink_create_conntrack(struct net *net,
 	}
 
 	nf_ct_acct_ext_add(ct, GFP_ATOMIC);
-	nf_ct_ecache_ext_add(ct, GFP_ATOMIC);
+	nf_ct_ecache_ext_add(ct, 0, 0, GFP_ATOMIC);
 
 #if defined(CONFIG_NF_CONNTRACK_MARK)
 	if (cda[CTA_MARK])
-- 
cgit v1.2.3


From b2a15a604d379af323645e330638e2cfcc696aff Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 3 Feb 2010 14:13:03 +0100
Subject: netfilter: nf_conntrack: support conntrack templates

Support initializing selected parameters of new conntrack entries from a
"conntrack template", which is a specially marked conntrack entry attached
to the skb.

Currently the helper and the event delivery masks can be initialized this
way.

Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/netfilter/nf_conntrack_common.h  |  4 +++
 include/net/netfilter/nf_conntrack.h           |  5 +++
 include/net/netfilter/nf_conntrack_helper.h    |  3 +-
 net/ipv4/netfilter/nf_defrag_ipv4.c            |  2 +-
 net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c |  2 +-
 net/netfilter/nf_conntrack_core.c              | 50 +++++++++++++++++---------
 net/netfilter/nf_conntrack_helper.c            | 17 ++++++---
 net/netfilter/nf_conntrack_netlink.c           |  2 +-
 8 files changed, 61 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/nf_conntrack_common.h b/include/linux/netfilter/nf_conntrack_common.h
index ebfed90733f7..c608677dda60 100644
--- a/include/linux/netfilter/nf_conntrack_common.h
+++ b/include/linux/netfilter/nf_conntrack_common.h
@@ -72,6 +72,10 @@ enum ip_conntrack_status {
 	/* Connection has fixed timeout. */
 	IPS_FIXED_TIMEOUT_BIT = 10,
 	IPS_FIXED_TIMEOUT = (1 << IPS_FIXED_TIMEOUT_BIT),
+
+	/* Conntrack is a template */
+	IPS_TEMPLATE_BIT = 11,
+	IPS_TEMPLATE = (1 << IPS_TEMPLATE_BIT),
 };
 
 /* Connection tracking event types */
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index a0904adfb8f7..5043d61c99a7 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -272,6 +272,11 @@ nf_conntrack_alloc(struct net *net,
 		   const struct nf_conntrack_tuple *repl,
 		   gfp_t gfp);
 
+static inline int nf_ct_is_template(const struct nf_conn *ct)
+{
+	return test_bit(IPS_TEMPLATE_BIT, &ct->status);
+}
+
 /* It's confirmed if it is, or has been in the hash table. */
 static inline int nf_ct_is_confirmed(struct nf_conn *ct)
 {
diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h
index 86be7c4816d6..e17aaa3e19fd 100644
--- a/include/net/netfilter/nf_conntrack_helper.h
+++ b/include/net/netfilter/nf_conntrack_helper.h
@@ -47,7 +47,8 @@ extern void nf_conntrack_helper_unregister(struct nf_conntrack_helper *);
 
 extern struct nf_conn_help *nf_ct_helper_ext_add(struct nf_conn *ct, gfp_t gfp);
 
-extern int __nf_ct_try_assign_helper(struct nf_conn *ct, gfp_t flags);
+extern int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl,
+				     gfp_t flags);
 
 extern void nf_ct_helper_destroy(struct nf_conn *ct);
 
diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c
index 331ead3ebd1b..77627fa80561 100644
--- a/net/ipv4/netfilter/nf_defrag_ipv4.c
+++ b/net/ipv4/netfilter/nf_defrag_ipv4.c
@@ -59,7 +59,7 @@ static unsigned int ipv4_conntrack_defrag(unsigned int hooknum,
 #if !defined(CONFIG_NF_NAT) && !defined(CONFIG_NF_NAT_MODULE)
 	/* Previously seen (loopback)?  Ignore.  Do this before
 	   fragment check. */
-	if (skb->nfct)
+	if (skb->nfct && !nf_ct_is_template((struct nf_conn *)skb->nfct))
 		return NF_ACCEPT;
 #endif
 #endif
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index 0956ebabbff2..55ce22e5de49 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -212,7 +212,7 @@ static unsigned int ipv6_defrag(unsigned int hooknum,
 	struct sk_buff *reasm;
 
 	/* Previously seen (loopback)?  */
-	if (skb->nfct)
+	if (skb->nfct && !nf_ct_is_template((struct nf_conn *)skb->nfct))
 		return NF_ACCEPT;
 
 	reasm = nf_ct_frag6_gather(skb, nf_ct6_defrag_user(hooknum, skb));
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 53b8da6ad6b7..471e2a79d26f 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -618,7 +618,7 @@ EXPORT_SYMBOL_GPL(nf_conntrack_free);
 /* Allocate a new conntrack: we return -ENOMEM if classification
    failed due to stress.  Otherwise it really is unclassifiable. */
 static struct nf_conntrack_tuple_hash *
-init_conntrack(struct net *net,
+init_conntrack(struct net *net, struct nf_conn *tmpl,
 	       const struct nf_conntrack_tuple *tuple,
 	       struct nf_conntrack_l3proto *l3proto,
 	       struct nf_conntrack_l4proto *l4proto,
@@ -628,6 +628,7 @@ init_conntrack(struct net *net,
 	struct nf_conn *ct;
 	struct nf_conn_help *help;
 	struct nf_conntrack_tuple repl_tuple;
+	struct nf_conntrack_ecache *ecache;
 	struct nf_conntrack_expect *exp;
 
 	if (!nf_ct_invert_tuple(&repl_tuple, tuple, l3proto, l4proto)) {
@@ -648,7 +649,11 @@ init_conntrack(struct net *net,
 	}
 
 	nf_ct_acct_ext_add(ct, GFP_ATOMIC);
-	nf_ct_ecache_ext_add(ct, 0, 0, GFP_ATOMIC);
+
+	ecache = tmpl ? nf_ct_ecache_find(tmpl) : NULL;
+	nf_ct_ecache_ext_add(ct, ecache ? ecache->ctmask : 0,
+				 ecache ? ecache->expmask : 0,
+			     GFP_ATOMIC);
 
 	spin_lock_bh(&nf_conntrack_lock);
 	exp = nf_ct_find_expectation(net, tuple);
@@ -673,7 +678,7 @@ init_conntrack(struct net *net,
 		nf_conntrack_get(&ct->master->ct_general);
 		NF_CT_STAT_INC(net, expect_new);
 	} else {
-		__nf_ct_try_assign_helper(ct, GFP_ATOMIC);
+		__nf_ct_try_assign_helper(ct, tmpl, GFP_ATOMIC);
 		NF_CT_STAT_INC(net, new);
 	}
 
@@ -694,7 +699,7 @@ init_conntrack(struct net *net,
 
 /* On success, returns conntrack ptr, sets skb->nfct and ctinfo */
 static inline struct nf_conn *
-resolve_normal_ct(struct net *net,
+resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
 		  struct sk_buff *skb,
 		  unsigned int dataoff,
 		  u_int16_t l3num,
@@ -718,7 +723,8 @@ resolve_normal_ct(struct net *net,
 	/* look for tuple match */
 	h = nf_conntrack_find_get(net, &tuple);
 	if (!h) {
-		h = init_conntrack(net, &tuple, l3proto, l4proto, skb, dataoff);
+		h = init_conntrack(net, tmpl, &tuple, l3proto, l4proto,
+				   skb, dataoff);
 		if (!h)
 			return NULL;
 		if (IS_ERR(h))
@@ -755,7 +761,7 @@ unsigned int
 nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
 		struct sk_buff *skb)
 {
-	struct nf_conn *ct;
+	struct nf_conn *ct, *tmpl = NULL;
 	enum ip_conntrack_info ctinfo;
 	struct nf_conntrack_l3proto *l3proto;
 	struct nf_conntrack_l4proto *l4proto;
@@ -764,10 +770,14 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
 	int set_reply = 0;
 	int ret;
 
-	/* Previously seen (loopback or untracked)?  Ignore. */
 	if (skb->nfct) {
-		NF_CT_STAT_INC_ATOMIC(net, ignore);
-		return NF_ACCEPT;
+		/* Previously seen (loopback or untracked)?  Ignore. */
+		tmpl = (struct nf_conn *)skb->nfct;
+		if (!nf_ct_is_template(tmpl)) {
+			NF_CT_STAT_INC_ATOMIC(net, ignore);
+			return NF_ACCEPT;
+		}
+		skb->nfct = NULL;
 	}
 
 	/* rcu_read_lock()ed by nf_hook_slow */
@@ -778,7 +788,8 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
 		pr_debug("not prepared to track yet or error occured\n");
 		NF_CT_STAT_INC_ATOMIC(net, error);
 		NF_CT_STAT_INC_ATOMIC(net, invalid);
-		return -ret;
+		ret = -ret;
+		goto out;
 	}
 
 	l4proto = __nf_ct_l4proto_find(pf, protonum);
@@ -791,22 +802,25 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
 		if (ret <= 0) {
 			NF_CT_STAT_INC_ATOMIC(net, error);
 			NF_CT_STAT_INC_ATOMIC(net, invalid);
-			return -ret;
+			ret = -ret;
+			goto out;
 		}
 	}
 
-	ct = resolve_normal_ct(net, skb, dataoff, pf, protonum,
+	ct = resolve_normal_ct(net, tmpl, skb, dataoff, pf, protonum,
 			       l3proto, l4proto, &set_reply, &ctinfo);
 	if (!ct) {
 		/* Not valid part of a connection */
 		NF_CT_STAT_INC_ATOMIC(net, invalid);
-		return NF_ACCEPT;
+		ret = NF_ACCEPT;
+		goto out;
 	}
 
 	if (IS_ERR(ct)) {
 		/* Too stressed to deal. */
 		NF_CT_STAT_INC_ATOMIC(net, drop);
-		return NF_DROP;
+		ret = NF_DROP;
+		goto out;
 	}
 
 	NF_CT_ASSERT(skb->nfct);
@@ -821,11 +835,15 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
 		NF_CT_STAT_INC_ATOMIC(net, invalid);
 		if (ret == -NF_DROP)
 			NF_CT_STAT_INC_ATOMIC(net, drop);
-		return -ret;
+		ret = -ret;
+		goto out;
 	}
 
 	if (set_reply && !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status))
 		nf_conntrack_event_cache(IPCT_REPLY, ct);
+out:
+	if (tmpl)
+		nf_ct_put(tmpl);
 
 	return ret;
 }
@@ -864,7 +882,7 @@ void nf_conntrack_alter_reply(struct nf_conn *ct,
 		return;
 
 	rcu_read_lock();
-	__nf_ct_try_assign_helper(ct, GFP_ATOMIC);
+	__nf_ct_try_assign_helper(ct, NULL, GFP_ATOMIC);
 	rcu_read_unlock();
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_alter_reply);
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index c0e461f466ae..8144b0da5515 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -96,13 +96,22 @@ struct nf_conn_help *nf_ct_helper_ext_add(struct nf_conn *ct, gfp_t gfp)
 }
 EXPORT_SYMBOL_GPL(nf_ct_helper_ext_add);
 
-int __nf_ct_try_assign_helper(struct nf_conn *ct, gfp_t flags)
+int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl,
+			      gfp_t flags)
 {
+	struct nf_conntrack_helper *helper = NULL;
+	struct nf_conn_help *help;
 	int ret = 0;
-	struct nf_conntrack_helper *helper;
-	struct nf_conn_help *help = nfct_help(ct);
 
-	helper = __nf_ct_helper_find(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+	if (tmpl != NULL) {
+		help = nfct_help(tmpl);
+		if (help != NULL)
+			helper = help->helper;
+	}
+
+	help = nfct_help(ct);
+	if (helper == NULL)
+		helper = __nf_ct_helper_find(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
 	if (helper == NULL) {
 		if (help)
 			rcu_assign_pointer(help->helper, NULL);
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index f5c0b09e12f1..09044f9f4b2e 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1249,7 +1249,7 @@ ctnetlink_create_conntrack(struct net *net,
 		}
 	} else {
 		/* try an implicit helper assignation */
-		err = __nf_ct_try_assign_helper(ct, GFP_ATOMIC);
+		err = __nf_ct_try_assign_helper(ct, NULL, GFP_ATOMIC);
 		if (err < 0)
 			goto err2;
 	}
-- 
cgit v1.2.3


From d4bfa033ed84e0ae446eff445d107ffd5ee78df3 Mon Sep 17 00:00:00 2001
From: Jiri Kosina <jkosina@suse.cz>
Date: Fri, 29 Jan 2010 15:03:36 +0100
Subject: HID: make raw reports possible for both feature and output reports

In commit 2da31939a42 ("Bluetooth: Implement raw output support for HIDP
layer"), support for Bluetooth hid_output_raw_report was added, but it
pushes the data to the intr socket instead of the ctrl one. This has been
fixed by 6bf8268f9a91f1 ("Bluetooth: Use the control channel for raw HID reports")

Still, it is necessary to distinguish whether the report in question should be
either FEATURE or OUTPUT. For this, we have to extend the generic HID API,
so that hid_output_raw_report() callback provides means to specify this
value so that it can be passed down to lower level hardware drivers (currently
Bluetooth and USB).

Based on original patch by Bastien Nocera <hadess@hadess.net>
Acked-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hidraw.c          |  2 +-
 drivers/hid/usbhid/hid-core.c |  5 +++--
 include/linux/hid.h           |  2 +-
 net/bluetooth/hidp/core.c     | 17 ++++++++++++++---
 4 files changed, 19 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hid/hidraw.c b/drivers/hid/hidraw.c
index cdd136942bca..d04476700b7b 100644
--- a/drivers/hid/hidraw.c
+++ b/drivers/hid/hidraw.c
@@ -134,7 +134,7 @@ static ssize_t hidraw_write(struct file *file, const char __user *buffer, size_t
 		goto out;
 	}
 
-	ret = dev->hid_output_raw_report(dev, buf, count);
+	ret = dev->hid_output_raw_report(dev, buf, count, HID_OUTPUT_REPORT);
 out:
 	kfree(buf);
 	return ret;
diff --git a/drivers/hid/usbhid/hid-core.c b/drivers/hid/usbhid/hid-core.c
index e2997a8d5e1b..caa16c057ce2 100644
--- a/drivers/hid/usbhid/hid-core.c
+++ b/drivers/hid/usbhid/hid-core.c
@@ -774,7 +774,8 @@ static int hid_alloc_buffers(struct usb_device *dev, struct hid_device *hid)
 	return 0;
 }
 
-static int usbhid_output_raw_report(struct hid_device *hid, __u8 *buf, size_t count)
+static int usbhid_output_raw_report(struct hid_device *hid, __u8 *buf, size_t count,
+		unsigned char report_type)
 {
 	struct usbhid_device *usbhid = hid->driver_data;
 	struct usb_device *dev = hid_to_usb_dev(hid);
@@ -785,7 +786,7 @@ static int usbhid_output_raw_report(struct hid_device *hid, __u8 *buf, size_t co
 	ret = usb_control_msg(dev, usb_sndctrlpipe(dev, 0),
 		HID_REQ_SET_REPORT,
 		USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_INTERFACE,
-		((HID_OUTPUT_REPORT + 1) << 8) | *buf,
+		((report_type + 1) << 8) | *buf,
 		interface->desc.bInterfaceNumber, buf + 1, count - 1,
 		USB_CTRL_SET_TIMEOUT);
 
diff --git a/include/linux/hid.h b/include/linux/hid.h
index 87093652dda8..3661a626941d 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -501,7 +501,7 @@ struct hid_device {							/* device report descriptor */
 	void (*hiddev_report_event) (struct hid_device *, struct hid_report *);
 
 	/* handler for raw output data, used by hidraw */
-	int (*hid_output_raw_report) (struct hid_device *, __u8 *, size_t);
+	int (*hid_output_raw_report) (struct hid_device *, __u8 *, size_t, unsigned char);
 
 	/* debugging support via debugfs */
 	unsigned short debug;
diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c
index 6cf526d06e21..37ba153c4cd4 100644
--- a/net/bluetooth/hidp/core.c
+++ b/net/bluetooth/hidp/core.c
@@ -313,10 +313,21 @@ static int hidp_send_report(struct hidp_session *session, struct hid_report *rep
 	return hidp_queue_report(session, buf, rsize);
 }
 
-static int hidp_output_raw_report(struct hid_device *hid, unsigned char *data, size_t count)
+static int hidp_output_raw_report(struct hid_device *hid, unsigned char *data, size_t count,
+		unsigned char report_type)
 {
-	if (hidp_send_ctrl_message(hid->driver_data,
-			HIDP_TRANS_SET_REPORT | HIDP_DATA_RTYPE_FEATURE,
+	switch (report_type) {
+	case HID_FEATURE_REPORT:
+		report_type = HIDP_TRANS_SET_REPORT | HIDP_DATA_RTYPE_FEATURE;
+		break;
+	case HID_OUTPUT_REPORT:
+		report_type = HIDP_TRANS_DATA | HIDP_DATA_RTYPE_OUPUT;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (hidp_send_ctrl_message(hid->driver_data, report_type,
 			data, count))
 		return -ENOMEM;
 	return count;
-- 
cgit v1.2.3


From 84f3bb9ae9db90f7fb15d98b55279a58ab1b2363 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 3 Feb 2010 17:17:06 +0100
Subject: netfilter: xtables: add CT target

Add a new target for the raw table, which can be used to specify conntrack
parameters for specific connections, f.i. the conntrack helper.

The target attaches a "template" connection tracking entry to the skb, which
is used by the conntrack core when initializing a new conntrack.

Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/netfilter/Kbuild              |   1 +
 include/linux/netfilter/xt_CT.h             |  17 +++
 include/net/netfilter/nf_conntrack_helper.h |   3 +
 net/netfilter/Kconfig                       |  12 +++
 net/netfilter/Makefile                      |   1 +
 net/netfilter/nf_conntrack_helper.c         |  19 ++++
 net/netfilter/xt_CT.c                       | 158 ++++++++++++++++++++++++++++
 7 files changed, 211 insertions(+)
 create mode 100644 include/linux/netfilter/xt_CT.h
 create mode 100644 net/netfilter/xt_CT.c

(limited to 'include/linux')

diff --git a/include/linux/netfilter/Kbuild b/include/linux/netfilter/Kbuild
index 2aea50399c0b..a5a63e41b8af 100644
--- a/include/linux/netfilter/Kbuild
+++ b/include/linux/netfilter/Kbuild
@@ -6,6 +6,7 @@ header-y += nfnetlink_queue.h
 header-y += xt_CLASSIFY.h
 header-y += xt_CONNMARK.h
 header-y += xt_CONNSECMARK.h
+header-y += xt_CT.h
 header-y += xt_DSCP.h
 header-y += xt_LED.h
 header-y += xt_MARK.h
diff --git a/include/linux/netfilter/xt_CT.h b/include/linux/netfilter/xt_CT.h
new file mode 100644
index 000000000000..7fd0effe1316
--- /dev/null
+++ b/include/linux/netfilter/xt_CT.h
@@ -0,0 +1,17 @@
+#ifndef _XT_CT_H
+#define _XT_CT_H
+
+#define XT_CT_NOTRACK	0x1
+
+struct xt_ct_target_info {
+	u_int16_t	flags;
+	u_int16_t	__unused;
+	u_int32_t	ct_events;
+	u_int32_t	exp_events;
+	char		helper[16];
+
+	/* Used internally by the kernel */
+	struct nf_conn	*ct __attribute__((aligned(8)));
+};
+
+#endif /* _XT_CT_H */
diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h
index e17aaa3e19fd..32c305dbdab6 100644
--- a/include/net/netfilter/nf_conntrack_helper.h
+++ b/include/net/netfilter/nf_conntrack_helper.h
@@ -42,6 +42,9 @@ struct nf_conntrack_helper {
 extern struct nf_conntrack_helper *
 __nf_conntrack_helper_find(const char *name, u16 l3num, u8 protonum);
 
+extern struct nf_conntrack_helper *
+nf_conntrack_helper_try_module_get(const char *name, u16 l3num, u8 protonum);
+
 extern int nf_conntrack_helper_register(struct nf_conntrack_helper *);
 extern void nf_conntrack_helper_unregister(struct nf_conntrack_helper *);
 
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 634d14affc8d..4469d45261f4 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -341,6 +341,18 @@ config NETFILTER_XT_TARGET_CONNSECMARK
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
+config NETFILTER_XT_TARGET_CT
+	tristate '"CT" target support'
+	depends on NF_CONNTRACK
+	depends on IP_NF_RAW || IP6_NF_RAW
+	depends on NETFILTER_ADVANCED
+	help
+	  This options adds a `CT' target, which allows to specify initial
+	  connection tracking parameters like events to be delivered and
+	  the helper to be used.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
 config NETFILTER_XT_TARGET_DSCP
 	tristate '"DSCP" and "TOS" target support'
 	depends on IP_NF_MANGLE || IP6_NF_MANGLE
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 49f62ee4e9ff..f873644f02f6 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -44,6 +44,7 @@ obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_CLASSIFY) += xt_CLASSIFY.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_CONNMARK) += xt_CONNMARK.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_CONNSECMARK) += xt_CONNSECMARK.o
+obj-$(CONFIG_NETFILTER_XT_TARGET_CT) += xt_CT.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_DSCP) += xt_DSCP.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_HL) += xt_HL.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_LED) += xt_LED.o
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 8144b0da5515..a74a5769877b 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -83,6 +83,25 @@ __nf_conntrack_helper_find(const char *name, u16 l3num, u8 protonum)
 }
 EXPORT_SYMBOL_GPL(__nf_conntrack_helper_find);
 
+struct nf_conntrack_helper *
+nf_conntrack_helper_try_module_get(const char *name, u16 l3num, u8 protonum)
+{
+	struct nf_conntrack_helper *h;
+
+	h = __nf_conntrack_helper_find(name, l3num, protonum);
+#ifdef CONFIG_MODULES
+	if (h == NULL) {
+		if (request_module("nfct-helper-%s", name) == 0)
+			h = __nf_conntrack_helper_find(name, l3num, protonum);
+	}
+#endif
+	if (h != NULL && !try_module_get(h->me))
+		h = NULL;
+
+	return h;
+}
+EXPORT_SYMBOL_GPL(nf_conntrack_helper_try_module_get);
+
 struct nf_conn_help *nf_ct_helper_ext_add(struct nf_conn *ct, gfp_t gfp)
 {
 	struct nf_conn_help *help;
diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c
new file mode 100644
index 000000000000..8183a054256f
--- /dev/null
+++ b/net/netfilter/xt_CT.c
@@ -0,0 +1,158 @@
+/*
+ * Copyright (c) 2010 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/selinux.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_CT.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_ecache.h>
+
+static unsigned int xt_ct_target(struct sk_buff *skb,
+				 const struct xt_target_param *par)
+{
+	const struct xt_ct_target_info *info = par->targinfo;
+	struct nf_conn *ct = info->ct;
+
+	/* Previously seen (loopback)? Ignore. */
+	if (skb->nfct != NULL)
+		return XT_CONTINUE;
+
+	atomic_inc(&ct->ct_general.use);
+	skb->nfct = &ct->ct_general;
+	skb->nfctinfo = IP_CT_NEW;
+
+	return XT_CONTINUE;
+}
+
+static u8 xt_ct_find_proto(const struct xt_tgchk_param *par)
+{
+	if (par->family == AF_INET) {
+		const struct ipt_entry *e = par->entryinfo;
+
+		if (e->ip.invflags & IPT_INV_PROTO)
+			return 0;
+		return e->ip.proto;
+	} else if (par->family == AF_INET6) {
+		const struct ip6t_entry *e = par->entryinfo;
+
+		if (e->ipv6.invflags & IP6T_INV_PROTO)
+			return 0;
+		return e->ipv6.proto;
+	} else
+		return 0;
+}
+
+static bool xt_ct_tg_check(const struct xt_tgchk_param *par)
+{
+	struct xt_ct_target_info *info = par->targinfo;
+	struct nf_conntrack_tuple t;
+	struct nf_conn_help *help;
+	struct nf_conn *ct;
+	u8 proto;
+
+	if (info->flags & ~XT_CT_NOTRACK)
+		return false;
+
+	if (info->flags & XT_CT_NOTRACK) {
+		ct = &nf_conntrack_untracked;
+		atomic_inc(&ct->ct_general.use);
+		goto out;
+	}
+
+	if (nf_ct_l3proto_try_module_get(par->family) < 0)
+		goto err1;
+
+	memset(&t, 0, sizeof(t));
+	ct = nf_conntrack_alloc(par->net, &t, &t, GFP_KERNEL);
+	if (IS_ERR(ct))
+		goto err2;
+
+	if ((info->ct_events || info->exp_events) &&
+	    !nf_ct_ecache_ext_add(ct, info->ct_events, info->exp_events,
+				  GFP_KERNEL))
+		goto err3;
+
+	if (info->helper[0]) {
+		proto = xt_ct_find_proto(par);
+		if (!proto)
+			goto err3;
+
+		help = nf_ct_helper_ext_add(ct, GFP_KERNEL);
+		if (help == NULL)
+			goto err3;
+
+		help->helper = nf_conntrack_helper_try_module_get(info->helper,
+								  par->family,
+								  proto);
+		if (help->helper == NULL)
+			goto err3;
+	}
+
+	__set_bit(IPS_TEMPLATE_BIT, &ct->status);
+	__set_bit(IPS_CONFIRMED_BIT, &ct->status);
+out:
+	info->ct = ct;
+	return true;
+
+err3:
+	nf_conntrack_free(ct);
+err2:
+	nf_ct_l3proto_module_put(par->family);
+err1:
+	return false;
+}
+
+static void xt_ct_tg_destroy(const struct xt_tgdtor_param *par)
+{
+	struct xt_ct_target_info *info = par->targinfo;
+	struct nf_conn *ct = info->ct;
+	struct nf_conn_help *help;
+
+	if (ct != &nf_conntrack_untracked) {
+		help = nfct_help(ct);
+		if (help)
+			module_put(help->helper->me);
+
+		nf_ct_l3proto_module_put(par->family);
+	}
+	nf_ct_put(info->ct);
+}
+
+static struct xt_target xt_ct_tg __read_mostly = {
+	.name		= "CT",
+	.family		= NFPROTO_UNSPEC,
+	.targetsize	= XT_ALIGN(sizeof(struct xt_ct_target_info)),
+	.checkentry	= xt_ct_tg_check,
+	.destroy	= xt_ct_tg_destroy,
+	.target		= xt_ct_target,
+	.table		= "raw",
+	.me		= THIS_MODULE,
+};
+
+static int __init xt_ct_tg_init(void)
+{
+	return xt_register_target(&xt_ct_tg);
+}
+
+static void __exit xt_ct_tg_exit(void)
+{
+	xt_unregister_target(&xt_ct_tg);
+}
+
+module_init(xt_ct_tg_init);
+module_exit(xt_ct_tg_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Xtables: connection tracking target");
+MODULE_ALIAS("ipt_CT");
+MODULE_ALIAS("ip6t_CT");
-- 
cgit v1.2.3


From cd757645fbdc34a8343c04bb0e74e06fccc2cb10 Mon Sep 17 00:00:00 2001
From: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
Date: Sat, 30 Jan 2010 10:25:18 +0530
Subject: perf: Make bp_len type to u64 generic across the arch

Change 'bp_len' type to __u64 to make it work across archs as
the s390 architecture watch point length can be upto 2^64.

reference:
	http://lkml.org/lkml/2010/1/25/212

This is an ABI change that is not backward compatible with
the previous hardware breakpoint info layout integrated in this
development cycle, a rebuilt of perf tools is necessary for
versions based on 2.6.33-rc1 - 2.6.33-rc6 to work with a
kernel based on this patch.

Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Cc: "K. Prasad" <prasad@linux.vnet.ibm.com>
Cc: Maneesh Soni <maneesh@in.ibm.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Martin <schwidefsky@de.ibm.com>
LKML-Reference: <20100130045518.GA20776@in.ibm.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
---
 include/linux/hw_breakpoint.h | 2 +-
 include/linux/perf_event.h    | 6 ++----
 kernel/hw_breakpoint.c        | 2 +-
 kernel/perf_event.c           | 2 +-
 4 files changed, 5 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h
index 070ba0621738..5977b724f7c6 100644
--- a/include/linux/hw_breakpoint.h
+++ b/include/linux/hw_breakpoint.h
@@ -44,7 +44,7 @@ static inline int hw_breakpoint_type(struct perf_event *bp)
 	return bp->attr.bp_type;
 }
 
-static inline int hw_breakpoint_len(struct perf_event *bp)
+static inline unsigned long hw_breakpoint_len(struct perf_event *bp)
 {
 	return bp->attr.bp_len;
 }
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 8fa71874113f..a177698d95e2 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -211,11 +211,9 @@ struct perf_event_attr {
 		__u32		wakeup_watermark; /* bytes before wakeup   */
 	};
 
-	__u32			__reserved_2;
-
-	__u64			bp_addr;
 	__u32			bp_type;
-	__u32			bp_len;
+	__u64			bp_addr;
+	__u64			bp_len;
 };
 
 /*
diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c
index 8a5c7d55ac9f..967e66143e11 100644
--- a/kernel/hw_breakpoint.c
+++ b/kernel/hw_breakpoint.c
@@ -360,8 +360,8 @@ EXPORT_SYMBOL_GPL(register_user_hw_breakpoint);
 int modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr)
 {
 	u64 old_addr = bp->attr.bp_addr;
+	u64 old_len = bp->attr.bp_len;
 	int old_type = bp->attr.bp_type;
-	int old_len = bp->attr.bp_len;
 	int err = 0;
 
 	perf_event_disable(bp);
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index d27746bd3a06..2b19297742cb 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -4580,7 +4580,7 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr,
 	if (attr->type >= PERF_TYPE_MAX)
 		return -EINVAL;
 
-	if (attr->__reserved_1 || attr->__reserved_2)
+	if (attr->__reserved_1)
 		return -EINVAL;
 
 	if (attr->sample_type & ~(PERF_SAMPLE_MAX-1))
-- 
cgit v1.2.3


From 002345925e6c45861f60db6f4fc6236713fd8847 Mon Sep 17 00:00:00 2001
From: Kees Cook <kees.cook@canonical.com>
Date: Wed, 3 Feb 2010 15:36:43 -0800
Subject: syslog: distinguish between /proc/kmsg and syscalls

This allows the LSM to distinguish between syslog functions originating
from /proc/kmsg access and direct syscalls.  By default, the commoncaps
will now no longer require CAP_SYS_ADMIN to read an opened /proc/kmsg
file descriptor.  For example the kernel syslog reader can now drop
privileges after opening /proc/kmsg, instead of staying privileged with
CAP_SYS_ADMIN.  MAC systems that implement security_syslog have unchanged
behavior.

Signed-off-by: Kees Cook <kees.cook@canonical.com>
Acked-by: Serge Hallyn <serue@us.ibm.com>
Acked-by: John Johansen <john.johansen@canonical.com>
Signed-off-by: James Morris <jmorris@namei.org>
---
 fs/proc/kmsg.c             | 14 +++++++-------
 include/linux/security.h   | 11 ++++++-----
 include/linux/syslog.h     | 29 +++++++++++++++++++++++++++++
 kernel/printk.c            |  7 ++++---
 security/commoncap.c       |  7 ++++++-
 security/security.c        |  4 ++--
 security/selinux/hooks.c   |  5 +++--
 security/smack/smack_lsm.c |  4 ++--
 8 files changed, 59 insertions(+), 22 deletions(-)
 create mode 100644 include/linux/syslog.h

(limited to 'include/linux')

diff --git a/fs/proc/kmsg.c b/fs/proc/kmsg.c
index 7ca78346d3f0..6a3d843a1088 100644
--- a/fs/proc/kmsg.c
+++ b/fs/proc/kmsg.c
@@ -12,37 +12,37 @@
 #include <linux/poll.h>
 #include <linux/proc_fs.h>
 #include <linux/fs.h>
+#include <linux/syslog.h>
 
 #include <asm/uaccess.h>
 #include <asm/io.h>
 
 extern wait_queue_head_t log_wait;
 
-extern int do_syslog(int type, char __user *bug, int count);
-
 static int kmsg_open(struct inode * inode, struct file * file)
 {
-	return do_syslog(1,NULL,0);
+	return do_syslog(1, NULL, 0, SYSLOG_FROM_FILE);
 }
 
 static int kmsg_release(struct inode * inode, struct file * file)
 {
-	(void) do_syslog(0,NULL,0);
+	(void) do_syslog(0, NULL, 0, SYSLOG_FROM_FILE);
 	return 0;
 }
 
 static ssize_t kmsg_read(struct file *file, char __user *buf,
 			 size_t count, loff_t *ppos)
 {
-	if ((file->f_flags & O_NONBLOCK) && !do_syslog(9, NULL, 0))
+	if ((file->f_flags & O_NONBLOCK) &&
+	    !do_syslog(9, NULL, 0, SYSLOG_FROM_FILE))
 		return -EAGAIN;
-	return do_syslog(2, buf, count);
+	return do_syslog(2, buf, count, SYSLOG_FROM_FILE);
 }
 
 static unsigned int kmsg_poll(struct file *file, poll_table *wait)
 {
 	poll_wait(file, &log_wait, wait);
-	if (do_syslog(9, NULL, 0))
+	if (do_syslog(9, NULL, 0, SYSLOG_FROM_FILE))
 		return POLLIN | POLLRDNORM;
 	return 0;
 }
diff --git a/include/linux/security.h b/include/linux/security.h
index 26eca85b2417..a4dc74d86ac6 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -76,7 +76,7 @@ extern int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3,
 extern int cap_task_setscheduler(struct task_struct *p, int policy, struct sched_param *lp);
 extern int cap_task_setioprio(struct task_struct *p, int ioprio);
 extern int cap_task_setnice(struct task_struct *p, int nice);
-extern int cap_syslog(int type);
+extern int cap_syslog(int type, bool from_file);
 extern int cap_vm_enough_memory(struct mm_struct *mm, long pages);
 
 struct msghdr;
@@ -1349,6 +1349,7 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  *	logging to the console.
  *	See the syslog(2) manual page for an explanation of the @type values.
  *	@type contains the type of action.
+ *	@from_file indicates the context of action (if it came from /proc).
  *	Return 0 if permission is granted.
  * @settime:
  *	Check permission to change the system time.
@@ -1463,7 +1464,7 @@ struct security_operations {
 	int (*sysctl) (struct ctl_table *table, int op);
 	int (*quotactl) (int cmds, int type, int id, struct super_block *sb);
 	int (*quota_on) (struct dentry *dentry);
-	int (*syslog) (int type);
+	int (*syslog) (int type, bool from_file);
 	int (*settime) (struct timespec *ts, struct timezone *tz);
 	int (*vm_enough_memory) (struct mm_struct *mm, long pages);
 
@@ -1762,7 +1763,7 @@ int security_acct(struct file *file);
 int security_sysctl(struct ctl_table *table, int op);
 int security_quotactl(int cmds, int type, int id, struct super_block *sb);
 int security_quota_on(struct dentry *dentry);
-int security_syslog(int type);
+int security_syslog(int type, bool from_file);
 int security_settime(struct timespec *ts, struct timezone *tz);
 int security_vm_enough_memory(long pages);
 int security_vm_enough_memory_mm(struct mm_struct *mm, long pages);
@@ -2008,9 +2009,9 @@ static inline int security_quota_on(struct dentry *dentry)
 	return 0;
 }
 
-static inline int security_syslog(int type)
+static inline int security_syslog(int type, bool from_file)
 {
-	return cap_syslog(type);
+	return cap_syslog(type, from_file);
 }
 
 static inline int security_settime(struct timespec *ts, struct timezone *tz)
diff --git a/include/linux/syslog.h b/include/linux/syslog.h
new file mode 100644
index 000000000000..5f02b1817be1
--- /dev/null
+++ b/include/linux/syslog.h
@@ -0,0 +1,29 @@
+/*  Syslog internals
+ *
+ *  Copyright 2010 Canonical, Ltd.
+ *  Author: Kees Cook <kees.cook@canonical.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2, or (at your option)
+ *  any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef _LINUX_SYSLOG_H
+#define _LINUX_SYSLOG_H
+
+#define SYSLOG_FROM_CALL 0
+#define SYSLOG_FROM_FILE 1
+
+int do_syslog(int type, char __user *buf, int count, bool from_file);
+
+#endif /* _LINUX_SYSLOG_H */
diff --git a/kernel/printk.c b/kernel/printk.c
index 17463ca2e229..809cf9a258a0 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -35,6 +35,7 @@
 #include <linux/kexec.h>
 #include <linux/ratelimit.h>
 #include <linux/kmsg_dump.h>
+#include <linux/syslog.h>
 
 #include <asm/uaccess.h>
 
@@ -273,14 +274,14 @@ static inline void boot_delay_msec(void)
  *	9 -- Return number of unread characters in the log buffer
  *     10 -- Return size of the log buffer
  */
-int do_syslog(int type, char __user *buf, int len)
+int do_syslog(int type, char __user *buf, int len, bool from_file)
 {
 	unsigned i, j, limit, count;
 	int do_clear = 0;
 	char c;
 	int error = 0;
 
-	error = security_syslog(type);
+	error = security_syslog(type, from_file);
 	if (error)
 		return error;
 
@@ -417,7 +418,7 @@ out:
 
 SYSCALL_DEFINE3(syslog, int, type, char __user *, buf, int, len)
 {
-	return do_syslog(type, buf, len);
+	return do_syslog(type, buf, len, SYSLOG_FROM_CALL);
 }
 
 /*
diff --git a/security/commoncap.c b/security/commoncap.c
index f800fdb3de94..677fad9d5cba 100644
--- a/security/commoncap.c
+++ b/security/commoncap.c
@@ -27,6 +27,7 @@
 #include <linux/sched.h>
 #include <linux/prctl.h>
 #include <linux/securebits.h>
+#include <linux/syslog.h>
 
 /*
  * If a non-root user executes a setuid-root binary in
@@ -888,12 +889,16 @@ error:
 /**
  * cap_syslog - Determine whether syslog function is permitted
  * @type: Function requested
+ * @from_file: Whether this request came from an open file (i.e. /proc)
  *
  * Determine whether the current process is permitted to use a particular
  * syslog function, returning 0 if permission is granted, -ve if not.
  */
-int cap_syslog(int type)
+int cap_syslog(int type, bool from_file)
 {
+	/* /proc/kmsg can open be opened by CAP_SYS_ADMIN */
+	if (type != 1 && from_file)
+		return 0;
 	if ((type != 3 && type != 10) && !capable(CAP_SYS_ADMIN))
 		return -EPERM;
 	return 0;
diff --git a/security/security.c b/security/security.c
index 440afe5eb54c..971092c06f31 100644
--- a/security/security.c
+++ b/security/security.c
@@ -203,9 +203,9 @@ int security_quota_on(struct dentry *dentry)
 	return security_ops->quota_on(dentry);
 }
 
-int security_syslog(int type)
+int security_syslog(int type, bool from_file)
 {
-	return security_ops->syslog(type);
+	return security_ops->syslog(type, from_file);
 }
 
 int security_settime(struct timespec *ts, struct timezone *tz)
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 9a2ee845e9d4..a4862a0730fa 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -76,6 +76,7 @@
 #include <linux/selinux.h>
 #include <linux/mutex.h>
 #include <linux/posix-timers.h>
+#include <linux/syslog.h>
 
 #include "avc.h"
 #include "objsec.h"
@@ -2049,11 +2050,11 @@ static int selinux_quota_on(struct dentry *dentry)
 	return dentry_has_perm(cred, NULL, dentry, FILE__QUOTAON);
 }
 
-static int selinux_syslog(int type)
+static int selinux_syslog(int type, bool from_file)
 {
 	int rc;
 
-	rc = cap_syslog(type);
+	rc = cap_syslog(type, from_file);
 	if (rc)
 		return rc;
 
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
index 529c9ca65878..a5721b373f53 100644
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -157,12 +157,12 @@ static int smack_ptrace_traceme(struct task_struct *ptp)
  *
  * Returns 0 on success, error code otherwise.
  */
-static int smack_syslog(int type)
+static int smack_syslog(int type, bool from_file)
 {
 	int rc;
 	char *sp = current_security();
 
-	rc = cap_syslog(type);
+	rc = cap_syslog(type, from_file);
 	if (rc != 0)
 		return rc;
 
-- 
cgit v1.2.3


From d78ca3cd733d8a2c3dcd88471beb1a15d973eed8 Mon Sep 17 00:00:00 2001
From: Kees Cook <kees.cook@canonical.com>
Date: Wed, 3 Feb 2010 15:37:13 -0800
Subject: syslog: use defined constants instead of raw numbers

Right now the syslog "type" action are just raw numbers which makes
the source difficult to follow.  This patch replaces the raw numbers
with defined constants for some level of sanity.

Signed-off-by: Kees Cook <kees.cook@canonical.com>
Acked-by: John Johansen <john.johansen@canonical.com>
Acked-by: Serge Hallyn <serue@us.ibm.com>
Signed-off-by: James Morris <jmorris@namei.org>
---
 fs/proc/kmsg.c           | 10 +++++-----
 include/linux/syslog.h   | 23 +++++++++++++++++++++++
 kernel/printk.c          | 45 +++++++++++++++++++--------------------------
 security/commoncap.c     |  5 +++--
 security/selinux/hooks.c | 21 +++++++++++----------
 5 files changed, 61 insertions(+), 43 deletions(-)

(limited to 'include/linux')

diff --git a/fs/proc/kmsg.c b/fs/proc/kmsg.c
index 6a3d843a1088..cfe90a48a6e8 100644
--- a/fs/proc/kmsg.c
+++ b/fs/proc/kmsg.c
@@ -21,12 +21,12 @@ extern wait_queue_head_t log_wait;
 
 static int kmsg_open(struct inode * inode, struct file * file)
 {
-	return do_syslog(1, NULL, 0, SYSLOG_FROM_FILE);
+	return do_syslog(SYSLOG_ACTION_OPEN, NULL, 0, SYSLOG_FROM_FILE);
 }
 
 static int kmsg_release(struct inode * inode, struct file * file)
 {
-	(void) do_syslog(0, NULL, 0, SYSLOG_FROM_FILE);
+	(void) do_syslog(SYSLOG_ACTION_CLOSE, NULL, 0, SYSLOG_FROM_FILE);
 	return 0;
 }
 
@@ -34,15 +34,15 @@ static ssize_t kmsg_read(struct file *file, char __user *buf,
 			 size_t count, loff_t *ppos)
 {
 	if ((file->f_flags & O_NONBLOCK) &&
-	    !do_syslog(9, NULL, 0, SYSLOG_FROM_FILE))
+	    !do_syslog(SYSLOG_ACTION_SIZE_UNREAD, NULL, 0, SYSLOG_FROM_FILE))
 		return -EAGAIN;
-	return do_syslog(2, buf, count, SYSLOG_FROM_FILE);
+	return do_syslog(SYSLOG_ACTION_READ, buf, count, SYSLOG_FROM_FILE);
 }
 
 static unsigned int kmsg_poll(struct file *file, poll_table *wait)
 {
 	poll_wait(file, &log_wait, wait);
-	if (do_syslog(9, NULL, 0, SYSLOG_FROM_FILE))
+	if (do_syslog(SYSLOG_ACTION_SIZE_UNREAD, NULL, 0, SYSLOG_FROM_FILE))
 		return POLLIN | POLLRDNORM;
 	return 0;
 }
diff --git a/include/linux/syslog.h b/include/linux/syslog.h
index 5f02b1817be1..38911391a139 100644
--- a/include/linux/syslog.h
+++ b/include/linux/syslog.h
@@ -21,6 +21,29 @@
 #ifndef _LINUX_SYSLOG_H
 #define _LINUX_SYSLOG_H
 
+/* Close the log.  Currently a NOP. */
+#define SYSLOG_ACTION_CLOSE          0
+/* Open the log. Currently a NOP. */
+#define SYSLOG_ACTION_OPEN           1
+/* Read from the log. */
+#define SYSLOG_ACTION_READ           2
+/* Read all messages remaining in the ring buffer. */
+#define SYSLOG_ACTION_READ_ALL       3
+/* Read and clear all messages remaining in the ring buffer */
+#define SYSLOG_ACTION_READ_CLEAR     4
+/* Clear ring buffer. */
+#define SYSLOG_ACTION_CLEAR          5
+/* Disable printk's to console */
+#define SYSLOG_ACTION_CONSOLE_OFF    6
+/* Enable printk's to console */
+#define SYSLOG_ACTION_CONSOLE_ON     7
+/* Set level of messages printed to console */
+#define SYSLOG_ACTION_CONSOLE_LEVEL  8
+/* Return number of unread characters in the log buffer */
+#define SYSLOG_ACTION_SIZE_UNREAD    9
+/* Return size of the log buffer */
+#define SYSLOG_ACTION_SIZE_BUFFER   10
+
 #define SYSLOG_FROM_CALL 0
 #define SYSLOG_FROM_FILE 1
 
diff --git a/kernel/printk.c b/kernel/printk.c
index 809cf9a258a0..3e162d867098 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -259,21 +259,6 @@ static inline void boot_delay_msec(void)
 }
 #endif
 
-/*
- * Commands to do_syslog:
- *
- * 	0 -- Close the log.  Currently a NOP.
- * 	1 -- Open the log. Currently a NOP.
- * 	2 -- Read from the log.
- * 	3 -- Read all messages remaining in the ring buffer.
- * 	4 -- Read and clear all messages remaining in the ring buffer
- * 	5 -- Clear ring buffer.
- * 	6 -- Disable printk's to console
- * 	7 -- Enable printk's to console
- *	8 -- Set level of messages printed to console
- *	9 -- Return number of unread characters in the log buffer
- *     10 -- Return size of the log buffer
- */
 int do_syslog(int type, char __user *buf, int len, bool from_file)
 {
 	unsigned i, j, limit, count;
@@ -286,11 +271,11 @@ int do_syslog(int type, char __user *buf, int len, bool from_file)
 		return error;
 
 	switch (type) {
-	case 0:		/* Close log */
+	case SYSLOG_ACTION_CLOSE:	/* Close log */
 		break;
-	case 1:		/* Open log */
+	case SYSLOG_ACTION_OPEN:	/* Open log */
 		break;
-	case 2:		/* Read from log */
+	case SYSLOG_ACTION_READ:	/* Read from log */
 		error = -EINVAL;
 		if (!buf || len < 0)
 			goto out;
@@ -321,10 +306,12 @@ int do_syslog(int type, char __user *buf, int len, bool from_file)
 		if (!error)
 			error = i;
 		break;
-	case 4:		/* Read/clear last kernel messages */
+	/* Read/clear last kernel messages */
+	case SYSLOG_ACTION_READ_CLEAR:
 		do_clear = 1;
 		/* FALL THRU */
-	case 3:		/* Read last kernel messages */
+	/* Read last kernel messages */
+	case SYSLOG_ACTION_READ_ALL:
 		error = -EINVAL;
 		if (!buf || len < 0)
 			goto out;
@@ -377,21 +364,25 @@ int do_syslog(int type, char __user *buf, int len, bool from_file)
 			}
 		}
 		break;
-	case 5:		/* Clear ring buffer */
+	/* Clear ring buffer */
+	case SYSLOG_ACTION_CLEAR:
 		logged_chars = 0;
 		break;
-	case 6:		/* Disable logging to console */
+	/* Disable logging to console */
+	case SYSLOG_ACTION_CONSOLE_OFF:
 		if (saved_console_loglevel == -1)
 			saved_console_loglevel = console_loglevel;
 		console_loglevel = minimum_console_loglevel;
 		break;
-	case 7:		/* Enable logging to console */
+	/* Enable logging to console */
+	case SYSLOG_ACTION_CONSOLE_ON:
 		if (saved_console_loglevel != -1) {
 			console_loglevel = saved_console_loglevel;
 			saved_console_loglevel = -1;
 		}
 		break;
-	case 8:		/* Set level of messages printed to console */
+	/* Set level of messages printed to console */
+	case SYSLOG_ACTION_CONSOLE_LEVEL:
 		error = -EINVAL;
 		if (len < 1 || len > 8)
 			goto out;
@@ -402,10 +393,12 @@ int do_syslog(int type, char __user *buf, int len, bool from_file)
 		saved_console_loglevel = -1;
 		error = 0;
 		break;
-	case 9:		/* Number of chars in the log buffer */
+	/* Number of chars in the log buffer */
+	case SYSLOG_ACTION_SIZE_UNREAD:
 		error = log_end - log_start;
 		break;
-	case 10:	/* Size of the log buffer */
+	/* Size of the log buffer */
+	case SYSLOG_ACTION_SIZE_BUFFER:
 		error = log_buf_len;
 		break;
 	default:
diff --git a/security/commoncap.c b/security/commoncap.c
index 677fad9d5cba..cf01b2eebb60 100644
--- a/security/commoncap.c
+++ b/security/commoncap.c
@@ -897,9 +897,10 @@ error:
 int cap_syslog(int type, bool from_file)
 {
 	/* /proc/kmsg can open be opened by CAP_SYS_ADMIN */
-	if (type != 1 && from_file)
+	if (type != SYSLOG_ACTION_OPEN && from_file)
 		return 0;
-	if ((type != 3 && type != 10) && !capable(CAP_SYS_ADMIN))
+	if ((type != SYSLOG_ACTION_READ_ALL &&
+	     type != SYSLOG_ACTION_SIZE_BUFFER) && !capable(CAP_SYS_ADMIN))
 		return -EPERM;
 	return 0;
 }
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index a4862a0730fa..6b36ce2eef2e 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -2059,20 +2059,21 @@ static int selinux_syslog(int type, bool from_file)
 		return rc;
 
 	switch (type) {
-	case 3:		/* Read last kernel messages */
-	case 10:	/* Return size of the log buffer */
+	case SYSLOG_ACTION_READ_ALL:	/* Read last kernel messages */
+	case SYSLOG_ACTION_SIZE_BUFFER:	/* Return size of the log buffer */
 		rc = task_has_system(current, SYSTEM__SYSLOG_READ);
 		break;
-	case 6:		/* Disable logging to console */
-	case 7:		/* Enable logging to console */
-	case 8:		/* Set level of messages printed to console */
+	case SYSLOG_ACTION_CONSOLE_OFF:	/* Disable logging to console */
+	case SYSLOG_ACTION_CONSOLE_ON:	/* Enable logging to console */
+	/* Set level of messages printed to console */
+	case SYSLOG_ACTION_CONSOLE_LEVEL:
 		rc = task_has_system(current, SYSTEM__SYSLOG_CONSOLE);
 		break;
-	case 0:		/* Close log */
-	case 1:		/* Open log */
-	case 2:		/* Read from log */
-	case 4:		/* Read/clear last kernel messages */
-	case 5:		/* Clear ring buffer */
+	case SYSLOG_ACTION_CLOSE:	/* Close log */
+	case SYSLOG_ACTION_OPEN:	/* Open log */
+	case SYSLOG_ACTION_READ:	/* Read from log */
+	case SYSLOG_ACTION_READ_CLEAR:	/* Read/clear last kernel messages */
+	case SYSLOG_ACTION_CLEAR:	/* Clear ring buffer */
 	default:
 		rc = task_has_system(current, SYSTEM__SYSLOG_MOD);
 		break;
-- 
cgit v1.2.3


From 8a83a00b0735190384a348156837918271034144 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Sat, 30 Jan 2010 12:23:03 +0000
Subject: net: maintain namespace isolation between vlan and real device

In the vlan and macvlan drivers, the start_xmit function forwards
data to the dev_queue_xmit function for another device, which may
potentially belong to a different namespace.

To make sure that classification stays within a single namespace,
this resets the potentially critical fields.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/macvlan.c     |  2 +-
 include/linux/netdevice.h |  9 +++++++++
 net/8021q/vlan_dev.c      |  2 +-
 net/core/dev.c            | 35 +++++++++++++++++++++++++++++++----
 4 files changed, 42 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index fa0dc514dbaf..d32e0bdfc5e9 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -269,7 +269,7 @@ static int macvlan_queue_xmit(struct sk_buff *skb, struct net_device *dev)
 	}
 
 xmit_world:
-	skb->dev = vlan->lowerdev;
+	skb_set_dev(skb, vlan->lowerdev);
 	return dev_queue_xmit(skb);
 }
 
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 93a32a5ca74f..622ba5aa93c4 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1004,6 +1004,15 @@ static inline bool netdev_uses_dsa_tags(struct net_device *dev)
 	return 0;
 }
 
+#ifndef CONFIG_NET_NS
+static inline void skb_set_dev(struct sk_buff *skb, struct net_device *dev)
+{
+	skb->dev = dev;
+}
+#else /* CONFIG_NET_NS */
+void skb_set_dev(struct sk_buff *skb, struct net_device *dev);
+#endif
+
 static inline bool netdev_uses_trailer_tags(struct net_device *dev)
 {
 #ifdef CONFIG_NET_DSA_TAG_TRAILER
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index a9e1f1785614..9e83272fc5b0 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -322,7 +322,7 @@ static netdev_tx_t vlan_dev_hard_start_xmit(struct sk_buff *skb,
 	}
 
 
-	skb->dev = vlan_dev_info(dev)->real_dev;
+	skb_set_dev(skb, vlan_dev_info(dev)->real_dev);
 	len = skb->len;
 	ret = dev_queue_xmit(skb);
 
diff --git a/net/core/dev.c b/net/core/dev.c
index 2cba5c521e56..94c1eeed25e5 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1448,13 +1448,10 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
 	if (skb->len > (dev->mtu + dev->hard_header_len))
 		return NET_RX_DROP;
 
-	skb_dst_drop(skb);
+	skb_set_dev(skb, dev);
 	skb->tstamp.tv64 = 0;
 	skb->pkt_type = PACKET_HOST;
 	skb->protocol = eth_type_trans(skb, dev);
-	skb->mark = 0;
-	secpath_reset(skb);
-	nf_reset(skb);
 	return netif_rx(skb);
 }
 EXPORT_SYMBOL_GPL(dev_forward_skb);
@@ -1614,6 +1611,36 @@ static bool dev_can_checksum(struct net_device *dev, struct sk_buff *skb)
 	return false;
 }
 
+/**
+ * skb_dev_set -- assign a new device to a buffer
+ * @skb: buffer for the new device
+ * @dev: network device
+ *
+ * If an skb is owned by a device already, we have to reset
+ * all data private to the namespace a device belongs to
+ * before assigning it a new device.
+ */
+#ifdef CONFIG_NET_NS
+void skb_set_dev(struct sk_buff *skb, struct net_device *dev)
+{
+	skb_dst_drop(skb);
+	if (skb->dev && !net_eq(dev_net(skb->dev), dev_net(dev))) {
+		secpath_reset(skb);
+		nf_reset(skb);
+		skb_init_secmark(skb);
+		skb->mark = 0;
+		skb->priority = 0;
+		skb->nf_trace = 0;
+		skb->ipvs_property = 0;
+#ifdef CONFIG_NET_SCHED
+		skb->tc_index = 0;
+#endif
+	}
+	skb->dev = dev;
+}
+EXPORT_SYMBOL(skb_set_dev);
+#endif /* CONFIG_NET_NS */
+
 /*
  * Invalidate hardware checksum when packet is to be mangled, and
  * complete checksum manually on outgoing path.
-- 
cgit v1.2.3


From fc0663d6b5e6d8e9b57f872a644c0aafd82361b7 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Sat, 30 Jan 2010 12:23:40 +0000
Subject: macvlan: allow multiple driver backends

This makes it possible to hook into the macvlan driver
from another kernel module. In particular, the goal is
to extend it with the macvtap backend that provides
a tun/tap compatible interface directly on the macvlan
device.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/macvlan.c      | 113 ++++++++++++++++++++-------------------------
 include/linux/if_macvlan.h |  70 ++++++++++++++++++++++++++++
 2 files changed, 119 insertions(+), 64 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index d32e0bdfc5e9..40faa368b07a 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -39,31 +39,6 @@ struct macvlan_port {
 	struct list_head	vlans;
 };
 
-/**
- *	struct macvlan_rx_stats - MACVLAN percpu rx stats
- *	@rx_packets: number of received packets
- *	@rx_bytes: number of received bytes
- *	@multicast: number of received multicast packets
- *	@rx_errors: number of errors
- */
-struct macvlan_rx_stats {
-	unsigned long rx_packets;
-	unsigned long rx_bytes;
-	unsigned long multicast;
-	unsigned long rx_errors;
-};
-
-struct macvlan_dev {
-	struct net_device	*dev;
-	struct list_head	list;
-	struct hlist_node	hlist;
-	struct macvlan_port	*port;
-	struct net_device	*lowerdev;
-	struct macvlan_rx_stats *rx_stats;
-	enum macvlan_mode	mode;
-};
-
-
 static struct macvlan_dev *macvlan_hash_lookup(const struct macvlan_port *port,
 					       const unsigned char *addr)
 {
@@ -118,31 +93,17 @@ static int macvlan_addr_busy(const struct macvlan_port *port,
 	return 0;
 }
 
-static inline void macvlan_count_rx(const struct macvlan_dev *vlan,
-				    unsigned int len, bool success,
-				    bool multicast)
-{
-	struct macvlan_rx_stats *rx_stats;
-
-	rx_stats = per_cpu_ptr(vlan->rx_stats, smp_processor_id());
-	if (likely(success)) {
-		rx_stats->rx_packets++;;
-		rx_stats->rx_bytes += len;
-		if (multicast)
-			rx_stats->multicast++;
-	} else {
-		rx_stats->rx_errors++;
-	}
-}
 
-static int macvlan_broadcast_one(struct sk_buff *skb, struct net_device *dev,
+static int macvlan_broadcast_one(struct sk_buff *skb,
+				 const struct macvlan_dev *vlan,
 				 const struct ethhdr *eth, bool local)
 {
+	struct net_device *dev = vlan->dev;
 	if (!skb)
 		return NET_RX_DROP;
 
 	if (local)
-		return dev_forward_skb(dev, skb);
+		return vlan->forward(dev, skb);
 
 	skb->dev = dev;
 	if (!compare_ether_addr_64bits(eth->h_dest,
@@ -151,7 +112,7 @@ static int macvlan_broadcast_one(struct sk_buff *skb, struct net_device *dev,
 	else
 		skb->pkt_type = PACKET_MULTICAST;
 
-	return netif_rx(skb);
+	return vlan->receive(skb);
 }
 
 static void macvlan_broadcast(struct sk_buff *skb,
@@ -175,7 +136,7 @@ static void macvlan_broadcast(struct sk_buff *skb,
 				continue;
 
 			nskb = skb_clone(skb, GFP_ATOMIC);
-			err = macvlan_broadcast_one(nskb, vlan->dev, eth,
+			err = macvlan_broadcast_one(nskb, vlan, eth,
 					 mode == MACVLAN_MODE_BRIDGE);
 			macvlan_count_rx(vlan, skb->len + ETH_HLEN,
 					 err == NET_RX_SUCCESS, 1);
@@ -238,7 +199,7 @@ static struct sk_buff *macvlan_handle_frame(struct sk_buff *skb)
 	skb->dev = dev;
 	skb->pkt_type = PACKET_HOST;
 
-	netif_rx(skb);
+	vlan->receive(skb);
 	return NULL;
 }
 
@@ -260,7 +221,7 @@ static int macvlan_queue_xmit(struct sk_buff *skb, struct net_device *dev)
 		dest = macvlan_hash_lookup(port, eth->h_dest);
 		if (dest && dest->mode == MACVLAN_MODE_BRIDGE) {
 			unsigned int length = skb->len + ETH_HLEN;
-			int ret = dev_forward_skb(dest->dev, skb);
+			int ret = dest->forward(dest->dev, skb);
 			macvlan_count_rx(dest, length,
 					 ret == NET_RX_SUCCESS, 0);
 
@@ -273,8 +234,8 @@ xmit_world:
 	return dev_queue_xmit(skb);
 }
 
-static netdev_tx_t macvlan_start_xmit(struct sk_buff *skb,
-				      struct net_device *dev)
+netdev_tx_t macvlan_start_xmit(struct sk_buff *skb,
+			       struct net_device *dev)
 {
 	int i = skb_get_queue_mapping(skb);
 	struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
@@ -290,6 +251,7 @@ static netdev_tx_t macvlan_start_xmit(struct sk_buff *skb,
 
 	return ret;
 }
+EXPORT_SYMBOL_GPL(macvlan_start_xmit);
 
 static int macvlan_hard_header(struct sk_buff *skb, struct net_device *dev,
 			       unsigned short type, const void *daddr,
@@ -623,8 +585,11 @@ static int macvlan_get_tx_queues(struct net *net,
 	return 0;
 }
 
-static int macvlan_newlink(struct net *src_net, struct net_device *dev,
-			   struct nlattr *tb[], struct nlattr *data[])
+int macvlan_common_newlink(struct net *src_net, struct net_device *dev,
+			   struct nlattr *tb[], struct nlattr *data[],
+			   int (*receive)(struct sk_buff *skb),
+			   int (*forward)(struct net_device *dev,
+					  struct sk_buff *skb))
 {
 	struct macvlan_dev *vlan = netdev_priv(dev);
 	struct macvlan_port *port;
@@ -664,6 +629,8 @@ static int macvlan_newlink(struct net *src_net, struct net_device *dev,
 	vlan->lowerdev = lowerdev;
 	vlan->dev      = dev;
 	vlan->port     = port;
+	vlan->receive  = receive;
+	vlan->forward  = forward;
 
 	vlan->mode     = MACVLAN_MODE_VEPA;
 	if (data && data[IFLA_MACVLAN_MODE])
@@ -677,8 +644,17 @@ static int macvlan_newlink(struct net *src_net, struct net_device *dev,
 	netif_stacked_transfer_operstate(lowerdev, dev);
 	return 0;
 }
+EXPORT_SYMBOL_GPL(macvlan_common_newlink);
 
-static void macvlan_dellink(struct net_device *dev, struct list_head *head)
+static int macvlan_newlink(struct net *src_net, struct net_device *dev,
+			   struct nlattr *tb[], struct nlattr *data[])
+{
+	return macvlan_common_newlink(src_net, dev, tb, data,
+				      netif_rx,
+				      dev_forward_skb);
+}
+
+void macvlan_dellink(struct net_device *dev, struct list_head *head)
 {
 	struct macvlan_dev *vlan = netdev_priv(dev);
 	struct macvlan_port *port = vlan->port;
@@ -689,6 +665,7 @@ static void macvlan_dellink(struct net_device *dev, struct list_head *head)
 	if (list_empty(&port->vlans))
 		macvlan_port_destroy(port->dev);
 }
+EXPORT_SYMBOL_GPL(macvlan_dellink);
 
 static int macvlan_changelink(struct net_device *dev,
 		struct nlattr *tb[], struct nlattr *data[])
@@ -720,19 +697,27 @@ static const struct nla_policy macvlan_policy[IFLA_MACVLAN_MAX + 1] = {
 	[IFLA_MACVLAN_MODE] = { .type = NLA_U32 },
 };
 
-static struct rtnl_link_ops macvlan_link_ops __read_mostly = {
+int macvlan_link_register(struct rtnl_link_ops *ops)
+{
+	/* common fields */
+	ops->priv_size		= sizeof(struct macvlan_dev);
+	ops->get_tx_queues	= macvlan_get_tx_queues;
+	ops->setup		= macvlan_setup;
+	ops->validate		= macvlan_validate;
+	ops->maxtype		= IFLA_MACVLAN_MAX;
+	ops->policy		= macvlan_policy;
+	ops->changelink		= macvlan_changelink;
+	ops->get_size		= macvlan_get_size;
+	ops->fill_info		= macvlan_fill_info;
+
+	return rtnl_link_register(ops);
+};
+EXPORT_SYMBOL_GPL(macvlan_link_register);
+
+static struct rtnl_link_ops macvlan_link_ops = {
 	.kind		= "macvlan",
-	.priv_size	= sizeof(struct macvlan_dev),
-	.get_tx_queues  = macvlan_get_tx_queues,
-	.setup		= macvlan_setup,
-	.validate	= macvlan_validate,
 	.newlink	= macvlan_newlink,
 	.dellink	= macvlan_dellink,
-	.maxtype	= IFLA_MACVLAN_MAX,
-	.policy		= macvlan_policy,
-	.changelink	= macvlan_changelink,
-	.get_size	= macvlan_get_size,
-	.fill_info	= macvlan_fill_info,
 };
 
 static int macvlan_device_event(struct notifier_block *unused,
@@ -761,7 +746,7 @@ static int macvlan_device_event(struct notifier_block *unused,
 		break;
 	case NETDEV_UNREGISTER:
 		list_for_each_entry_safe(vlan, next, &port->vlans, list)
-			macvlan_dellink(vlan->dev, NULL);
+			vlan->dev->rtnl_link_ops->dellink(vlan->dev, NULL);
 		break;
 	}
 	return NOTIFY_DONE;
@@ -778,7 +763,7 @@ static int __init macvlan_init_module(void)
 	register_netdevice_notifier(&macvlan_notifier_block);
 	macvlan_handle_frame_hook = macvlan_handle_frame;
 
-	err = rtnl_link_register(&macvlan_link_ops);
+	err = macvlan_link_register(&macvlan_link_ops);
 	if (err < 0)
 		goto err1;
 	return 0;
diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h
index 5f200bac3749..9a11544bb0b1 100644
--- a/include/linux/if_macvlan.h
+++ b/include/linux/if_macvlan.h
@@ -1,6 +1,76 @@
 #ifndef _LINUX_IF_MACVLAN_H
 #define _LINUX_IF_MACVLAN_H
 
+#include <linux/if_link.h>
+#include <linux/list.h>
+#include <linux/netdevice.h>
+#include <linux/netlink.h>
+#include <net/netlink.h>
+
+struct macvlan_port;
+struct macvtap_queue;
+
+/**
+ *	struct macvlan_rx_stats - MACVLAN percpu rx stats
+ *	@rx_packets: number of received packets
+ *	@rx_bytes: number of received bytes
+ *	@multicast: number of received multicast packets
+ *	@rx_errors: number of errors
+ */
+struct macvlan_rx_stats {
+	unsigned long rx_packets;
+	unsigned long rx_bytes;
+	unsigned long multicast;
+	unsigned long rx_errors;
+};
+
+struct macvlan_dev {
+	struct net_device	*dev;
+	struct list_head	list;
+	struct hlist_node	hlist;
+	struct macvlan_port	*port;
+	struct net_device	*lowerdev;
+	struct macvlan_rx_stats *rx_stats;
+	enum macvlan_mode	mode;
+	int (*receive)(struct sk_buff *skb);
+	int (*forward)(struct net_device *dev, struct sk_buff *skb);
+};
+
+static inline void macvlan_count_rx(const struct macvlan_dev *vlan,
+				    unsigned int len, bool success,
+				    bool multicast)
+{
+	struct macvlan_rx_stats *rx_stats;
+
+	rx_stats = per_cpu_ptr(vlan->rx_stats, smp_processor_id());
+	if (likely(success)) {
+		rx_stats->rx_packets++;;
+		rx_stats->rx_bytes += len;
+		if (multicast)
+			rx_stats->multicast++;
+	} else {
+		rx_stats->rx_errors++;
+	}
+}
+
+extern int macvlan_common_newlink(struct net *src_net, struct net_device *dev,
+				  struct nlattr *tb[], struct nlattr *data[],
+				  int (*receive)(struct sk_buff *skb),
+				  int (*forward)(struct net_device *dev,
+						 struct sk_buff *skb));
+
+extern void macvlan_count_rx(const struct macvlan_dev *vlan,
+			     unsigned int len, bool success,
+			     bool multicast);
+
+extern void macvlan_dellink(struct net_device *dev, struct list_head *head);
+
+extern int macvlan_link_register(struct rtnl_link_ops *ops);
+
+extern netdev_tx_t macvlan_start_xmit(struct sk_buff *skb,
+				      struct net_device *dev);
+
+
 extern struct sk_buff *(*macvlan_handle_frame_hook)(struct sk_buff *);
 
 #endif /* _LINUX_IF_MACVLAN_H */
-- 
cgit v1.2.3


From 20d29d7a916a47bf533b5709437fe735b6b5b79e Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Sat, 30 Jan 2010 12:24:26 +0000
Subject: net: macvtap driver

In order to use macvlan with qemu and other tools that require
a tap file descriptor, the macvtap driver adds a small backend
with a character device with the same interface as the tun
driver, with a minimum set of features.

Macvtap interfaces are created in the same way as macvlan
interfaces using ip link, but the netif is just used as a
handle for configuration and accounting, while the data
goes through the chardev. Each macvtap interface has its
own character device, simplifying permission management
significantly over the generic tun/tap driver.

Cc: Patrick McHardy <kaber@trash.net>
Cc: Stephen Hemminger <shemminger@linux-foundation.org>
Cc: David S. Miller" <davem@davemloft.net>
Cc: "Michael S. Tsirkin" <mst@redhat.com>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: Or Gerlitz <ogerlitz@voltaire.com>
Cc: netdev@vger.kernel.org
Cc: bridge@lists.linux-foundation.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/Kconfig        |  12 +
 drivers/net/Makefile       |   1 +
 drivers/net/macvtap.c      | 581 +++++++++++++++++++++++++++++++++++++++++++++
 include/linux/if_macvlan.h |   1 +
 4 files changed, 595 insertions(+)
 create mode 100644 drivers/net/macvtap.c

(limited to 'include/linux')

diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index cb0e534418e3..411e20703110 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -90,6 +90,18 @@ config MACVLAN
 	  To compile this driver as a module, choose M here: the module
 	  will be called macvlan.
 
+config MACVTAP
+	tristate "MAC-VLAN based tap driver (EXPERIMENTAL)"
+	depends on MACVLAN
+	help
+	  This adds a specialized tap character device driver that is based
+	  on the MAC-VLAN network interface, called macvtap. A macvtap device
+	  can be added in the same way as a macvlan device, using 'type
+	  macvlan', and then be accessed through the tap user space interface.
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called macvtap.
+
 config EQUALIZER
 	tristate "EQL (serial line load balancing) support"
 	---help---
diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index 0b763cbe9b1f..95958032cd31 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -169,6 +169,7 @@ obj-$(CONFIG_XEN_NETDEV_FRONTEND) += xen-netfront.o
 obj-$(CONFIG_DUMMY) += dummy.o
 obj-$(CONFIG_IFB) += ifb.o
 obj-$(CONFIG_MACVLAN) += macvlan.o
+obj-$(CONFIG_MACVTAP) += macvtap.o
 obj-$(CONFIG_DE600) += de600.o
 obj-$(CONFIG_DE620) += de620.o
 obj-$(CONFIG_LANCE) += lance.o
diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c
new file mode 100644
index 000000000000..ad1f6ef89308
--- /dev/null
+++ b/drivers/net/macvtap.c
@@ -0,0 +1,581 @@
+#include <linux/etherdevice.h>
+#include <linux/if_macvlan.h>
+#include <linux/interrupt.h>
+#include <linux/nsproxy.h>
+#include <linux/compat.h>
+#include <linux/if_tun.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/cache.h>
+#include <linux/sched.h>
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/wait.h>
+#include <linux/cdev.h>
+#include <linux/fs.h>
+
+#include <net/net_namespace.h>
+#include <net/rtnetlink.h>
+#include <net/sock.h>
+
+/*
+ * A macvtap queue is the central object of this driver, it connects
+ * an open character device to a macvlan interface. There can be
+ * multiple queues on one interface, which map back to queues
+ * implemented in hardware on the underlying device.
+ *
+ * macvtap_proto is used to allocate queues through the sock allocation
+ * mechanism.
+ *
+ * TODO: multiqueue support is currently not implemented, even though
+ * macvtap is basically prepared for that. We will need to add this
+ * here as well as in virtio-net and qemu to get line rate on 10gbit
+ * adapters from a guest.
+ */
+struct macvtap_queue {
+	struct sock sk;
+	struct socket sock;
+	struct macvlan_dev *vlan;
+	struct file *file;
+};
+
+static struct proto macvtap_proto = {
+	.name = "macvtap",
+	.owner = THIS_MODULE,
+	.obj_size = sizeof (struct macvtap_queue),
+};
+
+/*
+ * Minor number matches netdev->ifindex, so need a potentially
+ * large value. This also makes it possible to split the
+ * tap functionality out again in the future by offering it
+ * from other drivers besides macvtap. As long as every device
+ * only has one tap, the interface numbers assure that the
+ * device nodes are unique.
+ */
+static unsigned int macvtap_major;
+#define MACVTAP_NUM_DEVS 65536
+static struct class *macvtap_class;
+static struct cdev macvtap_cdev;
+
+/*
+ * RCU usage:
+ * The macvtap_queue is referenced both from the chardev struct file
+ * and from the struct macvlan_dev using rcu_read_lock.
+ *
+ * We never actually update the contents of a macvtap_queue atomically
+ * with RCU but it is used for race-free destruction of a queue when
+ * either the file or the macvlan_dev goes away. Pointers back to
+ * the dev and the file are implicitly valid as long as the queue
+ * exists.
+ *
+ * The callbacks from macvlan are always done with rcu_read_lock held
+ * already, while in the file_operations, we get it ourselves.
+ *
+ * When destroying a queue, we remove the pointers from the file and
+ * from the dev and then synchronize_rcu to make sure no thread is
+ * still using the queue. There may still be references to the struct
+ * sock inside of the queue from outbound SKBs, but these never
+ * reference back to the file or the dev. The data structure is freed
+ * through __sk_free when both our references and any pending SKBs
+ * are gone.
+ *
+ * macvtap_lock is only used to prevent multiple concurrent open()
+ * calls to assign a new vlan->tap pointer. It could be moved into
+ * the macvlan_dev itself but is extremely rarely used.
+ */
+static DEFINE_SPINLOCK(macvtap_lock);
+
+/*
+ * Choose the next free queue, for now there is only one
+ */
+static int macvtap_set_queue(struct net_device *dev, struct file *file,
+				struct macvtap_queue *q)
+{
+	struct macvlan_dev *vlan = netdev_priv(dev);
+	int err = -EBUSY;
+
+	spin_lock(&macvtap_lock);
+	if (rcu_dereference(vlan->tap))
+		goto out;
+
+	err = 0;
+	q->vlan = vlan;
+	rcu_assign_pointer(vlan->tap, q);
+
+	q->file = file;
+	rcu_assign_pointer(file->private_data, q);
+
+out:
+	spin_unlock(&macvtap_lock);
+	return err;
+}
+
+/*
+ * We must destroy each queue exactly once, when either
+ * the netdev or the file go away.
+ *
+ * Using the spinlock makes sure that we don't get
+ * to the queue again after destroying it.
+ *
+ * synchronize_rcu serializes with the packet flow
+ * that uses rcu_read_lock.
+ */
+static void macvtap_del_queue(struct macvtap_queue **qp)
+{
+	struct macvtap_queue *q;
+
+	spin_lock(&macvtap_lock);
+	q = rcu_dereference(*qp);
+	if (!q) {
+		spin_unlock(&macvtap_lock);
+		return;
+	}
+
+	rcu_assign_pointer(q->vlan->tap, NULL);
+	rcu_assign_pointer(q->file->private_data, NULL);
+	spin_unlock(&macvtap_lock);
+
+	synchronize_rcu();
+	sock_put(&q->sk);
+}
+
+/*
+ * Since we only support one queue, just dereference the pointer.
+ */
+static struct macvtap_queue *macvtap_get_queue(struct net_device *dev,
+					       struct sk_buff *skb)
+{
+	struct macvlan_dev *vlan = netdev_priv(dev);
+
+	return rcu_dereference(vlan->tap);
+}
+
+static void macvtap_del_queues(struct net_device *dev)
+{
+	struct macvlan_dev *vlan = netdev_priv(dev);
+	macvtap_del_queue(&vlan->tap);
+}
+
+static inline struct macvtap_queue *macvtap_file_get_queue(struct file *file)
+{
+	rcu_read_lock_bh();
+	return rcu_dereference(file->private_data);
+}
+
+static inline void macvtap_file_put_queue(void)
+{
+	rcu_read_unlock_bh();
+}
+
+/*
+ * Forward happens for data that gets sent from one macvlan
+ * endpoint to another one in bridge mode. We just take
+ * the skb and put it into the receive queue.
+ */
+static int macvtap_forward(struct net_device *dev, struct sk_buff *skb)
+{
+	struct macvtap_queue *q = macvtap_get_queue(dev, skb);
+	if (!q)
+		return -ENOLINK;
+
+	skb_queue_tail(&q->sk.sk_receive_queue, skb);
+	wake_up(q->sk.sk_sleep);
+	return 0;
+}
+
+/*
+ * Receive is for data from the external interface (lowerdev),
+ * in case of macvtap, we can treat that the same way as
+ * forward, which macvlan cannot.
+ */
+static int macvtap_receive(struct sk_buff *skb)
+{
+	skb_push(skb, ETH_HLEN);
+	return macvtap_forward(skb->dev, skb);
+}
+
+static int macvtap_newlink(struct net *src_net,
+			   struct net_device *dev,
+			   struct nlattr *tb[],
+			   struct nlattr *data[])
+{
+	struct device *classdev;
+	dev_t devt;
+	int err;
+
+	err = macvlan_common_newlink(src_net, dev, tb, data,
+				     macvtap_receive, macvtap_forward);
+	if (err)
+		goto out;
+
+	devt = MKDEV(MAJOR(macvtap_major), dev->ifindex);
+
+	classdev = device_create(macvtap_class, &dev->dev, devt,
+				 dev, "tap%d", dev->ifindex);
+	if (IS_ERR(classdev)) {
+		err = PTR_ERR(classdev);
+		macvtap_del_queues(dev);
+	}
+
+out:
+	return err;
+}
+
+static void macvtap_dellink(struct net_device *dev,
+			    struct list_head *head)
+{
+	device_destroy(macvtap_class,
+		       MKDEV(MAJOR(macvtap_major), dev->ifindex));
+
+	macvtap_del_queues(dev);
+	macvlan_dellink(dev, head);
+}
+
+static struct rtnl_link_ops macvtap_link_ops __read_mostly = {
+	.kind		= "macvtap",
+	.newlink	= macvtap_newlink,
+	.dellink	= macvtap_dellink,
+};
+
+
+static void macvtap_sock_write_space(struct sock *sk)
+{
+	if (!sock_writeable(sk) ||
+	    !test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags))
+		return;
+
+	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
+		wake_up_interruptible_sync(sk->sk_sleep);
+}
+
+static int macvtap_open(struct inode *inode, struct file *file)
+{
+	struct net *net = current->nsproxy->net_ns;
+	struct net_device *dev = dev_get_by_index(net, iminor(inode));
+	struct macvtap_queue *q;
+	int err;
+
+	err = -ENODEV;
+	if (!dev)
+		goto out;
+
+	/* check if this is a macvtap device */
+	err = -EINVAL;
+	if (dev->rtnl_link_ops != &macvtap_link_ops)
+		goto out;
+
+	err = -ENOMEM;
+	q = (struct macvtap_queue *)sk_alloc(net, AF_UNSPEC, GFP_KERNEL,
+					     &macvtap_proto);
+	if (!q)
+		goto out;
+
+	init_waitqueue_head(&q->sock.wait);
+	q->sock.type = SOCK_RAW;
+	q->sock.state = SS_CONNECTED;
+	sock_init_data(&q->sock, &q->sk);
+	q->sk.sk_allocation = GFP_ATOMIC; /* for now */
+	q->sk.sk_write_space = macvtap_sock_write_space;
+
+	err = macvtap_set_queue(dev, file, q);
+	if (err)
+		sock_put(&q->sk);
+
+out:
+	if (dev)
+		dev_put(dev);
+
+	return err;
+}
+
+static int macvtap_release(struct inode *inode, struct file *file)
+{
+	macvtap_del_queue((struct macvtap_queue **)&file->private_data);
+	return 0;
+}
+
+static unsigned int macvtap_poll(struct file *file, poll_table * wait)
+{
+	struct macvtap_queue *q = macvtap_file_get_queue(file);
+	unsigned int mask = POLLERR;
+
+	if (!q)
+		goto out;
+
+	mask = 0;
+	poll_wait(file, &q->sock.wait, wait);
+
+	if (!skb_queue_empty(&q->sk.sk_receive_queue))
+		mask |= POLLIN | POLLRDNORM;
+
+	if (sock_writeable(&q->sk) ||
+	    (!test_and_set_bit(SOCK_ASYNC_NOSPACE, &q->sock.flags) &&
+	     sock_writeable(&q->sk)))
+		mask |= POLLOUT | POLLWRNORM;
+
+out:
+	macvtap_file_put_queue();
+	return mask;
+}
+
+/* Get packet from user space buffer */
+static ssize_t macvtap_get_user(struct macvtap_queue *q,
+				const struct iovec *iv, size_t count,
+				int noblock)
+{
+	struct sk_buff *skb;
+	size_t len = count;
+	int err;
+
+	if (unlikely(len < ETH_HLEN))
+		return -EINVAL;
+
+	skb = sock_alloc_send_skb(&q->sk, NET_IP_ALIGN + len, noblock, &err);
+
+	if (!skb) {
+		macvlan_count_rx(q->vlan, 0, false, false);
+		return err;
+	}
+
+	skb_reserve(skb, NET_IP_ALIGN);
+	skb_put(skb, count);
+
+	if (skb_copy_datagram_from_iovec(skb, 0, iv, 0, len)) {
+		macvlan_count_rx(q->vlan, 0, false, false);
+		kfree_skb(skb);
+		return -EFAULT;
+	}
+
+	skb_set_network_header(skb, ETH_HLEN);
+
+	macvlan_start_xmit(skb, q->vlan->dev);
+
+	return count;
+}
+
+static ssize_t macvtap_aio_write(struct kiocb *iocb, const struct iovec *iv,
+				 unsigned long count, loff_t pos)
+{
+	struct file *file = iocb->ki_filp;
+	ssize_t result = -ENOLINK;
+	struct macvtap_queue *q = macvtap_file_get_queue(file);
+
+	if (!q)
+		goto out;
+
+	result = macvtap_get_user(q, iv, iov_length(iv, count),
+			      file->f_flags & O_NONBLOCK);
+out:
+	macvtap_file_put_queue();
+	return result;
+}
+
+/* Put packet to the user space buffer */
+static ssize_t macvtap_put_user(struct macvtap_queue *q,
+				const struct sk_buff *skb,
+				const struct iovec *iv, int len)
+{
+	struct macvlan_dev *vlan = q->vlan;
+	int ret;
+
+	len = min_t(int, skb->len, len);
+
+	ret = skb_copy_datagram_const_iovec(skb, 0, iv, 0, len);
+
+	macvlan_count_rx(vlan, len, ret == 0, 0);
+
+	return ret ? ret : len;
+}
+
+static ssize_t macvtap_aio_read(struct kiocb *iocb, const struct iovec *iv,
+				unsigned long count, loff_t pos)
+{
+	struct file *file = iocb->ki_filp;
+	struct macvtap_queue *q = macvtap_file_get_queue(file);
+
+	DECLARE_WAITQUEUE(wait, current);
+	struct sk_buff *skb;
+	ssize_t len, ret = 0;
+
+	if (!q) {
+		ret = -ENOLINK;
+		goto out;
+	}
+
+	len = iov_length(iv, count);
+	if (len < 0) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	add_wait_queue(q->sk.sk_sleep, &wait);
+	while (len) {
+		current->state = TASK_INTERRUPTIBLE;
+
+		/* Read frames from the queue */
+		skb = skb_dequeue(&q->sk.sk_receive_queue);
+		if (!skb) {
+			if (file->f_flags & O_NONBLOCK) {
+				ret = -EAGAIN;
+				break;
+			}
+			if (signal_pending(current)) {
+				ret = -ERESTARTSYS;
+				break;
+			}
+			/* Nothing to read, let's sleep */
+			schedule();
+			continue;
+		}
+		ret = macvtap_put_user(q, skb, iv, len);
+		kfree_skb(skb);
+		break;
+	}
+
+	current->state = TASK_RUNNING;
+	remove_wait_queue(q->sk.sk_sleep, &wait);
+
+out:
+	macvtap_file_put_queue();
+	return ret;
+}
+
+/*
+ * provide compatibility with generic tun/tap interface
+ */
+static long macvtap_ioctl(struct file *file, unsigned int cmd,
+			  unsigned long arg)
+{
+	struct macvtap_queue *q;
+	void __user *argp = (void __user *)arg;
+	struct ifreq __user *ifr = argp;
+	unsigned int __user *up = argp;
+	unsigned int u;
+	char devname[IFNAMSIZ];
+
+	switch (cmd) {
+	case TUNSETIFF:
+		/* ignore the name, just look at flags */
+		if (get_user(u, &ifr->ifr_flags))
+			return -EFAULT;
+		if (u != (IFF_TAP | IFF_NO_PI))
+			return -EINVAL;
+		return 0;
+
+	case TUNGETIFF:
+		q = macvtap_file_get_queue(file);
+		if (!q)
+			return -ENOLINK;
+		memcpy(devname, q->vlan->dev->name, sizeof(devname));
+		macvtap_file_put_queue();
+
+		if (copy_to_user(&ifr->ifr_name, q->vlan->dev->name, IFNAMSIZ) ||
+		    put_user((TUN_TAP_DEV | TUN_NO_PI), &ifr->ifr_flags))
+			return -EFAULT;
+		return 0;
+
+	case TUNGETFEATURES:
+		if (put_user((IFF_TAP | IFF_NO_PI), up))
+			return -EFAULT;
+		return 0;
+
+	case TUNSETSNDBUF:
+		if (get_user(u, up))
+			return -EFAULT;
+
+		q = macvtap_file_get_queue(file);
+		q->sk.sk_sndbuf = u;
+		macvtap_file_put_queue();
+		return 0;
+
+	case TUNSETOFFLOAD:
+		/* let the user check for future flags */
+		if (arg & ~(TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 |
+			  TUN_F_TSO_ECN | TUN_F_UFO))
+			return -EINVAL;
+
+		/* TODO: add support for these, so far we don't
+			 support any offload */
+		if (arg & (TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 |
+			 TUN_F_TSO_ECN | TUN_F_UFO))
+			return -EINVAL;
+
+		return 0;
+
+	default:
+		return -EINVAL;
+	}
+}
+
+#ifdef CONFIG_COMPAT
+static long macvtap_compat_ioctl(struct file *file, unsigned int cmd,
+				 unsigned long arg)
+{
+	return macvtap_ioctl(file, cmd, (unsigned long)compat_ptr(arg));
+}
+#endif
+
+static const struct file_operations macvtap_fops = {
+	.owner		= THIS_MODULE,
+	.open		= macvtap_open,
+	.release	= macvtap_release,
+	.aio_read	= macvtap_aio_read,
+	.aio_write	= macvtap_aio_write,
+	.poll		= macvtap_poll,
+	.llseek		= no_llseek,
+	.unlocked_ioctl	= macvtap_ioctl,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl	= macvtap_compat_ioctl,
+#endif
+};
+
+static int macvtap_init(void)
+{
+	int err;
+
+	err = alloc_chrdev_region(&macvtap_major, 0,
+				MACVTAP_NUM_DEVS, "macvtap");
+	if (err)
+		goto out1;
+
+	cdev_init(&macvtap_cdev, &macvtap_fops);
+	err = cdev_add(&macvtap_cdev, macvtap_major, MACVTAP_NUM_DEVS);
+	if (err)
+		goto out2;
+
+	macvtap_class = class_create(THIS_MODULE, "macvtap");
+	if (IS_ERR(macvtap_class)) {
+		err = PTR_ERR(macvtap_class);
+		goto out3;
+	}
+
+	err = macvlan_link_register(&macvtap_link_ops);
+	if (err)
+		goto out4;
+
+	return 0;
+
+out4:
+	class_unregister(macvtap_class);
+out3:
+	cdev_del(&macvtap_cdev);
+out2:
+	unregister_chrdev_region(macvtap_major, MACVTAP_NUM_DEVS);
+out1:
+	return err;
+}
+module_init(macvtap_init);
+
+static void macvtap_exit(void)
+{
+	rtnl_link_unregister(&macvtap_link_ops);
+	class_unregister(macvtap_class);
+	cdev_del(&macvtap_cdev);
+	unregister_chrdev_region(macvtap_major, MACVTAP_NUM_DEVS);
+}
+module_exit(macvtap_exit);
+
+MODULE_ALIAS_RTNL_LINK("macvtap");
+MODULE_AUTHOR("Arnd Bergmann <arnd@arndb.de>");
+MODULE_LICENSE("GPL");
diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h
index 9a11544bb0b1..51f1512045e9 100644
--- a/include/linux/if_macvlan.h
+++ b/include/linux/if_macvlan.h
@@ -34,6 +34,7 @@ struct macvlan_dev {
 	enum macvlan_mode	mode;
 	int (*receive)(struct sk_buff *skb);
 	int (*forward)(struct net_device *dev, struct sk_buff *skb);
+	struct macvtap_queue	*tap;
 };
 
 static inline void macvlan_count_rx(const struct macvlan_dev *vlan,
-- 
cgit v1.2.3


From 1621e0940294c20e302faf401f41204de7252e22 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Mon, 1 Feb 2010 09:44:19 +0000
Subject: net: CONFIG_COMPAT redux

Ifdef out
	struct proto_ops::compat_ioctl
	struct proto_ops::compat_setsockopt
	struct proto_ops::compat_getsockopt
to make structures smaller on COMPAT=n kernels.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/net.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/net.h b/include/linux/net.h
index 5e8083cacc8b..4157b5d42bd6 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -174,18 +174,22 @@ struct proto_ops {
 				      struct poll_table_struct *wait);
 	int		(*ioctl)     (struct socket *sock, unsigned int cmd,
 				      unsigned long arg);
+#ifdef CONFIG_COMPAT
 	int	 	(*compat_ioctl) (struct socket *sock, unsigned int cmd,
 				      unsigned long arg);
+#endif
 	int		(*listen)    (struct socket *sock, int len);
 	int		(*shutdown)  (struct socket *sock, int flags);
 	int		(*setsockopt)(struct socket *sock, int level,
 				      int optname, char __user *optval, unsigned int optlen);
 	int		(*getsockopt)(struct socket *sock, int level,
 				      int optname, char __user *optval, int __user *optlen);
+#ifdef CONFIG_COMPAT
 	int		(*compat_setsockopt)(struct socket *sock, int level,
 				      int optname, char __user *optval, unsigned int optlen);
 	int		(*compat_getsockopt)(struct socket *sock, int level,
 				      int optname, char __user *optval, int __user *optlen);
+#endif
 	int		(*sendmsg)   (struct kiocb *iocb, struct socket *sock,
 				      struct msghdr *m, size_t total_len);
 	int		(*recvmsg)   (struct kiocb *iocb, struct socket *sock,
-- 
cgit v1.2.3


From f7acede65d6b65919aee5b6a360a17cedb11f2f7 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 28 Jan 2010 13:30:11 +0100
Subject: libata: fix ata_id_logical_per_physical_sectors

The value we get from the low byte of the ATA_ID_SECTOR_SIZE word is not not
a plain multiple, but the log of it, so fix the helper to give the correct
answer.  Without this we'll get an incorrect minimal I/O size in the block
limits VPD page for 4k sector drives.

Also change the return value of ata_id_logical_per_physical_sectors to u16
for the unlikely case of very large logical sectors.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
---
 include/linux/ata.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ata.h b/include/linux/ata.h
index 38a6948ce0c2..20f31567ccee 100644
--- a/include/linux/ata.h
+++ b/include/linux/ata.h
@@ -647,9 +647,9 @@ static inline int ata_id_has_large_logical_sectors(const u16 *id)
 	return id[ATA_ID_SECTOR_SIZE] & (1 << 13);
 }
 
-static inline u8 ata_id_logical_per_physical_sectors(const u16 *id)
+static inline u16 ata_id_logical_per_physical_sectors(const u16 *id)
 {
-	return id[ATA_ID_SECTOR_SIZE] & 0xf;
+	return 1 << (id[ATA_ID_SECTOR_SIZE] & 0xf);
 }
 
 static inline int ata_id_has_lba48(const u16 *id)
-- 
cgit v1.2.3


From 0b7024ac4df5821347141c18e680b7166bc1cb20 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Tue, 2 Feb 2010 21:08:26 -0800
Subject: Input: add match() method to input hanlders

Get rid of blacklist in input handler structure and instead allow
handlers to define their own match() method to perform fine-grained
filtering of supported devices.

Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/char/keyboard.c | 24 ++++++++++++++++--------
 drivers/input/input.c   | 13 ++++++-------
 drivers/input/joydev.c  | 32 +++++++++++++++-----------------
 include/linux/input.h   |  6 +++---
 4 files changed, 40 insertions(+), 35 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/char/keyboard.c b/drivers/char/keyboard.c
index cbf64b985ef4..ada25bb8941e 100644
--- a/drivers/char/keyboard.c
+++ b/drivers/char/keyboard.c
@@ -1323,6 +1323,21 @@ static void kbd_event(struct input_handle *handle, unsigned int event_type,
 	schedule_console_callback();
 }
 
+static bool kbd_match(struct input_handler *handler, struct input_dev *dev)
+{
+	int i;
+
+	if (test_bit(EV_SND, dev->evbit))
+		return true;
+
+	if (test_bit(EV_KEY, dev->evbit))
+		for (i = KEY_RESERVED; i < BTN_MISC; i++)
+			if (test_bit(i, dev->keybit))
+				return true;
+
+	return false;
+}
+
 /*
  * When a keyboard (or other input device) is found, the kbd_connect
  * function is called. The function then looks at the device, and if it
@@ -1334,14 +1349,6 @@ static int kbd_connect(struct input_handler *handler, struct input_dev *dev,
 {
 	struct input_handle *handle;
 	int error;
-	int i;
-
-	for (i = KEY_RESERVED; i < BTN_MISC; i++)
-		if (test_bit(i, dev->keybit))
-			break;
-
-	if (i == BTN_MISC && !test_bit(EV_SND, dev->evbit))
-		return -ENODEV;
 
 	handle = kzalloc(sizeof(struct input_handle), GFP_KERNEL);
 	if (!handle)
@@ -1407,6 +1414,7 @@ MODULE_DEVICE_TABLE(input, kbd_ids);
 
 static struct input_handler kbd_handler = {
 	.event		= kbd_event,
+	.match		= kbd_match,
 	.connect	= kbd_connect,
 	.disconnect	= kbd_disconnect,
 	.start		= kbd_start,
diff --git a/drivers/input/input.c b/drivers/input/input.c
index 7080a9d4b840..dae49eba6ccd 100644
--- a/drivers/input/input.c
+++ b/drivers/input/input.c
@@ -723,12 +723,13 @@ EXPORT_SYMBOL(input_set_keycode);
 		if (i != BITS_TO_LONGS(max)) \
 			continue;
 
-static const struct input_device_id *input_match_device(const struct input_device_id *id,
+static const struct input_device_id *input_match_device(struct input_handler *handler,
 							struct input_dev *dev)
 {
+	const struct input_device_id *id;
 	int i;
 
-	for (; id->flags || id->driver_info; id++) {
+	for (id = handler->id_table; id->flags || id->driver_info; id++) {
 
 		if (id->flags & INPUT_DEVICE_ID_MATCH_BUS)
 			if (id->bustype != dev->id.bustype)
@@ -756,7 +757,8 @@ static const struct input_device_id *input_match_device(const struct input_devic
 		MATCH_BIT(ffbit,  FF_MAX);
 		MATCH_BIT(swbit,  SW_MAX);
 
-		return id;
+		if (!handler->match || handler->match(handler, dev))
+			return id;
 	}
 
 	return NULL;
@@ -767,10 +769,7 @@ static int input_attach_handler(struct input_dev *dev, struct input_handler *han
 	const struct input_device_id *id;
 	int error;
 
-	if (handler->blacklist && input_match_device(handler->blacklist, dev))
-		return -ENODEV;
-
-	id = input_match_device(handler->id_table, dev);
+	id = input_match_device(handler, dev);
 	if (!id)
 		return -ENODEV;
 
diff --git a/drivers/input/joydev.c b/drivers/input/joydev.c
index b1bd6dd32286..63e71f2a7acc 100644
--- a/drivers/input/joydev.c
+++ b/drivers/input/joydev.c
@@ -775,6 +775,20 @@ static void joydev_cleanup(struct joydev *joydev)
 		input_close_device(handle);
 }
 
+
+static bool joydev_match(struct input_handler *handler, struct input_dev *dev)
+{
+	/* Avoid touchpads and touchscreens */
+	if (test_bit(EV_KEY, dev->evbit) && test_bit(BTN_TOUCH, dev->keybit))
+		return false;
+
+	/* Avoid tablets, digitisers and similar devices */
+	if (test_bit(EV_KEY, dev->evbit) && test_bit(BTN_DIGI, dev->keybit))
+		return false;
+
+	return true;
+}
+
 static int joydev_connect(struct input_handler *handler, struct input_dev *dev,
 			  const struct input_device_id *id)
 {
@@ -894,22 +908,6 @@ static void joydev_disconnect(struct input_handle *handle)
 	put_device(&joydev->dev);
 }
 
-static const struct input_device_id joydev_blacklist[] = {
-	{
-		.flags = INPUT_DEVICE_ID_MATCH_EVBIT |
-				INPUT_DEVICE_ID_MATCH_KEYBIT,
-		.evbit = { BIT_MASK(EV_KEY) },
-		.keybit = { [BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH) },
-	},	/* Avoid itouchpads and touchscreens */
-	{
-		.flags = INPUT_DEVICE_ID_MATCH_EVBIT |
-				INPUT_DEVICE_ID_MATCH_KEYBIT,
-		.evbit = { BIT_MASK(EV_KEY) },
-		.keybit = { [BIT_WORD(BTN_DIGI)] = BIT_MASK(BTN_DIGI) },
-	},	/* Avoid tablets, digitisers and similar devices */
-	{ }	/* Terminating entry */
-};
-
 static const struct input_device_id joydev_ids[] = {
 	{
 		.flags = INPUT_DEVICE_ID_MATCH_EVBIT |
@@ -936,13 +934,13 @@ MODULE_DEVICE_TABLE(input, joydev_ids);
 
 static struct input_handler joydev_handler = {
 	.event		= joydev_event,
+	.match		= joydev_match,
 	.connect	= joydev_connect,
 	.disconnect	= joydev_disconnect,
 	.fops		= &joydev_fops,
 	.minor		= JOYDEV_MINOR_BASE,
 	.name		= "joydev",
 	.id_table	= joydev_ids,
-	.blacklist	= joydev_blacklist,
 };
 
 static int __init joydev_init(void)
diff --git a/include/linux/input.h b/include/linux/input.h
index 6c9d3d49fa91..8dc5d724c703 100644
--- a/include/linux/input.h
+++ b/include/linux/input.h
@@ -1200,6 +1200,8 @@ struct input_handle;
  *	it may not sleep
  * @filter: similar to @event; separates normal event handlers from
  *	"filters".
+ * @match: called after comparing device's id with handler's id_table
+ *	to perform fine-grained matching between device and handler
  * @connect: called when attaching a handler to an input device
  * @disconnect: disconnects a handler from input device
  * @start: starts handler for given handle. This function is called by
@@ -1211,8 +1213,6 @@ struct input_handle;
  * @name: name of the handler, to be shown in /proc/bus/input/handlers
  * @id_table: pointer to a table of input_device_ids this driver can
  *	handle
- * @blacklist: pointer to a table of input_device_ids this driver should
- *	ignore even if they match @id_table
  * @h_list: list of input handles associated with the handler
  * @node: for placing the driver onto input_handler_list
  *
@@ -1235,6 +1235,7 @@ struct input_handler {
 
 	void (*event)(struct input_handle *handle, unsigned int type, unsigned int code, int value);
 	bool (*filter)(struct input_handle *handle, unsigned int type, unsigned int code, int value);
+	bool (*match)(struct input_handler *handler, struct input_dev *dev);
 	int (*connect)(struct input_handler *handler, struct input_dev *dev, const struct input_device_id *id);
 	void (*disconnect)(struct input_handle *handle);
 	void (*start)(struct input_handle *handle);
@@ -1244,7 +1245,6 @@ struct input_handler {
 	const char *name;
 
 	const struct input_device_id *id_table;
-	const struct input_device_id *blacklist;
 
 	struct list_head	h_list;
 	struct list_head	node;
-- 
cgit v1.2.3


From 2cfa19780d61740f65790c5bae363b759d7c96fa Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@redhat.com>
Date: Tue, 2 Feb 2010 16:49:11 -0500
Subject: ftrace/alternatives: Introducing *_text_reserved functions

Introducing *_text_reserved functions for checking the text
address range is partially reserved or not. This patch provides
checking routines for x86 smp alternatives and dynamic ftrace.
Since both functions modify fixed pieces of kernel text, they
should reserve and protect those from other dynamic text
modifier, like kprobes.

This will also be extended when introducing other subsystems
which modify fixed pieces of kernel text. Dynamic text modifiers
should avoid those.

Signed-off-by: Masami Hiramatsu <mhiramat@redhat.com>
Cc: systemtap <systemtap@sources.redhat.com>
Cc: DLE <dle-develop@lists.sourceforge.net>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: przemyslaw@pawelczyk.it
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Cc: Jim Keniston <jkenisto@us.ibm.com>
Cc: Mathieu Desnoyers <compudj@krystal.dyndns.org>
Cc: Jason Baron <jbaron@redhat.com>
LKML-Reference: <20100202214911.4694.16587.stgit@dhcp-100-2-132.bos.redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/alternative.h |  5 +++++
 arch/x86/kernel/alternative.c      | 16 ++++++++++++++++
 include/linux/ftrace.h             |  6 ++++++
 kernel/trace/ftrace.c              | 15 +++++++++++++++
 4 files changed, 42 insertions(+)

(limited to 'include/linux')

diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 69b74a7b877f..ac80b7d70014 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -65,12 +65,17 @@ extern void alternatives_smp_module_add(struct module *mod, char *name,
 					void *text, void *text_end);
 extern void alternatives_smp_module_del(struct module *mod);
 extern void alternatives_smp_switch(int smp);
+extern int alternatives_text_reserved(void *start, void *end);
 #else
 static inline void alternatives_smp_module_add(struct module *mod, char *name,
 					       void *locks, void *locks_end,
 					       void *text, void *text_end) {}
 static inline void alternatives_smp_module_del(struct module *mod) {}
 static inline void alternatives_smp_switch(int smp) {}
+static inline int alternatives_text_reserved(void *start, void *end)
+{
+	return 0;
+}
 #endif	/* CONFIG_SMP */
 
 /* alternative assembly primitive: */
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index de7353c0ce9c..3c13284ff86d 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -390,6 +390,22 @@ void alternatives_smp_switch(int smp)
 	mutex_unlock(&smp_alt);
 }
 
+/* Return 1 if the address range is reserved for smp-alternatives */
+int alternatives_text_reserved(void *start, void *end)
+{
+	struct smp_alt_module *mod;
+	u8 **ptr;
+
+	list_for_each_entry(mod, &smp_alt_modules, next) {
+		if (mod->text > end || mod->text_end < start)
+			continue;
+		for (ptr = mod->locks; ptr < mod->locks_end; ptr++)
+			if (start <= *ptr && end >= *ptr)
+				return 1;
+	}
+
+	return 0;
+}
 #endif
 
 #ifdef CONFIG_PARAVIRT
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 0b4f97d24d7f..9d127efed43c 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -134,6 +134,8 @@ extern void
 unregister_ftrace_function_probe_func(char *glob, struct ftrace_probe_ops *ops);
 extern void unregister_ftrace_function_probe_all(char *glob);
 
+extern int ftrace_text_reserved(void *start, void *end);
+
 enum {
 	FTRACE_FL_FREE		= (1 << 0),
 	FTRACE_FL_FAILED	= (1 << 1),
@@ -250,6 +252,10 @@ static inline int unregister_ftrace_command(char *cmd_name)
 {
 	return -EINVAL;
 }
+static inline int ftrace_text_reserved(void *start, void *end)
+{
+	return 0;
+}
 #endif /* CONFIG_DYNAMIC_FTRACE */
 
 /* totally disable ftrace - can not re-enable after this */
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 1e6640f80454..3d90661a5f40 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1025,6 +1025,21 @@ static void ftrace_bug(int failed, unsigned long ip)
 }
 
 
+/* Return 1 if the address range is reserved for ftrace */
+int ftrace_text_reserved(void *start, void *end)
+{
+	struct dyn_ftrace *rec;
+	struct ftrace_page *pg;
+
+	do_for_each_ftrace_rec(pg, rec) {
+		if (rec->ip <= (unsigned long)end &&
+		    rec->ip + MCOUNT_INSN_SIZE > (unsigned long)start)
+			return 1;
+	} while_for_each_ftrace_rec();
+	return 0;
+}
+
+
 static int
 __ftrace_replace_code(struct dyn_ftrace *rec, int enable)
 {
-- 
cgit v1.2.3


From f24bb999d2b9f2950e5cac5b69bffedf73c24ea4 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@redhat.com>
Date: Tue, 2 Feb 2010 16:49:25 -0500
Subject: ftrace: Remove record freezing

Remove record freezing. Because kprobes never puts probe on
ftrace's mcount call anymore, it doesn't need ftrace to check
whether kprobes on it.

Signed-off-by: Masami Hiramatsu <mhiramat@redhat.com>
Cc: systemtap <systemtap@sources.redhat.com>
Cc: DLE <dle-develop@lists.sourceforge.net>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: przemyslaw@pawelczyk.it
Cc: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <20100202214925.4694.73469.stgit@dhcp-100-2-132.bos.redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/ftrace.h |  1 -
 kernel/trace/ftrace.c  | 39 ---------------------------------------
 2 files changed, 40 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 9d127efed43c..eb054ae95605 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -143,7 +143,6 @@ enum {
 	FTRACE_FL_ENABLED	= (1 << 3),
 	FTRACE_FL_NOTRACE	= (1 << 4),
 	FTRACE_FL_CONVERTED	= (1 << 5),
-	FTRACE_FL_FROZEN	= (1 << 6),
 };
 
 struct dyn_ftrace {
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 3d90661a5f40..1904797f4a8a 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -22,7 +22,6 @@
 #include <linux/hardirq.h>
 #include <linux/kthread.h>
 #include <linux/uaccess.h>
-#include <linux/kprobes.h>
 #include <linux/ftrace.h>
 #include <linux/sysctl.h>
 #include <linux/ctype.h>
@@ -898,36 +897,6 @@ static struct dyn_ftrace *ftrace_free_records;
 		}				\
 	}
 
-#ifdef CONFIG_KPROBES
-
-static int frozen_record_count;
-
-static inline void freeze_record(struct dyn_ftrace *rec)
-{
-	if (!(rec->flags & FTRACE_FL_FROZEN)) {
-		rec->flags |= FTRACE_FL_FROZEN;
-		frozen_record_count++;
-	}
-}
-
-static inline void unfreeze_record(struct dyn_ftrace *rec)
-{
-	if (rec->flags & FTRACE_FL_FROZEN) {
-		rec->flags &= ~FTRACE_FL_FROZEN;
-		frozen_record_count--;
-	}
-}
-
-static inline int record_frozen(struct dyn_ftrace *rec)
-{
-	return rec->flags & FTRACE_FL_FROZEN;
-}
-#else
-# define freeze_record(rec)			({ 0; })
-# define unfreeze_record(rec)			({ 0; })
-# define record_frozen(rec)			({ 0; })
-#endif /* CONFIG_KPROBES */
-
 static void ftrace_free_rec(struct dyn_ftrace *rec)
 {
 	rec->freelist = ftrace_free_records;
@@ -1091,14 +1060,6 @@ static void ftrace_replace_code(int enable)
 		    !(rec->flags & FTRACE_FL_CONVERTED))
 			continue;
 
-		/* ignore updates to this record's mcount site */
-		if (get_kprobe((void *)rec->ip)) {
-			freeze_record(rec);
-			continue;
-		} else {
-			unfreeze_record(rec);
-		}
-
 		failed = __ftrace_replace_code(rec, enable);
 		if (failed) {
 			rec->flags |= FTRACE_FL_FAILED;
-- 
cgit v1.2.3


From 9e3af04f8787315f63f55b191bb9a06741dbf183 Mon Sep 17 00:00:00 2001
From: Mika Westerberg <ext-mika.1.westerberg@nokia.com>
Date: Thu, 4 Feb 2010 00:48:00 -0800
Subject: Input: gpio-keys - add support for disabling gpios through sysfs

Now gpio-keys input driver exports 4 new attributes to userland through
sysfs:
	/sys/devices/platform/gpio-keys/keys [ro]
	/sys/devices/platform/gpio-keys/switches [ro]
	/sys/devices/platform/gpio-keys/disabled_keys [rw]
	/sys/devices/platform/gpio-keys/disables_switches [rw]

With these attributes, userland program can read which keys and
switches can be disabled and then disable/enable them as needed.
Keys and switches are exported as stringified bitmap of codes
(keycodes or switch codes). For example keys 15, 89, 100, 101,
102 are exported as: '15,89,100-102'.

Description of the attributes:
	keys - bitmap of keys which can be disabled
	switches - bitmap of switches which can be disabled
	disabled_keys - bitmap of currently disabled keys
			(bit 1 means disabled, 0 enabled)
	disabled_switches - bitmap of currently disabled switches
			(bit 1 means disabled, 0 enabled)

Signed-off-by: Mika Westerberg <ext-mika.1.westerberg@nokia.com>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/input/keyboard/gpio_keys.c | 318 +++++++++++++++++++++++++++++++++++--
 include/linux/gpio_keys.h          |   1 +
 2 files changed, 308 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/input/keyboard/gpio_keys.c b/drivers/input/keyboard/gpio_keys.c
index 1aff3b76effd..2b708aa85553 100644
--- a/drivers/input/keyboard/gpio_keys.c
+++ b/drivers/input/keyboard/gpio_keys.c
@@ -30,13 +30,289 @@ struct gpio_button_data {
 	struct input_dev *input;
 	struct timer_list timer;
 	struct work_struct work;
+	bool disabled;
 };
 
 struct gpio_keys_drvdata {
 	struct input_dev *input;
+	struct mutex disable_lock;
+	unsigned int n_buttons;
 	struct gpio_button_data data[0];
 };
 
+/*
+ * SYSFS interface for enabling/disabling keys and switches:
+ *
+ * There are 4 attributes under /sys/devices/platform/gpio-keys/
+ *	keys [ro]              - bitmap of keys (EV_KEY) which can be
+ *	                         disabled
+ *	switches [ro]          - bitmap of switches (EV_SW) which can be
+ *	                         disabled
+ *	disabled_keys [rw]     - bitmap of keys currently disabled
+ *	disabled_switches [rw] - bitmap of switches currently disabled
+ *
+ * Userland can change these values and hence disable event generation
+ * for each key (or switch). Disabling a key means its interrupt line
+ * is disabled.
+ *
+ * For example, if we have following switches set up as gpio-keys:
+ *	SW_DOCK = 5
+ *	SW_CAMERA_LENS_COVER = 9
+ *	SW_KEYPAD_SLIDE = 10
+ *	SW_FRONT_PROXIMITY = 11
+ * This is read from switches:
+ *	11-9,5
+ * Next we want to disable proximity (11) and dock (5), we write:
+ *	11,5
+ * to file disabled_switches. Now proximity and dock IRQs are disabled.
+ * This can be verified by reading the file disabled_switches:
+ *	11,5
+ * If we now want to enable proximity (11) switch we write:
+ *	5
+ * to disabled_switches.
+ *
+ * We can disable only those keys which don't allow sharing the irq.
+ */
+
+/**
+ * get_n_events_by_type() - returns maximum number of events per @type
+ * @type: type of button (%EV_KEY, %EV_SW)
+ *
+ * Return value of this function can be used to allocate bitmap
+ * large enough to hold all bits for given type.
+ */
+static inline int get_n_events_by_type(int type)
+{
+	BUG_ON(type != EV_SW && type != EV_KEY);
+
+	return (type == EV_KEY) ? KEY_CNT : SW_CNT;
+}
+
+/**
+ * gpio_keys_disable_button() - disables given GPIO button
+ * @bdata: button data for button to be disabled
+ *
+ * Disables button pointed by @bdata. This is done by masking
+ * IRQ line. After this function is called, button won't generate
+ * input events anymore. Note that one can only disable buttons
+ * that don't share IRQs.
+ *
+ * Make sure that @bdata->disable_lock is locked when entering
+ * this function to avoid races when concurrent threads are
+ * disabling buttons at the same time.
+ */
+static void gpio_keys_disable_button(struct gpio_button_data *bdata)
+{
+	if (!bdata->disabled) {
+		/*
+		 * Disable IRQ and possible debouncing timer.
+		 */
+		disable_irq(gpio_to_irq(bdata->button->gpio));
+		if (bdata->button->debounce_interval)
+			del_timer_sync(&bdata->timer);
+
+		bdata->disabled = true;
+	}
+}
+
+/**
+ * gpio_keys_enable_button() - enables given GPIO button
+ * @bdata: button data for button to be disabled
+ *
+ * Enables given button pointed by @bdata.
+ *
+ * Make sure that @bdata->disable_lock is locked when entering
+ * this function to avoid races with concurrent threads trying
+ * to enable the same button at the same time.
+ */
+static void gpio_keys_enable_button(struct gpio_button_data *bdata)
+{
+	if (bdata->disabled) {
+		enable_irq(gpio_to_irq(bdata->button->gpio));
+		bdata->disabled = false;
+	}
+}
+
+/**
+ * gpio_keys_attr_show_helper() - fill in stringified bitmap of buttons
+ * @ddata: pointer to drvdata
+ * @buf: buffer where stringified bitmap is written
+ * @type: button type (%EV_KEY, %EV_SW)
+ * @only_disabled: does caller want only those buttons that are
+ *                 currently disabled or all buttons that can be
+ *                 disabled
+ *
+ * This function writes buttons that can be disabled to @buf. If
+ * @only_disabled is true, then @buf contains only those buttons
+ * that are currently disabled. Returns 0 on success or negative
+ * errno on failure.
+ */
+static ssize_t gpio_keys_attr_show_helper(struct gpio_keys_drvdata *ddata,
+					  char *buf, unsigned int type,
+					  bool only_disabled)
+{
+	int n_events = get_n_events_by_type(type);
+	unsigned long *bits;
+	ssize_t ret;
+	int i;
+
+	bits = kcalloc(BITS_TO_LONGS(n_events), sizeof(*bits), GFP_KERNEL);
+	if (!bits)
+		return -ENOMEM;
+
+	for (i = 0; i < ddata->n_buttons; i++) {
+		struct gpio_button_data *bdata = &ddata->data[i];
+
+		if (bdata->button->type != type)
+			continue;
+
+		if (only_disabled && !bdata->disabled)
+			continue;
+
+		__set_bit(bdata->button->code, bits);
+	}
+
+	ret = bitmap_scnlistprintf(buf, PAGE_SIZE - 2, bits, n_events);
+	buf[ret++] = '\n';
+	buf[ret] = '\0';
+
+	kfree(bits);
+
+	return ret;
+}
+
+/**
+ * gpio_keys_attr_store_helper() - enable/disable buttons based on given bitmap
+ * @ddata: pointer to drvdata
+ * @buf: buffer from userspace that contains stringified bitmap
+ * @type: button type (%EV_KEY, %EV_SW)
+ *
+ * This function parses stringified bitmap from @buf and disables/enables
+ * GPIO buttons accordinly. Returns 0 on success and negative error
+ * on failure.
+ */
+static ssize_t gpio_keys_attr_store_helper(struct gpio_keys_drvdata *ddata,
+					   const char *buf, unsigned int type)
+{
+	int n_events = get_n_events_by_type(type);
+	unsigned long *bits;
+	ssize_t error;
+	int i;
+
+	bits = kcalloc(BITS_TO_LONGS(n_events), sizeof(*bits), GFP_KERNEL);
+	if (!bits)
+		return -ENOMEM;
+
+	error = bitmap_parselist(buf, bits, n_events);
+	if (error)
+		goto out;
+
+	/* First validate */
+	for (i = 0; i < ddata->n_buttons; i++) {
+		struct gpio_button_data *bdata = &ddata->data[i];
+
+		if (bdata->button->type != type)
+			continue;
+
+		if (test_bit(bdata->button->code, bits) &&
+		    !bdata->button->can_disable) {
+			error = -EINVAL;
+			goto out;
+		}
+	}
+
+	mutex_lock(&ddata->disable_lock);
+
+	for (i = 0; i < ddata->n_buttons; i++) {
+		struct gpio_button_data *bdata = &ddata->data[i];
+
+		if (bdata->button->type != type)
+			continue;
+
+		if (test_bit(bdata->button->code, bits))
+			gpio_keys_disable_button(bdata);
+		else
+			gpio_keys_enable_button(bdata);
+	}
+
+	mutex_unlock(&ddata->disable_lock);
+
+out:
+	kfree(bits);
+	return error;
+}
+
+#define ATTR_SHOW_FN(name, type, only_disabled)				\
+static ssize_t gpio_keys_show_##name(struct device *dev,		\
+				     struct device_attribute *attr,	\
+				     char *buf)				\
+{									\
+	struct platform_device *pdev = to_platform_device(dev);		\
+	struct gpio_keys_drvdata *ddata = platform_get_drvdata(pdev);	\
+									\
+	return gpio_keys_attr_show_helper(ddata, buf,			\
+					  type, only_disabled);		\
+}
+
+ATTR_SHOW_FN(keys, EV_KEY, false);
+ATTR_SHOW_FN(switches, EV_SW, false);
+ATTR_SHOW_FN(disabled_keys, EV_KEY, true);
+ATTR_SHOW_FN(disabled_switches, EV_SW, true);
+
+/*
+ * ATTRIBUTES:
+ *
+ * /sys/devices/platform/gpio-keys/keys [ro]
+ * /sys/devices/platform/gpio-keys/switches [ro]
+ */
+static DEVICE_ATTR(keys, S_IRUGO, gpio_keys_show_keys, NULL);
+static DEVICE_ATTR(switches, S_IRUGO, gpio_keys_show_switches, NULL);
+
+#define ATTR_STORE_FN(name, type)					\
+static ssize_t gpio_keys_store_##name(struct device *dev,		\
+				      struct device_attribute *attr,	\
+				      const char *buf,			\
+				      size_t count)			\
+{									\
+	struct platform_device *pdev = to_platform_device(dev);		\
+	struct gpio_keys_drvdata *ddata = platform_get_drvdata(pdev);	\
+	ssize_t error;							\
+									\
+	error = gpio_keys_attr_store_helper(ddata, buf, type);		\
+	if (error)							\
+		return error;						\
+									\
+	return count;							\
+}
+
+ATTR_STORE_FN(disabled_keys, EV_KEY);
+ATTR_STORE_FN(disabled_switches, EV_SW);
+
+/*
+ * ATTRIBUTES:
+ *
+ * /sys/devices/platform/gpio-keys/disabled_keys [rw]
+ * /sys/devices/platform/gpio-keys/disables_switches [rw]
+ */
+static DEVICE_ATTR(disabled_keys, S_IWUSR | S_IRUGO,
+		   gpio_keys_show_disabled_keys,
+		   gpio_keys_store_disabled_keys);
+static DEVICE_ATTR(disabled_switches, S_IWUSR | S_IRUGO,
+		   gpio_keys_show_disabled_switches,
+		   gpio_keys_store_disabled_switches);
+
+static struct attribute *gpio_keys_attrs[] = {
+	&dev_attr_keys.attr,
+	&dev_attr_switches.attr,
+	&dev_attr_disabled_keys.attr,
+	&dev_attr_disabled_switches.attr,
+	NULL,
+};
+
+static struct attribute_group gpio_keys_attr_group = {
+	.attrs = gpio_keys_attrs,
+};
+
 static void gpio_keys_report_event(struct gpio_button_data *bdata)
 {
 	struct gpio_keys_button *button = bdata->button;
@@ -79,11 +355,13 @@ static irqreturn_t gpio_keys_isr(int irq, void *dev_id)
 	return IRQ_HANDLED;
 }
 
-static int __devinit gpio_keys_setup_key(struct device *dev,
+static int __devinit gpio_keys_setup_key(struct platform_device *pdev,
 					 struct gpio_button_data *bdata,
 					 struct gpio_keys_button *button)
 {
 	char *desc = button->desc ? button->desc : "gpio_keys";
+	struct device *dev = &pdev->dev;
+	unsigned long irqflags;
 	int irq, error;
 
 	setup_timer(&bdata->timer, gpio_keys_timer, (unsigned long)bdata);
@@ -112,10 +390,15 @@ static int __devinit gpio_keys_setup_key(struct device *dev,
 		goto fail3;
 	}
 
-	error = request_irq(irq, gpio_keys_isr,
-			    IRQF_SHARED |
-			    IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING,
-			    desc, bdata);
+	irqflags = IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING;
+	/*
+	 * If platform has specified that the button can be disabled,
+	 * we don't want it to share the interrupt line.
+	 */
+	if (!button->can_disable)
+		irqflags |= IRQF_SHARED;
+
+	error = request_irq(irq, gpio_keys_isr, irqflags, desc, bdata);
 	if (error) {
 		dev_err(dev, "Unable to claim irq %d; error %d\n",
 			irq, error);
@@ -149,6 +432,10 @@ static int __devinit gpio_keys_probe(struct platform_device *pdev)
 		goto fail1;
 	}
 
+	ddata->input = input;
+	ddata->n_buttons = pdata->nbuttons;
+	mutex_init(&ddata->disable_lock);
+
 	platform_set_drvdata(pdev, ddata);
 
 	input->name = pdev->name;
@@ -164,8 +451,6 @@ static int __devinit gpio_keys_probe(struct platform_device *pdev)
 	if (pdata->rep)
 		__set_bit(EV_REP, input->evbit);
 
-	ddata->input = input;
-
 	for (i = 0; i < pdata->nbuttons; i++) {
 		struct gpio_keys_button *button = &pdata->buttons[i];
 		struct gpio_button_data *bdata = &ddata->data[i];
@@ -174,7 +459,7 @@ static int __devinit gpio_keys_probe(struct platform_device *pdev)
 		bdata->input = input;
 		bdata->button = button;
 
-		error = gpio_keys_setup_key(dev, bdata, button);
+		error = gpio_keys_setup_key(pdev, bdata, button);
 		if (error)
 			goto fail2;
 
@@ -184,13 +469,20 @@ static int __devinit gpio_keys_probe(struct platform_device *pdev)
 		input_set_capability(input, type, button->code);
 	}
 
-	error = input_register_device(input);
+	error = sysfs_create_group(&pdev->dev.kobj, &gpio_keys_attr_group);
 	if (error) {
-		dev_err(dev, "Unable to register input device, "
-			"error: %d\n", error);
+		dev_err(dev, "Unable to export keys/switches, error: %d\n",
+			error);
 		goto fail2;
 	}
 
+	error = input_register_device(input);
+	if (error) {
+		dev_err(dev, "Unable to register input device, error: %d\n",
+			error);
+		goto fail3;
+	}
+
 	/* get current state of buttons */
 	for (i = 0; i < pdata->nbuttons; i++)
 		gpio_keys_report_event(&ddata->data[i]);
@@ -200,6 +492,8 @@ static int __devinit gpio_keys_probe(struct platform_device *pdev)
 
 	return 0;
 
+ fail3:
+	sysfs_remove_group(&pdev->dev.kobj, &gpio_keys_attr_group);
  fail2:
 	while (--i >= 0) {
 		free_irq(gpio_to_irq(pdata->buttons[i].gpio), &ddata->data[i]);
@@ -224,6 +518,8 @@ static int __devexit gpio_keys_remove(struct platform_device *pdev)
 	struct input_dev *input = ddata->input;
 	int i;
 
+	sysfs_remove_group(&pdev->dev.kobj, &gpio_keys_attr_group);
+
 	device_init_wakeup(&pdev->dev, 0);
 
 	for (i = 0; i < pdata->nbuttons; i++) {
diff --git a/include/linux/gpio_keys.h b/include/linux/gpio_keys.h
index 1289fa7623ca..cd0b3f30f48e 100644
--- a/include/linux/gpio_keys.h
+++ b/include/linux/gpio_keys.h
@@ -10,6 +10,7 @@ struct gpio_keys_button {
 	int type;		/* input event type (EV_KEY, EV_SW) */
 	int wakeup;		/* configure the button as a wake-up source */
 	int debounce_interval;	/* debounce ticks interval in msecs */
+	bool can_disable;
 };
 
 struct gpio_keys_platform_data {
-- 
cgit v1.2.3


From fce877e3a429940a986e085a41e8b57f2d922e36 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 29 Jan 2010 13:25:12 +0100
Subject: bitops: Ensure the compile time HWEIGHT is only used for such

Avoid accidental misuse by failing to compile things

Suggested-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/perf_event.c | 10 +++++++---
 include/linux/bitops.h           | 33 ++++++++++++++++++++++-----------
 2 files changed, 29 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 5b91992b6b25..96cfc1a4fe9f 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -93,13 +93,16 @@ struct cpu_hw_events {
 	struct perf_event	*event_list[X86_PMC_IDX_MAX]; /* in enabled order */
 };
 
-#define EVENT_CONSTRAINT(c, n, m) {	\
+#define __EVENT_CONSTRAINT(c, n, m, w) {\
 	{ .idxmsk64[0] = (n) },		\
 	.code = (c),			\
 	.cmask = (m),			\
-	.weight = HWEIGHT64((u64)(n)),	\
+	.weight = (w),			\
 }
 
+#define EVENT_CONSTRAINT(c, n, m)	\
+	__EVENT_CONSTRAINT(c, n, m, HWEIGHT(n))
+
 #define INTEL_EVENT_CONSTRAINT(c, n)	\
 	EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVTSEL_MASK)
 
@@ -2622,7 +2625,8 @@ void __init init_hw_perf_events(void)
 	register_die_notifier(&perf_event_nmi_notifier);
 
 	unconstrained = (struct event_constraint)
-		EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_events) - 1, 0);
+		__EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_events) - 1,
+				   0, x86_pmu.num_events);
 
 	pr_info("... version:                %d\n",     x86_pmu.version);
 	pr_info("... bit width:              %d\n",     x86_pmu.event_bits);
diff --git a/include/linux/bitops.h b/include/linux/bitops.h
index ba0fd1eb4af7..25b8b2f33ae9 100644
--- a/include/linux/bitops.h
+++ b/include/linux/bitops.h
@@ -45,19 +45,30 @@ static inline unsigned long hweight_long(unsigned long w)
 	return sizeof(w) == 4 ? hweight32(w) : hweight64(w);
 }
 
-#define HWEIGHT8(w)			\
-      (	(!!((w) & (1ULL << 0))) +	\
-	(!!((w) & (1ULL << 1))) +	\
-	(!!((w) & (1ULL << 2))) +	\
-	(!!((w) & (1ULL << 3))) +	\
-	(!!((w) & (1ULL << 4))) +	\
-	(!!((w) & (1ULL << 5))) +	\
-	(!!((w) & (1ULL << 6))) +	\
+/*
+ * Clearly slow versions of the hweightN() functions, their benefit is
+ * of course compile time evaluation of constant arguments.
+ */
+#define HWEIGHT8(w)					\
+      (	BUILD_BUG_ON_ZERO(!__builtin_constant_p(w)) +	\
+	(!!((w) & (1ULL << 0))) +			\
+	(!!((w) & (1ULL << 1))) +			\
+	(!!((w) & (1ULL << 2))) +			\
+	(!!((w) & (1ULL << 3))) +			\
+	(!!((w) & (1ULL << 4))) +			\
+	(!!((w) & (1ULL << 5))) +			\
+	(!!((w) & (1ULL << 6))) +			\
 	(!!((w) & (1ULL << 7)))	)
 
-#define HWEIGHT16(w) (HWEIGHT8(w)  + HWEIGHT8(w >> 8))
-#define HWEIGHT32(w) (HWEIGHT16(w) + HWEIGHT16(w >> 16))
-#define HWEIGHT64(w) (HWEIGHT32(w) + HWEIGHT32(w >> 32))
+#define HWEIGHT16(w) (HWEIGHT8(w)  + HWEIGHT8((w) >> 8))
+#define HWEIGHT32(w) (HWEIGHT16(w) + HWEIGHT16((w) >> 16))
+#define HWEIGHT64(w) (HWEIGHT32(w) + HWEIGHT32((w) >> 32))
+
+/*
+ * Type invariant version that simply casts things to the
+ * largest type.
+ */
+#define HWEIGHT(w)   HWEIGHT64((u64)(w))
 
 /**
  * rol32 - rotate a 32-bit value left
-- 
cgit v1.2.3


From 447a194b393f32699607fd99617a40abd6a95114 Mon Sep 17 00:00:00 2001
From: Stephane Eranian <eranian@google.com>
Date: Mon, 1 Feb 2010 14:50:01 +0200
Subject: perf_events, x86: Fix bug in hw_perf_enable()

We cannot assume that because hwc->idx == assign[i], we can avoid
reprogramming the counter in hw_perf_enable().

The event may have been scheduled out and another event may have been
programmed into this counter. Thus, we need a more robust way of
verifying if the counter still contains config/data related to an event.

This patch adds a generation number to each counter on each cpu. Using
this mechanism we can verify reliabilty whether the content of a counter
corresponds to an event.

Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <4b66dc67.0b38560a.1635.ffffae18@mx.google.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/perf_event.c | 34 ++++++++++++++++++++++++++++------
 include/linux/perf_event.h       |  2 ++
 2 files changed, 30 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 96cfc1a4fe9f..a920f173a220 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -90,6 +90,7 @@ struct cpu_hw_events {
 	int			n_events;
 	int			n_added;
 	int			assign[X86_PMC_IDX_MAX]; /* event to counter assignment */
+	u64			tags[X86_PMC_IDX_MAX];
 	struct perf_event	*event_list[X86_PMC_IDX_MAX]; /* in enabled order */
 };
 
@@ -1142,6 +1143,8 @@ static int __hw_perf_event_init(struct perf_event *event)
 	hwc->config = ARCH_PERFMON_EVENTSEL_INT;
 
 	hwc->idx = -1;
+	hwc->last_cpu = -1;
+	hwc->last_tag = ~0ULL;
 
 	/*
 	 * Count user and OS events unless requested not to.
@@ -1457,11 +1460,14 @@ static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader,
 	return n;
 }
 
-
 static inline void x86_assign_hw_event(struct perf_event *event,
-				struct hw_perf_event *hwc, int idx)
+				struct cpu_hw_events *cpuc, int i)
 {
-	hwc->idx = idx;
+	struct hw_perf_event *hwc = &event->hw;
+
+	hwc->idx = cpuc->assign[i];
+	hwc->last_cpu = smp_processor_id();
+	hwc->last_tag = ++cpuc->tags[i];
 
 	if (hwc->idx == X86_PMC_IDX_FIXED_BTS) {
 		hwc->config_base = 0;
@@ -1480,6 +1486,15 @@ static inline void x86_assign_hw_event(struct perf_event *event,
 	}
 }
 
+static inline int match_prev_assignment(struct hw_perf_event *hwc,
+					struct cpu_hw_events *cpuc,
+					int i)
+{
+	return hwc->idx == cpuc->assign[i] &&
+		hwc->last_cpu == smp_processor_id() &&
+		hwc->last_tag == cpuc->tags[i];
+}
+
 static void __x86_pmu_disable(struct perf_event *event, struct cpu_hw_events *cpuc);
 
 void hw_perf_enable(void)
@@ -1508,7 +1523,14 @@ void hw_perf_enable(void)
 			event = cpuc->event_list[i];
 			hwc = &event->hw;
 
-			if (hwc->idx == -1 || hwc->idx == cpuc->assign[i])
+			/*
+			 * we can avoid reprogramming counter if:
+			 * - assigned same counter as last time
+			 * - running on same CPU as last time
+			 * - no other event has used the counter since
+			 */
+			if (hwc->idx == -1 ||
+			    match_prev_assignment(hwc, cpuc, i))
 				continue;
 
 			__x86_pmu_disable(event, cpuc);
@@ -1522,12 +1544,12 @@ void hw_perf_enable(void)
 			hwc = &event->hw;
 
 			if (hwc->idx == -1) {
-				x86_assign_hw_event(event, hwc, cpuc->assign[i]);
+				x86_assign_hw_event(event, cpuc, i);
 				x86_perf_event_set_period(event, hwc, hwc->idx);
 			}
 			/*
 			 * need to mark as active because x86_pmu_disable()
-			 * clear active_mask and eventsp[] yet it preserves
+			 * clear active_mask and events[] yet it preserves
 			 * idx
 			 */
 			set_bit(hwc->idx, cpuc->active_mask);
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 556b0f4a668e..071a7db52549 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -478,9 +478,11 @@ struct hw_perf_event {
 	union {
 		struct { /* hardware */
 			u64		config;
+			u64		last_tag;
 			unsigned long	config_base;
 			unsigned long	event_base;
 			int		idx;
+			int		last_cpu;
 		};
 		struct { /* software */
 			s64		remaining;
-- 
cgit v1.2.3


From 2a61aa401638529cd4231f6106980d307fba98fa Mon Sep 17 00:00:00 2001
From: Adam Buchbinder <adam.buchbinder@gmail.com>
Date: Fri, 11 Dec 2009 16:35:40 -0500
Subject: Fix misspellings of "invocation" in comments.

Some comments misspell "invocation"; this fixes them. No code
changes.

Signed-off-by: Adam Buchbinder <adam.buchbinder@gmail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 fs/buffer.c            | 2 +-
 fs/mpage.c             | 2 +-
 include/linux/mmzone.h | 2 +-
 kernel/sched_cpupri.c  | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/fs/buffer.c b/fs/buffer.c
index 6fa530256bfd..1d920bab5e70 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2893,7 +2893,7 @@ int block_write_full_page_endio(struct page *page, get_block_t *get_block,
 
 	/*
 	 * The page straddles i_size.  It must be zeroed out on each and every
-	 * writepage invokation because it may be mmapped.  "A file is mapped
+	 * writepage invocation because it may be mmapped.  "A file is mapped
 	 * in multiples of the page size.  For a file that is not a multiple of
 	 * the  page size, the remaining memory is zeroed when mapped, and
 	 * writes to that region are not written out to the file."
diff --git a/fs/mpage.c b/fs/mpage.c
index 42381bd6543b..598d54e200eb 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -561,7 +561,7 @@ page_is_mapped:
 	if (page->index >= end_index) {
 		/*
 		 * The page straddles i_size.  It must be zeroed out on each
-		 * and every writepage invokation because it may be mmapped.
+		 * and every writepage invocation because it may be mmapped.
 		 * "A file is mapped in multiples of the page size.  For a file
 		 * that is not a multiple of the page size, the remaining memory
 		 * is zeroed when mapped, and writes to that region are not
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 30fe668c2542..e60a340fe890 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -349,7 +349,7 @@ struct zone {
 	 * prev_priority holds the scanning priority for this zone.  It is
 	 * defined as the scanning priority at which we achieved our reclaim
 	 * target at the previous try_to_free_pages() or balance_pgdat()
-	 * invokation.
+	 * invocation.
 	 *
 	 * We use prev_priority as a measure of how much stress page reclaim is
 	 * under - it drives the swappiness decision: whether to unmap mapped
diff --git a/kernel/sched_cpupri.c b/kernel/sched_cpupri.c
index 597b33099dfa..3db4b1a0e921 100644
--- a/kernel/sched_cpupri.c
+++ b/kernel/sched_cpupri.c
@@ -58,7 +58,7 @@ static int convert_prio(int prio)
  * @lowest_mask: A mask to fill in with selected CPUs (or NULL)
  *
  * Note: This function returns the recommended CPUs as calculated during the
- * current invokation.  By the time the call returns, the CPUs may have in
+ * current invocation.  By the time the call returns, the CPUs may have in
  * fact changed priorities any number of times.  While not ideal, it is not
  * an issue of correctness since the normal rebalancer logic will correct
  * any discrepancies created by racing against the uncertainty of the current
-- 
cgit v1.2.3


From 6683ece36e3531fc8c75f69e7165c5f20930be88 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jpirko@redhat.com>
Date: Thu, 4 Feb 2010 10:22:25 -0800
Subject: net: use helpers to access mc list V2

This patch introduces the similar helpers as those already done for uc list.
However multicast lists are no list_head lists but "mademanually". The three
macros added by this patch will make the transition of mc_list to list_head
smooth in two steps:

1) convert all drivers to use these macros (with the original iterator of type
   "struct dev_mc_list")
2) once all drivers are converted, convert list type and iterators to "struct
   netdev_hw_addr" in one patch.

>From now on, drivers can (and should) use "netdev_for_each_mc_addr" to iterate
over the addresses with iterator of type "struct netdev_hw_addr". Also macros
"netdev_mc_count" and "netdev_mc_empty" to read list's length. This is the state
which should be reached in all drivers.

Signed-off-by: Jiri Pirko <jpirko@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 622ba5aa93c4..e535700a3b72 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -268,6 +268,12 @@ struct netdev_hw_addr_list {
 #define netdev_for_each_uc_addr(ha, dev) \
 	list_for_each_entry(ha, &dev->uc.list, list)
 
+#define netdev_mc_count(dev) ((dev)->mc_count)
+#define netdev_mc_empty(dev) (netdev_mc_count(dev) == 0)
+
+#define netdev_for_each_mc_addr(mclist, dev) \
+	for (mclist = dev->mc_list; mclist; mclist = mclist->next)
+
 struct hh_cache {
 	struct hh_cache *hh_next;	/* Next entry			     */
 	atomic_t	hh_refcnt;	/* number of users                   */
-- 
cgit v1.2.3


From f8f76db1db369f3a130ac3fd33e2eee5f1610d9c Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jpirko@redhat.com>
Date: Thu, 4 Feb 2010 10:23:02 -0800
Subject: libphy: add phy_find_first function

Many drivers do this in them manually. Now they can use this function.

Signed-off-by: Jiri Pirko <jpirko@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/phy_device.c | 16 ++++++++++++++++
 include/linux/phy.h          |  1 +
 2 files changed, 17 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index adbc0fded130..db1794546c56 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -276,6 +276,22 @@ int phy_device_register(struct phy_device *phydev)
 }
 EXPORT_SYMBOL(phy_device_register);
 
+/**
+ * phy_find_first - finds the first PHY device on the bus
+ * @bus: the target MII bus
+ */
+struct phy_device *phy_find_first(struct mii_bus *bus)
+{
+	int addr;
+
+	for (addr = 0; addr < PHY_MAX_ADDR; addr++) {
+		if (bus->phy_map[addr])
+			return bus->phy_map[addr];
+	}
+	return NULL;
+}
+EXPORT_SYMBOL(phy_find_first);
+
 /**
  * phy_prepare_link - prepares the PHY layer to monitor link status
  * @phydev: target phy_device struct
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 6a7eb402165d..14d7fdf6a90a 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -452,6 +452,7 @@ int phy_attach_direct(struct net_device *dev, struct phy_device *phydev,
 		u32 flags, phy_interface_t interface);
 struct phy_device * phy_attach(struct net_device *dev,
 		const char *bus_id, u32 flags, phy_interface_t interface);
+struct phy_device *phy_find_first(struct mii_bus *bus);
 int phy_connect_direct(struct net_device *dev, struct phy_device *phydev,
 		void (*handler)(struct net_device *), u32 flags,
 		phy_interface_t interface);
-- 
cgit v1.2.3


From 8ee2bf9ab792d0c02b13ca3acbd036debb7745d9 Mon Sep 17 00:00:00 2001
From: Sriramakrishnan <srk@ti.com>
Date: Thu, 19 Nov 2009 15:58:25 +0530
Subject: TI Davinci EMAC : Re-use driver for other platforms.

The davinci EMAC peripheral is also available on other TI
platforms -notably TI AM3517 SoC. This patch modifies the
config option and the platform structure header files so that
the driver can be reused on non-davinci platforms as well.

Signed-off-by: Sriramakrishnan <srk@ti.com>
Acked-by: Chaithrika U S <chaithrika@ti.com>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Kevin Hilman <khilman@deeprootsystems.com>
---
 arch/arm/mach-davinci/common.c              |  2 +-
 arch/arm/mach-davinci/include/mach/da8xx.h  |  2 +-
 arch/arm/mach-davinci/include/mach/dm365.h  |  2 +-
 arch/arm/mach-davinci/include/mach/dm644x.h |  2 +-
 arch/arm/mach-davinci/include/mach/dm646x.h |  2 +-
 arch/arm/mach-davinci/include/mach/emac.h   | 36 -----------------------------
 drivers/net/Kconfig                         |  2 +-
 drivers/net/davinci_emac.c                  |  3 +--
 include/linux/davinci_emac.h                | 36 +++++++++++++++++++++++++++++
 9 files changed, 43 insertions(+), 44 deletions(-)
 delete mode 100644 arch/arm/mach-davinci/include/mach/emac.h
 create mode 100644 include/linux/davinci_emac.h

(limited to 'include/linux')

diff --git a/arch/arm/mach-davinci/common.c b/arch/arm/mach-davinci/common.c
index c2de94cde56a..94f27cbcd55a 100644
--- a/arch/arm/mach-davinci/common.c
+++ b/arch/arm/mach-davinci/common.c
@@ -11,13 +11,13 @@
 #include <linux/module.h>
 #include <linux/io.h>
 #include <linux/etherdevice.h>
+#include <linux/davinci_emac.h>
 
 #include <asm/tlb.h>
 #include <asm/mach/map.h>
 
 #include <mach/common.h>
 #include <mach/cputype.h>
-#include <mach/emac.h>
 
 #include "clock.h"
 
diff --git a/arch/arm/mach-davinci/include/mach/da8xx.h b/arch/arm/mach-davinci/include/mach/da8xx.h
index d43a4b6b6d76..d9a7f11894c4 100644
--- a/arch/arm/mach-davinci/include/mach/da8xx.h
+++ b/arch/arm/mach-davinci/include/mach/da8xx.h
@@ -13,10 +13,10 @@
 
 #include <video/da8xx-fb.h>
 
+#include <linux/davinci_emac.h>
 #include <mach/serial.h>
 #include <mach/edma.h>
 #include <mach/i2c.h>
-#include <mach/emac.h>
 #include <mach/asp.h>
 #include <mach/mmc.h>
 #include <mach/usb.h>
diff --git a/arch/arm/mach-davinci/include/mach/dm365.h b/arch/arm/mach-davinci/include/mach/dm365.h
index 9fc5a64a5364..c3610eb39869 100644
--- a/arch/arm/mach-davinci/include/mach/dm365.h
+++ b/arch/arm/mach-davinci/include/mach/dm365.h
@@ -14,8 +14,8 @@
 #define __ASM_ARCH_DM665_H
 
 #include <linux/platform_device.h>
+#include <linux/davinci_emac.h>
 #include <mach/hardware.h>
-#include <mach/emac.h>
 #include <mach/asp.h>
 #include <mach/keyscan.h>
 #include <media/davinci/vpfe_capture.h>
diff --git a/arch/arm/mach-davinci/include/mach/dm644x.h b/arch/arm/mach-davinci/include/mach/dm644x.h
index 44e8f0fae9ea..1a8b09ccc3c8 100644
--- a/arch/arm/mach-davinci/include/mach/dm644x.h
+++ b/arch/arm/mach-davinci/include/mach/dm644x.h
@@ -22,8 +22,8 @@
 #ifndef __ASM_ARCH_DM644X_H
 #define __ASM_ARCH_DM644X_H
 
+#include <linux/davinci_emac.h>
 #include <mach/hardware.h>
-#include <mach/emac.h>
 #include <mach/asp.h>
 #include <media/davinci/vpfe_capture.h>
 
diff --git a/arch/arm/mach-davinci/include/mach/dm646x.h b/arch/arm/mach-davinci/include/mach/dm646x.h
index 8221153bb2af..846da98b619a 100644
--- a/arch/arm/mach-davinci/include/mach/dm646x.h
+++ b/arch/arm/mach-davinci/include/mach/dm646x.h
@@ -12,11 +12,11 @@
 #define __ASM_ARCH_DM646X_H
 
 #include <mach/hardware.h>
-#include <mach/emac.h>
 #include <mach/asp.h>
 #include <linux/i2c.h>
 #include <linux/videodev2.h>
 #include <linux/clk.h>
+#include <linux/davinci_emac.h>
 
 #define DM646X_EMAC_BASE		(0x01C80000)
 #define DM646X_EMAC_CNTRL_OFFSET	(0x0000)
diff --git a/arch/arm/mach-davinci/include/mach/emac.h b/arch/arm/mach-davinci/include/mach/emac.h
deleted file mode 100644
index beff4fb7c845..000000000000
--- a/arch/arm/mach-davinci/include/mach/emac.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * TI DaVinci EMAC platform support
- *
- * Author: Kevin Hilman, Deep Root Systems, LLC
- *
- * 2007 (c) Deep Root Systems, LLC. This file is licensed under
- * the terms of the GNU General Public License version 2. This program
- * is licensed "as is" without any warranty of any kind, whether express
- * or implied.
- */
-#ifndef _MACH_DAVINCI_EMAC_H
-#define _MACH_DAVINCI_EMAC_H
-
-#include <linux/if_ether.h>
-#include <linux/memory.h>
-
-struct emac_platform_data {
-	char mac_addr[ETH_ALEN];
-	u32 ctrl_reg_offset;
-	u32 ctrl_mod_reg_offset;
-	u32 ctrl_ram_offset;
-	u32 mdio_reg_offset;
-	u32 ctrl_ram_size;
-	u32 phy_mask;
-	u32 mdio_max_freq;
-	u8 rmii_en;
-	u8 version;
-};
-
-enum {
-	EMAC_VERSION_1,	/* DM644x */
-	EMAC_VERSION_2,	/* DM646x */
-};
-
-void davinci_get_mac_addr(struct memory_accessor *mem_acc, void *context);
-#endif
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index dd9a09c72dff..18300625b05b 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -920,7 +920,7 @@ config NET_NETX
 
 config TI_DAVINCI_EMAC
 	tristate "TI DaVinci EMAC Support"
-	depends on ARM && ARCH_DAVINCI
+	depends on ARM && ( ARCH_DAVINCI || ARCH_OMAP3 )
 	select PHYLIB
 	help
 	  This driver supports TI's DaVinci Ethernet .
diff --git a/drivers/net/davinci_emac.c b/drivers/net/davinci_emac.c
index 33c4fe26178c..9ebac35d3af0 100644
--- a/drivers/net/davinci_emac.c
+++ b/drivers/net/davinci_emac.c
@@ -62,12 +62,11 @@
 #include <linux/bitops.h>
 #include <linux/io.h>
 #include <linux/uaccess.h>
+#include <linux/davinci_emac.h>
 
 #include <asm/irq.h>
 #include <asm/page.h>
 
-#include <mach/emac.h>
-
 static int debug_level;
 module_param(debug_level, int, 0);
 MODULE_PARM_DESC(debug_level, "DaVinci EMAC debug level (NETIF_MSG bits)");
diff --git a/include/linux/davinci_emac.h b/include/linux/davinci_emac.h
new file mode 100644
index 000000000000..ff5548747df4
--- /dev/null
+++ b/include/linux/davinci_emac.h
@@ -0,0 +1,36 @@
+/*
+ * TI DaVinci EMAC platform support
+ *
+ * Author: Kevin Hilman, Deep Root Systems, LLC
+ *
+ * 2007 (c) Deep Root Systems, LLC. This file is licensed under
+ * the terms of the GNU General Public License version 2. This program
+ * is licensed "as is" without any warranty of any kind, whether express
+ * or implied.
+ */
+#ifndef _LINUX_DAVINCI_EMAC_H
+#define _LINUX_DAVINCI_EMAC_H
+
+#include <linux/if_ether.h>
+#include <linux/memory.h>
+
+struct emac_platform_data {
+	char mac_addr[ETH_ALEN];
+	u32 ctrl_reg_offset;
+	u32 ctrl_mod_reg_offset;
+	u32 ctrl_ram_offset;
+	u32 mdio_reg_offset;
+	u32 ctrl_ram_size;
+	u32 phy_mask;
+	u32 mdio_max_freq;
+	u8 rmii_en;
+	u8 version;
+};
+
+enum {
+	EMAC_VERSION_1,	/* DM644x */
+	EMAC_VERSION_2,	/* DM646x */
+};
+
+void davinci_get_mac_addr(struct memory_accessor *mem_acc, void *context);
+#endif
-- 
cgit v1.2.3


From 01a9af36cd9d25fc71e28192974732d8053bd1c0 Mon Sep 17 00:00:00 2001
From: Sriramakrishnan <srk@ti.com>
Date: Thu, 19 Nov 2009 15:58:26 +0530
Subject: TI Davinci EMAC : add platform specific interrupt enable/disable
 logic.

On certain SOCs, the EMAC controller is interfaced with a wrapper logic
for handling interrupts. This  patch implements a platform
specific hook to cater to platforms that require custom interrupt
handling logic

Signed-off-by: Sriramakrishnan <srk@ti.com>
Acked-by: Chaithrika U S <chaithrika@ti.com>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Kevin Hilman <khilman@deeprootsystems.com>
---
 drivers/net/davinci_emac.c   | 11 +++++++++++
 include/linux/davinci_emac.h |  2 ++
 2 files changed, 13 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/net/davinci_emac.c b/drivers/net/davinci_emac.c
index 9ebac35d3af0..c735b62baa03 100644
--- a/drivers/net/davinci_emac.c
+++ b/drivers/net/davinci_emac.c
@@ -487,6 +487,9 @@ struct emac_priv {
 	struct mii_bus *mii_bus;
 	struct phy_device *phydev;
 	spinlock_t lock;
+	/*platform specific members*/
+	void (*int_enable) (void);
+	void (*int_disable) (void);
 };
 
 /* clock frequency for EMAC */
@@ -1001,6 +1004,8 @@ static void emac_int_disable(struct emac_priv *priv)
 		emac_ctrl_write(EMAC_DM646X_CMRXINTEN, 0x0);
 		emac_ctrl_write(EMAC_DM646X_CMTXINTEN, 0x0);
 		/* NOTE: Rx Threshold and Misc interrupts are not disabled */
+		if (priv->int_disable)
+			priv->int_disable();
 
 		local_irq_restore(flags);
 
@@ -1020,6 +1025,9 @@ static void emac_int_disable(struct emac_priv *priv)
 static void emac_int_enable(struct emac_priv *priv)
 {
 	if (priv->version == EMAC_VERSION_2) {
+		if (priv->int_enable)
+			priv->int_enable();
+
 		emac_ctrl_write(EMAC_DM646X_CMRXINTEN, 0xff);
 		emac_ctrl_write(EMAC_DM646X_CMTXINTEN, 0xff);
 
@@ -2659,6 +2667,9 @@ static int __devinit davinci_emac_probe(struct platform_device *pdev)
 	priv->phy_mask = pdata->phy_mask;
 	priv->rmii_en = pdata->rmii_en;
 	priv->version = pdata->version;
+	priv->int_enable = pdata->interrupt_enable;
+	priv->int_disable = pdata->interrupt_disable;
+
 	emac_dev = &ndev->dev;
 	/* Get EMAC platform data */
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
diff --git a/include/linux/davinci_emac.h b/include/linux/davinci_emac.h
index ff5548747df4..6d894efd8a3f 100644
--- a/include/linux/davinci_emac.h
+++ b/include/linux/davinci_emac.h
@@ -25,6 +25,8 @@ struct emac_platform_data {
 	u32 mdio_max_freq;
 	u8 rmii_en;
 	u8 version;
+	void (*interrupt_enable) (void);
+	void (*interrupt_disable) (void);
 };
 
 enum {
-- 
cgit v1.2.3


From ad021ae8862209864dc8ebd3b7d3a55ce84b9ea2 Mon Sep 17 00:00:00 2001
From: Sriramakrishnan <srk@ti.com>
Date: Thu, 19 Nov 2009 15:58:27 +0530
Subject: TI Davinci EMAC : Abstract Buffer address translation logic.

When programming the DMA engine, the next pointers must be
programmed with physical address as seen from the DMA master
address space. This address may be different from physical
address of the buffer RAM area. This patch abstracts the
buffer address translation logic.

Signed-off-by: Sriramakrishnan <srk@ti.com>
Acked-by: Chaithrika U S <chaithrika@ti.com>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Kevin Hilman <khilman@deeprootsystems.com>
---
 drivers/net/davinci_emac.c   | 41 ++++++++++++++++++++++++-----------------
 include/linux/davinci_emac.h |  1 +
 2 files changed, 25 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/davinci_emac.c b/drivers/net/davinci_emac.c
index c735b62baa03..1605bc225b0c 100644
--- a/drivers/net/davinci_emac.c
+++ b/drivers/net/davinci_emac.c
@@ -464,6 +464,7 @@ struct emac_priv {
 	void __iomem *ctrl_base;
 	void __iomem *emac_ctrl_ram;
 	u32 ctrl_ram_size;
+	u32 hw_ram_addr;
 	struct emac_txch *txch[EMAC_DEF_MAX_TX_CH];
 	struct emac_rxch *rxch[EMAC_DEF_MAX_RX_CH];
 	u32 link; /* 1=link on, 0=link off */
@@ -497,11 +498,9 @@ static struct clk *emac_clk;
 static unsigned long emac_bus_frequency;
 static unsigned long mdio_max_freq;
 
-/* EMAC internal utility function */
-static inline u32 emac_virt_to_phys(void __iomem *addr)
-{
-	return (u32 __force) io_v2p(addr);
-}
+#define emac_virt_to_phys(addr, priv) \
+	(((u32 __force)(addr) - (u32 __force)(priv->emac_ctrl_ram)) \
+	+ priv->hw_ram_addr)
 
 /* Cache macros - Packet buffers would be from skb pool which is cached */
 #define EMAC_VIRT_NOCACHE(addr) (addr)
@@ -1309,7 +1308,7 @@ static int emac_tx_bdproc(struct emac_priv *priv, u32 ch, u32 budget)
 	curr_bd = txch->active_queue_head;
 	if (NULL == curr_bd) {
 		emac_write(EMAC_TXCP(ch),
-			   emac_virt_to_phys(txch->last_hw_bdprocessed));
+			   emac_virt_to_phys(txch->last_hw_bdprocessed, priv));
 		txch->no_active_pkts++;
 		spin_unlock_irqrestore(&priv->tx_lock, flags);
 		return 0;
@@ -1319,7 +1318,7 @@ static int emac_tx_bdproc(struct emac_priv *priv, u32 ch, u32 budget)
 	while ((curr_bd) &&
 	      ((frame_status & EMAC_CPPI_OWNERSHIP_BIT) == 0) &&
 	      (pkts_processed < budget)) {
-		emac_write(EMAC_TXCP(ch), emac_virt_to_phys(curr_bd));
+		emac_write(EMAC_TXCP(ch), emac_virt_to_phys(curr_bd, priv));
 		txch->active_queue_head = curr_bd->next;
 		if (frame_status & EMAC_CPPI_EOQ_BIT) {
 			if (curr_bd->next) {	/* misqueued packet */
@@ -1406,7 +1405,7 @@ static int emac_send(struct emac_priv *priv, struct emac_netpktobj *pkt, u32 ch)
 		txch->active_queue_tail = curr_bd;
 		if (1 != txch->queue_active) {
 			emac_write(EMAC_TXHDP(ch),
-					emac_virt_to_phys(curr_bd));
+					emac_virt_to_phys(curr_bd, priv));
 			txch->queue_active = 1;
 		}
 		++txch->queue_reinit;
@@ -1418,10 +1417,11 @@ static int emac_send(struct emac_priv *priv, struct emac_netpktobj *pkt, u32 ch)
 		tail_bd->next = curr_bd;
 		txch->active_queue_tail = curr_bd;
 		tail_bd = EMAC_VIRT_NOCACHE(tail_bd);
-		tail_bd->h_next = (int)emac_virt_to_phys(curr_bd);
+		tail_bd->h_next = (int)emac_virt_to_phys(curr_bd, priv);
 		frame_status = tail_bd->mode;
 		if (frame_status & EMAC_CPPI_EOQ_BIT) {
-			emac_write(EMAC_TXHDP(ch), emac_virt_to_phys(curr_bd));
+			emac_write(EMAC_TXHDP(ch),
+				emac_virt_to_phys(curr_bd, priv));
 			frame_status &= ~(EMAC_CPPI_EOQ_BIT);
 			tail_bd->mode = frame_status;
 			++txch->end_of_queue_add;
@@ -1611,7 +1611,8 @@ static int emac_init_rxch(struct emac_priv *priv, u32 ch, char *param)
 		}
 
 		/* populate the hardware descriptor */
-		curr_bd->h_next = emac_virt_to_phys(rxch->active_queue_head);
+		curr_bd->h_next = emac_virt_to_phys(rxch->active_queue_head,
+				priv);
 		/* FIXME buff_ptr = dma_map_single(... data_ptr ...) */
 		curr_bd->buff_ptr = virt_to_phys(curr_bd->data_ptr);
 		curr_bd->off_b_len = rxch->buf_size;
@@ -1886,7 +1887,7 @@ static void emac_addbd_to_rx_queue(struct emac_priv *priv, u32 ch,
 		rxch->active_queue_tail = curr_bd;
 		if (0 != rxch->queue_active) {
 			emac_write(EMAC_RXHDP(ch),
-				   emac_virt_to_phys(rxch->active_queue_head));
+			   emac_virt_to_phys(rxch->active_queue_head, priv));
 			rxch->queue_active = 1;
 		}
 	} else {
@@ -1897,11 +1898,11 @@ static void emac_addbd_to_rx_queue(struct emac_priv *priv, u32 ch,
 		rxch->active_queue_tail = curr_bd;
 		tail_bd->next = curr_bd;
 		tail_bd = EMAC_VIRT_NOCACHE(tail_bd);
-		tail_bd->h_next = emac_virt_to_phys(curr_bd);
+		tail_bd->h_next = emac_virt_to_phys(curr_bd, priv);
 		frame_status = tail_bd->mode;
 		if (frame_status & EMAC_CPPI_EOQ_BIT) {
 			emac_write(EMAC_RXHDP(ch),
-					emac_virt_to_phys(curr_bd));
+					emac_virt_to_phys(curr_bd, priv));
 			frame_status &= ~(EMAC_CPPI_EOQ_BIT);
 			tail_bd->mode = frame_status;
 			++rxch->end_of_queue_add;
@@ -1994,7 +1995,7 @@ static int emac_rx_bdproc(struct emac_priv *priv, u32 ch, u32 budget)
 		curr_pkt->num_bufs = 1;
 		curr_pkt->pkt_length =
 			(frame_status & EMAC_RX_BD_PKT_LENGTH_MASK);
-		emac_write(EMAC_RXCP(ch), emac_virt_to_phys(curr_bd));
+		emac_write(EMAC_RXCP(ch), emac_virt_to_phys(curr_bd, priv));
 		++rxch->processed_bd;
 		last_bd = curr_bd;
 		curr_bd = last_bd->next;
@@ -2005,7 +2006,7 @@ static int emac_rx_bdproc(struct emac_priv *priv, u32 ch, u32 budget)
 			if (curr_bd) {
 				++rxch->mis_queued_packets;
 				emac_write(EMAC_RXHDP(ch),
-					   emac_virt_to_phys(curr_bd));
+					   emac_virt_to_phys(curr_bd, priv));
 			} else {
 				++rxch->end_of_queue;
 				rxch->queue_active = 0;
@@ -2106,7 +2107,7 @@ static int emac_hw_enable(struct emac_priv *priv)
 		emac_write(EMAC_RXINTMASKSET, BIT(ch));
 		rxch->queue_active = 1;
 		emac_write(EMAC_RXHDP(ch),
-			   emac_virt_to_phys(rxch->active_queue_head));
+			   emac_virt_to_phys(rxch->active_queue_head, priv));
 	}
 
 	/* Enable MII */
@@ -2702,6 +2703,12 @@ static int __devinit davinci_emac_probe(struct platform_device *pdev)
 	priv->ctrl_ram_size = pdata->ctrl_ram_size;
 	priv->emac_ctrl_ram = priv->remap_addr + pdata->ctrl_ram_offset;
 
+	if (pdata->hw_ram_addr)
+		priv->hw_ram_addr = pdata->hw_ram_addr;
+	else
+		priv->hw_ram_addr = (u32 __force)res->start +
+					pdata->ctrl_ram_offset;
+
 	res = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
 	if (!res) {
 		dev_err(emac_dev, "DaVinci EMAC: Error getting irq res\n");
diff --git a/include/linux/davinci_emac.h b/include/linux/davinci_emac.h
index 6d894efd8a3f..7c930dba477c 100644
--- a/include/linux/davinci_emac.h
+++ b/include/linux/davinci_emac.h
@@ -19,6 +19,7 @@ struct emac_platform_data {
 	u32 ctrl_reg_offset;
 	u32 ctrl_mod_reg_offset;
 	u32 ctrl_ram_offset;
+	u32 hw_ram_addr;
 	u32 mdio_reg_offset;
 	u32 ctrl_ram_size;
 	u32 phy_mask;
-- 
cgit v1.2.3


From bfd5f4a3d605e0f6054df0b59fe0907ff7e696d3 Mon Sep 17 00:00:00 2001
From: Sridhar Samudrala <sri@us.ibm.com>
Date: Thu, 4 Feb 2010 20:24:10 -0800
Subject: packet: Add GSO/csum offload support.

This patch adds GSO/checksum offload to af_packet sockets using
virtio_net_hdr. Based on Rusty's patch to add this support to tun.
It allows GSO/checksum offload to be enabled when using raw socket
backend with virtio_net.
Adds PACKET_VNET_HDR socket option to prepend virtio_net_hdr in the
receive path and process/skip virtio_net_hdr in the send path. This
option is only allowed with SOCK_RAW sockets attached to ethernet
type devices.

v2 updates
----------
Michael's Comments
- Perform length check in packet_snd() when GSO is off even when
  vnet_hdr is present.
- Check for SKB_GSO_FCOE type and return -EINVAL
- don't allow tx/rx ring when vnet_hdr is enabled.
Herbert's Comments
- Removed ethernet specific code.
- protocol value is assumed to be passed in by the caller.

Signed-off-by: Sridhar Samudrala <sri@us.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_packet.h |   1 +
 net/packet/af_packet.c    | 187 +++++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 177 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/if_packet.h b/include/linux/if_packet.h
index 4021d47cc437..aa57a5f993fc 100644
--- a/include/linux/if_packet.h
+++ b/include/linux/if_packet.h
@@ -46,6 +46,7 @@ struct sockaddr_ll {
 #define PACKET_RESERVE			12
 #define PACKET_TX_RING			13
 #define PACKET_LOSS			14
+#define PACKET_VNET_HDR			15
 
 struct tpacket_stats {
 	unsigned int	tp_packets;
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 53633c5fdb1d..178e2937bbaa 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -80,6 +80,7 @@
 #include <linux/init.h>
 #include <linux/mutex.h>
 #include <linux/if_vlan.h>
+#include <linux/virtio_net.h>
 
 #ifdef CONFIG_INET
 #include <net/inet_common.h>
@@ -193,7 +194,8 @@ struct packet_sock {
 	struct mutex		pg_vec_lock;
 	unsigned int		running:1,	/* prot_hook is attached*/
 				auxdata:1,
-				origdev:1;
+				origdev:1,
+				has_vnet_hdr:1;
 	int			ifindex;	/* bound device		*/
 	__be16			num;
 	struct packet_mclist	*mclist;
@@ -1056,6 +1058,30 @@ out:
 }
 #endif
 
+static inline struct sk_buff *packet_alloc_skb(struct sock *sk, size_t prepad,
+					       size_t reserve, size_t len,
+					       size_t linear, int noblock,
+					       int *err)
+{
+	struct sk_buff *skb;
+
+	/* Under a page?  Don't bother with paged skb. */
+	if (prepad + len < PAGE_SIZE || !linear)
+		linear = len;
+
+	skb = sock_alloc_send_pskb(sk, prepad + linear, len - linear, noblock,
+				   err);
+	if (!skb)
+		return NULL;
+
+	skb_reserve(skb, reserve);
+	skb_put(skb, linear);
+	skb->data_len = len - linear;
+	skb->len += len - linear;
+
+	return skb;
+}
+
 static int packet_snd(struct socket *sock,
 			  struct msghdr *msg, size_t len)
 {
@@ -1066,14 +1092,17 @@ static int packet_snd(struct socket *sock,
 	__be16 proto;
 	unsigned char *addr;
 	int ifindex, err, reserve = 0;
+	struct virtio_net_hdr vnet_hdr = { 0 };
+	int offset = 0;
+	int vnet_hdr_len;
+	struct packet_sock *po = pkt_sk(sk);
+	unsigned short gso_type = 0;
 
 	/*
 	 *	Get and verify the address.
 	 */
 
 	if (saddr == NULL) {
-		struct packet_sock *po = pkt_sk(sk);
-
 		ifindex	= po->ifindex;
 		proto	= po->num;
 		addr	= NULL;
@@ -1100,25 +1129,74 @@ static int packet_snd(struct socket *sock,
 	if (!(dev->flags & IFF_UP))
 		goto out_unlock;
 
+	if (po->has_vnet_hdr) {
+		vnet_hdr_len = sizeof(vnet_hdr);
+
+		err = -EINVAL;
+		if (len < vnet_hdr_len)
+			goto out_unlock;
+
+		len -= vnet_hdr_len;
+
+		err = memcpy_fromiovec((void *)&vnet_hdr, msg->msg_iov,
+				       vnet_hdr_len);
+		if (err < 0)
+			goto out_unlock;
+
+		if ((vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) &&
+		    (vnet_hdr.csum_start + vnet_hdr.csum_offset + 2 >
+		      vnet_hdr.hdr_len))
+			vnet_hdr.hdr_len = vnet_hdr.csum_start +
+						 vnet_hdr.csum_offset + 2;
+
+		err = -EINVAL;
+		if (vnet_hdr.hdr_len > len)
+			goto out_unlock;
+
+		if (vnet_hdr.gso_type != VIRTIO_NET_HDR_GSO_NONE) {
+			switch (vnet_hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
+			case VIRTIO_NET_HDR_GSO_TCPV4:
+				gso_type = SKB_GSO_TCPV4;
+				break;
+			case VIRTIO_NET_HDR_GSO_TCPV6:
+				gso_type = SKB_GSO_TCPV6;
+				break;
+			case VIRTIO_NET_HDR_GSO_UDP:
+				gso_type = SKB_GSO_UDP;
+				break;
+			default:
+				goto out_unlock;
+			}
+
+			if (vnet_hdr.gso_type & VIRTIO_NET_HDR_GSO_ECN)
+				gso_type |= SKB_GSO_TCP_ECN;
+
+			if (vnet_hdr.gso_size == 0)
+				goto out_unlock;
+
+		}
+	}
+
 	err = -EMSGSIZE;
-	if (len > dev->mtu+reserve)
+	if (!gso_type && (len > dev->mtu+reserve))
 		goto out_unlock;
 
-	skb = sock_alloc_send_skb(sk, len + LL_ALLOCATED_SPACE(dev),
-				msg->msg_flags & MSG_DONTWAIT, &err);
+	err = -ENOBUFS;
+	skb = packet_alloc_skb(sk, LL_ALLOCATED_SPACE(dev),
+			       LL_RESERVED_SPACE(dev), len, vnet_hdr.hdr_len,
+			       msg->msg_flags & MSG_DONTWAIT, &err);
 	if (skb == NULL)
 		goto out_unlock;
 
-	skb_reserve(skb, LL_RESERVED_SPACE(dev));
-	skb_reset_network_header(skb);
+	skb_set_network_header(skb, reserve);
 
 	err = -EINVAL;
 	if (sock->type == SOCK_DGRAM &&
-	    dev_hard_header(skb, dev, ntohs(proto), addr, NULL, len) < 0)
+	    (offset = dev_hard_header(skb, dev, ntohs(proto), addr, NULL, len)) < 0)
 		goto out_free;
 
 	/* Returns -EFAULT on error */
-	err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
+	err = skb_copy_datagram_from_iovec(skb, offset, msg->msg_iov, 0, len);
 	if (err)
 		goto out_free;
 
@@ -1127,6 +1205,25 @@ static int packet_snd(struct socket *sock,
 	skb->priority = sk->sk_priority;
 	skb->mark = sk->sk_mark;
 
+	if (po->has_vnet_hdr) {
+		if (vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
+			if (!skb_partial_csum_set(skb, vnet_hdr.csum_start,
+						  vnet_hdr.csum_offset)) {
+				err = -EINVAL;
+				goto out_free;
+			}
+		}
+
+		skb_shinfo(skb)->gso_size = vnet_hdr.gso_size;
+		skb_shinfo(skb)->gso_type = gso_type;
+
+		/* Header must be checked, and gso_segs computed. */
+		skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
+		skb_shinfo(skb)->gso_segs = 0;
+
+		len += vnet_hdr_len;
+	}
+
 	/*
 	 *	Now send it
 	 */
@@ -1420,6 +1517,7 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
 	struct sk_buff *skb;
 	int copied, err;
 	struct sockaddr_ll *sll;
+	int vnet_hdr_len = 0;
 
 	err = -EINVAL;
 	if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT))
@@ -1451,6 +1549,48 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
 	if (skb == NULL)
 		goto out;
 
+	if (pkt_sk(sk)->has_vnet_hdr) {
+		struct virtio_net_hdr vnet_hdr = { 0 };
+
+		err = -EINVAL;
+		vnet_hdr_len = sizeof(vnet_hdr);
+		if ((len -= vnet_hdr_len) < 0)
+			goto out_free;
+
+		if (skb_is_gso(skb)) {
+			struct skb_shared_info *sinfo = skb_shinfo(skb);
+
+			/* This is a hint as to how much should be linear. */
+			vnet_hdr.hdr_len = skb_headlen(skb);
+			vnet_hdr.gso_size = sinfo->gso_size;
+			if (sinfo->gso_type & SKB_GSO_TCPV4)
+				vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
+			else if (sinfo->gso_type & SKB_GSO_TCPV6)
+				vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
+			else if (sinfo->gso_type & SKB_GSO_UDP)
+				vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_UDP;
+			else if (sinfo->gso_type & SKB_GSO_FCOE)
+				goto out_free;
+			else
+				BUG();
+			if (sinfo->gso_type & SKB_GSO_TCP_ECN)
+				vnet_hdr.gso_type |= VIRTIO_NET_HDR_GSO_ECN;
+		} else
+			vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE;
+
+		if (skb->ip_summed == CHECKSUM_PARTIAL) {
+			vnet_hdr.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
+			vnet_hdr.csum_start = skb->csum_start -
+							skb_headroom(skb);
+			vnet_hdr.csum_offset = skb->csum_offset;
+		} /* else everything is zero */
+
+		err = memcpy_toiovec(msg->msg_iov, (void *)&vnet_hdr,
+				     vnet_hdr_len);
+		if (err < 0)
+			goto out_free;
+	}
+
 	/*
 	 *	If the address length field is there to be filled in, we fill
 	 *	it in now.
@@ -1502,7 +1642,7 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
 	 *	Free or return the buffer as appropriate. Again this
 	 *	hides all the races and re-entrancy issues from us.
 	 */
-	err = (flags&MSG_TRUNC) ? skb->len : copied;
+	err = vnet_hdr_len + ((flags&MSG_TRUNC) ? skb->len : copied);
 
 out_free:
 	skb_free_datagram(sk, skb);
@@ -1740,6 +1880,8 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
 
 		if (optlen < sizeof(req))
 			return -EINVAL;
+		if (pkt_sk(sk)->has_vnet_hdr)
+			return -EINVAL;
 		if (copy_from_user(&req, optval, sizeof(req)))
 			return -EFAULT;
 		return packet_set_ring(sk, &req, 0, optname == PACKET_TX_RING);
@@ -1826,6 +1968,22 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
 		po->origdev = !!val;
 		return 0;
 	}
+	case PACKET_VNET_HDR:
+	{
+		int val;
+
+		if (sock->type != SOCK_RAW)
+			return -EINVAL;
+		if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
+			return -EBUSY;
+		if (optlen < sizeof(val))
+			return -EINVAL;
+		if (copy_from_user(&val, optval, sizeof(val)))
+			return -EFAULT;
+
+		po->has_vnet_hdr = !!val;
+		return 0;
+	}
 	default:
 		return -ENOPROTOOPT;
 	}
@@ -1874,6 +2032,13 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
 			len = sizeof(int);
 		val = po->origdev;
 
+		data = &val;
+		break;
+	case PACKET_VNET_HDR:
+		if (len > sizeof(int))
+			len = sizeof(int);
+		val = po->has_vnet_hdr;
+
 		data = &val;
 		break;
 #ifdef CONFIG_PACKET_MMAP
-- 
cgit v1.2.3


From 947af2943576400628bba085eaa6b85143526133 Mon Sep 17 00:00:00 2001
From: Stefan Weil <weil@mail.berlios.de>
Date: Thu, 7 Jan 2010 00:03:52 +0100
Subject: Fix spelling of 'platform' in comments and doc

Replace platfrom -> platform.

This is a frequent spelling bug.

Signed-off-by: Stefan Weil <weil@mail.berlios.de>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 Documentation/driver-model/platform.txt       | 2 +-
 arch/arm/mach-davinci/include/mach/i2c.h      | 2 +-
 arch/arm/mach-s3c2410/include/mach/spi-gpio.h | 2 +-
 arch/arm/plat-s3c/include/plat/nand.h         | 2 +-
 arch/blackfin/include/asm/nand.h              | 4 ++--
 arch/powerpc/kernel/legacy_serial.c           | 2 +-
 drivers/mtd/maps/plat-ram.c                   | 2 +-
 drivers/net/cxgb3/sge.c                       | 4 ++--
 drivers/video/mbx/mbxfb.c                     | 2 +-
 include/linux/dm9000.h                        | 2 +-
 10 files changed, 12 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/driver-model/platform.txt b/Documentation/driver-model/platform.txt
index 2e2c2ea90ceb..41f41632ee55 100644
--- a/Documentation/driver-model/platform.txt
+++ b/Documentation/driver-model/platform.txt
@@ -192,7 +192,7 @@ command line. This will execute all matching early_param() callbacks.
 User specified early platform devices will be registered at this point.
 For the early serial console case the user can specify port on the
 kernel command line as "earlyprintk=serial.0" where "earlyprintk" is
-the class string, "serial" is the name of the platfrom driver and
+the class string, "serial" is the name of the platform driver and
 0 is the platform device id. If the id is -1 then the dot and the
 id can be omitted.
 
diff --git a/arch/arm/mach-davinci/include/mach/i2c.h b/arch/arm/mach-davinci/include/mach/i2c.h
index c248e9b7e825..44bdea13cc8c 100644
--- a/arch/arm/mach-davinci/include/mach/i2c.h
+++ b/arch/arm/mach-davinci/include/mach/i2c.h
@@ -1,5 +1,5 @@
 /*
- * DaVinci I2C controller platfrom_device info
+ * DaVinci I2C controller platform_device info
  *
  * Author: Vladimir Barinov, MontaVista Software, Inc. <source@mvista.com>
  *
diff --git a/arch/arm/mach-s3c2410/include/mach/spi-gpio.h b/arch/arm/mach-s3c2410/include/mach/spi-gpio.h
index 980a099e209c..dcef2287cb38 100644
--- a/arch/arm/mach-s3c2410/include/mach/spi-gpio.h
+++ b/arch/arm/mach-s3c2410/include/mach/spi-gpio.h
@@ -3,7 +3,7 @@
  * Copyright (c) 2006 Simtec Electronics
  *	Ben Dooks <ben@simtec.co.uk>
  *
- * S3C2410 - SPI Controller platfrom_device info
+ * S3C2410 - SPI Controller platform_device info
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
diff --git a/arch/arm/plat-s3c/include/plat/nand.h b/arch/arm/plat-s3c/include/plat/nand.h
index 226147b7e026..b64115fa93a4 100644
--- a/arch/arm/plat-s3c/include/plat/nand.h
+++ b/arch/arm/plat-s3c/include/plat/nand.h
@@ -3,7 +3,7 @@
  * Copyright (c) 2004 Simtec Electronics
  *	Ben Dooks <ben@simtec.co.uk>
  *
- * S3C2410 - NAND device controller platfrom_device info
+ * S3C2410 - NAND device controller platform_device info
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
diff --git a/arch/blackfin/include/asm/nand.h b/arch/blackfin/include/asm/nand.h
index 3ae8b569edfc..3a1e79dfc8d9 100644
--- a/arch/blackfin/include/asm/nand.h
+++ b/arch/blackfin/include/asm/nand.h
@@ -1,5 +1,5 @@
 /*
- * BF5XX - NAND flash controller platfrom_device info
+ * BF5XX - NAND flash controller platform_device info
  *
  * Copyright 2007-2008 Analog Devices, Inc.
  *
@@ -8,7 +8,7 @@
 
 /* struct bf5xx_nand_platform
  *
- * define a interface between platfrom board specific code and
+ * define a interface between platform board specific code and
  * bf54x NFC driver.
  *
  * nr_partitions = number of partitions pointed to be partitoons (or zero)
diff --git a/arch/powerpc/kernel/legacy_serial.c b/arch/powerpc/kernel/legacy_serial.c
index 9ddfaef1a184..035ada5443ee 100644
--- a/arch/powerpc/kernel/legacy_serial.c
+++ b/arch/powerpc/kernel/legacy_serial.c
@@ -469,7 +469,7 @@ static int __init serial_dev_init(void)
 		return -ENODEV;
 
 	/*
-	 * Before we register the platfrom serial devices, we need
+	 * Before we register the platform serial devices, we need
 	 * to fixup their interrupts and their IO ports.
 	 */
 	DBG("Fixing serial ports interrupts and IO ports ...\n");
diff --git a/drivers/mtd/maps/plat-ram.c b/drivers/mtd/maps/plat-ram.c
index dafb91944e70..76a76be5a7bd 100644
--- a/drivers/mtd/maps/plat-ram.c
+++ b/drivers/mtd/maps/plat-ram.c
@@ -4,7 +4,7 @@
  *	http://www.simtec.co.uk/products/SWLINUX/
  *	Ben Dooks <ben@simtec.co.uk>
  *
- * Generic platfrom device based RAM map
+ * Generic platform device based RAM map
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
diff --git a/drivers/net/cxgb3/sge.c b/drivers/net/cxgb3/sge.c
index bdbd14727e4b..5dbc125822b9 100644
--- a/drivers/net/cxgb3/sge.c
+++ b/drivers/net/cxgb3/sge.c
@@ -196,13 +196,13 @@ static inline void refill_rspq(struct adapter *adapter,
 /**
  *	need_skb_unmap - does the platform need unmapping of sk_buffs?
  *
- *	Returns true if the platfrom needs sk_buff unmapping.  The compiler
+ *	Returns true if the platform needs sk_buff unmapping.  The compiler
  *	optimizes away unecessary code if this returns true.
  */
 static inline int need_skb_unmap(void)
 {
 	/*
-	 * This structure is used to tell if the platfrom needs buffer
+	 * This structure is used to tell if the platform needs buffer
 	 * unmapping by checking if DECLARE_PCI_UNMAP_ADDR defines anything.
 	 */
 	struct dummy {
diff --git a/drivers/video/mbx/mbxfb.c b/drivers/video/mbx/mbxfb.c
index 01f77bcc68f9..afea9abbd678 100644
--- a/drivers/video/mbx/mbxfb.c
+++ b/drivers/video/mbx/mbxfb.c
@@ -693,7 +693,7 @@ static void __devinit setup_memc(struct fb_info *fbi)
 	unsigned long tmp;
 	int i;
 
-	/* FIXME: use platfrom specific parameters */
+	/* FIXME: use platform specific parameters */
 	/* setup SDRAM controller */
 	write_reg_dly((LMCFG_LMC_DS | LMCFG_LMC_TS | LMCFG_LMD_TS |
 		LMCFG_LMA_TS),
diff --git a/include/linux/dm9000.h b/include/linux/dm9000.h
index c30879cf93bc..96e87693d933 100644
--- a/include/linux/dm9000.h
+++ b/include/linux/dm9000.h
@@ -23,7 +23,7 @@
 #define DM9000_PLATF_NO_EEPROM	(0x0010)
 #define DM9000_PLATF_SIMPLE_PHY (0x0020)  /* Use NSR to find LinkStatus */
 
-/* platfrom data for platfrom device structure's platfrom_data field */
+/* platform data for platform device structure's platform_data field */
 
 struct dm9000_plat_data {
 	unsigned int	flags;
-- 
cgit v1.2.3


From 17622339af2536b32cf29699ddd4ba0fe79a61d5 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@opensource.se>
Date: Tue, 2 Feb 2010 14:41:39 -0800
Subject: clocksource: add argument to resume callback

Pass the clocksource as an argument to the clocksource resume callback.
Needed so we can point out which CMT channel the sh_cmt.c driver shall
resume.

Signed-off-by: Magnus Damm <damm@opensource.se>
Cc: john stultz <johnstul@us.ibm.com>
Cc: Paul Mundt <lethal@linux-sh.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/ia64/kernel/time.c     | 2 +-
 arch/x86/kernel/hpet.c      | 2 +-
 arch/x86/kernel/tsc.c       | 2 +-
 include/linux/clocksource.h | 2 +-
 kernel/time/clocksource.c   | 2 +-
 5 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c
index a35c661e5e89..47a192781b0a 100644
--- a/arch/ia64/kernel/time.c
+++ b/arch/ia64/kernel/time.c
@@ -61,7 +61,7 @@ unsigned long long sched_clock(void)
 
 #ifdef CONFIG_PARAVIRT
 static void
-paravirt_clocksource_resume(void)
+paravirt_clocksource_resume(struct clocksource *cs)
 {
 	if (pv_time_ops.clocksource_resume)
 		pv_time_ops.clocksource_resume();
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index ad80a1c718c6..ee4fa1bfcb33 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -266,7 +266,7 @@ static void hpet_resume_device(void)
 	force_hpet_resume();
 }
 
-static void hpet_resume_counter(void)
+static void hpet_resume_counter(struct clocksource *cs)
 {
 	hpet_resume_device();
 	hpet_restart_counter();
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 597683aa5ba0..9eeb9be26aa4 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -740,7 +740,7 @@ static cycle_t __vsyscall_fn vread_tsc(void)
 }
 #endif
 
-static void resume_tsc(void)
+static void resume_tsc(struct clocksource *cs)
 {
 	clocksource_tsc.cycle_last = 0;
 }
diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index 8a4a130cc196..0de7e72c3995 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -172,7 +172,7 @@ struct clocksource {
 	u64 max_idle_ns;
 	unsigned long flags;
 	cycle_t (*vread)(void);
-	void (*resume)(void);
+	void (*resume)(struct clocksource *cs);
 #ifdef CONFIG_IA64
 	void *fsys_mmio;        /* used by fsyscall asm code */
 #define CLKSRC_FSYS_MMIO_SET(mmio, addr)      ((mmio) = (addr))
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index e85c23404d34..08adacb2a1ed 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -449,7 +449,7 @@ void clocksource_resume(void)
 
 	list_for_each_entry(cs, &clocksource_list, list)
 		if (cs->resume)
-			cs->resume();
+			cs->resume(cs);
 
 	clocksource_resume_watchdog();
 }
-- 
cgit v1.2.3


From c54a42b19fbaae4e9f212322ecca25a6bc95c1ba Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@opensource.se>
Date: Tue, 2 Feb 2010 14:41:41 -0800
Subject: clocksource: add suspend callback

Add a clocksource suspend callback.  This callback can be used by the
clocksource driver to shutdown and perform any kind of late suspend
activities even though the clocksource driver itself is a non-sysdev
driver.

One example where this is useful is to fix the sh_cmt.c platform driver
that today suspends using the platform bus and shuts down the clocksource
too early.

With this callback in place the sh_cmt driver will suspend using the
clocksource and clockevent hooks and leave the platform device pm
callbacks unused.

Signed-off-by: Magnus Damm <damm@opensource.se>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: john stultz <johnstul@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/clocksource.h |  3 +++
 kernel/time/clocksource.c   | 12 ++++++++++++
 kernel/time/timekeeping.c   |  1 +
 3 files changed, 16 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index 0de7e72c3995..4bca8b60cdf7 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -154,6 +154,7 @@ extern u64 timecounter_cyc2time(struct timecounter *tc,
  * @max_idle_ns:	max idle time permitted by the clocksource (nsecs)
  * @flags:		flags describing special properties
  * @vread:		vsyscall based read
+ * @suspend:		suspend function for the clocksource, if necessary
  * @resume:		resume function for the clocksource, if necessary
  */
 struct clocksource {
@@ -172,6 +173,7 @@ struct clocksource {
 	u64 max_idle_ns;
 	unsigned long flags;
 	cycle_t (*vread)(void);
+	void (*suspend)(struct clocksource *cs);
 	void (*resume)(struct clocksource *cs);
 #ifdef CONFIG_IA64
 	void *fsys_mmio;        /* used by fsyscall asm code */
@@ -277,6 +279,7 @@ extern void clocksource_unregister(struct clocksource*);
 extern void clocksource_touch_watchdog(void);
 extern struct clocksource* clocksource_get_next(void);
 extern void clocksource_change_rating(struct clocksource *cs, int rating);
+extern void clocksource_suspend(void);
 extern void clocksource_resume(void);
 extern struct clocksource * __init __weak clocksource_default_clock(void);
 extern void clocksource_mark_unstable(struct clocksource *cs);
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 08adacb2a1ed..bd246660902c 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -440,6 +440,18 @@ static inline int clocksource_watchdog_kthread(void *data) { return 0; }
 
 #endif /* CONFIG_CLOCKSOURCE_WATCHDOG */
 
+/**
+ * clocksource_suspend - suspend the clocksource(s)
+ */
+void clocksource_suspend(void)
+{
+	struct clocksource *cs;
+
+	list_for_each_entry_reverse(cs, &clocksource_list, list)
+		if (cs->suspend)
+			cs->suspend(cs);
+}
+
 /**
  * clocksource_resume - resume the clocksource(s)
  */
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 7faaa32fbf4f..843d8a711b16 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -622,6 +622,7 @@ static int timekeeping_suspend(struct sys_device *dev, pm_message_t state)
 	write_sequnlock_irqrestore(&xtime_lock, flags);
 
 	clockevents_notify(CLOCK_EVT_NOTIFY_SUSPEND, NULL);
+	clocksource_suspend();
 
 	return 0;
 }
-- 
cgit v1.2.3


From 2938429501b73f6aeb312236eac7ed0416a07cd5 Mon Sep 17 00:00:00 2001
From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Fri, 5 Feb 2010 16:09:11 +1100
Subject: percpu: add __percpu for sparse

This is to make the annotation of percpu variables during the next merge
window less painfull.

Extracted from a patch by Rusty Russell.

Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/compiler.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 5be3dab4a695..188fcae10a99 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -15,6 +15,7 @@
 # define __acquire(x)	__context__(x,1)
 # define __release(x)	__context__(x,-1)
 # define __cond_lock(x,c)	((c) ? ({ __acquire(x); 1; }) : 0)
+# define __percpu	__attribute__((noderef, address_space(3)))
 extern void __chk_user_ptr(const volatile void __user *);
 extern void __chk_io_ptr(const volatile void __iomem *);
 #else
@@ -32,6 +33,7 @@ extern void __chk_io_ptr(const volatile void __iomem *);
 # define __acquire(x) (void)0
 # define __release(x) (void)0
 # define __cond_lock(x,c) (c)
+# define __percpu
 #endif
 
 #ifdef __KERNEL__
-- 
cgit v1.2.3


From 95a8b6efc5d07103583f706c8a5889437d537939 Mon Sep 17 00:00:00 2001
From: Mike Travis <travis@sgi.com>
Date: Tue, 2 Feb 2010 14:38:13 -0800
Subject: pci: Update pci_set_vga_state() to call arch functions

Update pci_set_vga_state to call arch dependent functions to enable Legacy
VGA I/O transactions to be redirected to correct target.

[akpm@linux-foundation.org: make pci_register_set_vga_state() __init]
Signed-off-by: Mike Travis <travis@sgi.com>
LKML-Reference: <201002022238.o12McE1J018723@imap1.linux-foundation.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Robin Holt <holt@sgi.com>
Cc: Jack Steiner <steiner@sgi.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Jesse Barnes <jbarnes@virtuousgeek.org>
Cc: David Airlie <airlied@linux.ie>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 drivers/pci/pci.c   | 25 ++++++++++++++++++++++++-
 include/linux/pci.h |  5 +++++
 2 files changed, 29 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 315fea47e784..ac2a576133f9 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -2615,6 +2615,23 @@ int pci_resource_bar(struct pci_dev *dev, int resno, enum pci_bar_type *type)
 	return 0;
 }
 
+/* Some architectures require additional programming to enable VGA */
+static arch_set_vga_state_t arch_set_vga_state;
+
+void __init pci_register_set_vga_state(arch_set_vga_state_t func)
+{
+	arch_set_vga_state = func;	/* NULL disables */
+}
+
+static int pci_set_vga_state_arch(struct pci_dev *dev, bool decode,
+		      unsigned int command_bits, bool change_bridge)
+{
+	if (arch_set_vga_state)
+		return arch_set_vga_state(dev, decode, command_bits,
+						change_bridge);
+	return 0;
+}
+
 /**
  * pci_set_vga_state - set VGA decode state on device and parents if requested
  * @dev: the PCI device
@@ -2628,9 +2645,15 @@ int pci_set_vga_state(struct pci_dev *dev, bool decode,
 	struct pci_bus *bus;
 	struct pci_dev *bridge;
 	u16 cmd;
+	int rc;
 
 	WARN_ON(command_bits & ~(PCI_COMMAND_IO|PCI_COMMAND_MEMORY));
 
+	/* ARCH specific VGA enables */
+	rc = pci_set_vga_state_arch(dev, decode, command_bits, change_bridge);
+	if (rc)
+		return rc;
+
 	pci_read_config_word(dev, PCI_COMMAND, &cmd);
 	if (decode == true)
 		cmd |= command_bits;
@@ -2845,6 +2868,7 @@ EXPORT_SYMBOL(pcim_pin_device);
 EXPORT_SYMBOL(pci_disable_device);
 EXPORT_SYMBOL(pci_find_capability);
 EXPORT_SYMBOL(pci_bus_find_capability);
+EXPORT_SYMBOL(pci_register_set_vga_state);
 EXPORT_SYMBOL(pci_release_regions);
 EXPORT_SYMBOL(pci_request_regions);
 EXPORT_SYMBOL(pci_request_regions_exclusive);
@@ -2877,4 +2901,3 @@ EXPORT_SYMBOL(pci_target_state);
 EXPORT_SYMBOL(pci_prepare_to_sleep);
 EXPORT_SYMBOL(pci_back_from_sleep);
 EXPORT_SYMBOL_GPL(pci_set_pcie_reset_state);
-
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 174e5392e51e..5da166e560c2 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -955,6 +955,11 @@ static inline int pci_proc_domain(struct pci_bus *bus)
 }
 #endif /* CONFIG_PCI_DOMAINS */
 
+/* some architectures require additional setup to direct VGA traffic */
+typedef int (*arch_set_vga_state_t)(struct pci_dev *pdev, bool decode,
+		      unsigned int command_bits, bool change_bridge);
+extern void pci_register_set_vga_state(arch_set_vga_state_t func);
+
 #else /* CONFIG_PCI is not enabled */
 
 /*
-- 
cgit v1.2.3


From 8eb988c70e7709b7bd1a69f0ec53d19ac20dea84 Mon Sep 17 00:00:00 2001
From: Mimi Zohar <zohar@linux.vnet.ibm.com>
Date: Wed, 20 Jan 2010 15:35:41 -0500
Subject: fix ima breakage

The "Untangling ima mess, part 2 with counters" patch messed
up the counters.  Based on conversations with Al Viro, this patch
streamlines ima_path_check() by removing the counter maintaince.
The counters are now updated independently, from measuring the file,
in __dentry_open() and alloc_file() by calling ima_counts_get().
ima_path_check() is called from nfsd and do_filp_open().
It also did not measure all files that should have been measured.
Reason: ima_path_check() got bogus value passed as mask.
[AV: mea culpa]
[AV: add missing nfsd bits]

Signed-off-by: Mimi Zohar <zohar@us.ibm.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namei.c                        |   6 +-
 fs/nfsd/vfs.c                     |   3 +-
 include/linux/ima.h               |   4 +-
 security/integrity/ima/ima_main.c | 236 +++++++++++++++-----------------------
 4 files changed, 97 insertions(+), 152 deletions(-)

(limited to 'include/linux')

diff --git a/fs/namei.c b/fs/namei.c
index 94a5e60779f9..cd77b6375efd 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1736,8 +1736,7 @@ do_last:
 		if (nd.root.mnt)
 			path_put(&nd.root);
 		if (!IS_ERR(filp)) {
-			error = ima_path_check(&filp->f_path, filp->f_mode &
-				       (MAY_READ | MAY_WRITE | MAY_EXEC));
+			error = ima_path_check(filp, acc_mode);
 			if (error) {
 				fput(filp);
 				filp = ERR_PTR(error);
@@ -1797,8 +1796,7 @@ ok:
 	}
 	filp = nameidata_to_filp(&nd);
 	if (!IS_ERR(filp)) {
-		error = ima_path_check(&filp->f_path, filp->f_mode &
-			       (MAY_READ | MAY_WRITE | MAY_EXEC));
+		error = ima_path_check(filp, acc_mode);
 		if (error) {
 			fput(filp);
 			filp = ERR_PTR(error);
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 325959e264ce..32477e3a645c 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -752,8 +752,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
 			    flags, current_cred());
 	if (IS_ERR(*filp))
 		host_err = PTR_ERR(*filp);
-	host_err = ima_path_check(&(*filp)->f_path,
-				  access & (MAY_READ | MAY_WRITE | MAY_EXEC));
+	host_err = ima_path_check(*filp, access);
 out_nfserr:
 	err = nfserrno(host_err);
 out:
diff --git a/include/linux/ima.h b/include/linux/ima.h
index 99dc6d5cf7e5..aa55a8f1f5b9 100644
--- a/include/linux/ima.h
+++ b/include/linux/ima.h
@@ -17,7 +17,7 @@ struct linux_binprm;
 extern int ima_bprm_check(struct linux_binprm *bprm);
 extern int ima_inode_alloc(struct inode *inode);
 extern void ima_inode_free(struct inode *inode);
-extern int ima_path_check(struct path *path, int mask);
+extern int ima_path_check(struct file *file, int mask);
 extern void ima_file_free(struct file *file);
 extern int ima_file_mmap(struct file *file, unsigned long prot);
 extern void ima_counts_get(struct file *file);
@@ -38,7 +38,7 @@ static inline void ima_inode_free(struct inode *inode)
 	return;
 }
 
-static inline int ima_path_check(struct path *path, int mask)
+static inline int ima_path_check(struct file *file, int mask)
 {
 	return 0;
 }
diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c
index a89f44d5e030..75aee18f6163 100644
--- a/security/integrity/ima/ima_main.c
+++ b/security/integrity/ima/ima_main.c
@@ -84,6 +84,36 @@ out:
 	return found;
 }
 
+/* ima_read_write_check - reflect possible reading/writing errors in the PCR.
+ *
+ * When opening a file for read, if the file is already open for write,
+ * the file could change, resulting in a file measurement error.
+ *
+ * Opening a file for write, if the file is already open for read, results
+ * in a time of measure, time of use (ToMToU) error.
+ *
+ * In either case invalidate the PCR.
+ */
+enum iint_pcr_error { TOMTOU, OPEN_WRITERS };
+static void ima_read_write_check(enum iint_pcr_error error,
+				 struct ima_iint_cache *iint,
+				 struct inode *inode,
+				 const unsigned char *filename)
+{
+	switch (error) {
+	case TOMTOU:
+		if (iint->readcount > 0)
+			ima_add_violation(inode, filename, "invalid_pcr",
+					  "ToMToU");
+		break;
+	case OPEN_WRITERS:
+		if (iint->writecount > 0)
+			ima_add_violation(inode, filename, "invalid_pcr",
+					  "open_writers");
+		break;
+	}
+}
+
 /*
  * Update the counts given an fmode_t
  */
@@ -98,6 +128,47 @@ static void ima_inc_counts(struct ima_iint_cache *iint, fmode_t mode)
 		iint->writecount++;
 }
 
+/*
+ * ima_counts_get - increment file counts
+ *
+ * Maintain read/write counters for all files, but only
+ * invalidate the PCR for measured files:
+ * 	- Opening a file for write when already open for read,
+ *	  results in a time of measure, time of use (ToMToU) error.
+ *	- Opening a file for read when already open for write,
+ * 	  could result in a file measurement error.
+ *
+ */
+void ima_counts_get(struct file *file)
+{
+	struct dentry *dentry = file->f_path.dentry;
+	struct inode *inode = dentry->d_inode;
+	fmode_t mode = file->f_mode;
+	struct ima_iint_cache *iint;
+	int rc;
+
+	if (!ima_initialized || !S_ISREG(inode->i_mode))
+		return;
+	iint = ima_iint_find_get(inode);
+	if (!iint)
+		return;
+	mutex_lock(&iint->mutex);
+	rc = ima_must_measure(iint, inode, MAY_READ, PATH_CHECK);
+	if (rc < 0)
+		goto out;
+
+	if (mode & FMODE_WRITE) {
+		ima_read_write_check(TOMTOU, iint, inode, dentry->d_name.name);
+		goto out;
+	}
+	ima_read_write_check(OPEN_WRITERS, iint, inode, dentry->d_name.name);
+out:
+	ima_inc_counts(iint, file->f_mode);
+	mutex_unlock(&iint->mutex);
+
+	kref_put(&iint->refcount, iint_free);
+}
+
 /*
  * Decrement ima counts
  */
@@ -153,123 +224,6 @@ void ima_file_free(struct file *file)
 	kref_put(&iint->refcount, iint_free);
 }
 
-/* ima_read_write_check - reflect possible reading/writing errors in the PCR.
- *
- * When opening a file for read, if the file is already open for write,
- * the file could change, resulting in a file measurement error.
- *
- * Opening a file for write, if the file is already open for read, results
- * in a time of measure, time of use (ToMToU) error.
- *
- * In either case invalidate the PCR.
- */
-enum iint_pcr_error { TOMTOU, OPEN_WRITERS };
-static void ima_read_write_check(enum iint_pcr_error error,
-				 struct ima_iint_cache *iint,
-				 struct inode *inode,
-				 const unsigned char *filename)
-{
-	switch (error) {
-	case TOMTOU:
-		if (iint->readcount > 0)
-			ima_add_violation(inode, filename, "invalid_pcr",
-					  "ToMToU");
-		break;
-	case OPEN_WRITERS:
-		if (iint->writecount > 0)
-			ima_add_violation(inode, filename, "invalid_pcr",
-					  "open_writers");
-		break;
-	}
-}
-
-static int get_path_measurement(struct ima_iint_cache *iint, struct file *file,
-				const unsigned char *filename)
-{
-	int rc = 0;
-
-	ima_inc_counts(iint, file->f_mode);
-
-	rc = ima_collect_measurement(iint, file);
-	if (!rc)
-		ima_store_measurement(iint, file, filename);
-	return rc;
-}
-
-/**
- * ima_path_check - based on policy, collect/store measurement.
- * @path: contains a pointer to the path to be measured
- * @mask: contains MAY_READ, MAY_WRITE or MAY_EXECUTE
- *
- * Measure the file being open for readonly, based on the
- * ima_must_measure() policy decision.
- *
- * Keep read/write counters for all files, but only
- * invalidate the PCR for measured files:
- * 	- Opening a file for write when already open for read,
- *	  results in a time of measure, time of use (ToMToU) error.
- *	- Opening a file for read when already open for write,
- * 	  could result in a file measurement error.
- *
- * Always return 0 and audit dentry_open failures.
- * (Return code will be based upon measurement appraisal.)
- */
-int ima_path_check(struct path *path, int mask)
-{
-	struct inode *inode = path->dentry->d_inode;
-	struct ima_iint_cache *iint;
-	struct file *file = NULL;
-	int rc;
-
-	if (!ima_initialized || !S_ISREG(inode->i_mode))
-		return 0;
-	iint = ima_iint_find_get(inode);
-	if (!iint)
-		return 0;
-
-	mutex_lock(&iint->mutex);
-
-	rc = ima_must_measure(iint, inode, MAY_READ, PATH_CHECK);
-	if (rc < 0)
-		goto out;
-
-	if ((mask & MAY_WRITE) || (mask == 0))
-		ima_read_write_check(TOMTOU, iint, inode,
-				     path->dentry->d_name.name);
-
-	if ((mask & (MAY_WRITE | MAY_READ | MAY_EXEC)) != MAY_READ)
-		goto out;
-
-	ima_read_write_check(OPEN_WRITERS, iint, inode,
-			     path->dentry->d_name.name);
-	if (!(iint->flags & IMA_MEASURED)) {
-		struct dentry *dentry = dget(path->dentry);
-		struct vfsmount *mnt = mntget(path->mnt);
-
-		file = dentry_open(dentry, mnt, O_RDONLY | O_LARGEFILE,
-				   current_cred());
-		if (IS_ERR(file)) {
-			int audit_info = 0;
-
-			integrity_audit_msg(AUDIT_INTEGRITY_PCR, inode,
-					    dentry->d_name.name,
-					    "add_measurement",
-					    "dentry_open failed",
-					    1, audit_info);
-			file = NULL;
-			goto out;
-		}
-		rc = get_path_measurement(iint, file, dentry->d_name.name);
-	}
-out:
-	mutex_unlock(&iint->mutex);
-	if (file)
-		fput(file);
-	kref_put(&iint->refcount, iint_free);
-	return 0;
-}
-EXPORT_SYMBOL_GPL(ima_path_check);
-
 static int process_measurement(struct file *file, const unsigned char *filename,
 			       int mask, int function)
 {
@@ -297,33 +251,6 @@ out:
 	return rc;
 }
 
-/*
- * ima_counts_get - increment file counts
- *
- * - for IPC shm and shmat file.
- * - for nfsd exported files.
- *
- * Increment the counts for these files to prevent unnecessary
- * imbalance messages.
- */
-void ima_counts_get(struct file *file)
-{
-	struct inode *inode = file->f_dentry->d_inode;
-	struct ima_iint_cache *iint;
-
-	if (!ima_initialized || !S_ISREG(inode->i_mode))
-		return;
-	iint = ima_iint_find_get(inode);
-	if (!iint)
-		return;
-	mutex_lock(&iint->mutex);
-	ima_inc_counts(iint, file->f_mode);
-	mutex_unlock(&iint->mutex);
-
-	kref_put(&iint->refcount, iint_free);
-}
-EXPORT_SYMBOL_GPL(ima_counts_get);
-
 /**
  * ima_file_mmap - based on policy, collect/store measurement.
  * @file: pointer to the file to be measured (May be NULL)
@@ -369,6 +296,27 @@ int ima_bprm_check(struct linux_binprm *bprm)
 	return 0;
 }
 
+/**
+ * ima_path_check - based on policy, collect/store measurement.
+ * @file: pointer to the file to be measured
+ * @mask: contains MAY_READ, MAY_WRITE or MAY_EXECUTE
+ *
+ * Measure files based on the ima_must_measure() policy decision.
+ *
+ * Always return 0 and audit dentry_open failures.
+ * (Return code will be based upon measurement appraisal.)
+ */
+int ima_path_check(struct file *file, int mask)
+{
+	int rc;
+
+	rc = process_measurement(file, file->f_dentry->d_name.name,
+				 mask & (MAY_READ | MAY_WRITE | MAY_EXEC),
+				 PATH_CHECK);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ima_path_check);
+
 static int __init init_ima(void)
 {
 	int error;
-- 
cgit v1.2.3


From 9bbb6cad0173e6220f3ac609e26beb48dab3b7cd Mon Sep 17 00:00:00 2001
From: Mimi Zohar <zohar@linux.vnet.ibm.com>
Date: Tue, 26 Jan 2010 17:02:40 -0500
Subject: ima: rename ima_path_check to ima_file_check

ima_path_check actually deals with files!  call it ima_file_check instead.

Signed-off-by: Eric Paris <eparis@redhat.com>
Acked-by: Mimi Zohar <zohar@linux.vnet.ibm.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namei.c                        | 4 ++--
 fs/nfsd/vfs.c                     | 2 +-
 include/linux/ima.h               | 4 ++--
 security/integrity/ima/ima_main.c | 6 +++---
 4 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/fs/namei.c b/fs/namei.c
index cd77b6375efd..d62fdc875f22 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1736,7 +1736,7 @@ do_last:
 		if (nd.root.mnt)
 			path_put(&nd.root);
 		if (!IS_ERR(filp)) {
-			error = ima_path_check(filp, acc_mode);
+			error = ima_file_check(filp, acc_mode);
 			if (error) {
 				fput(filp);
 				filp = ERR_PTR(error);
@@ -1796,7 +1796,7 @@ ok:
 	}
 	filp = nameidata_to_filp(&nd);
 	if (!IS_ERR(filp)) {
-		error = ima_path_check(filp, acc_mode);
+		error = ima_file_check(filp, acc_mode);
 		if (error) {
 			fput(filp);
 			filp = ERR_PTR(error);
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 32477e3a645c..97d79eff6b7f 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -752,7 +752,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
 			    flags, current_cred());
 	if (IS_ERR(*filp))
 		host_err = PTR_ERR(*filp);
-	host_err = ima_path_check(*filp, access);
+	host_err = ima_file_check(*filp, access);
 out_nfserr:
 	err = nfserrno(host_err);
 out:
diff --git a/include/linux/ima.h b/include/linux/ima.h
index aa55a8f1f5b9..975837e7d6c0 100644
--- a/include/linux/ima.h
+++ b/include/linux/ima.h
@@ -17,7 +17,7 @@ struct linux_binprm;
 extern int ima_bprm_check(struct linux_binprm *bprm);
 extern int ima_inode_alloc(struct inode *inode);
 extern void ima_inode_free(struct inode *inode);
-extern int ima_path_check(struct file *file, int mask);
+extern int ima_file_check(struct file *file, int mask);
 extern void ima_file_free(struct file *file);
 extern int ima_file_mmap(struct file *file, unsigned long prot);
 extern void ima_counts_get(struct file *file);
@@ -38,7 +38,7 @@ static inline void ima_inode_free(struct inode *inode)
 	return;
 }
 
-static inline int ima_path_check(struct file *file, int mask)
+static inline int ima_file_check(struct file *file, int mask)
 {
 	return 0;
 }
diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c
index eb1cf6498cc9..b76e1f03ea2b 100644
--- a/security/integrity/ima/ima_main.c
+++ b/security/integrity/ima/ima_main.c
@@ -14,7 +14,7 @@
  *
  * File: ima_main.c
  *	implements the IMA hooks: ima_bprm_check, ima_file_mmap,
- *	and ima_path_check.
+ *	and ima_file_check.
  */
 #include <linux/module.h>
 #include <linux/file.h>
@@ -306,7 +306,7 @@ int ima_bprm_check(struct linux_binprm *bprm)
  * Always return 0 and audit dentry_open failures.
  * (Return code will be based upon measurement appraisal.)
  */
-int ima_path_check(struct file *file, int mask)
+int ima_file_check(struct file *file, int mask)
 {
 	int rc;
 
@@ -315,7 +315,7 @@ int ima_path_check(struct file *file, int mask)
 				 PATH_CHECK);
 	return 0;
 }
-EXPORT_SYMBOL_GPL(ima_path_check);
+EXPORT_SYMBOL_GPL(ima_file_check);
 
 static int __init init_ima(void)
 {
-- 
cgit v1.2.3


From 0c9cf2efd74dbc90354e2ccc7dbd6bad68ec6c4d Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Tue, 2 Feb 2010 14:46:10 -0800
Subject: percpu_counter: Make __percpu_counter_add an inline function on UP

Even though batch isn't used on UP, we may want to pass one in
to keep the SMP and UP code paths similar. Convert
__percpu_counter_add to an inline function so we wont get
variable unused warnings if we do.

Signed-off-by: Anton Blanchard <anton@samba.org>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: "Luck, Tony" <tony.luck@intel.com>
Cc: Balbir Singh <balbir@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/percpu_counter.h | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h
index a7684a513994..794662b2be5d 100644
--- a/include/linux/percpu_counter.h
+++ b/include/linux/percpu_counter.h
@@ -98,9 +98,6 @@ static inline void percpu_counter_set(struct percpu_counter *fbc, s64 amount)
 	fbc->count = amount;
 }
 
-#define __percpu_counter_add(fbc, amount, batch) \
-	percpu_counter_add(fbc, amount)
-
 static inline void
 percpu_counter_add(struct percpu_counter *fbc, s64 amount)
 {
@@ -109,6 +106,12 @@ percpu_counter_add(struct percpu_counter *fbc, s64 amount)
 	preempt_enable();
 }
 
+static inline void
+__percpu_counter_add(struct percpu_counter *fbc, s64 amount, s32 batch)
+{
+	percpu_counter_add(fbc, amount);
+}
+
 static inline s64 percpu_counter_read(struct percpu_counter *fbc)
 {
 	return fbc->count;
-- 
cgit v1.2.3


From 123df2944c436c80640c4281c5bc9c7950b18687 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 25 Dec 2009 04:57:57 -0500
Subject: Lose the new_name argument of fsnotify_move()

it's always new_dentry->d_name.name

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/debugfs/inode.c       | 2 +-
 fs/namei.c               | 6 ++----
 include/linux/fsnotify.h | 3 ++-
 3 files changed, 5 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 274ac865bae8..049d6c36da09 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -496,7 +496,7 @@ struct dentry *debugfs_rename(struct dentry *old_dir, struct dentry *old_dentry,
 	}
 	d_move(old_dentry, dentry);
 	fsnotify_move(old_dir->d_inode, new_dir->d_inode, old_name,
-		old_dentry->d_name.name, S_ISDIR(old_dentry->d_inode->i_mode),
+		S_ISDIR(old_dentry->d_inode->i_mode),
 		NULL, old_dentry);
 	fsnotify_oldname_free(old_name);
 	unlock_rename(new_dir, old_dir);
diff --git a/fs/namei.c b/fs/namei.c
index d62fdc875f22..f69df876fac3 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2661,11 +2661,9 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 		error = vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry);
 	else
 		error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry);
-	if (!error) {
-		const char *new_name = old_dentry->d_name.name;
-		fsnotify_move(old_dir, new_dir, old_name, new_name, is_dir,
+	if (!error)
+		fsnotify_move(old_dir, new_dir, old_name, is_dir,
 			      new_dentry->d_inode, old_dentry);
-	}
 	fsnotify_oldname_free(old_name);
 
 	return error;
diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h
index 936f9aa8bb97..2d755c49c324 100644
--- a/include/linux/fsnotify.h
+++ b/include/linux/fsnotify.h
@@ -65,7 +65,7 @@ static inline void fsnotify_link_count(struct inode *inode)
  * fsnotify_move - file old_name at old_dir was moved to new_name at new_dir
  */
 static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir,
-				 const char *old_name, const char *new_name,
+				 const char *old_name,
 				 int isdir, struct inode *target, struct dentry *moved)
 {
 	struct inode *source = moved->d_inode;
@@ -73,6 +73,7 @@ static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir,
 	u32 fs_cookie = fsnotify_get_cookie();
 	__u32 old_dir_mask = (FS_EVENT_ON_CHILD | FS_MOVED_FROM);
 	__u32 new_dir_mask = (FS_EVENT_ON_CHILD | FS_MOVED_TO);
+	const char *new_name = moved->d_name.name;
 
 	if (old_dir == new_dir)
 		old_dir_mask |= FS_DN_RENAME;
-- 
cgit v1.2.3


From cccc6bba3f771ef29b33e4f79e70ebc3dba245b0 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 25 Dec 2009 05:07:33 -0500
Subject: Lose the first argument of audit_inode_child()

it's always equal to ->d_name.name of the second argument

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namei.c               |  2 +-
 include/linux/audit.h    | 11 +++++------
 include/linux/fsnotify.h |  8 ++++----
 kernel/auditsc.c         |  7 ++-----
 4 files changed, 12 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/fs/namei.c b/fs/namei.c
index f69df876fac3..865282f8e012 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1337,7 +1337,7 @@ static int may_delete(struct inode *dir,struct dentry *victim,int isdir)
 		return -ENOENT;
 
 	BUG_ON(victim->d_parent->d_inode != dir);
-	audit_inode_child(victim->d_name.name, victim, dir);
+	audit_inode_child(victim, dir);
 
 	error = inode_permission(dir, MAY_WRITE | MAY_EXEC);
 	if (error)
diff --git a/include/linux/audit.h b/include/linux/audit.h
index 3c7a358241a7..f391d45c8aea 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -424,7 +424,7 @@ extern void audit_syscall_exit(int failed, long return_code);
 extern void __audit_getname(const char *name);
 extern void audit_putname(const char *name);
 extern void __audit_inode(const char *name, const struct dentry *dentry);
-extern void __audit_inode_child(const char *dname, const struct dentry *dentry,
+extern void __audit_inode_child(const struct dentry *dentry,
 				const struct inode *parent);
 extern void __audit_ptrace(struct task_struct *t);
 
@@ -442,11 +442,10 @@ static inline void audit_inode(const char *name, const struct dentry *dentry) {
 	if (unlikely(!audit_dummy_context()))
 		__audit_inode(name, dentry);
 }
-static inline void audit_inode_child(const char *dname, 
-				     const struct dentry *dentry,
+static inline void audit_inode_child(const struct dentry *dentry,
 				     const struct inode *parent) {
 	if (unlikely(!audit_dummy_context()))
-		__audit_inode_child(dname, dentry, parent);
+		__audit_inode_child(dentry, parent);
 }
 void audit_core_dumps(long signr);
 
@@ -544,9 +543,9 @@ extern int audit_signals;
 #define audit_getname(n) do { ; } while (0)
 #define audit_putname(n) do { ; } while (0)
 #define __audit_inode(n,d) do { ; } while (0)
-#define __audit_inode_child(d,i,p) do { ; } while (0)
+#define __audit_inode_child(i,p) do { ; } while (0)
 #define audit_inode(n,d) do { ; } while (0)
-#define audit_inode_child(d,i,p) do { ; } while (0)
+#define audit_inode_child(i,p) do { ; } while (0)
 #define audit_core_dumps(i) do { ; } while (0)
 #define auditsc_get_stamp(c,t,s) (0)
 #define audit_get_loginuid(t) (-1)
diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h
index 2d755c49c324..df8fd9a3b214 100644
--- a/include/linux/fsnotify.h
+++ b/include/linux/fsnotify.h
@@ -104,7 +104,7 @@ static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir,
 		inotify_inode_queue_event(source, IN_MOVE_SELF, 0, NULL, NULL);
 		fsnotify(source, FS_MOVE_SELF, moved->d_inode, FSNOTIFY_EVENT_INODE, NULL, 0);
 	}
-	audit_inode_child(new_name, moved, new_dir);
+	audit_inode_child(moved, new_dir);
 }
 
 /*
@@ -147,7 +147,7 @@ static inline void fsnotify_create(struct inode *inode, struct dentry *dentry)
 {
 	inotify_inode_queue_event(inode, IN_CREATE, 0, dentry->d_name.name,
 				  dentry->d_inode);
-	audit_inode_child(dentry->d_name.name, dentry, inode);
+	audit_inode_child(dentry, inode);
 
 	fsnotify(inode, FS_CREATE, dentry->d_inode, FSNOTIFY_EVENT_INODE, dentry->d_name.name, 0);
 }
@@ -162,7 +162,7 @@ static inline void fsnotify_link(struct inode *dir, struct inode *inode, struct
 	inotify_inode_queue_event(dir, IN_CREATE, 0, new_dentry->d_name.name,
 				  inode);
 	fsnotify_link_count(inode);
-	audit_inode_child(new_dentry->d_name.name, new_dentry, dir);
+	audit_inode_child(new_dentry, dir);
 
 	fsnotify(dir, FS_CREATE, inode, FSNOTIFY_EVENT_INODE, new_dentry->d_name.name, 0);
 }
@@ -176,7 +176,7 @@ static inline void fsnotify_mkdir(struct inode *inode, struct dentry *dentry)
 	struct inode *d_inode = dentry->d_inode;
 
 	inotify_inode_queue_event(inode, mask, 0, dentry->d_name.name, d_inode);
-	audit_inode_child(dentry->d_name.name, dentry, inode);
+	audit_inode_child(dentry, inode);
 
 	fsnotify(inode, mask, d_inode, FSNOTIFY_EVENT_INODE, dentry->d_name.name, 0);
 }
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index fc0f928167e7..f3a461c0970a 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -1988,7 +1988,6 @@ void __audit_inode(const char *name, const struct dentry *dentry)
 
 /**
  * audit_inode_child - collect inode info for created/removed objects
- * @dname: inode's dentry name
  * @dentry: dentry being audited
  * @parent: inode of dentry parent
  *
@@ -2000,13 +1999,14 @@ void __audit_inode(const char *name, const struct dentry *dentry)
  * must be hooked prior, in order to capture the target inode during
  * unsuccessful attempts.
  */
-void __audit_inode_child(const char *dname, const struct dentry *dentry,
+void __audit_inode_child(const struct dentry *dentry,
 			 const struct inode *parent)
 {
 	int idx;
 	struct audit_context *context = current->audit_context;
 	const char *found_parent = NULL, *found_child = NULL;
 	const struct inode *inode = dentry->d_inode;
+	const char *dname = dentry->d_name.name;
 	int dirlen = 0;
 
 	if (!context->in_syscall)
@@ -2014,9 +2014,6 @@ void __audit_inode_child(const char *dname, const struct dentry *dentry,
 
 	if (inode)
 		handle_one(inode);
-	/* determine matching parent */
-	if (!dname)
-		goto add_names;
 
 	/* parent is more likely, look for it first */
 	for (idx = 0; idx < context->name_count; idx++) {
-- 
cgit v1.2.3


From f7c95ef02b564d9984c0655c9659791b1dd5d7ad Mon Sep 17 00:00:00 2001
From: "Kashyap, Desai" <kashyap.desai@lsi.com>
Date: Wed, 16 Dec 2009 18:54:42 +0530
Subject: [SCSI] mpt2sas: Added raid transport support

Adding support for raid transport layer.  This will provide sysfs attributes
containing raid level, state, and resync rate.

MPT2SAS module will select RAID_ATTRS.

Signed-off-by: Kashyap Desai <kashyap.desai@lsi.com>
Reviewed-by: Eric Moore <eric.moore@lsi.com>
Signed-off-by: James Bottomley <James.Bottomley@suse.de>
---
 drivers/scsi/mpt2sas/Kconfig         |   1 +
 drivers/scsi/mpt2sas/mpt2sas_base.h  |   2 +
 drivers/scsi/mpt2sas/mpt2sas_scsih.c | 190 ++++++++++++++++++++++++++++++++++-
 include/linux/raid_class.h           |   1 +
 4 files changed, 190 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/scsi/mpt2sas/Kconfig b/drivers/scsi/mpt2sas/Kconfig
index 70c4c2467dd8..ba8e128de238 100644
--- a/drivers/scsi/mpt2sas/Kconfig
+++ b/drivers/scsi/mpt2sas/Kconfig
@@ -44,6 +44,7 @@ config SCSI_MPT2SAS
 	tristate "LSI MPT Fusion SAS 2.0 Device Driver"
 	depends on PCI && SCSI
 	select SCSI_SAS_ATTRS
+	select RAID_ATTRS
 	---help---
 	This driver supports PCI-Express SAS 6Gb/s Host Adapters.
 
diff --git a/drivers/scsi/mpt2sas/mpt2sas_base.h b/drivers/scsi/mpt2sas/mpt2sas_base.h
index 12fa18be77e1..014318fa3b0e 100644
--- a/drivers/scsi/mpt2sas/mpt2sas_base.h
+++ b/drivers/scsi/mpt2sas/mpt2sas_base.h
@@ -323,6 +323,7 @@ struct _sas_device {
  * @device_info: bitfield provides detailed info about the hidden components
  * @num_pds: number of hidden raid components
  * @responding: used in _scsih_raid_device_mark_responding
+ * @percent_complete: resync percent complete
  */
 struct _raid_device {
 	struct list_head list;
@@ -336,6 +337,7 @@ struct _raid_device {
 	u32	device_info;
 	u8	num_pds;
 	u8	responding;
+	u8	percent_complete;
 };
 
 /**
diff --git a/drivers/scsi/mpt2sas/mpt2sas_scsih.c b/drivers/scsi/mpt2sas/mpt2sas_scsih.c
index cd551768bfbf..ca984cbc8e2f 100644
--- a/drivers/scsi/mpt2sas/mpt2sas_scsih.c
+++ b/drivers/scsi/mpt2sas/mpt2sas_scsih.c
@@ -52,6 +52,7 @@
 #include <linux/delay.h>
 #include <linux/pci.h>
 #include <linux/interrupt.h>
+#include <linux/raid_class.h>
 
 #include "mpt2sas_base.h"
 
@@ -133,6 +134,9 @@ struct fw_event_work {
 	void			*event_data;
 };
 
+/* raid transport support */
+static struct raid_template *mpt2sas_raid_template;
+
 /**
  * struct _scsi_io_transfer - scsi io transfer
  * @handle: sas device handle (assigned by firmware)
@@ -1418,6 +1422,140 @@ _scsih_display_sata_capabilities(struct MPT2SAS_ADAPTER *ioc,
 	    (flags & MPI2_SAS_DEVICE0_FLAGS_SATA_SW_PRESERVE) ? "y" : "n");
 }
 
+/**
+ * _scsih_is_raid - return boolean indicating device is raid volume
+ * @dev the device struct object
+ */
+static int
+_scsih_is_raid(struct device *dev)
+{
+	struct scsi_device *sdev = to_scsi_device(dev);
+
+	return (sdev->channel == RAID_CHANNEL) ? 1 : 0;
+}
+
+/**
+ * _scsih_get_resync - get raid volume resync percent complete
+ * @dev the device struct object
+ */
+static void
+_scsih_get_resync(struct device *dev)
+{
+	struct scsi_device *sdev = to_scsi_device(dev);
+	struct MPT2SAS_ADAPTER *ioc = shost_priv(sdev->host);
+	static struct _raid_device *raid_device;
+	unsigned long flags;
+	Mpi2RaidVolPage0_t vol_pg0;
+	Mpi2ConfigReply_t mpi_reply;
+	u32 volume_status_flags;
+	u8 percent_complete = 0;
+
+	spin_lock_irqsave(&ioc->raid_device_lock, flags);
+	raid_device = _scsih_raid_device_find_by_id(ioc, sdev->id,
+	    sdev->channel);
+	spin_unlock_irqrestore(&ioc->raid_device_lock, flags);
+
+	if (!raid_device)
+		goto out;
+
+	if (mpt2sas_config_get_raid_volume_pg0(ioc, &mpi_reply, &vol_pg0,
+	     MPI2_RAID_VOLUME_PGAD_FORM_HANDLE, raid_device->handle,
+	     sizeof(Mpi2RaidVolPage0_t))) {
+		printk(MPT2SAS_ERR_FMT "failure at %s:%d/%s()!\n",
+		    ioc->name, __FILE__, __LINE__, __func__);
+		goto out;
+	}
+
+	volume_status_flags = le32_to_cpu(vol_pg0.VolumeStatusFlags);
+	if (volume_status_flags & MPI2_RAIDVOL0_STATUS_FLAG_RESYNC_IN_PROGRESS)
+		percent_complete = raid_device->percent_complete;
+ out:
+	raid_set_resync(mpt2sas_raid_template, dev, percent_complete);
+}
+
+/**
+ * _scsih_get_state - get raid volume level
+ * @dev the device struct object
+ */
+static void
+_scsih_get_state(struct device *dev)
+{
+	struct scsi_device *sdev = to_scsi_device(dev);
+	struct MPT2SAS_ADAPTER *ioc = shost_priv(sdev->host);
+	static struct _raid_device *raid_device;
+	unsigned long flags;
+	Mpi2RaidVolPage0_t vol_pg0;
+	Mpi2ConfigReply_t mpi_reply;
+	u32 volstate;
+	enum raid_state state = RAID_STATE_UNKNOWN;
+
+	spin_lock_irqsave(&ioc->raid_device_lock, flags);
+	raid_device = _scsih_raid_device_find_by_id(ioc, sdev->id,
+	    sdev->channel);
+	spin_unlock_irqrestore(&ioc->raid_device_lock, flags);
+
+	if (!raid_device)
+		goto out;
+
+	if (mpt2sas_config_get_raid_volume_pg0(ioc, &mpi_reply, &vol_pg0,
+	     MPI2_RAID_VOLUME_PGAD_FORM_HANDLE, raid_device->handle,
+	     sizeof(Mpi2RaidVolPage0_t))) {
+		printk(MPT2SAS_ERR_FMT "failure at %s:%d/%s()!\n",
+		    ioc->name, __FILE__, __LINE__, __func__);
+		goto out;
+	}
+
+	volstate = le32_to_cpu(vol_pg0.VolumeStatusFlags);
+	if (volstate & MPI2_RAIDVOL0_STATUS_FLAG_RESYNC_IN_PROGRESS) {
+		state = RAID_STATE_RESYNCING;
+		goto out;
+	}
+
+	switch (vol_pg0.VolumeState) {
+	case MPI2_RAID_VOL_STATE_OPTIMAL:
+	case MPI2_RAID_VOL_STATE_ONLINE:
+		state = RAID_STATE_ACTIVE;
+		break;
+	case  MPI2_RAID_VOL_STATE_DEGRADED:
+		state = RAID_STATE_DEGRADED;
+		break;
+	case MPI2_RAID_VOL_STATE_FAILED:
+	case MPI2_RAID_VOL_STATE_MISSING:
+		state = RAID_STATE_OFFLINE;
+		break;
+	}
+ out:
+	raid_set_state(mpt2sas_raid_template, dev, state);
+}
+
+/**
+ * _scsih_set_level - set raid level
+ * @sdev: scsi device struct
+ * @raid_device: raid_device object
+ */
+static void
+_scsih_set_level(struct scsi_device *sdev, struct _raid_device *raid_device)
+{
+	enum raid_level level = RAID_LEVEL_UNKNOWN;
+
+	switch (raid_device->volume_type) {
+	case MPI2_RAID_VOL_TYPE_RAID0:
+		level = RAID_LEVEL_0;
+		break;
+	case MPI2_RAID_VOL_TYPE_RAID10:
+		level = RAID_LEVEL_10;
+		break;
+	case MPI2_RAID_VOL_TYPE_RAID1E:
+		level = RAID_LEVEL_1E;
+		break;
+	case MPI2_RAID_VOL_TYPE_RAID1:
+		level = RAID_LEVEL_1;
+		break;
+	}
+
+	raid_set_level(mpt2sas_raid_template, &sdev->sdev_gendev, level);
+}
+
 /**
  * _scsih_get_volume_capabilities - volume capabilities
  * @ioc: per adapter object
@@ -1574,6 +1712,8 @@ _scsih_slave_configure(struct scsi_device *sdev)
 		    (unsigned long long)raid_device->wwid,
 		    raid_device->num_pds, ds);
 		_scsih_change_queue_depth(sdev, qdepth, SCSI_QDEPTH_DEFAULT);
+		/* raid transport support */
+		_scsih_set_level(sdev, raid_device);
 		return 0;
 	}
 
@@ -5170,11 +5310,33 @@ static void
 _scsih_sas_ir_operation_status_event(struct MPT2SAS_ADAPTER *ioc,
     struct fw_event_work *fw_event)
 {
+	Mpi2EventDataIrOperationStatus_t *event_data = fw_event->event_data;
+	static struct _raid_device *raid_device;
+	unsigned long flags;
+	u16 handle;
+
 #ifdef CONFIG_SCSI_MPT2SAS_LOGGING
 	if (ioc->logging_level & MPT_DEBUG_EVENT_WORK_TASK)
 		_scsih_sas_ir_operation_status_event_debug(ioc,
-		     fw_event->event_data);
+		     event_data);
 #endif
+
+	/* code added for raid transport support */
+	if (event_data->RAIDOperation == MPI2_EVENT_IR_RAIDOP_RESYNC) {
+
+		handle = le16_to_cpu(event_data->VolDevHandle);
+
+		spin_lock_irqsave(&ioc->raid_device_lock, flags);
+		raid_device = _scsih_raid_device_find_by_handle(ioc, handle);
+		spin_unlock_irqrestore(&ioc->raid_device_lock, flags);
+
+		if (!raid_device)
+			return;
+
+		if (event_data->RAIDOperation == MPI2_EVENT_IR_RAIDOP_RESYNC)
+			raid_device->percent_complete =
+			    event_data->PercentComplete;
+	}
 }
 
 /**
@@ -6390,6 +6552,13 @@ static struct pci_driver scsih_driver = {
 #endif
 };
 
+/* raid transport support */
+static struct raid_function_template mpt2sas_raid_functions = {
+	.cookie		= &scsih_driver_template,
+	.is_raid	= _scsih_is_raid,
+	.get_resync	= _scsih_get_resync,
+	.get_state	= _scsih_get_state,
+};
 
 /**
  * _scsih_init - main entry point for this driver.
@@ -6409,6 +6578,12 @@ _scsih_init(void)
 	    sas_attach_transport(&mpt2sas_transport_functions);
 	if (!mpt2sas_transport_template)
 		return -ENODEV;
+	/* raid transport support */
+	mpt2sas_raid_template = raid_class_attach(&mpt2sas_raid_functions);
+	if (!mpt2sas_raid_template) {
+		sas_release_transport(mpt2sas_transport_template);
+		return -ENODEV;
+	}
 
 	mpt2sas_base_initialize_callback_handler();
 
@@ -6443,8 +6618,11 @@ _scsih_init(void)
 	mpt2sas_ctl_init();
 
 	error = pci_register_driver(&scsih_driver);
-	if (error)
+	if (error) {
+		/* raid transport support */
+		raid_class_release(mpt2sas_raid_template);
 		sas_release_transport(mpt2sas_transport_template);
+	}
 
 	return error;
 }
@@ -6462,7 +6640,8 @@ _scsih_exit(void)
 
 	pci_unregister_driver(&scsih_driver);
 
-	sas_release_transport(mpt2sas_transport_template);
+	mpt2sas_ctl_exit();
+
 	mpt2sas_base_release_callback_handler(scsi_io_cb_idx);
 	mpt2sas_base_release_callback_handler(tm_cb_idx);
 	mpt2sas_base_release_callback_handler(base_cb_idx);
@@ -6474,7 +6653,10 @@ _scsih_exit(void)
 	mpt2sas_base_release_callback_handler(tm_tr_cb_idx);
 	mpt2sas_base_release_callback_handler(tm_sas_control_cb_idx);
 
-	mpt2sas_ctl_exit();
+	/* raid transport support */
+	raid_class_release(mpt2sas_raid_template);
+	sas_release_transport(mpt2sas_transport_template);
+
 }
 
 module_init(_scsih_init);
diff --git a/include/linux/raid_class.h b/include/linux/raid_class.h
index 6b537f1ac96c..31e1ff69efc8 100644
--- a/include/linux/raid_class.h
+++ b/include/linux/raid_class.h
@@ -32,6 +32,7 @@ enum raid_level {
 	RAID_LEVEL_0,
 	RAID_LEVEL_1,
 	RAID_LEVEL_10,
+	RAID_LEVEL_1E,
 	RAID_LEVEL_3,
 	RAID_LEVEL_4,
 	RAID_LEVEL_5,
-- 
cgit v1.2.3


From 577cd7584cf5199f1ea22cca0ad1fa129a98effa Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@opensource.se>
Date: Tue, 9 Feb 2010 04:24:46 +0000
Subject: sh: extend INTC with struct intc_hw_desc

This patch updates the INTC code by moving all vectors,
groups and registers from struct intc_desc to struct
intc_hw_desc.

The idea is that INTC tables should go from using the
macro(s) DECLARE_INTC_DESC..() only to using struct
intc_desc with name and hw initialized using the macro
INTC_HW_DESC(). This move makes it easy to initialize
an extended struct intc_desc in the future.

Signed-off-by: Magnus Damm <damm@opensource.se>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 drivers/sh/intc.c       | 88 +++++++++++++++++++++++++------------------------
 include/linux/sh_intc.h | 31 ++++++++++-------
 2 files changed, 65 insertions(+), 54 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/sh/intc.c b/drivers/sh/intc.c
index d5d7f23c19a5..5ec0fffa7db4 100644
--- a/drivers/sh/intc.c
+++ b/drivers/sh/intc.c
@@ -400,11 +400,11 @@ static unsigned int __init intc_get_reg(struct intc_desc_int *d,
 static intc_enum __init intc_grp_id(struct intc_desc *desc,
 				    intc_enum enum_id)
 {
-	struct intc_group *g = desc->groups;
+	struct intc_group *g = desc->hw.groups;
 	unsigned int i, j;
 
-	for (i = 0; g && enum_id && i < desc->nr_groups; i++) {
-		g = desc->groups + i;
+	for (i = 0; g && enum_id && i < desc->hw.nr_groups; i++) {
+		g = desc->hw.groups + i;
 
 		for (j = 0; g->enum_ids[j]; j++) {
 			if (g->enum_ids[j] != enum_id)
@@ -421,12 +421,12 @@ static unsigned int __init intc_mask_data(struct intc_desc *desc,
 					  struct intc_desc_int *d,
 					  intc_enum enum_id, int do_grps)
 {
-	struct intc_mask_reg *mr = desc->mask_regs;
+	struct intc_mask_reg *mr = desc->hw.mask_regs;
 	unsigned int i, j, fn, mode;
 	unsigned long reg_e, reg_d;
 
-	for (i = 0; mr && enum_id && i < desc->nr_mask_regs; i++) {
-		mr = desc->mask_regs + i;
+	for (i = 0; mr && enum_id && i < desc->hw.nr_mask_regs; i++) {
+		mr = desc->hw.mask_regs + i;
 
 		for (j = 0; j < ARRAY_SIZE(mr->enum_ids); j++) {
 			if (mr->enum_ids[j] != enum_id)
@@ -469,12 +469,12 @@ static unsigned int __init intc_prio_data(struct intc_desc *desc,
 					  struct intc_desc_int *d,
 					  intc_enum enum_id, int do_grps)
 {
-	struct intc_prio_reg *pr = desc->prio_regs;
+	struct intc_prio_reg *pr = desc->hw.prio_regs;
 	unsigned int i, j, fn, mode, bit;
 	unsigned long reg_e, reg_d;
 
-	for (i = 0; pr && enum_id && i < desc->nr_prio_regs; i++) {
-		pr = desc->prio_regs + i;
+	for (i = 0; pr && enum_id && i < desc->hw.nr_prio_regs; i++) {
+		pr = desc->hw.prio_regs + i;
 
 		for (j = 0; j < ARRAY_SIZE(pr->enum_ids); j++) {
 			if (pr->enum_ids[j] != enum_id)
@@ -517,12 +517,12 @@ static unsigned int __init intc_ack_data(struct intc_desc *desc,
 					  struct intc_desc_int *d,
 					  intc_enum enum_id)
 {
-	struct intc_mask_reg *mr = desc->ack_regs;
+	struct intc_mask_reg *mr = desc->hw.ack_regs;
 	unsigned int i, j, fn, mode;
 	unsigned long reg_e, reg_d;
 
-	for (i = 0; mr && enum_id && i < desc->nr_ack_regs; i++) {
-		mr = desc->ack_regs + i;
+	for (i = 0; mr && enum_id && i < desc->hw.nr_ack_regs; i++) {
+		mr = desc->hw.ack_regs + i;
 
 		for (j = 0; j < ARRAY_SIZE(mr->enum_ids); j++) {
 			if (mr->enum_ids[j] != enum_id)
@@ -549,11 +549,11 @@ static unsigned int __init intc_sense_data(struct intc_desc *desc,
 					   struct intc_desc_int *d,
 					   intc_enum enum_id)
 {
-	struct intc_sense_reg *sr = desc->sense_regs;
+	struct intc_sense_reg *sr = desc->hw.sense_regs;
 	unsigned int i, j, fn, bit;
 
-	for (i = 0; sr && enum_id && i < desc->nr_sense_regs; i++) {
-		sr = desc->sense_regs + i;
+	for (i = 0; sr && enum_id && i < desc->hw.nr_sense_regs; i++) {
+		sr = desc->hw.sense_regs + i;
 
 		for (j = 0; j < ARRAY_SIZE(sr->enum_ids); j++) {
 			if (sr->enum_ids[j] != enum_id)
@@ -656,7 +656,7 @@ static void __init intc_register_irq(struct intc_desc *desc,
 	/* irq should be disabled by default */
 	d->chip.mask(irq);
 
-	if (desc->ack_regs)
+	if (desc->hw.ack_regs)
 		ack_handle[irq] = intc_ack_data(desc, d, enum_id);
 }
 
@@ -684,6 +684,7 @@ static void intc_redirect_irq(unsigned int irq, struct irq_desc *desc)
 void __init register_intc_controller(struct intc_desc *desc)
 {
 	unsigned int i, k, smp;
+	struct intc_hw_desc *hw = &desc->hw;
 	struct intc_desc_int *d;
 
 	d = kzalloc(sizeof(*d), GFP_NOWAIT);
@@ -691,10 +692,10 @@ void __init register_intc_controller(struct intc_desc *desc)
 	INIT_LIST_HEAD(&d->list);
 	list_add(&d->list, &intc_list);
 
-	d->nr_reg = desc->mask_regs ? desc->nr_mask_regs * 2 : 0;
-	d->nr_reg += desc->prio_regs ? desc->nr_prio_regs * 2 : 0;
-	d->nr_reg += desc->sense_regs ? desc->nr_sense_regs : 0;
-	d->nr_reg += desc->ack_regs ? desc->nr_ack_regs : 0;
+	d->nr_reg = hw->mask_regs ? hw->nr_mask_regs * 2 : 0;
+	d->nr_reg += hw->prio_regs ? hw->nr_prio_regs * 2 : 0;
+	d->nr_reg += hw->sense_regs ? hw->nr_sense_regs : 0;
+	d->nr_reg += hw->ack_regs ? hw->nr_ack_regs : 0;
 
 	d->reg = kzalloc(d->nr_reg * sizeof(*d->reg), GFP_NOWAIT);
 #ifdef CONFIG_SMP
@@ -702,30 +703,31 @@ void __init register_intc_controller(struct intc_desc *desc)
 #endif
 	k = 0;
 
-	if (desc->mask_regs) {
-		for (i = 0; i < desc->nr_mask_regs; i++) {
-			smp = IS_SMP(desc->mask_regs[i]);
-			k += save_reg(d, k, desc->mask_regs[i].set_reg, smp);
-			k += save_reg(d, k, desc->mask_regs[i].clr_reg, smp);
+	if (hw->mask_regs) {
+		for (i = 0; i < hw->nr_mask_regs; i++) {
+			smp = IS_SMP(hw->mask_regs[i]);
+			k += save_reg(d, k, hw->mask_regs[i].set_reg, smp);
+			k += save_reg(d, k, hw->mask_regs[i].clr_reg, smp);
 		}
 	}
 
-	if (desc->prio_regs) {
-		d->prio = kzalloc(desc->nr_vectors * sizeof(*d->prio), GFP_NOWAIT);
+	if (hw->prio_regs) {
+		d->prio = kzalloc(hw->nr_vectors * sizeof(*d->prio),
+				  GFP_NOWAIT);
 
-		for (i = 0; i < desc->nr_prio_regs; i++) {
-			smp = IS_SMP(desc->prio_regs[i]);
-			k += save_reg(d, k, desc->prio_regs[i].set_reg, smp);
-			k += save_reg(d, k, desc->prio_regs[i].clr_reg, smp);
+		for (i = 0; i < hw->nr_prio_regs; i++) {
+			smp = IS_SMP(hw->prio_regs[i]);
+			k += save_reg(d, k, hw->prio_regs[i].set_reg, smp);
+			k += save_reg(d, k, hw->prio_regs[i].clr_reg, smp);
 		}
 	}
 
-	if (desc->sense_regs) {
-		d->sense = kzalloc(desc->nr_vectors * sizeof(*d->sense), GFP_NOWAIT);
+	if (hw->sense_regs) {
+		d->sense = kzalloc(hw->nr_vectors * sizeof(*d->sense),
+				   GFP_NOWAIT);
 
-		for (i = 0; i < desc->nr_sense_regs; i++) {
-			k += save_reg(d, k, desc->sense_regs[i].reg, 0);
-		}
+		for (i = 0; i < hw->nr_sense_regs; i++)
+			k += save_reg(d, k, hw->sense_regs[i].reg, 0);
 	}
 
 	d->chip.name = desc->name;
@@ -738,9 +740,9 @@ void __init register_intc_controller(struct intc_desc *desc)
 	d->chip.set_type = intc_set_sense;
 	d->chip.set_wake = intc_set_wake;
 
-	if (desc->ack_regs) {
-		for (i = 0; i < desc->nr_ack_regs; i++)
-			k += save_reg(d, k, desc->ack_regs[i].set_reg, 0);
+	if (hw->ack_regs) {
+		for (i = 0; i < hw->nr_ack_regs; i++)
+			k += save_reg(d, k, hw->ack_regs[i].set_reg, 0);
 
 		d->chip.mask_ack = intc_mask_ack;
 	}
@@ -748,8 +750,8 @@ void __init register_intc_controller(struct intc_desc *desc)
 	BUG_ON(k > 256); /* _INTC_ADDR_E() and _INTC_ADDR_D() are 8 bits */
 
 	/* register the vectors one by one */
-	for (i = 0; i < desc->nr_vectors; i++) {
-		struct intc_vect *vect = desc->vectors + i;
+	for (i = 0; i < hw->nr_vectors; i++) {
+		struct intc_vect *vect = hw->vectors + i;
 		unsigned int irq = evt2irq(vect->vect);
 		struct irq_desc *irq_desc;
 
@@ -764,8 +766,8 @@ void __init register_intc_controller(struct intc_desc *desc)
 
 		intc_register_irq(desc, d, vect->enum_id, irq);
 
-		for (k = i + 1; k < desc->nr_vectors; k++) {
-			struct intc_vect *vect2 = desc->vectors + k;
+		for (k = i + 1; k < hw->nr_vectors; k++) {
+			struct intc_vect *vect2 = hw->vectors + k;
 			unsigned int irq2 = evt2irq(vect2->vect);
 
 			if (vect->enum_id != vect2->enum_id)
diff --git a/include/linux/sh_intc.h b/include/linux/sh_intc.h
index 4ef246f14654..7b37526bb73f 100644
--- a/include/linux/sh_intc.h
+++ b/include/linux/sh_intc.h
@@ -45,7 +45,7 @@ struct intc_sense_reg {
 #define INTC_SMP(stride, nr)
 #endif
 
-struct intc_desc {
+struct intc_hw_desc {
 	struct intc_vect *vectors;
 	unsigned int nr_vectors;
 	struct intc_group *groups;
@@ -56,29 +56,38 @@ struct intc_desc {
 	unsigned int nr_prio_regs;
 	struct intc_sense_reg *sense_regs;
 	unsigned int nr_sense_regs;
-	char *name;
 	struct intc_mask_reg *ack_regs;
 	unsigned int nr_ack_regs;
 };
 
 #define _INTC_ARRAY(a) a, sizeof(a)/sizeof(*a)
+#define INTC_HW_DESC(vectors, groups, mask_regs,	\
+		     prio_regs,	sense_regs, ack_regs)	\
+{							\
+	_INTC_ARRAY(vectors), _INTC_ARRAY(groups),	\
+	_INTC_ARRAY(mask_regs), _INTC_ARRAY(prio_regs),	\
+	_INTC_ARRAY(sense_regs), _INTC_ARRAY(ack_regs),	\
+}
+
+struct intc_desc {
+	char *name;
+	struct intc_hw_desc hw;
+};
+
 #define DECLARE_INTC_DESC(symbol, chipname, vectors, groups,		\
 	mask_regs, prio_regs, sense_regs)				\
 struct intc_desc symbol __initdata = {					\
-	_INTC_ARRAY(vectors), _INTC_ARRAY(groups),			\
-	_INTC_ARRAY(mask_regs), _INTC_ARRAY(prio_regs),			\
-	_INTC_ARRAY(sense_regs),					\
-	chipname,							\
+	.name = chipname,						\
+	.hw = INTC_HW_DESC(vectors, groups, mask_regs,			\
+			   prio_regs, sense_regs, NULL),		\
 }
 
 #define DECLARE_INTC_DESC_ACK(symbol, chipname, vectors, groups,	\
 	mask_regs, prio_regs, sense_regs, ack_regs)			\
 struct intc_desc symbol __initdata = {					\
-	_INTC_ARRAY(vectors), _INTC_ARRAY(groups),			\
-	_INTC_ARRAY(mask_regs), _INTC_ARRAY(prio_regs),			\
-	_INTC_ARRAY(sense_regs),					\
-	chipname,							\
-	_INTC_ARRAY(ack_regs),						\
+	.name = chipname,						\
+	.hw = INTC_HW_DESC(vectors, groups, mask_regs,			\
+			   prio_regs, sense_regs, ack_regs),		\
 }
 
 void __init register_intc_controller(struct intc_desc *desc);
-- 
cgit v1.2.3


From d519095344fda705c9840a579acf6aa6205c37cc Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@opensource.se>
Date: Tue, 9 Feb 2010 04:29:22 +0000
Subject: sh: extend INTC with force_enable

Extend the shared INTC code with force_enable support to
allow keeping mask bits statically enabled. Needed by
upcoming INTC SDHI patches that mux together a bunch of
vectors to a single linux interrupt which is masked by
a priority register, but needs individual mask bits
constantly enabled.

Signed-off-by: Magnus Damm <damm@opensource.se>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 drivers/sh/intc.c       | 151 ++++++++++++++++++++++++++++++++++++++++++------
 include/linux/sh_intc.h |   1 +
 2 files changed, 133 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/sh/intc.c b/drivers/sh/intc.c
index 5ec0fffa7db4..ccee18945d91 100644
--- a/drivers/sh/intc.c
+++ b/drivers/sh/intc.c
@@ -259,6 +259,43 @@ static void intc_disable(unsigned int irq)
 	}
 }
 
+static void (*intc_enable_noprio_fns[])(unsigned long addr,
+					unsigned long handle,
+					void (*fn)(unsigned long,
+						   unsigned long,
+						   unsigned long),
+					unsigned int irq) = {
+	[MODE_ENABLE_REG] = intc_mode_field,
+	[MODE_MASK_REG] = intc_mode_zero,
+	[MODE_DUAL_REG] = intc_mode_field,
+	[MODE_PRIO_REG] = intc_mode_field,
+	[MODE_PCLR_REG] = intc_mode_field,
+};
+
+static void intc_enable_disable(struct intc_desc_int *d,
+				unsigned long handle, int do_enable)
+{
+	unsigned long addr;
+	unsigned int cpu;
+	void (*fn)(unsigned long, unsigned long,
+		   void (*)(unsigned long, unsigned long, unsigned long),
+		   unsigned int);
+
+	if (do_enable) {
+		for (cpu = 0; cpu < SMP_NR(d, _INTC_ADDR_E(handle)); cpu++) {
+			addr = INTC_REG(d, _INTC_ADDR_E(handle), cpu);
+			fn = intc_enable_noprio_fns[_INTC_MODE(handle)];
+			fn(addr, handle, intc_reg_fns[_INTC_FN(handle)], 0);
+		}
+	} else {
+		for (cpu = 0; cpu < SMP_NR(d, _INTC_ADDR_D(handle)); cpu++) {
+			addr = INTC_REG(d, _INTC_ADDR_D(handle), cpu);
+			fn = intc_disable_fns[_INTC_MODE(handle)];
+			fn(addr, handle, intc_reg_fns[_INTC_FN(handle)], 0);
+		}
+	}
+}
+
 static int intc_set_wake(unsigned int irq, unsigned int on)
 {
 	return 0; /* allow wakeup, but setup hardware in intc_suspend() */
@@ -417,19 +454,21 @@ static intc_enum __init intc_grp_id(struct intc_desc *desc,
 	return 0;
 }
 
-static unsigned int __init intc_mask_data(struct intc_desc *desc,
-					  struct intc_desc_int *d,
-					  intc_enum enum_id, int do_grps)
+static unsigned int __init _intc_mask_data(struct intc_desc *desc,
+					   struct intc_desc_int *d,
+					   intc_enum enum_id,
+					   unsigned int *reg_idx,
+					   unsigned int *fld_idx)
 {
 	struct intc_mask_reg *mr = desc->hw.mask_regs;
-	unsigned int i, j, fn, mode;
+	unsigned int fn, mode;
 	unsigned long reg_e, reg_d;
 
-	for (i = 0; mr && enum_id && i < desc->hw.nr_mask_regs; i++) {
-		mr = desc->hw.mask_regs + i;
+	while (mr && enum_id && *reg_idx < desc->hw.nr_mask_regs) {
+		mr = desc->hw.mask_regs + *reg_idx;
 
-		for (j = 0; j < ARRAY_SIZE(mr->enum_ids); j++) {
-			if (mr->enum_ids[j] != enum_id)
+		for (; *fld_idx < ARRAY_SIZE(mr->enum_ids); (*fld_idx)++) {
+			if (mr->enum_ids[*fld_idx] != enum_id)
 				continue;
 
 			if (mr->set_reg && mr->clr_reg) {
@@ -455,29 +494,49 @@ static unsigned int __init intc_mask_data(struct intc_desc *desc,
 					intc_get_reg(d, reg_e),
 					intc_get_reg(d, reg_d),
 					1,
-					(mr->reg_width - 1) - j);
+					(mr->reg_width - 1) - *fld_idx);
 		}
+
+		*fld_idx = 0;
+		(*reg_idx)++;
 	}
 
+	return 0;
+}
+
+static unsigned int __init intc_mask_data(struct intc_desc *desc,
+					  struct intc_desc_int *d,
+					  intc_enum enum_id, int do_grps)
+{
+	unsigned int i = 0;
+	unsigned int j = 0;
+	unsigned int ret;
+
+	ret = _intc_mask_data(desc, d, enum_id, &i, &j);
+	if (ret)
+		return ret;
+
 	if (do_grps)
 		return intc_mask_data(desc, d, intc_grp_id(desc, enum_id), 0);
 
 	return 0;
 }
 
-static unsigned int __init intc_prio_data(struct intc_desc *desc,
-					  struct intc_desc_int *d,
-					  intc_enum enum_id, int do_grps)
+static unsigned int __init _intc_prio_data(struct intc_desc *desc,
+					   struct intc_desc_int *d,
+					   intc_enum enum_id,
+					   unsigned int *reg_idx,
+					   unsigned int *fld_idx)
 {
 	struct intc_prio_reg *pr = desc->hw.prio_regs;
-	unsigned int i, j, fn, mode, bit;
+	unsigned int fn, n, mode, bit;
 	unsigned long reg_e, reg_d;
 
-	for (i = 0; pr && enum_id && i < desc->hw.nr_prio_regs; i++) {
-		pr = desc->hw.prio_regs + i;
+	while (pr && enum_id && *reg_idx < desc->hw.nr_prio_regs) {
+		pr = desc->hw.prio_regs + *reg_idx;
 
-		for (j = 0; j < ARRAY_SIZE(pr->enum_ids); j++) {
-			if (pr->enum_ids[j] != enum_id)
+		for (; *fld_idx < ARRAY_SIZE(pr->enum_ids); (*fld_idx)++) {
+			if (pr->enum_ids[*fld_idx] != enum_id)
 				continue;
 
 			if (pr->set_reg && pr->clr_reg) {
@@ -495,24 +554,69 @@ static unsigned int __init intc_prio_data(struct intc_desc *desc,
 			}
 
 			fn += (pr->reg_width >> 3) - 1;
+			n = *fld_idx + 1;
 
-			BUG_ON((j + 1) * pr->field_width > pr->reg_width);
+			BUG_ON(n * pr->field_width > pr->reg_width);
 
-			bit = pr->reg_width - ((j + 1) * pr->field_width);
+			bit = pr->reg_width - (n * pr->field_width);
 
 			return _INTC_MK(fn, mode,
 					intc_get_reg(d, reg_e),
 					intc_get_reg(d, reg_d),
 					pr->field_width, bit);
 		}
+
+		*fld_idx = 0;
+		(*reg_idx)++;
 	}
 
+	return 0;
+}
+
+static unsigned int __init intc_prio_data(struct intc_desc *desc,
+					  struct intc_desc_int *d,
+					  intc_enum enum_id, int do_grps)
+{
+	unsigned int i = 0;
+	unsigned int j = 0;
+	unsigned int ret;
+
+	ret = _intc_prio_data(desc, d, enum_id, &i, &j);
+	if (ret)
+		return ret;
+
 	if (do_grps)
 		return intc_prio_data(desc, d, intc_grp_id(desc, enum_id), 0);
 
 	return 0;
 }
 
+static void __init intc_enable_disable_enum(struct intc_desc *desc,
+					    struct intc_desc_int *d,
+					    intc_enum enum_id, int enable)
+{
+	unsigned int i, j, data;
+
+	/* go through and enable/disable all mask bits */
+	i = j = 0;
+	do {
+		data = _intc_mask_data(desc, d, enum_id, &i, &j);
+		if (data)
+			intc_enable_disable(d, data, enable);
+		j++;
+	} while (data);
+
+	/* go through and enable/disable all priority fields */
+	i = j = 0;
+	do {
+		data = _intc_prio_data(desc, d, enum_id, &i, &j);
+		if (data)
+			intc_enable_disable(d, data, enable);
+
+		j++;
+	} while (data);
+}
+
 static unsigned int __init intc_ack_data(struct intc_desc *desc,
 					  struct intc_desc_int *d,
 					  intc_enum enum_id)
@@ -747,6 +851,11 @@ void __init register_intc_controller(struct intc_desc *desc)
 		d->chip.mask_ack = intc_mask_ack;
 	}
 
+
+	/* disable bits matching force_enable before registering irqs */
+	if (desc->force_enable)
+		intc_enable_disable_enum(desc, d, desc->force_enable, 0);
+
 	BUG_ON(k > 256); /* _INTC_ADDR_E() and _INTC_ADDR_D() are 8 bits */
 
 	/* register the vectors one by one */
@@ -792,6 +901,10 @@ void __init register_intc_controller(struct intc_desc *desc)
 			set_irq_data(irq2, (void *)irq);
 		}
 	}
+
+	/* enable bits matching force_enable after registering irqs */
+	if (desc->force_enable)
+		intc_enable_disable_enum(desc, d, desc->force_enable, 1);
 }
 
 static int intc_suspend(struct sys_device *dev, pm_message_t state)
diff --git a/include/linux/sh_intc.h b/include/linux/sh_intc.h
index 7b37526bb73f..66b4b0c45e71 100644
--- a/include/linux/sh_intc.h
+++ b/include/linux/sh_intc.h
@@ -71,6 +71,7 @@ struct intc_hw_desc {
 
 struct intc_desc {
 	char *name;
+	intc_enum force_enable;
 	struct intc_hw_desc hw;
 };
 
-- 
cgit v1.2.3


From 3ad2f3fbb961429d2aa627465ae4829758bc7e07 Mon Sep 17 00:00:00 2001
From: Daniel Mack <daniel@caiaq.de>
Date: Wed, 3 Feb 2010 08:01:28 +0800
Subject: tree-wide: Assorted spelling fixes

In particular, several occurances of funny versions of 'success',
'unknown', 'therefore', 'acknowledge', 'argument', 'achieve', 'address',
'beginning', 'desirable', 'separate' and 'necessary' are fixed.

Signed-off-by: Daniel Mack <daniel@caiaq.de>
Cc: Joe Perches <joe@perches.com>
Cc: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 Documentation/DocBook/mtdnand.tmpl                  |  6 +++---
 Documentation/DocBook/v4l/common.xml                |  2 +-
 Documentation/DocBook/v4l/vidioc-g-parm.xml         |  2 +-
 Documentation/arm/Samsung-S3C24XX/CPUfreq.txt       |  4 ++--
 Documentation/hwmon/abituguru                       |  2 +-
 Documentation/input/rotary-encoder.txt              |  2 +-
 Documentation/networking/skfp.txt                   |  2 +-
 Documentation/s390/kvm.txt                          |  2 +-
 Documentation/scsi/ChangeLog.lpfc                   | 10 +++++-----
 Documentation/trace/ftrace.txt                      |  2 +-
 arch/arm/mach-ep93xx/micro9.c                       |  2 +-
 arch/arm/mach-nomadik/board-nhk8815.c               |  2 +-
 arch/arm/mach-u300/core.c                           |  4 ++--
 arch/arm/mach-u300/include/mach/debug-macro.S       |  2 +-
 arch/arm/plat-s3c/include/plat/gpio-cfg-helpers.h   |  2 +-
 arch/arm/plat-s3c/include/plat/regs-usb-hsotg-phy.h |  2 +-
 arch/arm/plat-s3c24xx/include/plat/cpu-freq-core.h  |  2 +-
 arch/cris/arch-v10/lib/old_checksum.c               |  2 +-
 arch/cris/arch-v32/mm/tlb.c                         |  2 +-
 arch/h8300/include/asm/io.h                         |  2 +-
 arch/ia64/sn/kernel/setup.c                         |  2 +-
 arch/m68k/atari/atakeyb.c                           |  2 +-
 arch/m68k/include/asm/io_no.h                       |  2 +-
 arch/powerpc/boot/dts/kmeter1.dts                   |  2 +-
 arch/s390/include/asm/cio.h                         |  2 +-
 arch/s390/kernel/sclp.S                             |  2 +-
 arch/sparc/kernel/leon_kernel.c                     |  2 +-
 arch/sparc/kernel/perf_event.c                      |  2 +-
 arch/x86/crypto/twofish-i586-asm_32.S               | 10 +++++-----
 arch/x86/crypto/twofish-x86_64-asm_64.S             | 20 ++++++++++----------
 arch/x86/kernel/head_64.S                           |  2 +-
 arch/x86/kernel/pci-calgary_64.c                    |  2 +-
 arch/x86/kernel/tsc.c                               |  2 +-
 arch/xtensa/kernel/entry.S                          |  4 ++--
 block/bsg.c                                         |  2 +-
 drivers/acpi/dock.c                                 |  2 +-
 drivers/ata/libata-sff.c                            |  2 +-
 drivers/ata/pata_acpi.c                             |  2 +-
 drivers/ata/pata_hpt3x3.c                           |  2 +-
 drivers/ata/pata_pcmcia.c                           |  2 +-
 drivers/block/drbd/drbd_int.h                       |  4 ++--
 drivers/block/drbd/drbd_req.h                       |  2 +-
 drivers/char/agp/intel-agp.c                        |  2 +-
 drivers/char/applicom.c                             |  2 +-
 drivers/char/hvc_iseries.c                          |  2 +-
 drivers/char/hw_random/n2-drv.c                     |  2 +-
 drivers/char/ip2/i2hw.h                             |  2 +-
 drivers/char/pty.c                                  |  2 +-
 drivers/char/tty_io.c                               |  2 +-
 drivers/char/vt.c                                   |  2 +-
 drivers/dma/coh901318_lli.h                         |  2 +-
 drivers/gpu/drm/nouveau/nouveau_bios.c              |  2 +-
 drivers/gpu/drm/nouveau/nouveau_drv.h               |  2 +-
 drivers/gpu/drm/via/via_irq.c                       |  4 ++--
 drivers/i2c/busses/i2c-pxa.c                        |  2 +-
 drivers/infiniband/hw/ehca/ehca_qes.h               |  4 ++--
 drivers/infiniband/hw/ehca/ehca_reqs.c              |  2 +-
 drivers/input/misc/yealink.h                        |  2 +-
 drivers/isdn/i4l/isdn_common.c                      |  2 +-
 drivers/media/dvb/dvb-core/dvb_frontend.h           |  8 ++++----
 drivers/media/video/bt8xx/bttv-cards.c              |  4 ++--
 drivers/media/video/gspca/ov519.c                   |  2 +-
 drivers/media/video/pwc/philips.txt                 |  2 +-
 drivers/media/video/sn9c102/sn9c102_sensor.h        |  2 +-
 drivers/media/video/tea6420.c                       |  2 +-
 drivers/mfd/sm501.c                                 |  8 ++++----
 drivers/mmc/host/mxcmmc.c                           |  2 +-
 drivers/mtd/chips/jedec_probe.c                     |  2 +-
 drivers/mtd/nand/bcm_umi_nand.c                     |  4 ++--
 drivers/mtd/nand/mxc_nand.c                         |  2 +-
 drivers/net/atlx/atl2.h                             |  2 +-
 drivers/net/chelsio/sge.c                           |  2 +-
 drivers/net/e1000e/82571.c                          |  2 +-
 drivers/net/e1000e/lib.c                            |  2 +-
 drivers/net/igb/igb_main.c                          |  2 +-
 drivers/net/irda/sa1100_ir.c                        |  2 +-
 drivers/net/qlge/qlge_ethtool.c                     |  2 +-
 drivers/net/qlge/qlge_main.c                        |  2 +-
 drivers/net/sfc/regs.h                              |  2 +-
 drivers/net/smsc9420.c                              |  2 +-
 drivers/net/spider_net.c                            |  4 ++--
 drivers/net/sungem.c                                |  2 +-
 drivers/net/tehuti.c                                |  2 +-
 drivers/net/tokenring/tms380tr.c                    |  4 ++--
 drivers/net/tun.c                                   |  2 +-
 drivers/net/ucc_geth.c                              |  2 +-
 drivers/net/wimax/i2400m/fw.c                       |  2 +-
 drivers/net/wimax/i2400m/i2400m.h                   |  2 +-
 drivers/net/wimax/i2400m/sdio.c                     |  4 ++--
 drivers/net/wimax/i2400m/usb.c                      |  4 ++--
 drivers/net/wireless/ath/ar9170/main.c              |  2 +-
 drivers/net/wireless/iwmc3200wifi/lmac.h            |  2 +-
 drivers/net/wireless/rt2x00/rt2500usb.c             |  4 ++--
 drivers/net/wireless/rt2x00/rt2800usb.c             |  4 ++--
 drivers/net/wireless/rt2x00/rt2x00debug.c           |  2 +-
 drivers/net/wireless/rt2x00/rt2x00dev.c             |  2 +-
 drivers/net/wireless/rt2x00/rt2x00queue.c           |  2 +-
 drivers/net/wireless/rt2x00/rt61pci.c               |  2 +-
 drivers/net/wireless/rt2x00/rt73usb.c               |  6 +++---
 drivers/s390/char/raw3270.c                         |  2 +-
 drivers/s390/char/sclp.c                            |  2 +-
 drivers/scsi/a100u2w.c                              |  2 +-
 drivers/scsi/initio.c                               |  2 +-
 drivers/scsi/libfc/fc_fcp.c                         |  2 +-
 drivers/scsi/lpfc/lpfc_els.c                        |  4 ++--
 drivers/scsi/pcmcia/nsp_cs.h                        |  2 +-
 drivers/scsi/pm8001/pm8001_hwi.c                    |  2 +-
 drivers/scsi/pm8001/pm8001_sas.c                    |  2 +-
 drivers/scsi/pmcraid.h                              |  2 +-
 drivers/scsi/sd.c                                   |  2 +-
 drivers/spi/spi_s3c24xx.c                           |  2 +-
 drivers/usb/musb/musb_regs.h                        |  2 +-
 drivers/usb/serial/cypress_m8.c                     |  2 +-
 drivers/video/omap/lcdc.c                           |  2 +-
 drivers/video/s1d13xxxfb.c                          |  4 ++--
 drivers/video/sm501fb.c                             |  2 +-
 fs/affs/bitmap.c                                    |  2 +-
 fs/binfmt_elf_fdpic.c                               |  2 +-
 fs/cifs/cifs_dfs_ref.c                              |  2 +-
 fs/cifs/cifssmb.c                                   |  2 +-
 fs/ext4/move_extent.c                               |  2 +-
 fs/fuse/inode.c                                     |  2 +-
 fs/gfs2/ops_fstype.c                                |  2 +-
 fs/jbd/transaction.c                                |  2 +-
 fs/nfsd/nfs4xdr.c                                   |  2 +-
 fs/ocfs2/dlmglue.c                                  |  2 +-
 fs/ocfs2/extent_map.c                               |  2 +-
 fs/reiserfs/bitmap.c                                |  2 +-
 include/linux/hil.h                                 | 16 ++++++++--------
 include/linux/lru_cache.h                           |  2 +-
 include/linux/sched.h                               |  2 +-
 include/media/davinci/vpfe_capture.h                |  2 +-
 net/ipv4/tcp_timer.c                                |  2 +-
 net/mac80211/mesh_plink.c                           |  2 +-
 net/netfilter/nf_conntrack_sip.c                    |  4 ++--
 net/netfilter/xt_hashlimit.c                        |  2 +-
 net/sctp/sm_sideeffect.c                            |  2 +-
 scripts/gfp-translate                               |  2 +-
 sound/pci/rme9652/hdspm.c                           |  2 +-
 sound/soc/codecs/wm8990.c                           |  2 +-
 tools/perf/util/hist.c                              |  2 +-
 141 files changed, 192 insertions(+), 192 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/DocBook/mtdnand.tmpl b/Documentation/DocBook/mtdnand.tmpl
index 5e7d84b48505..133cd6c3f3c1 100644
--- a/Documentation/DocBook/mtdnand.tmpl
+++ b/Documentation/DocBook/mtdnand.tmpl
@@ -488,7 +488,7 @@ static void board_select_chip (struct mtd_info *mtd, int chip)
 				The ECC bytes must be placed immidiately after the data
 				bytes in order to make the syndrome generator work. This
 				is contrary to the usual layout used by software ECC. The
-				seperation of data and out of band area is not longer
+				separation of data and out of band area is not longer
 				possible. The nand driver code handles this layout and
 				the remaining free bytes in the oob area are managed by 
 				the autoplacement code. Provide a matching oob-layout
@@ -560,7 +560,7 @@ static void board_select_chip (struct mtd_info *mtd, int chip)
 				bad blocks. They have factory marked good blocks. The marker pattern
 				is erased when the block is erased to be reused. So in case of
 				powerloss before writing the pattern back to the chip this block 
-				would be lost and added to the bad blocks. Therefor we scan the 
+				would be lost and added to the bad blocks. Therefore we scan the 
 				chip(s) when we detect them the first time for good blocks and 
 				store this information in a bad block table before erasing any 
 				of the blocks.
@@ -1094,7 +1094,7 @@ in this page</entry>
 		manufacturers specifications. This applies similar to the spare area. 
 	</para>
 	<para>
-		Therefor NAND aware filesystems must either write in page size chunks
+		Therefore NAND aware filesystems must either write in page size chunks
 		or hold a writebuffer to collect smaller writes until they sum up to 
 		pagesize. Available NAND aware filesystems: JFFS2, YAFFS. 		
 	</para>
diff --git a/Documentation/DocBook/v4l/common.xml b/Documentation/DocBook/v4l/common.xml
index c65f0ac9b6ee..cea23e1c4fc6 100644
--- a/Documentation/DocBook/v4l/common.xml
+++ b/Documentation/DocBook/v4l/common.xml
@@ -1170,7 +1170,7 @@ frames per second. If less than this number of frames is to be
 captured or output, applications can request frame skipping or
 duplicating on the driver side. This is especially useful when using
 the &func-read; or &func-write;, which are not augmented by timestamps
-or sequence counters, and to avoid unneccessary data copying.</para>
+or sequence counters, and to avoid unnecessary data copying.</para>
 
     <para>Finally these ioctls can be used to determine the number of
 buffers used internally by a driver in read/write mode. For
diff --git a/Documentation/DocBook/v4l/vidioc-g-parm.xml b/Documentation/DocBook/v4l/vidioc-g-parm.xml
index 78332d365ce9..392aa9e5571e 100644
--- a/Documentation/DocBook/v4l/vidioc-g-parm.xml
+++ b/Documentation/DocBook/v4l/vidioc-g-parm.xml
@@ -55,7 +55,7 @@ captured or output, applications can request frame skipping or
 duplicating on the driver side. This is especially useful when using
 the <function>read()</function> or <function>write()</function>, which
 are not augmented by timestamps or sequence counters, and to avoid
-unneccessary data copying.</para>
+unnecessary data copying.</para>
 
     <para>Further these ioctls can be used to determine the number of
 buffers used internally by a driver in read/write mode. For
diff --git a/Documentation/arm/Samsung-S3C24XX/CPUfreq.txt b/Documentation/arm/Samsung-S3C24XX/CPUfreq.txt
index 76b3a11e90be..fa968aa99d67 100644
--- a/Documentation/arm/Samsung-S3C24XX/CPUfreq.txt
+++ b/Documentation/arm/Samsung-S3C24XX/CPUfreq.txt
@@ -14,8 +14,8 @@ Introduction
  how the clocks are arranged. The first implementation used as single
  PLL to feed the ARM, memory and peripherals via a series of dividers
  and muxes and this is the implementation that is documented here. A
- newer version where there is a seperate PLL and clock divider for the
- ARM core is available as a seperate driver.
+ newer version where there is a separate PLL and clock divider for the
+ ARM core is available as a separate driver.
 
 
 Layout
diff --git a/Documentation/hwmon/abituguru b/Documentation/hwmon/abituguru
index 87ffa0f5ec70..5eb3b9d5f0d5 100644
--- a/Documentation/hwmon/abituguru
+++ b/Documentation/hwmon/abituguru
@@ -30,7 +30,7 @@ Supported chips:
 	   bank1_types=1,1,0,0,0,0,0,2,0,0,0,0,2,0,0,1
 	   You may also need to specify the fan_sensors option for these boards
 	   fan_sensors=5
-	2) There is a seperate abituguru3 driver for these motherboards,
+	2) There is a separate abituguru3 driver for these motherboards,
 	   the abituguru (without the 3 !) driver will not work on these
 	   motherboards (and visa versa)!
 
diff --git a/Documentation/input/rotary-encoder.txt b/Documentation/input/rotary-encoder.txt
index 3a6aec40c0b0..8b4129de1d2d 100644
--- a/Documentation/input/rotary-encoder.txt
+++ b/Documentation/input/rotary-encoder.txt
@@ -75,7 +75,7 @@ and the number of steps or will clamp at the maximum and zero depending on
 the configuration.
 
 Because GPIO to IRQ mapping is platform specific, this information must
-be given in seperately to the driver. See the example below.
+be given in separately to the driver. See the example below.
 
 ---------<snip>---------
 
diff --git a/Documentation/networking/skfp.txt b/Documentation/networking/skfp.txt
index abfddf81e34a..203ec66c9fb4 100644
--- a/Documentation/networking/skfp.txt
+++ b/Documentation/networking/skfp.txt
@@ -68,7 +68,7 @@ Compaq adapters (not tested):
 =======================
 
 From v2.01 on, the driver is integrated in the linux kernel sources.
-Therefor, the installation is the same as for any other adapter
+Therefore, the installation is the same as for any other adapter
 supported by the kernel.
 Refer to the manual of your distribution about the installation
 of network adapters.
diff --git a/Documentation/s390/kvm.txt b/Documentation/s390/kvm.txt
index 6f5ceb0f09fc..85f3280d7ef6 100644
--- a/Documentation/s390/kvm.txt
+++ b/Documentation/s390/kvm.txt
@@ -102,7 +102,7 @@ args:		unsigned long
 see also:	include/linux/kvm.h
 This ioctl stores the state of the cpu at the guest real address given as
 argument, unless one of the following values defined in include/linux/kvm.h
-is given as arguement:
+is given as argument:
 KVM_S390_STORE_STATUS_NOADDR - the CPU stores its status to the save area in
 absolute lowcore as defined by the principles of operation
 KVM_S390_STORE_STATUS_PREFIXED - the CPU stores its status to the save area in
diff --git a/Documentation/scsi/ChangeLog.lpfc b/Documentation/scsi/ChangeLog.lpfc
index ff19a52fe004..2ffc1148eb95 100644
--- a/Documentation/scsi/ChangeLog.lpfc
+++ b/Documentation/scsi/ChangeLog.lpfc
@@ -989,8 +989,8 @@ Changes from 20040709 to 20040716
 	* Remove redundant port_cmp != 2 check in if
 	  (!port_cmp) { .... if (port_cmp != 2).... }
 	* Clock changes: removed struct clk_data and timerList.
-	* Clock changes: seperate nodev_tmo and els_retry_delay into 2
-	  seperate timers and convert to 1 argument changed
+	* Clock changes: separate nodev_tmo and els_retry_delay into 2
+	  separate timers and convert to 1 argument changed
 	  LPFC_NODE_FARP_PEND_t to struct lpfc_node_farp_pend convert
 	  ipfarp_tmo to 1 argument convert target struct tmofunc and
 	  rtplunfunc to 1 argument * cr_count, cr_delay and
@@ -1514,7 +1514,7 @@ Changes from 20040402 to 20040409
 	* Remove unused elxclock declaration in elx_sli.h.
 	* Since everywhere IOCB_ENTRY is used, the return value is cast,
 	  move the cast into the macro.
-	* Split ioctls out into seperate files
+	* Split ioctls out into separate files
 
 Changes from 20040326 to 20040402
 
@@ -1534,7 +1534,7 @@ Changes from 20040326 to 20040402
 	* Unused variable cleanup
 	* Use Linux list macros for DMABUF_t
 	* Break up ioctls into 3 sections, dfc, util, hbaapi
-	  rearranged code so this could be easily seperated into a
+	  rearranged code so this could be easily separated into a
 	  differnet module later All 3 are currently turned on by
 	  defines in lpfc_ioctl.c LPFC_DFC_IOCTL, LPFC_UTIL_IOCTL,
 	  LPFC_HBAAPI_IOCTL
@@ -1551,7 +1551,7 @@ Changes from 20040326 to 20040402
 	  started by lpfc_online().  lpfc_offline() only stopped
 	  els_timeout routine.  It now stops all timeout routines
 	  associated with that hba.
-	* Replace seperate next and prev pointers in struct
+	* Replace separate next and prev pointers in struct
 	  lpfc_bindlist with list_head type.  In elxHBA_t, replace
 	  fc_nlpbind_start and _end with fc_nlpbind_list and use
 	  list_head macros to access it.
diff --git a/Documentation/trace/ftrace.txt b/Documentation/trace/ftrace.txt
index bab3040da548..03485bfbd797 100644
--- a/Documentation/trace/ftrace.txt
+++ b/Documentation/trace/ftrace.txt
@@ -1588,7 +1588,7 @@ module author does not need to worry about it.
 
 When tracing is enabled, kstop_machine is called to prevent
 races with the CPUS executing code being modified (which can
-cause the CPU to do undesireable things), and the nops are
+cause the CPU to do undesirable things), and the nops are
 patched back to calls. But this time, they do not call mcount
 (which is just a function stub). They now call into the ftrace
 infrastructure.
diff --git a/arch/arm/mach-ep93xx/micro9.c b/arch/arm/mach-ep93xx/micro9.c
index f3757a1c5a10..c33360e82868 100644
--- a/arch/arm/mach-ep93xx/micro9.c
+++ b/arch/arm/mach-ep93xx/micro9.c
@@ -28,7 +28,7 @@
  *
  * Micro9-High has up to 64MB of 32-bit flash on CS1
  * Micro9-Mid has up to 64MB of either 32-bit or 16-bit flash on CS1
- * Micro9-Lite uses a seperate MTD map driver for flash support
+ * Micro9-Lite uses a separate MTD map driver for flash support
  * Micro9-Slim has up to 64MB of either 32-bit or 16-bit flash on CS1
  *************************************************************************/
 static struct physmap_flash_data micro9_flash_data;
diff --git a/arch/arm/mach-nomadik/board-nhk8815.c b/arch/arm/mach-nomadik/board-nhk8815.c
index 9438bf6613a3..ab3712c86d2b 100644
--- a/arch/arm/mach-nomadik/board-nhk8815.c
+++ b/arch/arm/mach-nomadik/board-nhk8815.c
@@ -38,7 +38,7 @@
 #define SRC_CR_INIT_MASK	0x00007fff
 #define SRC_CR_INIT_VAL		0x2aaa8000
 
-/* These adresses span 16MB, so use three individual pages */
+/* These addresses span 16MB, so use three individual pages */
 static struct resource nhk8815_nand_resources[] = {
 	{
 		.name = "nand_addr",
diff --git a/arch/arm/mach-u300/core.c b/arch/arm/mach-u300/core.c
index 653e25be3dd8..d0cb5e940776 100644
--- a/arch/arm/mach-u300/core.c
+++ b/arch/arm/mach-u300/core.c
@@ -356,7 +356,7 @@ static struct resource ave_resources[] = {
 	/*
 	 * The AVE3e requires two regions of 256MB that it considers
 	 * "invisible". The hardware will not be able to access these
-	 * adresses, so they should never point to system RAM.
+	 * addresses, so they should never point to system RAM.
 	 */
 	{
 		.name  = "AVE3e Reserved 0",
@@ -571,7 +571,7 @@ static void __init u300_init_check_chip(void)
 /*
  * Some devices and their resources require reserved physical memory from
  * the end of the available RAM. This function traverses the list of devices
- * and assigns actual adresses to these.
+ * and assigns actual addresses to these.
  */
 static void __init u300_assign_physmem(void)
 {
diff --git a/arch/arm/mach-u300/include/mach/debug-macro.S b/arch/arm/mach-u300/include/mach/debug-macro.S
index f3a1cbbeeab3..d591fe13ed13 100644
--- a/arch/arm/mach-u300/include/mach/debug-macro.S
+++ b/arch/arm/mach-u300/include/mach/debug-macro.S
@@ -11,7 +11,7 @@
 #include <mach/hardware.h>
 
 	.macro	addruart,rx
-	/* If we move the adress using MMU, use this. */
+	/* If we move the address using MMU, use this. */
 	mrc	p15, 0, \rx, c1, c0
 	tst	\rx, #1			@ MMU enabled?
 	ldreq	\rx,	  = U300_SLOW_PER_PHYS_BASE @ MMU off, physical address
diff --git a/arch/arm/plat-s3c/include/plat/gpio-cfg-helpers.h b/arch/arm/plat-s3c/include/plat/gpio-cfg-helpers.h
index 652e2bbdaa20..dda19da037ad 100644
--- a/arch/arm/plat-s3c/include/plat/gpio-cfg-helpers.h
+++ b/arch/arm/plat-s3c/include/plat/gpio-cfg-helpers.h
@@ -78,7 +78,7 @@ extern int s3c_gpio_setcfg_s3c24xx_a(struct s3c_gpio_chip *chip,
  *	others = Special functions (dependant on bank)
  *
  * Note, since the code to deal with the case where there are two control
- * registers instead of one, we do not have a seperate set of functions for
+ * registers instead of one, we do not have a separate set of functions for
  * each case.
 */
 extern int s3c_gpio_setcfg_s3c64xx_4bit(struct s3c_gpio_chip *chip,
diff --git a/arch/arm/plat-s3c/include/plat/regs-usb-hsotg-phy.h b/arch/arm/plat-s3c/include/plat/regs-usb-hsotg-phy.h
index 36a85f5000c8..a111ad871833 100644
--- a/arch/arm/plat-s3c/include/plat/regs-usb-hsotg-phy.h
+++ b/arch/arm/plat-s3c/include/plat/regs-usb-hsotg-phy.h
@@ -12,7 +12,7 @@
  * published by the Free Software Foundation.
 */
 
-/* Note, this is a seperate header file as some of the clock framework
+/* Note, this is a separate header file as some of the clock framework
  * needs to touch this if the clk_48m is used as the USB OHCI or other
  * peripheral source.
 */
diff --git a/arch/arm/plat-s3c24xx/include/plat/cpu-freq-core.h b/arch/arm/plat-s3c24xx/include/plat/cpu-freq-core.h
index 33d421d78bad..d623235ae961 100644
--- a/arch/arm/plat-s3c24xx/include/plat/cpu-freq-core.h
+++ b/arch/arm/plat-s3c24xx/include/plat/cpu-freq-core.h
@@ -135,7 +135,7 @@ struct s3c_cpufreq_config {
  * @locktime_m: The lock-time in uS for the MPLL.
  * @locktime_u: The lock-time in uS for the UPLL.
  * @locttime_bits: The number of bits each LOCKTIME field.
- * @need_pll: Set if this driver needs to change the PLL values to acheive
+ * @need_pll: Set if this driver needs to change the PLL values to achieve
  *	any frequency changes. This is really only need by devices like the
  *	S3C2410 where there is no or limited divider between the PLL and the
  *	ARMCLK.
diff --git a/arch/cris/arch-v10/lib/old_checksum.c b/arch/cris/arch-v10/lib/old_checksum.c
index 1734b467efa6..8f79163f1394 100644
--- a/arch/cris/arch-v10/lib/old_checksum.c
+++ b/arch/cris/arch-v10/lib/old_checksum.c
@@ -77,7 +77,7 @@ __wsum csum_partial(const void *p, int len, __wsum __sum)
 		sum += *buff++;
 
 	if (endMarker > buff)
-		sum += *(const u8 *)buff;	/* add extra byte seperately */
+		sum += *(const u8 *)buff;	/* add extra byte separately */
 
 	BITOFF;
 	return (__force __wsum)sum;
diff --git a/arch/cris/arch-v32/mm/tlb.c b/arch/cris/arch-v32/mm/tlb.c
index 6779bcb28ab0..c030d020660a 100644
--- a/arch/cris/arch-v32/mm/tlb.c
+++ b/arch/cris/arch-v32/mm/tlb.c
@@ -189,7 +189,7 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next,
 		spin_unlock(&mmu_context_lock);
 
 		/*
-		 * Remember the pgd for the fault handlers. Keep a seperate
+		 * Remember the pgd for the fault handlers. Keep a separate
 		 * copy of it because current and active_mm might be invalid
 		 * at points where * there's still a need to derefer the pgd.
 		 */
diff --git a/arch/h8300/include/asm/io.h b/arch/h8300/include/asm/io.h
index 33e842f3284b..c1a8df22080f 100644
--- a/arch/h8300/include/asm/io.h
+++ b/arch/h8300/include/asm/io.h
@@ -25,7 +25,7 @@
  * memory location directly.
  */
 /* ++roman: The assignments to temp. vars avoid that gcc sometimes generates
- * two accesses to memory, which may be undesireable for some devices.
+ * two accesses to memory, which may be undesirable for some devices.
  */
 
 /*
diff --git a/arch/ia64/sn/kernel/setup.c b/arch/ia64/sn/kernel/setup.c
index ece1bf994499..c6c6d9381126 100644
--- a/arch/ia64/sn/kernel/setup.c
+++ b/arch/ia64/sn/kernel/setup.c
@@ -241,7 +241,7 @@ static void __cpuinit sn_check_for_wars(void)
  * Note:  This stuff is duped here because Altix requires the PCDP to
  * locate a usable VGA device due to lack of proper ACPI support.  Structures
  * could be used from drivers/firmware/pcdp.h, but it was decided that moving
- * this file to a more public location just for Altix use was undesireable.
+ * this file to a more public location just for Altix use was undesirable.
  */
 
 struct hcdp_uart_desc {
diff --git a/arch/m68k/atari/atakeyb.c b/arch/m68k/atari/atakeyb.c
index 4add96d13b19..5890897d28bf 100644
--- a/arch/m68k/atari/atakeyb.c
+++ b/arch/m68k/atari/atakeyb.c
@@ -121,7 +121,7 @@ KEYBOARD_STATE kb_state;
  * bytes have been lost and in which state of the packet structure we are now.
  * This usually causes keyboards bytes to be interpreted as mouse movements
  * and vice versa, which is very annoying. It seems better to throw away some
- * bytes (that are usually mouse bytes) than to misinterpret them. Therefor I
+ * bytes (that are usually mouse bytes) than to misinterpret them. Therefore I
  * introduced the RESYNC state for IKBD data. In this state, the bytes up to
  * one that really looks like a key event (0x04..0xf2) or the start of a mouse
  * packet (0xf8..0xfb) are thrown away, but at most 2 bytes. This at least
diff --git a/arch/m68k/include/asm/io_no.h b/arch/m68k/include/asm/io_no.h
index 359065d5a9f2..6e2413e518cb 100644
--- a/arch/m68k/include/asm/io_no.h
+++ b/arch/m68k/include/asm/io_no.h
@@ -16,7 +16,7 @@
  * memory location directly.
  */
 /* ++roman: The assignments to temp. vars avoid that gcc sometimes generates
- * two accesses to memory, which may be undesireable for some devices.
+ * two accesses to memory, which may be undesirable for some devices.
  */
 
 /*
diff --git a/arch/powerpc/boot/dts/kmeter1.dts b/arch/powerpc/boot/dts/kmeter1.dts
index 65b8b4f27efe..d8b5d12fb663 100644
--- a/arch/powerpc/boot/dts/kmeter1.dts
+++ b/arch/powerpc/boot/dts/kmeter1.dts
@@ -490,7 +490,7 @@
 			compatible = "cfi-flash";
 			/*
 			 * The Intel P30 chip has 2 non-identical chips on
-			 * one die, so we need to define 2 seperate regions
+			 * one die, so we need to define 2 separate regions
 			 * that are scanned by physmap_of independantly.
 			 */
 			reg = <0 0x00000000 0x02000000
diff --git a/arch/s390/include/asm/cio.h b/arch/s390/include/asm/cio.h
index e85679af54dd..e34347d567a6 100644
--- a/arch/s390/include/asm/cio.h
+++ b/arch/s390/include/asm/cio.h
@@ -20,7 +20,7 @@
 /**
  * struct ccw1 - channel command word
  * @cmd_code: command code
- * @flags: flags, like IDA adressing, etc.
+ * @flags: flags, like IDA addressing, etc.
  * @count: byte count
  * @cda: data address
  *
diff --git a/arch/s390/kernel/sclp.S b/arch/s390/kernel/sclp.S
index e27ca63076d1..27c1a2e236d1 100644
--- a/arch/s390/kernel/sclp.S
+++ b/arch/s390/kernel/sclp.S
@@ -221,7 +221,7 @@ _sclp_print:
 	lh	%r9,0(%r8)			# update sccb length
 	ar	%r9,%r6
 	sth	%r9,0(%r8)
-	ar	%r7,%r6				# update current mto adress
+	ar	%r7,%r6				# update current mto address
 	ltr	%r0,%r0				# more characters?
 	jnz	.LinitmtoS4
 	l	%r2,.LwritedataS4-.LbaseS4(%r13)# write data
diff --git a/arch/sparc/kernel/leon_kernel.c b/arch/sparc/kernel/leon_kernel.c
index 87f1760c0aa2..554e0b80bcde 100644
--- a/arch/sparc/kernel/leon_kernel.c
+++ b/arch/sparc/kernel/leon_kernel.c
@@ -124,7 +124,7 @@ void __init leon_init_timers(irq_handler_t counter_fn)
 
 		if (!(LEON3_BYPASS_LOAD_PA(&leon3_gptimer_regs->config) &
 		      (1<<LEON3_GPTIMER_SEPIRQ))) {
-			prom_printf("irq timer not configured with seperate irqs \n");
+			prom_printf("irq timer not configured with separate irqs \n");
 			BUG();
 		}
 
diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c
index e856456ec02f..a565ee5146eb 100644
--- a/arch/sparc/kernel/perf_event.c
+++ b/arch/sparc/kernel/perf_event.c
@@ -1353,7 +1353,7 @@ static void perf_callchain_user_32(struct pt_regs *regs,
 }
 
 /* Like powerpc we can't get PMU interrupts within the PMU handler,
- * so no need for seperate NMI and IRQ chains as on x86.
+ * so no need for separate NMI and IRQ chains as on x86.
  */
 static DEFINE_PER_CPU(struct perf_callchain_entry, callchain);
 
diff --git a/arch/x86/crypto/twofish-i586-asm_32.S b/arch/x86/crypto/twofish-i586-asm_32.S
index 39b98ed2c1b9..575331cb2a8a 100644
--- a/arch/x86/crypto/twofish-i586-asm_32.S
+++ b/arch/x86/crypto/twofish-i586-asm_32.S
@@ -22,7 +22,7 @@
 
 #include <asm/asm-offsets.h>
 
-/* return adress at 0 */
+/* return address at 0 */
 
 #define in_blk    12  /* input byte array address parameter*/
 #define out_blk   8  /* output byte array address parameter*/
@@ -230,8 +230,8 @@ twofish_enc_blk:
 	push    %edi
 
 	mov	tfm + 16(%esp),	%ebp	/* abuse the base pointer: set new base bointer to the crypto tfm */
-	add	$crypto_tfm_ctx_offset, %ebp	/* ctx adress */
-	mov     in_blk+16(%esp),%edi	/* input adress in edi */
+	add	$crypto_tfm_ctx_offset, %ebp	/* ctx address */
+	mov     in_blk+16(%esp),%edi	/* input address in edi */
 
 	mov	(%edi),		%eax
 	mov	b_offset(%edi),	%ebx
@@ -286,8 +286,8 @@ twofish_dec_blk:
 
 
 	mov	tfm + 16(%esp),	%ebp	/* abuse the base pointer: set new base bointer to the crypto tfm */
-	add	$crypto_tfm_ctx_offset, %ebp	/* ctx adress */
-	mov     in_blk+16(%esp),%edi	/* input adress in edi */
+	add	$crypto_tfm_ctx_offset, %ebp	/* ctx address */
+	mov     in_blk+16(%esp),%edi	/* input address in edi */
 
 	mov	(%edi),		%eax
 	mov	b_offset(%edi),	%ebx
diff --git a/arch/x86/crypto/twofish-x86_64-asm_64.S b/arch/x86/crypto/twofish-x86_64-asm_64.S
index 35974a586615..573aa102542e 100644
--- a/arch/x86/crypto/twofish-x86_64-asm_64.S
+++ b/arch/x86/crypto/twofish-x86_64-asm_64.S
@@ -221,11 +221,11 @@
 twofish_enc_blk:
 	pushq    R1
 
-	/* %rdi contains the crypto tfm adress */
-	/* %rsi contains the output adress */
-	/* %rdx contains the input adress */
-	add	$crypto_tfm_ctx_offset, %rdi	/* set ctx adress */
-	/* ctx adress is moved to free one non-rex register
+	/* %rdi contains the crypto tfm address */
+	/* %rsi contains the output address */
+	/* %rdx contains the input address */
+	add	$crypto_tfm_ctx_offset, %rdi	/* set ctx address */
+	/* ctx address is moved to free one non-rex register
 	as target for the 8bit high operations */
 	mov	%rdi,		%r11
 
@@ -274,11 +274,11 @@ twofish_enc_blk:
 twofish_dec_blk:
 	pushq    R1
 
-	/* %rdi contains the crypto tfm adress */
-	/* %rsi contains the output adress */
-	/* %rdx contains the input adress */
-	add	$crypto_tfm_ctx_offset, %rdi	/* set ctx adress */
-	/* ctx adress is moved to free one non-rex register
+	/* %rdi contains the crypto tfm address */
+	/* %rsi contains the output address */
+	/* %rdx contains the input address */
+	add	$crypto_tfm_ctx_offset, %rdi	/* set ctx address */
+	/* ctx address is moved to free one non-rex register
 	as target for the 8bit high operations */
 	mov	%rdi,		%r11
 
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 2d8b5035371c..3d1e6f16b7a6 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -27,7 +27,7 @@
 #define GET_CR2_INTO_RCX movq %cr2, %rcx
 #endif
 
-/* we are not able to switch in one step to the final KERNEL ADRESS SPACE
+/* we are not able to switch in one step to the final KERNEL ADDRESS SPACE
  * because we need identity-mapped pages.
  *
  */
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index 2bbde6078143..fb99f7edb341 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -1309,7 +1309,7 @@ static void calgary_init_bitmap_from_tce_table(struct iommu_table *tbl)
 /*
  * get_tce_space_from_tar():
  * Function for kdump case. Get the tce tables from first kernel
- * by reading the contents of the base adress register of calgary iommu
+ * by reading the contents of the base address register of calgary iommu
  */
 static void __init get_tce_space_from_tar(void)
 {
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 597683aa5ba0..dec8f68e3eda 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -50,7 +50,7 @@ u64 native_sched_clock(void)
 	 *   unstable. We do this because unlike Time Of Day,
 	 *   the scheduler clock tolerates small errors and it's
 	 *   very important for it to be as fast as the platform
-	 *   can achive it. )
+	 *   can achieve it. )
 	 */
 	if (unlikely(tsc_disabled)) {
 		/* No locking but a rare wrong value is not a big deal: */
diff --git a/arch/xtensa/kernel/entry.S b/arch/xtensa/kernel/entry.S
index 80d24c485fd3..77fc9f6dc016 100644
--- a/arch/xtensa/kernel/entry.S
+++ b/arch/xtensa/kernel/entry.S
@@ -104,7 +104,7 @@
  *   excsave has been restored, and
  *   stack pointer (a1) has been set.
  *
- * Note: _user_exception might be at an odd adress. Don't use call0..call12
+ * Note: _user_exception might be at an odd address. Don't use call0..call12
  */
 
 ENTRY(user_exception)
@@ -244,7 +244,7 @@ _user_exception:
  *   excsave has been restored, and
  *   stack pointer (a1) has been set.
  *
- * Note: _kernel_exception might be at an odd adress. Don't use call0..call12
+ * Note: _kernel_exception might be at an odd address. Don't use call0..call12
  */
 
 ENTRY(kernel_exception)
diff --git a/block/bsg.c b/block/bsg.c
index a9fd2d84b53a..46597a6bd112 100644
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -260,7 +260,7 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr, fmode_t has_write_perm,
 		return ERR_PTR(ret);
 
 	/*
-	 * map scatter-gather elements seperately and string them to request
+	 * map scatter-gather elements separately and string them to request
 	 */
 	rq = blk_get_request(q, rw, GFP_KERNEL);
 	if (!rq)
diff --git a/drivers/acpi/dock.c b/drivers/acpi/dock.c
index bbc2c1315c47..d7f363f9435f 100644
--- a/drivers/acpi/dock.c
+++ b/drivers/acpi/dock.c
@@ -605,7 +605,7 @@ register_hotplug_dock_device(acpi_handle handle, struct acpi_dock_ops *ops,
 	list_for_each_entry(dock_station, &dock_stations, sibling) {
 		/*
 		 * An ATA bay can be in a dock and itself can be ejected
-		 * seperately, so there are two 'dock stations' which need the
+		 * separately, so there are two 'dock stations' which need the
 		 * ops
 		 */
 		dd = find_dock_dependent_device(dock_station, handle);
diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c
index 741065c9da67..7f2c94a07c00 100644
--- a/drivers/ata/libata-sff.c
+++ b/drivers/ata/libata-sff.c
@@ -2258,7 +2258,7 @@ EXPORT_SYMBOL_GPL(ata_sff_postreset);
  *	@qc: command
  *
  *	Drain the FIFO and device of any stuck data following a command
- *	failing to complete. In some cases this is neccessary before a
+ *	failing to complete. In some cases this is necessary before a
  *	reset will recover the device.
  *
  */
diff --git a/drivers/ata/pata_acpi.c b/drivers/ata/pata_acpi.c
index d8f35fe44421..9e33da9565d9 100644
--- a/drivers/ata/pata_acpi.c
+++ b/drivers/ata/pata_acpi.c
@@ -161,7 +161,7 @@ static void pacpi_set_dmamode(struct ata_port *ap, struct ata_device *adev)
  *
  *	Called when the libata layer is about to issue a command. We wrap
  *	this interface so that we can load the correct ATA timings if
- *	neccessary.
+ *	necessary.
  */
 
 static unsigned int pacpi_qc_issue(struct ata_queued_cmd *qc)
diff --git a/drivers/ata/pata_hpt3x3.c b/drivers/ata/pata_hpt3x3.c
index c86c71639a95..727a81ce4c9f 100644
--- a/drivers/ata/pata_hpt3x3.c
+++ b/drivers/ata/pata_hpt3x3.c
@@ -180,7 +180,7 @@ static void hpt3x3_init_chipset(struct pci_dev *dev)
  *	@id: Entry in match table
  *
  *	Perform basic initialisation. We set the device up so we access all
- *	ports via BAR4. This is neccessary to work around errata.
+ *	ports via BAR4. This is necessary to work around errata.
  */
 
 static int hpt3x3_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
diff --git a/drivers/ata/pata_pcmcia.c b/drivers/ata/pata_pcmcia.c
index 1b392c9e8531..416aebb8b913 100644
--- a/drivers/ata/pata_pcmcia.c
+++ b/drivers/ata/pata_pcmcia.c
@@ -131,7 +131,7 @@ static unsigned int ata_data_xfer_8bit(struct ata_device *dev,
  *	@qc: command
  *
  *	Drain the FIFO and device of any stuck data following a command
- *	failing to complete. In some cases this is neccessary before a
+ *	failing to complete. In some cases this is necessary before a
  *	reset will recover the device.
  *
  */
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index 2bf3a6ef3684..d9301e861d9f 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -95,7 +95,7 @@ extern char usermode_helper[];
 
 /* All EEs on the free list should have ID_VACANT (== 0)
  * freshly allocated EEs get !ID_VACANT (== 1)
- * so if it says "cannot dereference null pointer at adress 0x00000001",
+ * so if it says "cannot dereference null pointer at address 0x00000001",
  * it is most likely one of these :( */
 
 #define ID_IN_SYNC      (4711ULL)
@@ -1171,7 +1171,7 @@ extern int drbd_bitmap_io(struct drbd_conf *mdev, int (*io_fn)(struct drbd_conf
 /* Meta data layout
    We reserve a 128MB Block (4k aligned)
    * either at the end of the backing device
-   * or on a seperate meta data device. */
+   * or on a separate meta data device. */
 
 #define MD_RESERVED_SECT (128LU << 11)  /* 128 MB, unit sectors */
 /* The following numbers are sectors */
diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h
index f22c1bc8ec7e..16119d7056cc 100644
--- a/drivers/block/drbd/drbd_req.h
+++ b/drivers/block/drbd/drbd_req.h
@@ -57,7 +57,7 @@
  *
  *  It may me handed over to the local disk subsystem.
  *  It may be completed by the local disk subsystem,
- *    either sucessfully or with io-error.
+ *    either successfully or with io-error.
  *  In case it is a READ request, and it failed locally,
  *    it may be retried remotely.
  *
diff --git a/drivers/char/agp/intel-agp.c b/drivers/char/agp/intel-agp.c
index 3999a5f25f38..45a22f9bfec2 100644
--- a/drivers/char/agp/intel-agp.c
+++ b/drivers/char/agp/intel-agp.c
@@ -269,7 +269,7 @@ static void intel_agp_insert_sg_entries(struct agp_memory *mem,
 			j++;
 		}
 	} else {
-		/* sg may merge pages, but we have to seperate
+		/* sg may merge pages, but we have to separate
 		 * per-page addr for GTT */
 		unsigned int len, m;
 
diff --git a/drivers/char/applicom.c b/drivers/char/applicom.c
index fe2cb2f5db17..a7424bf7eacf 100644
--- a/drivers/char/applicom.c
+++ b/drivers/char/applicom.c
@@ -14,7 +14,7 @@
 /* et passe en argument a acinit, mais est scrute sur le bus pour s'adapter  */
 /* au nombre de cartes presentes sur le bus. IOCL code 6 affichait V2.4.3    */
 /* F.LAFORSE 28/11/95 creation de fichiers acXX.o avec les differentes       */
-/* adresses de base des cartes, IOCTL 6 plus complet                         */
+/* addresses de base des cartes, IOCTL 6 plus complet                         */
 /* J.PAGET le 19/08/96 copie de la version V2.6 en V2.8.0 sans modification  */
 /* de code autre que le texte V2.6.1 en V2.8.0                               */
 /*****************************************************************************/
diff --git a/drivers/char/hvc_iseries.c b/drivers/char/hvc_iseries.c
index 936d05bf37fa..0794925d8042 100644
--- a/drivers/char/hvc_iseries.c
+++ b/drivers/char/hvc_iseries.c
@@ -353,7 +353,7 @@ static void hvc_close_event(struct HvLpEvent *event)
 
 	if (!hvlpevent_is_int(event)) {
 		printk(KERN_WARNING
-			"hvc: got unexpected close acknowlegement\n");
+			"hvc: got unexpected close acknowledgement\n");
 		return;
 	}
 
diff --git a/drivers/char/hw_random/n2-drv.c b/drivers/char/hw_random/n2-drv.c
index 9b3e09cd41f9..10f868eefaa6 100644
--- a/drivers/char/hw_random/n2-drv.c
+++ b/drivers/char/hw_random/n2-drv.c
@@ -71,7 +71,7 @@ MODULE_VERSION(DRV_MODULE_VERSION);
  *        x22 + x21 + x17 + x15 + x13 + x12 + x11 + x7 + x5 + x + 1
  *
  * The RNG_CTL_VCO value of each noise cell must be programmed
- * seperately.  This is why 4 control register values must be provided
+ * separately.  This is why 4 control register values must be provided
  * to the hypervisor.  During a write, the hypervisor writes them all,
  * one at a time, to the actual RNG_CTL register.  The first three
  * values are used to setup the desired RNG_CTL_VCO for each entropy
diff --git a/drivers/char/ip2/i2hw.h b/drivers/char/ip2/i2hw.h
index 8aa6e7ab8d5b..c0ba6c05f0cd 100644
--- a/drivers/char/ip2/i2hw.h
+++ b/drivers/char/ip2/i2hw.h
@@ -559,7 +559,7 @@ Loadware may be sent to the board in two ways:
 
 2) It may be hard-coded into your source by including a .h file (typically
 	supplied by Computone), which declares a data array and initializes every
-	element. This acheives the same result as if an entire loadware file had 
+	element. This achieves the same result as if an entire loadware file had 
 	been read into the array.
 
 	This requires more data space in your program, but access to the file system
diff --git a/drivers/char/pty.c b/drivers/char/pty.c
index 385c44b3034f..5ee424817263 100644
--- a/drivers/char/pty.c
+++ b/drivers/char/pty.c
@@ -220,7 +220,7 @@ static void pty_set_termios(struct tty_struct *tty,
  *	@tty: tty being resized
  *	@ws: window size being set.
  *
- *	Update the termios variables and send the neccessary signals to
+ *	Update the termios variables and send the necessary signals to
  *	peform a terminal resize correctly
  */
 
diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c
index c6f3b48be9dd..56b11c1c7aeb 100644
--- a/drivers/char/tty_io.c
+++ b/drivers/char/tty_io.c
@@ -2026,7 +2026,7 @@ static int tiocgwinsz(struct tty_struct *tty, struct winsize __user *arg)
  *	@rows: rows (character)
  *	@cols: cols (character)
  *
- *	Update the termios variables and send the neccessary signals to
+ *	Update the termios variables and send the necessary signals to
  *	peform a terminal resize correctly
  */
 
diff --git a/drivers/char/vt.c b/drivers/char/vt.c
index 94f530a29691..bd1d1164fec5 100644
--- a/drivers/char/vt.c
+++ b/drivers/char/vt.c
@@ -821,7 +821,7 @@ static inline int resize_screen(struct vc_data *vc, int width, int height,
  *
  *	Resize a virtual console, clipping according to the actual constraints.
  *	If the caller passes a tty structure then update the termios winsize
- *	information and perform any neccessary signal handling.
+ *	information and perform any necessary signal handling.
  *
  *	Caller must hold the console semaphore. Takes the termios mutex and
  *	ctrl_lock of the tty IFF a tty is passed.
diff --git a/drivers/dma/coh901318_lli.h b/drivers/dma/coh901318_lli.h
index 7bf713b79c6b..7a5c80990e9e 100644
--- a/drivers/dma/coh901318_lli.h
+++ b/drivers/dma/coh901318_lli.h
@@ -30,7 +30,7 @@ struct device;
  * @pool: pool handle
  * @dev: dma device
  * @lli_nbr: number of lli:s in the pool
- * @algin: adress alignemtn of lli:s
+ * @algin: address alignemtn of lli:s
  * returns 0 on success otherwise none zero
  */
 int coh901318_pool_create(struct coh901318_pool *pool,
diff --git a/drivers/gpu/drm/nouveau/nouveau_bios.c b/drivers/gpu/drm/nouveau/nouveau_bios.c
index d7f8d8b4a4b8..52fb371784e1 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bios.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bios.c
@@ -3544,7 +3544,7 @@ int nouveau_bios_parse_lvds_table(struct drm_device *dev, int pxclk, bool *dl, b
 	 * at which modes should be set up in the dual link style.
 	 *
 	 * Following the header, the BMP (ver 0xa) table has several records,
-	 * indexed by a seperate xlat table, indexed in turn by the fp strap in
+	 * indexed by a separate xlat table, indexed in turn by the fp strap in
 	 * EXTDEV_BOOT. Each record had a config byte, followed by 6 script
 	 * numbers for use by INIT_SUB which controlled panel init and power,
 	 * and finally a dword of ms to sleep between power off and on
diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h
index 6b9690418bc7..23664058690f 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drv.h
+++ b/drivers/gpu/drm/nouveau/nouveau_drv.h
@@ -544,7 +544,7 @@ struct drm_nouveau_private {
 	uint32_t ramro_offset;
 	uint32_t ramro_size;
 
-	/* base physical adresses */
+	/* base physical addresses */
 	uint64_t fb_phys;
 	uint64_t fb_available_size;
 	uint64_t fb_mappable_pages;
diff --git a/drivers/gpu/drm/via/via_irq.c b/drivers/gpu/drm/via/via_irq.c
index 5935b8842e86..34079f251cd4 100644
--- a/drivers/gpu/drm/via/via_irq.c
+++ b/drivers/gpu/drm/via/via_irq.c
@@ -150,7 +150,7 @@ irqreturn_t via_driver_irq_handler(DRM_IRQ_ARGS)
 		cur_irq++;
 	}
 
-	/* Acknowlege interrupts */
+	/* Acknowledge interrupts */
 	VIA_WRITE(VIA_REG_INTERRUPT, status);
 
 
@@ -165,7 +165,7 @@ static __inline__ void viadrv_acknowledge_irqs(drm_via_private_t * dev_priv)
 	u32 status;
 
 	if (dev_priv) {
-		/* Acknowlege interrupts */
+		/* Acknowledge interrupts */
 		status = VIA_READ(VIA_REG_INTERRUPT);
 		VIA_WRITE(VIA_REG_INTERRUPT, status |
 			  dev_priv->irq_pending_mask);
diff --git a/drivers/i2c/busses/i2c-pxa.c b/drivers/i2c/busses/i2c-pxa.c
index 7647a20523a0..90ffbf6f9d4f 100644
--- a/drivers/i2c/busses/i2c-pxa.c
+++ b/drivers/i2c/busses/i2c-pxa.c
@@ -12,7 +12,7 @@
  *
  *  History:
  *    Apr 2002: Initial version [CS]
- *    Jun 2002: Properly seperated algo/adap [FB]
+ *    Jun 2002: Properly separated algo/adap [FB]
  *    Jan 2003: Fixed several bugs concerning interrupt handling [Kai-Uwe Bloem]
  *    Jan 2003: added limited signal handling [Kai-Uwe Bloem]
  *    Sep 2004: Major rework to ensure efficient bus handling [RMK]
diff --git a/drivers/infiniband/hw/ehca/ehca_qes.h b/drivers/infiniband/hw/ehca/ehca_qes.h
index 5d28e3e98a20..90c4efa67586 100644
--- a/drivers/infiniband/hw/ehca/ehca_qes.h
+++ b/drivers/infiniband/hw/ehca/ehca_qes.h
@@ -46,7 +46,7 @@
 
 #include "ehca_tools.h"
 
-/* virtual scatter gather entry to specify remote adresses with length */
+/* virtual scatter gather entry to specify remote addresses with length */
 struct ehca_vsgentry {
 	u64 vaddr;
 	u32 lkey;
@@ -148,7 +148,7 @@ struct ehca_wqe {
 	u32 immediate_data;
 	union {
 		struct {
-			u64 remote_virtual_adress;
+			u64 remote_virtual_address;
 			u32 rkey;
 			u32 reserved;
 			u64 atomic_1st_op_dma_len;
diff --git a/drivers/infiniband/hw/ehca/ehca_reqs.c b/drivers/infiniband/hw/ehca/ehca_reqs.c
index e3ec7fdd67bd..9a3fbfca9b41 100644
--- a/drivers/infiniband/hw/ehca/ehca_reqs.c
+++ b/drivers/infiniband/hw/ehca/ehca_reqs.c
@@ -269,7 +269,7 @@ static inline int ehca_write_swqe(struct ehca_qp *qp,
 		/* no break is intentional here */
 	case IB_QPT_RC:
 		/* TODO: atomic not implemented */
-		wqe_p->u.nud.remote_virtual_adress =
+		wqe_p->u.nud.remote_virtual_address =
 			send_wr->wr.rdma.remote_addr;
 		wqe_p->u.nud.rkey = send_wr->wr.rdma.rkey;
 
diff --git a/drivers/input/misc/yealink.h b/drivers/input/misc/yealink.h
index 48af0be9cbdf..1e0f52397010 100644
--- a/drivers/input/misc/yealink.h
+++ b/drivers/input/misc/yealink.h
@@ -127,7 +127,7 @@ struct yld_ctl_packet {
  * yld_status struct.
  */
 
-/* LCD, each segment must be driven seperately.
+/* LCD, each segment must be driven separately.
  *
  * Layout:
  *
diff --git a/drivers/isdn/i4l/isdn_common.c b/drivers/isdn/i4l/isdn_common.c
index adb1e8c36b46..00c60e2e0ff7 100644
--- a/drivers/isdn/i4l/isdn_common.c
+++ b/drivers/isdn/i4l/isdn_common.c
@@ -1347,7 +1347,7 @@ isdn_ioctl(struct inode *inode, struct file *file, uint cmd, ulong arg)
 /*
  * isdn net devices manage lots of configuration variables as linked lists.
  * Those lists must only be manipulated from user space. Some of the ioctl's
- * service routines access user space and are not atomic. Therefor, ioctl's
+ * service routines access user space and are not atomic. Therefore, ioctl's
  * manipulating the lists and ioctl's sleeping while accessing the lists
  * are serialized by means of a semaphore.
  */
diff --git a/drivers/media/dvb/dvb-core/dvb_frontend.h b/drivers/media/dvb/dvb-core/dvb_frontend.h
index 52e4ce4304ee..80dda308ff74 100644
--- a/drivers/media/dvb/dvb-core/dvb_frontend.h
+++ b/drivers/media/dvb/dvb-core/dvb_frontend.h
@@ -214,14 +214,14 @@ struct dvb_tuner_ops {
 	int (*get_status)(struct dvb_frontend *fe, u32 *status);
 	int (*get_rf_strength)(struct dvb_frontend *fe, u16 *strength);
 
-	/** These are provided seperately from set_params in order to facilitate silicon
-	 * tuners which require sophisticated tuning loops, controlling each parameter seperately. */
+	/** These are provided separately from set_params in order to facilitate silicon
+	 * tuners which require sophisticated tuning loops, controlling each parameter separately. */
 	int (*set_frequency)(struct dvb_frontend *fe, u32 frequency);
 	int (*set_bandwidth)(struct dvb_frontend *fe, u32 bandwidth);
 
 	/*
-	 * These are provided seperately from set_params in order to facilitate silicon
-	 * tuners which require sophisticated tuning loops, controlling each parameter seperately.
+	 * These are provided separately from set_params in order to facilitate silicon
+	 * tuners which require sophisticated tuning loops, controlling each parameter separately.
 	 */
 	int (*set_state)(struct dvb_frontend *fe, enum tuner_param param, struct tuner_state *state);
 	int (*get_state)(struct dvb_frontend *fe, enum tuner_param param, struct tuner_state *state);
diff --git a/drivers/media/video/bt8xx/bttv-cards.c b/drivers/media/video/bt8xx/bttv-cards.c
index 12279f6d9bc4..716870ae85d5 100644
--- a/drivers/media/video/bt8xx/bttv-cards.c
+++ b/drivers/media/video/bt8xx/bttv-cards.c
@@ -4404,7 +4404,7 @@ static void rv605_muxsel(struct bttv *btv, unsigned int input)
 /* Tibet Systems 'Progress DVR' CS16 muxsel helper [Chris Fanning]
  *
  * The CS16 (available on eBay cheap) is a PCI board with four Fusion
- * 878A chips, a PCI bridge, an Atmel microcontroller, four sync seperator
+ * 878A chips, a PCI bridge, an Atmel microcontroller, four sync separator
  * chips, ten eight input analog multiplexors, a not chip and a few
  * other components.
  *
@@ -4426,7 +4426,7 @@ static void rv605_muxsel(struct bttv *btv, unsigned int input)
  *
  * There is an ATMEL microcontroller with an 8031 core on board.  I have not
  * determined what function (if any) it provides.  With the microcontroller
- * and sync seperator chips a guess is that it might have to do with video
+ * and sync separator chips a guess is that it might have to do with video
  * switching and maybe some digital I/O.
  */
 static void tibetCS16_muxsel(struct bttv *btv, unsigned int input)
diff --git a/drivers/media/video/gspca/ov519.c b/drivers/media/video/gspca/ov519.c
index b4f965731244..e5e4c4440d39 100644
--- a/drivers/media/video/gspca/ov519.c
+++ b/drivers/media/video/gspca/ov519.c
@@ -503,7 +503,7 @@ static const struct v4l2_pix_format ovfx2_ov3610_mode[] = {
 /*
  * The FX2 chip does not give us a zero length read at end of frame.
  * It does, however, give a short read at the end of a frame, if
- * neccessary, rather than run two frames together.
+ * necessary, rather than run two frames together.
  *
  * By choosing the right bulk transfer size, we are guaranteed to always
  * get a short read for the last read of each frame.  Frame sizes are
diff --git a/drivers/media/video/pwc/philips.txt b/drivers/media/video/pwc/philips.txt
index f9f3584281d8..d38dd791511e 100644
--- a/drivers/media/video/pwc/philips.txt
+++ b/drivers/media/video/pwc/philips.txt
@@ -33,7 +33,7 @@ a lot of extra information, a FAQ, and the binary plugin 'PWCX'. This plugin
 contains decompression routines that allow you to use higher image sizes and
 framerates; in addition the webcam uses less bandwidth on the USB bus (handy
 if you want to run more than 1 camera simultaneously). These routines fall
-under a NDA, and may therefor not be distributed as source; however, its use
+under a NDA, and may therefore not be distributed as source; however, its use
 is completely optional.
 
 You can build this code either into your kernel, or as a module. I recommend
diff --git a/drivers/media/video/sn9c102/sn9c102_sensor.h b/drivers/media/video/sn9c102/sn9c102_sensor.h
index 4af7382da5c5..494957b10bac 100644
--- a/drivers/media/video/sn9c102/sn9c102_sensor.h
+++ b/drivers/media/video/sn9c102/sn9c102_sensor.h
@@ -120,7 +120,7 @@ extern int sn9c102_write_regs(struct sn9c102_device*, const u8 valreg[][2],
 /*
    Write multiple registers with constant values. For example:
    sn9c102_write_const_regs(cam, {0x00, 0x14}, {0x60, 0x17}, {0x0f, 0x18});
-   Register adresses must be < 256.
+   Register addresses must be < 256.
 */
 #define sn9c102_write_const_regs(sn9c102_device, data...)                     \
 	({ static const u8 _valreg[][2] = {data};                             \
diff --git a/drivers/media/video/tea6420.c b/drivers/media/video/tea6420.c
index 0446524d3543..6bf6bc7dbc7f 100644
--- a/drivers/media/video/tea6420.c
+++ b/drivers/media/video/tea6420.c
@@ -6,7 +6,7 @@
 
     The tea6420 is a bus controlled audio-matrix with 5 stereo inputs,
     4 stereo outputs and gain control for each output.
-    It is cascadable, i.e. it can be found at the adresses 0x98
+    It is cascadable, i.e. it can be found at the addresses 0x98
     and 0x9a on the i2c-bus.
 
     For detailed informations download the specifications directly
diff --git a/drivers/mfd/sm501.c b/drivers/mfd/sm501.c
index 0cc5eeff5ee8..10491e4e305d 100644
--- a/drivers/mfd/sm501.c
+++ b/drivers/mfd/sm501.c
@@ -523,7 +523,7 @@ unsigned long sm501_set_clock(struct device *dev,
 	unsigned long clock = readl(sm->regs + SM501_CURRENT_CLOCK);
 	unsigned char reg;
 	unsigned int pll_reg = 0;
-	unsigned long sm501_freq; /* the actual frequency acheived */
+	unsigned long sm501_freq; /* the actual frequency achieved */
 
 	struct sm501_clock to;
 
@@ -533,7 +533,7 @@ unsigned long sm501_set_clock(struct device *dev,
 
 	switch (clksrc) {
 	case SM501_CLOCK_P2XCLK:
-		/* This clock is divided in half so to achive the
+		/* This clock is divided in half so to achieve the
 		 * requested frequency the value must be multiplied by
 		 * 2. This clock also has an additional pre divisor */
 
@@ -562,7 +562,7 @@ unsigned long sm501_set_clock(struct device *dev,
 		break;
 
 	case SM501_CLOCK_V2XCLK:
-		/* This clock is divided in half so to achive the
+		/* This clock is divided in half so to achieve the
 		 * requested frequency the value must be multiplied by 2. */
 
 		sm501_freq = (sm501_select_clock(2 * req_freq, &to, 3) / 2);
@@ -648,7 +648,7 @@ unsigned long sm501_find_clock(struct device *dev,
 			       unsigned long req_freq)
 {
 	struct sm501_devdata *sm = dev_get_drvdata(dev);
-	unsigned long sm501_freq; /* the frequency achiveable by the 501 */
+	unsigned long sm501_freq; /* the frequency achieveable by the 501 */
 	struct sm501_clock to;
 
 	switch (clksrc) {
diff --git a/drivers/mmc/host/mxcmmc.c b/drivers/mmc/host/mxcmmc.c
index 60a2b69e54f5..16cc91c827c9 100644
--- a/drivers/mmc/host/mxcmmc.c
+++ b/drivers/mmc/host/mxcmmc.c
@@ -4,7 +4,7 @@
  *  This is a driver for the SDHC controller found in Freescale MX2/MX3
  *  SoCs. It is basically the same hardware as found on MX1 (imxmmc.c).
  *  Unlike the hardware found on MX1, this hardware just works and does
- *  not need all the quirks found in imxmmc.c, hence the seperate driver.
+ *  not need all the quirks found in imxmmc.c, hence the separate driver.
  *
  *  Copyright (C) 2008 Sascha Hauer, Pengutronix <s.hauer@pengutronix.de>
  *  Copyright (C) 2006 Pavel Pisa, PiKRON <ppisa@pikron.com>
diff --git a/drivers/mtd/chips/jedec_probe.c b/drivers/mtd/chips/jedec_probe.c
index 1bec5e1ce6ac..8db1148dfa47 100644
--- a/drivers/mtd/chips/jedec_probe.c
+++ b/drivers/mtd/chips/jedec_probe.c
@@ -226,7 +226,7 @@ struct unlock_addr {
  * exists, but is for MTD_UADDR_NOT_SUPPORTED - and, therefore,
  * should not be used.  The  problem is that structures with
  * initializers have extra fields initialized to 0.  It is _very_
- * desireable to have the unlock address entries for unsupported
+ * desirable to have the unlock address entries for unsupported
  * data widths automatically initialized - that means that
  * MTD_UADDR_NOT_SUPPORTED must be 0 and the first entry here
  * must go unused.
diff --git a/drivers/mtd/nand/bcm_umi_nand.c b/drivers/mtd/nand/bcm_umi_nand.c
index 087bcd745bb7..7d1cca7a31a9 100644
--- a/drivers/mtd/nand/bcm_umi_nand.c
+++ b/drivers/mtd/nand/bcm_umi_nand.c
@@ -381,7 +381,7 @@ static int __devinit bcm_umi_nand_probe(struct platform_device *pdev)
 	if (!r)
 		return -ENXIO;
 
-	/* map physical adress */
+	/* map physical address */
 	bcm_umi_io_base = ioremap(r->start, r->end - r->start + 1);
 
 	if (!bcm_umi_io_base) {
@@ -525,7 +525,7 @@ static int bcm_umi_nand_remove(struct platform_device *pdev)
 	/* Release resources, unregister device */
 	nand_release(board_mtd);
 
-	/* unmap physical adress */
+	/* unmap physical address */
 	iounmap(bcm_umi_io_base);
 
 	/* Free the MTD device structure */
diff --git a/drivers/mtd/nand/mxc_nand.c b/drivers/mtd/nand/mxc_nand.c
index 45dec5770da0..b2900d8406d3 100644
--- a/drivers/mtd/nand/mxc_nand.c
+++ b/drivers/mtd/nand/mxc_nand.c
@@ -507,7 +507,7 @@ static void mxc_do_addr_cycle(struct mtd_info *mtd, int column, int page_addr)
 		 * MXC NANDFC can only perform full page+spare or
 		 * spare-only read/write.  When the upper layers
 		 * layers perform a read/write buf operation,
-		 * we will used the saved column adress to index into
+		 * we will used the saved column address to index into
 		 * the full page.
 		 */
 		send_addr(host, 0, page_addr == -1);
diff --git a/drivers/net/atlx/atl2.h b/drivers/net/atlx/atl2.h
index d918bbe621ea..927e4de6474d 100644
--- a/drivers/net/atlx/atl2.h
+++ b/drivers/net/atlx/atl2.h
@@ -442,7 +442,7 @@ struct atl2_hw {
 struct atl2_ring_header {
     /* pointer to the descriptor ring memory */
     void *desc;
-    /* physical adress of the descriptor ring */
+    /* physical address of the descriptor ring */
     dma_addr_t dma;
     /* length of descriptor ring in bytes */
     unsigned int size;
diff --git a/drivers/net/chelsio/sge.c b/drivers/net/chelsio/sge.c
index 109d2783e4d8..bef02330464d 100644
--- a/drivers/net/chelsio/sge.c
+++ b/drivers/net/chelsio/sge.c
@@ -248,7 +248,7 @@ static void restart_sched(unsigned long);
  *
  * Interrupts are handled by a single CPU and it is likely that on a MP system
  * the application is migrated to another CPU. In that scenario, we try to
- * seperate the RX(in irq context) and TX state in order to decrease memory
+ * separate the RX(in irq context) and TX state in order to decrease memory
  * contention.
  */
 struct sge {
diff --git a/drivers/net/e1000e/82571.c b/drivers/net/e1000e/82571.c
index 02d67d047d96..dc4eb87309c5 100644
--- a/drivers/net/e1000e/82571.c
+++ b/drivers/net/e1000e/82571.c
@@ -1363,7 +1363,7 @@ static s32 e1000_setup_fiber_serdes_link_82571(struct e1000_hw *hw)
  *
  *  1) down
  *  2) autoneg_progress
- *  3) autoneg_complete (the link sucessfully autonegotiated)
+ *  3) autoneg_complete (the link successfully autonegotiated)
  *  4) forced_up (the link has been forced up, it did not autonegotiate)
  *
  **/
diff --git a/drivers/net/e1000e/lib.c b/drivers/net/e1000e/lib.c
index 2fa9b36a2c5a..3af0b1b82832 100644
--- a/drivers/net/e1000e/lib.c
+++ b/drivers/net/e1000e/lib.c
@@ -587,7 +587,7 @@ s32 e1000e_check_for_serdes_link(struct e1000_hw *hw)
 				if (!(rxcw & E1000_RXCW_IV)) {
 					mac->serdes_has_link = true;
 					e_dbg("SERDES: Link up - autoneg "
-					   "completed sucessfully.\n");
+					   "completed successfully.\n");
 				} else {
 					mac->serdes_has_link = false;
 					e_dbg("SERDES: Link down - invalid"
diff --git a/drivers/net/igb/igb_main.c b/drivers/net/igb/igb_main.c
index 997124d2992a..e2ce8f8a21a7 100644
--- a/drivers/net/igb/igb_main.c
+++ b/drivers/net/igb/igb_main.c
@@ -674,7 +674,7 @@ static void igb_set_interrupt_capability(struct igb_adapter *adapter)
 	/* start with one vector for every rx queue */
 	numvecs = adapter->num_rx_queues;
 
-	/* if tx handler is seperate add 1 for every tx queue */
+	/* if tx handler is separate add 1 for every tx queue */
 	if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
 		numvecs += adapter->num_tx_queues;
 
diff --git a/drivers/net/irda/sa1100_ir.c b/drivers/net/irda/sa1100_ir.c
index c412e8026173..1dcdce0631aa 100644
--- a/drivers/net/irda/sa1100_ir.c
+++ b/drivers/net/irda/sa1100_ir.c
@@ -331,7 +331,7 @@ static int sa1100_irda_resume(struct platform_device *pdev)
 		 * If we missed a speed change, initialise at the new speed
 		 * directly.  It is debatable whether this is actually
 		 * required, but in the interests of continuing from where
-		 * we left off it is desireable.  The converse argument is
+		 * we left off it is desirable.  The converse argument is
 		 * that we should re-negotiate at 9600 baud again.
 		 */
 		if (si->newspeed) {
diff --git a/drivers/net/qlge/qlge_ethtool.c b/drivers/net/qlge/qlge_ethtool.c
index 058fa0a48c6f..b8d21ab212c6 100644
--- a/drivers/net/qlge/qlge_ethtool.c
+++ b/drivers/net/qlge/qlge_ethtool.c
@@ -402,7 +402,7 @@ static int ql_set_wol(struct net_device *ndev, struct ethtool_wolinfo *wol)
 		u32 wol = 0;
 		status = ql_mb_wol_mode(qdev, wol);
 		QPRINTK(qdev, DRV, ERR, "WOL %s (wol code 0x%x) on %s\n",
-			(status == 0) ? "cleared sucessfully" : "clear failed",
+			(status == 0) ? "cleared successfully" : "clear failed",
 			wol, qdev->ndev->name);
 	}
 
diff --git a/drivers/net/qlge/qlge_main.c b/drivers/net/qlge/qlge_main.c
index 894a7c84faef..a35845b48ea4 100644
--- a/drivers/net/qlge/qlge_main.c
+++ b/drivers/net/qlge/qlge_main.c
@@ -3517,7 +3517,7 @@ int ql_wol(struct ql_adapter *qdev)
 		wol |= MB_WOL_MODE_ON;
 		status = ql_mb_wol_mode(qdev, wol);
 		QPRINTK(qdev, DRV, ERR, "WOL %s (wol code 0x%x) on %s\n",
-			(status == 0) ? "Sucessfully set" : "Failed", wol,
+			(status == 0) ? "Successfully set" : "Failed", wol,
 			qdev->ndev->name);
 	}
 
diff --git a/drivers/net/sfc/regs.h b/drivers/net/sfc/regs.h
index 89d606fe9248..18a3be428348 100644
--- a/drivers/net/sfc/regs.h
+++ b/drivers/net/sfc/regs.h
@@ -95,7 +95,7 @@
 #define	FRF_AA_INT_ACK_KER_FIELD_LBN 0
 #define	FRF_AA_INT_ACK_KER_FIELD_WIDTH 32
 
-/* INT_ISR0_REG: Function 0 Interrupt Acknowlege Status register */
+/* INT_ISR0_REG: Function 0 Interrupt Acknowledge Status register */
 #define	FR_BZ_INT_ISR0 0x00000090
 #define	FRF_BZ_INT_ISR_REG_LBN 0
 #define	FRF_BZ_INT_ISR_REG_WIDTH 64
diff --git a/drivers/net/smsc9420.c b/drivers/net/smsc9420.c
index 12f0f5d74e3c..2ae1972bcb46 100644
--- a/drivers/net/smsc9420.c
+++ b/drivers/net/smsc9420.c
@@ -1348,7 +1348,7 @@ static int smsc9420_open(struct net_device *dev)
 
 	netif_carrier_off(dev);
 
-	/* disable, mask and acknowlege all interrupts */
+	/* disable, mask and acknowledge all interrupts */
 	spin_lock_irqsave(&pd->int_lock, flags);
 	int_cfg = smsc9420_reg_read(pd, INT_CFG) & (~INT_CFG_IRQ_EN_);
 	smsc9420_reg_write(pd, INT_CFG, int_cfg);
diff --git a/drivers/net/spider_net.c b/drivers/net/spider_net.c
index 218524857bfc..839b1f065d3c 100644
--- a/drivers/net/spider_net.c
+++ b/drivers/net/spider_net.c
@@ -474,7 +474,7 @@ spider_net_prepare_rx_descr(struct spider_net_card *card,
  * spider_net_enable_rxchtails - sets RX dmac chain tail addresses
  * @card: card structure
  *
- * spider_net_enable_rxchtails sets the RX DMAC chain tail adresses in the
+ * spider_net_enable_rxchtails sets the RX DMAC chain tail addresses in the
  * chip by writing to the appropriate register. DMA is enabled in
  * spider_net_enable_rxdmac.
  */
@@ -1820,7 +1820,7 @@ spider_net_enable_card(struct spider_net_card *card)
 
 	spider_net_write_reg(card, SPIDER_NET_ECMODE, SPIDER_NET_ECMODE_VALUE);
 
-	/* set chain tail adress for RX chains and
+	/* set chain tail address for RX chains and
 	 * enable DMA */
 	spider_net_enable_rxchtails(card);
 	spider_net_enable_rxdmac(card);
diff --git a/drivers/net/sungem.c b/drivers/net/sungem.c
index b571a1babab9..a88fcb39ba15 100644
--- a/drivers/net/sungem.c
+++ b/drivers/net/sungem.c
@@ -782,7 +782,7 @@ static int gem_rx(struct gem *gp, int work_to_do)
 			break;
 
 		/* When writing back RX descriptor, GEM writes status
-		 * then buffer address, possibly in seperate transactions.
+		 * then buffer address, possibly in separate transactions.
 		 * If we don't wait for the chip to write both, we could
 		 * post a new buffer to this descriptor then have GEM spam
 		 * on the buffer address.  We sync on the RX completion
diff --git a/drivers/net/tehuti.c b/drivers/net/tehuti.c
index 80b404f2b938..ed4e9c42935c 100644
--- a/drivers/net/tehuti.c
+++ b/drivers/net/tehuti.c
@@ -1857,7 +1857,7 @@ static void bdx_tx_push_desc(struct bdx_priv *priv, void *data, int size)
  * @data - desc's data
  * @size - desc's size
  *
- * NOTE: this func does check for available space and, if neccessary, waits for
+ * NOTE: this func does check for available space and, if necessary, waits for
  *   NIC to read existing data before writing new one.
  */
 static void bdx_tx_push_desc_safe(struct bdx_priv *priv, void *data, int size)
diff --git a/drivers/net/tokenring/tms380tr.c b/drivers/net/tokenring/tms380tr.c
index e3c42f5ac4a9..4e4c402319c9 100644
--- a/drivers/net/tokenring/tms380tr.c
+++ b/drivers/net/tokenring/tms380tr.c
@@ -693,7 +693,7 @@ static netdev_tx_t tms380tr_hardware_send_packet(struct sk_buff *skb,
  * NOTE: This function should be used whenever the status of any TPL must be
  * modified by the driver, because the compiler may otherwise change the
  * order of instructions such that writing the TPL status may be executed at
- * an undesireable time. When this function is used, the status is always
+ * an undesirable time. When this function is used, the status is always
  * written when the function is called.
  */
 static void tms380tr_write_tpl_status(TPL *tpl, unsigned int Status)
@@ -2266,7 +2266,7 @@ static void tms380tr_rcv_status_irq(struct net_device *dev)
  * This function should be used whenever the status of any RPL must be
  * modified by the driver, because the compiler may otherwise change the
  * order of instructions such that writing the RPL status may be executed
- * at an undesireable time. When this function is used, the status is
+ * at an undesirable time. When this function is used, the status is
  * always written when the function is called.
  */
 static void tms380tr_write_rpl_status(RPL *rpl, unsigned int Status)
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 2834a01bae24..e572ecc09a44 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -1365,7 +1365,7 @@ static int tun_chr_close(struct inode *inode, struct file *file)
 
 		__tun_detach(tun);
 
-		/* If desireable, unregister the netdevice. */
+		/* If desirable, unregister the netdevice. */
 		if (!(tun->flags & TUN_PERSIST)) {
 			rtnl_lock();
 			if (dev->reg_state == NETREG_REGISTERED)
diff --git a/drivers/net/ucc_geth.c b/drivers/net/ucc_geth.c
index eb8fe7e16c6c..8eec97799e05 100644
--- a/drivers/net/ucc_geth.c
+++ b/drivers/net/ucc_geth.c
@@ -429,7 +429,7 @@ static void hw_add_addr_in_hash(struct ucc_geth_private *ugeth,
 	    ucc_fast_get_qe_cr_subblock(ugeth->ug_info->uf_info.ucc_num);
 
 	/* Ethernet frames are defined in Little Endian mode,
-	therefor to insert */
+	therefore to insert */
 	/* the address to the hash (Big Endian mode), we reverse the bytes.*/
 
 	set_mac_addr(&p_82xx_addr_filt->taddr.h, p_enet_addr);
diff --git a/drivers/net/wimax/i2400m/fw.c b/drivers/net/wimax/i2400m/fw.c
index 64cdfeb299ca..6c459f5cb5df 100644
--- a/drivers/net/wimax/i2400m/fw.c
+++ b/drivers/net/wimax/i2400m/fw.c
@@ -612,7 +612,7 @@ ssize_t i2400m_bm_cmd(struct i2400m *i2400m,
 		goto error_wait_for_ack;
 	}
 	rx_bytes = result;
-	/* verify the ack and read more if neccessary [result is the
+	/* verify the ack and read more if necessary [result is the
 	 * final amount of bytes we get in the ack]  */
 	result = __i2400m_bm_ack_verify(i2400m, opcode, ack, ack_size, flags);
 	if (result < 0)
diff --git a/drivers/net/wimax/i2400m/i2400m.h b/drivers/net/wimax/i2400m/i2400m.h
index 04df9bbe340f..820b128705ec 100644
--- a/drivers/net/wimax/i2400m/i2400m.h
+++ b/drivers/net/wimax/i2400m/i2400m.h
@@ -627,7 +627,7 @@ enum i2400m_bm_cmd_flags {
  * @I2400M_BRI_NO_REBOOT: Do not reboot the device and proceed
  *     directly to wait for a reboot barker from the device.
  * @I2400M_BRI_MAC_REINIT: We need to reinitialize the boot
- *     rom after reading the MAC adress. This is quite a dirty hack,
+ *     rom after reading the MAC address. This is quite a dirty hack,
  *     if you ask me -- the device requires the bootrom to be
  *     intialized after reading the MAC address.
  */
diff --git a/drivers/net/wimax/i2400m/sdio.c b/drivers/net/wimax/i2400m/sdio.c
index 76a50ac02ebb..14f876b1358b 100644
--- a/drivers/net/wimax/i2400m/sdio.c
+++ b/drivers/net/wimax/i2400m/sdio.c
@@ -304,7 +304,7 @@ error_kzalloc:
  *
  * The device will be fully reset internally, but won't be
  * disconnected from the bus (so no reenumeration will
- * happen). Firmware upload will be neccessary.
+ * happen). Firmware upload will be necessary.
  *
  * The device will send a reboot barker that will trigger the driver
  * to reinitialize the state via __i2400m_dev_reset_handle.
@@ -314,7 +314,7 @@ error_kzalloc:
  *
  * The device will be fully reset internally, disconnected from the
  * bus an a reenumeration will happen. Firmware upload will be
- * neccessary. Thus, we don't do any locking or struct
+ * necessary. Thus, we don't do any locking or struct
  * reinitialization, as we are going to be fully disconnected and
  * reenumerated.
  *
diff --git a/drivers/net/wimax/i2400m/usb.c b/drivers/net/wimax/i2400m/usb.c
index 98f4f8c5fb68..99f04c475898 100644
--- a/drivers/net/wimax/i2400m/usb.c
+++ b/drivers/net/wimax/i2400m/usb.c
@@ -246,7 +246,7 @@ error_kzalloc:
  *
  * The device will be fully reset internally, but won't be
  * disconnected from the USB bus (so no reenumeration will
- * happen). Firmware upload will be neccessary.
+ * happen). Firmware upload will be necessary.
  *
  * The device will send a reboot barker in the notification endpoint
  * that will trigger the driver to reinitialize the state
@@ -257,7 +257,7 @@ error_kzalloc:
  *
  * The device will be fully reset internally, disconnected from the
  * USB bus an a reenumeration will happen. Firmware upload will be
- * neccessary. Thus, we don't do any locking or struct
+ * necessary. Thus, we don't do any locking or struct
  * reinitialization, as we are going to be fully disconnected and
  * reenumerated.
  *
diff --git a/drivers/net/wireless/ath/ar9170/main.c b/drivers/net/wireless/ath/ar9170/main.c
index f9d6db8d013e..46a1e19c6787 100644
--- a/drivers/net/wireless/ath/ar9170/main.c
+++ b/drivers/net/wireless/ath/ar9170/main.c
@@ -2535,7 +2535,7 @@ void *ar9170_alloc(size_t priv_size)
 	/*
 	 * this buffer is used for rx stream reconstruction.
 	 * Under heavy load this device (or the transport layer?)
-	 * tends to split the streams into seperate rx descriptors.
+	 * tends to split the streams into separate rx descriptors.
 	 */
 
 	skb = __dev_alloc_skb(AR9170_MAX_RX_BUFFER_SIZE, GFP_KERNEL);
diff --git a/drivers/net/wireless/iwmc3200wifi/lmac.h b/drivers/net/wireless/iwmc3200wifi/lmac.h
index a3a79b5e2898..a855a99e49b8 100644
--- a/drivers/net/wireless/iwmc3200wifi/lmac.h
+++ b/drivers/net/wireless/iwmc3200wifi/lmac.h
@@ -262,7 +262,7 @@ struct iwm_ct_kill_cfg_cmd {
 
 /* Power Management */
 #define POWER_TABLE_CMD			0x77
-#define SAVE_RESTORE_ADRESS_CMD		0x78
+#define SAVE_RESTORE_ADDRESS_CMD		0x78
 #define REPLY_WATERMARK_CMD		0x79
 #define PM_DEBUG_STATISTIC_NOTIFIC	0x7B
 #define PD_FLUSH_N_NOTIFICATION		0x7C
diff --git a/drivers/net/wireless/rt2x00/rt2500usb.c b/drivers/net/wireless/rt2x00/rt2500usb.c
index 83f2592c59de..486c93559c29 100644
--- a/drivers/net/wireless/rt2x00/rt2500usb.c
+++ b/drivers/net/wireless/rt2x00/rt2500usb.c
@@ -368,7 +368,7 @@ static int rt2500usb_config_key(struct rt2x00_dev *rt2x00dev,
 
 		/*
 		 * The encryption key doesn't fit within the CSR cache,
-		 * this means we should allocate it seperately and use
+		 * this means we should allocate it separately and use
 		 * rt2x00usb_vendor_request() to send the key to the hardware.
 		 */
 		reg = KEY_ENTRY(key->hw_key_idx);
@@ -382,7 +382,7 @@ static int rt2500usb_config_key(struct rt2x00_dev *rt2x00dev,
 		/*
 		 * The driver does not support the IV/EIV generation
 		 * in hardware. However it demands the data to be provided
-		 * both seperately as well as inside the frame.
+		 * both separately as well as inside the frame.
 		 * We already provided the CONFIG_CRYPTO_COPY_IV to rt2x00lib
 		 * to ensure rt2x00lib will not strip the data from the
 		 * frame after the copy, now we must tell mac80211
diff --git a/drivers/net/wireless/rt2x00/rt2800usb.c b/drivers/net/wireless/rt2x00/rt2800usb.c
index ab95346cf6a3..2e5c8a13758b 100644
--- a/drivers/net/wireless/rt2x00/rt2800usb.c
+++ b/drivers/net/wireless/rt2x00/rt2800usb.c
@@ -100,7 +100,7 @@ static int rt2800usb_check_firmware(struct rt2x00_dev *rt2x00dev,
 	 * There are 2 variations of the rt2870 firmware.
 	 * a) size: 4kb
 	 * b) size: 8kb
-	 * Note that (b) contains 2 seperate firmware blobs of 4k
+	 * Note that (b) contains 2 separate firmware blobs of 4k
 	 * within the file. The first blob is the same firmware as (a),
 	 * but the second blob is for the additional chipsets.
 	 */
@@ -118,7 +118,7 @@ static int rt2800usb_check_firmware(struct rt2x00_dev *rt2x00dev,
 
 	/*
 	 * 8kb firmware files must be checked as if it were
-	 * 2 seperate firmware files.
+	 * 2 separate firmware files.
 	 */
 	while (offset < len) {
 		if (!rt2800usb_check_crc(data + offset, 4096))
diff --git a/drivers/net/wireless/rt2x00/rt2x00debug.c b/drivers/net/wireless/rt2x00/rt2x00debug.c
index 7d323a763b54..afee806affc2 100644
--- a/drivers/net/wireless/rt2x00/rt2x00debug.c
+++ b/drivers/net/wireless/rt2x00/rt2x00debug.c
@@ -109,7 +109,7 @@ struct rt2x00debug_intf {
 
 	/*
 	 * HW crypto statistics.
-	 * All statistics are stored seperately per cipher type.
+	 * All statistics are stored separately per cipher type.
 	 */
 	struct rt2x00debug_crypto crypto_stats[CIPHER_MAX];
 
diff --git a/drivers/net/wireless/rt2x00/rt2x00dev.c b/drivers/net/wireless/rt2x00/rt2x00dev.c
index 265e66dba552..5e1d5167fff4 100644
--- a/drivers/net/wireless/rt2x00/rt2x00dev.c
+++ b/drivers/net/wireless/rt2x00/rt2x00dev.c
@@ -397,7 +397,7 @@ void rt2x00lib_rxdone(struct rt2x00_dev *rt2x00dev,
 	/*
 	 * Hardware might have stripped the IV/EIV/ICV data,
 	 * in that case it is possible that the data was
-	 * provided seperately (through hardware descriptor)
+	 * provided separately (through hardware descriptor)
 	 * in which case we should reinsert the data into the frame.
 	 */
 	if ((rxdesc.dev_flags & RXDONE_CRYPTO_IV) &&
diff --git a/drivers/net/wireless/rt2x00/rt2x00queue.c b/drivers/net/wireless/rt2x00/rt2x00queue.c
index 9915a09141ef..38ffca9b0fe7 100644
--- a/drivers/net/wireless/rt2x00/rt2x00queue.c
+++ b/drivers/net/wireless/rt2x00/rt2x00queue.c
@@ -502,7 +502,7 @@ int rt2x00queue_write_tx_frame(struct data_queue *queue, struct sk_buff *skb,
 	/*
 	 * When hardware encryption is supported, and this frame
 	 * is to be encrypted, we should strip the IV/EIV data from
-	 * the frame so we can provide it to the driver seperately.
+	 * the frame so we can provide it to the driver separately.
 	 */
 	if (test_bit(ENTRY_TXD_ENCRYPT, &txdesc.flags) &&
 	    !test_bit(ENTRY_TXD_ENCRYPT_IV, &txdesc.flags)) {
diff --git a/drivers/net/wireless/rt2x00/rt61pci.c b/drivers/net/wireless/rt2x00/rt61pci.c
index 0ca589306d71..99459db61efd 100644
--- a/drivers/net/wireless/rt2x00/rt61pci.c
+++ b/drivers/net/wireless/rt2x00/rt61pci.c
@@ -476,7 +476,7 @@ static int rt61pci_config_pairwise_key(struct rt2x00_dev *rt2x00dev,
 		 * The driver does not support the IV/EIV generation
 		 * in hardware. However it doesn't support the IV/EIV
 		 * inside the ieee80211 frame either, but requires it
-		 * to be provided seperately for the descriptor.
+		 * to be provided separately for the descriptor.
 		 * rt2x00lib will cut the IV/EIV data out of all frames
 		 * given to us by mac80211, but we must tell mac80211
 		 * to generate the IV/EIV data.
diff --git a/drivers/net/wireless/rt2x00/rt73usb.c b/drivers/net/wireless/rt2x00/rt73usb.c
index ced3b6ab5e16..527368a45fd5 100644
--- a/drivers/net/wireless/rt2x00/rt73usb.c
+++ b/drivers/net/wireless/rt2x00/rt73usb.c
@@ -339,7 +339,7 @@ static int rt73usb_config_shared_key(struct rt2x00_dev *rt2x00dev,
 		 * The driver does not support the IV/EIV generation
 		 * in hardware. However it doesn't support the IV/EIV
 		 * inside the ieee80211 frame either, but requires it
-		 * to be provided seperately for the descriptor.
+		 * to be provided separately for the descriptor.
 		 * rt2x00lib will cut the IV/EIV data out of all frames
 		 * given to us by mac80211, but we must tell mac80211
 		 * to generate the IV/EIV data.
@@ -439,7 +439,7 @@ static int rt73usb_config_pairwise_key(struct rt2x00_dev *rt2x00dev,
 		 * The driver does not support the IV/EIV generation
 		 * in hardware. However it doesn't support the IV/EIV
 		 * inside the ieee80211 frame either, but requires it
-		 * to be provided seperately for the descriptor.
+		 * to be provided separately for the descriptor.
 		 * rt2x00lib will cut the IV/EIV data out of all frames
 		 * given to us by mac80211, but we must tell mac80211
 		 * to generate the IV/EIV data.
@@ -1665,7 +1665,7 @@ static void rt73usb_fill_rxdone(struct queue_entry *entry,
 
 		/*
 		 * Hardware has stripped IV/EIV data from 802.11 frame during
-		 * decryption. It has provided the data seperately but rt2x00lib
+		 * decryption. It has provided the data separately but rt2x00lib
 		 * should decide if it should be reinserted.
 		 */
 		rxdesc->flags |= RX_FLAG_IV_STRIPPED;
diff --git a/drivers/s390/char/raw3270.c b/drivers/s390/char/raw3270.c
index 62ddf5202b79..2a4c566456e7 100644
--- a/drivers/s390/char/raw3270.c
+++ b/drivers/s390/char/raw3270.c
@@ -373,7 +373,7 @@ raw3270_irq (struct ccw_device *cdev, unsigned long intparm, struct irb *irb)
 		rq->rc = ccw_device_start(rp->cdev, &rq->ccw,
 					  (unsigned long) rq, 0, 0);
 		if (rq->rc == 0)
-			return;	/* Sucessfully restarted. */
+			return;	/* Successfully restarted. */
 		break;
 	case RAW3270_IO_STOP:
 		if (!rq)
diff --git a/drivers/s390/char/sclp.c b/drivers/s390/char/sclp.c
index ec88c59842e3..f6d72e1f2a38 100644
--- a/drivers/s390/char/sclp.c
+++ b/drivers/s390/char/sclp.c
@@ -196,7 +196,7 @@ __sclp_start_request(struct sclp_req *req)
 	req->start_count++;
 
 	if (rc == 0) {
-		/* Sucessfully started request */
+		/* Successfully started request */
 		req->status = SCLP_REQ_RUNNING;
 		sclp_running_state = sclp_running_state_running;
 		__sclp_set_request_timer(SCLP_RETRY_INTERVAL * HZ,
diff --git a/drivers/scsi/a100u2w.c b/drivers/scsi/a100u2w.c
index 208d6df9ed59..ff5716d5f044 100644
--- a/drivers/scsi/a100u2w.c
+++ b/drivers/scsi/a100u2w.c
@@ -492,7 +492,7 @@ static void init_alloc_map(struct orc_host * host)
  *	init_orchid		-	initialise the host adapter
  *	@host:host adapter to initialise
  *
- *	Initialise the controller and if neccessary load the firmware.
+ *	Initialise the controller and if necessary load the firmware.
  *
  *	Returns -1 if the initialisation fails.
  */
diff --git a/drivers/scsi/initio.c b/drivers/scsi/initio.c
index 89a59484be02..a7714160fbc3 100644
--- a/drivers/scsi/initio.c
+++ b/drivers/scsi/initio.c
@@ -531,7 +531,7 @@ static void initio_read_eeprom(unsigned long base)
  *	initio_stop_bm		-	stop bus master
  *	@host: InitIO we are stopping
  *
- *	Stop any pending DMA operation, aborting the DMA if neccessary
+ *	Stop any pending DMA operation, aborting the DMA if necessary
  */
 
 static void initio_stop_bm(struct initio_host * host)
diff --git a/drivers/scsi/libfc/fc_fcp.c b/drivers/scsi/libfc/fc_fcp.c
index 96ee599d9a05..96446a85e008 100644
--- a/drivers/scsi/libfc/fc_fcp.c
+++ b/drivers/scsi/libfc/fc_fcp.c
@@ -48,7 +48,7 @@ struct kmem_cache *scsi_pkt_cachep;
 #define FC_SRB_CMD_SENT		(1 << 0)	/* cmd has been sent */
 #define FC_SRB_RCV_STATUS	(1 << 1)	/* response has arrived */
 #define FC_SRB_ABORT_PENDING	(1 << 2)	/* cmd abort sent to device */
-#define FC_SRB_ABORTED		(1 << 3)	/* abort acknowleged */
+#define FC_SRB_ABORTED		(1 << 3)	/* abort acknowledged */
 #define FC_SRB_DISCONTIG	(1 << 4)	/* non-sequential data recvd */
 #define FC_SRB_COMPL		(1 << 5)	/* fc_io_compl has been run */
 #define FC_SRB_FCP_PROCESSING_TMO (1 << 6)	/* timer function processing */
diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c
index 2cc39684ce97..c898f47f30ba 100644
--- a/drivers/scsi/lpfc/lpfc_els.c
+++ b/drivers/scsi/lpfc/lpfc_els.c
@@ -969,7 +969,7 @@ lpfc_issue_els_flogi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
  * function returns, it does not guarantee all the IOCBs are actually aborted.
  *
  * Return code
- *   0 - Sucessfully issued abort iocb on all outstanding flogis (Always 0)
+ *   0 - Successfully issued abort iocb on all outstanding flogis (Always 0)
  **/
 int
 lpfc_els_abort_flogi(struct lpfc_hba *phba)
@@ -3117,7 +3117,7 @@ lpfc_cmpl_els_rsp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
 	if (ndlp && NLP_CHK_NODE_ACT(ndlp) &&
 	    (*((uint32_t *) (pcmd)) == ELS_CMD_LS_RJT)) {
 		/* A LS_RJT associated with Default RPI cleanup has its own
-		 * seperate code path.
+		 * separate code path.
 		 */
 		if (!(ndlp->nlp_flag & NLP_RM_DFLT_RPI))
 			ls_rjt = 1;
diff --git a/drivers/scsi/pcmcia/nsp_cs.h b/drivers/scsi/pcmcia/nsp_cs.h
index 7db28cd49446..8c61a4fe1db9 100644
--- a/drivers/scsi/pcmcia/nsp_cs.h
+++ b/drivers/scsi/pcmcia/nsp_cs.h
@@ -187,7 +187,7 @@
 #define S_IO		BIT(1)    /* Input/Output line from SCSI bus */
 #define S_CD		BIT(2)    /* Command/Data line from SCSI bus */
 #define S_BUSY		BIT(3)    /* Busy line from SCSI bus         */
-#define S_ACK		BIT(4)    /* Acknowlege line from SCSI bus   */
+#define S_ACK		BIT(4)    /* Acknowledge line from SCSI bus  */
 #define S_REQUEST	BIT(5)    /* Request line from SCSI bus      */
 #define S_SELECT	BIT(6)	  /*                                 */
 #define S_ATN		BIT(7)	  /*                                 */
diff --git a/drivers/scsi/pm8001/pm8001_hwi.c b/drivers/scsi/pm8001/pm8001_hwi.c
index 9b44c6f1b10e..7985ae45d688 100644
--- a/drivers/scsi/pm8001/pm8001_hwi.c
+++ b/drivers/scsi/pm8001/pm8001_hwi.c
@@ -2924,7 +2924,7 @@ hw_event_sas_phy_up(struct pm8001_hba_info *pm8001_ha, void *piomb)
 		break;
 	default:
 		PM8001_MSG_DBG(pm8001_ha,
-			pm8001_printk("unkown device type(%x)\n", deviceType));
+			pm8001_printk("unknown device type(%x)\n", deviceType));
 		break;
 	}
 	phy->phy_type |= PORT_TYPE_SAS;
diff --git a/drivers/scsi/pm8001/pm8001_sas.c b/drivers/scsi/pm8001/pm8001_sas.c
index 7f9c83a76390..3b2c98fba834 100644
--- a/drivers/scsi/pm8001/pm8001_sas.c
+++ b/drivers/scsi/pm8001/pm8001_sas.c
@@ -600,7 +600,7 @@ static void pm8001_free_dev(struct pm8001_device *pm8001_dev)
   * by the command "OPC_INB_REG_DEV", after that the HBA will assign a
   * device ID(according to device's sas address) and returned it to LLDD. From
   * now on, we communicate with HBA FW with the device ID which HBA assigned
-  * rather than sas address. it is the neccessary step for our HBA but it is
+  * rather than sas address. it is the necessary step for our HBA but it is
   * the optional for other HBA driver.
   */
 static int pm8001_dev_found_notify(struct domain_device *dev)
diff --git a/drivers/scsi/pmcraid.h b/drivers/scsi/pmcraid.h
index 92f89d50850c..b8ad07c3449e 100644
--- a/drivers/scsi/pmcraid.h
+++ b/drivers/scsi/pmcraid.h
@@ -938,7 +938,7 @@ static struct pmcraid_ioasc_error pmcraid_ioasc_error_table[] = {
 
 /*
  * pmcraid_ioctl_header - definition of header structure that preceeds all the
- * buffers given as ioctl arguements.
+ * buffers given as ioctl arguments.
  *
  * .signature           : always ASCII string, "PMCRAID"
  * .reserved            : not used
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 255da53e5a01..5d94772d449d 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -2105,7 +2105,7 @@ static int sd_revalidate_disk(struct gendisk *disk)
  *	which is followed by sdaaa.
  *
  *	This is basically 26 base counting with one extra 'nil' entry
- *	at the beggining from the second digit on and can be
+ *	at the beginning from the second digit on and can be
  *	determined using similar method as 26 base conversion with the
  *	index shifted -1 after each digit is computed.
  *
diff --git a/drivers/spi/spi_s3c24xx.c b/drivers/spi/spi_s3c24xx.c
index c010733877ae..1fabede9e061 100644
--- a/drivers/spi/spi_s3c24xx.c
+++ b/drivers/spi/spi_s3c24xx.c
@@ -275,7 +275,7 @@ static inline u32 ack_bit(unsigned int irq)
  * Claim the FIQ handler (only one can be active at any one time) and
  * then setup the correct transfer code for this transfer.
  *
- * This call updates all the necessary state information if sucessful,
+ * This call updates all the necessary state information if successful,
  * so the caller does not need to do anything more than start the transfer
  * as normal, since the IRQ will have been re-routed to the FIQ handler.
 */
diff --git a/drivers/usb/musb/musb_regs.h b/drivers/usb/musb/musb_regs.h
index 473a94ef905f..7c14d5c5a8ac 100644
--- a/drivers/usb/musb/musb_regs.h
+++ b/drivers/usb/musb/musb_regs.h
@@ -436,7 +436,7 @@ static inline void  musb_write_txhubport(void __iomem *mbase, u8 epnum,
 #define MUSB_FLAT_OFFSET(_epnum, _offset)	\
 	(USB_OFFSET(USB_EP_NI0_TXMAXP) + (0x40 * (_epnum)) + (_offset))
 
-/* Not implemented - HW has seperate Tx/Rx FIFO */
+/* Not implemented - HW has separate Tx/Rx FIFO */
 #define MUSB_TXCSR_MODE			0x0000
 
 static inline void musb_write_txfifosz(void __iomem *mbase, u8 c_size)
diff --git a/drivers/usb/serial/cypress_m8.c b/drivers/usb/serial/cypress_m8.c
index a591ebec0f89..52a81a312b86 100644
--- a/drivers/usb/serial/cypress_m8.c
+++ b/drivers/usb/serial/cypress_m8.c
@@ -154,7 +154,7 @@ struct cypress_private {
 	int isthrottled;		   /* if throttled, discard reads */
 	wait_queue_head_t delta_msr_wait;  /* used for TIOCMIWAIT */
 	char prev_status, diff_status;	   /* used for TIOCMIWAIT */
-	/* we pass a pointer to this as the arguement sent to
+	/* we pass a pointer to this as the argument sent to
 	   cypress_set_termios old_termios */
 	struct ktermios tmp_termios; 	   /* stores the old termios settings */
 };
diff --git a/drivers/video/omap/lcdc.c b/drivers/video/omap/lcdc.c
index a33483910dc8..9557f963662e 100644
--- a/drivers/video/omap/lcdc.c
+++ b/drivers/video/omap/lcdc.c
@@ -389,7 +389,7 @@ static int omap_lcdc_enable_plane(int plane, int enable)
 /*
  * Configure the LCD DMA for a palette load operation and do the palette
  * downloading synchronously. We don't use the frame+palette load mode of
- * the controller, since the palette can always be downloaded seperately.
+ * the controller, since the palette can always be downloaded separately.
  */
 static void load_palette(void)
 {
diff --git a/drivers/video/s1d13xxxfb.c b/drivers/video/s1d13xxxfb.c
index 0deb0a8867b7..7b63429f1a7c 100644
--- a/drivers/video/s1d13xxxfb.c
+++ b/drivers/video/s1d13xxxfb.c
@@ -517,12 +517,12 @@ s1d13xxxfb_bitblt_copyarea(struct fb_info *info, const struct fb_copyarea *area)
 		src = (sy * stride) + (bpp * sx);
 	}
 
-	/* set source adress */
+	/* set source address */
 	s1d13xxxfb_writereg(info->par, S1DREG_BBLT_SRC_START0, (src & 0xff));
 	s1d13xxxfb_writereg(info->par, S1DREG_BBLT_SRC_START1, (src >> 8) & 0x00ff);
 	s1d13xxxfb_writereg(info->par, S1DREG_BBLT_SRC_START2, (src >> 16) & 0x00ff);
 
-	/* set destination adress */
+	/* set destination address */
 	s1d13xxxfb_writereg(info->par, S1DREG_BBLT_DST_START0, (dst & 0xff));
 	s1d13xxxfb_writereg(info->par, S1DREG_BBLT_DST_START1, (dst >> 8) & 0x00ff);
 	s1d13xxxfb_writereg(info->par, S1DREG_BBLT_DST_START2, (dst >> 16) & 0x00ff);
diff --git a/drivers/video/sm501fb.c b/drivers/video/sm501fb.c
index 35370d0ecf03..b7dc1800efa9 100644
--- a/drivers/video/sm501fb.c
+++ b/drivers/video/sm501fb.c
@@ -411,7 +411,7 @@ static int sm501fb_set_par_common(struct fb_info *info,
 	struct sm501fb_par  *par = info->par;
 	struct sm501fb_info *fbi = par->info;
 	unsigned long pixclock;      /* pixelclock in Hz */
-	unsigned long sm501pixclock; /* pixelclock the 501 can achive in Hz */
+	unsigned long sm501pixclock; /* pixelclock the 501 can achieve in Hz */
 	unsigned int mem_type;
 	unsigned int clock_type;
 	unsigned int head_addr;
diff --git a/fs/affs/bitmap.c b/fs/affs/bitmap.c
index dc5ef14bdc1c..8306d53307ed 100644
--- a/fs/affs/bitmap.c
+++ b/fs/affs/bitmap.c
@@ -128,7 +128,7 @@ err_range:
 /*
  * Allocate a block in the given allocation zone.
  * Since we have to byte-swap the bitmap on little-endian
- * machines, this is rather expensive. Therefor we will
+ * machines, this is rather expensive. Therefore we will
  * preallocate up to 16 blocks from the same word, if
  * possible. We are not doing preallocations in the
  * header zone, though.
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 18d77297ccc8..364fcfc0c5df 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -1393,7 +1393,7 @@ static inline void fill_note(struct memelfnote *note, const char *name, int type
 
 /*
  * fill up all the fields in prstatus from the given task struct, except
- * registers which need to be filled up seperately.
+ * registers which need to be filled up separately.
  */
 static void fill_prstatus(struct elf_prstatus *prstatus,
 			  struct task_struct *p, long signr)
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c
index b44ce0a0711c..b1d61d0bdfc7 100644
--- a/fs/cifs/cifs_dfs_ref.c
+++ b/fs/cifs/cifs_dfs_ref.c
@@ -54,7 +54,7 @@ void cifs_dfs_release_automount_timer(void)
  * Extracts sharename form full UNC.
  * i.e. strips from UNC trailing path that is not part of share
  * name and fixup missing '\' in the begining of DFS node refferal
- * if neccessary.
+ * if necessary.
  * Returns pointer to share name on success or ERR_PTR on error.
  * Caller is responsible for freeing returned string.
  */
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 941441d3e386..0e22440d2f0f 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -3886,7 +3886,7 @@ parse_DFS_referrals(TRANSACTION2_GET_DFS_REFER_RSP *pSMBr,
 		goto parse_DFS_referrals_exit;
 	}
 
-	/* collect neccessary data from referrals */
+	/* collect necessary data from referrals */
 	for (i = 0; i < *num_of_nodes; i++) {
 		char *temp;
 		int max_len;
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index 82c415be87a4..12a9ec73a888 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -928,7 +928,7 @@ out2:
 }
 
 /**
- * mext_check_argumants - Check whether move extent can be done
+ * mext_check_arguments - Check whether move extent can be done
  *
  * @orig_inode:		original inode
  * @donor_inode:	donor inode
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 1a822ce2b24b..ec14d19ce501 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -850,7 +850,7 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
 	req->in.args[0].size = sizeof(*arg);
 	req->in.args[0].value = arg;
 	req->out.numargs = 1;
-	/* Variable length arguement used for backward compatibility
+	/* Variable length argument used for backward compatibility
 	   with interface version < 7.5.  Rest of init_out is zeroed
 	   by do_get_request(), so a short reply is not a problem */
 	req->out.argvar = 1;
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index edfee24f3636..0556f7fededd 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -992,7 +992,7 @@ static const struct lm_lockops nolock_ops = {
 /**
  * gfs2_lm_mount - mount a locking protocol
  * @sdp: the filesystem
- * @args: mount arguements
+ * @args: mount arguments
  * @silent: if 1, don't complain if the FS isn't a GFS2 fs
  *
  * Returns: errno
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index 006f9ad838a2..57ae203c8abf 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -1398,7 +1398,7 @@ int journal_stop(handle_t *handle)
 	 * the case where our storage is so fast that it is more optimal to go
 	 * ahead and force a flush and wait for the transaction to be committed
 	 * than it is to wait for an arbitrary amount of time for new writers to
-	 * join the transaction.  We acheive this by measuring how long it takes
+	 * join the transaction.  We achieve this by measuring how long it takes
 	 * to commit a transaction, and compare it with how long this
 	 * transaction has been running, and if run time < commit time then we
 	 * sleep for the delta and commit.  This greatly helps super fast disks
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index a8587e90fd5a..143d43a93b72 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1528,7 +1528,7 @@ static void write_cinfo(__be32 **p, struct nfsd4_change_info *c)
 	} } while (0);
 
 /* Encode as an array of strings the string given with components
- * seperated @sep.
+ * separated @sep.
  */
 static __be32 nfsd4_encode_components(char sep, char *components,
 				   __be32 **pp, int *buflen)
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index c5e4a49e3a12..ccb9c44f478d 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -1757,7 +1757,7 @@ out:
  * ocfs2_file_lock() and ocfs2_file_unlock() map to a single pair of
  * flock() calls. The locking approach this requires is sufficiently
  * different from all other cluster lock types that we implement a
- * seperate path to the "low-level" dlm calls. In particular:
+ * separate path to the "low-level" dlm calls. In particular:
  *
  * - No optimization of lock levels is done - we take at exactly
  *   what's been requested.
diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c
index d35a27f4523e..83e9b1249aed 100644
--- a/fs/ocfs2/extent_map.c
+++ b/fs/ocfs2/extent_map.c
@@ -453,7 +453,7 @@ static int ocfs2_get_clusters_nocache(struct inode *inode,
 	if (i == -1) {
 		/*
 		 * Holes can be larger than the maximum size of an
-		 * extent, so we return their lengths in a seperate
+		 * extent, so we return their lengths in a separate
 		 * field.
 		 */
 		if (hole_len) {
diff --git a/fs/reiserfs/bitmap.c b/fs/reiserfs/bitmap.c
index 65c872761177..ecc04b5ede57 100644
--- a/fs/reiserfs/bitmap.c
+++ b/fs/reiserfs/bitmap.c
@@ -169,7 +169,7 @@ static int scan_bitmap_block(struct reiserfs_transaction_handle *th,
 			return 0;	// No free blocks in this bitmap
 		}
 
-		/* search for a first zero bit -- beggining of a window */
+		/* search for a first zero bit -- beginning of a window */
 		*beg = reiserfs_find_next_zero_le_bit
 		    ((unsigned long *)(bh->b_data), boundary, *beg);
 
diff --git a/include/linux/hil.h b/include/linux/hil.h
index 13352d7d0caf..523785a9de70 100644
--- a/include/linux/hil.h
+++ b/include/linux/hil.h
@@ -168,14 +168,14 @@ enum hil_command {
 	HIL_CMD_PR6	= 0x45,	/* Prompt6 */
 	HIL_CMD_PR7	= 0x46,	/* Prompt7 */
 	HIL_CMD_PRM	= 0x47,	/* Prompt (General Purpose) */
-	HIL_CMD_AK1	= 0x48,	/* Acknowlege1 */  
-	HIL_CMD_AK2	= 0x49,	/* Acknowlege2 */
-	HIL_CMD_AK3	= 0x4a,	/* Acknowlege3 */
-	HIL_CMD_AK4	= 0x4b,	/* Acknowlege4 */
-	HIL_CMD_AK5	= 0x4c,	/* Acknowlege5 */
-	HIL_CMD_AK6	= 0x4d,	/* Acknowlege6 */
-	HIL_CMD_AK7	= 0x4e,	/* Acknowlege7 */
-	HIL_CMD_ACK	= 0x4f,	/* Acknowlege (General Purpose) */
+	HIL_CMD_AK1	= 0x48,	/* Acknowledge1 */  
+	HIL_CMD_AK2	= 0x49,	/* Acknowledge2 */
+	HIL_CMD_AK3	= 0x4a,	/* Acknowledge3 */
+	HIL_CMD_AK4	= 0x4b,	/* Acknowledge4 */
+	HIL_CMD_AK5	= 0x4c,	/* Acknowledge5 */
+	HIL_CMD_AK6	= 0x4d,	/* Acknowledge6 */
+	HIL_CMD_AK7	= 0x4e,	/* Acknowledge7 */
+	HIL_CMD_ACK	= 0x4f,	/* Acknowledge (General Purpose) */
 
 	/* 0x50 to 0x78 reserved for future use  */
 	/* 0x80 to 0xEF device-specific commands */
diff --git a/include/linux/lru_cache.h b/include/linux/lru_cache.h
index 3a2b2d9b0472..de48d167568b 100644
--- a/include/linux/lru_cache.h
+++ b/include/linux/lru_cache.h
@@ -64,7 +64,7 @@ For crash recovery after replication node failure,
   usually the condition is softened to regions that _may_ have been target of
   in-flight WRITE IO, e.g. by only lazily clearing the on-disk write-intent
   bitmap, trading frequency of meta data transactions against amount of
-  (possibly unneccessary) resync traffic.
+  (possibly unnecessary) resync traffic.
 
   If we set a hard limit on the area that may be "hot" at any given time, we
   limit the amount of resync traffic needed for crash recovery.
diff --git a/include/linux/sched.h b/include/linux/sched.h
index abdfacc58653..a70957b138ed 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1533,7 +1533,7 @@ struct task_struct {
 
 	struct list_head	*scm_work_list;
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
-	/* Index of current stored adress in ret_stack */
+	/* Index of current stored address in ret_stack */
 	int curr_ret_stack;
 	/* Stack of return addresses for return function tracing */
 	struct ftrace_ret_stack	*ret_stack;
diff --git a/include/media/davinci/vpfe_capture.h b/include/media/davinci/vpfe_capture.h
index d863e5e8426d..4314a5f6a087 100644
--- a/include/media/davinci/vpfe_capture.h
+++ b/include/media/davinci/vpfe_capture.h
@@ -165,7 +165,7 @@ struct vpfe_device {
 	u8 started;
 	/*
 	 * offset where second field starts from the starting of the
-	 * buffer for field seperated YCbCr formats
+	 * buffer for field separated YCbCr formats
 	 */
 	u32 field_off;
 };
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 8816a20c2597..aff48d657181 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -133,7 +133,7 @@ static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk)
 }
 
 /* This function calculates a "timeout" which is equivalent to the timeout of a
- * TCP connection after "boundary" unsucessful, exponentially backed-off
+ * TCP connection after "boundary" unsuccessful, exponentially backed-off
  * retransmissions with an initial RTO of TCP_RTO_MIN.
  */
 static bool retransmits_timed_out(struct sock *sk,
diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c
index 0f7c6e6a4248..54e4c8bb23e7 100644
--- a/net/mac80211/mesh_plink.c
+++ b/net/mac80211/mesh_plink.c
@@ -743,7 +743,7 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m
 		break;
 	default:
 		/* should not get here, PLINK_BLOCKED is dealt with at the
-		 * beggining of the function
+		 * beginning of the function
 		 */
 		spin_unlock_bh(&sta->lock);
 		break;
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index 023966b569bf..fbe94adee7ac 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -276,7 +276,7 @@ EXPORT_SYMBOL_GPL(ct_sip_parse_request);
  * tabs, spaces and continuation lines, which are treated as a single whitespace
  * character.
  *
- * Some headers may appear multiple times. A comma seperated list of values is
+ * Some headers may appear multiple times. A comma separated list of values is
  * equivalent to multiple headers.
  */
 static const struct sip_header ct_sip_hdrs[] = {
@@ -412,7 +412,7 @@ int ct_sip_get_header(const struct nf_conn *ct, const char *dptr,
 }
 EXPORT_SYMBOL_GPL(ct_sip_get_header);
 
-/* Get next header field in a list of comma seperated values */
+/* Get next header field in a list of comma separated values */
 static int ct_sip_next_header(const struct nf_conn *ct, const char *dptr,
 			      unsigned int dataoff, unsigned int datalen,
 			      enum sip_header_types type,
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index dd16e404424f..cbaac92dad59 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -1,6 +1,6 @@
 /*
  *	xt_hashlimit - Netfilter module to limit the number of packets per time
- *	seperately for each hashbucket (sourceip/sourceport/dstip/dstport)
+ *	separately for each hashbucket (sourceip/sourceport/dstip/dstport)
  *
  *	(C) 2003-2004 by Harald Welte <laforge@netfilter.org>
  *	Copyright © CC Computer Consultants GmbH, 2007 - 2008
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index 4e4ca65cd320..500886bda9b4 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -475,7 +475,7 @@ static void sctp_do_8_2_transport_strike(struct sctp_association *asoc,
 	 * used to provide an upper bound to this doubling operation.
 	 *
 	 * Special Case:  the first HB doesn't trigger exponential backoff.
-	 * The first unacknowleged HB triggers it.  We do this with a flag
+	 * The first unacknowledged HB triggers it.  We do this with a flag
 	 * that indicates that we have an outstanding HB.
 	 */
 	if (!is_hb || transport->hb_sent) {
diff --git a/scripts/gfp-translate b/scripts/gfp-translate
index 073cb6d152a0..d81b968d864e 100644
--- a/scripts/gfp-translate
+++ b/scripts/gfp-translate
@@ -19,7 +19,7 @@ usage() {
 	exit 0
 }
 
-# Parse command-line arguements
+# Parse command-line arguments
 while [ $# -gt 0 ]; do
 	case $1 in
 		--source)
diff --git a/sound/pci/rme9652/hdspm.c b/sound/pci/rme9652/hdspm.c
index a1b10d1a384d..db0ed1cbd982 100644
--- a/sound/pci/rme9652/hdspm.c
+++ b/sound/pci/rme9652/hdspm.c
@@ -2479,7 +2479,7 @@ static int snd_hdspm_put_qs_wire(struct snd_kcontrol *kcontrol,
    on MADICARD 
   - playback mixer matrix: [channelout+64] [output] [value]
   - input(thru) mixer matrix: [channelin] [output] [value]
-  (better do 2 kontrols for seperation ?)
+  (better do 2 kontrols for separation ?)
 */
 
 #define HDSPM_MIXER(xname, xindex) \
diff --git a/sound/soc/codecs/wm8990.c b/sound/soc/codecs/wm8990.c
index 341481e0e830..427614a2762b 100644
--- a/sound/soc/codecs/wm8990.c
+++ b/sound/soc/codecs/wm8990.c
@@ -990,7 +990,7 @@ static int wm8990_set_dai_pll(struct snd_soc_dai *codec_dai, int pll_id,
 		reg = snd_soc_read(codec, WM8990_CLOCKING_2);
 		snd_soc_write(codec, WM8990_CLOCKING_2, reg | WM8990_SYSCLK_SRC);
 
-		/* set up N , fractional mode and pre-divisor if neccessary */
+		/* set up N , fractional mode and pre-divisor if necessary */
 		snd_soc_write(codec, WM8990_PLL1, pll_div.n | WM8990_SDM |
 			(pll_div.div2?WM8990_PRESCALE:0));
 		snd_soc_write(codec, WM8990_PLL2, (u8)(pll_div.k>>8));
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index e8daf5ca6fd2..44408c2621cf 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -321,7 +321,7 @@ static size_t __callchain__fprintf_graph(FILE *fp, struct callchain_node *self,
 			new_depth_mask &= ~(1 << (depth - 1));
 
 		/*
-		 * But we keep the older depth mask for the line seperator
+		 * But we keep the older depth mask for the line separator
 		 * to keep the level link until we reach the last child
 		 */
 		ret += ipchain__fprintf_graph_line(fp, depth, depth_mask,
-- 
cgit v1.2.3


From fcdeb7fedf89f4bbc2e11959794968080cd8426e Mon Sep 17 00:00:00 2001
From: Grant Likely <grant.likely@secretlab.ca>
Date: Fri, 29 Jan 2010 05:04:33 -0700
Subject: of: merge of_attach_node() & of_detach_node()

Merge common code between PowerPC and Microblaze

Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
Tested-by: Wolfram Sang <w.sang@pengutronix.de>
Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/microblaze/include/asm/prom.h |  4 ---
 arch/microblaze/kernel/prom.c      | 59 -------------------------------
 arch/powerpc/include/asm/prom.h    |  4 ---
 arch/powerpc/kernel/prom.c         | 59 -------------------------------
 drivers/of/Kconfig                 |  4 +++
 drivers/of/base.c                  | 71 ++++++++++++++++++++++++++++++++++++++
 include/linux/of.h                 |  6 ++++
 7 files changed, 81 insertions(+), 126 deletions(-)

(limited to 'include/linux')

diff --git a/arch/microblaze/include/asm/prom.h b/arch/microblaze/include/asm/prom.h
index 07d1063f9aae..6c6b386cf3c6 100644
--- a/arch/microblaze/include/asm/prom.h
+++ b/arch/microblaze/include/asm/prom.h
@@ -39,10 +39,6 @@ extern struct device_node *of_chosen;
 
 extern rwlock_t devtree_lock;	/* temporary while merging */
 
-/* For updating the device tree at runtime */
-extern void of_attach_node(struct device_node *);
-extern void of_detach_node(struct device_node *);
-
 /* Other Prototypes */
 extern int early_uartlite_console(void);
 
diff --git a/arch/microblaze/kernel/prom.c b/arch/microblaze/kernel/prom.c
index cd158ef5b583..8171282a0b0d 100644
--- a/arch/microblaze/kernel/prom.c
+++ b/arch/microblaze/kernel/prom.c
@@ -197,65 +197,6 @@ struct device_node *of_find_node_by_phandle(phandle handle)
 }
 EXPORT_SYMBOL(of_find_node_by_phandle);
 
-/*
- * Plug a device node into the tree and global list.
- */
-void of_attach_node(struct device_node *np)
-{
-	unsigned long flags;
-
-	write_lock_irqsave(&devtree_lock, flags);
-	np->sibling = np->parent->child;
-	np->allnext = allnodes;
-	np->parent->child = np;
-	allnodes = np;
-	write_unlock_irqrestore(&devtree_lock, flags);
-}
-
-/*
- * "Unplug" a node from the device tree.  The caller must hold
- * a reference to the node.  The memory associated with the node
- * is not freed until its refcount goes to zero.
- */
-void of_detach_node(struct device_node *np)
-{
-	struct device_node *parent;
-	unsigned long flags;
-
-	write_lock_irqsave(&devtree_lock, flags);
-
-	parent = np->parent;
-	if (!parent)
-		goto out_unlock;
-
-	if (allnodes == np)
-		allnodes = np->allnext;
-	else {
-		struct device_node *prev;
-		for (prev = allnodes;
-		     prev->allnext != np;
-		     prev = prev->allnext)
-			;
-		prev->allnext = np->allnext;
-	}
-
-	if (parent->child == np)
-		parent->child = np->sibling;
-	else {
-		struct device_node *prevsib;
-		for (prevsib = np->parent->child;
-		     prevsib->sibling != np;
-		     prevsib = prevsib->sibling)
-			;
-		prevsib->sibling = np->sibling;
-	}
-
-	of_node_set_flag(np, OF_DETACHED);
-
-out_unlock:
-	write_unlock_irqrestore(&devtree_lock, flags);
-}
-
 #if defined(CONFIG_DEBUG_FS) && defined(DEBUG)
 static struct debugfs_blob_wrapper flat_dt_blob;
 
diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h
index 2ab9cbd98826..f384db815ea8 100644
--- a/arch/powerpc/include/asm/prom.h
+++ b/arch/powerpc/include/asm/prom.h
@@ -34,10 +34,6 @@ extern struct device_node *of_chosen;
 
 #define HAVE_ARCH_DEVTREE_FIXUPS
 
-/* For updating the device tree at runtime */
-extern void of_attach_node(struct device_node *);
-extern void of_detach_node(struct device_node *);
-
 #ifdef CONFIG_PPC32
 /*
  * PCI <-> OF matching functions
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 1ed2ec2ea05b..f954c718d7eb 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -817,65 +817,6 @@ struct device_node *of_find_next_cache_node(struct device_node *np)
 	return NULL;
 }
 
-/*
- * Plug a device node into the tree and global list.
- */
-void of_attach_node(struct device_node *np)
-{
-	unsigned long flags;
-
-	write_lock_irqsave(&devtree_lock, flags);
-	np->sibling = np->parent->child;
-	np->allnext = allnodes;
-	np->parent->child = np;
-	allnodes = np;
-	write_unlock_irqrestore(&devtree_lock, flags);
-}
-
-/*
- * "Unplug" a node from the device tree.  The caller must hold
- * a reference to the node.  The memory associated with the node
- * is not freed until its refcount goes to zero.
- */
-void of_detach_node(struct device_node *np)
-{
-	struct device_node *parent;
-	unsigned long flags;
-
-	write_lock_irqsave(&devtree_lock, flags);
-
-	parent = np->parent;
-	if (!parent)
-		goto out_unlock;
-
-	if (allnodes == np)
-		allnodes = np->allnext;
-	else {
-		struct device_node *prev;
-		for (prev = allnodes;
-		     prev->allnext != np;
-		     prev = prev->allnext)
-			;
-		prev->allnext = np->allnext;
-	}
-
-	if (parent->child == np)
-		parent->child = np->sibling;
-	else {
-		struct device_node *prevsib;
-		for (prevsib = np->parent->child;
-		     prevsib->sibling != np;
-		     prevsib = prevsib->sibling)
-			;
-		prevsib->sibling = np->sibling;
-	}
-
-	of_node_set_flag(np, OF_DETACHED);
-
-out_unlock:
-	write_unlock_irqrestore(&devtree_lock, flags);
-}
-
 #ifdef CONFIG_PPC_PSERIES
 /*
  * Fix up the uninitialized fields in a new device node:
diff --git a/drivers/of/Kconfig b/drivers/of/Kconfig
index 462825e03123..7cecc8fea9bd 100644
--- a/drivers/of/Kconfig
+++ b/drivers/of/Kconfig
@@ -2,6 +2,10 @@ config OF_FLATTREE
 	bool
 	depends on OF
 
+config OF_DYNAMIC
+	def_bool y
+	depends on OF && PPC_OF
+
 config OF_DEVICE
 	def_bool y
 	depends on OF && (SPARC || PPC_OF || MICROBLAZE)
diff --git a/drivers/of/base.c b/drivers/of/base.c
index cf89ee6253f3..2ce58be314af 100644
--- a/drivers/of/base.c
+++ b/drivers/of/base.c
@@ -870,3 +870,74 @@ int prom_update_property(struct device_node *np,
 
 	return 0;
 }
+
+#if defined(CONFIG_OF_DYNAMIC)
+/*
+ * Support for dynamic device trees.
+ *
+ * On some platforms, the device tree can be manipulated at runtime.
+ * The routines in this section support adding, removing and changing
+ * device tree nodes.
+ */
+
+/**
+ * of_attach_node - Plug a device node into the tree and global list.
+ */
+void of_attach_node(struct device_node *np)
+{
+	unsigned long flags;
+
+	write_lock_irqsave(&devtree_lock, flags);
+	np->sibling = np->parent->child;
+	np->allnext = allnodes;
+	np->parent->child = np;
+	allnodes = np;
+	write_unlock_irqrestore(&devtree_lock, flags);
+}
+
+/**
+ * of_detach_node - "Unplug" a node from the device tree.
+ *
+ * The caller must hold a reference to the node.  The memory associated with
+ * the node is not freed until its refcount goes to zero.
+ */
+void of_detach_node(struct device_node *np)
+{
+	struct device_node *parent;
+	unsigned long flags;
+
+	write_lock_irqsave(&devtree_lock, flags);
+
+	parent = np->parent;
+	if (!parent)
+		goto out_unlock;
+
+	if (allnodes == np)
+		allnodes = np->allnext;
+	else {
+		struct device_node *prev;
+		for (prev = allnodes;
+		     prev->allnext != np;
+		     prev = prev->allnext)
+			;
+		prev->allnext = np->allnext;
+	}
+
+	if (parent->child == np)
+		parent->child = np->sibling;
+	else {
+		struct device_node *prevsib;
+		for (prevsib = np->parent->child;
+		     prevsib->sibling != np;
+		     prevsib = prevsib->sibling)
+			;
+		prevsib->sibling = np->sibling;
+	}
+
+	of_node_set_flag(np, OF_DETACHED);
+
+out_unlock:
+	write_unlock_irqrestore(&devtree_lock, flags);
+}
+#endif /* defined(CONFIG_OF_DYNAMIC) */
+
diff --git a/include/linux/of.h b/include/linux/of.h
index dbabf86e0b7a..3cc0d7ae290e 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -184,4 +184,10 @@ extern int of_parse_phandles_with_args(struct device_node *np,
 	const char *list_name, const char *cells_name, int index,
 	struct device_node **out_node, const void **out_args);
 
+#if defined(CONFIG_OF_DYNAMIC)
+/* For updating the device tree at runtime */
+extern void of_attach_node(struct device_node *);
+extern void of_detach_node(struct device_node *);
+#endif
+
 #endif /* _LINUX_OF_H */
-- 
cgit v1.2.3


From 71a157e8edca55198e808f8561dd49017a54ee34 Mon Sep 17 00:00:00 2001
From: Grant Likely <grant.likely@secretlab.ca>
Date: Mon, 1 Feb 2010 21:34:14 -0700
Subject: of: add 'of_' prefix to machine_is_compatible()

machine is compatible is an OF-specific call.  It should have
the of_ prefix to protect the global namespace.

Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
Acked-by: Michal Simek <monstr@monstr.eu>
---
 arch/powerpc/kernel/pci_64.c                  |  2 +-
 arch/powerpc/platforms/85xx/xes_mpc85xx.c     |  4 +--
 arch/powerpc/platforms/cell/cbe_powerbutton.c |  2 +-
 arch/powerpc/platforms/cell/ras.c             |  2 +-
 arch/powerpc/platforms/pasemi/cpufreq.c       |  4 +--
 arch/powerpc/platforms/powermac/cpufreq_32.c  | 14 ++++-----
 arch/powerpc/platforms/powermac/cpufreq_64.c  | 14 ++++-----
 arch/powerpc/platforms/powermac/feature.c     |  2 +-
 arch/powerpc/platforms/powermac/smp.c         | 12 ++++----
 arch/powerpc/platforms/powermac/time.c        |  8 ++---
 arch/powerpc/platforms/powermac/udbg_scc.c    |  6 ++--
 arch/powerpc/sysdev/grackle.c                 |  4 +--
 drivers/char/hvc_beat.c                       |  2 +-
 drivers/gpu/drm/radeon/radeon_combios.c       | 44 +++++++++++++--------------
 drivers/macintosh/adb.c                       |  4 +--
 drivers/macintosh/therm_pm72.c                |  8 ++---
 drivers/macintosh/therm_windtunnel.c          |  2 +-
 drivers/macintosh/via-pmu-backlight.c         |  8 ++---
 drivers/macintosh/via-pmu.c                   |  8 ++---
 drivers/macintosh/windfarm_core.c             |  6 ++--
 drivers/macintosh/windfarm_cpufreq_clamp.c    |  6 ++--
 drivers/macintosh/windfarm_lm75_sensor.c      |  6 ++--
 drivers/macintosh/windfarm_max6690_sensor.c   |  6 ++--
 drivers/macintosh/windfarm_pm112.c            |  2 +-
 drivers/macintosh/windfarm_pm121.c            |  2 +-
 drivers/macintosh/windfarm_pm81.c             |  4 +--
 drivers/macintosh/windfarm_pm91.c             |  2 +-
 drivers/macintosh/windfarm_smu_sensors.c      |  6 ++--
 drivers/net/mace.c                            |  2 +-
 drivers/of/base.c                             |  6 ++--
 drivers/serial/pmac_zilog.c                   |  6 ++--
 drivers/video/aty/aty128fb.c                  | 14 ++++-----
 drivers/video/aty/atyfb_base.c                |  8 ++---
 drivers/video/aty/radeon_backlight.c          |  6 ++--
 include/linux/of_fdt.h                        |  2 +-
 sound/ppc/awacs.c                             | 24 +++++++--------
 sound/ppc/burgundy.c                          |  4 +--
 sound/ppc/pmac.c                              | 18 +++++------
 sound/soc/fsl/efika-audio-fabric.c            |  2 +-
 sound/soc/fsl/pcm030-audio-fabric.c           |  2 +-
 40 files changed, 142 insertions(+), 142 deletions(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c
index ccf56ac92de5..d43fc65749c1 100644
--- a/arch/powerpc/kernel/pci_64.c
+++ b/arch/powerpc/kernel/pci_64.c
@@ -224,7 +224,7 @@ long sys_pciconfig_iobase(long which, unsigned long in_bus,
 	 * G5 machines... So when something asks for bus 0 io base
 	 * (bus 0 is HT root), we return the AGP one instead.
 	 */
-	if (in_bus == 0 && machine_is_compatible("MacRISC4")) {
+	if (in_bus == 0 && of_machine_is_compatible("MacRISC4")) {
 		struct device_node *agp;
 
 		agp = of_find_compatible_node(NULL, NULL, "u3-agp");
diff --git a/arch/powerpc/platforms/85xx/xes_mpc85xx.c b/arch/powerpc/platforms/85xx/xes_mpc85xx.c
index 1b426050a2f9..0125604d096e 100644
--- a/arch/powerpc/platforms/85xx/xes_mpc85xx.c
+++ b/arch/powerpc/platforms/85xx/xes_mpc85xx.c
@@ -80,8 +80,8 @@ static void xes_mpc85xx_configure_l2(void __iomem *l2_base)
 	printk(KERN_INFO "xes_mpc85xx: Enabling L2 as cache\n");
 
 	ctl = MPC85xx_L2CTL_L2E | MPC85xx_L2CTL_L2I;
-	if (machine_is_compatible("MPC8540") ||
-	    machine_is_compatible("MPC8560"))
+	if (of_machine_is_compatible("MPC8540") ||
+	    of_machine_is_compatible("MPC8560"))
 		/*
 		 * Assume L2 SRAM is used fully for cache, so set
 		 * L2BLKSZ (bits 4:5) to match L2SIZ (bits 2:3).
diff --git a/arch/powerpc/platforms/cell/cbe_powerbutton.c b/arch/powerpc/platforms/cell/cbe_powerbutton.c
index dcddaa5fcb66..f75a4daa4ca2 100644
--- a/arch/powerpc/platforms/cell/cbe_powerbutton.c
+++ b/arch/powerpc/platforms/cell/cbe_powerbutton.c
@@ -48,7 +48,7 @@ static int __init cbe_powerbutton_init(void)
 	int ret = 0;
 	struct input_dev *dev;
 
-	if (!machine_is_compatible("IBM,CBPLUS-1.0")) {
+	if (!of_machine_is_compatible("IBM,CBPLUS-1.0")) {
 		printk(KERN_ERR "%s: Not a cell blade.\n", __func__);
 		ret = -ENODEV;
 		goto out;
diff --git a/arch/powerpc/platforms/cell/ras.c b/arch/powerpc/platforms/cell/ras.c
index 5e0a191764fc..608fd2b584c9 100644
--- a/arch/powerpc/platforms/cell/ras.c
+++ b/arch/powerpc/platforms/cell/ras.c
@@ -255,7 +255,7 @@ static int __init cbe_sysreset_init(void)
 {
 	struct cbe_pmd_regs __iomem *regs;
 
-	sysreset_hack = machine_is_compatible("IBM,CBPLUS-1.0");
+	sysreset_hack = of_machine_is_compatible("IBM,CBPLUS-1.0");
 	if (!sysreset_hack)
 		return 0;
 
diff --git a/arch/powerpc/platforms/pasemi/cpufreq.c b/arch/powerpc/platforms/pasemi/cpufreq.c
index be2527a516ea..d35e0520abf0 100644
--- a/arch/powerpc/platforms/pasemi/cpufreq.c
+++ b/arch/powerpc/platforms/pasemi/cpufreq.c
@@ -304,8 +304,8 @@ static struct cpufreq_driver pas_cpufreq_driver = {
 
 static int __init pas_cpufreq_init(void)
 {
-	if (!machine_is_compatible("PA6T-1682M") &&
-	    !machine_is_compatible("pasemi,pwrficient"))
+	if (!of_machine_is_compatible("PA6T-1682M") &&
+	    !of_machine_is_compatible("pasemi,pwrficient"))
 		return -ENODEV;
 
 	return cpufreq_register_driver(&pas_cpufreq_driver);
diff --git a/arch/powerpc/platforms/powermac/cpufreq_32.c b/arch/powerpc/platforms/powermac/cpufreq_32.c
index 08d94e4cedd3..d4f127d18141 100644
--- a/arch/powerpc/platforms/powermac/cpufreq_32.c
+++ b/arch/powerpc/platforms/powermac/cpufreq_32.c
@@ -657,31 +657,31 @@ static int __init pmac_cpufreq_setup(void)
 	cur_freq = (*value) / 1000;
 
 	/*  Check for 7447A based MacRISC3 */
-	if (machine_is_compatible("MacRISC3") &&
+	if (of_machine_is_compatible("MacRISC3") &&
 	    of_get_property(cpunode, "dynamic-power-step", NULL) &&
 	    PVR_VER(mfspr(SPRN_PVR)) == 0x8003) {
 		pmac_cpufreq_init_7447A(cpunode);
 	/* Check for other MacRISC3 machines */
-	} else if (machine_is_compatible("PowerBook3,4") ||
-		   machine_is_compatible("PowerBook3,5") ||
-		   machine_is_compatible("MacRISC3")) {
+	} else if (of_machine_is_compatible("PowerBook3,4") ||
+		   of_machine_is_compatible("PowerBook3,5") ||
+		   of_machine_is_compatible("MacRISC3")) {
 		pmac_cpufreq_init_MacRISC3(cpunode);
 	/* Else check for iBook2 500/600 */
-	} else if (machine_is_compatible("PowerBook4,1")) {
+	} else if (of_machine_is_compatible("PowerBook4,1")) {
 		hi_freq = cur_freq;
 		low_freq = 400000;
 		set_speed_proc = pmu_set_cpu_speed;
 		is_pmu_based = 1;
 	}
 	/* Else check for TiPb 550 */
-	else if (machine_is_compatible("PowerBook3,3") && cur_freq == 550000) {
+	else if (of_machine_is_compatible("PowerBook3,3") && cur_freq == 550000) {
 		hi_freq = cur_freq;
 		low_freq = 500000;
 		set_speed_proc = pmu_set_cpu_speed;
 		is_pmu_based = 1;
 	}
 	/* Else check for TiPb 400 & 500 */
-	else if (machine_is_compatible("PowerBook3,2")) {
+	else if (of_machine_is_compatible("PowerBook3,2")) {
 		/* We only know about the 400 MHz and the 500Mhz model
 		 * they both have 300 MHz as low frequency
 		 */
diff --git a/arch/powerpc/platforms/powermac/cpufreq_64.c b/arch/powerpc/platforms/powermac/cpufreq_64.c
index 708c75133377..3ed288e68ec4 100644
--- a/arch/powerpc/platforms/powermac/cpufreq_64.c
+++ b/arch/powerpc/platforms/powermac/cpufreq_64.c
@@ -398,11 +398,11 @@ static int __init g5_neo2_cpufreq_init(struct device_node *cpus)
 	int rc = -ENODEV;
 
 	/* Check supported platforms */
-	if (machine_is_compatible("PowerMac8,1") ||
-	    machine_is_compatible("PowerMac8,2") ||
-	    machine_is_compatible("PowerMac9,1"))
+	if (of_machine_is_compatible("PowerMac8,1") ||
+	    of_machine_is_compatible("PowerMac8,2") ||
+	    of_machine_is_compatible("PowerMac9,1"))
 		use_volts_smu = 1;
-	else if (machine_is_compatible("PowerMac11,2"))
+	else if (of_machine_is_compatible("PowerMac11,2"))
 		use_volts_vdnap = 1;
 	else
 		return -ENODEV;
@@ -729,9 +729,9 @@ static int __init g5_cpufreq_init(void)
 		return -ENODEV;
 	}
 
-	if (machine_is_compatible("PowerMac7,2") ||
-	    machine_is_compatible("PowerMac7,3") ||
-	    machine_is_compatible("RackMac3,1"))
+	if (of_machine_is_compatible("PowerMac7,2") ||
+	    of_machine_is_compatible("PowerMac7,3") ||
+	    of_machine_is_compatible("RackMac3,1"))
 		rc = g5_pm72_cpufreq_init(cpus);
 #ifdef CONFIG_PMAC_SMU
 	else
diff --git a/arch/powerpc/platforms/powermac/feature.c b/arch/powerpc/platforms/powermac/feature.c
index fbc9bbd74dbd..33e815f4466c 100644
--- a/arch/powerpc/platforms/powermac/feature.c
+++ b/arch/powerpc/platforms/powermac/feature.c
@@ -2426,7 +2426,7 @@ static int __init probe_motherboard(void)
 	    }
 	}
 	for(i=0; i<ARRAY_SIZE(pmac_mb_defs); i++) {
-	    if (machine_is_compatible(pmac_mb_defs[i].model_string)) {
+	    if (of_machine_is_compatible(pmac_mb_defs[i].model_string)) {
 		pmac_mb = pmac_mb_defs[i];
 		goto found;
 	    }
diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c
index b40c22d697f0..6898e8241cd0 100644
--- a/arch/powerpc/platforms/powermac/smp.c
+++ b/arch/powerpc/platforms/powermac/smp.c
@@ -693,9 +693,9 @@ static void __init smp_core99_setup(int ncpus)
 #ifdef CONFIG_PPC64
 
 	/* i2c based HW sync on some G5s */
-	if (machine_is_compatible("PowerMac7,2") ||
-	    machine_is_compatible("PowerMac7,3") ||
-	    machine_is_compatible("RackMac3,1"))
+	if (of_machine_is_compatible("PowerMac7,2") ||
+	    of_machine_is_compatible("PowerMac7,3") ||
+	    of_machine_is_compatible("RackMac3,1"))
 		smp_core99_setup_i2c_hwsync(ncpus);
 
 	/* pfunc based HW sync on recent G5s */
@@ -713,7 +713,7 @@ static void __init smp_core99_setup(int ncpus)
 #else /* CONFIG_PPC64 */
 
 	/* GPIO based HW sync on ppc32 Core99 */
-	if (pmac_tb_freeze == NULL && !machine_is_compatible("MacRISC4")) {
+	if (pmac_tb_freeze == NULL && !of_machine_is_compatible("MacRISC4")) {
 		struct device_node *cpu;
 		const u32 *tbprop = NULL;
 
@@ -750,7 +750,7 @@ static void __init smp_core99_setup(int ncpus)
 #endif
 
 	/* 32 bits SMP can't NAP */
-	if (!machine_is_compatible("MacRISC4"))
+	if (!of_machine_is_compatible("MacRISC4"))
 		powersave_nap = 0;
 }
 
@@ -852,7 +852,7 @@ static void __devinit smp_core99_setup_cpu(int cpu_nr)
 		/* If we didn't start the second CPU, we must take
 		 * it off the bus
 		 */
-		if (machine_is_compatible("MacRISC4") &&
+		if (of_machine_is_compatible("MacRISC4") &&
 		    num_online_cpus() < 2)		
 			g5_phy_disable_cpu1();
 #endif /* CONFIG_PPC64 */
diff --git a/arch/powerpc/platforms/powermac/time.c b/arch/powerpc/platforms/powermac/time.c
index 1810e4226e56..48211ca134c3 100644
--- a/arch/powerpc/platforms/powermac/time.c
+++ b/arch/powerpc/platforms/powermac/time.c
@@ -317,9 +317,9 @@ void __init pmac_calibrate_decr(void)
 	 * calibration. That's better since the VIA itself seems
 	 * to be slightly off. --BenH
 	 */
-	if (!machine_is_compatible("MacRISC2") &&
-	    !machine_is_compatible("MacRISC3") &&
-	    !machine_is_compatible("MacRISC4"))
+	if (!of_machine_is_compatible("MacRISC2") &&
+	    !of_machine_is_compatible("MacRISC3") &&
+	    !of_machine_is_compatible("MacRISC4"))
 		if (via_calibrate_decr())
 			return;
 
@@ -328,7 +328,7 @@ void __init pmac_calibrate_decr(void)
 	 * probably implement calibration based on the KL timer on these
 	 * machines anyway... -BenH
 	 */
-	if (machine_is_compatible("PowerMac3,5"))
+	if (of_machine_is_compatible("PowerMac3,5"))
 		if (via_calibrate_decr())
 			return;
 #endif
diff --git a/arch/powerpc/platforms/powermac/udbg_scc.c b/arch/powerpc/platforms/powermac/udbg_scc.c
index 9490157da62e..d83135a9830e 100644
--- a/arch/powerpc/platforms/powermac/udbg_scc.c
+++ b/arch/powerpc/platforms/powermac/udbg_scc.c
@@ -132,9 +132,9 @@ void udbg_scc_init(int force_scc)
 		scc_inittab[1] = in_8(sccc);
 		out_8(sccc, 12);
 		scc_inittab[3] = in_8(sccc);
-	} else if (machine_is_compatible("RackMac1,1")
-		   || machine_is_compatible("RackMac1,2")
-		   || machine_is_compatible("MacRISC4")) {
+	} else if (of_machine_is_compatible("RackMac1,1")
+		   || of_machine_is_compatible("RackMac1,2")
+		   || of_machine_is_compatible("MacRISC4")) {
 		/* Xserves and G5s default to 57600 */
 		scc_inittab[1] = 0;
 		scc_inittab[3] = 0;
diff --git a/arch/powerpc/sysdev/grackle.c b/arch/powerpc/sysdev/grackle.c
index 5da37c2f22ee..cf27df6e508b 100644
--- a/arch/powerpc/sysdev/grackle.c
+++ b/arch/powerpc/sysdev/grackle.c
@@ -56,9 +56,9 @@ static inline void grackle_set_loop_snoop(struct pci_controller *bp, int enable)
 void __init setup_grackle(struct pci_controller *hose)
 {
 	setup_indirect_pci(hose, 0xfec00000, 0xfee00000, 0);
-	if (machine_is_compatible("PowerMac1,1"))
+	if (of_machine_is_compatible("PowerMac1,1"))
 		ppc_pci_add_flags(PPC_PCI_REASSIGN_ALL_BUS);
-	if (machine_is_compatible("AAPL,PowerBook1998"))
+	if (of_machine_is_compatible("AAPL,PowerBook1998"))
 		grackle_set_loop_snoop(hose, 1);
 #if 0	/* Disabled for now, HW problems ??? */
 	grackle_set_stg(hose, 1);
diff --git a/drivers/char/hvc_beat.c b/drivers/char/hvc_beat.c
index 0afc8b82212e..9b397c5ee1d7 100644
--- a/drivers/char/hvc_beat.c
+++ b/drivers/char/hvc_beat.c
@@ -99,7 +99,7 @@ static int hvc_beat_config(char *p)
 
 static int __init hvc_beat_console_init(void)
 {
-	if (hvc_beat_useit && machine_is_compatible("Beat")) {
+	if (hvc_beat_useit && of_machine_is_compatible("Beat")) {
 		hvc_instantiate(0, 0, &hvc_beat_get_put_ops);
 	}
 	return 0;
diff --git a/drivers/gpu/drm/radeon/radeon_combios.c b/drivers/gpu/drm/radeon/radeon_combios.c
index 579c8920e081..13826a534065 100644
--- a/drivers/gpu/drm/radeon/radeon_combios.c
+++ b/drivers/gpu/drm/radeon/radeon_combios.c
@@ -1280,47 +1280,47 @@ bool radeon_get_legacy_connector_info_from_table(struct drm_device *dev)
 	rdev->mode_info.connector_table = radeon_connector_table;
 	if (rdev->mode_info.connector_table == CT_NONE) {
 #ifdef CONFIG_PPC_PMAC
-		if (machine_is_compatible("PowerBook3,3")) {
+		if (of_machine_is_compatible("PowerBook3,3")) {
 			/* powerbook with VGA */
 			rdev->mode_info.connector_table = CT_POWERBOOK_VGA;
-		} else if (machine_is_compatible("PowerBook3,4") ||
-			   machine_is_compatible("PowerBook3,5")) {
+		} else if (of_machine_is_compatible("PowerBook3,4") ||
+			   of_machine_is_compatible("PowerBook3,5")) {
 			/* powerbook with internal tmds */
 			rdev->mode_info.connector_table = CT_POWERBOOK_INTERNAL;
-		} else if (machine_is_compatible("PowerBook5,1") ||
-			   machine_is_compatible("PowerBook5,2") ||
-			   machine_is_compatible("PowerBook5,3") ||
-			   machine_is_compatible("PowerBook5,4") ||
-			   machine_is_compatible("PowerBook5,5")) {
+		} else if (of_machine_is_compatible("PowerBook5,1") ||
+			   of_machine_is_compatible("PowerBook5,2") ||
+			   of_machine_is_compatible("PowerBook5,3") ||
+			   of_machine_is_compatible("PowerBook5,4") ||
+			   of_machine_is_compatible("PowerBook5,5")) {
 			/* powerbook with external single link tmds (sil164) */
 			rdev->mode_info.connector_table = CT_POWERBOOK_EXTERNAL;
-		} else if (machine_is_compatible("PowerBook5,6")) {
+		} else if (of_machine_is_compatible("PowerBook5,6")) {
 			/* powerbook with external dual or single link tmds */
 			rdev->mode_info.connector_table = CT_POWERBOOK_EXTERNAL;
-		} else if (machine_is_compatible("PowerBook5,7") ||
-			   machine_is_compatible("PowerBook5,8") ||
-			   machine_is_compatible("PowerBook5,9")) {
+		} else if (of_machine_is_compatible("PowerBook5,7") ||
+			   of_machine_is_compatible("PowerBook5,8") ||
+			   of_machine_is_compatible("PowerBook5,9")) {
 			/* PowerBook6,2 ? */
 			/* powerbook with external dual link tmds (sil1178?) */
 			rdev->mode_info.connector_table = CT_POWERBOOK_EXTERNAL;
-		} else if (machine_is_compatible("PowerBook4,1") ||
-			   machine_is_compatible("PowerBook4,2") ||
-			   machine_is_compatible("PowerBook4,3") ||
-			   machine_is_compatible("PowerBook6,3") ||
-			   machine_is_compatible("PowerBook6,5") ||
-			   machine_is_compatible("PowerBook6,7")) {
+		} else if (of_machine_is_compatible("PowerBook4,1") ||
+			   of_machine_is_compatible("PowerBook4,2") ||
+			   of_machine_is_compatible("PowerBook4,3") ||
+			   of_machine_is_compatible("PowerBook6,3") ||
+			   of_machine_is_compatible("PowerBook6,5") ||
+			   of_machine_is_compatible("PowerBook6,7")) {
 			/* ibook */
 			rdev->mode_info.connector_table = CT_IBOOK;
-		} else if (machine_is_compatible("PowerMac4,4")) {
+		} else if (of_machine_is_compatible("PowerMac4,4")) {
 			/* emac */
 			rdev->mode_info.connector_table = CT_EMAC;
-		} else if (machine_is_compatible("PowerMac10,1")) {
+		} else if (of_machine_is_compatible("PowerMac10,1")) {
 			/* mini with internal tmds */
 			rdev->mode_info.connector_table = CT_MINI_INTERNAL;
-		} else if (machine_is_compatible("PowerMac10,2")) {
+		} else if (of_machine_is_compatible("PowerMac10,2")) {
 			/* mini with external tmds */
 			rdev->mode_info.connector_table = CT_MINI_EXTERNAL;
-		} else if (machine_is_compatible("PowerMac12,1")) {
+		} else if (of_machine_is_compatible("PowerMac12,1")) {
 			/* PowerMac8,1 ? */
 			/* imac g5 isight */
 			rdev->mode_info.connector_table = CT_IMAC_G5_ISIGHT;
diff --git a/drivers/macintosh/adb.c b/drivers/macintosh/adb.c
index 23741cec45e3..d840a109f833 100644
--- a/drivers/macintosh/adb.c
+++ b/drivers/macintosh/adb.c
@@ -322,8 +322,8 @@ static int __init adb_init(void)
 		adb_controller = NULL;
 	} else {
 #ifdef CONFIG_PPC
-		if (machine_is_compatible("AAPL,PowerBook1998") ||
-			machine_is_compatible("PowerBook1,1"))
+		if (of_machine_is_compatible("AAPL,PowerBook1998") ||
+			of_machine_is_compatible("PowerBook1,1"))
 			sleepy_trackpad = 1;
 #endif /* CONFIG_PPC */
 
diff --git a/drivers/macintosh/therm_pm72.c b/drivers/macintosh/therm_pm72.c
index 454bc501df3c..5738d8bf2d97 100644
--- a/drivers/macintosh/therm_pm72.c
+++ b/drivers/macintosh/therm_pm72.c
@@ -1899,7 +1899,7 @@ static int create_control_loops(void)
 	 */
 	if (rackmac)
 		cpu_pid_type = CPU_PID_TYPE_RACKMAC;
-	else if (machine_is_compatible("PowerMac7,3")
+	else if (of_machine_is_compatible("PowerMac7,3")
 	    && (cpu_count > 1)
 	    && fcu_fans[CPUA_PUMP_RPM_INDEX].id != FCU_FAN_ABSENT_ID
 	    && fcu_fans[CPUB_PUMP_RPM_INDEX].id != FCU_FAN_ABSENT_ID) {
@@ -2234,10 +2234,10 @@ static int __init therm_pm72_init(void)
 {
 	struct device_node *np;
 
-	rackmac = machine_is_compatible("RackMac3,1");
+	rackmac = of_machine_is_compatible("RackMac3,1");
 
-	if (!machine_is_compatible("PowerMac7,2") &&
-	    !machine_is_compatible("PowerMac7,3") &&
+	if (!of_machine_is_compatible("PowerMac7,2") &&
+	    !of_machine_is_compatible("PowerMac7,3") &&
 	    !rackmac)
 	    	return -ENODEV;
 
diff --git a/drivers/macintosh/therm_windtunnel.c b/drivers/macintosh/therm_windtunnel.c
index ba48fd76396e..7fb8b4da35a7 100644
--- a/drivers/macintosh/therm_windtunnel.c
+++ b/drivers/macintosh/therm_windtunnel.c
@@ -490,7 +490,7 @@ g4fan_init( void )
 	info = of_get_property(np, "thermal-info", NULL);
 	of_node_put(np);
 
-	if( !info || !machine_is_compatible("PowerMac3,6") )
+	if( !info || !of_machine_is_compatible("PowerMac3,6") )
 		return -ENODEV;
 
 	if( info->id != 3 ) {
diff --git a/drivers/macintosh/via-pmu-backlight.c b/drivers/macintosh/via-pmu-backlight.c
index a348bb0791d3..4f3c4479c16a 100644
--- a/drivers/macintosh/via-pmu-backlight.c
+++ b/drivers/macintosh/via-pmu-backlight.c
@@ -150,13 +150,13 @@ void __init pmu_backlight_init()
 
 	/* Special case for the old PowerBook since I can't test on it */
 	autosave =
-		machine_is_compatible("AAPL,3400/2400") ||
-		machine_is_compatible("AAPL,3500");
+		of_machine_is_compatible("AAPL,3400/2400") ||
+		of_machine_is_compatible("AAPL,3500");
 
 	if (!autosave &&
 	    !pmac_has_backlight_type("pmu") &&
-	    !machine_is_compatible("AAPL,PowerBook1998") &&
-	    !machine_is_compatible("PowerBook1,1"))
+	    !of_machine_is_compatible("AAPL,PowerBook1998") &&
+	    !of_machine_is_compatible("PowerBook1,1"))
 		return;
 
 	snprintf(name, sizeof(name), "pmubl");
diff --git a/drivers/macintosh/via-pmu.c b/drivers/macintosh/via-pmu.c
index db379c381432..42764849eb78 100644
--- a/drivers/macintosh/via-pmu.c
+++ b/drivers/macintosh/via-pmu.c
@@ -463,8 +463,8 @@ static int __init via_pmu_dev_init(void)
 #endif
 
 #ifdef CONFIG_PPC32
-  	if (machine_is_compatible("AAPL,3400/2400") ||
-  		machine_is_compatible("AAPL,3500")) {
+  	if (of_machine_is_compatible("AAPL,3400/2400") ||
+  		of_machine_is_compatible("AAPL,3500")) {
 		int mb = pmac_call_feature(PMAC_FTR_GET_MB_INFO,
 			NULL, PMAC_MB_INFO_MODEL, 0);
 		pmu_battery_count = 1;
@@ -472,8 +472,8 @@ static int __init via_pmu_dev_init(void)
 			pmu_batteries[0].flags |= PMU_BATT_TYPE_COMET;
 		else
 			pmu_batteries[0].flags |= PMU_BATT_TYPE_HOOPER;
-	} else if (machine_is_compatible("AAPL,PowerBook1998") ||
-		machine_is_compatible("PowerBook1,1")) {
+	} else if (of_machine_is_compatible("AAPL,PowerBook1998") ||
+		of_machine_is_compatible("PowerBook1,1")) {
 		pmu_battery_count = 2;
 		pmu_batteries[0].flags |= PMU_BATT_TYPE_SMART;
 		pmu_batteries[1].flags |= PMU_BATT_TYPE_SMART;
diff --git a/drivers/macintosh/windfarm_core.c b/drivers/macintosh/windfarm_core.c
index 075b4d99e354..437f55c5d18d 100644
--- a/drivers/macintosh/windfarm_core.c
+++ b/drivers/macintosh/windfarm_core.c
@@ -468,9 +468,9 @@ static int __init windfarm_core_init(void)
 	DBG("wf: core loaded\n");
 
 	/* Don't register on old machines that use therm_pm72 for now */
-	if (machine_is_compatible("PowerMac7,2") ||
-	    machine_is_compatible("PowerMac7,3") ||
-	    machine_is_compatible("RackMac3,1"))
+	if (of_machine_is_compatible("PowerMac7,2") ||
+	    of_machine_is_compatible("PowerMac7,3") ||
+	    of_machine_is_compatible("RackMac3,1"))
 		return -ENODEV;
 	platform_device_register(&wf_platform_device);
 	return 0;
diff --git a/drivers/macintosh/windfarm_cpufreq_clamp.c b/drivers/macintosh/windfarm_cpufreq_clamp.c
index 900aade06198..1a77a7c97d0e 100644
--- a/drivers/macintosh/windfarm_cpufreq_clamp.c
+++ b/drivers/macintosh/windfarm_cpufreq_clamp.c
@@ -76,9 +76,9 @@ static int __init wf_cpufreq_clamp_init(void)
 	struct wf_control *clamp;
 
 	/* Don't register on old machines that use therm_pm72 for now */
-	if (machine_is_compatible("PowerMac7,2") ||
-	    machine_is_compatible("PowerMac7,3") ||
-	    machine_is_compatible("RackMac3,1"))
+	if (of_machine_is_compatible("PowerMac7,2") ||
+	    of_machine_is_compatible("PowerMac7,3") ||
+	    of_machine_is_compatible("RackMac3,1"))
 		return -ENODEV;
 
 	clamp = kmalloc(sizeof(struct wf_control), GFP_KERNEL);
diff --git a/drivers/macintosh/windfarm_lm75_sensor.c b/drivers/macintosh/windfarm_lm75_sensor.c
index ed6426a10773..d8257d35afde 100644
--- a/drivers/macintosh/windfarm_lm75_sensor.c
+++ b/drivers/macintosh/windfarm_lm75_sensor.c
@@ -239,9 +239,9 @@ static struct i2c_driver wf_lm75_driver = {
 static int __init wf_lm75_sensor_init(void)
 {
 	/* Don't register on old machines that use therm_pm72 for now */
-	if (machine_is_compatible("PowerMac7,2") ||
-	    machine_is_compatible("PowerMac7,3") ||
-	    machine_is_compatible("RackMac3,1"))
+	if (of_machine_is_compatible("PowerMac7,2") ||
+	    of_machine_is_compatible("PowerMac7,3") ||
+	    of_machine_is_compatible("RackMac3,1"))
 		return -ENODEV;
 	return i2c_add_driver(&wf_lm75_driver);
 }
diff --git a/drivers/macintosh/windfarm_max6690_sensor.c b/drivers/macintosh/windfarm_max6690_sensor.c
index a67b349319e9..b486eb929fde 100644
--- a/drivers/macintosh/windfarm_max6690_sensor.c
+++ b/drivers/macintosh/windfarm_max6690_sensor.c
@@ -188,9 +188,9 @@ static struct i2c_driver wf_max6690_driver = {
 static int __init wf_max6690_sensor_init(void)
 {
 	/* Don't register on old machines that use therm_pm72 for now */
-	if (machine_is_compatible("PowerMac7,2") ||
-	    machine_is_compatible("PowerMac7,3") ||
-	    machine_is_compatible("RackMac3,1"))
+	if (of_machine_is_compatible("PowerMac7,2") ||
+	    of_machine_is_compatible("PowerMac7,3") ||
+	    of_machine_is_compatible("RackMac3,1"))
 		return -ENODEV;
 	return i2c_add_driver(&wf_max6690_driver);
 }
diff --git a/drivers/macintosh/windfarm_pm112.c b/drivers/macintosh/windfarm_pm112.c
index 73d695dc9e50..e0ee80700cde 100644
--- a/drivers/macintosh/windfarm_pm112.c
+++ b/drivers/macintosh/windfarm_pm112.c
@@ -676,7 +676,7 @@ static int __init wf_pm112_init(void)
 {
 	struct device_node *cpu;
 
-	if (!machine_is_compatible("PowerMac11,2"))
+	if (!of_machine_is_compatible("PowerMac11,2"))
 		return -ENODEV;
 
 	/* Count the number of CPU cores */
diff --git a/drivers/macintosh/windfarm_pm121.c b/drivers/macintosh/windfarm_pm121.c
index 66ec4fb115bb..947d4afa25ca 100644
--- a/drivers/macintosh/windfarm_pm121.c
+++ b/drivers/macintosh/windfarm_pm121.c
@@ -1008,7 +1008,7 @@ static int __init pm121_init(void)
 {
 	int rc = -ENODEV;
 
-	if (machine_is_compatible("PowerMac12,1"))
+	if (of_machine_is_compatible("PowerMac12,1"))
 		rc = pm121_init_pm();
 
 	if (rc == 0) {
diff --git a/drivers/macintosh/windfarm_pm81.c b/drivers/macintosh/windfarm_pm81.c
index abbe206474f5..565d5b2adc95 100644
--- a/drivers/macintosh/windfarm_pm81.c
+++ b/drivers/macintosh/windfarm_pm81.c
@@ -779,8 +779,8 @@ static int __init wf_smu_init(void)
 {
 	int rc = -ENODEV;
 
-	if (machine_is_compatible("PowerMac8,1") ||
-	    machine_is_compatible("PowerMac8,2"))
+	if (of_machine_is_compatible("PowerMac8,1") ||
+	    of_machine_is_compatible("PowerMac8,2"))
 		rc = wf_init_pm();
 
 	if (rc == 0) {
diff --git a/drivers/macintosh/windfarm_pm91.c b/drivers/macintosh/windfarm_pm91.c
index 764c525b2117..bea99168ff35 100644
--- a/drivers/macintosh/windfarm_pm91.c
+++ b/drivers/macintosh/windfarm_pm91.c
@@ -711,7 +711,7 @@ static int __init wf_smu_init(void)
 {
 	int rc = -ENODEV;
 
-	if (machine_is_compatible("PowerMac9,1"))
+	if (of_machine_is_compatible("PowerMac9,1"))
 		rc = wf_init_pm();
 
 	if (rc == 0) {
diff --git a/drivers/macintosh/windfarm_smu_sensors.c b/drivers/macintosh/windfarm_smu_sensors.c
index 9c567b93f417..3c193504bb80 100644
--- a/drivers/macintosh/windfarm_smu_sensors.c
+++ b/drivers/macintosh/windfarm_smu_sensors.c
@@ -363,9 +363,9 @@ smu_cpu_power_create(struct wf_sensor *volts, struct wf_sensor *amps)
 	 * I yet have to figure out what's up with 8,2 and will have to
 	 * adjust for later, unless we can 100% trust the SDB partition...
 	 */
-	if ((machine_is_compatible("PowerMac8,1") ||
-	     machine_is_compatible("PowerMac8,2") ||
-	     machine_is_compatible("PowerMac9,1")) &&
+	if ((of_machine_is_compatible("PowerMac8,1") ||
+	     of_machine_is_compatible("PowerMac8,2") ||
+	     of_machine_is_compatible("PowerMac9,1")) &&
 	    cpuvcp_version >= 2) {
 		pow->quadratic = 1;
 		DBG("windfarm: CPU Power using quadratic transform\n");
diff --git a/drivers/net/mace.c b/drivers/net/mace.c
index d9fbad386389..43aea91e3369 100644
--- a/drivers/net/mace.c
+++ b/drivers/net/mace.c
@@ -206,7 +206,7 @@ static int __devinit mace_probe(struct macio_dev *mdev, const struct of_device_i
 		mp->port_aaui = port_aaui;
 	else {
 		/* Apple Network Server uses the AAUI port */
-		if (machine_is_compatible("AAPL,ShinerESB"))
+		if (of_machine_is_compatible("AAPL,ShinerESB"))
 			mp->port_aaui = 1;
 		else {
 #ifdef CONFIG_MACE_AAUI_PORT
diff --git a/drivers/of/base.c b/drivers/of/base.c
index 785e9cc1b207..6bc8740c21ad 100644
--- a/drivers/of/base.c
+++ b/drivers/of/base.c
@@ -219,13 +219,13 @@ int of_device_is_compatible(const struct device_node *device,
 EXPORT_SYMBOL(of_device_is_compatible);
 
 /**
- * machine_is_compatible - Test root of device tree for a given compatible value
+ * of_machine_is_compatible - Test root of device tree for a given compatible value
  * @compat: compatible string to look for in root node's compatible property.
  *
  * Returns true if the root node has the given value in its
  * compatible property.
  */
-int machine_is_compatible(const char *compat)
+int of_machine_is_compatible(const char *compat)
 {
 	struct device_node *root;
 	int rc = 0;
@@ -237,7 +237,7 @@ int machine_is_compatible(const char *compat)
 	}
 	return rc;
 }
-EXPORT_SYMBOL(machine_is_compatible);
+EXPORT_SYMBOL(of_machine_is_compatible);
 
 /**
  *  of_device_is_available - check if a device is available for use
diff --git a/drivers/serial/pmac_zilog.c b/drivers/serial/pmac_zilog.c
index 683e66f18e8c..3e2ae4807ae2 100644
--- a/drivers/serial/pmac_zilog.c
+++ b/drivers/serial/pmac_zilog.c
@@ -2031,9 +2031,9 @@ static int __init pmz_console_setup(struct console *co, char *options)
 	/*
 	 * XServe's default to 57600 bps
 	 */
-	if (machine_is_compatible("RackMac1,1")
-	    || machine_is_compatible("RackMac1,2")
-	    || machine_is_compatible("MacRISC4"))
+	if (of_machine_is_compatible("RackMac1,1")
+	    || of_machine_is_compatible("RackMac1,2")
+	    || of_machine_is_compatible("MacRISC4"))
 	 	baud = 57600;
 
 	/*
diff --git a/drivers/video/aty/aty128fb.c b/drivers/video/aty/aty128fb.c
index e4e4d433b007..9ee67d6da710 100644
--- a/drivers/video/aty/aty128fb.c
+++ b/drivers/video/aty/aty128fb.c
@@ -1931,22 +1931,22 @@ static int __devinit aty128_init(struct pci_dev *pdev, const struct pci_device_i
 			 * PowerMac2,2 summer 2000 iMacs
 			 * PowerMac4,1 january 2001 iMacs "flower power"
 			 */
-			if (machine_is_compatible("PowerMac2,1") ||
-			    machine_is_compatible("PowerMac2,2") ||
-			    machine_is_compatible("PowerMac4,1"))
+			if (of_machine_is_compatible("PowerMac2,1") ||
+			    of_machine_is_compatible("PowerMac2,2") ||
+			    of_machine_is_compatible("PowerMac4,1"))
 				default_vmode = VMODE_1024_768_75;
 
 			/* iBook SE */
-			if (machine_is_compatible("PowerBook2,2"))
+			if (of_machine_is_compatible("PowerBook2,2"))
 				default_vmode = VMODE_800_600_60;
 
 			/* PowerBook Firewire (Pismo), iBook Dual USB */
-			if (machine_is_compatible("PowerBook3,1") ||
-			    machine_is_compatible("PowerBook4,1"))
+			if (of_machine_is_compatible("PowerBook3,1") ||
+			    of_machine_is_compatible("PowerBook4,1"))
 				default_vmode = VMODE_1024_768_60;
 
 			/* PowerBook Titanium */
-			if (machine_is_compatible("PowerBook3,2"))
+			if (of_machine_is_compatible("PowerBook3,2"))
 				default_vmode = VMODE_1152_768_60;
 	
 			if (default_cmode > 16) 
diff --git a/drivers/video/aty/atyfb_base.c b/drivers/video/aty/atyfb_base.c
index 5f1b5807a48f..e45ab8db2ddc 100644
--- a/drivers/video/aty/atyfb_base.c
+++ b/drivers/video/aty/atyfb_base.c
@@ -2439,7 +2439,7 @@ static int __devinit aty_init(struct fb_info *info)
 	 * The Apple iBook1 uses non-standard memory frequencies.
 	 * We detect it and set the frequency manually.
 	 */
-	if (machine_is_compatible("PowerBook2,1")) {
+	if (of_machine_is_compatible("PowerBook2,1")) {
 		par->pll_limits.mclk = 70;
 		par->pll_limits.xclk = 53;
 	}
@@ -2659,7 +2659,7 @@ static int __devinit aty_init(struct fb_info *info)
 		      FBINFO_HWACCEL_YPAN;
 
 #ifdef CONFIG_PMAC_BACKLIGHT
-	if (M64_HAS(G3_PB_1_1) && machine_is_compatible("PowerBook1,1")) {
+	if (M64_HAS(G3_PB_1_1) && of_machine_is_compatible("PowerBook1,1")) {
 		/*
 		 * these bits let the 101 powerbook
 		 * wake up from sleep -- paulus
@@ -2690,9 +2690,9 @@ static int __devinit aty_init(struct fb_info *info)
 				if (M64_HAS(G3_PB_1024x768))
 					/* G3 PowerBook with 1024x768 LCD */
 					default_vmode = VMODE_1024_768_60;
-				else if (machine_is_compatible("iMac"))
+				else if (of_machine_is_compatible("iMac"))
 					default_vmode = VMODE_1024_768_75;
-				else if (machine_is_compatible("PowerBook2,1"))
+				else if (of_machine_is_compatible("PowerBook2,1"))
 					/* iBook with 800x600 LCD */
 					default_vmode = VMODE_800_600_60;
 				else
diff --git a/drivers/video/aty/radeon_backlight.c b/drivers/video/aty/radeon_backlight.c
index 1a056adb61c8..fa1198c4ccc5 100644
--- a/drivers/video/aty/radeon_backlight.c
+++ b/drivers/video/aty/radeon_backlight.c
@@ -175,9 +175,9 @@ void radeonfb_bl_init(struct radeonfb_info *rinfo)
 
 #ifdef CONFIG_PMAC_BACKLIGHT
 	pdata->negative = pdata->negative ||
-		machine_is_compatible("PowerBook4,3") ||
-		machine_is_compatible("PowerBook6,3") ||
-		machine_is_compatible("PowerBook6,5");
+		of_machine_is_compatible("PowerBook4,3") ||
+		of_machine_is_compatible("PowerBook6,3") ||
+		of_machine_is_compatible("PowerBook6,5");
 #endif
 
 	rinfo->info->bl_dev = bd;
diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h
index 8118d4559dd5..fbf29610616d 100644
--- a/include/linux/of_fdt.h
+++ b/include/linux/of_fdt.h
@@ -85,7 +85,7 @@ extern int early_init_dt_scan_root(unsigned long node, const char *uname,
 extern void finish_device_tree(void);
 extern void unflatten_device_tree(void);
 extern void early_init_devtree(void *);
-extern int machine_is_compatible(const char *compat);
+extern int of_machine_is_compatible(const char *compat);
 extern void print_properties(struct device_node *node);
 extern int prom_n_intr_cells(struct device_node* np);
 extern void prom_get_irq_senses(unsigned char *senses, int off, int max);
diff --git a/sound/ppc/awacs.c b/sound/ppc/awacs.c
index 2e156467b814..b36679384b27 100644
--- a/sound/ppc/awacs.c
+++ b/sound/ppc/awacs.c
@@ -751,8 +751,8 @@ static void snd_pmac_awacs_suspend(struct snd_pmac *chip)
 
 static void snd_pmac_awacs_resume(struct snd_pmac *chip)
 {
-	if (machine_is_compatible("PowerBook3,1")
-	    || machine_is_compatible("PowerBook3,2")) {
+	if (of_machine_is_compatible("PowerBook3,1")
+	    || of_machine_is_compatible("PowerBook3,2")) {
 		msleep(100);
 		snd_pmac_awacs_write_reg(chip, 1,
 			chip->awacs_reg[1] & ~MASK_PAROUT);
@@ -780,16 +780,16 @@ static void snd_pmac_awacs_resume(struct snd_pmac *chip)
 }
 #endif /* CONFIG_PM */
 
-#define IS_PM7500 (machine_is_compatible("AAPL,7500") \
-		|| machine_is_compatible("AAPL,8500") \
-		|| machine_is_compatible("AAPL,9500"))
-#define IS_PM5500 (machine_is_compatible("AAPL,e411"))
-#define IS_BEIGE (machine_is_compatible("AAPL,Gossamer"))
-#define IS_IMAC1 (machine_is_compatible("PowerMac2,1"))
-#define IS_IMAC2 (machine_is_compatible("PowerMac2,2") \
-		|| machine_is_compatible("PowerMac4,1"))
-#define IS_G4AGP (machine_is_compatible("PowerMac3,1"))
-#define IS_LOMBARD (machine_is_compatible("PowerBook1,1"))
+#define IS_PM7500 (of_machine_is_compatible("AAPL,7500") \
+		|| of_machine_is_compatible("AAPL,8500") \
+		|| of_machine_is_compatible("AAPL,9500"))
+#define IS_PM5500 (of_machine_is_compatible("AAPL,e411"))
+#define IS_BEIGE (of_machine_is_compatible("AAPL,Gossamer"))
+#define IS_IMAC1 (of_machine_is_compatible("PowerMac2,1"))
+#define IS_IMAC2 (of_machine_is_compatible("PowerMac2,2") \
+		|| of_machine_is_compatible("PowerMac4,1"))
+#define IS_G4AGP (of_machine_is_compatible("PowerMac3,1"))
+#define IS_LOMBARD (of_machine_is_compatible("PowerBook1,1"))
 
 static int imac1, imac2;
 
diff --git a/sound/ppc/burgundy.c b/sound/ppc/burgundy.c
index 0accfe49735b..1f72e1c786bf 100644
--- a/sound/ppc/burgundy.c
+++ b/sound/ppc/burgundy.c
@@ -582,7 +582,7 @@ static int snd_pmac_burgundy_detect_headphone(struct snd_pmac *chip)
 static void snd_pmac_burgundy_update_automute(struct snd_pmac *chip, int do_notify)
 {
 	if (chip->auto_mute) {
-		int imac = machine_is_compatible("iMac");
+		int imac = of_machine_is_compatible("iMac");
 		int reg, oreg;
 		reg = oreg = snd_pmac_burgundy_rcb(chip,
 				MASK_ADDR_BURGUNDY_MORE_OUTPUTENABLES);
@@ -620,7 +620,7 @@ static void snd_pmac_burgundy_update_automute(struct snd_pmac *chip, int do_noti
  */
 int __devinit snd_pmac_burgundy_init(struct snd_pmac *chip)
 {
-	int imac = machine_is_compatible("iMac");
+	int imac = of_machine_is_compatible("iMac");
 	int i, err;
 
 	/* Checks to see the chip is alive and kicking */
diff --git a/sound/ppc/pmac.c b/sound/ppc/pmac.c
index 7bc492ee77ec..85081172403f 100644
--- a/sound/ppc/pmac.c
+++ b/sound/ppc/pmac.c
@@ -922,11 +922,11 @@ static void __devinit detect_byte_swap(struct snd_pmac *chip)
 	}
 
 	/* it seems the Pismo & iBook can't byte-swap in hardware. */
-	if (machine_is_compatible("PowerBook3,1") ||
-	    machine_is_compatible("PowerBook2,1"))
+	if (of_machine_is_compatible("PowerBook3,1") ||
+	    of_machine_is_compatible("PowerBook2,1"))
 		chip->can_byte_swap = 0 ;
 
-	if (machine_is_compatible("PowerBook2,1"))
+	if (of_machine_is_compatible("PowerBook2,1"))
 		chip->can_duplex = 0;
 }
 
@@ -959,11 +959,11 @@ static int __devinit snd_pmac_detect(struct snd_pmac *chip)
 	chip->control_mask = MASK_IEPC | MASK_IEE | 0x11; /* default */
 
 	/* check machine type */
-	if (machine_is_compatible("AAPL,3400/2400")
-	    || machine_is_compatible("AAPL,3500"))
+	if (of_machine_is_compatible("AAPL,3400/2400")
+	    || of_machine_is_compatible("AAPL,3500"))
 		chip->is_pbook_3400 = 1;
-	else if (machine_is_compatible("PowerBook1,1")
-		 || machine_is_compatible("AAPL,PowerBook1998"))
+	else if (of_machine_is_compatible("PowerBook1,1")
+		 || of_machine_is_compatible("AAPL,PowerBook1998"))
 		chip->is_pbook_G3 = 1;
 	chip->node = of_find_node_by_name(NULL, "awacs");
 	sound = of_node_get(chip->node);
@@ -1033,8 +1033,8 @@ static int __devinit snd_pmac_detect(struct snd_pmac *chip)
 	}
 	if (of_device_is_compatible(sound, "tumbler")) {
 		chip->model = PMAC_TUMBLER;
-		chip->can_capture = machine_is_compatible("PowerMac4,2")
-				|| machine_is_compatible("PowerBook4,1");
+		chip->can_capture = of_machine_is_compatible("PowerMac4,2")
+				|| of_machine_is_compatible("PowerBook4,1");
 		chip->can_duplex = 0;
 		// chip->can_byte_swap = 0; /* FIXME: check this */
 		chip->num_freqs = ARRAY_SIZE(tumbler_freqs);
diff --git a/sound/soc/fsl/efika-audio-fabric.c b/sound/soc/fsl/efika-audio-fabric.c
index 3326e2a1e863..1a5b8e0d6a34 100644
--- a/sound/soc/fsl/efika-audio-fabric.c
+++ b/sound/soc/fsl/efika-audio-fabric.c
@@ -55,7 +55,7 @@ static __init int efika_fabric_init(void)
 	struct platform_device *pdev;
 	int rc;
 
-	if (!machine_is_compatible("bplan,efika"))
+	if (!of_machine_is_compatible("bplan,efika"))
 		return -ENODEV;
 
 	card.platform = &mpc5200_audio_dma_platform;
diff --git a/sound/soc/fsl/pcm030-audio-fabric.c b/sound/soc/fsl/pcm030-audio-fabric.c
index b928ef7d28eb..6644cba7cbf2 100644
--- a/sound/soc/fsl/pcm030-audio-fabric.c
+++ b/sound/soc/fsl/pcm030-audio-fabric.c
@@ -55,7 +55,7 @@ static __init int pcm030_fabric_init(void)
 	struct platform_device *pdev;
 	int rc;
 
-	if (!machine_is_compatible("phytec,pcm030"))
+	if (!of_machine_is_compatible("phytec,pcm030"))
 		return -ENODEV;
 
 	card.platform = &mpc5200_audio_dma_platform;
-- 
cgit v1.2.3


From 51975db0b7333cf389b64b5040c2a910341d241a Mon Sep 17 00:00:00 2001
From: Grant Likely <grant.likely@secretlab.ca>
Date: Mon, 1 Feb 2010 21:34:14 -0700
Subject: of/flattree: merge early_init_dt_scan_memory() common code

Merge common code between PowerPC and Microblaze architectures.

Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
Acked-by: Michal Simek <monstr@monstr.eu>
---
 arch/microblaze/kernel/prom.c | 51 ++------------------------------
 arch/powerpc/kernel/prom.c    | 69 +++++++++++--------------------------------
 drivers/of/fdt.c              | 50 +++++++++++++++++++++++++++++++
 include/linux/of_fdt.h        |  3 ++
 4 files changed, 73 insertions(+), 100 deletions(-)

(limited to 'include/linux')

diff --git a/arch/microblaze/kernel/prom.c b/arch/microblaze/kernel/prom.c
index f7bd0ee8d481..459c32e4a5fe 100644
--- a/arch/microblaze/kernel/prom.c
+++ b/arch/microblaze/kernel/prom.c
@@ -45,61 +45,14 @@
 /* export that to outside world */
 struct device_node *of_chosen;
 
-#define early_init_dt_scan_drconf_memory(node) 0
-
 void __init early_init_dt_scan_chosen_arch(unsigned long node)
 {
 	/* No Microblaze specific code here */
 }
 
-static int __init early_init_dt_scan_memory(unsigned long node,
-				const char *uname, int depth, void *data)
+void __init early_init_dt_add_memory_arch(u64 base, u64 size)
 {
-	char *type = of_get_flat_dt_prop(node, "device_type", NULL);
-	__be32 *reg, *endp;
-	unsigned long l;
-
-	/* Look for the ibm,dynamic-reconfiguration-memory node */
-/*	if (depth == 1 &&
-		strcmp(uname, "ibm,dynamic-reconfiguration-memory") == 0)
-		return early_init_dt_scan_drconf_memory(node);
-*/
-	/* We are scanning "memory" nodes only */
-	if (type == NULL) {
-		/*
-		 * The longtrail doesn't have a device_type on the
-		 * /memory node, so look for the node called /memory@0.
-		 */
-		if (depth != 1 || strcmp(uname, "memory@0") != 0)
-			return 0;
-	} else if (strcmp(type, "memory") != 0)
-		return 0;
-
-	reg = (__be32 *)of_get_flat_dt_prop(node, "linux,usable-memory", &l);
-	if (reg == NULL)
-		reg = (__be32 *)of_get_flat_dt_prop(node, "reg", &l);
-	if (reg == NULL)
-		return 0;
-
-	endp = reg + (l / sizeof(__be32));
-
-	pr_debug("memory scan node %s, reg size %ld, data: %x %x %x %x,\n",
-		uname, l, reg[0], reg[1], reg[2], reg[3]);
-
-	while ((endp - reg) >= (dt_root_addr_cells + dt_root_size_cells)) {
-		u64 base, size;
-
-		base = dt_mem_next_cell(dt_root_addr_cells, &reg);
-		size = dt_mem_next_cell(dt_root_size_cells, &reg);
-
-		if (size == 0)
-			continue;
-		pr_debug(" - %llx ,  %llx\n", (unsigned long long)base,
-			(unsigned long long)size);
-
-		lmb_add(base, size);
-	}
-	return 0;
+	lmb_add(base, size);
 }
 
 #ifdef CONFIG_EARLY_PRINTK
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 28be19ab0f18..e0f368ff8d12 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -483,64 +483,31 @@ static int __init early_init_dt_scan_drconf_memory(unsigned long node)
 #define early_init_dt_scan_drconf_memory(node)	0
 #endif /* CONFIG_PPC_PSERIES */
 
-static int __init early_init_dt_scan_memory(unsigned long node,
-					    const char *uname, int depth, void *data)
+static int __init early_init_dt_scan_memory_ppc(unsigned long node,
+						const char *uname,
+						int depth, void *data)
 {
-	char *type = of_get_flat_dt_prop(node, "device_type", NULL);
-	__be32 *reg, *endp;
-	unsigned long l;
-
-	/* Look for the ibm,dynamic-reconfiguration-memory node */
 	if (depth == 1 &&
 	    strcmp(uname, "ibm,dynamic-reconfiguration-memory") == 0)
 		return early_init_dt_scan_drconf_memory(node);
+	
+	return early_init_dt_scan_memory(node, uname, depth, data);
+}
 
-	/* We are scanning "memory" nodes only */
-	if (type == NULL) {
-		/*
-		 * The longtrail doesn't have a device_type on the
-		 * /memory node, so look for the node called /memory@0.
-		 */
-		if (depth != 1 || strcmp(uname, "memory@0") != 0)
-			return 0;
-	} else if (strcmp(type, "memory") != 0)
-		return 0;
-
-	reg = of_get_flat_dt_prop(node, "linux,usable-memory", &l);
-	if (reg == NULL)
-		reg = of_get_flat_dt_prop(node, "reg", &l);
-	if (reg == NULL)
-		return 0;
-
-	endp = reg + (l / sizeof(__be32));
-
-	DBG("memory scan node %s, reg size %ld, data: %x %x %x %x,\n",
-	    uname, l, reg[0], reg[1], reg[2], reg[3]);
-
-	while ((endp - reg) >= (dt_root_addr_cells + dt_root_size_cells)) {
-		u64 base, size;
-
-		base = dt_mem_next_cell(dt_root_addr_cells, &reg);
-		size = dt_mem_next_cell(dt_root_size_cells, &reg);
-
-		if (size == 0)
-			continue;
-		DBG(" - %llx ,  %llx\n", (unsigned long long)base,
-		    (unsigned long long)size);
-#ifdef CONFIG_PPC64
-		if (iommu_is_off) {
-			if (base >= 0x80000000ul)
-				continue;
-			if ((base + size) > 0x80000000ul)
-				size = 0x80000000ul - base;
-		}
+void __init early_init_dt_add_memory_arch(u64 base, u64 size)
+{
+#if defined(CONFIG_PPC64)
+	if (iommu_is_off) {
+		if (base >= 0x80000000ul)
+			return;
+		if ((base + size) > 0x80000000ul)
+			size = 0x80000000ul - base;
+	}
 #endif
-		lmb_add(base, size);
 
-		memstart_addr = min((u64)memstart_addr, base);
-	}
+	lmb_add(base, size);
 
-	return 0;
+	memstart_addr = min((u64)memstart_addr, base);
 }
 
 static void __init early_reserve_mem(void)
@@ -706,7 +673,7 @@ void __init early_init_devtree(void *params)
 	/* Scan memory nodes and rebuild LMBs */
 	lmb_init();
 	of_scan_flat_dt(early_init_dt_scan_root, NULL);
-	of_scan_flat_dt(early_init_dt_scan_memory, NULL);
+	of_scan_flat_dt(early_init_dt_scan_memory_ppc, NULL);
 
 	/* Save command line for /proc/cmdline and then parse parameters */
 	strlcpy(boot_command_line, cmd_line, COMMAND_LINE_SIZE);
diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
index 7f8861121a31..f84152d112b0 100644
--- a/drivers/of/fdt.c
+++ b/drivers/of/fdt.c
@@ -15,6 +15,7 @@
 #include <linux/of.h>
 #include <linux/of_fdt.h>
 
+
 #ifdef CONFIG_PPC
 #include <asm/machdep.h>
 #endif /* CONFIG_PPC */
@@ -443,6 +444,55 @@ u64 __init dt_mem_next_cell(int s, u32 **cellp)
 	return of_read_number(p, s);
 }
 
+/**
+ * early_init_dt_scan_memory - Look for an parse memory nodes
+ */
+int __init early_init_dt_scan_memory(unsigned long node, const char *uname,
+				     int depth, void *data)
+{
+	char *type = of_get_flat_dt_prop(node, "device_type", NULL);
+	__be32 *reg, *endp;
+	unsigned long l;
+
+	/* We are scanning "memory" nodes only */
+	if (type == NULL) {
+		/*
+		 * The longtrail doesn't have a device_type on the
+		 * /memory node, so look for the node called /memory@0.
+		 */
+		if (depth != 1 || strcmp(uname, "memory@0") != 0)
+			return 0;
+	} else if (strcmp(type, "memory") != 0)
+		return 0;
+
+	reg = of_get_flat_dt_prop(node, "linux,usable-memory", &l);
+	if (reg == NULL)
+		reg = of_get_flat_dt_prop(node, "reg", &l);
+	if (reg == NULL)
+		return 0;
+
+	endp = reg + (l / sizeof(__be32));
+
+	pr_debug("memory scan node %s, reg size %ld, data: %x %x %x %x,\n",
+	    uname, l, reg[0], reg[1], reg[2], reg[3]);
+
+	while ((endp - reg) >= (dt_root_addr_cells + dt_root_size_cells)) {
+		u64 base, size;
+
+		base = dt_mem_next_cell(dt_root_addr_cells, &reg);
+		size = dt_mem_next_cell(dt_root_size_cells, &reg);
+
+		if (size == 0)
+			continue;
+		pr_debug(" - %llx ,  %llx\n", (unsigned long long)base,
+		    (unsigned long long)size);
+
+		early_init_dt_add_memory_arch(base, size);
+	}
+
+	return 0;
+}
+
 int __init early_init_dt_scan_chosen(unsigned long node, const char *uname,
 				     int depth, void *data)
 {
diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h
index fbf29610616d..bf26bd5df9f1 100644
--- a/include/linux/of_fdt.h
+++ b/include/linux/of_fdt.h
@@ -75,6 +75,9 @@ extern void early_init_dt_scan_chosen_arch(unsigned long node);
 extern int early_init_dt_scan_chosen(unsigned long node, const char *uname,
 				     int depth, void *data);
 extern void early_init_dt_check_for_initrd(unsigned long node);
+extern int early_init_dt_scan_memory(unsigned long node, const char *uname,
+				     int depth, void *data);
+extern void early_init_dt_add_memory_arch(u64 base, u64 size);
 extern u64 dt_mem_next_cell(int s, u32 **cellp);
 
 /* Early flat tree scan hooks */
-- 
cgit v1.2.3


From 8cfb3343f70bcf9403218df120ecf345f06dd585 Mon Sep 17 00:00:00 2001
From: Jeremy Kerr <jeremy.kerr@canonical.com>
Date: Mon, 1 Feb 2010 21:34:14 -0700
Subject: of: make set_node_proc_entry private to proc_devtree.c

We only need set_node_proc_entry in proc_devtree.c, so move it there.

This fixes the !HAVE_ARCH_DEVTREE_FIXUPS build, as we can't make make
the definition in linux/of.h conditional on this #define (definitions in
asm/prom.h can't be exposed to linux/of.h, due to the enforced #include
ordering).

Signed-off-by: Jeremy Kerr <jeremy.kerr@canonical.com>
Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
---
 fs/proc/proc_devtree.c | 5 +++--
 include/linux/of.h     | 6 ------
 2 files changed, 3 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/fs/proc/proc_devtree.c b/fs/proc/proc_devtree.c
index 123257bb356b..2309bf17203f 100644
--- a/fs/proc/proc_devtree.c
+++ b/fs/proc/proc_devtree.c
@@ -14,12 +14,13 @@
 #include <asm/uaccess.h>
 #include "internal.h"
 
-#ifndef HAVE_ARCH_DEVTREE_FIXUPS
 static inline void set_node_proc_entry(struct device_node *np,
 				       struct proc_dir_entry *de)
 {
-}
+#ifdef HAVE_ARCH_DEVTREE_FIXUPS
+	np->pde = de;
 #endif
+}
 
 static struct proc_dir_entry *proc_device_tree;
 
diff --git a/include/linux/of.h b/include/linux/of.h
index 3cc0d7ae290e..fd47c81d7a25 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -73,12 +73,6 @@ static inline void of_node_set_flag(struct device_node *n, unsigned long flag)
 	set_bit(flag, &n->_flags);
 }
 
-static inline void
-set_node_proc_entry(struct device_node *dn, struct proc_dir_entry *de)
-{
-	dn->pde = de;
-}
-
 extern struct device_node *of_find_all_nodes(struct device_node *prev);
 
 #if defined(CONFIG_SPARC)
-- 
cgit v1.2.3


From 1406bc2f57787797d1f6a3675c019a7093769275 Mon Sep 17 00:00:00 2001
From: Jeremy Kerr <jeremy.kerr@canonical.com>
Date: Sat, 30 Jan 2010 01:31:21 -0700
Subject: of/flattree: use callback to setup initrd from /chosen

At present, the fdt code sets the kernel-wide initrd_start and
initrd_end variables when parsing /chosen. On ARM, we only set these
once the bootmem has been reserved.

This change adds an arch hook to setup the initrd from the device
tree:

 void early_init_dt_setup_initrd_arch(unsigned long start,
				      unsigned long end);

The arch-specific code can then setup the initrd however it likes.

Compiled on powerpc, with CONFIG_BLK_DEV_INITRD=y and =n.

Signed-off-by: Jeremy Kerr <jeremy.kerr@canonical.com>
Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
---
 arch/microblaze/kernel/prom.c | 10 ++++++++++
 arch/powerpc/kernel/prom.c    | 10 ++++++++++
 drivers/of/fdt.c              | 27 +++++++++++----------------
 include/linux/of_fdt.h        | 10 ++++++++++
 4 files changed, 41 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/arch/microblaze/kernel/prom.c b/arch/microblaze/kernel/prom.c
index 459c32e4a5fe..050b7993c51c 100644
--- a/arch/microblaze/kernel/prom.c
+++ b/arch/microblaze/kernel/prom.c
@@ -118,6 +118,16 @@ void __init early_init_devtree(void *params)
 	pr_debug(" <- early_init_devtree()\n");
 }
 
+#ifdef CONFIG_BLK_DEV_INITRD
+void __init early_init_dt_setup_initrd_arch(unsigned long start,
+		unsigned long end)
+{
+	initrd_start = (unsigned long)__va(start);
+	initrd_end = (unsigned long)__va(end);
+	initrd_below_start_ok = 1;
+}
+#endif
+
 /*******
  *
  * New implementation of the OF "find" APIs, return a refcounted
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index e0f368ff8d12..40fce1c2f33b 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -510,6 +510,16 @@ void __init early_init_dt_add_memory_arch(u64 base, u64 size)
 	memstart_addr = min((u64)memstart_addr, base);
 }
 
+#ifdef CONFIG_BLK_DEV_INITRD
+void __init early_init_dt_setup_initrd_arch(unsigned long start,
+		unsigned long end)
+{
+	initrd_start = (unsigned long)__va(start);
+	initrd_end = (unsigned long)__va(end);
+	initrd_below_start_ok = 1;
+}
+#endif
+
 static void __init early_reserve_mem(void)
 {
 	u64 base, size;
diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
index f84152d112b0..9290ca5aa892 100644
--- a/drivers/of/fdt.c
+++ b/drivers/of/fdt.c
@@ -384,28 +384,23 @@ unsigned long __init unflatten_dt_node(unsigned long mem,
  */
 void __init early_init_dt_check_for_initrd(unsigned long node)
 {
-	unsigned long len;
+	unsigned long start, end, len;
 	u32 *prop;
 
 	pr_debug("Looking for initrd properties... ");
 
 	prop = of_get_flat_dt_prop(node, "linux,initrd-start", &len);
-	if (prop) {
-		initrd_start = (unsigned long)
-				__va(of_read_ulong(prop, len/4));
-
-		prop = of_get_flat_dt_prop(node, "linux,initrd-end", &len);
-		if (prop) {
-			initrd_end = (unsigned long)
-				__va(of_read_ulong(prop, len/4));
-			initrd_below_start_ok = 1;
-		} else {
-			initrd_start = 0;
-		}
-	}
+	if (!prop)
+		return;
+	start = of_read_ulong(prop, len/4);
+
+	prop = of_get_flat_dt_prop(node, "linux,initrd-end", &len);
+	if (!prop)
+		return;
+	end = of_read_ulong(prop, len/4);
 
-	pr_debug("initrd_start=0x%lx  initrd_end=0x%lx\n",
-		 initrd_start, initrd_end);
+	early_init_dt_setup_initrd_arch(start, end);
+	pr_debug("initrd_start=0x%lx  initrd_end=0x%lx\n", start, end);
 }
 #else
 inline void early_init_dt_check_for_initrd(unsigned long node)
diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h
index bf26bd5df9f1..f32f0fc5314a 100644
--- a/include/linux/of_fdt.h
+++ b/include/linux/of_fdt.h
@@ -80,6 +80,16 @@ extern int early_init_dt_scan_memory(unsigned long node, const char *uname,
 extern void early_init_dt_add_memory_arch(u64 base, u64 size);
 extern u64 dt_mem_next_cell(int s, u32 **cellp);
 
+/*
+ * If BLK_DEV_INITRD, the fdt early init code will call this function,
+ * to be provided by the arch code. start and end are specified as
+ * physical addresses.
+ */
+#ifdef CONFIG_BLK_DEV_INITRD
+extern void early_init_dt_setup_initrd_arch(unsigned long start,
+					    unsigned long end);
+#endif
+
 /* Early flat tree scan hooks */
 extern int early_init_dt_scan_root(unsigned long node, const char *uname,
 				   int depth, void *data);
-- 
cgit v1.2.3


From 2e89e685a8fd0e8334de967739d11e2e28c1a4dd Mon Sep 17 00:00:00 2001
From: Jeremy Kerr <jeremy.kerr@canonical.com>
Date: Sat, 30 Jan 2010 01:41:49 -0700
Subject: of: use __be32 for cell value accessors

Currently, we're using u32 for cell values, and hence assuming
host-endian device trees.

As we'd like to support little-endian platforms, use a __be32 for cell
values, and convert in the cell accessors.

Signed-off-by: Jeremy Kerr <jeremy.kerr@canonical.com>
Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
---
 drivers/of/fdt.c       | 4 ++--
 include/linux/of.h     | 8 +++++---
 include/linux/of_fdt.h | 2 +-
 3 files changed, 8 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
index 56fbd6e3122a..968a86af5301 100644
--- a/drivers/of/fdt.c
+++ b/drivers/of/fdt.c
@@ -431,9 +431,9 @@ int __init early_init_dt_scan_root(unsigned long node, const char *uname,
 	return 1;
 }
 
-u64 __init dt_mem_next_cell(int s, u32 **cellp)
+u64 __init dt_mem_next_cell(int s, __be32 **cellp)
 {
-	u32 *p = *cellp;
+	__be32 *p = *cellp;
 
 	*cellp = p + s;
 	return of_read_number(p, s);
diff --git a/include/linux/of.h b/include/linux/of.h
index fd47c81d7a25..fa5571b6e219 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -20,6 +20,8 @@
 #include <linux/kref.h>
 #include <linux/mod_devicetable.h>
 
+#include <asm/byteorder.h>
+
 typedef u32 phandle;
 typedef u32 ihandle;
 
@@ -95,16 +97,16 @@ extern void of_node_put(struct device_node *node);
  */
 
 /* Helper to read a big number; size is in cells (not bytes) */
-static inline u64 of_read_number(const u32 *cell, int size)
+static inline u64 of_read_number(const __be32 *cell, int size)
 {
 	u64 r = 0;
 	while (size--)
-		r = (r << 32) | *(cell++);
+		r = (r << 32) | be32_to_cpu(*(cell++));
 	return r;
 }
 
 /* Like of_read_number, but we want an unsigned long result */
-static inline unsigned long of_read_ulong(const u32 *cell, int size)
+static inline unsigned long of_read_ulong(const __be32 *cell, int size)
 {
 	/* toss away upper bits if unsigned long is smaller than u64 */
 	return of_read_number(cell, size);
diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h
index f32f0fc5314a..6a35d91a53a6 100644
--- a/include/linux/of_fdt.h
+++ b/include/linux/of_fdt.h
@@ -78,7 +78,7 @@ extern void early_init_dt_check_for_initrd(unsigned long node);
 extern int early_init_dt_scan_memory(unsigned long node, const char *uname,
 				     int depth, void *data);
 extern void early_init_dt_add_memory_arch(u64 base, u64 size);
-extern u64 dt_mem_next_cell(int s, u32 **cellp);
+extern u64 dt_mem_next_cell(int s, __be32 **cellp);
 
 /*
  * If BLK_DEV_INITRD, the fdt early init code will call this function,
-- 
cgit v1.2.3


From 087f79c48c090a2c0cd9ee45231d63290d2036d2 Mon Sep 17 00:00:00 2001
From: Jeremy Kerr <jeremy.kerr@canonical.com>
Date: Sat, 30 Jan 2010 04:14:19 -0700
Subject: of/flattree: endian-convert members of boot_param_header

The boot_param_header has big-endian fields, so change the types to
__be32, and perform endian conversion when we access them.

Signed-off-by: Jeremy Kerr <jeremy.kerr@canonical.com>
Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
---
 arch/powerpc/kernel/prom.c |  2 +-
 drivers/of/fdt.c           | 16 ++++++++--------
 include/linux/of_fdt.h     | 20 ++++++++++----------
 3 files changed, 19 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 40fce1c2f33b..43c78d74ddcb 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -98,7 +98,7 @@ static void __init move_device_tree(void)
 	DBG("-> move_device_tree\n");
 
 	start = __pa(initial_boot_params);
-	size = initial_boot_params->totalsize;
+	size = be32_to_cpu(initial_boot_params->totalsize);
 
 	if ((memory_limit && (start + size) > memory_limit) ||
 			overlaps_crashkernel(start, size)) {
diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
index 5c5f03ef7f06..18d282fefe58 100644
--- a/drivers/of/fdt.c
+++ b/drivers/of/fdt.c
@@ -28,7 +28,7 @@ struct boot_param_header *initial_boot_params;
 char *find_flat_dt_string(u32 offset)
 {
 	return ((char *)initial_boot_params) +
-		initial_boot_params->off_dt_strings + offset;
+		be32_to_cpu(initial_boot_params->off_dt_strings) + offset;
 }
 
 /**
@@ -46,7 +46,7 @@ int __init of_scan_flat_dt(int (*it)(unsigned long node,
 			   void *data)
 {
 	unsigned long p = ((unsigned long)initial_boot_params) +
-		initial_boot_params->off_dt_struct;
+		be32_to_cpu(initial_boot_params->off_dt_struct);
 	int rc = 0;
 	int depth = -1;
 
@@ -66,7 +66,7 @@ int __init of_scan_flat_dt(int (*it)(unsigned long node,
 		if (tag == OF_DT_PROP) {
 			u32 sz = be32_to_cpup((__be32 *)p);
 			p += 8;
-			if (initial_boot_params->version < 0x10)
+			if (be32_to_cpu(initial_boot_params->version) < 0x10)
 				p = _ALIGN(p, sz >= 8 ? 8 : 4);
 			p += sz;
 			p = _ALIGN(p, 4);
@@ -101,7 +101,7 @@ int __init of_scan_flat_dt(int (*it)(unsigned long node,
 unsigned long __init of_get_flat_dt_root(void)
 {
 	unsigned long p = ((unsigned long)initial_boot_params) +
-		initial_boot_params->off_dt_struct;
+		be32_to_cpu(initial_boot_params->off_dt_struct);
 
 	while (be32_to_cpup((__be32 *)p) == OF_DT_NOP)
 		p += 4;
@@ -135,7 +135,7 @@ void *__init of_get_flat_dt_prop(unsigned long node, const char *name,
 		sz = be32_to_cpup((__be32 *)p);
 		noff = be32_to_cpup((__be32 *)(p + 4));
 		p += 8;
-		if (initial_boot_params->version < 0x10)
+		if (be32_to_cpu(initial_boot_params->version) < 0x10)
 			p = _ALIGN(p, sz >= 8 ? 8 : 4);
 
 		nstr = find_flat_dt_string(noff);
@@ -296,7 +296,7 @@ unsigned long __init unflatten_dt_node(unsigned long mem,
 		sz = be32_to_cpup((__be32 *)(*p));
 		noff = be32_to_cpup((__be32 *)((*p) + 4));
 		*p += 8;
-		if (initial_boot_params->version < 0x10)
+		if (be32_to_cpu(initial_boot_params->version) < 0x10)
 			*p = _ALIGN(*p, sz >= 8 ? 8 : 4);
 
 		pname = find_flat_dt_string(noff);
@@ -544,7 +544,7 @@ void __init unflatten_device_tree(void)
 
 	/* First pass, scan for size */
 	start = ((unsigned long)initial_boot_params) +
-		initial_boot_params->off_dt_struct;
+		be32_to_cpu(initial_boot_params->off_dt_struct);
 	size = unflatten_dt_node(0, &start, NULL, NULL, 0);
 	size = (size | 3) + 1;
 
@@ -560,7 +560,7 @@ void __init unflatten_device_tree(void)
 
 	/* Second pass, do actual unflattening */
 	start = ((unsigned long)initial_boot_params) +
-		initial_boot_params->off_dt_struct;
+		be32_to_cpu(initial_boot_params->off_dt_struct);
 	unflatten_dt_node(mem, &start, NULL, &allnextp, 0);
 	if (be32_to_cpup((__be32 *)start) != OF_DT_END)
 		pr_warning("Weird tag at end of tree: %08x\n", *((u32 *)start));
diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h
index 6a35d91a53a6..0007187ab59b 100644
--- a/include/linux/of_fdt.h
+++ b/include/linux/of_fdt.h
@@ -42,19 +42,19 @@
  * ends when size is 0
  */
 struct boot_param_header {
-	u32	magic;			/* magic word OF_DT_HEADER */
-	u32	totalsize;		/* total size of DT block */
-	u32	off_dt_struct;		/* offset to structure */
-	u32	off_dt_strings;		/* offset to strings */
-	u32	off_mem_rsvmap;		/* offset to memory reserve map */
-	u32	version;		/* format version */
-	u32	last_comp_version;	/* last compatible version */
+	__be32	magic;			/* magic word OF_DT_HEADER */
+	__be32	totalsize;		/* total size of DT block */
+	__be32	off_dt_struct;		/* offset to structure */
+	__be32	off_dt_strings;		/* offset to strings */
+	__be32	off_mem_rsvmap;		/* offset to memory reserve map */
+	__be32	version;		/* format version */
+	__be32	last_comp_version;	/* last compatible version */
 	/* version 2 fields below */
-	u32	boot_cpuid_phys;	/* Physical CPU id we're booting on */
+	__be32	boot_cpuid_phys;	/* Physical CPU id we're booting on */
 	/* version 3 fields below */
-	u32	dt_strings_size;	/* size of the DT strings block */
+	__be32	dt_strings_size;	/* size of the DT strings block */
 	/* version 17 fields below */
-	u32	dt_struct_size;		/* size of the DT structure block */
+	__be32	dt_struct_size;		/* size of the DT structure block */
 };
 
 /* TBD: Temporary export of fdt globals - remove when code fully merged */
-- 
cgit v1.2.3


From b9efa1b27e25b1286504973c0a6bf0f24106faa8 Mon Sep 17 00:00:00 2001
From: Andy Adamson <andros@netapp.com>
Date: Wed, 20 Jan 2010 16:06:27 -0500
Subject: nfs41: implement cb_recall_slot

Drain the fore channel and reset the max_slots to the new value.

Signed-off-by: Andy Adamson <andros@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/callback.h         |  8 ++++++++
 fs/nfs/callback_proc.c    | 32 ++++++++++++++++++++++++++++++++
 fs/nfs/callback_xdr.c     | 22 +++++++++++++++++++++-
 fs/nfs/nfs4_fs.h          |  2 ++
 fs/nfs/nfs4state.c        | 44 ++++++++++++++++++++++++++++++++++++++++++++
 include/linux/nfs_fs_sb.h |  2 ++
 6 files changed, 109 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h
index d4036be0b589..85a7cfd1b8dd 100644
--- a/fs/nfs/callback.h
+++ b/fs/nfs/callback.h
@@ -119,6 +119,14 @@ struct cb_recallanyargs {
 };
 
 extern unsigned nfs4_callback_recallany(struct cb_recallanyargs *args, void *dummy);
+
+struct cb_recallslotargs {
+	struct sockaddr	*crsa_addr;
+	uint32_t	crsa_target_max_slots;
+};
+extern unsigned nfs4_callback_recallslot(struct cb_recallslotargs *args,
+					  void *dummy);
+
 #endif /* CONFIG_NFS_V4_1 */
 
 extern __be32 nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *res);
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 4062f7690a33..e5155d9df595 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -361,4 +361,36 @@ out:
 	dprintk("%s: exit with status = %d\n", __func__, ntohl(status));
 	return status;
 }
+
+/* Reduce the fore channel's max_slots to the target value */
+unsigned nfs4_callback_recallslot(struct cb_recallslotargs *args, void *dummy)
+{
+	struct nfs_client *clp;
+	struct nfs4_slot_table *fc_tbl;
+	int status;
+
+	status = htonl(NFS4ERR_OP_NOT_IN_SESSION);
+	clp = nfs_find_client(args->crsa_addr, 4);
+	if (clp == NULL)
+		goto out;
+
+	dprintk("NFS: CB_RECALL_SLOT request from %s target max slots %d\n",
+		rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR),
+		args->crsa_target_max_slots);
+
+	fc_tbl = &clp->cl_session->fc_slot_table;
+
+	status = htonl(NFS4ERR_BAD_HIGH_SLOT);
+	if (args->crsa_target_max_slots >= fc_tbl->max_slots ||
+	    args->crsa_target_max_slots < 1)
+		goto out;
+
+	fc_tbl->target_max_slots = args->crsa_target_max_slots;
+	nfs41_handle_recall_slot(clp);
+	status = htonl(NFS4_OK);
+	nfs_put_client(clp);	/* balance nfs_find_client */
+out:
+	dprintk("%s: exit with status = %d\n", __func__, ntohl(status));
+	return status;
+}
 #endif /* CONFIG_NFS_V4_1 */
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index 08b430d922c4..8e66e20b59fd 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -24,6 +24,7 @@
 #define CB_OP_SEQUENCE_RES_MAXSZ	(CB_OP_HDR_RES_MAXSZ + \
 					4 + 1 + 3)
 #define CB_OP_RECALLANY_RES_MAXSZ	(CB_OP_HDR_RES_MAXSZ)
+#define CB_OP_RECALLSLOT_RES_MAXSZ	(CB_OP_HDR_RES_MAXSZ)
 #endif /* CONFIG_NFS_V4_1 */
 
 #define NFSDBG_FACILITY NFSDBG_CALLBACK
@@ -349,6 +350,20 @@ static unsigned decode_recallany_args(struct svc_rqst *rqstp,
 	return 0;
 }
 
+static unsigned decode_recallslot_args(struct svc_rqst *rqstp,
+					struct xdr_stream *xdr,
+					struct cb_recallslotargs *args)
+{
+	__be32 *p;
+
+	args->crsa_addr = svc_addr(rqstp);
+	p = read_buf(xdr, 4);
+	if (unlikely(p == NULL))
+		return htonl(NFS4ERR_BADXDR);
+	args->crsa_target_max_slots = ntohl(*p++);
+	return 0;
+}
+
 #endif /* CONFIG_NFS_V4_1 */
 
 static __be32 encode_string(struct xdr_stream *xdr, unsigned int len, const char *str)
@@ -557,6 +572,7 @@ preprocess_nfs41_op(int nop, unsigned int op_nr, struct callback_op **op)
 	case OP_CB_RECALL:
 	case OP_CB_SEQUENCE:
 	case OP_CB_RECALL_ANY:
+	case OP_CB_RECALL_SLOT:
 		*op = &callback_ops[op_nr];
 		break;
 
@@ -565,7 +581,6 @@ preprocess_nfs41_op(int nop, unsigned int op_nr, struct callback_op **op)
 	case OP_CB_NOTIFY:
 	case OP_CB_PUSH_DELEG:
 	case OP_CB_RECALLABLE_OBJ_AVAIL:
-	case OP_CB_RECALL_SLOT:
 	case OP_CB_WANTS_CANCELLED:
 	case OP_CB_NOTIFY_LOCK:
 		return htonl(NFS4ERR_NOTSUPP);
@@ -734,6 +749,11 @@ static struct callback_op callback_ops[] = {
 		.decode_args = (callback_decode_arg_t)decode_recallany_args,
 		.res_maxsize = CB_OP_RECALLANY_RES_MAXSZ,
 	},
+	[OP_CB_RECALL_SLOT] = {
+		.process_op = (callback_process_op_t)nfs4_callback_recallslot,
+		.decode_args = (callback_decode_arg_t)decode_recallslot_args,
+		.res_maxsize = CB_OP_RECALLSLOT_RES_MAXSZ,
+	},
 #endif /* CONFIG_NFS_V4_1 */
 };
 
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 0c6fda33d66e..a187200a7aac 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -46,6 +46,7 @@ enum nfs4_client_state {
 	NFS4CLNT_DELEGRETURN,
 	NFS4CLNT_SESSION_RESET,
 	NFS4CLNT_SESSION_DRAINING,
+	NFS4CLNT_RECALL_SLOT,
 };
 
 /*
@@ -280,6 +281,7 @@ extern void nfs4_schedule_state_manager(struct nfs_client *);
 extern int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *state);
 extern int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *state);
 extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags);
+extern void nfs41_handle_recall_slot(struct nfs_client *clp);
 extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp);
 extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl);
 extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t);
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 8406cacd3240..9164758c1ace 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1249,6 +1249,12 @@ static int nfs4_reclaim_lease(struct nfs_client *clp)
 }
 
 #ifdef CONFIG_NFS_V4_1
+void nfs41_handle_recall_slot(struct nfs_client *clp)
+{
+	set_bit(NFS4CLNT_RECALL_SLOT, &clp->cl_state);
+	nfs4_schedule_state_recovery(clp);
+}
+
 void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags)
 {
 	if (!flags)
@@ -1299,9 +1305,38 @@ out:
 	return status;
 }
 
+static int nfs4_recall_slot(struct nfs_client *clp)
+{
+	struct nfs4_slot_table *fc_tbl = &clp->cl_session->fc_slot_table;
+	struct nfs4_channel_attrs *fc_attrs = &clp->cl_session->fc_attrs;
+	struct nfs4_slot *new, *old;
+	int i;
+
+	nfs4_begin_drain_session(clp);
+	new = kmalloc(fc_tbl->target_max_slots * sizeof(struct nfs4_slot),
+		      GFP_KERNEL);
+        if (!new)
+		return -ENOMEM;
+
+	spin_lock(&fc_tbl->slot_tbl_lock);
+	for (i = 0; i < fc_tbl->target_max_slots; i++)
+		new[i].seq_nr = fc_tbl->slots[i].seq_nr;
+	old = fc_tbl->slots;
+	fc_tbl->slots = new;
+	fc_tbl->max_slots = fc_tbl->target_max_slots;
+	fc_tbl->target_max_slots = 0;
+	fc_attrs->max_reqs = fc_tbl->max_slots;
+	spin_unlock(&fc_tbl->slot_tbl_lock);
+
+	kfree(old);
+	nfs4_end_drain_session(clp);
+	return 0;
+}
+
 #else /* CONFIG_NFS_V4_1 */
 static int nfs4_reset_session(struct nfs_client *clp) { return 0; }
 static int nfs4_end_drain_session(struct nfs_client *clp) { return 0; }
+static int nfs4_recall_slot(struct nfs_client *clp) { return 0; }
 #endif /* CONFIG_NFS_V4_1 */
 
 /* Set NFS4CLNT_LEASE_EXPIRED for all v4.0 errors and for recoverable errors
@@ -1398,6 +1433,15 @@ static void nfs4_state_manager(struct nfs_client *clp)
 			nfs_client_return_marked_delegations(clp);
 			continue;
 		}
+		/* Recall session slots */
+		if (test_and_clear_bit(NFS4CLNT_RECALL_SLOT, &clp->cl_state)
+		   && nfs4_has_session(clp)) {
+			status = nfs4_recall_slot(clp);
+			if (status < 0)
+				goto out_error;
+			continue;
+		}
+
 
 		nfs4_clear_state_manager_bit(clp);
 		/* Did we race with an attempt to give us more work? */
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 34fc6be5bfcf..ecd9e6c74d06 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -193,6 +193,8 @@ struct nfs4_slot_table {
 	int		max_slots;		/* # slots in table */
 	int		highest_used_slotid;	/* sent to server on each SEQ.
 						 * op for dynamic resizing */
+	int		target_max_slots;	/* Set by CB_RECALL_SLOT as
+						 * the new max_slots */
 };
 
 static inline int slot_idx(struct nfs4_slot_table *tbl, struct nfs4_slot *sp)
-- 
cgit v1.2.3


From ba17686f62db88f6a591121e768a0c83a2a2647d Mon Sep 17 00:00:00 2001
From: Andy Adamson <andros@netapp.com>
Date: Tue, 26 Jan 2010 21:24:04 -0500
Subject: nfs41 do not allocate unused back channel pages

Signed-off-by: Andy Adamson <andros@netapp.com>
[Trond.Myklebust@netapp.com: moved definition of svc_is_backchannel()
 into include/linux/sunrpc/bc_xprt.h.]
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/bc_xprt.h | 15 +++++++++++++++
 net/sunrpc/svc.c               |  4 ++++
 2 files changed, 19 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/bc_xprt.h b/include/linux/sunrpc/bc_xprt.h
index 6508f0dc0eff..d7152b451e21 100644
--- a/include/linux/sunrpc/bc_xprt.h
+++ b/include/linux/sunrpc/bc_xprt.h
@@ -38,12 +38,27 @@ int xprt_setup_backchannel(struct rpc_xprt *, unsigned int min_reqs);
 void xprt_destroy_backchannel(struct rpc_xprt *, int max_reqs);
 void bc_release_request(struct rpc_task *);
 int bc_send(struct rpc_rqst *req);
+
+/*
+ * Determine if a shared backchannel is in use
+ */
+static inline int svc_is_backchannel(const struct svc_rqst *rqstp)
+{
+	if (rqstp->rq_server->bc_xprt)
+		return 1;
+	return 0;
+}
 #else /* CONFIG_NFS_V4_1 */
 static inline int xprt_setup_backchannel(struct rpc_xprt *xprt,
 					 unsigned int min_reqs)
 {
 	return 0;
 }
+
+static inline int svc_is_backchannel(const struct svc_rqst *rqstp)
+{
+	return 0;
+}
 #endif /* CONFIG_NFS_V4_1 */
 #endif /* _LINUX_SUNRPC_BC_XPRT_H */
 
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 538ca433a56c..6dcf8c9c784c 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -506,6 +506,10 @@ svc_init_buffer(struct svc_rqst *rqstp, unsigned int size)
 {
 	unsigned int pages, arghi;
 
+	/* bc_xprt uses fore channel allocated buffers */
+	if (svc_is_backchannel(rqstp))
+		return 1;
+
 	pages = size / PAGE_SIZE + 1; /* extra page as we hold both request and reply.
 				       * We assume one is at most one page
 				       */
-- 
cgit v1.2.3


From 90a006abf8015c8cab893555244d8fc673b24839 Mon Sep 17 00:00:00 2001
From: Michael Poole <mdpoole@troilus.org>
Date: Sun, 24 Jan 2010 22:32:29 -0500
Subject: HID: Export hid_register_report

The Apple Magic Mouse (and probably other devices) publish reports that are not
called out in their HID report descriptors -- they only send them when enabled
through other writes to the device.  This allows a driver to handle these
unlisted reports.

Signed-off-by: Michael Poole <mdpoole@troilus.org>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-core.c | 3 ++-
 include/linux/hid.h    | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
index f7f80e1f1eef..66a91eb3e4c4 100644
--- a/drivers/hid/hid-core.c
+++ b/drivers/hid/hid-core.c
@@ -51,7 +51,7 @@ EXPORT_SYMBOL_GPL(hid_debug);
  * Register a new report for a device.
  */
 
-static struct hid_report *hid_register_report(struct hid_device *device, unsigned type, unsigned id)
+struct hid_report *hid_register_report(struct hid_device *device, unsigned type, unsigned id)
 {
 	struct hid_report_enum *report_enum = device->report_enum + type;
 	struct hid_report *report;
@@ -75,6 +75,7 @@ static struct hid_report *hid_register_report(struct hid_device *device, unsigne
 
 	return report;
 }
+EXPORT_SYMBOL_GPL(hid_register_report);
 
 /*
  * Register a new field for this report.
diff --git a/include/linux/hid.h b/include/linux/hid.h
index 3661a626941d..456838c8884e 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -690,6 +690,7 @@ int hid_input_report(struct hid_device *, int type, u8 *, int, int);
 int hidinput_find_field(struct hid_device *hid, unsigned int type, unsigned int code, struct hid_field **field);
 void hid_output_report(struct hid_report *report, __u8 *data);
 struct hid_device *hid_allocate_device(void);
+struct hid_report *hid_register_report(struct hid_device *device, unsigned type, unsigned id);
 int hid_parse_report(struct hid_device *hid, __u8 *start, unsigned size);
 int hid_check_keys_pressed(struct hid_device *hid);
 int hid_connect(struct hid_device *hid, unsigned int connect_mask);
-- 
cgit v1.2.3


From 0a02604628c49037e1de2091d75462fd856b26ed Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Wed, 10 Feb 2010 15:00:32 +0100
Subject: netfilter: xtables: consistent struct compat_xt_counters definition

There is compat_u64 type which deals with different u64 type alignment
on different compat-capable platforms, so use it and removed some
hardcoded assumptions.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/netfilter/x_tables.h | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index 365fabe1b16e..8b6c0e7d2657 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -562,11 +562,7 @@ struct compat_xt_entry_target {
  * current task alignment */
 
 struct compat_xt_counters {
-#if defined(CONFIG_X86_64) || defined(CONFIG_IA64)
-	u_int32_t cnt[4];
-#else
-	u_int64_t cnt[2];
-#endif
+	compat_u64 pcnt, bcnt;			/* Packet and byte counters */
 };
 
 struct compat_xt_counters_info {
-- 
cgit v1.2.3


From 42107f5009da223daa800d6da6904d77297ae829 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Wed, 10 Feb 2010 15:03:27 +0100
Subject: netfilter: xtables: symmetric COMPAT_XT_ALIGN definition

Rewrite COMPAT_XT_ALIGN in terms of dummy structure hack.
Compat counters logically have nothing to do with it.
Use ALIGN() macro while I'm at it for same types.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/netfilter/x_tables.h | 13 +++++++++----
 net/netfilter/x_tables.c           |  4 ++--
 2 files changed, 11 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index 8b6c0e7d2657..9d671ebf0605 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -93,8 +93,7 @@ struct _xt_align {
 	__u64 u64;
 };
 
-#define XT_ALIGN(s) (((s) + (__alignof__(struct _xt_align)-1)) 	\
-			& ~(__alignof__(struct _xt_align)-1))
+#define XT_ALIGN(s) ALIGN((s), __alignof__(struct _xt_align))
 
 /* Standard return verdict, or do jump. */
 #define XT_STANDARD_TARGET ""
@@ -571,8 +570,14 @@ struct compat_xt_counters_info {
 	struct compat_xt_counters counters[0];
 };
 
-#define COMPAT_XT_ALIGN(s) (((s) + (__alignof__(struct compat_xt_counters)-1)) \
-		& ~(__alignof__(struct compat_xt_counters)-1))
+struct _compat_xt_align {
+	__u8 u8;
+	__u16 u16;
+	__u32 u32;
+	compat_u64 u64;
+};
+
+#define COMPAT_XT_ALIGN(s) ALIGN((s), __alignof__(struct _compat_xt_align))
 
 extern void xt_compat_lock(u_int8_t af);
 extern void xt_compat_unlock(u_int8_t af);
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index f01955cce314..5c564ff10a3b 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -364,7 +364,7 @@ int xt_check_match(struct xt_mtchk_param *par,
 		 * ebt_among is exempt from centralized matchsize checking
 		 * because it uses a dynamic-size data set.
 		 */
-		pr_err("%s_tables: %s match: invalid size %Zu != %u\n",
+		pr_err("%s_tables: %s match: invalid size %u != %u\n",
 		       xt_prefix[par->family], par->match->name,
 		       XT_ALIGN(par->match->matchsize), size);
 		return -EINVAL;
@@ -514,7 +514,7 @@ int xt_check_target(struct xt_tgchk_param *par,
 		    unsigned int size, u_int8_t proto, bool inv_proto)
 {
 	if (XT_ALIGN(par->target->targetsize) != size) {
-		pr_err("%s_tables: %s target: invalid size %Zu != %u\n",
+		pr_err("%s_tables: %s target: invalid size %u != %u\n",
 		       xt_prefix[par->family], par->target->name,
 		       XT_ALIGN(par->target->targetsize), size);
 		return -EINVAL;
-- 
cgit v1.2.3


From eeb5b4ae81f4a750355fa0c15f4fea22fdf83be1 Mon Sep 17 00:00:00 2001
From: Kevin Dankwardt <k@kcomputing.com>
Date: Wed, 10 Feb 2010 23:43:40 +0900
Subject: fat: Fix stat->f_namelen

I found that the length of a file name when created cannot exceed 255
characters, yet, pathconf(), via statfs(), returns the maximum as 260.

Signed-off-by: Kevin Dankwardt <k@kcomputing.com>
Signed-off-by: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
---
 fs/fat/inode.c           | 2 +-
 fs/fat/namei_vfat.c      | 6 +++---
 include/linux/msdos_fs.h | 3 ++-
 3 files changed, 6 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 14da530b05ca..d0a504c8feef 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -558,7 +558,7 @@ static int fat_statfs(struct dentry *dentry, struct kstatfs *buf)
 	buf->f_bavail = sbi->free_clusters;
 	buf->f_fsid.val[0] = (u32)id;
 	buf->f_fsid.val[1] = (u32)(id >> 32);
-	buf->f_namelen = sbi->options.isvfat ? 260 : 12;
+	buf->f_namelen = sbi->options.isvfat ? FAT_LFN_LEN : 12;
 
 	return 0;
 }
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c
index 411c192a05fa..c1ef50154868 100644
--- a/fs/fat/namei_vfat.c
+++ b/fs/fat/namei_vfat.c
@@ -502,14 +502,14 @@ xlate_to_uni(const unsigned char *name, int len, unsigned char *outname,
 		*outlen = utf8s_to_utf16s(name, len, (wchar_t *)outname);
 		if (*outlen < 0)
 			return *outlen;
-		else if (*outlen > 255)
+		else if (*outlen > FAT_LFN_LEN)
 			return -ENAMETOOLONG;
 
 		op = &outname[*outlen * sizeof(wchar_t)];
 	} else {
 		if (nls) {
 			for (i = 0, ip = name, op = outname, *outlen = 0;
-			     i < len && *outlen <= 255;
+			     i < len && *outlen <= FAT_LFN_LEN;
 			     *outlen += 1)
 			{
 				if (escape && (*ip == ':')) {
@@ -549,7 +549,7 @@ xlate_to_uni(const unsigned char *name, int len, unsigned char *outname,
 				return -ENAMETOOLONG;
 		} else {
 			for (i = 0, ip = name, op = outname, *outlen = 0;
-			     i < len && *outlen <= 255;
+			     i < len && *outlen <= FAT_LFN_LEN;
 			     i++, *outlen += 1)
 			{
 				*op++ = *ip++;
diff --git a/include/linux/msdos_fs.h b/include/linux/msdos_fs.h
index ce38f1caa5e1..34066e65fdeb 100644
--- a/include/linux/msdos_fs.h
+++ b/include/linux/msdos_fs.h
@@ -15,6 +15,7 @@
 #define MSDOS_DPB_BITS	4		/* log2(MSDOS_DPB) */
 #define MSDOS_DPS	(SECTOR_SIZE / sizeof(struct msdos_dir_entry))
 #define MSDOS_DPS_BITS	4		/* log2(MSDOS_DPS) */
+#define MSDOS_LONGNAME	256		/* maximum name length */
 #define CF_LE_W(v)	le16_to_cpu(v)
 #define CF_LE_L(v)	le32_to_cpu(v)
 #define CT_LE_W(v)	cpu_to_le16(v)
@@ -47,8 +48,8 @@
 #define DELETED_FLAG	0xe5	/* marks file as deleted when in name[0] */
 #define IS_FREE(n)	(!*(n) || *(n) == DELETED_FLAG)
 
+#define FAT_LFN_LEN	255	/* maximum long name length */
 #define MSDOS_NAME	11	/* maximum name length */
-#define MSDOS_LONGNAME	256	/* maximum name length */
 #define MSDOS_SLOTS	21	/* max # of slots for short and long names */
 #define MSDOS_DOT	".          "	/* ".", padded to MSDOS_NAME chars */
 #define MSDOS_DOTDOT	"..         "	/* "..", padded to MSDOS_NAME chars */
-- 
cgit v1.2.3


From 2b95efe7f6bb750256a702cc32d33b0cb2cd8223 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Wed, 17 Jun 2009 13:57:48 +0200
Subject: netfilter: xtables: use xt_table for hook instantiation

The respective xt_table structures already have most of the metadata
needed for hook setup. Add a 'priority' field to struct xt_table so
that xt_hook_link() can be called with a reduced number of arguments.

So should we be having more tables in the future, it comes at no
static cost (only runtime, as before) - space saved:
6807373->6806555.

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
---
 include/linux/netfilter/x_tables.h     |  4 +++
 net/ipv4/netfilter/arptable_filter.c   | 33 +++++----------------
 net/ipv4/netfilter/iptable_filter.c    | 33 +++++----------------
 net/ipv4/netfilter/iptable_mangle.c    | 47 +++++------------------------
 net/ipv4/netfilter/iptable_raw.c       | 27 +++++------------
 net/ipv4/netfilter/iptable_security.c  | 33 +++++----------------
 net/ipv6/netfilter/ip6table_filter.c   | 33 +++++----------------
 net/ipv6/netfilter/ip6table_mangle.c   | 48 +++++-------------------------
 net/ipv6/netfilter/ip6table_raw.c      | 26 +++++-----------
 net/ipv6/netfilter/ip6table_security.c | 33 +++++----------------
 net/netfilter/x_tables.c               | 54 ++++++++++++++++++++++++++++++++++
 11 files changed, 121 insertions(+), 250 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index 365fabe1b16e..fdd3342c1235 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -361,6 +361,7 @@ struct xt_table {
 	struct module *me;
 
 	u_int8_t af;		/* address/protocol family */
+	int priority;		/* hook order */
 
 	/* A unique name... */
 	const char name[XT_TABLE_MAXNAMELEN];
@@ -522,6 +523,9 @@ static inline unsigned long ifname_compare_aligned(const char *_a,
 	return ret;
 }
 
+extern struct nf_hook_ops *xt_hook_link(const struct xt_table *, nf_hookfn *);
+extern void xt_hook_unlink(const struct xt_table *, struct nf_hook_ops *);
+
 #ifdef CONFIG_COMPAT
 #include <net/compat.h>
 
diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c
index deeda9b2cf05..b361de0dac4c 100644
--- a/net/ipv4/netfilter/arptable_filter.c
+++ b/net/ipv4/netfilter/arptable_filter.c
@@ -50,6 +50,7 @@ static const struct xt_table packet_filter = {
 	.valid_hooks	= FILTER_VALID_HOOKS,
 	.me		= THIS_MODULE,
 	.af		= NFPROTO_ARP,
+	.priority	= NF_IP_PRI_FILTER,
 };
 
 /* The work comes in here from netfilter.c */
@@ -63,29 +64,7 @@ arptable_filter_hook(unsigned int hook, struct sk_buff *skb,
 	return arpt_do_table(skb, hook, in, out, net->ipv4.arptable_filter);
 }
 
-static struct nf_hook_ops arpt_ops[] __read_mostly = {
-	{
-		.hook		= arptable_filter_hook,
-		.owner		= THIS_MODULE,
-		.pf		= NFPROTO_ARP,
-		.hooknum	= NF_ARP_IN,
-		.priority	= NF_IP_PRI_FILTER,
-	},
-	{
-		.hook		= arptable_filter_hook,
-		.owner		= THIS_MODULE,
-		.pf		= NFPROTO_ARP,
-		.hooknum	= NF_ARP_OUT,
-		.priority	= NF_IP_PRI_FILTER,
-	},
-	{
-		.hook		= arptable_filter_hook,
-		.owner		= THIS_MODULE,
-		.pf		= NFPROTO_ARP,
-		.hooknum	= NF_ARP_FORWARD,
-		.priority	= NF_IP_PRI_FILTER,
-	},
-};
+static struct nf_hook_ops *arpfilter_ops __read_mostly;
 
 static int __net_init arptable_filter_net_init(struct net *net)
 {
@@ -115,9 +94,11 @@ static int __init arptable_filter_init(void)
 	if (ret < 0)
 		return ret;
 
-	ret = nf_register_hooks(arpt_ops, ARRAY_SIZE(arpt_ops));
-	if (ret < 0)
+	arpfilter_ops = xt_hook_link(&packet_filter, arptable_filter_hook);
+	if (IS_ERR(arpfilter_ops)) {
+		ret = PTR_ERR(arpfilter_ops);
 		goto cleanup_table;
+	}
 	return ret;
 
 cleanup_table:
@@ -127,7 +108,7 @@ cleanup_table:
 
 static void __exit arptable_filter_fini(void)
 {
-	nf_unregister_hooks(arpt_ops, ARRAY_SIZE(arpt_ops));
+	xt_hook_unlink(&packet_filter, arpfilter_ops);
 	unregister_pernet_subsys(&arptable_filter_net_ops);
 }
 
diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c
index 1bfeaae6f624..c14bb85db1d9 100644
--- a/net/ipv4/netfilter/iptable_filter.c
+++ b/net/ipv4/netfilter/iptable_filter.c
@@ -58,6 +58,7 @@ static const struct xt_table packet_filter = {
 	.valid_hooks	= FILTER_VALID_HOOKS,
 	.me		= THIS_MODULE,
 	.af		= NFPROTO_IPV4,
+	.priority	= NF_IP_PRI_FILTER,
 };
 
 static unsigned int
@@ -77,29 +78,7 @@ iptable_filter_hook(unsigned int hook, struct sk_buff *skb,
 	return ipt_do_table(skb, hook, in, out, net->ipv4.iptable_filter);
 }
 
-static struct nf_hook_ops ipt_ops[] __read_mostly = {
-	{
-		.hook		= iptable_filter_hook,
-		.owner		= THIS_MODULE,
-		.pf		= NFPROTO_IPV4,
-		.hooknum	= NF_INET_LOCAL_IN,
-		.priority	= NF_IP_PRI_FILTER,
-	},
-	{
-		.hook		= iptable_filter_hook,
-		.owner		= THIS_MODULE,
-		.pf		= NFPROTO_IPV4,
-		.hooknum	= NF_INET_FORWARD,
-		.priority	= NF_IP_PRI_FILTER,
-	},
-	{
-		.hook		= iptable_filter_hook,
-		.owner		= THIS_MODULE,
-		.pf		= NFPROTO_IPV4,
-		.hooknum	= NF_INET_LOCAL_OUT,
-		.priority	= NF_IP_PRI_FILTER,
-	},
-};
+static struct nf_hook_ops *filter_ops __read_mostly;
 
 /* Default to forward because I got too much mail already. */
 static int forward = NF_ACCEPT;
@@ -142,9 +121,11 @@ static int __init iptable_filter_init(void)
 		return ret;
 
 	/* Register hooks */
-	ret = nf_register_hooks(ipt_ops, ARRAY_SIZE(ipt_ops));
-	if (ret < 0)
+	filter_ops = xt_hook_link(&packet_filter, iptable_filter_hook);
+	if (IS_ERR(filter_ops)) {
+		ret = PTR_ERR(filter_ops);
 		goto cleanup_table;
+	}
 
 	return ret;
 
@@ -155,7 +136,7 @@ static int __init iptable_filter_init(void)
 
 static void __exit iptable_filter_fini(void)
 {
-	nf_unregister_hooks(ipt_ops, ARRAY_SIZE(ipt_ops));
+	xt_hook_unlink(&packet_filter, filter_ops);
 	unregister_pernet_subsys(&iptable_filter_net_ops);
 }
 
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c
index 4e699cd275c6..2355a229f8ee 100644
--- a/net/ipv4/netfilter/iptable_mangle.c
+++ b/net/ipv4/netfilter/iptable_mangle.c
@@ -69,6 +69,7 @@ static const struct xt_table packet_mangler = {
 	.valid_hooks	= MANGLE_VALID_HOOKS,
 	.me		= THIS_MODULE,
 	.af		= NFPROTO_IPV4,
+	.priority	= NF_IP_PRI_MANGLE,
 };
 
 static unsigned int
@@ -129,43 +130,7 @@ iptable_mangle_hook(unsigned int hook,
 			    dev_net(in)->ipv4.iptable_mangle);
 }
 
-static struct nf_hook_ops ipt_ops[] __read_mostly = {
-	{
-		.hook		= iptable_mangle_hook,
-		.owner		= THIS_MODULE,
-		.pf		= NFPROTO_IPV4,
-		.hooknum	= NF_INET_PRE_ROUTING,
-		.priority	= NF_IP_PRI_MANGLE,
-	},
-	{
-		.hook		= iptable_mangle_hook,
-		.owner		= THIS_MODULE,
-		.pf		= NFPROTO_IPV4,
-		.hooknum	= NF_INET_LOCAL_IN,
-		.priority	= NF_IP_PRI_MANGLE,
-	},
-	{
-		.hook		= iptable_mangle_hook,
-		.owner		= THIS_MODULE,
-		.pf		= NFPROTO_IPV4,
-		.hooknum	= NF_INET_FORWARD,
-		.priority	= NF_IP_PRI_MANGLE,
-	},
-	{
-		.hook		= iptable_mangle_hook,
-		.owner		= THIS_MODULE,
-		.pf		= NFPROTO_IPV4,
-		.hooknum	= NF_INET_LOCAL_OUT,
-		.priority	= NF_IP_PRI_MANGLE,
-	},
-	{
-		.hook		= iptable_mangle_hook,
-		.owner		= THIS_MODULE,
-		.pf		= NFPROTO_IPV4,
-		.hooknum	= NF_INET_POST_ROUTING,
-		.priority	= NF_IP_PRI_MANGLE,
-	},
-};
+static struct nf_hook_ops *mangle_ops __read_mostly;
 
 static int __net_init iptable_mangle_net_init(struct net *net)
 {
@@ -196,9 +161,11 @@ static int __init iptable_mangle_init(void)
 		return ret;
 
 	/* Register hooks */
-	ret = nf_register_hooks(ipt_ops, ARRAY_SIZE(ipt_ops));
-	if (ret < 0)
+	mangle_ops = xt_hook_link(&packet_mangler, iptable_mangle_hook);
+	if (IS_ERR(mangle_ops)) {
+		ret = PTR_ERR(mangle_ops);
 		goto cleanup_table;
+	}
 
 	return ret;
 
@@ -209,7 +176,7 @@ static int __init iptable_mangle_init(void)
 
 static void __exit iptable_mangle_fini(void)
 {
-	nf_unregister_hooks(ipt_ops, ARRAY_SIZE(ipt_ops));
+	xt_hook_unlink(&packet_mangler, mangle_ops);
 	unregister_pernet_subsys(&iptable_mangle_net_ops);
 }
 
diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c
index d16e43777c31..62a99154f14c 100644
--- a/net/ipv4/netfilter/iptable_raw.c
+++ b/net/ipv4/netfilter/iptable_raw.c
@@ -41,6 +41,7 @@ static const struct xt_table packet_raw = {
 	.valid_hooks =  RAW_VALID_HOOKS,
 	.me = THIS_MODULE,
 	.af = NFPROTO_IPV4,
+	.priority = NF_IP_PRI_RAW,
 };
 
 /* The work comes in here from netfilter.c. */
@@ -61,23 +62,7 @@ iptable_raw_hook(unsigned int hook, struct sk_buff *skb,
 	return ipt_do_table(skb, hook, in, out, net->ipv4.iptable_raw);
 }
 
-/* 'raw' is the very first table. */
-static struct nf_hook_ops ipt_ops[] __read_mostly = {
-	{
-		.hook = iptable_raw_hook,
-		.pf = NFPROTO_IPV4,
-		.hooknum = NF_INET_PRE_ROUTING,
-		.priority = NF_IP_PRI_RAW,
-		.owner = THIS_MODULE,
-	},
-	{
-		.hook = iptable_raw_hook,
-		.pf = NFPROTO_IPV4,
-		.hooknum = NF_INET_LOCAL_OUT,
-		.priority = NF_IP_PRI_RAW,
-		.owner = THIS_MODULE,
-	},
-};
+static struct nf_hook_ops *rawtable_ops __read_mostly;
 
 static int __net_init iptable_raw_net_init(struct net *net)
 {
@@ -108,9 +93,11 @@ static int __init iptable_raw_init(void)
 		return ret;
 
 	/* Register hooks */
-	ret = nf_register_hooks(ipt_ops, ARRAY_SIZE(ipt_ops));
-	if (ret < 0)
+	rawtable_ops = xt_hook_link(&packet_raw, iptable_raw_hook);
+	if (IS_ERR(rawtable_ops)) {
+		ret = PTR_ERR(rawtable_ops);
 		goto cleanup_table;
+	}
 
 	return ret;
 
@@ -121,7 +108,7 @@ static int __init iptable_raw_init(void)
 
 static void __exit iptable_raw_fini(void)
 {
-	nf_unregister_hooks(ipt_ops, ARRAY_SIZE(ipt_ops));
+	xt_hook_unlink(&packet_raw, rawtable_ops);
 	unregister_pernet_subsys(&iptable_raw_net_ops);
 }
 
diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c
index 324505aaaa73..b1bf3ca2c6c7 100644
--- a/net/ipv4/netfilter/iptable_security.c
+++ b/net/ipv4/netfilter/iptable_security.c
@@ -62,6 +62,7 @@ static const struct xt_table security_table = {
 	.valid_hooks	= SECURITY_VALID_HOOKS,
 	.me		= THIS_MODULE,
 	.af		= NFPROTO_IPV4,
+	.priority	= NF_IP_PRI_SECURITY,
 };
 
 static unsigned int
@@ -82,29 +83,7 @@ iptable_security_hook(unsigned int hook, struct sk_buff *skb,
 	return ipt_do_table(skb, hook, in, out, net->ipv4.iptable_security);
 }
 
-static struct nf_hook_ops ipt_ops[] __read_mostly = {
-	{
-		.hook		= iptable_security_hook,
-		.owner		= THIS_MODULE,
-		.pf		= NFPROTO_IPV4,
-		.hooknum	= NF_INET_LOCAL_IN,
-		.priority	= NF_IP_PRI_SECURITY,
-	},
-	{
-		.hook		= iptable_security_hook,
-		.owner		= THIS_MODULE,
-		.pf		= NFPROTO_IPV4,
-		.hooknum	= NF_INET_FORWARD,
-		.priority	= NF_IP_PRI_SECURITY,
-	},
-	{
-		.hook		= iptable_security_hook,
-		.owner		= THIS_MODULE,
-		.pf		= NFPROTO_IPV4,
-		.hooknum	= NF_INET_LOCAL_OUT,
-		.priority	= NF_IP_PRI_SECURITY,
-	},
-};
+static struct nf_hook_ops *sectbl_ops __read_mostly;
 
 static int __net_init iptable_security_net_init(struct net *net)
 {
@@ -135,9 +114,11 @@ static int __init iptable_security_init(void)
         if (ret < 0)
 		return ret;
 
-	ret = nf_register_hooks(ipt_ops, ARRAY_SIZE(ipt_ops));
-	if (ret < 0)
+	sectbl_ops = xt_hook_link(&security_table, iptable_security_hook);
+	if (IS_ERR(sectbl_ops)) {
+		ret = PTR_ERR(sectbl_ops);
 		goto cleanup_table;
+	}
 
 	return ret;
 
@@ -148,7 +129,7 @@ cleanup_table:
 
 static void __exit iptable_security_fini(void)
 {
-	nf_unregister_hooks(ipt_ops, ARRAY_SIZE(ipt_ops));
+	xt_hook_unlink(&security_table, sectbl_ops);
 	unregister_pernet_subsys(&iptable_security_net_ops);
 }
 
diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c
index 866f34ae236b..6e95d0614ca9 100644
--- a/net/ipv6/netfilter/ip6table_filter.c
+++ b/net/ipv6/netfilter/ip6table_filter.c
@@ -56,6 +56,7 @@ static const struct xt_table packet_filter = {
 	.valid_hooks	= FILTER_VALID_HOOKS,
 	.me		= THIS_MODULE,
 	.af		= NFPROTO_IPV6,
+	.priority	= NF_IP6_PRI_FILTER,
 };
 
 /* The work comes in here from netfilter.c. */
@@ -69,29 +70,7 @@ ip6table_filter_hook(unsigned int hook, struct sk_buff *skb,
 	return ip6t_do_table(skb, hook, in, out, net->ipv6.ip6table_filter);
 }
 
-static struct nf_hook_ops ip6t_ops[] __read_mostly = {
-	{
-		.hook		= ip6table_filter_hook,
-		.owner		= THIS_MODULE,
-		.pf		= NFPROTO_IPV6,
-		.hooknum	= NF_INET_LOCAL_IN,
-		.priority	= NF_IP6_PRI_FILTER,
-	},
-	{
-		.hook		= ip6table_filter_hook,
-		.owner		= THIS_MODULE,
-		.pf		= NFPROTO_IPV6,
-		.hooknum	= NF_INET_FORWARD,
-		.priority	= NF_IP6_PRI_FILTER,
-	},
-	{
-		.hook		= ip6table_filter_hook,
-		.owner		= THIS_MODULE,
-		.pf		= NFPROTO_IPV6,
-		.hooknum	= NF_INET_LOCAL_OUT,
-		.priority	= NF_IP6_PRI_FILTER,
-	},
-};
+static struct nf_hook_ops *filter_ops __read_mostly;
 
 /* Default to forward because I got too much mail already. */
 static int forward = NF_ACCEPT;
@@ -134,9 +113,11 @@ static int __init ip6table_filter_init(void)
 		return ret;
 
 	/* Register hooks */
-	ret = nf_register_hooks(ip6t_ops, ARRAY_SIZE(ip6t_ops));
-	if (ret < 0)
+	filter_ops = xt_hook_link(&packet_filter, ip6table_filter_hook);
+	if (IS_ERR(filter_ops)) {
+		ret = PTR_ERR(filter_ops);
 		goto cleanup_table;
+	}
 
 	return ret;
 
@@ -147,7 +128,7 @@ static int __init ip6table_filter_init(void)
 
 static void __exit ip6table_filter_fini(void)
 {
-	nf_unregister_hooks(ip6t_ops, ARRAY_SIZE(ip6t_ops));
+	xt_hook_unlink(&packet_filter, filter_ops);
 	unregister_pernet_subsys(&ip6table_filter_net_ops);
 }
 
diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c
index 405ac1f76390..5023ac52ffec 100644
--- a/net/ipv6/netfilter/ip6table_mangle.c
+++ b/net/ipv6/netfilter/ip6table_mangle.c
@@ -62,6 +62,7 @@ static const struct xt_table packet_mangler = {
 	.valid_hooks	= MANGLE_VALID_HOOKS,
 	.me		= THIS_MODULE,
 	.af		= NFPROTO_IPV6,
+	.priority	= NF_IP6_PRI_MANGLE,
 };
 
 static unsigned int
@@ -122,44 +123,7 @@ ip6table_mangle_hook(unsigned int hook, struct sk_buff *skb,
 			     dev_net(in)->ipv6.ip6table_mangle);
 }
 
-static struct nf_hook_ops ip6t_ops[] __read_mostly = {
-	{
-		.hook		= ip6table_mangle_hook,
-		.owner		= THIS_MODULE,
-		.pf		= NFPROTO_IPV6,
-		.hooknum	= NF_INET_PRE_ROUTING,
-		.priority	= NF_IP6_PRI_MANGLE,
-	},
-	{
-		.hook		= ip6table_mangle_hook,
-		.owner		= THIS_MODULE,
-		.pf		= NFPROTO_IPV6,
-		.hooknum	= NF_INET_LOCAL_IN,
-		.priority	= NF_IP6_PRI_MANGLE,
-	},
-	{
-		.hook		= ip6table_mangle_hook,
-		.owner		= THIS_MODULE,
-		.pf		= NFPROTO_IPV6,
-		.hooknum	= NF_INET_FORWARD,
-		.priority	= NF_IP6_PRI_MANGLE,
-	},
-	{
-		.hook		= ip6table_mangle_hook,
-		.owner		= THIS_MODULE,
-		.pf		= NFPROTO_IPV6,
-		.hooknum	= NF_INET_LOCAL_OUT,
-		.priority	= NF_IP6_PRI_MANGLE,
-	},
-	{
-		.hook		= ip6table_mangle_hook,
-		.owner		= THIS_MODULE,
-		.pf		= NFPROTO_IPV6,
-		.hooknum	= NF_INET_POST_ROUTING,
-		.priority	= NF_IP6_PRI_MANGLE,
-	},
-};
-
+static struct nf_hook_ops *mangle_ops __read_mostly;
 static int __net_init ip6table_mangle_net_init(struct net *net)
 {
 	/* Register table */
@@ -189,9 +153,11 @@ static int __init ip6table_mangle_init(void)
 		return ret;
 
 	/* Register hooks */
-	ret = nf_register_hooks(ip6t_ops, ARRAY_SIZE(ip6t_ops));
-	if (ret < 0)
+	mangle_ops = xt_hook_link(&packet_mangler, ip6table_mangle_hook);
+	if (IS_ERR(mangle_ops)) {
+		ret = PTR_ERR(mangle_ops);
 		goto cleanup_table;
+	}
 
 	return ret;
 
@@ -202,7 +168,7 @@ static int __init ip6table_mangle_init(void)
 
 static void __exit ip6table_mangle_fini(void)
 {
-	nf_unregister_hooks(ip6t_ops, ARRAY_SIZE(ip6t_ops));
+	xt_hook_unlink(&packet_mangler, mangle_ops);
 	unregister_pernet_subsys(&ip6table_mangle_net_ops);
 }
 
diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c
index 5451a36fbc21..3bfa69511641 100644
--- a/net/ipv6/netfilter/ip6table_raw.c
+++ b/net/ipv6/netfilter/ip6table_raw.c
@@ -40,6 +40,7 @@ static const struct xt_table packet_raw = {
 	.valid_hooks = RAW_VALID_HOOKS,
 	.me = THIS_MODULE,
 	.af = NFPROTO_IPV6,
+	.priority = NF_IP6_PRI_FIRST,
 };
 
 /* The work comes in here from netfilter.c. */
@@ -53,22 +54,7 @@ ip6table_raw_hook(unsigned int hook, struct sk_buff *skb,
 	return ip6t_do_table(skb, hook, in, out, net->ipv6.ip6table_raw);
 }
 
-static struct nf_hook_ops ip6t_ops[] __read_mostly = {
-	{
-	  .hook = ip6table_raw_hook,
-	  .pf = NFPROTO_IPV6,
-	  .hooknum = NF_INET_PRE_ROUTING,
-	  .priority = NF_IP6_PRI_FIRST,
-	  .owner = THIS_MODULE,
-	},
-	{
-	  .hook = ip6table_raw_hook,
-	  .pf = NFPROTO_IPV6,
-	  .hooknum = NF_INET_LOCAL_OUT,
-	  .priority = NF_IP6_PRI_FIRST,
-	  .owner = THIS_MODULE,
-	},
-};
+static struct nf_hook_ops *rawtable_ops __read_mostly;
 
 static int __net_init ip6table_raw_net_init(struct net *net)
 {
@@ -99,9 +85,11 @@ static int __init ip6table_raw_init(void)
 		return ret;
 
 	/* Register hooks */
-	ret = nf_register_hooks(ip6t_ops, ARRAY_SIZE(ip6t_ops));
-	if (ret < 0)
+	rawtable_ops = xt_hook_link(&packet_raw, ip6table_raw_hook);
+	if (IS_ERR(rawtable_ops)) {
+		ret = PTR_ERR(rawtable_ops);
 		goto cleanup_table;
+	}
 
 	return ret;
 
@@ -112,7 +100,7 @@ static int __init ip6table_raw_init(void)
 
 static void __exit ip6table_raw_fini(void)
 {
-	nf_unregister_hooks(ip6t_ops, ARRAY_SIZE(ip6t_ops));
+	xt_hook_unlink(&packet_raw, rawtable_ops);
 	unregister_pernet_subsys(&ip6table_raw_net_ops);
 }
 
diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c
index 841ea77f5218..dd2200f17a6c 100644
--- a/net/ipv6/netfilter/ip6table_security.c
+++ b/net/ipv6/netfilter/ip6table_security.c
@@ -61,6 +61,7 @@ static const struct xt_table security_table = {
 	.valid_hooks	= SECURITY_VALID_HOOKS,
 	.me		= THIS_MODULE,
 	.af		= NFPROTO_IPV6,
+	.priority	= NF_IP6_PRI_SECURITY,
 };
 
 static unsigned int
@@ -74,29 +75,7 @@ ip6table_security_hook(unsigned int hook, struct sk_buff *skb,
 	return ip6t_do_table(skb, hook, in, out, net->ipv6.ip6table_security);
 }
 
-static struct nf_hook_ops ip6t_ops[] __read_mostly = {
-	{
-		.hook		= ip6table_security_hook,
-		.owner		= THIS_MODULE,
-		.pf		= NFPROTO_IPV6,
-		.hooknum	= NF_INET_LOCAL_IN,
-		.priority	= NF_IP6_PRI_SECURITY,
-	},
-	{
-		.hook		= ip6table_security_hook,
-		.owner		= THIS_MODULE,
-		.pf		= NFPROTO_IPV6,
-		.hooknum	= NF_INET_FORWARD,
-		.priority	= NF_IP6_PRI_SECURITY,
-	},
-	{
-		.hook		= ip6table_security_hook,
-		.owner		= THIS_MODULE,
-		.pf		= NFPROTO_IPV6,
-		.hooknum	= NF_INET_LOCAL_OUT,
-		.priority	= NF_IP6_PRI_SECURITY,
-	},
-};
+static struct nf_hook_ops *sectbl_ops __read_mostly;
 
 static int __net_init ip6table_security_net_init(struct net *net)
 {
@@ -127,9 +106,11 @@ static int __init ip6table_security_init(void)
 	if (ret < 0)
 		return ret;
 
-	ret = nf_register_hooks(ip6t_ops, ARRAY_SIZE(ip6t_ops));
-	if (ret < 0)
+	sectbl_ops = xt_hook_link(&security_table, ip6table_security_hook);
+	if (IS_ERR(sectbl_ops)) {
+		ret = PTR_ERR(sectbl_ops);
 		goto cleanup_table;
+	}
 
 	return ret;
 
@@ -140,7 +121,7 @@ cleanup_table:
 
 static void __exit ip6table_security_fini(void)
 {
-	nf_unregister_hooks(ip6t_ops, ARRAY_SIZE(ip6t_ops));
+	xt_hook_unlink(&security_table, sectbl_ops);
 	unregister_pernet_subsys(&ip6table_security_net_ops);
 }
 
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index f01955cce314..b51cb0d7234a 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -1091,6 +1091,60 @@ static const struct file_operations xt_target_ops = {
 
 #endif /* CONFIG_PROC_FS */
 
+/**
+ * xt_hook_link - set up hooks for a new table
+ * @table:	table with metadata needed to set up hooks
+ * @fn:		Hook function
+ *
+ * This function will take care of creating and registering the necessary
+ * Netfilter hooks for XT tables.
+ */
+struct nf_hook_ops *xt_hook_link(const struct xt_table *table, nf_hookfn *fn)
+{
+	unsigned int hook_mask = table->valid_hooks;
+	uint8_t i, num_hooks = hweight32(hook_mask);
+	uint8_t hooknum;
+	struct nf_hook_ops *ops;
+	int ret;
+
+	ops = kmalloc(sizeof(*ops) * num_hooks, GFP_KERNEL);
+	if (ops == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	for (i = 0, hooknum = 0; i < num_hooks && hook_mask != 0;
+	     hook_mask >>= 1, ++hooknum) {
+		if (!(hook_mask & 1))
+			continue;
+		ops[i].hook     = fn;
+		ops[i].owner    = table->me;
+		ops[i].pf       = table->af;
+		ops[i].hooknum  = hooknum;
+		ops[i].priority = table->priority;
+		++i;
+	}
+
+	ret = nf_register_hooks(ops, num_hooks);
+	if (ret < 0) {
+		kfree(ops);
+		return ERR_PTR(ret);
+	}
+
+	return ops;
+}
+EXPORT_SYMBOL_GPL(xt_hook_link);
+
+/**
+ * xt_hook_unlink - remove hooks for a table
+ * @ops:	nf_hook_ops array as returned by nf_hook_link
+ * @hook_mask:	the very same mask that was passed to nf_hook_link
+ */
+void xt_hook_unlink(const struct xt_table *table, struct nf_hook_ops *ops)
+{
+	nf_unregister_hooks(ops, hweight32(table->valid_hooks));
+	kfree(ops);
+}
+EXPORT_SYMBOL_GPL(xt_hook_unlink);
+
 int xt_proto_init(struct net *net, u_int8_t af)
 {
 #ifdef CONFIG_PROC_FS
-- 
cgit v1.2.3


From e3eaa9910b380530cfd2c0670fcd3f627674da8a Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Wed, 17 Jun 2009 22:14:54 +0200
Subject: netfilter: xtables: generate initial table on-demand

The static initial tables are pretty large, and after the net
namespace has been instantiated, they just hang around for nothing.
This commit removes them and creates tables on-demand at runtime when
needed.

Size shrinks by 7735 bytes (x86_64).

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
---
 include/linux/netfilter_arp/arp_tables.h  |  1 +
 include/linux/netfilter_ipv4/ip_tables.h  |  1 +
 include/linux/netfilter_ipv6/ip6_tables.h |  1 +
 net/ipv4/netfilter/arp_tables.c           |  7 +++++
 net/ipv4/netfilter/arptable_filter.c      | 40 ++++++---------------------
 net/ipv4/netfilter/ip_tables.c            |  7 +++++
 net/ipv4/netfilter/iptable_filter.c       | 46 ++++++++-----------------------
 net/ipv4/netfilter/iptable_mangle.c       | 46 +++++--------------------------
 net/ipv4/netfilter/iptable_raw.c          | 36 +++++-------------------
 net/ipv4/netfilter/iptable_security.c     | 39 +++++---------------------
 net/ipv4/netfilter/nf_nat_rule.c          | 39 +++++---------------------
 net/ipv6/netfilter/ip6_tables.c           |  7 +++++
 net/ipv6/netfilter/ip6table_filter.c      | 46 ++++++++-----------------------
 net/ipv6/netfilter/ip6table_mangle.c      | 45 +++++-------------------------
 net/ipv6/netfilter/ip6table_raw.c         | 36 +++++-------------------
 net/ipv6/netfilter/ip6table_security.c    | 39 +++++---------------------
 net/netfilter/x_tables.c                  |  4 ++-
 net/netfilter/xt_repldata.h               | 35 +++++++++++++++++++++++
 18 files changed, 141 insertions(+), 334 deletions(-)
 create mode 100644 net/netfilter/xt_repldata.h

(limited to 'include/linux')

diff --git a/include/linux/netfilter_arp/arp_tables.h b/include/linux/netfilter_arp/arp_tables.h
index f2336523a9df..0b33980611b2 100644
--- a/include/linux/netfilter_arp/arp_tables.h
+++ b/include/linux/netfilter_arp/arp_tables.h
@@ -258,6 +258,7 @@ struct arpt_error {
 	.target.errorname = "ERROR",					       \
 }
 
+extern void *arpt_alloc_initial_table(const struct xt_table *);
 extern struct xt_table *arpt_register_table(struct net *net,
 					    const struct xt_table *table,
 					    const struct arpt_replace *repl);
diff --git a/include/linux/netfilter_ipv4/ip_tables.h b/include/linux/netfilter_ipv4/ip_tables.h
index 8d1f273d350b..364973b42133 100644
--- a/include/linux/netfilter_ipv4/ip_tables.h
+++ b/include/linux/netfilter_ipv4/ip_tables.h
@@ -282,6 +282,7 @@ struct ipt_error {
 	.target.errorname = "ERROR",					       \
 }
 
+extern void *ipt_alloc_initial_table(const struct xt_table *);
 extern unsigned int ipt_do_table(struct sk_buff *skb,
 				 unsigned int hook,
 				 const struct net_device *in,
diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/linux/netfilter_ipv6/ip6_tables.h
index d2952d2fa658..8031eb486a10 100644
--- a/include/linux/netfilter_ipv6/ip6_tables.h
+++ b/include/linux/netfilter_ipv6/ip6_tables.h
@@ -297,6 +297,7 @@ ip6t_get_target(struct ip6t_entry *e)
 #include <linux/init.h>
 extern void ip6t_init(void) __init;
 
+extern void *ip6t_alloc_initial_table(const struct xt_table *);
 extern struct xt_table *ip6t_register_table(struct net *net,
 					    const struct xt_table *table,
 					    const struct ip6t_replace *repl);
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 90203e1b9187..72723ea1054b 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -27,6 +27,7 @@
 
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter_arp/arp_tables.h>
+#include "../../netfilter/xt_repldata.h"
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("David S. Miller <davem@redhat.com>");
@@ -58,6 +59,12 @@ do {								\
 #define ARP_NF_ASSERT(x)
 #endif
 
+void *arpt_alloc_initial_table(const struct xt_table *info)
+{
+	return xt_alloc_initial_table(arpt, ARPT);
+}
+EXPORT_SYMBOL_GPL(arpt_alloc_initial_table);
+
 static inline int arp_devaddr_compare(const struct arpt_devaddr_info *ap,
 				      const char *hdr_addr, int len)
 {
diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c
index b361de0dac4c..bfe26f32b930 100644
--- a/net/ipv4/netfilter/arptable_filter.c
+++ b/net/ipv4/netfilter/arptable_filter.c
@@ -6,6 +6,7 @@
  */
 
 #include <linux/module.h>
+#include <linux/netfilter/x_tables.h>
 #include <linux/netfilter_arp/arp_tables.h>
 
 MODULE_LICENSE("GPL");
@@ -15,36 +16,6 @@ MODULE_DESCRIPTION("arptables filter table");
 #define FILTER_VALID_HOOKS ((1 << NF_ARP_IN) | (1 << NF_ARP_OUT) | \
 			   (1 << NF_ARP_FORWARD))
 
-static const struct
-{
-	struct arpt_replace repl;
-	struct arpt_standard entries[3];
-	struct arpt_error term;
-} initial_table __net_initdata = {
-	.repl = {
-		.name = "filter",
-		.valid_hooks = FILTER_VALID_HOOKS,
-		.num_entries = 4,
-		.size = sizeof(struct arpt_standard) * 3 + sizeof(struct arpt_error),
-		.hook_entry = {
-			[NF_ARP_IN] = 0,
-			[NF_ARP_OUT] = sizeof(struct arpt_standard),
-			[NF_ARP_FORWARD] = 2 * sizeof(struct arpt_standard),
-		},
-		.underflow = {
-			[NF_ARP_IN] = 0,
-			[NF_ARP_OUT] = sizeof(struct arpt_standard),
-			[NF_ARP_FORWARD] = 2 * sizeof(struct arpt_standard),
-		},
-	},
-	.entries = {
-		ARPT_STANDARD_INIT(NF_ACCEPT),	/* ARP_IN */
-		ARPT_STANDARD_INIT(NF_ACCEPT),	/* ARP_OUT */
-		ARPT_STANDARD_INIT(NF_ACCEPT),	/* ARP_FORWARD */
-	},
-	.term = ARPT_ERROR_INIT,
-};
-
 static const struct xt_table packet_filter = {
 	.name		= "filter",
 	.valid_hooks	= FILTER_VALID_HOOKS,
@@ -68,9 +39,14 @@ static struct nf_hook_ops *arpfilter_ops __read_mostly;
 
 static int __net_init arptable_filter_net_init(struct net *net)
 {
-	/* Register table */
+	struct arpt_replace *repl;
+	
+	repl = arpt_alloc_initial_table(&packet_filter);
+	if (repl == NULL)
+		return -ENOMEM;
 	net->ipv4.arptable_filter =
-		arpt_register_table(net, &packet_filter, &initial_table.repl);
+		arpt_register_table(net, &packet_filter, repl);
+	kfree(repl);
 	if (IS_ERR(net->ipv4.arptable_filter))
 		return PTR_ERR(net->ipv4.arptable_filter);
 	return 0;
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 5bf7de1527a5..2057b1bb6178 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -28,6 +28,7 @@
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter_ipv4/ip_tables.h>
 #include <net/netfilter/nf_log.h>
+#include "../../netfilter/xt_repldata.h"
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
@@ -66,6 +67,12 @@ do {								\
 #define inline
 #endif
 
+void *ipt_alloc_initial_table(const struct xt_table *info)
+{
+	return xt_alloc_initial_table(ipt, IPT);
+}
+EXPORT_SYMBOL_GPL(ipt_alloc_initial_table);
+
 /*
    We keep a set of rules for each CPU, so we can avoid write-locking
    them in the softirq when updating the counters and therefore
diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c
index c14bb85db1d9..c8dc9800d620 100644
--- a/net/ipv4/netfilter/iptable_filter.c
+++ b/net/ipv4/netfilter/iptable_filter.c
@@ -23,36 +23,6 @@ MODULE_DESCRIPTION("iptables filter table");
 			    (1 << NF_INET_FORWARD) | \
 			    (1 << NF_INET_LOCAL_OUT))
 
-static struct
-{
-	struct ipt_replace repl;
-	struct ipt_standard entries[3];
-	struct ipt_error term;
-} initial_table __net_initdata = {
-	.repl = {
-		.name = "filter",
-		.valid_hooks = FILTER_VALID_HOOKS,
-		.num_entries = 4,
-		.size = sizeof(struct ipt_standard) * 3 + sizeof(struct ipt_error),
-		.hook_entry = {
-			[NF_INET_LOCAL_IN] = 0,
-			[NF_INET_FORWARD] = sizeof(struct ipt_standard),
-			[NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) * 2,
-		},
-		.underflow = {
-			[NF_INET_LOCAL_IN] = 0,
-			[NF_INET_FORWARD] = sizeof(struct ipt_standard),
-			[NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) * 2,
-		},
-	},
-	.entries = {
-		IPT_STANDARD_INIT(NF_ACCEPT),	/* LOCAL_IN */
-		IPT_STANDARD_INIT(NF_ACCEPT),	/* FORWARD */
-		IPT_STANDARD_INIT(NF_ACCEPT),	/* LOCAL_OUT */
-	},
-	.term = IPT_ERROR_INIT,			/* ERROR */
-};
-
 static const struct xt_table packet_filter = {
 	.name		= "filter",
 	.valid_hooks	= FILTER_VALID_HOOKS,
@@ -86,9 +56,18 @@ module_param(forward, bool, 0000);
 
 static int __net_init iptable_filter_net_init(struct net *net)
 {
-	/* Register table */
+	struct ipt_replace *repl;
+
+	repl = ipt_alloc_initial_table(&packet_filter);
+	if (repl == NULL)
+		return -ENOMEM;
+	/* Entry 1 is the FORWARD hook */
+	((struct ipt_standard *)repl->entries)[1].target.verdict =
+		-forward - 1;
+
 	net->ipv4.iptable_filter =
-		ipt_register_table(net, &packet_filter, &initial_table.repl);
+		ipt_register_table(net, &packet_filter, repl);
+	kfree(repl);
 	if (IS_ERR(net->ipv4.iptable_filter))
 		return PTR_ERR(net->ipv4.iptable_filter);
 	return 0;
@@ -113,9 +92,6 @@ static int __init iptable_filter_init(void)
 		return -EINVAL;
 	}
 
-	/* Entry 1 is the FORWARD hook */
-	initial_table.entries[1].target.verdict = -forward - 1;
-
 	ret = register_pernet_subsys(&iptable_filter_net_ops);
 	if (ret < 0)
 		return ret;
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c
index 2355a229f8ee..58d7097baa3d 100644
--- a/net/ipv4/netfilter/iptable_mangle.c
+++ b/net/ipv4/netfilter/iptable_mangle.c
@@ -27,43 +27,6 @@ MODULE_DESCRIPTION("iptables mangle table");
 			    (1 << NF_INET_LOCAL_OUT) | \
 			    (1 << NF_INET_POST_ROUTING))
 
-/* Ouch - five different hooks? Maybe this should be a config option..... -- BC */
-static const struct
-{
-	struct ipt_replace repl;
-	struct ipt_standard entries[5];
-	struct ipt_error term;
-} initial_table __net_initdata = {
-	.repl = {
-		.name = "mangle",
-		.valid_hooks = MANGLE_VALID_HOOKS,
-		.num_entries = 6,
-		.size = sizeof(struct ipt_standard) * 5 + sizeof(struct ipt_error),
-		.hook_entry = {
-			[NF_INET_PRE_ROUTING] 	= 0,
-			[NF_INET_LOCAL_IN] 	= sizeof(struct ipt_standard),
-			[NF_INET_FORWARD] 	= sizeof(struct ipt_standard) * 2,
-			[NF_INET_LOCAL_OUT] 	= sizeof(struct ipt_standard) * 3,
-			[NF_INET_POST_ROUTING] 	= sizeof(struct ipt_standard) * 4,
-		},
-		.underflow = {
-			[NF_INET_PRE_ROUTING] 	= 0,
-			[NF_INET_LOCAL_IN] 	= sizeof(struct ipt_standard),
-			[NF_INET_FORWARD] 	= sizeof(struct ipt_standard) * 2,
-			[NF_INET_LOCAL_OUT] 	= sizeof(struct ipt_standard) * 3,
-			[NF_INET_POST_ROUTING]	= sizeof(struct ipt_standard) * 4,
-		},
-	},
-	.entries = {
-		IPT_STANDARD_INIT(NF_ACCEPT),	/* PRE_ROUTING */
-		IPT_STANDARD_INIT(NF_ACCEPT),	/* LOCAL_IN */
-		IPT_STANDARD_INIT(NF_ACCEPT),	/* FORWARD */
-		IPT_STANDARD_INIT(NF_ACCEPT),	/* LOCAL_OUT */
-		IPT_STANDARD_INIT(NF_ACCEPT),	/* POST_ROUTING */
-	},
-	.term = IPT_ERROR_INIT,			/* ERROR */
-};
-
 static const struct xt_table packet_mangler = {
 	.name		= "mangle",
 	.valid_hooks	= MANGLE_VALID_HOOKS,
@@ -134,9 +97,14 @@ static struct nf_hook_ops *mangle_ops __read_mostly;
 
 static int __net_init iptable_mangle_net_init(struct net *net)
 {
-	/* Register table */
+	struct ipt_replace *repl;
+
+	repl = ipt_alloc_initial_table(&packet_mangler);
+	if (repl == NULL)
+		return -ENOMEM;
 	net->ipv4.iptable_mangle =
-		ipt_register_table(net, &packet_mangler, &initial_table.repl);
+		ipt_register_table(net, &packet_mangler, repl);
+	kfree(repl);
 	if (IS_ERR(net->ipv4.iptable_mangle))
 		return PTR_ERR(net->ipv4.iptable_mangle);
 	return 0;
diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c
index 62a99154f14c..06fb9d11953c 100644
--- a/net/ipv4/netfilter/iptable_raw.c
+++ b/net/ipv4/netfilter/iptable_raw.c
@@ -9,33 +9,6 @@
 
 #define RAW_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT))
 
-static const struct
-{
-	struct ipt_replace repl;
-	struct ipt_standard entries[2];
-	struct ipt_error term;
-} initial_table __net_initdata = {
-	.repl = {
-		.name = "raw",
-		.valid_hooks = RAW_VALID_HOOKS,
-		.num_entries = 3,
-		.size = sizeof(struct ipt_standard) * 2 + sizeof(struct ipt_error),
-		.hook_entry = {
-			[NF_INET_PRE_ROUTING] = 0,
-			[NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard)
-		},
-		.underflow = {
-			[NF_INET_PRE_ROUTING] = 0,
-			[NF_INET_LOCAL_OUT]  = sizeof(struct ipt_standard)
-		},
-	},
-	.entries = {
-		IPT_STANDARD_INIT(NF_ACCEPT),	/* PRE_ROUTING */
-		IPT_STANDARD_INIT(NF_ACCEPT),	/* LOCAL_OUT */
-	},
-	.term = IPT_ERROR_INIT,			/* ERROR */
-};
-
 static const struct xt_table packet_raw = {
 	.name = "raw",
 	.valid_hooks =  RAW_VALID_HOOKS,
@@ -66,9 +39,14 @@ static struct nf_hook_ops *rawtable_ops __read_mostly;
 
 static int __net_init iptable_raw_net_init(struct net *net)
 {
-	/* Register table */
+	struct ipt_replace *repl;
+
+	repl = ipt_alloc_initial_table(&packet_raw);
+	if (repl == NULL)
+		return -ENOMEM;
 	net->ipv4.iptable_raw =
-		ipt_register_table(net, &packet_raw, &initial_table.repl);
+		ipt_register_table(net, &packet_raw, repl);
+	kfree(repl);
 	if (IS_ERR(net->ipv4.iptable_raw))
 		return PTR_ERR(net->ipv4.iptable_raw);
 	return 0;
diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c
index b1bf3ca2c6c7..cce2f64e6f21 100644
--- a/net/ipv4/netfilter/iptable_security.c
+++ b/net/ipv4/netfilter/iptable_security.c
@@ -27,36 +27,6 @@ MODULE_DESCRIPTION("iptables security table, for MAC rules");
 				(1 << NF_INET_FORWARD) | \
 				(1 << NF_INET_LOCAL_OUT)
 
-static const struct
-{
-	struct ipt_replace repl;
-	struct ipt_standard entries[3];
-	struct ipt_error term;
-} initial_table __net_initdata = {
-	.repl = {
-		.name = "security",
-		.valid_hooks = SECURITY_VALID_HOOKS,
-		.num_entries = 4,
-		.size = sizeof(struct ipt_standard) * 3 + sizeof(struct ipt_error),
-		.hook_entry = {
-			[NF_INET_LOCAL_IN] 	= 0,
-			[NF_INET_FORWARD] 	= sizeof(struct ipt_standard),
-			[NF_INET_LOCAL_OUT] 	= sizeof(struct ipt_standard) * 2,
-		},
-		.underflow = {
-			[NF_INET_LOCAL_IN] 	= 0,
-			[NF_INET_FORWARD] 	= sizeof(struct ipt_standard),
-			[NF_INET_LOCAL_OUT] 	= sizeof(struct ipt_standard) * 2,
-		},
-	},
-	.entries = {
-		IPT_STANDARD_INIT(NF_ACCEPT),	/* LOCAL_IN */
-		IPT_STANDARD_INIT(NF_ACCEPT),	/* FORWARD */
-		IPT_STANDARD_INIT(NF_ACCEPT),	/* LOCAL_OUT */
-	},
-	.term = IPT_ERROR_INIT,			/* ERROR */
-};
-
 static const struct xt_table security_table = {
 	.name		= "security",
 	.valid_hooks	= SECURITY_VALID_HOOKS,
@@ -87,9 +57,14 @@ static struct nf_hook_ops *sectbl_ops __read_mostly;
 
 static int __net_init iptable_security_net_init(struct net *net)
 {
-	net->ipv4.iptable_security =
-		ipt_register_table(net, &security_table, &initial_table.repl);
+	struct ipt_replace *repl;
 
+	repl = ipt_alloc_initial_table(&security_table);
+	if (repl == NULL)
+		return -ENOMEM;
+	net->ipv4.iptable_security =
+		ipt_register_table(net, &security_table, repl);
+	kfree(repl);
 	if (IS_ERR(net->ipv4.iptable_security))
 		return PTR_ERR(net->ipv4.iptable_security);
 
diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c
index 85da34fdc755..ab74cc0535e2 100644
--- a/net/ipv4/netfilter/nf_nat_rule.c
+++ b/net/ipv4/netfilter/nf_nat_rule.c
@@ -28,36 +28,6 @@
 			 (1 << NF_INET_POST_ROUTING) | \
 			 (1 << NF_INET_LOCAL_OUT))
 
-static const struct
-{
-	struct ipt_replace repl;
-	struct ipt_standard entries[3];
-	struct ipt_error term;
-} nat_initial_table __net_initdata = {
-	.repl = {
-		.name = "nat",
-		.valid_hooks = NAT_VALID_HOOKS,
-		.num_entries = 4,
-		.size = sizeof(struct ipt_standard) * 3 + sizeof(struct ipt_error),
-		.hook_entry = {
-			[NF_INET_PRE_ROUTING] = 0,
-			[NF_INET_POST_ROUTING] = sizeof(struct ipt_standard),
-			[NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) * 2
-		},
-		.underflow = {
-			[NF_INET_PRE_ROUTING] = 0,
-			[NF_INET_POST_ROUTING] = sizeof(struct ipt_standard),
-			[NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) * 2
-		},
-	},
-	.entries = {
-		IPT_STANDARD_INIT(NF_ACCEPT),	/* PRE_ROUTING */
-		IPT_STANDARD_INIT(NF_ACCEPT),	/* POST_ROUTING */
-		IPT_STANDARD_INIT(NF_ACCEPT),	/* LOCAL_OUT */
-	},
-	.term = IPT_ERROR_INIT,			/* ERROR */
-};
-
 static const struct xt_table nat_table = {
 	.name		= "nat",
 	.valid_hooks	= NAT_VALID_HOOKS,
@@ -186,8 +156,13 @@ static struct xt_target ipt_dnat_reg __read_mostly = {
 
 static int __net_init nf_nat_rule_net_init(struct net *net)
 {
-	net->ipv4.nat_table = ipt_register_table(net, &nat_table,
-						 &nat_initial_table.repl);
+	struct ipt_replace *repl;
+
+	repl = ipt_alloc_initial_table(&nat_table);
+	if (repl == NULL)
+		return -ENOMEM;
+	net->ipv4.nat_table = ipt_register_table(net, &nat_table, repl);
+	kfree(repl);
 	if (IS_ERR(net->ipv4.nat_table))
 		return PTR_ERR(net->ipv4.nat_table);
 	return 0;
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 4332f4591482..dcd7825fe7b6 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -29,6 +29,7 @@
 #include <linux/netfilter_ipv6/ip6_tables.h>
 #include <linux/netfilter/x_tables.h>
 #include <net/netfilter/nf_log.h>
+#include "../../netfilter/xt_repldata.h"
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
@@ -67,6 +68,12 @@ do {								\
 #define inline
 #endif
 
+void *ip6t_alloc_initial_table(const struct xt_table *info)
+{
+	return xt_alloc_initial_table(ip6t, IP6T);
+}
+EXPORT_SYMBOL_GPL(ip6t_alloc_initial_table);
+
 /*
    We keep a set of rules for each CPU, so we can avoid write-locking
    them in the softirq when updating the counters and therefore
diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c
index 6e95d0614ca9..36b72cafc227 100644
--- a/net/ipv6/netfilter/ip6table_filter.c
+++ b/net/ipv6/netfilter/ip6table_filter.c
@@ -21,36 +21,6 @@ MODULE_DESCRIPTION("ip6tables filter table");
 			    (1 << NF_INET_FORWARD) | \
 			    (1 << NF_INET_LOCAL_OUT))
 
-static struct
-{
-	struct ip6t_replace repl;
-	struct ip6t_standard entries[3];
-	struct ip6t_error term;
-} initial_table __net_initdata = {
-	.repl = {
-		.name = "filter",
-		.valid_hooks = FILTER_VALID_HOOKS,
-		.num_entries = 4,
-		.size = sizeof(struct ip6t_standard) * 3 + sizeof(struct ip6t_error),
-		.hook_entry = {
-			[NF_INET_LOCAL_IN] = 0,
-			[NF_INET_FORWARD] = sizeof(struct ip6t_standard),
-			[NF_INET_LOCAL_OUT] = sizeof(struct ip6t_standard) * 2
-		},
-		.underflow = {
-			[NF_INET_LOCAL_IN] = 0,
-			[NF_INET_FORWARD] = sizeof(struct ip6t_standard),
-			[NF_INET_LOCAL_OUT] = sizeof(struct ip6t_standard) * 2
-		},
-	},
-	.entries = {
-		IP6T_STANDARD_INIT(NF_ACCEPT),	/* LOCAL_IN */
-		IP6T_STANDARD_INIT(NF_ACCEPT),	/* FORWARD */
-		IP6T_STANDARD_INIT(NF_ACCEPT),	/* LOCAL_OUT */
-	},
-	.term = IP6T_ERROR_INIT,		/* ERROR */
-};
-
 static const struct xt_table packet_filter = {
 	.name		= "filter",
 	.valid_hooks	= FILTER_VALID_HOOKS,
@@ -78,9 +48,18 @@ module_param(forward, bool, 0000);
 
 static int __net_init ip6table_filter_net_init(struct net *net)
 {
-	/* Register table */
+	struct ip6t_replace *repl;
+
+	repl = ip6t_alloc_initial_table(&packet_filter);
+	if (repl == NULL)
+		return -ENOMEM;
+	/* Entry 1 is the FORWARD hook */
+	((struct ip6t_standard *)repl->entries)[1].target.verdict =
+		-forward - 1;
+
 	net->ipv6.ip6table_filter =
-		ip6t_register_table(net, &packet_filter, &initial_table.repl);
+		ip6t_register_table(net, &packet_filter, repl);
+	kfree(repl);
 	if (IS_ERR(net->ipv6.ip6table_filter))
 		return PTR_ERR(net->ipv6.ip6table_filter);
 	return 0;
@@ -105,9 +84,6 @@ static int __init ip6table_filter_init(void)
 		return -EINVAL;
 	}
 
-	/* Entry 1 is the FORWARD hook */
-	initial_table.entries[1].target.verdict = -forward - 1;
-
 	ret = register_pernet_subsys(&ip6table_filter_net_ops);
 	if (ret < 0)
 		return ret;
diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c
index 5023ac52ffec..dc803b7e8e54 100644
--- a/net/ipv6/netfilter/ip6table_mangle.c
+++ b/net/ipv6/netfilter/ip6table_mangle.c
@@ -21,42 +21,6 @@ MODULE_DESCRIPTION("ip6tables mangle table");
 			    (1 << NF_INET_LOCAL_OUT) | \
 			    (1 << NF_INET_POST_ROUTING))
 
-static const struct
-{
-	struct ip6t_replace repl;
-	struct ip6t_standard entries[5];
-	struct ip6t_error term;
-} initial_table __net_initdata = {
-	.repl = {
-		.name = "mangle",
-		.valid_hooks = MANGLE_VALID_HOOKS,
-		.num_entries = 6,
-		.size = sizeof(struct ip6t_standard) * 5 + sizeof(struct ip6t_error),
-		.hook_entry = {
-			[NF_INET_PRE_ROUTING] 	= 0,
-			[NF_INET_LOCAL_IN]	= sizeof(struct ip6t_standard),
-			[NF_INET_FORWARD]	= sizeof(struct ip6t_standard) * 2,
-			[NF_INET_LOCAL_OUT] 	= sizeof(struct ip6t_standard) * 3,
-			[NF_INET_POST_ROUTING]	= sizeof(struct ip6t_standard) * 4,
-		},
-		.underflow = {
-			[NF_INET_PRE_ROUTING] 	= 0,
-			[NF_INET_LOCAL_IN]	= sizeof(struct ip6t_standard),
-			[NF_INET_FORWARD]	= sizeof(struct ip6t_standard) * 2,
-			[NF_INET_LOCAL_OUT] 	= sizeof(struct ip6t_standard) * 3,
-			[NF_INET_POST_ROUTING]	= sizeof(struct ip6t_standard) * 4,
-		},
-	},
-	.entries = {
-		IP6T_STANDARD_INIT(NF_ACCEPT),	/* PRE_ROUTING */
-		IP6T_STANDARD_INIT(NF_ACCEPT),	/* LOCAL_IN */
-		IP6T_STANDARD_INIT(NF_ACCEPT),	/* FORWARD */
-		IP6T_STANDARD_INIT(NF_ACCEPT),	/* LOCAL_OUT */
-		IP6T_STANDARD_INIT(NF_ACCEPT),	/* POST_ROUTING */
-	},
-	.term = IP6T_ERROR_INIT,		/* ERROR */
-};
-
 static const struct xt_table packet_mangler = {
 	.name		= "mangle",
 	.valid_hooks	= MANGLE_VALID_HOOKS,
@@ -126,9 +90,14 @@ ip6table_mangle_hook(unsigned int hook, struct sk_buff *skb,
 static struct nf_hook_ops *mangle_ops __read_mostly;
 static int __net_init ip6table_mangle_net_init(struct net *net)
 {
-	/* Register table */
+	struct ip6t_replace *repl;
+
+	repl = ip6t_alloc_initial_table(&packet_mangler);
+	if (repl == NULL)
+		return -ENOMEM;
 	net->ipv6.ip6table_mangle =
-		ip6t_register_table(net, &packet_mangler, &initial_table.repl);
+		ip6t_register_table(net, &packet_mangler, repl);
+	kfree(repl);
 	if (IS_ERR(net->ipv6.ip6table_mangle))
 		return PTR_ERR(net->ipv6.ip6table_mangle);
 	return 0;
diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c
index 3bfa69511641..aef31a29de9e 100644
--- a/net/ipv6/netfilter/ip6table_raw.c
+++ b/net/ipv6/netfilter/ip6table_raw.c
@@ -8,33 +8,6 @@
 
 #define RAW_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT))
 
-static const struct
-{
-	struct ip6t_replace repl;
-	struct ip6t_standard entries[2];
-	struct ip6t_error term;
-} initial_table __net_initdata = {
-	.repl = {
-		.name = "raw",
-		.valid_hooks = RAW_VALID_HOOKS,
-		.num_entries = 3,
-		.size = sizeof(struct ip6t_standard) * 2 + sizeof(struct ip6t_error),
-		.hook_entry = {
-			[NF_INET_PRE_ROUTING] = 0,
-			[NF_INET_LOCAL_OUT] = sizeof(struct ip6t_standard)
-		},
-		.underflow = {
-			[NF_INET_PRE_ROUTING] = 0,
-			[NF_INET_LOCAL_OUT] = sizeof(struct ip6t_standard)
-		},
-	},
-	.entries = {
-		IP6T_STANDARD_INIT(NF_ACCEPT),	/* PRE_ROUTING */
-		IP6T_STANDARD_INIT(NF_ACCEPT),	/* LOCAL_OUT */
-	},
-	.term = IP6T_ERROR_INIT,		/* ERROR */
-};
-
 static const struct xt_table packet_raw = {
 	.name = "raw",
 	.valid_hooks = RAW_VALID_HOOKS,
@@ -58,9 +31,14 @@ static struct nf_hook_ops *rawtable_ops __read_mostly;
 
 static int __net_init ip6table_raw_net_init(struct net *net)
 {
-	/* Register table */
+	struct ip6t_replace *repl;
+
+	repl = ip6t_alloc_initial_table(&packet_raw);
+	if (repl == NULL)
+		return -ENOMEM;
 	net->ipv6.ip6table_raw =
-		ip6t_register_table(net, &packet_raw, &initial_table.repl);
+		ip6t_register_table(net, &packet_raw, repl);
+	kfree(repl);
 	if (IS_ERR(net->ipv6.ip6table_raw))
 		return PTR_ERR(net->ipv6.ip6table_raw);
 	return 0;
diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c
index dd2200f17a6c..0824d865aa9b 100644
--- a/net/ipv6/netfilter/ip6table_security.c
+++ b/net/ipv6/netfilter/ip6table_security.c
@@ -26,36 +26,6 @@ MODULE_DESCRIPTION("ip6tables security table, for MAC rules");
 				(1 << NF_INET_FORWARD) | \
 				(1 << NF_INET_LOCAL_OUT)
 
-static const struct
-{
-	struct ip6t_replace repl;
-	struct ip6t_standard entries[3];
-	struct ip6t_error term;
-} initial_table __net_initdata = {
-	.repl = {
-		.name = "security",
-		.valid_hooks = SECURITY_VALID_HOOKS,
-		.num_entries = 4,
-		.size = sizeof(struct ip6t_standard) * 3 + sizeof(struct ip6t_error),
-		.hook_entry = {
-			[NF_INET_LOCAL_IN] 	= 0,
-			[NF_INET_FORWARD] 	= sizeof(struct ip6t_standard),
-			[NF_INET_LOCAL_OUT] 	= sizeof(struct ip6t_standard) * 2,
-		},
-		.underflow = {
-			[NF_INET_LOCAL_IN] 	= 0,
-			[NF_INET_FORWARD] 	= sizeof(struct ip6t_standard),
-			[NF_INET_LOCAL_OUT] 	= sizeof(struct ip6t_standard) * 2,
-		},
-	},
-	.entries = {
-		IP6T_STANDARD_INIT(NF_ACCEPT),	/* LOCAL_IN */
-		IP6T_STANDARD_INIT(NF_ACCEPT),	/* FORWARD */
-		IP6T_STANDARD_INIT(NF_ACCEPT),	/* LOCAL_OUT */
-	},
-	.term = IP6T_ERROR_INIT,		/* ERROR */
-};
-
 static const struct xt_table security_table = {
 	.name		= "security",
 	.valid_hooks	= SECURITY_VALID_HOOKS,
@@ -79,9 +49,14 @@ static struct nf_hook_ops *sectbl_ops __read_mostly;
 
 static int __net_init ip6table_security_net_init(struct net *net)
 {
-	net->ipv6.ip6table_security =
-		ip6t_register_table(net, &security_table, &initial_table.repl);
+	struct ip6t_replace *repl;
 
+	repl = ip6t_alloc_initial_table(&security_table);
+	if (repl == NULL)
+		return -ENOMEM;
+	net->ipv6.ip6table_security =
+		ip6t_register_table(net, &security_table, repl);
+	kfree(repl);
 	if (IS_ERR(net->ipv6.ip6table_security))
 		return PTR_ERR(net->ipv6.ip6table_security);
 
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index b51cb0d7234a..dc2e05cb54c0 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -26,7 +26,9 @@
 
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter_arp.h>
-
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+#include <linux/netfilter_arp/arp_tables.h>
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
diff --git a/net/netfilter/xt_repldata.h b/net/netfilter/xt_repldata.h
new file mode 100644
index 000000000000..6efe4e5a81c6
--- /dev/null
+++ b/net/netfilter/xt_repldata.h
@@ -0,0 +1,35 @@
+/*
+ * Today's hack: quantum tunneling in structs
+ *
+ * 'entries' and 'term' are never anywhere referenced by word in code. In fact,
+ * they serve as the hanging-off data accessed through repl.data[].
+ */
+
+#define xt_alloc_initial_table(type, typ2) ({ \
+	unsigned int hook_mask = info->valid_hooks; \
+	unsigned int nhooks = hweight32(hook_mask); \
+	unsigned int bytes = 0, hooknum = 0, i = 0; \
+	struct { \
+		struct type##_replace repl; \
+		struct type##_standard entries[nhooks]; \
+		struct type##_error term; \
+	} *tbl = kzalloc(sizeof(*tbl), GFP_KERNEL); \
+	if (tbl == NULL) \
+		return NULL; \
+	strncpy(tbl->repl.name, info->name, sizeof(tbl->repl.name)); \
+	tbl->term = (struct type##_error)typ2##_ERROR_INIT;  \
+	tbl->repl.valid_hooks = hook_mask; \
+	tbl->repl.num_entries = nhooks + 1; \
+	tbl->repl.size = nhooks * sizeof(struct type##_standard) + \
+	                 sizeof(struct type##_error); \
+	for (; hook_mask != 0; hook_mask >>= 1, ++hooknum) { \
+		if (!(hook_mask & 1)) \
+			continue; \
+		tbl->repl.hook_entry[hooknum] = bytes; \
+		tbl->repl.underflow[hooknum]  = bytes; \
+		tbl->entries[i++] = (struct type##_standard) \
+			typ2##_STANDARD_INIT(NF_ACCEPT); \
+		bytes += sizeof(struct type##_standard); \
+	} \
+	tbl; \
+})
-- 
cgit v1.2.3


From 66655de6d132b726be64c324bc3f9ea366d20697 Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Mon, 8 Feb 2010 23:18:22 +0000
Subject: seq_file: Add helpers for iteration over a hlist

Some places in kernel need to iterate over a hlist in seq_file,
so provide some common helpers.

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 fs/seq_file.c            | 59 +++++++++++++++++++++++++++++++++++++++++++++---
 include/linux/seq_file.h | 11 +++++++++
 2 files changed, 67 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/fs/seq_file.c b/fs/seq_file.c
index eae7d9dbf3ff..f65b16f02da3 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -674,7 +674,6 @@ struct list_head *seq_list_start(struct list_head *head, loff_t pos)
 
 	return NULL;
 }
-
 EXPORT_SYMBOL(seq_list_start);
 
 struct list_head *seq_list_start_head(struct list_head *head, loff_t pos)
@@ -684,7 +683,6 @@ struct list_head *seq_list_start_head(struct list_head *head, loff_t pos)
 
 	return seq_list_start(head, pos - 1);
 }
-
 EXPORT_SYMBOL(seq_list_start_head);
 
 struct list_head *seq_list_next(void *v, struct list_head *head, loff_t *ppos)
@@ -695,5 +693,60 @@ struct list_head *seq_list_next(void *v, struct list_head *head, loff_t *ppos)
 	++*ppos;
 	return lh == head ? NULL : lh;
 }
-
 EXPORT_SYMBOL(seq_list_next);
+
+/**
+ * seq_hlist_start - start an iteration of a hlist
+ * @head: the head of the hlist
+ * @pos:  the start position of the sequence
+ *
+ * Called at seq_file->op->start().
+ */
+struct hlist_node *seq_hlist_start(struct hlist_head *head, loff_t pos)
+{
+	struct hlist_node *node;
+
+	hlist_for_each(node, head)
+		if (pos-- == 0)
+			return node;
+	return NULL;
+}
+EXPORT_SYMBOL(seq_hlist_start);
+
+/**
+ * seq_hlist_start_head - start an iteration of a hlist
+ * @head: the head of the hlist
+ * @pos:  the start position of the sequence
+ *
+ * Called at seq_file->op->start(). Call this function if you want to
+ * print a header at the top of the output.
+ */
+struct hlist_node *seq_hlist_start_head(struct hlist_head *head, loff_t pos)
+{
+	if (!pos)
+		return SEQ_START_TOKEN;
+
+	return seq_hlist_start(head, pos - 1);
+}
+EXPORT_SYMBOL(seq_hlist_start_head);
+
+/**
+ * seq_hlist_next - move to the next position of the hlist
+ * @v:    the current iterator
+ * @head: the head of the hlist
+ * @pos:  the current posision
+ *
+ * Called at seq_file->op->next().
+ */
+struct hlist_node *seq_hlist_next(void *v, struct hlist_head *head,
+				  loff_t *ppos)
+{
+	struct hlist_node *node = v;
+
+	++*ppos;
+	if (v == SEQ_START_TOKEN)
+		return head->first;
+	else
+		return node->next;
+}
+EXPORT_SYMBOL(seq_hlist_next);
diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h
index 8366d8f12e53..c95bcdc18f4c 100644
--- a/include/linux/seq_file.h
+++ b/include/linux/seq_file.h
@@ -135,4 +135,15 @@ extern struct list_head *seq_list_start_head(struct list_head *head,
 extern struct list_head *seq_list_next(void *v, struct list_head *head,
 		loff_t *ppos);
 
+/*
+ * Helpers for iteration over hlist_head-s in seq_files
+ */
+
+extern struct hlist_node *seq_hlist_start(struct hlist_head *head,
+		loff_t pos);
+extern struct hlist_node *seq_hlist_start_head(struct hlist_head *head,
+		loff_t pos);
+extern struct hlist_node *seq_hlist_next(void *v, struct hlist_head *head,
+		loff_t *ppos);
+
 #endif
-- 
cgit v1.2.3


From ced5b697a76d325e7a7ac7d382dbbb632c765093 Mon Sep 17 00:00:00 2001
From: Brandon Phiilps <bphilips@suse.de>
Date: Wed, 10 Feb 2010 01:20:06 -0800
Subject: x86: Avoid race condition in pci_enable_msix()

Keep chip_data in create_irq_nr and destroy_irq.

When two drivers are setting up MSI-X at the same time via
pci_enable_msix() there is a race.  See this dmesg excerpt:

[   85.170610] ixgbe 0000:02:00.1: irq 97 for MSI/MSI-X
[   85.170611]   alloc irq_desc for 99 on node -1
[   85.170613] igb 0000:08:00.1: irq 98 for MSI/MSI-X
[   85.170614]   alloc kstat_irqs on node -1
[   85.170616] alloc irq_2_iommu on node -1
[   85.170617]   alloc irq_desc for 100 on node -1
[   85.170619]   alloc kstat_irqs on node -1
[   85.170621] alloc irq_2_iommu on node -1
[   85.170625] ixgbe 0000:02:00.1: irq 99 for MSI/MSI-X
[   85.170626]   alloc irq_desc for 101 on node -1
[   85.170628] igb 0000:08:00.1: irq 100 for MSI/MSI-X
[   85.170630]   alloc kstat_irqs on node -1
[   85.170631] alloc irq_2_iommu on node -1
[   85.170635]   alloc irq_desc for 102 on node -1
[   85.170636]   alloc kstat_irqs on node -1
[   85.170639] alloc irq_2_iommu on node -1
[   85.170646] BUG: unable to handle kernel NULL pointer dereference
at 0000000000000088

As you can see igb and ixgbe are both alternating on create_irq_nr()
via pci_enable_msix() in their probe function.

ixgbe: While looping through irq_desc_ptrs[] via create_irq_nr() ixgbe
choses irq_desc_ptrs[102] and exits the loop, drops vector_lock and
calls dynamic_irq_init. Then it sets irq_desc_ptrs[102]->chip_data =
NULL via dynamic_irq_init().

igb: Grabs the vector_lock now and starts looping over irq_desc_ptrs[]
via create_irq_nr(). It gets to irq_desc_ptrs[102] and does this:

	cfg_new = irq_desc_ptrs[102]->chip_data;
	if (cfg_new->vector != 0)
		continue;

This hits the NULL deref.

Another possible race exists via pci_disable_msix() in a driver or in
the number of error paths that call free_msi_irqs():

destroy_irq()
dynamic_irq_cleanup() which sets desc->chip_data = NULL
...race window...
desc->chip_data = cfg;

Remove the save and restore code for cfg in create_irq_nr() and
destroy_irq() and take the desc->lock when checking the irq_cfg.

Reported-and-analyzed-by: Brandon Philips <bphilips@suse.de>
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
LKML-Reference: <1265793639-15071-3-git-send-email-yinghai@kernel.org>
Signed-off-by: Brandon Phililps <bphilips@suse.de>
Cc: stable@kernel.org
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/kernel/apic/io_apic.c | 18 ++++-----------
 include/linux/irq.h            |  2 ++
 kernel/irq/chip.c              | 52 ++++++++++++++++++++++++++++++++++--------
 3 files changed, 50 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 53243ca7816d..c86591b906fa 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -3228,12 +3228,9 @@ unsigned int create_irq_nr(unsigned int irq_want, int node)
 	}
 	spin_unlock_irqrestore(&vector_lock, flags);
 
-	if (irq > 0) {
-		dynamic_irq_init(irq);
-		/* restore it, in case dynamic_irq_init clear it */
-		if (desc_new)
-			desc_new->chip_data = cfg_new;
-	}
+	if (irq > 0)
+		dynamic_irq_init_keep_chip_data(irq);
+
 	return irq;
 }
 
@@ -3256,17 +3253,12 @@ void destroy_irq(unsigned int irq)
 {
 	unsigned long flags;
 	struct irq_cfg *cfg;
-	struct irq_desc *desc;
 
-	/* store it, in case dynamic_irq_cleanup clear it */
-	desc = irq_to_desc(irq);
-	cfg = desc->chip_data;
-	dynamic_irq_cleanup(irq);
-	/* connect back irq_cfg */
-	desc->chip_data = cfg;
+	dynamic_irq_cleanup_keep_chip_data(irq);
 
 	free_irte(irq);
 	spin_lock_irqsave(&vector_lock, flags);
+	cfg = irq_to_desc(irq)->chip_data;
 	__clear_irq_vector(irq, cfg);
 	spin_unlock_irqrestore(&vector_lock, flags);
 }
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 451481c082b5..4d9b26e044bc 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -400,7 +400,9 @@ static inline int irq_has_action(unsigned int irq)
 
 /* Dynamic irq helper functions */
 extern void dynamic_irq_init(unsigned int irq);
+void dynamic_irq_init_keep_chip_data(unsigned int irq);
 extern void dynamic_irq_cleanup(unsigned int irq);
+void dynamic_irq_cleanup_keep_chip_data(unsigned int irq);
 
 /* Set/get chip/data for an IRQ: */
 extern int set_irq_chip(unsigned int irq, struct irq_chip *chip);
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index ecc3fa28f666..d70394f12ee9 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -18,11 +18,7 @@
 
 #include "internals.h"
 
-/**
- *	dynamic_irq_init - initialize a dynamically allocated irq
- *	@irq:	irq number to initialize
- */
-void dynamic_irq_init(unsigned int irq)
+static void dynamic_irq_init_x(unsigned int irq, bool keep_chip_data)
 {
 	struct irq_desc *desc;
 	unsigned long flags;
@@ -41,7 +37,8 @@ void dynamic_irq_init(unsigned int irq)
 	desc->depth = 1;
 	desc->msi_desc = NULL;
 	desc->handler_data = NULL;
-	desc->chip_data = NULL;
+	if (!keep_chip_data)
+		desc->chip_data = NULL;
 	desc->action = NULL;
 	desc->irq_count = 0;
 	desc->irqs_unhandled = 0;
@@ -55,10 +52,26 @@ void dynamic_irq_init(unsigned int irq)
 }
 
 /**
- *	dynamic_irq_cleanup - cleanup a dynamically allocated irq
+ *	dynamic_irq_init - initialize a dynamically allocated irq
  *	@irq:	irq number to initialize
  */
-void dynamic_irq_cleanup(unsigned int irq)
+void dynamic_irq_init(unsigned int irq)
+{
+	dynamic_irq_init_x(irq, false);
+}
+
+/**
+ *	dynamic_irq_init_keep_chip_data - initialize a dynamically allocated irq
+ *	@irq:	irq number to initialize
+ *
+ *	does not set irq_to_desc(irq)->chip_data to NULL
+ */
+void dynamic_irq_init_keep_chip_data(unsigned int irq)
+{
+	dynamic_irq_init_x(irq, true);
+}
+
+static void dynamic_irq_cleanup_x(unsigned int irq, bool keep_chip_data)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
 	unsigned long flags;
@@ -77,7 +90,8 @@ void dynamic_irq_cleanup(unsigned int irq)
 	}
 	desc->msi_desc = NULL;
 	desc->handler_data = NULL;
-	desc->chip_data = NULL;
+	if (!keep_chip_data)
+		desc->chip_data = NULL;
 	desc->handle_irq = handle_bad_irq;
 	desc->chip = &no_irq_chip;
 	desc->name = NULL;
@@ -85,6 +99,26 @@ void dynamic_irq_cleanup(unsigned int irq)
 	raw_spin_unlock_irqrestore(&desc->lock, flags);
 }
 
+/**
+ *	dynamic_irq_cleanup - cleanup a dynamically allocated irq
+ *	@irq:	irq number to initialize
+ */
+void dynamic_irq_cleanup(unsigned int irq)
+{
+	dynamic_irq_cleanup_x(irq, false);
+}
+
+/**
+ *	dynamic_irq_cleanup_keep_chip_data - cleanup a dynamically allocated irq
+ *	@irq:	irq number to initialize
+ *
+ *	does not set irq_to_desc(irq)->chip_data to NULL
+ */
+void dynamic_irq_cleanup_keep_chip_data(unsigned int irq)
+{
+	dynamic_irq_cleanup_x(irq, true);
+}
+
 
 /**
  *	set_irq_chip - set the irq chip for an irq
-- 
cgit v1.2.3


From 27811d8cabe56e0c3622251b049086f49face4ff Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Wed, 10 Feb 2010 01:20:07 -0800
Subject: x86: Move range related operation to one file

We have almost the same code for mtrr cleanup and amd_bus checkup, and
this code  will also be used in replacing bootmem with early_res,
so try to move them together and reuse it from different parts.

Also rename update_range to subtract_range as that is what the
function is actually doing.

-v2: update comments as Christoph requested

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
LKML-Reference: <1265793639-15071-4-git-send-email-yinghai@kernel.org>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/kernel/cpu/mtrr/cleanup.c | 180 ++++---------------------------------
 arch/x86/kernel/mmconf-fam10h_64.c |   7 +-
 arch/x86/pci/amd_bus.c             |  70 +++------------
 include/linux/range.h              |  22 +++++
 kernel/Makefile                    |   2 +-
 kernel/range.c                     | 163 +++++++++++++++++++++++++++++++++
 6 files changed, 214 insertions(+), 230 deletions(-)
 create mode 100644 include/linux/range.h
 create mode 100644 kernel/range.c

(limited to 'include/linux')

diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c
index 09b1698e0466..669da09ab9a8 100644
--- a/arch/x86/kernel/cpu/mtrr/cleanup.c
+++ b/arch/x86/kernel/cpu/mtrr/cleanup.c
@@ -22,10 +22,10 @@
 #include <linux/pci.h>
 #include <linux/smp.h>
 #include <linux/cpu.h>
-#include <linux/sort.h>
 #include <linux/mutex.h>
 #include <linux/uaccess.h>
 #include <linux/kvm_para.h>
+#include <linux/range.h>
 
 #include <asm/processor.h>
 #include <asm/e820.h>
@@ -34,11 +34,6 @@
 
 #include "mtrr.h"
 
-struct res_range {
-	unsigned long	start;
-	unsigned long	end;
-};
-
 struct var_mtrr_range_state {
 	unsigned long	base_pfn;
 	unsigned long	size_pfn;
@@ -56,7 +51,7 @@ struct var_mtrr_state {
 /* Should be related to MTRR_VAR_RANGES nums */
 #define RANGE_NUM				256
 
-static struct res_range __initdata		range[RANGE_NUM];
+static struct range __initdata		range[RANGE_NUM];
 static int __initdata				nr_range;
 
 static struct var_mtrr_range_state __initdata	range_state[RANGE_NUM];
@@ -64,152 +59,11 @@ static struct var_mtrr_range_state __initdata	range_state[RANGE_NUM];
 static int __initdata debug_print;
 #define Dprintk(x...) do { if (debug_print) printk(KERN_DEBUG x); } while (0)
 
-
-static int __init
-add_range(struct res_range *range, int nr_range,
-	  unsigned long start, unsigned long end)
-{
-	/* Out of slots: */
-	if (nr_range >= RANGE_NUM)
-		return nr_range;
-
-	range[nr_range].start = start;
-	range[nr_range].end = end;
-
-	nr_range++;
-
-	return nr_range;
-}
-
-static int __init
-add_range_with_merge(struct res_range *range, int nr_range,
-		     unsigned long start, unsigned long end)
-{
-	int i;
-
-	/* Try to merge it with old one: */
-	for (i = 0; i < nr_range; i++) {
-		unsigned long final_start, final_end;
-		unsigned long common_start, common_end;
-
-		if (!range[i].end)
-			continue;
-
-		common_start = max(range[i].start, start);
-		common_end = min(range[i].end, end);
-		if (common_start > common_end + 1)
-			continue;
-
-		final_start = min(range[i].start, start);
-		final_end = max(range[i].end, end);
-
-		range[i].start = final_start;
-		range[i].end =  final_end;
-		return nr_range;
-	}
-
-	/* Need to add it: */
-	return add_range(range, nr_range, start, end);
-}
-
-static void __init
-subtract_range(struct res_range *range, unsigned long start, unsigned long end)
-{
-	int i, j;
-
-	for (j = 0; j < RANGE_NUM; j++) {
-		if (!range[j].end)
-			continue;
-
-		if (start <= range[j].start && end >= range[j].end) {
-			range[j].start = 0;
-			range[j].end = 0;
-			continue;
-		}
-
-		if (start <= range[j].start && end < range[j].end &&
-		    range[j].start < end + 1) {
-			range[j].start = end + 1;
-			continue;
-		}
-
-
-		if (start > range[j].start && end >= range[j].end &&
-		    range[j].end > start - 1) {
-			range[j].end = start - 1;
-			continue;
-		}
-
-		if (start > range[j].start && end < range[j].end) {
-			/* Find the new spare: */
-			for (i = 0; i < RANGE_NUM; i++) {
-				if (range[i].end == 0)
-					break;
-			}
-			if (i < RANGE_NUM) {
-				range[i].end = range[j].end;
-				range[i].start = end + 1;
-			} else {
-				printk(KERN_ERR "run of slot in ranges\n");
-			}
-			range[j].end = start - 1;
-			continue;
-		}
-	}
-}
-
-static int __init cmp_range(const void *x1, const void *x2)
-{
-	const struct res_range *r1 = x1;
-	const struct res_range *r2 = x2;
-	long start1, start2;
-
-	start1 = r1->start;
-	start2 = r2->start;
-
-	return start1 - start2;
-}
-
-static int __init clean_sort_range(struct res_range *range, int az)
-{
-	int i, j, k = az - 1, nr_range = 0;
-
-	for (i = 0; i < k; i++) {
-		if (range[i].end)
-			continue;
-		for (j = k; j > i; j--) {
-			if (range[j].end) {
-				k = j;
-				break;
-			}
-		}
-		if (j == i)
-			break;
-		range[i].start = range[k].start;
-		range[i].end   = range[k].end;
-		range[k].start = 0;
-		range[k].end   = 0;
-		k--;
-	}
-	/* count it */
-	for (i = 0; i < az; i++) {
-		if (!range[i].end) {
-			nr_range = i;
-			break;
-		}
-	}
-
-	/* sort them */
-	sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL);
-
-	return nr_range;
-}
-
 #define BIOS_BUG_MSG KERN_WARNING \
 	"WARNING: BIOS bug: VAR MTRR %d contains strange UC entry under 1M, check with your system vendor!\n"
 
 static int __init
-x86_get_mtrr_mem_range(struct res_range *range, int nr_range,
+x86_get_mtrr_mem_range(struct range *range, int nr_range,
 		       unsigned long extra_remove_base,
 		       unsigned long extra_remove_size)
 {
@@ -223,13 +77,13 @@ x86_get_mtrr_mem_range(struct res_range *range, int nr_range,
 			continue;
 		base = range_state[i].base_pfn;
 		size = range_state[i].size_pfn;
-		nr_range = add_range_with_merge(range, nr_range, base,
-						base + size - 1);
+		nr_range = add_range_with_merge(range, RANGE_NUM, nr_range,
+						base, base + size - 1);
 	}
 	if (debug_print) {
 		printk(KERN_DEBUG "After WB checking\n");
 		for (i = 0; i < nr_range; i++)
-			printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n",
+			printk(KERN_DEBUG "MTRR MAP PFN: %016llx - %016llx\n",
 				 range[i].start, range[i].end + 1);
 	}
 
@@ -252,10 +106,10 @@ x86_get_mtrr_mem_range(struct res_range *range, int nr_range,
 			size -= (1<<(20-PAGE_SHIFT)) - base;
 			base = 1<<(20-PAGE_SHIFT);
 		}
-		subtract_range(range, base, base + size - 1);
+		subtract_range(range, RANGE_NUM, base, base + size - 1);
 	}
 	if (extra_remove_size)
-		subtract_range(range, extra_remove_base,
+		subtract_range(range, RANGE_NUM, extra_remove_base,
 				 extra_remove_base + extra_remove_size  - 1);
 
 	if  (debug_print) {
@@ -263,7 +117,7 @@ x86_get_mtrr_mem_range(struct res_range *range, int nr_range,
 		for (i = 0; i < RANGE_NUM; i++) {
 			if (!range[i].end)
 				continue;
-			printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n",
+			printk(KERN_DEBUG "MTRR MAP PFN: %016llx - %016llx\n",
 				 range[i].start, range[i].end + 1);
 		}
 	}
@@ -273,20 +127,16 @@ x86_get_mtrr_mem_range(struct res_range *range, int nr_range,
 	if  (debug_print) {
 		printk(KERN_DEBUG "After sorting\n");
 		for (i = 0; i < nr_range; i++)
-			printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n",
+			printk(KERN_DEBUG "MTRR MAP PFN: %016llx - %016llx\n",
 				 range[i].start, range[i].end + 1);
 	}
 
-	/* clear those is not used */
-	for (i = nr_range; i < RANGE_NUM; i++)
-		memset(&range[i], 0, sizeof(range[i]));
-
 	return nr_range;
 }
 
 #ifdef CONFIG_MTRR_SANITIZER
 
-static unsigned long __init sum_ranges(struct res_range *range, int nr_range)
+static unsigned long __init sum_ranges(struct range *range, int nr_range)
 {
 	unsigned long sum = 0;
 	int i;
@@ -621,7 +471,7 @@ static int __init parse_mtrr_spare_reg(char *arg)
 early_param("mtrr_spare_reg_nr", parse_mtrr_spare_reg);
 
 static int __init
-x86_setup_var_mtrrs(struct res_range *range, int nr_range,
+x86_setup_var_mtrrs(struct range *range, int nr_range,
 		    u64 chunk_size, u64 gran_size)
 {
 	struct var_mtrr_state var_state;
@@ -742,7 +592,7 @@ mtrr_calc_range_state(u64 chunk_size, u64 gran_size,
 		      unsigned long x_remove_base,
 		      unsigned long x_remove_size, int i)
 {
-	static struct res_range range_new[RANGE_NUM];
+	static struct range range_new[RANGE_NUM];
 	unsigned long range_sums_new;
 	static int nr_range_new;
 	int num_reg;
@@ -869,10 +719,10 @@ int __init mtrr_cleanup(unsigned address_bits)
 	 * [0, 1M) should always be covered by var mtrr with WB
 	 * and fixed mtrrs should take effect before var mtrr for it:
 	 */
-	nr_range = add_range_with_merge(range, nr_range, 0,
+	nr_range = add_range_with_merge(range, RANGE_NUM, nr_range, 0,
 					(1ULL<<(20 - PAGE_SHIFT)) - 1);
 	/* Sort the ranges: */
-	sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL);
+	sort_range(range, nr_range);
 
 	range_sums = sum_ranges(range, nr_range);
 	printk(KERN_INFO "total RAM covered: %ldM\n",
diff --git a/arch/x86/kernel/mmconf-fam10h_64.c b/arch/x86/kernel/mmconf-fam10h_64.c
index 712d15fdc416..71825806cd44 100644
--- a/arch/x86/kernel/mmconf-fam10h_64.c
+++ b/arch/x86/kernel/mmconf-fam10h_64.c
@@ -7,6 +7,8 @@
 #include <linux/string.h>
 #include <linux/pci.h>
 #include <linux/dmi.h>
+#include <linux/range.h>
+
 #include <asm/pci-direct.h>
 #include <linux/sort.h>
 #include <asm/io.h>
@@ -30,11 +32,6 @@ static struct pci_hostbridge_probe pci_probes[] __cpuinitdata = {
 	{ 0xff, 0, PCI_VENDOR_ID_AMD, 0x1200 },
 };
 
-struct range {
-	u64 start;
-	u64 end;
-};
-
 static int __cpuinit cmp_range(const void *x1, const void *x2)
 {
 	const struct range *r1 = x1;
diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c
index 95ecbd495955..2356ea18697d 100644
--- a/arch/x86/pci/amd_bus.c
+++ b/arch/x86/pci/amd_bus.c
@@ -2,6 +2,8 @@
 #include <linux/pci.h>
 #include <linux/topology.h>
 #include <linux/cpu.h>
+#include <linux/range.h>
+
 #include <asm/pci_x86.h>
 
 #ifdef CONFIG_X86_64
@@ -17,58 +19,6 @@
 
 #ifdef CONFIG_X86_64
 
-#define RANGE_NUM 16
-
-struct res_range {
-	size_t start;
-	size_t end;
-};
-
-static void __init update_range(struct res_range *range, size_t start,
-				size_t end)
-{
-	int i;
-	int j;
-
-	for (j = 0; j < RANGE_NUM; j++) {
-		if (!range[j].end)
-			continue;
-
-		if (start <= range[j].start && end >= range[j].end) {
-			range[j].start = 0;
-			range[j].end = 0;
-			continue;
-		}
-
-		if (start <= range[j].start && end < range[j].end && range[j].start < end + 1) {
-			range[j].start = end + 1;
-			continue;
-		}
-
-
-		if (start > range[j].start && end >= range[j].end && range[j].end > start - 1) {
-			range[j].end = start - 1;
-			continue;
-		}
-
-		if (start > range[j].start && end < range[j].end) {
-			/* find the new spare */
-			for (i = 0; i < RANGE_NUM; i++) {
-				if (range[i].end == 0)
-					break;
-			}
-			if (i < RANGE_NUM) {
-				range[i].end = range[j].end;
-				range[i].start = end + 1;
-			} else {
-				printk(KERN_ERR "run of slot in ranges\n");
-			}
-			range[j].end = start - 1;
-			continue;
-		}
-	}
-}
-
 struct pci_hostbridge_probe {
 	u32 bus;
 	u32 slot;
@@ -111,6 +61,8 @@ static void __init get_pci_mmcfg_amd_fam10h_range(void)
 	fam10h_mmconf_end = base + (1ULL<<(segn_busn_bits + 20)) - 1;
 }
 
+#define RANGE_NUM 16
+
 /**
  * early_fill_mp_bus_to_node()
  * called before pcibios_scan_root and pci_scan_bus
@@ -132,7 +84,7 @@ static int __init early_fill_mp_bus_info(void)
 	struct resource *res;
 	size_t start;
 	size_t end;
-	struct res_range range[RANGE_NUM];
+	struct range range[RANGE_NUM];
 	u64 val;
 	u32 address;
 
@@ -226,7 +178,7 @@ static int __init early_fill_mp_bus_info(void)
 		if (end > 0xffff)
 			end = 0xffff;
 		update_res(info, start, end, IORESOURCE_IO, 1);
-		update_range(range, start, end);
+		subtract_range(range, RANGE_NUM, start, end);
 	}
 	/* add left over io port range to def node/link, [0, 0xffff] */
 	/* find the position */
@@ -256,14 +208,14 @@ static int __init early_fill_mp_bus_info(void)
 	end = (val & 0xffffff800000ULL);
 	printk(KERN_INFO "TOM: %016lx aka %ldM\n", end, end>>20);
 	if (end < (1ULL<<32))
-		update_range(range, 0, end - 1);
+		subtract_range(range, RANGE_NUM, 0, end - 1);
 
 	/* get mmconfig */
 	get_pci_mmcfg_amd_fam10h_range();
 	/* need to take out mmconf range */
 	if (fam10h_mmconf_end) {
 		printk(KERN_DEBUG "Fam 10h mmconf [%llx, %llx]\n", fam10h_mmconf_start, fam10h_mmconf_end);
-		update_range(range, fam10h_mmconf_start, fam10h_mmconf_end);
+		subtract_range(range, RANGE_NUM, fam10h_mmconf_start, fam10h_mmconf_end);
 	}
 
 	/* mmio resource */
@@ -318,7 +270,7 @@ static int __init early_fill_mp_bus_info(void)
 				/* we got a hole */
 				endx = fam10h_mmconf_start - 1;
 				update_res(info, start, endx, IORESOURCE_MEM, 0);
-				update_range(range, start, endx);
+				subtract_range(range, RANGE_NUM, start, endx);
 				printk(KERN_CONT " ==> [%llx, %llx]", (u64)start, endx);
 				start = fam10h_mmconf_end + 1;
 				changed = 1;
@@ -334,7 +286,7 @@ static int __init early_fill_mp_bus_info(void)
 		}
 
 		update_res(info, start, end, IORESOURCE_MEM, 1);
-		update_range(range, start, end);
+		subtract_range(range, RANGE_NUM, start, end);
 		printk(KERN_CONT "\n");
 	}
 
@@ -349,7 +301,7 @@ static int __init early_fill_mp_bus_info(void)
 		rdmsrl(address, val);
 		end = (val & 0xffffff800000ULL);
 		printk(KERN_INFO "TOM2: %016lx aka %ldM\n", end, end>>20);
-		update_range(range, 1ULL<<32, end - 1);
+		subtract_range(range, RANGE_NUM, 1ULL<<32, end - 1);
 	}
 
 	/*
diff --git a/include/linux/range.h b/include/linux/range.h
new file mode 100644
index 000000000000..0789f1412e1f
--- /dev/null
+++ b/include/linux/range.h
@@ -0,0 +1,22 @@
+#ifndef _LINUX_RANGE_H
+#define _LINUX_RANGE_H
+
+struct range {
+	u64   start;
+	u64   end;
+};
+
+int add_range(struct range *range, int az, int nr_range,
+		u64 start, u64 end);
+
+
+int add_range_with_merge(struct range *range, int az, int nr_range,
+				u64 start, u64 end);
+
+void subtract_range(struct range *range, int az, u64 start, u64 end);
+
+int clean_sort_range(struct range *range, int az);
+
+void sort_range(struct range *range, int nr_range);
+
+#endif
diff --git a/kernel/Makefile b/kernel/Makefile
index 864ff75d65f2..ad47330ccf32 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -10,7 +10,7 @@ obj-y     = sched.o fork.o exec_domain.o panic.o printk.o \
 	    kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
 	    hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \
 	    notifier.o ksysfs.o pm_qos_params.o sched_clock.o cred.o \
-	    async.o
+	    async.o range.o
 obj-y += groups.o
 
 ifdef CONFIG_FUNCTION_TRACER
diff --git a/kernel/range.c b/kernel/range.c
new file mode 100644
index 000000000000..71e0021281fe
--- /dev/null
+++ b/kernel/range.c
@@ -0,0 +1,163 @@
+/*
+ * Range add and subtract
+ */
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/sort.h>
+
+#include <linux/range.h>
+
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+#endif
+
+int add_range(struct range *range, int az, int nr_range, u64 start, u64 end)
+{
+	if (start > end)
+		return nr_range;
+
+	/* Out of slots: */
+	if (nr_range >= az)
+		return nr_range;
+
+	range[nr_range].start = start;
+	range[nr_range].end = end;
+
+	nr_range++;
+
+	return nr_range;
+}
+
+int add_range_with_merge(struct range *range, int az, int nr_range,
+		     u64 start, u64 end)
+{
+	int i;
+
+	if (start > end)
+		return nr_range;
+
+	/* Try to merge it with old one: */
+	for (i = 0; i < nr_range; i++) {
+		u64 final_start, final_end;
+		u64 common_start, common_end;
+
+		if (!range[i].end)
+			continue;
+
+		common_start = max(range[i].start, start);
+		common_end = min(range[i].end, end);
+		if (common_start > common_end + 1)
+			continue;
+
+		final_start = min(range[i].start, start);
+		final_end = max(range[i].end, end);
+
+		range[i].start = final_start;
+		range[i].end =  final_end;
+		return nr_range;
+	}
+
+	/* Need to add it: */
+	return add_range(range, az, nr_range, start, end);
+}
+
+void subtract_range(struct range *range, int az, u64 start, u64 end)
+{
+	int i, j;
+
+	if (start > end)
+		return;
+
+	for (j = 0; j < az; j++) {
+		if (!range[j].end)
+			continue;
+
+		if (start <= range[j].start && end >= range[j].end) {
+			range[j].start = 0;
+			range[j].end = 0;
+			continue;
+		}
+
+		if (start <= range[j].start && end < range[j].end &&
+		    range[j].start < end + 1) {
+			range[j].start = end + 1;
+			continue;
+		}
+
+
+		if (start > range[j].start && end >= range[j].end &&
+		    range[j].end > start - 1) {
+			range[j].end = start - 1;
+			continue;
+		}
+
+		if (start > range[j].start && end < range[j].end) {
+			/* Find the new spare: */
+			for (i = 0; i < az; i++) {
+				if (range[i].end == 0)
+					break;
+			}
+			if (i < az) {
+				range[i].end = range[j].end;
+				range[i].start = end + 1;
+			} else {
+				printk(KERN_ERR "run of slot in ranges\n");
+			}
+			range[j].end = start - 1;
+			continue;
+		}
+	}
+}
+
+static int cmp_range(const void *x1, const void *x2)
+{
+	const struct range *r1 = x1;
+	const struct range *r2 = x2;
+	s64 start1, start2;
+
+	start1 = r1->start;
+	start2 = r2->start;
+
+	return start1 - start2;
+}
+
+int clean_sort_range(struct range *range, int az)
+{
+	int i, j, k = az - 1, nr_range = 0;
+
+	for (i = 0; i < k; i++) {
+		if (range[i].end)
+			continue;
+		for (j = k; j > i; j--) {
+			if (range[j].end) {
+				k = j;
+				break;
+			}
+		}
+		if (j == i)
+			break;
+		range[i].start = range[k].start;
+		range[i].end   = range[k].end;
+		range[k].start = 0;
+		range[k].end   = 0;
+		k--;
+	}
+	/* count it */
+	for (i = 0; i < az; i++) {
+		if (!range[i].end) {
+			nr_range = i;
+			break;
+		}
+	}
+
+	/* sort them */
+	sort(range, nr_range, sizeof(struct range), cmp_range, NULL);
+
+	return nr_range;
+}
+
+void sort_range(struct range *range, int nr_range)
+{
+	/* sort them */
+	sort(range, nr_range, sizeof(struct range), cmp_range, NULL);
+}
-- 
cgit v1.2.3


From 9ad3f2c7c69659c343843393944d739fec1f2e73 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Wed, 10 Feb 2010 01:20:11 -0800
Subject: x86/pci: Add cap_resource()

Prepare for 32bit pci root bus

-v2: hpa said we should compare with (resource_size_t)~0
-v3: according to Linus to use MAX_RESOURCE instead.
     also need need to put related patches together
-v4: according to Andrew, use min in cap_resource()

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
LKML-Reference: <1265793639-15071-8-git-send-email-yinghai@kernel.org>
Acked-by: Jesse Barnes <jbarnes@virtuousgeek.org>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/pci/amd_bus.c  | 8 +++++---
 arch/x86/pci/bus_numa.c | 4 ++++
 include/linux/range.h   | 8 ++++++++
 3 files changed, 17 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c
index f06bb1b4a80a..f7e13b63154e 100644
--- a/arch/x86/pci/amd_bus.c
+++ b/arch/x86/pci/amd_bus.c
@@ -201,7 +201,7 @@ static int __init early_fill_mp_bus_info(void)
 
 	memset(range, 0, sizeof(range));
 	/* 0xfd00000000-0xffffffffff for HT */
-	range[0].end = (0xfdULL<<32) - 1;
+	range[0].end = cap_resource((0xfdULL<<32) - 1);
 
 	/* need to take out [0, TOM) for RAM*/
 	address = MSR_K8_TOP_MEM1;
@@ -286,7 +286,8 @@ static int __init early_fill_mp_bus_info(void)
 			}
 		}
 
-		update_res(info, start, end, IORESOURCE_MEM, 1);
+		update_res(info, cap_resource(start), cap_resource(end),
+				 IORESOURCE_MEM, 1);
 		subtract_range(range, RANGE_NUM, start, end);
 		printk(KERN_CONT "\n");
 	}
@@ -321,7 +322,8 @@ static int __init early_fill_mp_bus_info(void)
 			if (!range[i].end)
 				continue;
 
-			update_res(info, range[i].start, range[i].end,
+			update_res(info, cap_resource(range[i].start),
+				   cap_resource(range[i].end),
 				   IORESOURCE_MEM, 1);
 		}
 	}
diff --git a/arch/x86/pci/bus_numa.c b/arch/x86/pci/bus_numa.c
index 3411687b676e..3510778aa8bb 100644
--- a/arch/x86/pci/bus_numa.c
+++ b/arch/x86/pci/bus_numa.c
@@ -1,5 +1,6 @@
 #include <linux/init.h>
 #include <linux/pci.h>
+#include <linux/range.h>
 
 #include "bus_numa.h"
 
@@ -55,6 +56,9 @@ void __devinit update_res(struct pci_root_info *info, resource_size_t start,
 	if (start > end)
 		return;
 
+	if (start == MAX_RESOURCE)
+		return;
+
 	if (!merge)
 		goto addit;
 
diff --git a/include/linux/range.h b/include/linux/range.h
index 0789f1412e1f..bd184a5db791 100644
--- a/include/linux/range.h
+++ b/include/linux/range.h
@@ -19,4 +19,12 @@ int clean_sort_range(struct range *range, int az);
 
 void sort_range(struct range *range, int nr_range);
 
+#define MAX_RESOURCE ((resource_size_t)~0)
+static inline resource_size_t cap_resource(u64 val)
+{
+	if (val > MAX_RESOURCE)
+		return MAX_RESOURCE;
+
+	return val;
+}
 #endif
-- 
cgit v1.2.3


From 15682bc488d4af8c9bb998844a94281025e0a333 Mon Sep 17 00:00:00 2001
From: Peter P Waskiewicz Jr <peter.p.waskiewicz.jr@intel.com>
Date: Wed, 10 Feb 2010 20:03:05 -0800
Subject: ethtool: Introduce n-tuple filter programming support

This patchset enables the ethtool layer to program n-tuple
filters to an underlying device.  The idea is to allow capable
hardware to have static rules applied that can assist steering
flows into appropriate queues.

Hardware that is known to support these types of filters today
are ixgbe and niu.

Signed-off-by: Peter P Waskiewicz Jr <peter.p.waskiewicz.jr@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ethtool.h   |  50 +++++++
 include/linux/netdevice.h |   3 +
 net/core/dev.c            |   5 +
 net/core/ethtool.c        | 329 +++++++++++++++++++++++++++++++++++++++++++++-
 4 files changed, 386 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index ef4a2d84d922..a3cac53a0766 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -14,6 +14,7 @@
 #define _LINUX_ETHTOOL_H
 
 #include <linux/types.h>
+#include <linux/rculist.h>
 
 /* This should work for both 32 and 64 bit userland. */
 struct ethtool_cmd {
@@ -242,6 +243,7 @@ enum ethtool_stringset {
 	ETH_SS_TEST		= 0,
 	ETH_SS_STATS,
 	ETH_SS_PRIV_FLAGS,
+	ETH_SS_NTUPLE_FILTERS,
 };
 
 /* for passing string sets for data tagging */
@@ -290,6 +292,7 @@ struct ethtool_perm_addr {
  */
 enum ethtool_flags {
 	ETH_FLAG_LRO		= (1 << 15),	/* LRO is enabled */
+	ETH_FLAG_NTUPLE		= (1 << 27),	/* N-tuple filters enabled */
 };
 
 /* The following structures are for supporting RX network flow
@@ -363,6 +366,35 @@ struct ethtool_rxnfc {
 	__u32				rule_locs[0];
 };
 
+struct ethtool_rx_ntuple_flow_spec {
+	__u32		 flow_type;
+	union {
+		struct ethtool_tcpip4_spec		tcp_ip4_spec;
+		struct ethtool_tcpip4_spec		udp_ip4_spec;
+		struct ethtool_tcpip4_spec		sctp_ip4_spec;
+		struct ethtool_ah_espip4_spec		ah_ip4_spec;
+		struct ethtool_ah_espip4_spec		esp_ip4_spec;
+		struct ethtool_rawip4_spec		raw_ip4_spec;
+		struct ethtool_ether_spec		ether_spec;
+		struct ethtool_usrip4_spec		usr_ip4_spec;
+		__u8					hdata[64];
+	} h_u, m_u; /* entry, mask */
+
+	__u16	        vlan_tag;
+	__u16	        vlan_tag_mask;
+	__u64		data;      /* user-defined flow spec data */
+	__u64		data_mask; /* user-defined flow spec mask */
+
+	/* signed to distinguish between queue and actions (DROP) */
+	__s32		action;
+#define ETHTOOL_RXNTUPLE_ACTION_DROP -1
+};
+
+struct ethtool_rx_ntuple {
+	__u32					cmd;
+	struct ethtool_rx_ntuple_flow_spec	fs;
+};
+
 #define ETHTOOL_FLASH_MAX_FILENAME	128
 enum ethtool_flash_op_type {
 	ETHTOOL_FLASH_ALL_REGIONS	= 0,
@@ -377,6 +409,18 @@ struct ethtool_flash {
 
 #ifdef __KERNEL__
 
+struct ethtool_rx_ntuple_flow_spec_container {
+	struct ethtool_rx_ntuple_flow_spec fs;
+	struct list_head list;
+};
+
+struct ethtool_rx_ntuple_list {
+#define ETHTOOL_MAX_NTUPLE_LIST_ENTRY 1024
+#define ETHTOOL_MAX_NTUPLE_STRING_PER_ENTRY 14
+	struct list_head	list;
+	unsigned int		count;
+};
+
 struct net_device;
 
 /* Some generic methods drivers may use in their ethtool_ops */
@@ -394,6 +438,7 @@ u32 ethtool_op_get_ufo(struct net_device *dev);
 int ethtool_op_set_ufo(struct net_device *dev, u32 data);
 u32 ethtool_op_get_flags(struct net_device *dev);
 int ethtool_op_set_flags(struct net_device *dev, u32 data);
+void ethtool_ntuple_flush(struct net_device *dev);
 
 /**
  * &ethtool_ops - Alter and report network device settings
@@ -500,6 +545,8 @@ struct ethtool_ops {
 	int	(*set_rxnfc)(struct net_device *, struct ethtool_rxnfc *);
 	int     (*flash_device)(struct net_device *, struct ethtool_flash *);
 	int	(*reset)(struct net_device *, u32 *);
+	int	(*set_rx_ntuple)(struct net_device *, struct ethtool_rx_ntuple *);
+	int	(*get_rx_ntuple)(struct net_device *, u32 stringset, void *);
 };
 #endif /* __KERNEL__ */
 
@@ -559,6 +606,9 @@ struct ethtool_ops {
 #define	ETHTOOL_FLASHDEV	0x00000033 /* Flash firmware to device */
 #define	ETHTOOL_RESET		0x00000034 /* Reset hardware */
 
+#define ETHTOOL_SRXNTUPLE	0x00000035 /* Add an n-tuple filter to device */
+#define ETHTOOL_GRXNTUPLE	0x00000036 /* Get n-tuple filters from device */
+
 /* compatibility with older code */
 #define SPARC_ETH_GSET		ETHTOOL_GSET
 #define SPARC_ETH_SSET		ETHTOOL_SSET
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index e535700a3b72..cdf53a8d9ff5 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -746,6 +746,7 @@ struct net_device {
 #define NETIF_F_FCOE_CRC	(1 << 24) /* FCoE CRC32 */
 #define NETIF_F_SCTP_CSUM	(1 << 25) /* SCTP checksum offload */
 #define NETIF_F_FCOE_MTU	(1 << 26) /* Supports max FCoE MTU, 2158 bytes*/
+#define NETIF_F_NTUPLE		(1 << 27) /* N-tuple filters supported */
 
 	/* Segmentation offload features */
 #define NETIF_F_GSO_SHIFT	16
@@ -954,6 +955,8 @@ struct net_device {
 	/* max exchange id for FCoE LRO by ddp */
 	unsigned int		fcoe_ddp_xid;
 #endif
+	/* n-tuple filter list attached to this device */
+	struct ethtool_rx_ntuple_list ethtool_ntuple_list;
 };
 #define to_net_dev(d) container_of(d, struct net_device, dev)
 
diff --git a/net/core/dev.c b/net/core/dev.c
index 94c1eeed25e5..ae75f25ac0a5 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5419,6 +5419,8 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
 
 	netdev_init_queues(dev);
 
+	INIT_LIST_HEAD(&dev->ethtool_ntuple_list.list);
+	dev->ethtool_ntuple_list.count = 0;
 	INIT_LIST_HEAD(&dev->napi_list);
 	INIT_LIST_HEAD(&dev->unreg_list);
 	INIT_LIST_HEAD(&dev->link_watch_list);
@@ -5455,6 +5457,9 @@ void free_netdev(struct net_device *dev)
 	/* Flush device addresses */
 	dev_addr_flush(dev);
 
+	/* Clear ethtool n-tuple list */
+	ethtool_ntuple_flush(dev);
+
 	list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
 		netif_napi_del(p);
 
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index d8aee584e8d1..6ec73d3983a3 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -120,7 +120,7 @@ int ethtool_op_set_ufo(struct net_device *dev, u32 data)
  * NETIF_F_xxx values in include/linux/netdevice.h
  */
 static const u32 flags_dup_features =
-	ETH_FLAG_LRO;
+	(ETH_FLAG_LRO | ETH_FLAG_NTUPLE);
 
 u32 ethtool_op_get_flags(struct net_device *dev)
 {
@@ -139,9 +139,26 @@ int ethtool_op_set_flags(struct net_device *dev, u32 data)
 	else
 		dev->features &= ~NETIF_F_LRO;
 
+	if (data & ETH_FLAG_NTUPLE)
+		dev->features |= NETIF_F_NTUPLE;
+	else
+		dev->features &= ~NETIF_F_NTUPLE;
+
 	return 0;
 }
 
+void ethtool_ntuple_flush(struct net_device *dev)
+{
+	struct ethtool_rx_ntuple_flow_spec_container *fsc, *f;
+
+	list_for_each_entry_safe(fsc, f, &dev->ethtool_ntuple_list.list, list) {
+		list_del(&fsc->list);
+		kfree(fsc);
+	}
+	dev->ethtool_ntuple_list.count = 0;
+}
+EXPORT_SYMBOL(ethtool_ntuple_flush);
+
 /* Handlers for each ethtool command */
 
 static int ethtool_get_settings(struct net_device *dev, void __user *useraddr)
@@ -266,6 +283,307 @@ err_out:
 	return ret;
 }
 
+static int __rx_ntuple_filter_add(struct ethtool_rx_ntuple_list *list,
+                                  struct ethtool_rx_ntuple_flow_spec *spec)
+{
+	struct ethtool_rx_ntuple_flow_spec_container *fsc;
+
+	/* don't add filters forever */
+	if (list->count >= ETHTOOL_MAX_NTUPLE_LIST_ENTRY)
+		return 0;
+
+	fsc = kmalloc(sizeof(*fsc), GFP_ATOMIC);
+	if (!fsc)
+		return -ENOMEM;
+
+	/* Copy the whole filter over */
+	fsc->fs.flow_type = spec->flow_type;
+	memcpy(&fsc->fs.h_u, &spec->h_u, sizeof(spec->h_u));
+	memcpy(&fsc->fs.m_u, &spec->m_u, sizeof(spec->m_u));
+
+	fsc->fs.vlan_tag = spec->vlan_tag;
+	fsc->fs.vlan_tag_mask = spec->vlan_tag_mask;
+	fsc->fs.data = spec->data;
+	fsc->fs.data_mask = spec->data_mask;
+	fsc->fs.action = spec->action;
+
+	/* add to the list */
+	list_add_tail_rcu(&fsc->list, &list->list);
+	list->count++;
+
+	return 0;
+}
+
+static int ethtool_set_rx_ntuple(struct net_device *dev, void __user *useraddr)
+{
+	struct ethtool_rx_ntuple cmd;
+	const struct ethtool_ops *ops = dev->ethtool_ops;
+	int ret;
+
+	if (!ops->set_rx_ntuple)
+		return -EOPNOTSUPP;
+
+	if (!(dev->features & NETIF_F_NTUPLE))
+		return -EINVAL;
+
+	if (copy_from_user(&cmd, useraddr, sizeof(cmd)))
+		return -EFAULT;
+
+	ret = ops->set_rx_ntuple(dev, &cmd);
+
+	/*
+	 * Cache filter in dev struct for GET operation only if
+	 * the underlying driver doesn't have its own GET operation, and
+	 * only if the filter was added successfully.
+	 */
+	if (!ops->get_rx_ntuple && !ret)
+		if (__rx_ntuple_filter_add(&dev->ethtool_ntuple_list, &cmd.fs))
+			return -ENOMEM;
+
+	return ret;
+}
+
+static int ethtool_get_rx_ntuple(struct net_device *dev, void __user *useraddr)
+{
+	struct ethtool_gstrings gstrings;
+	const struct ethtool_ops *ops = dev->ethtool_ops;
+	struct ethtool_rx_ntuple_flow_spec_container *fsc;
+	u8 *data;
+	char *p;
+	int ret, i, num_strings = 0;
+
+	if (!ops->get_sset_count)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&gstrings, useraddr, sizeof(gstrings)))
+		return -EFAULT;
+
+	ret = ops->get_sset_count(dev, gstrings.string_set);
+	if (ret < 0)
+		return ret;
+
+	gstrings.len = ret;
+
+	data = kmalloc(gstrings.len * ETH_GSTRING_LEN, GFP_USER);
+	if (!data)
+		return -ENOMEM;
+
+	if (ops->get_rx_ntuple) {
+		/* driver-specific filter grab */
+		ret = ops->get_rx_ntuple(dev, gstrings.string_set, data);
+		goto copy;
+	}
+
+	/* default ethtool filter grab */
+	i = 0;
+	p = (char *)data;
+	list_for_each_entry(fsc, &dev->ethtool_ntuple_list.list, list) {
+		sprintf(p, "Filter %d:\n", i);
+		p += ETH_GSTRING_LEN;
+		num_strings++;
+
+		switch (fsc->fs.flow_type) {
+		case TCP_V4_FLOW:
+			sprintf(p, "\tFlow Type: TCP\n");
+			p += ETH_GSTRING_LEN;
+			num_strings++;
+			break;
+		case UDP_V4_FLOW:
+			sprintf(p, "\tFlow Type: UDP\n");
+			p += ETH_GSTRING_LEN;
+			num_strings++;
+			break;
+		case SCTP_V4_FLOW:
+			sprintf(p, "\tFlow Type: SCTP\n");
+			p += ETH_GSTRING_LEN;
+			num_strings++;
+			break;
+		case AH_ESP_V4_FLOW:
+			sprintf(p, "\tFlow Type: AH ESP\n");
+			p += ETH_GSTRING_LEN;
+			num_strings++;
+			break;
+		case ESP_V4_FLOW:
+			sprintf(p, "\tFlow Type: ESP\n");
+			p += ETH_GSTRING_LEN;
+			num_strings++;
+			break;
+		case IP_USER_FLOW:
+			sprintf(p, "\tFlow Type: Raw IP\n");
+			p += ETH_GSTRING_LEN;
+			num_strings++;
+			break;
+		case IPV4_FLOW:
+			sprintf(p, "\tFlow Type: IPv4\n");
+			p += ETH_GSTRING_LEN;
+			num_strings++;
+			break;
+		default:
+			sprintf(p, "\tFlow Type: Unknown\n");
+			p += ETH_GSTRING_LEN;
+			num_strings++;
+			goto unknown_filter;
+		};
+
+		/* now the rest of the filters */
+		switch (fsc->fs.flow_type) {
+		case TCP_V4_FLOW:
+		case UDP_V4_FLOW:
+		case SCTP_V4_FLOW:
+			sprintf(p, "\tSrc IP addr: 0x%x\n",
+			        fsc->fs.h_u.tcp_ip4_spec.ip4src);
+			p += ETH_GSTRING_LEN;
+			num_strings++;
+			sprintf(p, "\tSrc IP mask: 0x%x\n",
+			        fsc->fs.m_u.tcp_ip4_spec.ip4src);
+			p += ETH_GSTRING_LEN;
+			num_strings++;
+			sprintf(p, "\tDest IP addr: 0x%x\n",
+			        fsc->fs.h_u.tcp_ip4_spec.ip4dst);
+			p += ETH_GSTRING_LEN;
+			num_strings++;
+			sprintf(p, "\tDest IP mask: 0x%x\n",
+			        fsc->fs.m_u.tcp_ip4_spec.ip4dst);
+			p += ETH_GSTRING_LEN;
+			num_strings++;
+			sprintf(p, "\tSrc Port: %d, mask: 0x%x\n",
+			        fsc->fs.h_u.tcp_ip4_spec.psrc,
+			        fsc->fs.m_u.tcp_ip4_spec.psrc);
+			p += ETH_GSTRING_LEN;
+			num_strings++;
+			sprintf(p, "\tDest Port: %d, mask: 0x%x\n",
+			        fsc->fs.h_u.tcp_ip4_spec.pdst,
+			        fsc->fs.m_u.tcp_ip4_spec.pdst);
+			p += ETH_GSTRING_LEN;
+			num_strings++;
+			sprintf(p, "\tTOS: %d, mask: 0x%x\n",
+			        fsc->fs.h_u.tcp_ip4_spec.tos,
+			        fsc->fs.m_u.tcp_ip4_spec.tos);
+			p += ETH_GSTRING_LEN;
+			num_strings++;
+			break;
+		case AH_ESP_V4_FLOW:
+		case ESP_V4_FLOW:
+			sprintf(p, "\tSrc IP addr: 0x%x\n",
+			        fsc->fs.h_u.ah_ip4_spec.ip4src);
+			p += ETH_GSTRING_LEN;
+			num_strings++;
+			sprintf(p, "\tSrc IP mask: 0x%x\n",
+			        fsc->fs.m_u.ah_ip4_spec.ip4src);
+			p += ETH_GSTRING_LEN;
+			num_strings++;
+			sprintf(p, "\tDest IP addr: 0x%x\n",
+			        fsc->fs.h_u.ah_ip4_spec.ip4dst);
+			p += ETH_GSTRING_LEN;
+			num_strings++;
+			sprintf(p, "\tDest IP mask: 0x%x\n",
+			        fsc->fs.m_u.ah_ip4_spec.ip4dst);
+			p += ETH_GSTRING_LEN;
+			num_strings++;
+			sprintf(p, "\tSPI: %d, mask: 0x%x\n",
+			        fsc->fs.h_u.ah_ip4_spec.spi,
+			        fsc->fs.m_u.ah_ip4_spec.spi);
+			p += ETH_GSTRING_LEN;
+			num_strings++;
+			sprintf(p, "\tTOS: %d, mask: 0x%x\n",
+			        fsc->fs.h_u.ah_ip4_spec.tos,
+			        fsc->fs.m_u.ah_ip4_spec.tos);
+			p += ETH_GSTRING_LEN;
+			num_strings++;
+			break;
+		case IP_USER_FLOW:
+			sprintf(p, "\tSrc IP addr: 0x%x\n",
+			        fsc->fs.h_u.raw_ip4_spec.ip4src);
+			p += ETH_GSTRING_LEN;
+			num_strings++;
+			sprintf(p, "\tSrc IP mask: 0x%x\n",
+			        fsc->fs.m_u.raw_ip4_spec.ip4src);
+			p += ETH_GSTRING_LEN;
+			num_strings++;
+			sprintf(p, "\tDest IP addr: 0x%x\n",
+			        fsc->fs.h_u.raw_ip4_spec.ip4dst);
+			p += ETH_GSTRING_LEN;
+			num_strings++;
+			sprintf(p, "\tDest IP mask: 0x%x\n",
+			        fsc->fs.m_u.raw_ip4_spec.ip4dst);
+			p += ETH_GSTRING_LEN;
+			num_strings++;
+			break;
+		case IPV4_FLOW:
+			sprintf(p, "\tSrc IP addr: 0x%x\n",
+			        fsc->fs.h_u.usr_ip4_spec.ip4src);
+			p += ETH_GSTRING_LEN;
+			num_strings++;
+			sprintf(p, "\tSrc IP mask: 0x%x\n",
+			        fsc->fs.m_u.usr_ip4_spec.ip4src);
+			p += ETH_GSTRING_LEN;
+			num_strings++;
+			sprintf(p, "\tDest IP addr: 0x%x\n",
+			        fsc->fs.h_u.usr_ip4_spec.ip4dst);
+			p += ETH_GSTRING_LEN;
+			num_strings++;
+			sprintf(p, "\tDest IP mask: 0x%x\n",
+			        fsc->fs.m_u.usr_ip4_spec.ip4dst);
+			p += ETH_GSTRING_LEN;
+			num_strings++;
+			sprintf(p, "\tL4 bytes: 0x%x, mask: 0x%x\n",
+			        fsc->fs.h_u.usr_ip4_spec.l4_4_bytes,
+			        fsc->fs.m_u.usr_ip4_spec.l4_4_bytes);
+			p += ETH_GSTRING_LEN;
+			num_strings++;
+			sprintf(p, "\tTOS: %d, mask: 0x%x\n",
+			        fsc->fs.h_u.usr_ip4_spec.tos,
+			        fsc->fs.m_u.usr_ip4_spec.tos);
+			p += ETH_GSTRING_LEN;
+			num_strings++;
+			sprintf(p, "\tIP Version: %d, mask: 0x%x\n",
+			        fsc->fs.h_u.usr_ip4_spec.ip_ver,
+			        fsc->fs.m_u.usr_ip4_spec.ip_ver);
+			p += ETH_GSTRING_LEN;
+			num_strings++;
+			sprintf(p, "\tProtocol: %d, mask: 0x%x\n",
+			        fsc->fs.h_u.usr_ip4_spec.proto,
+			        fsc->fs.m_u.usr_ip4_spec.proto);
+			p += ETH_GSTRING_LEN;
+			num_strings++;
+			break;
+		};
+		sprintf(p, "\tVLAN: %d, mask: 0x%x\n",
+		        fsc->fs.vlan_tag, fsc->fs.vlan_tag_mask);
+		p += ETH_GSTRING_LEN;
+		num_strings++;
+		sprintf(p, "\tUser-defined: 0x%Lx\n", fsc->fs.data);
+		p += ETH_GSTRING_LEN;
+		num_strings++;
+		sprintf(p, "\tUser-defined mask: 0x%Lx\n", fsc->fs.data_mask);
+		p += ETH_GSTRING_LEN;
+		num_strings++;
+		if (fsc->fs.action == ETHTOOL_RXNTUPLE_ACTION_DROP)
+			sprintf(p, "\tAction: Drop\n");
+		else
+			sprintf(p, "\tAction: Direct to queue %d\n",
+			        fsc->fs.action);
+		p += ETH_GSTRING_LEN;
+		num_strings++;
+unknown_filter:
+		i++;
+	}
+copy:
+	/* indicate to userspace how many strings we actually have */
+	gstrings.len = num_strings;
+	ret = -EFAULT;
+	if (copy_to_user(useraddr, &gstrings, sizeof(gstrings)))
+		goto out;
+	useraddr += sizeof(gstrings);
+	if (copy_to_user(useraddr, data, gstrings.len * ETH_GSTRING_LEN))
+		goto out;
+	ret = 0;
+
+out:
+	kfree(data);
+	return ret;
+}
+
 static int ethtool_get_regs(struct net_device *dev, char __user *useraddr)
 {
 	struct ethtool_regs regs;
@@ -313,6 +631,9 @@ static int ethtool_reset(struct net_device *dev, char __user *useraddr)
 	if (copy_from_user(&reset, useraddr, sizeof(reset)))
 		return -EFAULT;
 
+	/* Clear ethtool n-tuple list */
+	ethtool_ntuple_flush(dev);
+
 	ret = dev->ethtool_ops->reset(dev, &reset.data);
 	if (ret)
 		return ret;
@@ -1112,6 +1433,12 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
 	case ETHTOOL_RESET:
 		rc = ethtool_reset(dev, useraddr);
 		break;
+	case ETHTOOL_SRXNTUPLE:
+		rc = ethtool_set_rx_ntuple(dev, useraddr);
+		break;
+	case ETHTOOL_GRXNTUPLE:
+		rc = ethtool_get_rx_ntuple(dev, useraddr);
+		break;
 	default:
 		rc = -EOPNOTSUPP;
 	}
-- 
cgit v1.2.3


From 8f8be2439cd368cc6ba94888919ee90b5a26f0cb Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@opensource.se>
Date: Wed, 10 Feb 2010 23:03:22 -0800
Subject: Input: sh_keysc - update the driver with mode 6

Add mode 6 support to the sh_keysc driver. Also update the KYOUTDR mask
value to include all 16 register bits.

Signed-off-by: Magnus Damm <damm@opensource.se>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/input/keyboard/sh_keysc.c | 3 ++-
 include/linux/input/sh_keysc.h    | 6 +++---
 2 files changed, 5 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/input/keyboard/sh_keysc.c b/drivers/input/keyboard/sh_keysc.c
index c2fc97732f0c..854e2035cd6e 100644
--- a/drivers/input/keyboard/sh_keysc.c
+++ b/drivers/input/keyboard/sh_keysc.c
@@ -31,6 +31,7 @@ static const struct {
 	[SH_KEYSC_MODE_3] = { 2, 4, 7 },
 	[SH_KEYSC_MODE_4] = { 3, 6, 6 },
 	[SH_KEYSC_MODE_5] = { 4, 6, 7 },
+	[SH_KEYSC_MODE_6] = { 5, 7, 7 },
 };
 
 struct sh_keysc_priv {
@@ -109,7 +110,7 @@ static irqreturn_t sh_keysc_isr(int irq, void *dev_id)
 			n = keyin_nr * i;
 
 			/* drive one KEYOUT pin low, read KEYIN pins */
-			sh_keysc_write(priv, KYOUTDR, 0xfff ^ (3 << (i * 2)));
+			sh_keysc_write(priv, KYOUTDR, 0xffff ^ (3 << (i * 2)));
 			udelay(pdata->delay);
 			tmp = sh_keysc_read(priv, KYINDR);
 
diff --git a/include/linux/input/sh_keysc.h b/include/linux/input/sh_keysc.h
index 2aff38bcf2ba..649dc7f12925 100644
--- a/include/linux/input/sh_keysc.h
+++ b/include/linux/input/sh_keysc.h
@@ -1,15 +1,15 @@
 #ifndef __SH_KEYSC_H__
 #define __SH_KEYSC_H__
 
-#define SH_KEYSC_MAXKEYS 42
+#define SH_KEYSC_MAXKEYS 49
 
 struct sh_keysc_info {
 	enum { SH_KEYSC_MODE_1, SH_KEYSC_MODE_2, SH_KEYSC_MODE_3,
-	       SH_KEYSC_MODE_4, SH_KEYSC_MODE_5 } mode;
+	       SH_KEYSC_MODE_4, SH_KEYSC_MODE_5, SH_KEYSC_MODE_6 } mode;
 	int scan_timing; /* 0 -> 7, see KYCR1, SCN[2:0] */
 	int delay;
 	int kycr2_delay;
-	int keycodes[SH_KEYSC_MAXKEYS];
+	int keycodes[SH_KEYSC_MAXKEYS]; /* KEYIN * KEYOUT */
 };
 
 #endif /* __SH_KEYSC_H__ */
-- 
cgit v1.2.3


From 3b6b9fab42fe98358d70735cf98d43fc18dc79c9 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Thu, 11 Feb 2010 12:23:53 +0100
Subject: netfilter: nf_conntrack_sip: pass data offset to NAT functions

When using TCP multiple SIP messages might be present in a single packet.
A following patch will parse them by setting the dptr to the beginning of
each message. The NAT helper needs to reload the dptr value after mangling
the packet however, so it needs to know the offset of the message to the
beginning of the packet.

Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/netfilter/nf_conntrack_sip.h |  14 ++--
 net/ipv4/netfilter/nf_nat_sip.c            | 101 +++++++++++++++--------------
 net/netfilter/nf_conntrack_sip.c           |  82 ++++++++++++-----------
 3 files changed, 105 insertions(+), 92 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/nf_conntrack_sip.h b/include/linux/netfilter/nf_conntrack_sip.h
index 23aa2ec6b7b7..2c6950b8bf7e 100644
--- a/include/linux/netfilter/nf_conntrack_sip.h
+++ b/include/linux/netfilter/nf_conntrack_sip.h
@@ -34,10 +34,10 @@ struct sdp_media_type {
 struct sip_handler {
 	const char	*method;
 	unsigned int	len;
-	int		(*request)(struct sk_buff *skb,
+	int		(*request)(struct sk_buff *skb, unsigned int dataoff,
 				   const char **dptr, unsigned int *datalen,
 				   unsigned int cseq);
-	int		(*response)(struct sk_buff *skb,
+	int		(*response)(struct sk_buff *skb, unsigned int dataoff,
 				    const char **dptr, unsigned int *datalen,
 				    unsigned int cseq, unsigned int code);
 };
@@ -100,33 +100,39 @@ enum sdp_header_types {
 };
 
 extern unsigned int (*nf_nat_sip_hook)(struct sk_buff *skb,
+				       unsigned int dataoff,
 				       const char **dptr,
 				       unsigned int *datalen);
 extern unsigned int (*nf_nat_sip_expect_hook)(struct sk_buff *skb,
+					      unsigned int dataoff,
 					      const char **dptr,
 					      unsigned int *datalen,
 					      struct nf_conntrack_expect *exp,
 					      unsigned int matchoff,
 					      unsigned int matchlen);
 extern unsigned int (*nf_nat_sdp_addr_hook)(struct sk_buff *skb,
-					    const char **dptr,
 					    unsigned int dataoff,
+					    const char **dptr,
 					    unsigned int *datalen,
+					    unsigned int sdpoff,
 					    enum sdp_header_types type,
 					    enum sdp_header_types term,
 					    const union nf_inet_addr *addr);
 extern unsigned int (*nf_nat_sdp_port_hook)(struct sk_buff *skb,
+					    unsigned int dataoff,
 					    const char **dptr,
 					    unsigned int *datalen,
 					    unsigned int matchoff,
 					    unsigned int matchlen,
 					    u_int16_t port);
 extern unsigned int (*nf_nat_sdp_session_hook)(struct sk_buff *skb,
-					       const char **dptr,
 					       unsigned int dataoff,
+					       const char **dptr,
 					       unsigned int *datalen,
+					       unsigned int sdpoff,
 					       const union nf_inet_addr *addr);
 extern unsigned int (*nf_nat_sdp_media_hook)(struct sk_buff *skb,
+					     unsigned int dataoff,
 					     const char **dptr,
 					     unsigned int *datalen,
 					     struct nf_conntrack_expect *rtp_exp,
diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c
index 07d61a57613c..2454ea5abb79 100644
--- a/net/ipv4/netfilter/nf_nat_sip.c
+++ b/net/ipv4/netfilter/nf_nat_sip.c
@@ -29,7 +29,7 @@ MODULE_DESCRIPTION("SIP NAT helper");
 MODULE_ALIAS("ip_nat_sip");
 
 
-static unsigned int mangle_packet(struct sk_buff *skb,
+static unsigned int mangle_packet(struct sk_buff *skb, unsigned int dataoff,
 				  const char **dptr, unsigned int *datalen,
 				  unsigned int matchoff, unsigned int matchlen,
 				  const char *buffer, unsigned int buflen)
@@ -42,12 +42,12 @@ static unsigned int mangle_packet(struct sk_buff *skb,
 		return 0;
 
 	/* Reload data pointer and adjust datalen value */
-	*dptr = skb->data + ip_hdrlen(skb) + sizeof(struct udphdr);
+	*dptr = skb->data + dataoff;
 	*datalen += buflen - matchlen;
 	return 1;
 }
 
-static int map_addr(struct sk_buff *skb,
+static int map_addr(struct sk_buff *skb, unsigned int dataoff,
 		    const char **dptr, unsigned int *datalen,
 		    unsigned int matchoff, unsigned int matchlen,
 		    union nf_inet_addr *addr, __be16 port)
@@ -76,11 +76,11 @@ static int map_addr(struct sk_buff *skb,
 
 	buflen = sprintf(buffer, "%pI4:%u", &newaddr, ntohs(newport));
 
-	return mangle_packet(skb, dptr, datalen, matchoff, matchlen,
+	return mangle_packet(skb, dataoff, dptr, datalen, matchoff, matchlen,
 			     buffer, buflen);
 }
 
-static int map_sip_addr(struct sk_buff *skb,
+static int map_sip_addr(struct sk_buff *skb, unsigned int dataoff,
 			const char **dptr, unsigned int *datalen,
 			enum sip_header_types type)
 {
@@ -93,16 +93,17 @@ static int map_sip_addr(struct sk_buff *skb,
 	if (ct_sip_parse_header_uri(ct, *dptr, NULL, *datalen, type, NULL,
 				    &matchoff, &matchlen, &addr, &port) <= 0)
 		return 1;
-	return map_addr(skb, dptr, datalen, matchoff, matchlen, &addr, port);
+	return map_addr(skb, dataoff, dptr, datalen, matchoff, matchlen,
+			&addr, port);
 }
 
-static unsigned int ip_nat_sip(struct sk_buff *skb,
+static unsigned int ip_nat_sip(struct sk_buff *skb, unsigned int dataoff,
 			       const char **dptr, unsigned int *datalen)
 {
 	enum ip_conntrack_info ctinfo;
 	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
 	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
-	unsigned int dataoff, matchoff, matchlen;
+	unsigned int coff, matchoff, matchlen;
 	union nf_inet_addr addr;
 	__be16 port;
 	int request, in_header;
@@ -112,7 +113,7 @@ static unsigned int ip_nat_sip(struct sk_buff *skb,
 		if (ct_sip_parse_request(ct, *dptr, *datalen,
 					 &matchoff, &matchlen,
 					 &addr, &port) > 0 &&
-		    !map_addr(skb, dptr, datalen, matchoff, matchlen,
+		    !map_addr(skb, dataoff, dptr, datalen, matchoff, matchlen,
 			      &addr, port))
 			return NF_DROP;
 		request = 1;
@@ -138,7 +139,7 @@ static unsigned int ip_nat_sip(struct sk_buff *skb,
 				goto next;
 		}
 
-		if (!map_addr(skb, dptr, datalen, matchoff, matchlen,
+		if (!map_addr(skb, dataoff, dptr, datalen, matchoff, matchlen,
 			      &addr, port))
 			return NF_DROP;
 
@@ -153,8 +154,8 @@ static unsigned int ip_nat_sip(struct sk_buff *skb,
 		    addr.ip != ct->tuplehash[!dir].tuple.dst.u3.ip) {
 			buflen = sprintf(buffer, "%pI4",
 					&ct->tuplehash[!dir].tuple.dst.u3.ip);
-			if (!mangle_packet(skb, dptr, datalen, poff, plen,
-					   buffer, buflen))
+			if (!mangle_packet(skb, dataoff, dptr, datalen,
+					   poff, plen, buffer, buflen))
 				return NF_DROP;
 		}
 
@@ -167,8 +168,8 @@ static unsigned int ip_nat_sip(struct sk_buff *skb,
 		    addr.ip != ct->tuplehash[!dir].tuple.src.u3.ip) {
 			buflen = sprintf(buffer, "%pI4",
 					&ct->tuplehash[!dir].tuple.src.u3.ip);
-			if (!mangle_packet(skb, dptr, datalen, poff, plen,
-					   buffer, buflen))
+			if (!mangle_packet(skb, dataoff, dptr, datalen,
+					   poff, plen, buffer, buflen))
 				return NF_DROP;
 		}
 
@@ -181,27 +182,27 @@ static unsigned int ip_nat_sip(struct sk_buff *skb,
 		    htons(n) != ct->tuplehash[!dir].tuple.src.u.udp.port) {
 			__be16 p = ct->tuplehash[!dir].tuple.src.u.udp.port;
 			buflen = sprintf(buffer, "%u", ntohs(p));
-			if (!mangle_packet(skb, dptr, datalen, poff, plen,
-					   buffer, buflen))
+			if (!mangle_packet(skb, dataoff, dptr, datalen,
+					   poff, plen, buffer, buflen))
 				return NF_DROP;
 		}
 	}
 
 next:
 	/* Translate Contact headers */
-	dataoff = 0;
+	coff = 0;
 	in_header = 0;
-	while (ct_sip_parse_header_uri(ct, *dptr, &dataoff, *datalen,
+	while (ct_sip_parse_header_uri(ct, *dptr, &coff, *datalen,
 				       SIP_HDR_CONTACT, &in_header,
 				       &matchoff, &matchlen,
 				       &addr, &port) > 0) {
-		if (!map_addr(skb, dptr, datalen, matchoff, matchlen,
+		if (!map_addr(skb, dataoff, dptr, datalen, matchoff, matchlen,
 			      &addr, port))
 			return NF_DROP;
 	}
 
-	if (!map_sip_addr(skb, dptr, datalen, SIP_HDR_FROM) ||
-	    !map_sip_addr(skb, dptr, datalen, SIP_HDR_TO))
+	if (!map_sip_addr(skb, dataoff, dptr, datalen, SIP_HDR_FROM) ||
+	    !map_sip_addr(skb, dataoff, dptr, datalen, SIP_HDR_TO))
 		return NF_DROP;
 	return NF_ACCEPT;
 }
@@ -232,7 +233,7 @@ static void ip_nat_sip_expected(struct nf_conn *ct,
 	}
 }
 
-static unsigned int ip_nat_sip_expect(struct sk_buff *skb,
+static unsigned int ip_nat_sip_expect(struct sk_buff *skb, unsigned int dataoff,
 				      const char **dptr, unsigned int *datalen,
 				      struct nf_conntrack_expect *exp,
 				      unsigned int matchoff,
@@ -279,8 +280,8 @@ static unsigned int ip_nat_sip_expect(struct sk_buff *skb,
 	if (exp->tuple.dst.u3.ip != exp->saved_ip ||
 	    exp->tuple.dst.u.udp.port != exp->saved_proto.udp.port) {
 		buflen = sprintf(buffer, "%pI4:%u", &newip, port);
-		if (!mangle_packet(skb, dptr, datalen, matchoff, matchlen,
-				   buffer, buflen))
+		if (!mangle_packet(skb, dataoff, dptr, datalen,
+				   matchoff, matchlen, buffer, buflen))
 			goto err;
 	}
 	return NF_ACCEPT;
@@ -290,7 +291,7 @@ err:
 	return NF_DROP;
 }
 
-static int mangle_content_len(struct sk_buff *skb,
+static int mangle_content_len(struct sk_buff *skb, unsigned int dataoff,
 			      const char **dptr, unsigned int *datalen)
 {
 	enum ip_conntrack_info ctinfo;
@@ -312,12 +313,13 @@ static int mangle_content_len(struct sk_buff *skb,
 		return 0;
 
 	buflen = sprintf(buffer, "%u", c_len);
-	return mangle_packet(skb, dptr, datalen, matchoff, matchlen,
+	return mangle_packet(skb, dataoff, dptr, datalen, matchoff, matchlen,
 			     buffer, buflen);
 }
 
-static int mangle_sdp_packet(struct sk_buff *skb, const char **dptr,
-			     unsigned int dataoff, unsigned int *datalen,
+static int mangle_sdp_packet(struct sk_buff *skb, unsigned int dataoff,
+			     const char **dptr, unsigned int *datalen,
+			     unsigned int sdpoff,
 			     enum sdp_header_types type,
 			     enum sdp_header_types term,
 			     char *buffer, int buflen)
@@ -326,16 +328,16 @@ static int mangle_sdp_packet(struct sk_buff *skb, const char **dptr,
 	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
 	unsigned int matchlen, matchoff;
 
-	if (ct_sip_get_sdp_header(ct, *dptr, dataoff, *datalen, type, term,
+	if (ct_sip_get_sdp_header(ct, *dptr, sdpoff, *datalen, type, term,
 				  &matchoff, &matchlen) <= 0)
 		return -ENOENT;
-	return mangle_packet(skb, dptr, datalen, matchoff, matchlen,
+	return mangle_packet(skb, dataoff, dptr, datalen, matchoff, matchlen,
 			     buffer, buflen) ? 0 : -EINVAL;
 }
 
-static unsigned int ip_nat_sdp_addr(struct sk_buff *skb, const char **dptr,
-				    unsigned int dataoff,
-				    unsigned int *datalen,
+static unsigned int ip_nat_sdp_addr(struct sk_buff *skb, unsigned int dataoff,
+				    const char **dptr, unsigned int *datalen,
+				    unsigned int sdpoff,
 				    enum sdp_header_types type,
 				    enum sdp_header_types term,
 				    const union nf_inet_addr *addr)
@@ -344,16 +346,15 @@ static unsigned int ip_nat_sdp_addr(struct sk_buff *skb, const char **dptr,
 	unsigned int buflen;
 
 	buflen = sprintf(buffer, "%pI4", &addr->ip);
-	if (mangle_sdp_packet(skb, dptr, dataoff, datalen, type, term,
+	if (mangle_sdp_packet(skb, dataoff, dptr, datalen, sdpoff, type, term,
 			      buffer, buflen))
 		return 0;
 
-	return mangle_content_len(skb, dptr, datalen);
+	return mangle_content_len(skb, dataoff, dptr, datalen);
 }
 
-static unsigned int ip_nat_sdp_port(struct sk_buff *skb,
-				    const char **dptr,
-				    unsigned int *datalen,
+static unsigned int ip_nat_sdp_port(struct sk_buff *skb, unsigned int dataoff,
+				    const char **dptr, unsigned int *datalen,
 				    unsigned int matchoff,
 				    unsigned int matchlen,
 				    u_int16_t port)
@@ -362,16 +363,16 @@ static unsigned int ip_nat_sdp_port(struct sk_buff *skb,
 	unsigned int buflen;
 
 	buflen = sprintf(buffer, "%u", port);
-	if (!mangle_packet(skb, dptr, datalen, matchoff, matchlen,
+	if (!mangle_packet(skb, dataoff, dptr, datalen, matchoff, matchlen,
 			   buffer, buflen))
 		return 0;
 
-	return mangle_content_len(skb, dptr, datalen);
+	return mangle_content_len(skb, dataoff, dptr, datalen);
 }
 
-static unsigned int ip_nat_sdp_session(struct sk_buff *skb, const char **dptr,
-				       unsigned int dataoff,
-				       unsigned int *datalen,
+static unsigned int ip_nat_sdp_session(struct sk_buff *skb, unsigned int dataoff,
+				       const char **dptr, unsigned int *datalen,
+				       unsigned int sdpoff,
 				       const union nf_inet_addr *addr)
 {
 	char buffer[sizeof("nnn.nnn.nnn.nnn")];
@@ -379,12 +380,12 @@ static unsigned int ip_nat_sdp_session(struct sk_buff *skb, const char **dptr,
 
 	/* Mangle session description owner and contact addresses */
 	buflen = sprintf(buffer, "%pI4", &addr->ip);
-	if (mangle_sdp_packet(skb, dptr, dataoff, datalen,
+	if (mangle_sdp_packet(skb, dataoff, dptr, datalen, sdpoff,
 			       SDP_HDR_OWNER_IP4, SDP_HDR_MEDIA,
 			       buffer, buflen))
 		return 0;
 
-	switch (mangle_sdp_packet(skb, dptr, dataoff, datalen,
+	switch (mangle_sdp_packet(skb, dataoff, dptr, datalen, sdpoff,
 				  SDP_HDR_CONNECTION_IP4, SDP_HDR_MEDIA,
 				  buffer, buflen)) {
 	case 0:
@@ -401,14 +402,13 @@ static unsigned int ip_nat_sdp_session(struct sk_buff *skb, const char **dptr,
 		return 0;
 	}
 
-	return mangle_content_len(skb, dptr, datalen);
+	return mangle_content_len(skb, dataoff, dptr, datalen);
 }
 
 /* So, this packet has hit the connection tracking matching code.
    Mangle it, and change the expectation to match the new version. */
-static unsigned int ip_nat_sdp_media(struct sk_buff *skb,
-				     const char **dptr,
-				     unsigned int *datalen,
+static unsigned int ip_nat_sdp_media(struct sk_buff *skb, unsigned int dataoff,
+				     const char **dptr, unsigned int *datalen,
 				     struct nf_conntrack_expect *rtp_exp,
 				     struct nf_conntrack_expect *rtcp_exp,
 				     unsigned int mediaoff,
@@ -456,7 +456,8 @@ static unsigned int ip_nat_sdp_media(struct sk_buff *skb,
 
 	/* Update media port. */
 	if (rtp_exp->tuple.dst.u.udp.port != rtp_exp->saved_proto.udp.port &&
-	    !ip_nat_sdp_port(skb, dptr, datalen, mediaoff, medialen, port))
+	    !ip_nat_sdp_port(skb, dataoff, dptr, datalen,
+			     mediaoff, medialen, port))
 		goto err2;
 
 	return NF_ACCEPT;
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index 0ca2f2b5c2fa..0ec37d6a2df7 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -50,12 +50,13 @@ module_param(sip_direct_media, int, 0600);
 MODULE_PARM_DESC(sip_direct_media, "Expect Media streams between signalling "
 				   "endpoints only (default 1)");
 
-unsigned int (*nf_nat_sip_hook)(struct sk_buff *skb,
+unsigned int (*nf_nat_sip_hook)(struct sk_buff *skb, unsigned int dataoff,
 				const char **dptr,
 				unsigned int *datalen) __read_mostly;
 EXPORT_SYMBOL_GPL(nf_nat_sip_hook);
 
 unsigned int (*nf_nat_sip_expect_hook)(struct sk_buff *skb,
+				       unsigned int dataoff,
 				       const char **dptr,
 				       unsigned int *datalen,
 				       struct nf_conntrack_expect *exp,
@@ -63,17 +64,17 @@ unsigned int (*nf_nat_sip_expect_hook)(struct sk_buff *skb,
 				       unsigned int matchlen) __read_mostly;
 EXPORT_SYMBOL_GPL(nf_nat_sip_expect_hook);
 
-unsigned int (*nf_nat_sdp_addr_hook)(struct sk_buff *skb,
+unsigned int (*nf_nat_sdp_addr_hook)(struct sk_buff *skb, unsigned int dataoff,
 				     const char **dptr,
-				     unsigned int dataoff,
 				     unsigned int *datalen,
+				     unsigned int sdpoff,
 				     enum sdp_header_types type,
 				     enum sdp_header_types term,
 				     const union nf_inet_addr *addr)
 				     __read_mostly;
 EXPORT_SYMBOL_GPL(nf_nat_sdp_addr_hook);
 
-unsigned int (*nf_nat_sdp_port_hook)(struct sk_buff *skb,
+unsigned int (*nf_nat_sdp_port_hook)(struct sk_buff *skb, unsigned int dataoff,
 				     const char **dptr,
 				     unsigned int *datalen,
 				     unsigned int matchoff,
@@ -82,14 +83,15 @@ unsigned int (*nf_nat_sdp_port_hook)(struct sk_buff *skb,
 EXPORT_SYMBOL_GPL(nf_nat_sdp_port_hook);
 
 unsigned int (*nf_nat_sdp_session_hook)(struct sk_buff *skb,
-					const char **dptr,
 					unsigned int dataoff,
+					const char **dptr,
 					unsigned int *datalen,
+					unsigned int sdpoff,
 					const union nf_inet_addr *addr)
 					__read_mostly;
 EXPORT_SYMBOL_GPL(nf_nat_sdp_session_hook);
 
-unsigned int (*nf_nat_sdp_media_hook)(struct sk_buff *skb,
+unsigned int (*nf_nat_sdp_media_hook)(struct sk_buff *skb, unsigned int dataoff,
 				      const char **dptr,
 				      unsigned int *datalen,
 				      struct nf_conntrack_expect *rtp_exp,
@@ -729,7 +731,7 @@ static void flush_expectations(struct nf_conn *ct, bool media)
 	spin_unlock_bh(&nf_conntrack_lock);
 }
 
-static int set_expected_rtp_rtcp(struct sk_buff *skb,
+static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int dataoff,
 				 const char **dptr, unsigned int *datalen,
 				 union nf_inet_addr *daddr, __be16 port,
 				 enum sip_expectation_classes class,
@@ -806,7 +808,7 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb,
 	if (direct_rtp) {
 		nf_nat_sdp_port = rcu_dereference(nf_nat_sdp_port_hook);
 		if (nf_nat_sdp_port &&
-		    !nf_nat_sdp_port(skb, dptr, datalen,
+		    !nf_nat_sdp_port(skb, dataoff, dptr, datalen,
 				     mediaoff, medialen, ntohs(rtp_port)))
 			goto err1;
 	}
@@ -828,7 +830,8 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb,
 
 	nf_nat_sdp_media = rcu_dereference(nf_nat_sdp_media_hook);
 	if (nf_nat_sdp_media && ct->status & IPS_NAT_MASK && !direct_rtp)
-		ret = nf_nat_sdp_media(skb, dptr, datalen, rtp_exp, rtcp_exp,
+		ret = nf_nat_sdp_media(skb, dataoff, dptr, datalen,
+				       rtp_exp, rtcp_exp,
 				       mediaoff, medialen, daddr);
 	else {
 		if (nf_ct_expect_related(rtp_exp) == 0) {
@@ -867,7 +870,7 @@ static const struct sdp_media_type *sdp_media_type(const char *dptr,
 	return NULL;
 }
 
-static int process_sdp(struct sk_buff *skb,
+static int process_sdp(struct sk_buff *skb, unsigned int dataoff,
 		       const char **dptr, unsigned int *datalen,
 		       unsigned int cseq)
 {
@@ -942,7 +945,7 @@ static int process_sdp(struct sk_buff *skb,
 		else
 			return NF_DROP;
 
-		ret = set_expected_rtp_rtcp(skb, dptr, datalen,
+		ret = set_expected_rtp_rtcp(skb, dataoff, dptr, datalen,
 					    &rtp_addr, htons(port), t->class,
 					    mediaoff, medialen);
 		if (ret != NF_ACCEPT)
@@ -950,8 +953,9 @@ static int process_sdp(struct sk_buff *skb,
 
 		/* Update media connection address if present */
 		if (maddr_len && nf_nat_sdp_addr && ct->status & IPS_NAT_MASK) {
-			ret = nf_nat_sdp_addr(skb, dptr, mediaoff, datalen,
-					      c_hdr, SDP_HDR_MEDIA, &rtp_addr);
+			ret = nf_nat_sdp_addr(skb, dataoff, dptr, datalen,
+					      mediaoff, c_hdr, SDP_HDR_MEDIA,
+					      &rtp_addr);
 			if (ret != NF_ACCEPT)
 				return ret;
 		}
@@ -961,14 +965,15 @@ static int process_sdp(struct sk_buff *skb,
 	/* Update session connection and owner addresses */
 	nf_nat_sdp_session = rcu_dereference(nf_nat_sdp_session_hook);
 	if (nf_nat_sdp_session && ct->status & IPS_NAT_MASK)
-		ret = nf_nat_sdp_session(skb, dptr, sdpoff, datalen, &rtp_addr);
+		ret = nf_nat_sdp_session(skb, dataoff, dptr, datalen, sdpoff,
+					 &rtp_addr);
 
 	if (ret == NF_ACCEPT && i > 0)
 		help->help.ct_sip_info.invite_cseq = cseq;
 
 	return ret;
 }
-static int process_invite_response(struct sk_buff *skb,
+static int process_invite_response(struct sk_buff *skb, unsigned int dataoff,
 				   const char **dptr, unsigned int *datalen,
 				   unsigned int cseq, unsigned int code)
 {
@@ -978,13 +983,13 @@ static int process_invite_response(struct sk_buff *skb,
 
 	if ((code >= 100 && code <= 199) ||
 	    (code >= 200 && code <= 299))
-		return process_sdp(skb, dptr, datalen, cseq);
+		return process_sdp(skb, dataoff, dptr, datalen, cseq);
 	else if (help->help.ct_sip_info.invite_cseq == cseq)
 		flush_expectations(ct, true);
 	return NF_ACCEPT;
 }
 
-static int process_update_response(struct sk_buff *skb,
+static int process_update_response(struct sk_buff *skb, unsigned int dataoff,
 				   const char **dptr, unsigned int *datalen,
 				   unsigned int cseq, unsigned int code)
 {
@@ -994,13 +999,13 @@ static int process_update_response(struct sk_buff *skb,
 
 	if ((code >= 100 && code <= 199) ||
 	    (code >= 200 && code <= 299))
-		return process_sdp(skb, dptr, datalen, cseq);
+		return process_sdp(skb, dataoff, dptr, datalen, cseq);
 	else if (help->help.ct_sip_info.invite_cseq == cseq)
 		flush_expectations(ct, true);
 	return NF_ACCEPT;
 }
 
-static int process_prack_response(struct sk_buff *skb,
+static int process_prack_response(struct sk_buff *skb, unsigned int dataoff,
 				  const char **dptr, unsigned int *datalen,
 				  unsigned int cseq, unsigned int code)
 {
@@ -1010,13 +1015,13 @@ static int process_prack_response(struct sk_buff *skb,
 
 	if ((code >= 100 && code <= 199) ||
 	    (code >= 200 && code <= 299))
-		return process_sdp(skb, dptr, datalen, cseq);
+		return process_sdp(skb, dataoff, dptr, datalen, cseq);
 	else if (help->help.ct_sip_info.invite_cseq == cseq)
 		flush_expectations(ct, true);
 	return NF_ACCEPT;
 }
 
-static int process_bye_request(struct sk_buff *skb,
+static int process_bye_request(struct sk_buff *skb, unsigned int dataoff,
 			       const char **dptr, unsigned int *datalen,
 			       unsigned int cseq)
 {
@@ -1031,7 +1036,7 @@ static int process_bye_request(struct sk_buff *skb,
  * signalling connections. The expectation is marked inactive and is activated
  * when receiving a response indicating success from the registrar.
  */
-static int process_register_request(struct sk_buff *skb,
+static int process_register_request(struct sk_buff *skb, unsigned int dataoff,
 				    const char **dptr, unsigned int *datalen,
 				    unsigned int cseq)
 {
@@ -1101,7 +1106,7 @@ static int process_register_request(struct sk_buff *skb,
 
 	nf_nat_sip_expect = rcu_dereference(nf_nat_sip_expect_hook);
 	if (nf_nat_sip_expect && ct->status & IPS_NAT_MASK)
-		ret = nf_nat_sip_expect(skb, dptr, datalen, exp,
+		ret = nf_nat_sip_expect(skb, dataoff, dptr, datalen, exp,
 					matchoff, matchlen);
 	else {
 		if (nf_ct_expect_related(exp) != 0)
@@ -1117,7 +1122,7 @@ store_cseq:
 	return ret;
 }
 
-static int process_register_response(struct sk_buff *skb,
+static int process_register_response(struct sk_buff *skb, unsigned int dataoff,
 				     const char **dptr, unsigned int *datalen,
 				     unsigned int cseq, unsigned int code)
 {
@@ -1127,7 +1132,7 @@ static int process_register_response(struct sk_buff *skb,
 	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
 	union nf_inet_addr addr;
 	__be16 port;
-	unsigned int matchoff, matchlen, dataoff = 0;
+	unsigned int matchoff, matchlen, coff = 0;
 	unsigned int expires = 0;
 	int in_contact = 0, ret;
 
@@ -1154,7 +1159,7 @@ static int process_register_response(struct sk_buff *skb,
 	while (1) {
 		unsigned int c_expires = expires;
 
-		ret = ct_sip_parse_header_uri(ct, *dptr, &dataoff, *datalen,
+		ret = ct_sip_parse_header_uri(ct, *dptr, &coff, *datalen,
 					      SIP_HDR_CONTACT, &in_contact,
 					      &matchoff, &matchlen,
 					      &addr, &port);
@@ -1193,13 +1198,13 @@ static const struct sip_handler sip_handlers[] = {
 	SIP_HANDLER("REGISTER", process_register_request, process_register_response),
 };
 
-static int process_sip_response(struct sk_buff *skb,
+static int process_sip_response(struct sk_buff *skb, unsigned int dataoff,
 				const char **dptr, unsigned int *datalen)
 {
 	enum ip_conntrack_info ctinfo;
 	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
-	unsigned int matchoff, matchlen;
-	unsigned int code, cseq, dataoff, i;
+	unsigned int matchoff, matchlen, matchend;
+	unsigned int code, cseq, i;
 
 	if (*datalen < strlen("SIP/2.0 200"))
 		return NF_ACCEPT;
@@ -1213,7 +1218,7 @@ static int process_sip_response(struct sk_buff *skb,
 	cseq = simple_strtoul(*dptr + matchoff, NULL, 10);
 	if (!cseq)
 		return NF_DROP;
-	dataoff = matchoff + matchlen + 1;
+	matchend = matchoff + matchlen + 1;
 
 	for (i = 0; i < ARRAY_SIZE(sip_handlers); i++) {
 		const struct sip_handler *handler;
@@ -1221,15 +1226,16 @@ static int process_sip_response(struct sk_buff *skb,
 		handler = &sip_handlers[i];
 		if (handler->response == NULL)
 			continue;
-		if (*datalen < dataoff + handler->len ||
-		    strnicmp(*dptr + dataoff, handler->method, handler->len))
+		if (*datalen < matchend + handler->len ||
+		    strnicmp(*dptr + matchend, handler->method, handler->len))
 			continue;
-		return handler->response(skb, dptr, datalen, cseq, code);
+		return handler->response(skb, dataoff, dptr, datalen,
+					 cseq, code);
 	}
 	return NF_ACCEPT;
 }
 
-static int process_sip_request(struct sk_buff *skb,
+static int process_sip_request(struct sk_buff *skb, unsigned int dataoff,
 			       const char **dptr, unsigned int *datalen)
 {
 	enum ip_conntrack_info ctinfo;
@@ -1254,7 +1260,7 @@ static int process_sip_request(struct sk_buff *skb,
 		if (!cseq)
 			return NF_DROP;
 
-		return handler->request(skb, dptr, datalen, cseq);
+		return handler->request(skb, dataoff, dptr, datalen, cseq);
 	}
 	return NF_ACCEPT;
 }
@@ -1288,13 +1294,13 @@ static int sip_help(struct sk_buff *skb,
 		return NF_ACCEPT;
 
 	if (strnicmp(dptr, "SIP/2.0 ", strlen("SIP/2.0 ")) != 0)
-		ret = process_sip_request(skb, &dptr, &datalen);
+		ret = process_sip_request(skb, dataoff, &dptr, &datalen);
 	else
-		ret = process_sip_response(skb, &dptr, &datalen);
+		ret = process_sip_response(skb, dataoff, &dptr, &datalen);
 
 	if (ret == NF_ACCEPT && ct->status & IPS_NAT_MASK) {
 		nf_nat_sip = rcu_dereference(nf_nat_sip_hook);
-		if (nf_nat_sip && !nf_nat_sip(skb, &dptr, &datalen))
+		if (nf_nat_sip && !nf_nat_sip(skb, dataoff, &dptr, &datalen))
 			ret = NF_DROP;
 	}
 
-- 
cgit v1.2.3


From f5b321bd37fbec9188feb1f721ab46a5ac0b35da Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Thu, 11 Feb 2010 12:26:19 +0100
Subject: netfilter: nf_conntrack_sip: add TCP support

Add TCP support, which is mandated by RFC3261 for all SIP elements.

SIP over TCP is similar to UDP, except that messages are delimited
by Content-Length: headers and multiple messages may appear in one
packet.

Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/netfilter/nf_conntrack_sip.h |   3 +-
 net/ipv4/netfilter/nf_nat_sip.c            |   2 +-
 net/netfilter/nf_conntrack_sip.c           | 205 +++++++++++++++++++++++++----
 3 files changed, 179 insertions(+), 31 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/nf_conntrack_sip.h b/include/linux/netfilter/nf_conntrack_sip.h
index 2c6950b8bf7e..fa9bb8981450 100644
--- a/include/linux/netfilter/nf_conntrack_sip.h
+++ b/include/linux/netfilter/nf_conntrack_sip.h
@@ -84,7 +84,8 @@ enum sip_header_types {
 	SIP_HDR_FROM,
 	SIP_HDR_TO,
 	SIP_HDR_CONTACT,
-	SIP_HDR_VIA,
+	SIP_HDR_VIA_UDP,
+	SIP_HDR_VIA_TCP,
 	SIP_HDR_EXPIRES,
 	SIP_HDR_CONTENT_LENGTH,
 };
diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c
index 2454ea5abb79..b232e4040dc6 100644
--- a/net/ipv4/netfilter/nf_nat_sip.c
+++ b/net/ipv4/netfilter/nf_nat_sip.c
@@ -122,7 +122,7 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, unsigned int dataoff,
 
 	/* Translate topmost Via header and parameters */
 	if (ct_sip_parse_header_uri(ct, *dptr, NULL, *datalen,
-				    SIP_HDR_VIA, NULL, &matchoff, &matchlen,
+				    SIP_HDR_VIA_UDP, NULL, &matchoff, &matchlen,
 				    &addr, &port) > 0) {
 		unsigned int matchend, poff, plen, buflen, n;
 		char buffer[sizeof("nnn.nnn.nnn.nnn:nnnnn")];
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index 0ec37d6a2df7..1cc75c5a822b 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -16,6 +16,7 @@
 #include <linux/inet.h>
 #include <linux/in.h>
 #include <linux/udp.h>
+#include <linux/tcp.h>
 #include <linux/netfilter.h>
 
 #include <net/netfilter/nf_conntrack.h>
@@ -287,7 +288,8 @@ static const struct sip_header ct_sip_hdrs[] = {
 	[SIP_HDR_FROM]			= SIP_HDR("From", "f", "sip:", skp_epaddr_len),
 	[SIP_HDR_TO]			= SIP_HDR("To", "t", "sip:", skp_epaddr_len),
 	[SIP_HDR_CONTACT]		= SIP_HDR("Contact", "m", "sip:", skp_epaddr_len),
-	[SIP_HDR_VIA]			= SIP_HDR("Via", "v", "UDP ", epaddr_len),
+	[SIP_HDR_VIA_UDP]		= SIP_HDR("Via", "v", "UDP ", epaddr_len),
+	[SIP_HDR_VIA_TCP]		= SIP_HDR("Via", "v", "TCP ", epaddr_len),
 	[SIP_HDR_EXPIRES]		= SIP_HDR("Expires", NULL, NULL, digits_len),
 	[SIP_HDR_CONTENT_LENGTH]	= SIP_HDR("Content-Length", "l", NULL, digits_len),
 };
@@ -519,6 +521,33 @@ int ct_sip_parse_header_uri(const struct nf_conn *ct, const char *dptr,
 }
 EXPORT_SYMBOL_GPL(ct_sip_parse_header_uri);
 
+static int ct_sip_parse_param(const struct nf_conn *ct, const char *dptr,
+			      unsigned int dataoff, unsigned int datalen,
+			      const char *name,
+			      unsigned int *matchoff, unsigned int *matchlen)
+{
+	const char *limit = dptr + datalen;
+	const char *start;
+	const char *end;
+
+	limit = ct_sip_header_search(dptr + dataoff, limit, ",", strlen(","));
+	if (!limit)
+		limit = dptr + datalen;
+
+	start = ct_sip_header_search(dptr + dataoff, limit, name, strlen(name));
+	if (!start)
+		return 0;
+	start += strlen(name);
+
+	end = ct_sip_header_search(start, limit, ";", strlen(";"));
+	if (!end)
+		end = limit;
+
+	*matchoff = start - dptr;
+	*matchlen = end - start;
+	return 1;
+}
+
 /* Parse address from header parameter and return address, offset and length */
 int ct_sip_parse_address_param(const struct nf_conn *ct, const char *dptr,
 			       unsigned int dataoff, unsigned int datalen,
@@ -577,6 +606,29 @@ int ct_sip_parse_numerical_param(const struct nf_conn *ct, const char *dptr,
 }
 EXPORT_SYMBOL_GPL(ct_sip_parse_numerical_param);
 
+static int ct_sip_parse_transport(struct nf_conn *ct, const char *dptr,
+				  unsigned int dataoff, unsigned int datalen,
+				  u8 *proto)
+{
+	unsigned int matchoff, matchlen;
+
+	if (ct_sip_parse_param(ct, dptr, dataoff, datalen, "transport=",
+			       &matchoff, &matchlen)) {
+		if (!strnicmp(dptr + matchoff, "TCP", strlen("TCP")))
+			*proto = IPPROTO_TCP;
+		else if (!strnicmp(dptr + matchoff, "UDP", strlen("UDP")))
+			*proto = IPPROTO_UDP;
+		else
+			return 0;
+
+		if (*proto != nf_ct_protonum(ct))
+			return 0;
+	} else
+		*proto = nf_ct_protonum(ct);
+
+	return 1;
+}
+
 /* SDP header parsing: a SDP session description contains an ordered set of
  * headers, starting with a section containing general session parameters,
  * optionally followed by multiple media descriptions.
@@ -685,7 +737,7 @@ static int ct_sip_parse_sdp_addr(const struct nf_conn *ct, const char *dptr,
 
 static int refresh_signalling_expectation(struct nf_conn *ct,
 					  union nf_inet_addr *addr,
-					  __be16 port,
+					  u8 proto, __be16 port,
 					  unsigned int expires)
 {
 	struct nf_conn_help *help = nfct_help(ct);
@@ -697,6 +749,7 @@ static int refresh_signalling_expectation(struct nf_conn *ct,
 	hlist_for_each_entry_safe(exp, n, next, &help->expectations, lnode) {
 		if (exp->class != SIP_EXPECT_SIGNALLING ||
 		    !nf_inet_addr_cmp(&exp->tuple.dst.u3, addr) ||
+		    exp->tuple.dst.protonum != proto ||
 		    exp->tuple.dst.u.udp.port != port)
 			continue;
 		if (!del_timer(&exp->timeout))
@@ -1048,6 +1101,7 @@ static int process_register_request(struct sk_buff *skb, unsigned int dataoff,
 	struct nf_conntrack_expect *exp;
 	union nf_inet_addr *saddr, daddr;
 	__be16 port;
+	u8 proto;
 	unsigned int expires = 0;
 	int ret;
 	typeof(nf_nat_sip_expect_hook) nf_nat_sip_expect;
@@ -1080,6 +1134,10 @@ static int process_register_request(struct sk_buff *skb, unsigned int dataoff,
 	if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.src.u3, &daddr))
 		return NF_ACCEPT;
 
+	if (ct_sip_parse_transport(ct, *dptr, matchoff + matchlen, *datalen,
+				   &proto) == 0)
+		return NF_ACCEPT;
+
 	if (ct_sip_parse_numerical_param(ct, *dptr,
 					 matchoff + matchlen, *datalen,
 					 "expires=", NULL, NULL, &expires) < 0)
@@ -1099,7 +1157,7 @@ static int process_register_request(struct sk_buff *skb, unsigned int dataoff,
 		saddr = &ct->tuplehash[!dir].tuple.src.u3;
 
 	nf_ct_expect_init(exp, SIP_EXPECT_SIGNALLING, nf_ct_l3num(ct),
-			  saddr, &daddr, IPPROTO_UDP, NULL, &port);
+			  saddr, &daddr, proto, NULL, &port);
 	exp->timeout.expires = sip_timeout * HZ;
 	exp->helper = nfct_help(ct)->helper;
 	exp->flags = NF_CT_EXPECT_PERMANENT | NF_CT_EXPECT_INACTIVE;
@@ -1132,6 +1190,7 @@ static int process_register_response(struct sk_buff *skb, unsigned int dataoff,
 	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
 	union nf_inet_addr addr;
 	__be16 port;
+	u8 proto;
 	unsigned int matchoff, matchlen, coff = 0;
 	unsigned int expires = 0;
 	int in_contact = 0, ret;
@@ -1172,6 +1231,10 @@ static int process_register_response(struct sk_buff *skb, unsigned int dataoff,
 		if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3, &addr))
 			continue;
 
+		if (ct_sip_parse_transport(ct, *dptr, matchoff + matchlen,
+					   *datalen, &proto) == 0)
+			continue;
+
 		ret = ct_sip_parse_numerical_param(ct, *dptr,
 						   matchoff + matchlen,
 						   *datalen, "expires=",
@@ -1180,7 +1243,8 @@ static int process_register_response(struct sk_buff *skb, unsigned int dataoff,
 			return NF_DROP;
 		if (c_expires == 0)
 			break;
-		if (refresh_signalling_expectation(ct, &addr, port, c_expires))
+		if (refresh_signalling_expectation(ct, &addr, proto, port,
+						   c_expires))
 			return NF_ACCEPT;
 	}
 
@@ -1265,50 +1329,123 @@ static int process_sip_request(struct sk_buff *skb, unsigned int dataoff,
 	return NF_ACCEPT;
 }
 
-static int sip_help(struct sk_buff *skb,
-		    unsigned int protoff,
-		    struct nf_conn *ct,
-		    enum ip_conntrack_info ctinfo)
+static int process_sip_msg(struct sk_buff *skb, struct nf_conn *ct,
+			   unsigned int dataoff, const char **dptr,
+			   unsigned int *datalen)
+{
+	typeof(nf_nat_sip_hook) nf_nat_sip;
+	int ret;
+
+	if (strnicmp(*dptr, "SIP/2.0 ", strlen("SIP/2.0 ")) != 0)
+		ret = process_sip_request(skb, dataoff, dptr, datalen);
+	else
+		ret = process_sip_response(skb, dataoff, dptr, datalen);
+
+	if (ret == NF_ACCEPT && ct->status & IPS_NAT_MASK) {
+		nf_nat_sip = rcu_dereference(nf_nat_sip_hook);
+		if (nf_nat_sip && !nf_nat_sip(skb, dataoff, dptr, datalen))
+			ret = NF_DROP;
+	}
+
+	return ret;
+}
+
+static int sip_help_tcp(struct sk_buff *skb, unsigned int protoff,
+			struct nf_conn *ct, enum ip_conntrack_info ctinfo)
 {
+	struct tcphdr *th, _tcph;
 	unsigned int dataoff, datalen;
-	const char *dptr;
+	unsigned int matchoff, matchlen, clen;
+	unsigned int msglen, origlen;
+	const char *dptr, *end;
+	s16 diff, tdiff = 0;
 	int ret;
-	typeof(nf_nat_sip_hook) nf_nat_sip;
+
+	if (ctinfo != IP_CT_ESTABLISHED &&
+	    ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY)
+		return NF_ACCEPT;
 
 	/* No Data ? */
-	dataoff = protoff + sizeof(struct udphdr);
+	th = skb_header_pointer(skb, protoff, sizeof(_tcph), &_tcph);
+	if (th == NULL)
+		return NF_ACCEPT;
+	dataoff = protoff + th->doff * 4;
 	if (dataoff >= skb->len)
 		return NF_ACCEPT;
 
 	nf_ct_refresh(ct, skb, sip_timeout * HZ);
 
-	if (!skb_is_nonlinear(skb))
-		dptr = skb->data + dataoff;
-	else {
+	if (skb_is_nonlinear(skb)) {
 		pr_debug("Copy of skbuff not supported yet.\n");
 		return NF_ACCEPT;
 	}
 
+	dptr = skb->data + dataoff;
 	datalen = skb->len - dataoff;
 	if (datalen < strlen("SIP/2.0 200"))
 		return NF_ACCEPT;
 
-	if (strnicmp(dptr, "SIP/2.0 ", strlen("SIP/2.0 ")) != 0)
-		ret = process_sip_request(skb, dataoff, &dptr, &datalen);
-	else
-		ret = process_sip_response(skb, dataoff, &dptr, &datalen);
+	while (1) {
+		if (ct_sip_get_header(ct, dptr, 0, datalen,
+				      SIP_HDR_CONTENT_LENGTH,
+				      &matchoff, &matchlen) <= 0)
+			break;
 
-	if (ret == NF_ACCEPT && ct->status & IPS_NAT_MASK) {
-		nf_nat_sip = rcu_dereference(nf_nat_sip_hook);
-		if (nf_nat_sip && !nf_nat_sip(skb, dataoff, &dptr, &datalen))
-			ret = NF_DROP;
+		clen = simple_strtoul(dptr + matchoff, (char **)&end, 10);
+		if (dptr + matchoff == end)
+			break;
+
+		if (end + strlen("\r\n\r\n") > dptr + datalen)
+			break;
+		if (end[0] != '\r' || end[1] != '\n' ||
+		    end[2] != '\r' || end[3] != '\n')
+			break;
+		end += strlen("\r\n\r\n") + clen;
+
+		msglen = origlen = end - dptr;
+
+		ret = process_sip_msg(skb, ct, dataoff, &dptr, &msglen);
+		if (ret != NF_ACCEPT)
+			break;
+		diff     = msglen - origlen;
+		tdiff   += diff;
+
+		dataoff += msglen;
+		dptr    += msglen;
+		datalen  = datalen + diff - msglen;
 	}
 
 	return ret;
 }
 
-static struct nf_conntrack_helper sip[MAX_PORTS][2] __read_mostly;
-static char sip_names[MAX_PORTS][2][sizeof("sip-65535")] __read_mostly;
+static int sip_help_udp(struct sk_buff *skb, unsigned int protoff,
+			struct nf_conn *ct, enum ip_conntrack_info ctinfo)
+{
+	unsigned int dataoff, datalen;
+	const char *dptr;
+
+	/* No Data ? */
+	dataoff = protoff + sizeof(struct udphdr);
+	if (dataoff >= skb->len)
+		return NF_ACCEPT;
+
+	nf_ct_refresh(ct, skb, sip_timeout * HZ);
+
+	if (skb_is_nonlinear(skb)) {
+		pr_debug("Copy of skbuff not supported yet.\n");
+		return NF_ACCEPT;
+	}
+
+	dptr = skb->data + dataoff;
+	datalen = skb->len - dataoff;
+	if (datalen < strlen("SIP/2.0 200"))
+		return NF_ACCEPT;
+
+	return process_sip_msg(skb, ct, dataoff, &dptr, &datalen);
+}
+
+static struct nf_conntrack_helper sip[MAX_PORTS][4] __read_mostly;
+static char sip_names[MAX_PORTS][4][sizeof("sip-65535")] __read_mostly;
 
 static const struct nf_conntrack_expect_policy sip_exp_policy[SIP_EXPECT_MAX + 1] = {
 	[SIP_EXPECT_SIGNALLING] = {
@@ -1333,7 +1470,7 @@ static void nf_conntrack_sip_fini(void)
 	int i, j;
 
 	for (i = 0; i < ports_c; i++) {
-		for (j = 0; j < 2; j++) {
+		for (j = 0; j < ARRAY_SIZE(sip[i]); j++) {
 			if (sip[i][j].me == NULL)
 				continue;
 			nf_conntrack_helper_unregister(&sip[i][j]);
@@ -1353,14 +1490,24 @@ static int __init nf_conntrack_sip_init(void)
 		memset(&sip[i], 0, sizeof(sip[i]));
 
 		sip[i][0].tuple.src.l3num = AF_INET;
-		sip[i][1].tuple.src.l3num = AF_INET6;
-		for (j = 0; j < 2; j++) {
-			sip[i][j].tuple.dst.protonum = IPPROTO_UDP;
+		sip[i][0].tuple.dst.protonum = IPPROTO_UDP;
+		sip[i][0].help = sip_help_udp;
+		sip[i][1].tuple.src.l3num = AF_INET;
+		sip[i][1].tuple.dst.protonum = IPPROTO_TCP;
+		sip[i][1].help = sip_help_tcp;
+
+		sip[i][2].tuple.src.l3num = AF_INET6;
+		sip[i][2].tuple.dst.protonum = IPPROTO_UDP;
+		sip[i][2].help = sip_help_udp;
+		sip[i][3].tuple.src.l3num = AF_INET6;
+		sip[i][3].tuple.dst.protonum = IPPROTO_TCP;
+		sip[i][3].help = sip_help_tcp;
+
+		for (j = 0; j < ARRAY_SIZE(sip[i]); j++) {
 			sip[i][j].tuple.src.u.udp.port = htons(ports[i]);
 			sip[i][j].expect_policy = sip_exp_policy;
 			sip[i][j].expect_class_max = SIP_EXPECT_MAX;
 			sip[i][j].me = THIS_MODULE;
-			sip[i][j].help = sip_help;
 
 			tmpname = &sip_names[i][j][0];
 			if (ports[i] == SIP_PORT)
-- 
cgit v1.2.3


From 48f8ac26537c1b7b1a2422f5232f45d06c945348 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Thu, 11 Feb 2010 12:29:38 +0100
Subject: netfilter: nf_nat_sip: add TCP support

Add support for mangling TCP SIP packets.

Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/netfilter/nf_conntrack_sip.h |  1 +
 net/ipv4/netfilter/nf_nat_sip.c            | 53 ++++++++++++++++++++++++++----
 net/netfilter/nf_conntrack_sip.c           | 10 ++++++
 3 files changed, 58 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/nf_conntrack_sip.h b/include/linux/netfilter/nf_conntrack_sip.h
index fa9bb8981450..cd84d6f44d11 100644
--- a/include/linux/netfilter/nf_conntrack_sip.h
+++ b/include/linux/netfilter/nf_conntrack_sip.h
@@ -104,6 +104,7 @@ extern unsigned int (*nf_nat_sip_hook)(struct sk_buff *skb,
 				       unsigned int dataoff,
 				       const char **dptr,
 				       unsigned int *datalen);
+extern void (*nf_nat_sip_seq_adjust_hook)(struct sk_buff *skb, s16 off);
 extern unsigned int (*nf_nat_sip_expect_hook)(struct sk_buff *skb,
 					      unsigned int dataoff,
 					      const char **dptr,
diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c
index b232e4040dc6..11b538deaaec 100644
--- a/net/ipv4/netfilter/nf_nat_sip.c
+++ b/net/ipv4/netfilter/nf_nat_sip.c
@@ -1,4 +1,4 @@
-/* SIP extension for UDP NAT alteration.
+/* SIP extension for NAT alteration.
  *
  * (C) 2005 by Christian Hentschel <chentschel@arnet.com.ar>
  * based on RR's ip_nat_ftp.c and other modules.
@@ -15,6 +15,7 @@
 #include <linux/ip.h>
 #include <net/ip.h>
 #include <linux/udp.h>
+#include <linux/tcp.h>
 
 #include <net/netfilter/nf_nat.h>
 #include <net/netfilter/nf_nat_helper.h>
@@ -36,10 +37,27 @@ static unsigned int mangle_packet(struct sk_buff *skb, unsigned int dataoff,
 {
 	enum ip_conntrack_info ctinfo;
 	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
-
-	if (!nf_nat_mangle_udp_packet(skb, ct, ctinfo, matchoff, matchlen,
-				      buffer, buflen))
-		return 0;
+	struct tcphdr *th;
+	unsigned int baseoff;
+
+	if (nf_ct_protonum(ct) == IPPROTO_TCP) {
+		th = (struct tcphdr *)(skb->data + ip_hdrlen(skb));
+		baseoff = ip_hdrlen(skb) + th->doff * 4;
+		matchoff += dataoff - baseoff;
+
+		if (!__nf_nat_mangle_tcp_packet(skb, ct, ctinfo,
+						matchoff, matchlen,
+						buffer, buflen, false))
+			return 0;
+	} else {
+		baseoff = ip_hdrlen(skb) + sizeof(struct udphdr);
+		matchoff += dataoff - baseoff;
+
+		if (!nf_nat_mangle_udp_packet(skb, ct, ctinfo,
+					      matchoff, matchlen,
+					      buffer, buflen))
+			return 0;
+	}
 
 	/* Reload data pointer and adjust datalen value */
 	*dptr = skb->data + dataoff;
@@ -104,6 +122,7 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, unsigned int dataoff,
 	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
 	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
 	unsigned int coff, matchoff, matchlen;
+	enum sip_header_types hdr;
 	union nf_inet_addr addr;
 	__be16 port;
 	int request, in_header;
@@ -120,9 +139,14 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, unsigned int dataoff,
 	} else
 		request = 0;
 
+	if (nf_ct_protonum(ct) == IPPROTO_TCP)
+		hdr = SIP_HDR_VIA_TCP;
+	else
+		hdr = SIP_HDR_VIA_UDP;
+
 	/* Translate topmost Via header and parameters */
 	if (ct_sip_parse_header_uri(ct, *dptr, NULL, *datalen,
-				    SIP_HDR_VIA_UDP, NULL, &matchoff, &matchlen,
+				    hdr, NULL, &matchoff, &matchlen,
 				    &addr, &port) > 0) {
 		unsigned int matchend, poff, plen, buflen, n;
 		char buffer[sizeof("nnn.nnn.nnn.nnn:nnnnn")];
@@ -204,9 +228,23 @@ next:
 	if (!map_sip_addr(skb, dataoff, dptr, datalen, SIP_HDR_FROM) ||
 	    !map_sip_addr(skb, dataoff, dptr, datalen, SIP_HDR_TO))
 		return NF_DROP;
+
 	return NF_ACCEPT;
 }
 
+static void ip_nat_sip_seq_adjust(struct sk_buff *skb, s16 off)
+{
+	enum ip_conntrack_info ctinfo;
+	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+	const struct tcphdr *th;
+
+	if (nf_ct_protonum(ct) != IPPROTO_TCP || off == 0)
+		return;
+
+	th = (struct tcphdr *)(skb->data + ip_hdrlen(skb));
+	nf_nat_set_seq_adjust(ct, ctinfo, th->seq, off);
+}
+
 /* Handles expected signalling connections and media streams */
 static void ip_nat_sip_expected(struct nf_conn *ct,
 				struct nf_conntrack_expect *exp)
@@ -472,6 +510,7 @@ err1:
 static void __exit nf_nat_sip_fini(void)
 {
 	rcu_assign_pointer(nf_nat_sip_hook, NULL);
+	rcu_assign_pointer(nf_nat_sip_seq_adjust_hook, NULL);
 	rcu_assign_pointer(nf_nat_sip_expect_hook, NULL);
 	rcu_assign_pointer(nf_nat_sdp_addr_hook, NULL);
 	rcu_assign_pointer(nf_nat_sdp_port_hook, NULL);
@@ -483,12 +522,14 @@ static void __exit nf_nat_sip_fini(void)
 static int __init nf_nat_sip_init(void)
 {
 	BUG_ON(nf_nat_sip_hook != NULL);
+	BUG_ON(nf_nat_sip_seq_adjust_hook != NULL);
 	BUG_ON(nf_nat_sip_expect_hook != NULL);
 	BUG_ON(nf_nat_sdp_addr_hook != NULL);
 	BUG_ON(nf_nat_sdp_port_hook != NULL);
 	BUG_ON(nf_nat_sdp_session_hook != NULL);
 	BUG_ON(nf_nat_sdp_media_hook != NULL);
 	rcu_assign_pointer(nf_nat_sip_hook, ip_nat_sip);
+	rcu_assign_pointer(nf_nat_sip_seq_adjust_hook, ip_nat_sip_seq_adjust);
 	rcu_assign_pointer(nf_nat_sip_expect_hook, ip_nat_sip_expect);
 	rcu_assign_pointer(nf_nat_sdp_addr_hook, ip_nat_sdp_addr);
 	rcu_assign_pointer(nf_nat_sdp_port_hook, ip_nat_sdp_port);
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index 1cc75c5a822b..3bb3aaff76e9 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -56,6 +56,9 @@ unsigned int (*nf_nat_sip_hook)(struct sk_buff *skb, unsigned int dataoff,
 				unsigned int *datalen) __read_mostly;
 EXPORT_SYMBOL_GPL(nf_nat_sip_hook);
 
+void (*nf_nat_sip_seq_adjust_hook)(struct sk_buff *skb, s16 off) __read_mostly;
+EXPORT_SYMBOL_GPL(nf_nat_sip_seq_adjust_hook);
+
 unsigned int (*nf_nat_sip_expect_hook)(struct sk_buff *skb,
 				       unsigned int dataoff,
 				       const char **dptr,
@@ -1360,6 +1363,7 @@ static int sip_help_tcp(struct sk_buff *skb, unsigned int protoff,
 	const char *dptr, *end;
 	s16 diff, tdiff = 0;
 	int ret;
+	typeof(nf_nat_sip_seq_adjust_hook) nf_nat_sip_seq_adjust;
 
 	if (ctinfo != IP_CT_ESTABLISHED &&
 	    ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY)
@@ -1415,6 +1419,12 @@ static int sip_help_tcp(struct sk_buff *skb, unsigned int protoff,
 		datalen  = datalen + diff - msglen;
 	}
 
+	if (ret == NF_ACCEPT && ct->status & IPS_NAT_MASK) {
+		nf_nat_sip_seq_adjust = rcu_dereference(nf_nat_sip_seq_adjust_hook);
+		if (nf_nat_sip_seq_adjust)
+			nf_nat_sip_seq_adjust(skb, tdiff);
+	}
+
 	return ret;
 }
 
-- 
cgit v1.2.3


From 9d288dffe3a276e1f06ba556845c456d696c5a4f Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Thu, 11 Feb 2010 12:30:21 +0100
Subject: netfilter: nf_conntrack_sip: add T.38 FAX support

Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/netfilter/nf_conntrack_sip.h |  1 +
 include/net/netfilter/nf_conntrack.h       |  2 +-
 net/netfilter/nf_conntrack_sip.c           | 28 +++++++++++++++++++++++-----
 3 files changed, 25 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/nf_conntrack_sip.h b/include/linux/netfilter/nf_conntrack_sip.h
index cd84d6f44d11..ff8cfbcf3b81 100644
--- a/include/linux/netfilter/nf_conntrack_sip.h
+++ b/include/linux/netfilter/nf_conntrack_sip.h
@@ -14,6 +14,7 @@ enum sip_expectation_classes {
 	SIP_EXPECT_SIGNALLING,
 	SIP_EXPECT_AUDIO,
 	SIP_EXPECT_VIDEO,
+	SIP_EXPECT_IMAGE,
 	__SIP_EXPECT_MAX
 };
 #define SIP_EXPECT_MAX	(__SIP_EXPECT_MAX - 1)
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index 5043d61c99a7..5b7d8835523f 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -70,7 +70,7 @@ union nf_conntrack_help {
 struct nf_conntrack_helper;
 
 /* Must be kept in sync with the classes defined by helpers */
-#define NF_CT_MAX_EXPECT_CLASSES	3
+#define NF_CT_MAX_EXPECT_CLASSES	4
 
 /* nf_conn feature for connections that have a helper */
 struct nf_conn_help {
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index 3bb3aaff76e9..fbe8ff5a420a 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -907,6 +907,7 @@ err1:
 static const struct sdp_media_type sdp_media_types[] = {
 	SDP_MEDIA_TYPE("audio ", SIP_EXPECT_AUDIO),
 	SDP_MEDIA_TYPE("video ", SIP_EXPECT_VIDEO),
+	SDP_MEDIA_TYPE("image ", SIP_EXPECT_IMAGE),
 };
 
 static const struct sdp_media_type *sdp_media_type(const char *dptr,
@@ -932,7 +933,6 @@ static int process_sdp(struct sk_buff *skb, unsigned int dataoff,
 {
 	enum ip_conntrack_info ctinfo;
 	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
-	struct nf_conn_help *help = nfct_help(ct);
 	unsigned int matchoff, matchlen;
 	unsigned int mediaoff, medialen;
 	unsigned int sdpoff;
@@ -1024,9 +1024,6 @@ static int process_sdp(struct sk_buff *skb, unsigned int dataoff,
 		ret = nf_nat_sdp_session(skb, dataoff, dptr, datalen, sdpoff,
 					 &rtp_addr);
 
-	if (ret == NF_ACCEPT && i > 0)
-		help->help.ct_sip_info.invite_cseq = cseq;
-
 	return ret;
 }
 static int process_invite_response(struct sk_buff *skb, unsigned int dataoff,
@@ -1077,6 +1074,22 @@ static int process_prack_response(struct sk_buff *skb, unsigned int dataoff,
 	return NF_ACCEPT;
 }
 
+static int process_invite_request(struct sk_buff *skb, unsigned int dataoff,
+				  const char **dptr, unsigned int *datalen,
+				  unsigned int cseq)
+{
+	enum ip_conntrack_info ctinfo;
+	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+	struct nf_conn_help *help = nfct_help(ct);
+	unsigned int ret;
+
+	flush_expectations(ct, true);
+	ret = process_sdp(skb, dataoff, dptr, datalen, cseq);
+	if (ret == NF_ACCEPT)
+		help->help.ct_sip_info.invite_cseq = cseq;
+	return ret;
+}
+
 static int process_bye_request(struct sk_buff *skb, unsigned int dataoff,
 			       const char **dptr, unsigned int *datalen,
 			       unsigned int cseq)
@@ -1257,7 +1270,7 @@ flush:
 }
 
 static const struct sip_handler sip_handlers[] = {
-	SIP_HANDLER("INVITE", process_sdp, process_invite_response),
+	SIP_HANDLER("INVITE", process_invite_request, process_invite_response),
 	SIP_HANDLER("UPDATE", process_sdp, process_update_response),
 	SIP_HANDLER("ACK", process_sdp, NULL),
 	SIP_HANDLER("PRACK", process_sdp, process_prack_response),
@@ -1473,6 +1486,11 @@ static const struct nf_conntrack_expect_policy sip_exp_policy[SIP_EXPECT_MAX + 1
 		.max_expected	= 2 * IP_CT_DIR_MAX,
 		.timeout	= 3 * 60,
 	},
+	[SIP_EXPECT_IMAGE] = {
+		.name		= "image",
+		.max_expected	= IP_CT_DIR_MAX,
+		.timeout	= 3 * 60,
+	},
 };
 
 static void nf_conntrack_sip_fini(void)
-- 
cgit v1.2.3


From 5b3efd500854d45d305b53c54c97db5970959980 Mon Sep 17 00:00:00 2001
From: Suresh Siddha <suresh.b.siddha@intel.com>
Date: Thu, 11 Feb 2010 11:50:59 -0800
Subject: x86, ptrace: regset extensions to support xstate

Add the xstate regset support which helps extend the kernel ptrace and the
core-dump interfaces to support AVX state etc.

This regset interface is designed to support all the future state that gets
supported using xsave/xrstor infrastructure.

Looking at the memory layout saved by "xsave", one can't say which state
is represented in the memory layout. This is because if a particular state is
in init state, in the xsave hdr it can be represented by bit '0'. And hence
we can't really say by the xsave header wether a state is in init state or
the state is not saved in the memory layout.

And hence the xsave memory layout available through this regset
interface uses SW usable bytes [464..511] to convey what state is represented
in the memory layout.

First 8 bytes of the sw_usable_bytes[464..467] will be set to OS enabled xstate
mask(which is same as the 64bit mask returned by the xgetbv's xCR0).

The note NT_X86_XSTATE represents the extended state information in the
core file, using the above mentioned memory layout.

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
LKML-Reference: <20100211195614.802495327@sbs-t61.sc.intel.com>
Signed-off-by: Hongjiu Lu <hjl.tools@gmail.com>
Cc: Roland McGrath <roland@redhat.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/include/asm/i387.h  | 12 +++++--
 arch/x86/include/asm/user.h  | 58 +++++++++++++++++++++++++++++++
 arch/x86/include/asm/xsave.h |  2 ++
 arch/x86/kernel/i387.c       | 83 ++++++++++++++++++++++++++++++++++++++++++++
 arch/x86/kernel/ptrace.c     | 34 ++++++++++++++++--
 arch/x86/kernel/xsave.c      |  1 +
 include/linux/elf.h          |  1 +
 7 files changed, 187 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h
index ebfb8a9e11f7..da2930924501 100644
--- a/arch/x86/include/asm/i387.h
+++ b/arch/x86/include/asm/i387.h
@@ -33,8 +33,16 @@ extern void init_thread_xstate(void);
 extern int dump_fpu(struct pt_regs *, struct user_i387_struct *);
 
 extern user_regset_active_fn fpregs_active, xfpregs_active;
-extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get;
-extern user_regset_set_fn fpregs_set, xfpregs_set, fpregs_soft_set;
+extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get,
+				xstateregs_get;
+extern user_regset_set_fn fpregs_set, xfpregs_set, fpregs_soft_set,
+				 xstateregs_set;
+
+/*
+ * xstateregs_active == fpregs_active. Please refer to the comment
+ * at the definition of fpregs_active.
+ */
+#define xstateregs_active	fpregs_active
 
 extern struct _fpx_sw_bytes fx_sw_reserved;
 #ifdef CONFIG_IA32_EMULATION
diff --git a/arch/x86/include/asm/user.h b/arch/x86/include/asm/user.h
index 999873b22e7f..24532c7da3d6 100644
--- a/arch/x86/include/asm/user.h
+++ b/arch/x86/include/asm/user.h
@@ -1,5 +1,63 @@
+#ifndef _ASM_X86_USER_H
+#define _ASM_X86_USER_H
+
 #ifdef CONFIG_X86_32
 # include "user_32.h"
 #else
 # include "user_64.h"
 #endif
+
+#include <asm/types.h>
+
+struct user_ymmh_regs {
+	/* 16 * 16 bytes for each YMMH-reg */
+	__u32 ymmh_space[64];
+};
+
+struct user_xsave_hdr {
+	__u64 xstate_bv;
+	__u64 reserved1[2];
+	__u64 reserved2[5];
+};
+
+/*
+ * The structure layout of user_xstateregs, used for exporting the
+ * extended register state through ptrace and core-dump (NT_X86_XSTATE note)
+ * interfaces will be same as the memory layout of xsave used by the processor
+ * (except for the bytes 464..511, which can be used by the software) and hence
+ * the size of this structure varies depending on the features supported by the
+ * processor and OS. The size of the structure that users need to use can be
+ * obtained by doing:
+ *     cpuid_count(0xd, 0, &eax, &ptrace_xstateregs_struct_size, &ecx, &edx);
+ * i.e., cpuid.(eax=0xd,ecx=0).ebx will be the size that user (debuggers, etc.)
+ * need to use.
+ *
+ * For now, only the first 8 bytes of the software usable bytes[464..471] will
+ * be used and will be set to OS enabled xstate mask (which is same as the
+ * 64bit mask returned by the xgetbv's xCR0).  Users (analyzing core dump
+ * remotely, etc.) can use this mask as well as the mask saved in the
+ * xstate_hdr bytes and interpret what states the processor/OS supports
+ * and what states are in modified/initialized conditions for the
+ * particular process/thread.
+ *
+ * Also when the user modifies certain state FP/SSE/etc through the
+ * ptrace interface, they must ensure that the xsave_hdr.xstate_bv
+ * bytes[512..519] of the memory layout are updated correspondingly.
+ * i.e., for example when FP state is modified to a non-init state,
+ * xsave_hdr.xstate_bv's bit 0 must be set to '1', when SSE is modified to
+ * non-init state, xsave_hdr.xstate_bv's bit 1 must to be set to '1', etc.
+ */
+#define USER_XSTATE_FX_SW_WORDS 6
+#define USER_XSTATE_XCR0_WORD	0
+
+struct user_xstateregs {
+	struct {
+		__u64 fpx_space[58];
+		__u64 xstate_fx_sw[USER_XSTATE_FX_SW_WORDS];
+	} i387;
+	struct user_xsave_hdr xsave_hdr;
+	struct user_ymmh_regs ymmh;
+	/* further processor state extensions go here */
+};
+
+#endif /* _ASM_X86_USER_H */
diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h
index 727acc152344..ddc04ccad03b 100644
--- a/arch/x86/include/asm/xsave.h
+++ b/arch/x86/include/asm/xsave.h
@@ -27,9 +27,11 @@
 extern unsigned int xstate_size;
 extern u64 pcntxt_mask;
 extern struct xsave_struct *init_xstate_buf;
+extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS];
 
 extern void xsave_cntxt_init(void);
 extern void xsave_init(void);
+extern void update_regset_xstate_info(unsigned int size, u64 xstate_mask);
 extern int init_fpu(struct task_struct *child);
 extern int check_for_xstate(struct i387_fxsave_struct __user *buf,
 			    void __user *fpstate,
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index f2f8540a7f3d..7a8a193b5144 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -164,6 +164,11 @@ int init_fpu(struct task_struct *tsk)
 	return 0;
 }
 
+/*
+ * The xstateregs_active() routine is the same as the fpregs_active() routine,
+ * as the "regset->n" for the xstate regset will be updated based on the feature
+ * capabilites supported by the xsave.
+ */
 int fpregs_active(struct task_struct *target, const struct user_regset *regset)
 {
 	return tsk_used_math(target) ? regset->n : 0;
@@ -224,6 +229,84 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset,
 	return ret;
 }
 
+int xstateregs_get(struct task_struct *target, const struct user_regset *regset,
+		unsigned int pos, unsigned int count,
+		void *kbuf, void __user *ubuf)
+{
+	int ret;
+
+	if (!cpu_has_xsave)
+		return -ENODEV;
+
+	ret = init_fpu(target);
+	if (ret)
+		return ret;
+
+	/*
+	 * First copy the fxsave bytes 0..463.
+	 */
+	ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+				  &target->thread.xstate->xsave, 0,
+				  offsetof(struct user_xstateregs,
+					   i387.xstate_fx_sw));
+	if (ret)
+		return ret;
+
+	/*
+	 * Copy the 48bytes defined by software.
+	 */
+	ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+				  xstate_fx_sw_bytes,
+				  offsetof(struct user_xstateregs,
+					   i387.xstate_fx_sw),
+				  offsetof(struct user_xstateregs,
+					   xsave_hdr));
+	if (ret)
+		return ret;
+
+	/*
+	 * Copy the rest of xstate memory layout.
+	 */
+	ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+				  &target->thread.xstate->xsave.xsave_hdr,
+				  offsetof(struct user_xstateregs,
+					   xsave_hdr), -1);
+	return ret;
+}
+
+int xstateregs_set(struct task_struct *target, const struct user_regset *regset,
+		  unsigned int pos, unsigned int count,
+		  const void *kbuf, const void __user *ubuf)
+{
+	int ret;
+	struct xsave_hdr_struct *xsave_hdr;
+
+	if (!cpu_has_xsave)
+		return -ENODEV;
+
+	ret = init_fpu(target);
+	if (ret)
+		return ret;
+
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				 &target->thread.xstate->xsave, 0, -1);
+
+	/*
+	 * mxcsr reserved bits must be masked to zero for security reasons.
+	 */
+	target->thread.xstate->fxsave.mxcsr &= mxcsr_feature_mask;
+
+	xsave_hdr = &target->thread.xstate->xsave.xsave_hdr;
+
+	xsave_hdr->xstate_bv &= pcntxt_mask;
+	/*
+	 * These bits must be zero.
+	 */
+	xsave_hdr->reserved1[0] = xsave_hdr->reserved1[1] = 0;
+
+	return ret;
+}
+
 #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
 
 /*
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 017d937639fe..16433a59b396 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -48,6 +48,7 @@ enum x86_regset {
 	REGSET_FP,
 	REGSET_XFP,
 	REGSET_IOPERM64 = REGSET_XFP,
+	REGSET_XSTATE,
 	REGSET_TLS,
 	REGSET_IOPERM32,
 };
@@ -1584,7 +1585,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
 
 #ifdef CONFIG_X86_64
 
-static const struct user_regset x86_64_regsets[] = {
+static struct user_regset x86_64_regsets[] __read_mostly = {
 	[REGSET_GENERAL] = {
 		.core_note_type = NT_PRSTATUS,
 		.n = sizeof(struct user_regs_struct) / sizeof(long),
@@ -1597,6 +1598,12 @@ static const struct user_regset x86_64_regsets[] = {
 		.size = sizeof(long), .align = sizeof(long),
 		.active = xfpregs_active, .get = xfpregs_get, .set = xfpregs_set
 	},
+	[REGSET_XSTATE] = {
+		.core_note_type = NT_X86_XSTATE,
+		.size = sizeof(u64), .align = sizeof(u64),
+		.active = xstateregs_active, .get = xstateregs_get,
+		.set = xstateregs_set
+	},
 	[REGSET_IOPERM64] = {
 		.core_note_type = NT_386_IOPERM,
 		.n = IO_BITMAP_LONGS,
@@ -1622,7 +1629,7 @@ static const struct user_regset_view user_x86_64_view = {
 #endif	/* CONFIG_X86_64 */
 
 #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
-static const struct user_regset x86_32_regsets[] = {
+static struct user_regset x86_32_regsets[] __read_mostly = {
 	[REGSET_GENERAL] = {
 		.core_note_type = NT_PRSTATUS,
 		.n = sizeof(struct user_regs_struct32) / sizeof(u32),
@@ -1641,6 +1648,12 @@ static const struct user_regset x86_32_regsets[] = {
 		.size = sizeof(u32), .align = sizeof(u32),
 		.active = xfpregs_active, .get = xfpregs_get, .set = xfpregs_set
 	},
+	[REGSET_XSTATE] = {
+		.core_note_type = NT_X86_XSTATE,
+		.size = sizeof(u64), .align = sizeof(u64),
+		.active = xstateregs_active, .get = xstateregs_get,
+		.set = xstateregs_set
+	},
 	[REGSET_TLS] = {
 		.core_note_type = NT_386_TLS,
 		.n = GDT_ENTRY_TLS_ENTRIES, .bias = GDT_ENTRY_TLS_MIN,
@@ -1663,6 +1676,23 @@ static const struct user_regset_view user_x86_32_view = {
 };
 #endif
 
+/*
+ * This represents bytes 464..511 in the memory layout exported through
+ * the REGSET_XSTATE interface.
+ */
+u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS];
+
+void update_regset_xstate_info(unsigned int size, u64 xstate_mask)
+{
+#ifdef CONFIG_X86_64
+	x86_64_regsets[REGSET_XSTATE].n = size / sizeof(u64);
+#endif
+#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
+	x86_32_regsets[REGSET_XSTATE].n = size / sizeof(u64);
+#endif
+	xstate_fx_sw_bytes[USER_XSTATE_XCR0_WORD] = xstate_mask;
+}
+
 const struct user_regset_view *task_user_regset_view(struct task_struct *task)
 {
 #ifdef CONFIG_IA32_EMULATION
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index c5ee17e8c6d9..782c3a362ec6 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -337,6 +337,7 @@ void __ref xsave_cntxt_init(void)
 	cpuid_count(0xd, 0, &eax, &ebx, &ecx, &edx);
 	xstate_size = ebx;
 
+	update_regset_xstate_info(xstate_size, pcntxt_mask);
 	prepare_fx_sw_frame();
 
 	setup_xstate_init();
diff --git a/include/linux/elf.h b/include/linux/elf.h
index 0cc4d55151b7..a8c4af073ce9 100644
--- a/include/linux/elf.h
+++ b/include/linux/elf.h
@@ -361,6 +361,7 @@ typedef struct elf64_shdr {
 #define NT_PPC_VSX	0x102		/* PowerPC VSX registers */
 #define NT_386_TLS	0x200		/* i386 TLS slots (struct user_desc) */
 #define NT_386_IOPERM	0x201		/* x86 io permission bitmap (1=deny) */
+#define NT_X86_XSTATE	0x202		/* x86 extended state using xsave */
 #define NT_S390_HIGH_GPRS	0x300	/* s390 upper register halves */
 
 
-- 
cgit v1.2.3


From 2225a122ae26d542bdce523d9d87a4a7ba10e07b Mon Sep 17 00:00:00 2001
From: Suresh Siddha <suresh.b.siddha@intel.com>
Date: Thu, 11 Feb 2010 11:51:00 -0800
Subject: ptrace: Add support for generic PTRACE_GETREGSET/PTRACE_SETREGSET

Generic support for PTRACE_GETREGSET/PTRACE_SETREGSET commands which
export the regsets supported by each architecture using the correponding
NT_* types. These NT_* types are already part of the userland ABI, used
in representing the architecture specific register sets as different NOTES
in an ELF core file.

'addr' parameter for the ptrace system call encode the REGSET type (using
the corresppnding NT_* type) and the 'data' parameter points to the
struct iovec having the user buffer and the length of that buffer.

	struct iovec iov = { buf, len};
	ret = ptrace(PTRACE_GETREGSET/PTRACE_SETREGSET, pid, NT_XXX_TYPE, &iov);

On successful completion, iov.len will be updated by the kernel specifying
how much the kernel has written/read to/from the user's iov.buf.

x86 extended state registers are primarily exported using this interface.

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
LKML-Reference: <20100211195614.886724710@sbs-t61.sc.intel.com>
Acked-by: Hongjiu Lu <hjl.tools@gmail.com>
Cc: Roland McGrath <roland@redhat.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 include/linux/elf.h    |  6 +++-
 include/linux/ptrace.h | 15 +++++++++
 kernel/ptrace.c        | 88 ++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 108 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/elf.h b/include/linux/elf.h
index a8c4af073ce9..d8e6e61ad9ff 100644
--- a/include/linux/elf.h
+++ b/include/linux/elf.h
@@ -349,7 +349,11 @@ typedef struct elf64_shdr {
 #define ELF_OSABI ELFOSABI_NONE
 #endif
 
-/* Notes used in ET_CORE */
+/*
+ * Notes used in ET_CORE. Architectures export some of the arch register sets
+ * using the corresponding note types via the PTRACE_GETREGSET and
+ * PTRACE_SETREGSET requests.
+ */
 #define NT_PRSTATUS	1
 #define NT_PRFPREG	2
 #define NT_PRPSINFO	3
diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index 56f2d63a5cbb..dbfa821d5a6e 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -27,6 +27,21 @@
 #define PTRACE_GETSIGINFO	0x4202
 #define PTRACE_SETSIGINFO	0x4203
 
+/*
+ * Generic ptrace interface that exports the architecture specific regsets
+ * using the corresponding NT_* types (which are also used in the core dump).
+ *
+ * This interface usage is as follows:
+ *	struct iovec iov = { buf, len};
+ *
+ *	ret = ptrace(PTRACE_GETREGSET/PTRACE_SETREGSET, pid, NT_XXX_TYPE, &iov);
+ *
+ * On the successful completion, iov.len will be updated by the kernel,
+ * specifying how much the kernel has written/read to/from the user's iov.buf.
+ */
+#define PTRACE_GETREGSET	0x4204
+#define PTRACE_SETREGSET	0x4205
+
 /* options set using PTRACE_SETOPTIONS */
 #define PTRACE_O_TRACESYSGOOD	0x00000001
 #define PTRACE_O_TRACEFORK	0x00000002
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 23bd09cd042e..13b4554d8fbb 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -22,6 +22,7 @@
 #include <linux/pid_namespace.h>
 #include <linux/syscalls.h>
 #include <linux/uaccess.h>
+#include <linux/regset.h>
 
 
 /*
@@ -511,6 +512,47 @@ static int ptrace_resume(struct task_struct *child, long request, long data)
 	return 0;
 }
 
+#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
+
+static const struct user_regset *
+find_regset(const struct user_regset_view *view, unsigned int type)
+{
+	const struct user_regset *regset;
+	int n;
+
+	for (n = 0; n < view->n; ++n) {
+		regset = view->regsets + n;
+		if (regset->core_note_type == type)
+			return regset;
+	}
+
+	return NULL;
+}
+
+static int ptrace_regset(struct task_struct *task, int req, unsigned int type,
+			 struct iovec *kiov)
+{
+	const struct user_regset_view *view = task_user_regset_view(task);
+	const struct user_regset *regset = find_regset(view, type);
+	int regset_no;
+
+	if (!regset || (kiov->iov_len % regset->size) != 0)
+		return -EIO;
+
+	regset_no = regset - view->regsets;
+	kiov->iov_len = min(kiov->iov_len,
+			    (__kernel_size_t) (regset->n * regset->size));
+
+	if (req == PTRACE_GETREGSET)
+		return copy_regset_to_user(task, view, regset_no, 0,
+					   kiov->iov_len, kiov->iov_base);
+	else
+		return copy_regset_from_user(task, view, regset_no, 0,
+					     kiov->iov_len, kiov->iov_base);
+}
+
+#endif
+
 int ptrace_request(struct task_struct *child, long request,
 		   long addr, long data)
 {
@@ -573,6 +615,26 @@ int ptrace_request(struct task_struct *child, long request,
 			return 0;
 		return ptrace_resume(child, request, SIGKILL);
 
+#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
+	case PTRACE_GETREGSET:
+	case PTRACE_SETREGSET:
+	{
+		struct iovec kiov;
+		struct iovec __user *uiov = (struct iovec __user *) data;
+
+		if (!access_ok(VERIFY_WRITE, uiov, sizeof(*uiov)))
+			return -EFAULT;
+
+		if (__get_user(kiov.iov_base, &uiov->iov_base) ||
+		    __get_user(kiov.iov_len, &uiov->iov_len))
+			return -EFAULT;
+
+		ret = ptrace_regset(child, request, addr, &kiov);
+		if (!ret)
+			ret = __put_user(kiov.iov_len, &uiov->iov_len);
+		break;
+	}
+#endif
 	default:
 		break;
 	}
@@ -711,6 +773,32 @@ int compat_ptrace_request(struct task_struct *child, compat_long_t request,
 		else
 			ret = ptrace_setsiginfo(child, &siginfo);
 		break;
+#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
+	case PTRACE_GETREGSET:
+	case PTRACE_SETREGSET:
+	{
+		struct iovec kiov;
+		struct compat_iovec __user *uiov =
+			(struct compat_iovec __user *) datap;
+		compat_uptr_t ptr;
+		compat_size_t len;
+
+		if (!access_ok(VERIFY_WRITE, uiov, sizeof(*uiov)))
+			return -EFAULT;
+
+		if (__get_user(ptr, &uiov->iov_base) ||
+		    __get_user(len, &uiov->iov_len))
+			return -EFAULT;
+
+		kiov.iov_base = compat_ptr(ptr);
+		kiov.iov_len = len;
+
+		ret = ptrace_regset(child, request, addr, &kiov);
+		if (!ret)
+			ret = __put_user(kiov.iov_len, &uiov->iov_len);
+		break;
+	}
+#endif
 
 	default:
 		ret = ptrace_request(child, request, addr, data);
-- 
cgit v1.2.3


From a0dcf19f59d4f37150a6b7e115925d72aca15293 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Fri, 20 Nov 2009 10:50:34 +0000
Subject: ARM: PNX4008: move i2c suspend/resume callbacks into driver

Acked-by: Vitaly Wool <vitalywool@gmail.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/mach-pnx4008/i2c.c  | 24 ------------------------
 drivers/i2c/busses/i2c-pnx.c |  9 +++++++--
 include/linux/i2c-pnx.h      |  4 ----
 3 files changed, 7 insertions(+), 30 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-pnx4008/i2c.c b/arch/arm/mach-pnx4008/i2c.c
index c986b3a61311..707d819f1280 100644
--- a/arch/arm/mach-pnx4008/i2c.c
+++ b/arch/arm/mach-pnx4008/i2c.c
@@ -48,24 +48,6 @@ static int set_clock_stop(struct platform_device *pdev)
 	return retval;
 }
 
-static int i2c_pnx_suspend(struct platform_device *pdev, pm_message_t state)
-{
-	int retval = 0;
-#ifdef CONFIG_PM
-	retval = set_clock_run(pdev);
-#endif
-	return retval;
-}
-
-static int i2c_pnx_resume(struct platform_device *pdev)
-{
-	int retval = 0;
-#ifdef CONFIG_PM
-	retval = set_clock_run(pdev);
-#endif
-	return retval;
-}
-
 static u32 calculate_input_freq(struct platform_device *pdev)
 {
 	return HCLK_MHZ;
@@ -102,8 +84,6 @@ static struct i2c_adapter pnx_adapter2 = {
 };
 
 static struct i2c_pnx_data i2c0_data = {
-	.suspend = i2c_pnx_suspend,
-	.resume = i2c_pnx_resume,
 	.calculate_input_freq = calculate_input_freq,
 	.set_clock_run = set_clock_run,
 	.set_clock_stop = set_clock_stop,
@@ -111,8 +91,6 @@ static struct i2c_pnx_data i2c0_data = {
 };
 
 static struct i2c_pnx_data i2c1_data = {
-	.suspend = i2c_pnx_suspend,
-	.resume = i2c_pnx_resume,
 	.calculate_input_freq = calculate_input_freq,
 	.set_clock_run = set_clock_run,
 	.set_clock_stop = set_clock_stop,
@@ -120,8 +98,6 @@ static struct i2c_pnx_data i2c1_data = {
 };
 
 static struct i2c_pnx_data i2c2_data = {
-	.suspend = i2c_pnx_suspend,
-	.resume = i2c_pnx_resume,
 	.calculate_input_freq = calculate_input_freq,
 	.set_clock_run = set_clock_run,
 	.set_clock_stop = set_clock_stop,
diff --git a/drivers/i2c/busses/i2c-pnx.c b/drivers/i2c/busses/i2c-pnx.c
index 5d1c2603a130..bc8075514e53 100644
--- a/drivers/i2c/busses/i2c-pnx.c
+++ b/drivers/i2c/busses/i2c-pnx.c
@@ -545,18 +545,23 @@ static struct i2c_algorithm pnx_algorithm = {
 	.functionality = i2c_pnx_func,
 };
 
+#ifdef CONFIG_PM
 static int i2c_pnx_controller_suspend(struct platform_device *pdev,
 				      pm_message_t state)
 {
 	struct i2c_pnx_data *i2c_pnx = platform_get_drvdata(pdev);
-	return i2c_pnx->suspend(pdev, state);
+	return i2c_pnx->set_clock_run(pdev);
 }
 
 static int i2c_pnx_controller_resume(struct platform_device *pdev)
 {
 	struct i2c_pnx_data *i2c_pnx = platform_get_drvdata(pdev);
-	return i2c_pnx->resume(pdev);
+	return i2c_pnx->set_clock_run(pdev);
 }
+#else
+#define i2c_pnx_controller_suspend	NULL
+#define i2c_pnx_controller_resume	NULL
+#endif
 
 static int __devinit i2c_pnx_probe(struct platform_device *pdev)
 {
diff --git a/include/linux/i2c-pnx.h b/include/linux/i2c-pnx.h
index 9eb07bbc6522..71de7f976adf 100644
--- a/include/linux/i2c-pnx.h
+++ b/include/linux/i2c-pnx.h
@@ -12,8 +12,6 @@
 #ifndef __I2C_PNX_H__
 #define __I2C_PNX_H__
 
-#include <linux/pm.h>
-
 struct platform_device;
 
 struct i2c_pnx_mif {
@@ -34,8 +32,6 @@ struct i2c_pnx_algo_data {
 };
 
 struct i2c_pnx_data {
-	int (*suspend) (struct platform_device *pdev, pm_message_t state);
-	int (*resume) (struct platform_device *pdev);
 	u32 (*calculate_input_freq) (struct platform_device *pdev);
 	int (*set_clock_run) (struct platform_device *pdev);
 	int (*set_clock_stop) (struct platform_device *pdev);
-- 
cgit v1.2.3


From 0321cb83e1c3f3a4282bd620c6cec78c5b80b572 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Fri, 20 Nov 2009 11:12:26 +0000
Subject: ARM: PNX4008: move i2c clock start/stop into driver

Acked-by: Vitaly Wool <vitalywool@gmail.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/mach-pnx4008/i2c.c  | 36 ------------------------------------
 drivers/i2c/busses/i2c-pnx.c | 39 ++++++++++++++++++++++++++++++---------
 include/linux/i2c-pnx.h      |  4 ++--
 3 files changed, 32 insertions(+), 47 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-pnx4008/i2c.c b/arch/arm/mach-pnx4008/i2c.c
index 707d819f1280..14b4906bec2f 100644
--- a/arch/arm/mach-pnx4008/i2c.c
+++ b/arch/arm/mach-pnx4008/i2c.c
@@ -18,36 +18,6 @@
 #include <mach/irqs.h>
 #include <mach/i2c.h>
 
-static int set_clock_run(struct platform_device *pdev)
-{
-	struct clk *clk;
-	int retval = 0;
-
-	clk = clk_get(&pdev->dev, NULL);
-	if (!IS_ERR(clk)) {
-		clk_set_rate(clk, 1);
-		clk_put(clk);
-	} else
-		retval = -ENOENT;
-
-	return retval;
-}
-
-static int set_clock_stop(struct platform_device *pdev)
-{
-	struct clk *clk;
-	int retval = 0;
-
-	clk = clk_get(&pdev->dev, NULL);
-	if (!IS_ERR(clk)) {
-		clk_set_rate(clk, 0);
-		clk_put(clk);
-	} else
-		retval = -ENOENT;
-
-	return retval;
-}
-
 static u32 calculate_input_freq(struct platform_device *pdev)
 {
 	return HCLK_MHZ;
@@ -85,22 +55,16 @@ static struct i2c_adapter pnx_adapter2 = {
 
 static struct i2c_pnx_data i2c0_data = {
 	.calculate_input_freq = calculate_input_freq,
-	.set_clock_run = set_clock_run,
-	.set_clock_stop = set_clock_stop,
 	.adapter = &pnx_adapter0,
 };
 
 static struct i2c_pnx_data i2c1_data = {
 	.calculate_input_freq = calculate_input_freq,
-	.set_clock_run = set_clock_run,
-	.set_clock_stop = set_clock_stop,
 	.adapter = &pnx_adapter1,
 };
 
 static struct i2c_pnx_data i2c2_data = {
 	.calculate_input_freq = calculate_input_freq,
-	.set_clock_run = set_clock_run,
-	.set_clock_stop = set_clock_stop,
 	.adapter = &pnx_adapter2,
 };
 
diff --git a/drivers/i2c/busses/i2c-pnx.c b/drivers/i2c/busses/i2c-pnx.c
index bc8075514e53..98462671cdf7 100644
--- a/drivers/i2c/busses/i2c-pnx.c
+++ b/drivers/i2c/busses/i2c-pnx.c
@@ -20,6 +20,9 @@
 #include <linux/platform_device.h>
 #include <linux/i2c-pnx.h>
 #include <linux/io.h>
+#include <linux/err.h>
+#include <linux/clk.h>
+
 #include <mach/hardware.h>
 #include <mach/i2c.h>
 #include <asm/irq.h>
@@ -550,13 +553,22 @@ static int i2c_pnx_controller_suspend(struct platform_device *pdev,
 				      pm_message_t state)
 {
 	struct i2c_pnx_data *i2c_pnx = platform_get_drvdata(pdev);
-	return i2c_pnx->set_clock_run(pdev);
+	struct i2c_pnx_algo_data *alg_data = i2c_pnx->adapter->algo_data;
+
+	/* FIXME: disable clock? */
+	clk_set_rate(alg_data->clk, 1);
+
+	return 0;
 }
 
 static int i2c_pnx_controller_resume(struct platform_device *pdev)
 {
 	struct i2c_pnx_data *i2c_pnx = platform_get_drvdata(pdev);
-	return i2c_pnx->set_clock_run(pdev);
+	struct i2c_pnx_algo_data *alg_data = i2c_pnx->adapter->algo_data;
+
+	clk_set_rate(alg_data->clk, 1);
+
+	return 0;
 }
 #else
 #define i2c_pnx_controller_suspend	NULL
@@ -580,6 +592,15 @@ static int __devinit i2c_pnx_probe(struct platform_device *pdev)
 
 	platform_set_drvdata(pdev, i2c_pnx);
 
+	i2c_pnx->adapter->algo = &pnx_algorithm;
+	alg_data = i2c_pnx->adapter->algo_data;
+
+	alg_data->clk = clk_get(&pdev->dev, NULL);
+	if (IS_ERR(alg_data->clk)) {
+		ret = PTR_ERR(alg_data->clk);
+		goto out_drvdata;
+	}
+
 	if (i2c_pnx->calculate_input_freq)
 		freq_mhz = i2c_pnx->calculate_input_freq(pdev);
 	else {
@@ -588,9 +609,6 @@ static int __devinit i2c_pnx_probe(struct platform_device *pdev)
 		       "%d MHz\n", freq_mhz);
 	}
 
-	i2c_pnx->adapter->algo = &pnx_algorithm;
-
-	alg_data = i2c_pnx->adapter->algo_data;
 	init_timer(&alg_data->mif.timer);
 	alg_data->mif.timer.function = i2c_pnx_timeout;
 	alg_data->mif.timer.data = (unsigned long)i2c_pnx->adapter;
@@ -602,7 +620,7 @@ static int __devinit i2c_pnx_probe(struct platform_device *pdev)
 		       "I/O region 0x%08x for I2C already in use.\n",
 		       alg_data->base);
 		ret = -ENODEV;
-		goto out_drvdata;
+		goto out_clkget;
 	}
 
 	if (!(alg_data->ioaddr =
@@ -612,7 +630,7 @@ static int __devinit i2c_pnx_probe(struct platform_device *pdev)
 		goto out_release;
 	}
 
-	i2c_pnx->set_clock_run(pdev);
+	clk_set_rate(alg_data->clk, 1);
 
 	/*
 	 * Clock Divisor High This value is the number of system clocks
@@ -658,11 +676,13 @@ static int __devinit i2c_pnx_probe(struct platform_device *pdev)
 out_irq:
 	free_irq(alg_data->irq, i2c_pnx->adapter);
 out_clock:
-	i2c_pnx->set_clock_stop(pdev);
+	clk_set_rate(alg_data->clk, 0);
 out_unmap:
 	iounmap((void *)alg_data->ioaddr);
 out_release:
 	release_mem_region(alg_data->base, I2C_PNX_REGION_SIZE);
+out_clkget:
+	clk_put(alg_data->clk);
 out_drvdata:
 	platform_set_drvdata(pdev, NULL);
 out:
@@ -677,9 +697,10 @@ static int __devexit i2c_pnx_remove(struct platform_device *pdev)
 
 	free_irq(alg_data->irq, i2c_pnx->adapter);
 	i2c_del_adapter(adap);
-	i2c_pnx->set_clock_stop(pdev);
+	clk_set_rate(alg_data->clk, 0);
 	iounmap((void *)alg_data->ioaddr);
 	release_mem_region(alg_data->base, I2C_PNX_REGION_SIZE);
+	clk_put(alg_data->clk);
 	platform_set_drvdata(pdev, NULL);
 
 	return 0;
diff --git a/include/linux/i2c-pnx.h b/include/linux/i2c-pnx.h
index 71de7f976adf..688e29262a43 100644
--- a/include/linux/i2c-pnx.h
+++ b/include/linux/i2c-pnx.h
@@ -13,6 +13,7 @@
 #define __I2C_PNX_H__
 
 struct platform_device;
+struct clk;
 
 struct i2c_pnx_mif {
 	int			ret;		/* Return value */
@@ -29,12 +30,11 @@ struct i2c_pnx_algo_data {
 	int			irq;
 	struct i2c_pnx_mif	mif;
 	int			last;
+	struct clk		*clk;
 };
 
 struct i2c_pnx_data {
 	u32 (*calculate_input_freq) (struct platform_device *pdev);
-	int (*set_clock_run) (struct platform_device *pdev);
-	int (*set_clock_stop) (struct platform_device *pdev);
 	struct i2c_adapter *adapter;
 };
 
-- 
cgit v1.2.3


From 6fff3da998ac3cc9ed8a84bf4f19911bd63c8c32 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Fri, 20 Nov 2009 12:46:07 +0000
Subject: ARM: PNX4008: get i2c clock rate from clk API

Acked-by: Vitaly Wool <vitalywool@gmail.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/mach-pnx4008/clock.c |  9 ++++++---
 arch/arm/mach-pnx4008/i2c.c   |  9 ---------
 drivers/i2c/busses/i2c-pnx.c  | 15 ++++-----------
 include/linux/i2c-pnx.h       |  1 -
 4 files changed, 10 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-pnx4008/clock.c b/arch/arm/mach-pnx4008/clock.c
index 692625cd2001..9d1975fa4d9f 100644
--- a/arch/arm/mach-pnx4008/clock.c
+++ b/arch/arm/mach-pnx4008/clock.c
@@ -638,9 +638,10 @@ static struct clk flash_ck = {
 static struct clk i2c0_ck = {
 	.name = "i2c0_ck",
 	.parent = &per_ck,
-	.flags = NEEDS_INITIALIZATION,
+	.flags = NEEDS_INITIALIZATION | FIXED_RATE,
 	.enable_shift = 0,
 	.enable_reg = I2CCLKCTRL_REG,
+	.rate = 13000000,
 	.enable = clk_reg_enable,
 	.disable = clk_reg_disable,
 };
@@ -648,9 +649,10 @@ static struct clk i2c0_ck = {
 static struct clk i2c1_ck = {
 	.name = "i2c1_ck",
 	.parent = &per_ck,
-	.flags = NEEDS_INITIALIZATION,
+	.flags = NEEDS_INITIALIZATION | FIXED_RATE,
 	.enable_shift = 1,
 	.enable_reg = I2CCLKCTRL_REG,
+	.rate = 13000000,
 	.enable = clk_reg_enable,
 	.disable = clk_reg_disable,
 };
@@ -658,9 +660,10 @@ static struct clk i2c1_ck = {
 static struct clk i2c2_ck = {
 	.name = "i2c2_ck",
 	.parent = &per_ck,
-	.flags = NEEDS_INITIALIZATION,
+	.flags = NEEDS_INITIALIZATION | FIXED_RATE,
 	.enable_shift = 2,
 	.enable_reg = USB_OTG_CLKCTRL_REG,
+	.rate = 13000000,
 	.enable = clk_reg_enable,
 	.disable = clk_reg_disable,
 };
diff --git a/arch/arm/mach-pnx4008/i2c.c b/arch/arm/mach-pnx4008/i2c.c
index 14b4906bec2f..23ec335a4bc0 100644
--- a/arch/arm/mach-pnx4008/i2c.c
+++ b/arch/arm/mach-pnx4008/i2c.c
@@ -18,12 +18,6 @@
 #include <mach/irqs.h>
 #include <mach/i2c.h>
 
-static u32 calculate_input_freq(struct platform_device *pdev)
-{
-	return HCLK_MHZ;
-}
-
-
 static struct i2c_pnx_algo_data pnx_algo_data0 = {
 	.base = PNX4008_I2C1_BASE,
 	.irq = I2C_1_INT,
@@ -54,17 +48,14 @@ static struct i2c_adapter pnx_adapter2 = {
 };
 
 static struct i2c_pnx_data i2c0_data = {
-	.calculate_input_freq = calculate_input_freq,
 	.adapter = &pnx_adapter0,
 };
 
 static struct i2c_pnx_data i2c1_data = {
-	.calculate_input_freq = calculate_input_freq,
 	.adapter = &pnx_adapter1,
 };
 
 static struct i2c_pnx_data i2c2_data = {
-	.calculate_input_freq = calculate_input_freq,
 	.adapter = &pnx_adapter2,
 };
 
diff --git a/drivers/i2c/busses/i2c-pnx.c b/drivers/i2c/busses/i2c-pnx.c
index 29f91774c4df..bfcd079e885c 100644
--- a/drivers/i2c/busses/i2c-pnx.c
+++ b/drivers/i2c/busses/i2c-pnx.c
@@ -31,7 +31,6 @@
 #define I2C_PNX_TIMEOUT		10 /* msec */
 #define I2C_PNX_SPEED_KHZ	100
 #define I2C_PNX_REGION_SIZE	0x100
-#define PNX_DEFAULT_FREQ	13 /* MHz */
 
 static inline int wait_timeout(long timeout, struct i2c_pnx_algo_data *data)
 {
@@ -578,7 +577,7 @@ static int __devinit i2c_pnx_probe(struct platform_device *pdev)
 	unsigned long tmp;
 	int ret = 0;
 	struct i2c_pnx_algo_data *alg_data;
-	int freq_mhz;
+	unsigned long freq;
 	struct i2c_pnx_data *i2c_pnx = pdev->dev.platform_data;
 
 	if (!i2c_pnx || !i2c_pnx->adapter) {
@@ -599,14 +598,6 @@ static int __devinit i2c_pnx_probe(struct platform_device *pdev)
 		goto out_drvdata;
 	}
 
-	if (i2c_pnx->calculate_input_freq)
-		freq_mhz = i2c_pnx->calculate_input_freq(pdev);
-	else {
-		freq_mhz = PNX_DEFAULT_FREQ;
-		dev_info(&pdev->dev, "Setting bus frequency to default value: "
-		       "%d MHz\n", freq_mhz);
-	}
-
 	init_timer(&alg_data->mif.timer);
 	alg_data->mif.timer.function = i2c_pnx_timeout;
 	alg_data->mif.timer.data = (unsigned long)i2c_pnx->adapter;
@@ -632,6 +623,8 @@ static int __devinit i2c_pnx_probe(struct platform_device *pdev)
 	if (ret)
 		goto out_unmap;
 
+	freq = clk_get_rate(alg_data->clk);
+
 	/*
 	 * Clock Divisor High This value is the number of system clocks
 	 * the serial clock (SCL) will be high.
@@ -643,7 +636,7 @@ static int __devinit i2c_pnx_probe(struct platform_device *pdev)
 	 * the deglitching filter length.
 	 */
 
-	tmp = ((freq_mhz * 1000) / I2C_PNX_SPEED_KHZ) / 2 - 2;
+	tmp = ((freq / 1000) / I2C_PNX_SPEED_KHZ) / 2 - 2;
 	iowrite32(tmp, I2C_REG_CKH(alg_data));
 	iowrite32(tmp, I2C_REG_CKL(alg_data));
 
diff --git a/include/linux/i2c-pnx.h b/include/linux/i2c-pnx.h
index 688e29262a43..9035711cb3b1 100644
--- a/include/linux/i2c-pnx.h
+++ b/include/linux/i2c-pnx.h
@@ -34,7 +34,6 @@ struct i2c_pnx_algo_data {
 };
 
 struct i2c_pnx_data {
-	u32 (*calculate_input_freq) (struct platform_device *pdev);
 	struct i2c_adapter *adapter;
 };
 
-- 
cgit v1.2.3


From 88d968b22fa26d5e3a8cab46fc7c3a21c89a91d3 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Sat, 21 Nov 2009 11:58:36 +0000
Subject: ARM: PNX4008: Make ioaddr 'void __iomem *' rather than 'u32'

This avoids unnecessary casting.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 drivers/i2c/busses/i2c-pnx.c | 8 ++++----
 include/linux/i2c-pnx.h      | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/i2c/busses/i2c-pnx.c b/drivers/i2c/busses/i2c-pnx.c
index bfcd079e885c..882579e64111 100644
--- a/drivers/i2c/busses/i2c-pnx.c
+++ b/drivers/i2c/busses/i2c-pnx.c
@@ -612,8 +612,8 @@ static int __devinit i2c_pnx_probe(struct platform_device *pdev)
 		goto out_clkget;
 	}
 
-	if (!(alg_data->ioaddr =
-			(u32)ioremap(alg_data->base, I2C_PNX_REGION_SIZE))) {
+	alg_data->ioaddr = ioremap(alg_data->base, I2C_PNX_REGION_SIZE);
+	if (!alg_data->ioaddr) {
 		dev_err(&pdev->dev, "Couldn't ioremap I2C I/O region\n");
 		ret = -ENOMEM;
 		goto out_release;
@@ -671,7 +671,7 @@ out_irq:
 out_clock:
 	clk_disable(alg_data->clk);
 out_unmap:
-	iounmap((void *)alg_data->ioaddr);
+	iounmap(alg_data->ioaddr);
 out_release:
 	release_mem_region(alg_data->base, I2C_PNX_REGION_SIZE);
 out_clkget:
@@ -691,7 +691,7 @@ static int __devexit i2c_pnx_remove(struct platform_device *pdev)
 	free_irq(alg_data->irq, i2c_pnx->adapter);
 	i2c_del_adapter(adap);
 	clk_disable(alg_data->clk);
-	iounmap((void *)alg_data->ioaddr);
+	iounmap(alg_data->ioaddr);
 	release_mem_region(alg_data->base, I2C_PNX_REGION_SIZE);
 	clk_put(alg_data->clk);
 	platform_set_drvdata(pdev, NULL);
diff --git a/include/linux/i2c-pnx.h b/include/linux/i2c-pnx.h
index 9035711cb3b1..5a48f33046fb 100644
--- a/include/linux/i2c-pnx.h
+++ b/include/linux/i2c-pnx.h
@@ -26,7 +26,7 @@ struct i2c_pnx_mif {
 
 struct i2c_pnx_algo_data {
 	u32			base;
-	u32			ioaddr;
+	void __iomem		*ioaddr;
 	int			irq;
 	struct i2c_pnx_mif	mif;
 	int			last;
-- 
cgit v1.2.3


From 44c5d739181886cff8e3903dfa38cd704f3d9640 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Sat, 21 Nov 2009 12:10:54 +0000
Subject: ARM: PNX4008: kzalloc i2c drivers internal data

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/mach-pnx4008/i2c.c  | 25 +++++++------------------
 drivers/i2c/busses/i2c-pnx.c | 29 +++++++++++++++++++----------
 include/linux/i2c-pnx.h      |  4 ++--
 3 files changed, 28 insertions(+), 30 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-pnx4008/i2c.c b/arch/arm/mach-pnx4008/i2c.c
index 23ec335a4bc0..33146e832dd7 100644
--- a/arch/arm/mach-pnx4008/i2c.c
+++ b/arch/arm/mach-pnx4008/i2c.c
@@ -18,45 +18,34 @@
 #include <mach/irqs.h>
 #include <mach/i2c.h>
 
-static struct i2c_pnx_algo_data pnx_algo_data0 = {
-	.base = PNX4008_I2C1_BASE,
-	.irq = I2C_1_INT,
-};
-
-static struct i2c_pnx_algo_data pnx_algo_data1 = {
-	.base = PNX4008_I2C2_BASE,
-	.irq = I2C_2_INT,
-};
-
-static struct i2c_pnx_algo_data pnx_algo_data2 = {
-	.base = (PNX4008_USB_CONFIG_BASE + 0x300),
-	.irq = USB_I2C_INT,
-};
-
 static struct i2c_adapter pnx_adapter0 = {
 	.name = I2C_CHIP_NAME "0",
-	.algo_data = &pnx_algo_data0,
 };
+
 static struct i2c_adapter pnx_adapter1 = {
 	.name = I2C_CHIP_NAME "1",
-	.algo_data = &pnx_algo_data1,
 };
 
 static struct i2c_adapter pnx_adapter2 = {
 	.name = "USB-I2C",
-	.algo_data = &pnx_algo_data2,
 };
 
 static struct i2c_pnx_data i2c0_data = {
 	.adapter = &pnx_adapter0,
+	.base = PNX4008_I2C1_BASE,
+	.irq = I2C_1_INT,
 };
 
 static struct i2c_pnx_data i2c1_data = {
 	.adapter = &pnx_adapter1,
+	.base = PNX4008_I2C2_BASE,
+	.irq = I2C_2_INT,
 };
 
 static struct i2c_pnx_data i2c2_data = {
 	.adapter = &pnx_adapter2,
+	.base = (PNX4008_USB_CONFIG_BASE + 0x300),
+	.irq = USB_I2C_INT,
 };
 
 static struct platform_device i2c0_device = {
diff --git a/drivers/i2c/busses/i2c-pnx.c b/drivers/i2c/busses/i2c-pnx.c
index 882579e64111..1d66856a22fd 100644
--- a/drivers/i2c/busses/i2c-pnx.c
+++ b/drivers/i2c/busses/i2c-pnx.c
@@ -587,10 +587,16 @@ static int __devinit i2c_pnx_probe(struct platform_device *pdev)
 		goto out;
 	}
 
+	alg_data = kzalloc(sizeof(*alg_data), GFP_KERNEL);
+	if (!alg_data) {
+		ret = -ENOMEM;
+		goto err_kzalloc;
+	}
+
 	platform_set_drvdata(pdev, i2c_pnx);
 
 	i2c_pnx->adapter->algo = &pnx_algorithm;
-	alg_data = i2c_pnx->adapter->algo_data;
+	i2c_pnx->adapter->algo_data = alg_data;
 
 	alg_data->clk = clk_get(&pdev->dev, NULL);
 	if (IS_ERR(alg_data->clk)) {
@@ -603,16 +609,16 @@ static int __devinit i2c_pnx_probe(struct platform_device *pdev)
 	alg_data->mif.timer.data = (unsigned long)i2c_pnx->adapter;
 
 	/* Register I/O resource */
-	if (!request_mem_region(alg_data->base, I2C_PNX_REGION_SIZE,
+	if (!request_mem_region(i2c_pnx->base, I2C_PNX_REGION_SIZE,
 				pdev->name)) {
 		dev_err(&pdev->dev,
 		       "I/O region 0x%08x for I2C already in use.\n",
-		       alg_data->base);
+		       i2c_pnx->base);
 		ret = -ENODEV;
 		goto out_clkget;
 	}
 
-	alg_data->ioaddr = ioremap(alg_data->base, I2C_PNX_REGION_SIZE);
+	alg_data->ioaddr = ioremap(i2c_pnx->base, I2C_PNX_REGION_SIZE);
 	if (!alg_data->ioaddr) {
 		dev_err(&pdev->dev, "Couldn't ioremap I2C I/O region\n");
 		ret = -ENOMEM;
@@ -647,7 +653,7 @@ static int __devinit i2c_pnx_probe(struct platform_device *pdev)
 	}
 	init_completion(&alg_data->mif.complete);
 
-	ret = request_irq(alg_data->irq, i2c_pnx_interrupt,
+	ret = request_irq(i2c_pnx->irq, i2c_pnx_interrupt,
 			0, pdev->name, i2c_pnx->adapter);
 	if (ret)
 		goto out_clock;
@@ -662,21 +668,23 @@ static int __devinit i2c_pnx_probe(struct platform_device *pdev)
 	}
 
 	dev_dbg(&pdev->dev, "%s: Master at %#8x, irq %d.\n",
-	       i2c_pnx->adapter->name, alg_data->base, alg_data->irq);
+	       i2c_pnx->adapter->name, i2c_pnx->base, i2c_pnx->irq);
 
 	return 0;
 
 out_irq:
-	free_irq(alg_data->irq, i2c_pnx->adapter);
+	free_irq(i2c_pnx->irq, i2c_pnx->adapter);
 out_clock:
 	clk_disable(alg_data->clk);
 out_unmap:
 	iounmap(alg_data->ioaddr);
 out_release:
-	release_mem_region(alg_data->base, I2C_PNX_REGION_SIZE);
+	release_mem_region(i2c_pnx->base, I2C_PNX_REGION_SIZE);
 out_clkget:
 	clk_put(alg_data->clk);
 out_drvdata:
+	kfree(alg_data);
+err_kzalloc:
 	platform_set_drvdata(pdev, NULL);
 out:
 	return ret;
@@ -688,12 +696,13 @@ static int __devexit i2c_pnx_remove(struct platform_device *pdev)
 	struct i2c_adapter *adap = i2c_pnx->adapter;
 	struct i2c_pnx_algo_data *alg_data = adap->algo_data;
 
-	free_irq(alg_data->irq, i2c_pnx->adapter);
+	free_irq(i2c_pnx->irq, i2c_pnx->adapter);
 	i2c_del_adapter(adap);
 	clk_disable(alg_data->clk);
 	iounmap(alg_data->ioaddr);
-	release_mem_region(alg_data->base, I2C_PNX_REGION_SIZE);
+	release_mem_region(i2c_pnx->base, I2C_PNX_REGION_SIZE);
 	clk_put(alg_data->clk);
+	kfree(alg_data);
 	platform_set_drvdata(pdev, NULL);
 
 	return 0;
diff --git a/include/linux/i2c-pnx.h b/include/linux/i2c-pnx.h
index 5a48f33046fb..9ebdf882d032 100644
--- a/include/linux/i2c-pnx.h
+++ b/include/linux/i2c-pnx.h
@@ -25,9 +25,7 @@ struct i2c_pnx_mif {
 };
 
 struct i2c_pnx_algo_data {
-	u32			base;
 	void __iomem		*ioaddr;
-	int			irq;
 	struct i2c_pnx_mif	mif;
 	int			last;
 	struct clk		*clk;
@@ -35,6 +33,8 @@ struct i2c_pnx_algo_data {
 
 struct i2c_pnx_data {
 	struct i2c_adapter *adapter;
+	u32 base;
+	int irq;
 };
 
 #endif /* __I2C_PNX_H__ */
-- 
cgit v1.2.3


From 9d7f73632c87ef1b6187eb539d1efd63c3cf0e36 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Sat, 21 Nov 2009 12:25:27 +0000
Subject: ARM: PNX4008: move i2c_adapter structure inside the drivers private
 data

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/mach-pnx4008/i2c.c  | 18 +++---------------
 drivers/i2c/busses/i2c-pnx.c | 40 ++++++++++++++++++++--------------------
 include/linux/i2c-pnx.h      |  4 +++-
 3 files changed, 26 insertions(+), 36 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-pnx4008/i2c.c b/arch/arm/mach-pnx4008/i2c.c
index 33146e832dd7..8103f9644e2d 100644
--- a/arch/arm/mach-pnx4008/i2c.c
+++ b/arch/arm/mach-pnx4008/i2c.c
@@ -18,32 +18,20 @@
 #include <mach/irqs.h>
 #include <mach/i2c.h>
 
-static struct i2c_adapter pnx_adapter0 = {
-	.name = I2C_CHIP_NAME "0",
-};
-
-static struct i2c_adapter pnx_adapter1 = {
-	.name = I2C_CHIP_NAME "1",
-};
-
-static struct i2c_adapter pnx_adapter2 = {
-	.name = "USB-I2C",
-};
-
 static struct i2c_pnx_data i2c0_data = {
-	.adapter = &pnx_adapter0,
+	.name = I2C_CHIP_NAME "0",
 	.base = PNX4008_I2C1_BASE,
 	.irq = I2C_1_INT,
 };
 
 static struct i2c_pnx_data i2c1_data = {
-	.adapter = &pnx_adapter1,
+	.name = I2C_CHIP_NAME "1",
 	.base = PNX4008_I2C2_BASE,
 	.irq = I2C_2_INT,
 };
 
 static struct i2c_pnx_data i2c2_data = {
-	.adapter = &pnx_adapter2,
+	.name = "USB-I2C",
 	.base = (PNX4008_USB_CONFIG_BASE + 0x300),
 	.irq = USB_I2C_INT,
 };
diff --git a/drivers/i2c/busses/i2c-pnx.c b/drivers/i2c/busses/i2c-pnx.c
index 1d66856a22fd..6b413c5300d3 100644
--- a/drivers/i2c/busses/i2c-pnx.c
+++ b/drivers/i2c/busses/i2c-pnx.c
@@ -551,8 +551,7 @@ static struct i2c_algorithm pnx_algorithm = {
 static int i2c_pnx_controller_suspend(struct platform_device *pdev,
 				      pm_message_t state)
 {
-	struct i2c_pnx_data *i2c_pnx = platform_get_drvdata(pdev);
-	struct i2c_pnx_algo_data *alg_data = i2c_pnx->adapter->algo_data;
+	struct i2c_pnx_algo_data *alg_data = platform_get_drvdata(pdev);
 
 	/* FIXME: shouldn't this be clk_disable? */
 	clk_enable(alg_data->clk);
@@ -562,8 +561,7 @@ static int i2c_pnx_controller_suspend(struct platform_device *pdev,
 
 static int i2c_pnx_controller_resume(struct platform_device *pdev)
 {
-	struct i2c_pnx_data *i2c_pnx = platform_get_drvdata(pdev);
-	struct i2c_pnx_algo_data *alg_data = i2c_pnx->adapter->algo_data;
+	struct i2c_pnx_algo_data *alg_data = platform_get_drvdata(pdev);
 
 	return clk_enable(alg_data->clk);
 }
@@ -580,7 +578,7 @@ static int __devinit i2c_pnx_probe(struct platform_device *pdev)
 	unsigned long freq;
 	struct i2c_pnx_data *i2c_pnx = pdev->dev.platform_data;
 
-	if (!i2c_pnx || !i2c_pnx->adapter) {
+	if (!i2c_pnx || !i2c_pnx->name) {
 		dev_err(&pdev->dev, "%s: no platform data supplied\n",
 		       __func__);
 		ret = -EINVAL;
@@ -593,10 +591,15 @@ static int __devinit i2c_pnx_probe(struct platform_device *pdev)
 		goto err_kzalloc;
 	}
 
-	platform_set_drvdata(pdev, i2c_pnx);
+	platform_set_drvdata(pdev, alg_data);
 
-	i2c_pnx->adapter->algo = &pnx_algorithm;
-	i2c_pnx->adapter->algo_data = alg_data;
+	strlcpy(alg_data->adapter.name, i2c_pnx->name,
+		sizeof(alg_data->adapter.name));
+	alg_data->adapter.dev.parent = &pdev->dev;
+	alg_data->adapter.algo = &pnx_algorithm;
+	alg_data->adapter.algo_data = alg_data;
+	alg_data->adapter.nr = pdev->id;
+	alg_data->i2c_pnx = i2c_pnx;
 
 	alg_data->clk = clk_get(&pdev->dev, NULL);
 	if (IS_ERR(alg_data->clk)) {
@@ -606,7 +609,7 @@ static int __devinit i2c_pnx_probe(struct platform_device *pdev)
 
 	init_timer(&alg_data->mif.timer);
 	alg_data->mif.timer.function = i2c_pnx_timeout;
-	alg_data->mif.timer.data = (unsigned long)i2c_pnx->adapter;
+	alg_data->mif.timer.data = (unsigned long)&alg_data->adapter;
 
 	/* Register I/O resource */
 	if (!request_mem_region(i2c_pnx->base, I2C_PNX_REGION_SIZE,
@@ -654,26 +657,24 @@ static int __devinit i2c_pnx_probe(struct platform_device *pdev)
 	init_completion(&alg_data->mif.complete);
 
 	ret = request_irq(i2c_pnx->irq, i2c_pnx_interrupt,
-			0, pdev->name, i2c_pnx->adapter);
+			0, pdev->name, &alg_data->adapter);
 	if (ret)
 		goto out_clock;
 
 	/* Register this adapter with the I2C subsystem */
-	i2c_pnx->adapter->dev.parent = &pdev->dev;
-	i2c_pnx->adapter->nr = pdev->id;
-	ret = i2c_add_numbered_adapter(i2c_pnx->adapter);
+	ret = i2c_add_numbered_adapter(&alg_data->adapter);
 	if (ret < 0) {
 		dev_err(&pdev->dev, "I2C: Failed to add bus\n");
 		goto out_irq;
 	}
 
 	dev_dbg(&pdev->dev, "%s: Master at %#8x, irq %d.\n",
-	       i2c_pnx->adapter->name, i2c_pnx->base, i2c_pnx->irq);
+	       alg_data->adapter.name, i2c_pnx->base, i2c_pnx->irq);
 
 	return 0;
 
 out_irq:
-	free_irq(i2c_pnx->irq, i2c_pnx->adapter);
+	free_irq(i2c_pnx->irq, &alg_data->adapter);
 out_clock:
 	clk_disable(alg_data->clk);
 out_unmap:
@@ -692,12 +693,11 @@ out:
 
 static int __devexit i2c_pnx_remove(struct platform_device *pdev)
 {
-	struct i2c_pnx_data *i2c_pnx = platform_get_drvdata(pdev);
-	struct i2c_adapter *adap = i2c_pnx->adapter;
-	struct i2c_pnx_algo_data *alg_data = adap->algo_data;
+	struct i2c_pnx_algo_data *alg_data = platform_get_drvdata(pdev);
+	struct i2c_pnx_data *i2c_pnx = alg_data->i2c_pnx;
 
-	free_irq(i2c_pnx->irq, i2c_pnx->adapter);
-	i2c_del_adapter(adap);
+	free_irq(i2c_pnx->irq, &alg_data->adapter);
+	i2c_del_adapter(&alg_data->adapter);
 	clk_disable(alg_data->clk);
 	iounmap(alg_data->ioaddr);
 	release_mem_region(i2c_pnx->base, I2C_PNX_REGION_SIZE);
diff --git a/include/linux/i2c-pnx.h b/include/linux/i2c-pnx.h
index 9ebdf882d032..a87124d4d533 100644
--- a/include/linux/i2c-pnx.h
+++ b/include/linux/i2c-pnx.h
@@ -29,10 +29,12 @@ struct i2c_pnx_algo_data {
 	struct i2c_pnx_mif	mif;
 	int			last;
 	struct clk		*clk;
+	struct i2c_pnx_data	*i2c_pnx;
+	struct i2c_adapter	adapter;
 };
 
 struct i2c_pnx_data {
-	struct i2c_adapter *adapter;
+	const char *name;
 	u32 base;
 	int irq;
 };
-- 
cgit v1.2.3


From 08677214e318297f228237be0042aac754f48f1d Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Wed, 10 Feb 2010 01:20:20 -0800
Subject: x86: Make 64 bit use early_res instead of bootmem before slab

Finally we can use early_res to replace bootmem for x86_64 now.

Still can use CONFIG_NO_BOOTMEM to enable it or not.

-v2: fix 32bit compiling about MAX_DMA32_PFN
-v3: folded bug fix from LKML message below

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
LKML-Reference: <4B747239.4070907@kernel.org>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/Kconfig            |  13 +++
 arch/x86/include/asm/e820.h |   6 ++
 arch/x86/kernel/e820.c      | 159 +++++++++++++++++++++++++++++++++---
 arch/x86/kernel/setup.c     |   2 +
 arch/x86/mm/init_64.c       |   4 +
 arch/x86/mm/numa_64.c       |  20 +++--
 include/linux/bootmem.h     |   7 ++
 include/linux/mm.h          |   5 ++
 include/linux/mmzone.h      |   2 +
 mm/bootmem.c                | 195 +++++++++++++++++++++++++++++++++++++++++++-
 mm/page_alloc.c             |  59 +++++++++++++-
 mm/percpu.c                 |   3 +
 mm/sparse-vmemmap.c         |   2 +-
 13 files changed, 454 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index eb4092568f9e..95439843cebc 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -568,6 +568,19 @@ config PARAVIRT_DEBUG
 	  Enable to debug paravirt_ops internals.  Specifically, BUG if
 	  a paravirt_op is missing when it is called.
 
+config NO_BOOTMEM
+	default y
+	bool "Disable Bootmem code"
+	depends on X86_64
+	---help---
+	  Use early_res directly instead of bootmem before slab is ready.
+		- allocator (buddy) [generic]
+		- early allocator (bootmem) [generic]
+		- very early allocator (reserve_early*()) [x86]
+		- very very early allocator (early brk model) [x86]
+	  So reduce one layer between early allocator to final allocator
+
+
 config MEMTEST
 	bool "Memtest"
 	---help---
diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h
index 761249e396fe..7d72e5fb7008 100644
--- a/arch/x86/include/asm/e820.h
+++ b/arch/x86/include/asm/e820.h
@@ -117,6 +117,12 @@ extern void free_early(u64 start, u64 end);
 extern void early_res_to_bootmem(u64 start, u64 end);
 extern u64 early_reserve_e820(u64 startt, u64 sizet, u64 align);
 
+void reserve_early_without_check(u64 start, u64 end, char *name);
+u64 find_early_area(u64 ei_start, u64 ei_last, u64 start, u64 end,
+			 u64 size, u64 align);
+#include <linux/range.h>
+int get_free_all_memory_range(struct range **rangep, int nodeid);
+
 extern unsigned long e820_end_of_ram_pfn(void);
 extern unsigned long e820_end_of_low_ram_pfn(void);
 extern int e820_find_active_region(const struct e820entry *ei,
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index e09c18c8f3c1..90a85295f332 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -977,6 +977,25 @@ void __init reserve_early(u64 start, u64 end, char *name)
 	__reserve_early(start, end, name, 0);
 }
 
+void __init reserve_early_without_check(u64 start, u64 end, char *name)
+{
+	struct early_res *r;
+
+	if (start >= end)
+		return;
+
+	__check_and_double_early_res(end);
+
+	r = &early_res[early_res_count];
+
+	r->start = start;
+	r->end = end;
+	r->overlap_ok = 0;
+	if (name)
+		strncpy(r->name, name, sizeof(r->name) - 1);
+	early_res_count++;
+}
+
 void __init free_early(u64 start, u64 end)
 {
 	struct early_res *r;
@@ -991,6 +1010,94 @@ void __init free_early(u64 start, u64 end)
 	drop_range(i);
 }
 
+#ifdef CONFIG_NO_BOOTMEM
+static void __init subtract_early_res(struct range *range, int az)
+{
+	int i, count;
+	u64 final_start, final_end;
+	int idx = 0;
+
+	count  = 0;
+	for (i = 0; i < max_early_res && early_res[i].end; i++)
+		count++;
+
+	/* need to skip first one ?*/
+	if (early_res != early_res_x)
+		idx = 1;
+
+#if 1
+	printk(KERN_INFO "Subtract (%d early reservations)\n", count);
+#endif
+	for (i = idx; i < count; i++) {
+		struct early_res *r = &early_res[i];
+#if 0
+		printk(KERN_INFO "  #%d [%010llx - %010llx] %15s", i,
+			r->start, r->end, r->name);
+#endif
+		final_start = PFN_DOWN(r->start);
+		final_end = PFN_UP(r->end);
+		if (final_start >= final_end) {
+#if 0
+			printk(KERN_CONT "\n");
+#endif
+			continue;
+		}
+#if 0
+		printk(KERN_CONT " subtract pfn [%010llx - %010llx]\n",
+			final_start, final_end);
+#endif
+		subtract_range(range, az, final_start, final_end);
+	}
+
+}
+
+int __init get_free_all_memory_range(struct range **rangep, int nodeid)
+{
+	int i, count;
+	u64 start = 0, end;
+	u64 size;
+	u64 mem;
+	struct range *range;
+	int nr_range;
+
+	count  = 0;
+	for (i = 0; i < max_early_res && early_res[i].end; i++)
+		count++;
+
+	count *= 2;
+
+	size = sizeof(struct range) * count;
+#ifdef MAX_DMA32_PFN
+	if (max_pfn_mapped > MAX_DMA32_PFN)
+		start = MAX_DMA32_PFN << PAGE_SHIFT;
+#endif
+	end = max_pfn_mapped << PAGE_SHIFT;
+	mem = find_e820_area(start, end, size, sizeof(struct range));
+	if (mem == -1ULL)
+		panic("can not find more space for range free");
+
+	range = __va(mem);
+	/* use early_node_map[] and early_res to get range array at first */
+	memset(range, 0, size);
+	nr_range = 0;
+
+	/* need to go over early_node_map to find out good range for node */
+	nr_range = add_from_early_node_map(range, count, nr_range, nodeid);
+	subtract_early_res(range, count);
+	nr_range = clean_sort_range(range, count);
+
+	/* need to clear it ? */
+	if (nodeid == MAX_NUMNODES) {
+		memset(&early_res[0], 0,
+			 sizeof(struct early_res) * max_early_res);
+		early_res = NULL;
+		max_early_res = 0;
+	}
+
+	*rangep = range;
+	return nr_range;
+}
+#else
 void __init early_res_to_bootmem(u64 start, u64 end)
 {
 	int i, count;
@@ -1028,6 +1135,7 @@ void __init early_res_to_bootmem(u64 start, u64 end)
 	max_early_res = 0;
 	early_res_count = 0;
 }
+#endif
 
 /* Check for already reserved areas */
 static inline int __init bad_addr(u64 *addrp, u64 size, u64 align)
@@ -1081,6 +1189,35 @@ again:
 	return changed;
 }
 
+/*
+ * Find a free area with specified alignment in a specific range.
+ * only with the area.between start to end is active range from early_node_map
+ * so they are good as RAM
+ */
+u64 __init find_early_area(u64 ei_start, u64 ei_last, u64 start, u64 end,
+			 u64 size, u64 align)
+{
+	u64 addr, last;
+
+	addr = round_up(ei_start, align);
+	if (addr < start)
+		addr = round_up(start, align);
+	if (addr >= ei_last)
+		goto out;
+	while (bad_addr(&addr, size, align) && addr+size <= ei_last)
+		;
+	last = addr + size;
+	if (last > ei_last)
+		goto out;
+	if (last > end)
+		goto out;
+
+	return addr;
+
+out:
+	return -1ULL;
+}
+
 /*
  * Find a free area with specified alignment in a specific range.
  */
@@ -1090,24 +1227,20 @@ u64 __init find_e820_area(u64 start, u64 end, u64 size, u64 align)
 
 	for (i = 0; i < e820.nr_map; i++) {
 		struct e820entry *ei = &e820.map[i];
-		u64 addr, last;
-		u64 ei_last;
+		u64 addr;
+		u64 ei_start, ei_last;
 
 		if (ei->type != E820_RAM)
 			continue;
-		addr = round_up(ei->addr, align);
+
 		ei_last = ei->addr + ei->size;
-		if (addr < start)
-			addr = round_up(start, align);
-		if (addr >= ei_last)
-			continue;
-		while (bad_addr(&addr, size, align) && addr+size <= ei_last)
-			;
-		last = addr + size;
-		if (last > ei_last)
-			continue;
-		if (last > end)
+		ei_start = ei->addr;
+		addr = find_early_area(ei_start, ei_last, start, end,
+					 size, align);
+
+		if (addr == -1ULL)
 			continue;
+
 		return addr;
 	}
 	return -1ULL;
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index ea4141b48518..d49e168bda8c 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -967,7 +967,9 @@ void __init setup_arch(char **cmdline_p)
 #endif
 
 	initmem_init(0, max_pfn, acpi, k8);
+#ifndef CONFIG_NO_BOOTMEM
 	early_res_to_bootmem(0, max_low_pfn<<PAGE_SHIFT);
+#endif
 
 	dma32_reserve_bootmem();
 
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index a15abaae5ba4..53158b7e5d46 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -572,6 +572,7 @@ kernel_physical_mapping_init(unsigned long start,
 void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn,
 				int acpi, int k8)
 {
+#ifndef CONFIG_NO_BOOTMEM
 	unsigned long bootmap_size, bootmap;
 
 	bootmap_size = bootmem_bootmap_pages(end_pfn)<<PAGE_SHIFT;
@@ -585,6 +586,9 @@ void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn,
 					 0, end_pfn);
 	e820_register_active_regions(0, start_pfn, end_pfn);
 	free_bootmem_with_active_regions(0, end_pfn);
+#else
+	e820_register_active_regions(0, start_pfn, end_pfn);
+#endif
 }
 #endif
 
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 02f13cb99bc2..a20e17059afd 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -198,11 +198,13 @@ static void * __init early_node_mem(int nodeid, unsigned long start,
 void __init
 setup_node_bootmem(int nodeid, unsigned long start, unsigned long end)
 {
-	unsigned long start_pfn, last_pfn, bootmap_pages, bootmap_size;
+	unsigned long start_pfn, last_pfn, nodedata_phys;
 	const int pgdat_size = roundup(sizeof(pg_data_t), PAGE_SIZE);
-	unsigned long bootmap_start, nodedata_phys;
-	void *bootmap;
 	int nid;
+#ifndef CONFIG_NO_BOOTMEM
+	unsigned long bootmap_start, bootmap_pages, bootmap_size;
+	void *bootmap;
+#endif
 
 	if (!end)
 		return;
@@ -216,7 +218,7 @@ setup_node_bootmem(int nodeid, unsigned long start, unsigned long end)
 
 	start = roundup(start, ZONE_ALIGN);
 
-	printk(KERN_INFO "Bootmem setup node %d %016lx-%016lx\n", nodeid,
+	printk(KERN_INFO "Initmem setup node %d %016lx-%016lx\n", nodeid,
 	       start, end);
 
 	start_pfn = start >> PAGE_SHIFT;
@@ -235,10 +237,13 @@ setup_node_bootmem(int nodeid, unsigned long start, unsigned long end)
 		printk(KERN_INFO "    NODE_DATA(%d) on node %d\n", nodeid, nid);
 
 	memset(NODE_DATA(nodeid), 0, sizeof(pg_data_t));
-	NODE_DATA(nodeid)->bdata = &bootmem_node_data[nodeid];
+	NODE_DATA(nodeid)->node_id = nodeid;
 	NODE_DATA(nodeid)->node_start_pfn = start_pfn;
 	NODE_DATA(nodeid)->node_spanned_pages = last_pfn - start_pfn;
 
+#ifndef CONFIG_NO_BOOTMEM
+	NODE_DATA(nodeid)->bdata = &bootmem_node_data[nodeid];
+
 	/*
 	 * Find a place for the bootmem map
 	 * nodedata_phys could be on other nodes by alloc_bootmem,
@@ -275,6 +280,7 @@ setup_node_bootmem(int nodeid, unsigned long start, unsigned long end)
 		printk(KERN_INFO "    bootmap(%d) on node %d\n", nodeid, nid);
 
 	free_bootmem_with_active_regions(nodeid, end);
+#endif
 
 	node_set_online(nodeid);
 }
@@ -733,6 +739,10 @@ unsigned long __init numa_free_all_bootmem(void)
 	for_each_online_node(i)
 		pages += free_all_bootmem_node(NODE_DATA(i));
 
+#ifdef CONFIG_NO_BOOTMEM
+	pages += free_all_memory_core_early(MAX_NUMNODES);
+#endif
+
 	return pages;
 }
 
diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index b10ec49ee2dd..266ab9291232 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -23,6 +23,7 @@ extern unsigned long max_pfn;
 extern unsigned long saved_max_pfn;
 #endif
 
+#ifndef CONFIG_NO_BOOTMEM
 /*
  * node_bootmem_map is a map pointer - the bits represent all physical 
  * memory pages (including holes) on the node.
@@ -37,6 +38,7 @@ typedef struct bootmem_data {
 } bootmem_data_t;
 
 extern bootmem_data_t bootmem_node_data[];
+#endif
 
 extern unsigned long bootmem_bootmap_pages(unsigned long);
 
@@ -46,6 +48,7 @@ extern unsigned long init_bootmem_node(pg_data_t *pgdat,
 				       unsigned long endpfn);
 extern unsigned long init_bootmem(unsigned long addr, unsigned long memend);
 
+unsigned long free_all_memory_core_early(int nodeid);
 extern unsigned long free_all_bootmem_node(pg_data_t *pgdat);
 extern unsigned long free_all_bootmem(void);
 
@@ -84,6 +87,10 @@ extern void *__alloc_bootmem_node(pg_data_t *pgdat,
 				  unsigned long size,
 				  unsigned long align,
 				  unsigned long goal);
+void *__alloc_bootmem_node_high(pg_data_t *pgdat,
+				  unsigned long size,
+				  unsigned long align,
+				  unsigned long goal);
 extern void *__alloc_bootmem_node_nopanic(pg_data_t *pgdat,
 				  unsigned long size,
 				  unsigned long align,
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 8b2fa8593c61..f2c5b3cee8a1 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -12,6 +12,7 @@
 #include <linux/prio_tree.h>
 #include <linux/debug_locks.h>
 #include <linux/mm_types.h>
+#include <linux/range.h>
 
 struct mempolicy;
 struct anon_vma;
@@ -1049,6 +1050,10 @@ extern void get_pfn_range_for_nid(unsigned int nid,
 extern unsigned long find_min_pfn_with_active_regions(void);
 extern void free_bootmem_with_active_regions(int nid,
 						unsigned long max_low_pfn);
+int add_from_early_node_map(struct range *range, int az,
+				   int nr_range, int nid);
+void *__alloc_memory_core_early(int nodeid, u64 size, u64 align,
+				 u64 goal, u64 limit);
 typedef int (*work_fn_t)(unsigned long, unsigned long, void *);
 extern void work_with_active_regions(int nid, work_fn_t work_fn, void *data);
 extern void sparse_memory_present_with_active_regions(int nid);
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 30fe668c2542..eae8387b6007 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -620,7 +620,9 @@ typedef struct pglist_data {
 	struct page_cgroup *node_page_cgroup;
 #endif
 #endif
+#ifndef CONFIG_NO_BOOTMEM
 	struct bootmem_data *bdata;
+#endif
 #ifdef CONFIG_MEMORY_HOTPLUG
 	/*
 	 * Must be held any time you expect node_start_pfn, node_present_pages
diff --git a/mm/bootmem.c b/mm/bootmem.c
index 7d1486875e1c..d7c791ef0036 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -13,6 +13,7 @@
 #include <linux/bootmem.h>
 #include <linux/module.h>
 #include <linux/kmemleak.h>
+#include <linux/range.h>
 
 #include <asm/bug.h>
 #include <asm/io.h>
@@ -32,6 +33,7 @@ unsigned long max_pfn;
 unsigned long saved_max_pfn;
 #endif
 
+#ifndef CONFIG_NO_BOOTMEM
 bootmem_data_t bootmem_node_data[MAX_NUMNODES] __initdata;
 
 static struct list_head bdata_list __initdata = LIST_HEAD_INIT(bdata_list);
@@ -142,7 +144,7 @@ unsigned long __init init_bootmem(unsigned long start, unsigned long pages)
 	min_low_pfn = start;
 	return init_bootmem_core(NODE_DATA(0)->bdata, start, 0, pages);
 }
-
+#endif
 /*
  * free_bootmem_late - free bootmem pages directly to page allocator
  * @addr: starting address of the range
@@ -167,6 +169,60 @@ void __init free_bootmem_late(unsigned long addr, unsigned long size)
 	}
 }
 
+#ifdef CONFIG_NO_BOOTMEM
+static void __init __free_pages_memory(unsigned long start, unsigned long end)
+{
+	int i;
+	unsigned long start_aligned, end_aligned;
+	int order = ilog2(BITS_PER_LONG);
+
+	start_aligned = (start + (BITS_PER_LONG - 1)) & ~(BITS_PER_LONG - 1);
+	end_aligned = end & ~(BITS_PER_LONG - 1);
+
+	if (end_aligned <= start_aligned) {
+#if 1
+		printk(KERN_DEBUG " %lx - %lx\n", start, end);
+#endif
+		for (i = start; i < end; i++)
+			__free_pages_bootmem(pfn_to_page(i), 0);
+
+		return;
+	}
+
+#if 1
+	printk(KERN_DEBUG " %lx %lx - %lx %lx\n",
+		 start, start_aligned, end_aligned, end);
+#endif
+	for (i = start; i < start_aligned; i++)
+		__free_pages_bootmem(pfn_to_page(i), 0);
+
+	for (i = start_aligned; i < end_aligned; i += BITS_PER_LONG)
+		__free_pages_bootmem(pfn_to_page(i), order);
+
+	for (i = end_aligned; i < end; i++)
+		__free_pages_bootmem(pfn_to_page(i), 0);
+}
+
+unsigned long __init free_all_memory_core_early(int nodeid)
+{
+	int i;
+	u64 start, end;
+	unsigned long count = 0;
+	struct range *range = NULL;
+	int nr_range;
+
+	nr_range = get_free_all_memory_range(&range, nodeid);
+
+	for (i = 0; i < nr_range; i++) {
+		start = range[i].start;
+		end = range[i].end;
+		count += end - start;
+		__free_pages_memory(start, end);
+	}
+
+	return count;
+}
+#else
 static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
 {
 	int aligned;
@@ -227,6 +283,7 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
 
 	return count;
 }
+#endif
 
 /**
  * free_all_bootmem_node - release a node's free pages to the buddy allocator
@@ -237,7 +294,12 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
 unsigned long __init free_all_bootmem_node(pg_data_t *pgdat)
 {
 	register_page_bootmem_info_node(pgdat);
+#ifdef CONFIG_NO_BOOTMEM
+	/* free_all_memory_core_early(MAX_NUMNODES) will be called later */
+	return 0;
+#else
 	return free_all_bootmem_core(pgdat->bdata);
+#endif
 }
 
 /**
@@ -247,9 +309,14 @@ unsigned long __init free_all_bootmem_node(pg_data_t *pgdat)
  */
 unsigned long __init free_all_bootmem(void)
 {
+#ifdef CONFIG_NO_BOOTMEM
+	return free_all_memory_core_early(NODE_DATA(0)->node_id);
+#else
 	return free_all_bootmem_core(NODE_DATA(0)->bdata);
+#endif
 }
 
+#ifndef CONFIG_NO_BOOTMEM
 static void __init __free(bootmem_data_t *bdata,
 			unsigned long sidx, unsigned long eidx)
 {
@@ -344,6 +411,7 @@ static int __init mark_bootmem(unsigned long start, unsigned long end,
 	}
 	BUG();
 }
+#endif
 
 /**
  * free_bootmem_node - mark a page range as usable
@@ -358,6 +426,12 @@ static int __init mark_bootmem(unsigned long start, unsigned long end,
 void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
 			      unsigned long size)
 {
+#ifdef CONFIG_NO_BOOTMEM
+	free_early(physaddr, physaddr + size);
+#if 0
+	printk(KERN_DEBUG "free %lx %lx\n", physaddr, size);
+#endif
+#else
 	unsigned long start, end;
 
 	kmemleak_free_part(__va(physaddr), size);
@@ -366,6 +440,7 @@ void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
 	end = PFN_DOWN(physaddr + size);
 
 	mark_bootmem_node(pgdat->bdata, start, end, 0, 0);
+#endif
 }
 
 /**
@@ -379,6 +454,12 @@ void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
  */
 void __init free_bootmem(unsigned long addr, unsigned long size)
 {
+#ifdef CONFIG_NO_BOOTMEM
+	free_early(addr, addr + size);
+#if 0
+	printk(KERN_DEBUG "free %lx %lx\n", addr, size);
+#endif
+#else
 	unsigned long start, end;
 
 	kmemleak_free_part(__va(addr), size);
@@ -387,6 +468,7 @@ void __init free_bootmem(unsigned long addr, unsigned long size)
 	end = PFN_DOWN(addr + size);
 
 	mark_bootmem(start, end, 0, 0);
+#endif
 }
 
 /**
@@ -403,12 +485,17 @@ void __init free_bootmem(unsigned long addr, unsigned long size)
 int __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
 				 unsigned long size, int flags)
 {
+#ifdef CONFIG_NO_BOOTMEM
+	panic("no bootmem");
+	return 0;
+#else
 	unsigned long start, end;
 
 	start = PFN_DOWN(physaddr);
 	end = PFN_UP(physaddr + size);
 
 	return mark_bootmem_node(pgdat->bdata, start, end, 1, flags);
+#endif
 }
 
 /**
@@ -424,14 +511,20 @@ int __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
 int __init reserve_bootmem(unsigned long addr, unsigned long size,
 			    int flags)
 {
+#ifdef CONFIG_NO_BOOTMEM
+	panic("no bootmem");
+	return 0;
+#else
 	unsigned long start, end;
 
 	start = PFN_DOWN(addr);
 	end = PFN_UP(addr + size);
 
 	return mark_bootmem(start, end, 1, flags);
+#endif
 }
 
+#ifndef CONFIG_NO_BOOTMEM
 static unsigned long __init align_idx(struct bootmem_data *bdata,
 				      unsigned long idx, unsigned long step)
 {
@@ -582,12 +675,33 @@ static void * __init alloc_arch_preferred_bootmem(bootmem_data_t *bdata,
 #endif
 	return NULL;
 }
+#endif
 
 static void * __init ___alloc_bootmem_nopanic(unsigned long size,
 					unsigned long align,
 					unsigned long goal,
 					unsigned long limit)
 {
+#ifdef CONFIG_NO_BOOTMEM
+	void *ptr;
+
+	if (WARN_ON_ONCE(slab_is_available()))
+		return kzalloc(size, GFP_NOWAIT);
+
+restart:
+
+	ptr = __alloc_memory_core_early(MAX_NUMNODES, size, align, goal, limit);
+
+	if (ptr)
+		return ptr;
+
+	if (goal != 0) {
+		goal = 0;
+		goto restart;
+	}
+
+	return NULL;
+#else
 	bootmem_data_t *bdata;
 	void *region;
 
@@ -613,6 +727,7 @@ restart:
 	}
 
 	return NULL;
+#endif
 }
 
 /**
@@ -631,7 +746,13 @@ restart:
 void * __init __alloc_bootmem_nopanic(unsigned long size, unsigned long align,
 					unsigned long goal)
 {
-	return ___alloc_bootmem_nopanic(size, align, goal, 0);
+	unsigned long limit = 0;
+
+#ifdef CONFIG_NO_BOOTMEM
+	limit = -1UL;
+#endif
+
+	return ___alloc_bootmem_nopanic(size, align, goal, limit);
 }
 
 static void * __init ___alloc_bootmem(unsigned long size, unsigned long align,
@@ -665,9 +786,16 @@ static void * __init ___alloc_bootmem(unsigned long size, unsigned long align,
 void * __init __alloc_bootmem(unsigned long size, unsigned long align,
 			      unsigned long goal)
 {
-	return ___alloc_bootmem(size, align, goal, 0);
+	unsigned long limit = 0;
+
+#ifdef CONFIG_NO_BOOTMEM
+	limit = -1UL;
+#endif
+
+	return ___alloc_bootmem(size, align, goal, limit);
 }
 
+#ifndef CONFIG_NO_BOOTMEM
 static void * __init ___alloc_bootmem_node(bootmem_data_t *bdata,
 				unsigned long size, unsigned long align,
 				unsigned long goal, unsigned long limit)
@@ -684,6 +812,7 @@ static void * __init ___alloc_bootmem_node(bootmem_data_t *bdata,
 
 	return ___alloc_bootmem(size, align, goal, limit);
 }
+#endif
 
 /**
  * __alloc_bootmem_node - allocate boot memory from a specific node
@@ -706,7 +835,46 @@ void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size,
 	if (WARN_ON_ONCE(slab_is_available()))
 		return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
 
+#ifdef CONFIG_NO_BOOTMEM
+	return __alloc_memory_core_early(pgdat->node_id, size, align,
+					 goal, -1ULL);
+#else
 	return ___alloc_bootmem_node(pgdat->bdata, size, align, goal, 0);
+#endif
+}
+
+void * __init __alloc_bootmem_node_high(pg_data_t *pgdat, unsigned long size,
+				   unsigned long align, unsigned long goal)
+{
+#ifdef MAX_DMA32_PFN
+	unsigned long end_pfn;
+
+	if (WARN_ON_ONCE(slab_is_available()))
+		return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
+
+	/* update goal according ...MAX_DMA32_PFN */
+	end_pfn = pgdat->node_start_pfn + pgdat->node_spanned_pages;
+
+	if (end_pfn > MAX_DMA32_PFN + (128 >> (20 - PAGE_SHIFT)) &&
+	    (goal >> PAGE_SHIFT) < MAX_DMA32_PFN) {
+		void *ptr;
+		unsigned long new_goal;
+
+		new_goal = MAX_DMA32_PFN << PAGE_SHIFT;
+#ifdef CONFIG_NO_BOOTMEM
+		ptr =  __alloc_memory_core_early(pgdat->node_id, size, align,
+						 new_goal, -1ULL);
+#else
+		ptr = alloc_bootmem_core(pgdat->bdata, size, align,
+						 new_goal, 0);
+#endif
+		if (ptr)
+			return ptr;
+	}
+#endif
+
+	return __alloc_bootmem_node(pgdat, size, align, goal);
+
 }
 
 #ifdef CONFIG_SPARSEMEM
@@ -720,6 +888,16 @@ void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size,
 void * __init alloc_bootmem_section(unsigned long size,
 				    unsigned long section_nr)
 {
+#ifdef CONFIG_NO_BOOTMEM
+	unsigned long pfn, goal, limit;
+
+	pfn = section_nr_to_pfn(section_nr);
+	goal = pfn << PAGE_SHIFT;
+	limit = section_nr_to_pfn(section_nr + 1) << PAGE_SHIFT;
+
+	return __alloc_memory_core_early(early_pfn_to_nid(pfn), size,
+					 SMP_CACHE_BYTES, goal, limit);
+#else
 	bootmem_data_t *bdata;
 	unsigned long pfn, goal, limit;
 
@@ -729,6 +907,7 @@ void * __init alloc_bootmem_section(unsigned long size,
 	bdata = &bootmem_node_data[early_pfn_to_nid(pfn)];
 
 	return alloc_bootmem_core(bdata, size, SMP_CACHE_BYTES, goal, limit);
+#endif
 }
 #endif
 
@@ -740,11 +919,16 @@ void * __init __alloc_bootmem_node_nopanic(pg_data_t *pgdat, unsigned long size,
 	if (WARN_ON_ONCE(slab_is_available()))
 		return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
 
+#ifdef CONFIG_NO_BOOTMEM
+	ptr =  __alloc_memory_core_early(pgdat->node_id, size, align,
+						 goal, -1ULL);
+#else
 	ptr = alloc_arch_preferred_bootmem(pgdat->bdata, size, align, goal, 0);
 	if (ptr)
 		return ptr;
 
 	ptr = alloc_bootmem_core(pgdat->bdata, size, align, goal, 0);
+#endif
 	if (ptr)
 		return ptr;
 
@@ -795,6 +979,11 @@ void * __init __alloc_bootmem_low_node(pg_data_t *pgdat, unsigned long size,
 	if (WARN_ON_ONCE(slab_is_available()))
 		return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
 
+#ifdef CONFIG_NO_BOOTMEM
+	return __alloc_memory_core_early(pgdat->node_id, size, align,
+				goal, ARCH_LOW_ADDRESS_LIMIT);
+#else
 	return ___alloc_bootmem_node(pgdat->bdata, size, align,
 				goal, ARCH_LOW_ADDRESS_LIMIT);
+#endif
 }
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 8deb9d0fd5b1..78821a28e394 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3435,6 +3435,59 @@ void __init free_bootmem_with_active_regions(int nid,
 	}
 }
 
+int __init add_from_early_node_map(struct range *range, int az,
+				   int nr_range, int nid)
+{
+	int i;
+	u64 start, end;
+
+	/* need to go over early_node_map to find out good range for node */
+	for_each_active_range_index_in_nid(i, nid) {
+		start = early_node_map[i].start_pfn;
+		end = early_node_map[i].end_pfn;
+		nr_range = add_range(range, az, nr_range, start, end);
+	}
+	return nr_range;
+}
+
+void * __init __alloc_memory_core_early(int nid, u64 size, u64 align,
+					u64 goal, u64 limit)
+{
+	int i;
+	void *ptr;
+
+	/* need to go over early_node_map to find out good range for node */
+	for_each_active_range_index_in_nid(i, nid) {
+		u64 addr;
+		u64 ei_start, ei_last;
+
+		ei_last = early_node_map[i].end_pfn;
+		ei_last <<= PAGE_SHIFT;
+		ei_start = early_node_map[i].start_pfn;
+		ei_start <<= PAGE_SHIFT;
+		addr = find_early_area(ei_start, ei_last,
+					 goal, limit, size, align);
+
+		if (addr == -1ULL)
+			continue;
+
+#if 0
+		printk(KERN_DEBUG "alloc (nid=%d %llx - %llx) (%llx - %llx) %llx %llx => %llx\n",
+				nid,
+				ei_start, ei_last, goal, limit, size,
+				align, addr);
+#endif
+
+		ptr = phys_to_virt(addr);
+		memset(ptr, 0, size);
+		reserve_early_without_check(addr, addr + size, "BOOTMEM");
+		return ptr;
+	}
+
+	return NULL;
+}
+
+
 void __init work_with_active_regions(int nid, work_fn_t work_fn, void *data)
 {
 	int i;
@@ -4467,7 +4520,11 @@ void __init set_dma_reserve(unsigned long new_dma_reserve)
 }
 
 #ifndef CONFIG_NEED_MULTIPLE_NODES
-struct pglist_data __refdata contig_page_data = { .bdata = &bootmem_node_data[0] };
+struct pglist_data __refdata contig_page_data = {
+#ifndef CONFIG_NO_BOOTMEM
+ .bdata = &bootmem_node_data[0]
+#endif
+ };
 EXPORT_SYMBOL(contig_page_data);
 #endif
 
diff --git a/mm/percpu.c b/mm/percpu.c
index 083e7c91e5f6..841defeeef86 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -1929,7 +1929,10 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, ssize_t dyn_size,
 			}
 			/* copy and return the unused part */
 			memcpy(ptr, __per_cpu_load, ai->static_size);
+#ifndef CONFIG_NO_BOOTMEM
+			/* fix partial free ! */
 			free_fn(ptr + size_sum, ai->unit_size - size_sum);
+#endif
 		}
 	}
 
diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
index d9714bdcb4a3..9506c39942f6 100644
--- a/mm/sparse-vmemmap.c
+++ b/mm/sparse-vmemmap.c
@@ -40,7 +40,7 @@ static void * __init_refok __earlyonly_bootmem_alloc(int node,
 				unsigned long align,
 				unsigned long goal)
 {
-	return __alloc_bootmem_node(NODE_DATA(node), size, align, goal);
+	return __alloc_bootmem_node_high(NODE_DATA(node), size, align, goal);
 }
 
 
-- 
cgit v1.2.3


From 9bdac914240759457175ac0d6529a37d2820bc4d Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Wed, 10 Feb 2010 01:20:22 -0800
Subject: sparsemem: Put mem map for one node together.

Add vmemmap_alloc_block_buf for mem map only.

It will fallback to the old way if it cannot get a block that big.

Before this patch, when a node have 128g ram installed, memmap are
split into two parts or more.
[    0.000000]  [ffffea0000000000-ffffea003fffffff] PMD -> [ffff880100600000-ffff88013e9fffff] on node 1
[    0.000000]  [ffffea0040000000-ffffea006fffffff] PMD -> [ffff88013ec00000-ffff88016ebfffff] on node 1
[    0.000000]  [ffffea0070000000-ffffea007fffffff] PMD -> [ffff882000600000-ffff8820105fffff] on node 0
[    0.000000]  [ffffea0080000000-ffffea00bfffffff] PMD -> [ffff882010800000-ffff8820507fffff] on node 0
[    0.000000]  [ffffea00c0000000-ffffea00dfffffff] PMD -> [ffff882050a00000-ffff8820709fffff] on node 0
[    0.000000]  [ffffea00e0000000-ffffea00ffffffff] PMD -> [ffff884000600000-ffff8840205fffff] on node 2
[    0.000000]  [ffffea0100000000-ffffea013fffffff] PMD -> [ffff884020800000-ffff8840607fffff] on node 2
[    0.000000]  [ffffea0140000000-ffffea014fffffff] PMD -> [ffff884060a00000-ffff8840709fffff] on node 2
[    0.000000]  [ffffea0150000000-ffffea017fffffff] PMD -> [ffff886000600000-ffff8860305fffff] on node 3
[    0.000000]  [ffffea0180000000-ffffea01bfffffff] PMD -> [ffff886030800000-ffff8860707fffff] on node 3
[    0.000000]  [ffffea01c0000000-ffffea01ffffffff] PMD -> [ffff888000600000-ffff8880405fffff] on node 4
[    0.000000]  [ffffea0200000000-ffffea022fffffff] PMD -> [ffff888040800000-ffff8880707fffff] on node 4
[    0.000000]  [ffffea0230000000-ffffea023fffffff] PMD -> [ffff88a000600000-ffff88a0105fffff] on node 5
[    0.000000]  [ffffea0240000000-ffffea027fffffff] PMD -> [ffff88a010800000-ffff88a0507fffff] on node 5
[    0.000000]  [ffffea0280000000-ffffea029fffffff] PMD -> [ffff88a050a00000-ffff88a0709fffff] on node 5
[    0.000000]  [ffffea02a0000000-ffffea02bfffffff] PMD -> [ffff88c000600000-ffff88c0205fffff] on node 6
[    0.000000]  [ffffea02c0000000-ffffea02ffffffff] PMD -> [ffff88c020800000-ffff88c0607fffff] on node 6
[    0.000000]  [ffffea0300000000-ffffea030fffffff] PMD -> [ffff88c060a00000-ffff88c0709fffff] on node 6
[    0.000000]  [ffffea0310000000-ffffea033fffffff] PMD -> [ffff88e000600000-ffff88e0305fffff] on node 7
[    0.000000]  [ffffea0340000000-ffffea037fffffff] PMD -> [ffff88e030800000-ffff88e0707fffff] on node 7

after patch will get
[    0.000000]  [ffffea0000000000-ffffea006fffffff] PMD -> [ffff880100200000-ffff88016e5fffff] on node 0
[    0.000000]  [ffffea0070000000-ffffea00dfffffff] PMD -> [ffff882000200000-ffff8820701fffff] on node 1
[    0.000000]  [ffffea00e0000000-ffffea014fffffff] PMD -> [ffff884000200000-ffff8840701fffff] on node 2
[    0.000000]  [ffffea0150000000-ffffea01bfffffff] PMD -> [ffff886000200000-ffff8860701fffff] on node 3
[    0.000000]  [ffffea01c0000000-ffffea022fffffff] PMD -> [ffff888000200000-ffff8880701fffff] on node 4
[    0.000000]  [ffffea0230000000-ffffea029fffffff] PMD -> [ffff88a000200000-ffff88a0701fffff] on node 5
[    0.000000]  [ffffea02a0000000-ffffea030fffffff] PMD -> [ffff88c000200000-ffff88c0701fffff] on node 6
[    0.000000]  [ffffea0310000000-ffffea037fffffff] PMD -> [ffff88e000200000-ffff88e0701fffff] on node 7

-v2: change buf to vmemmap_buf instead according to Ingo
     also add CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER according to Ingo
-v3: according to Andrew, use sizeof(name) instead of hard coded 15

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
LKML-Reference: <1265793639-15071-19-git-send-email-yinghai@kernel.org>
Cc: Christoph Lameter <cl@linux-foundation.org>
Acked-by: Christoph Lameter <cl@linux-foundation.org>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/mm/init_64.c |   2 +-
 include/linux/mm.h    |   7 ++++
 mm/Kconfig            |   4 ++
 mm/sparse-vmemmap.c   |  74 ++++++++++++++++++++++++++++++++-
 mm/sparse.c           | 111 +++++++++++++++++++++++++++++++++++++++++++++++++-
 5 files changed, 195 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 53158b7e5d46..e9b040e1cde5 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -977,7 +977,7 @@ vmemmap_populate(struct page *start_page, unsigned long size, int node)
 			if (pmd_none(*pmd)) {
 				pte_t entry;
 
-				p = vmemmap_alloc_block(PMD_SIZE, node);
+				p = vmemmap_alloc_block_buf(PMD_SIZE, node);
 				if (!p)
 					return -ENOMEM;
 
diff --git a/include/linux/mm.h b/include/linux/mm.h
index f2c5b3cee8a1..f6002e5dc187 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1326,12 +1326,19 @@ extern int randomize_va_space;
 const char * arch_vma_name(struct vm_area_struct *vma);
 void print_vma_addr(char *prefix, unsigned long rip);
 
+void sparse_mem_maps_populate_node(struct page **map_map,
+				   unsigned long pnum_begin,
+				   unsigned long pnum_end,
+				   unsigned long map_count,
+				   int nodeid);
+
 struct page *sparse_mem_map_populate(unsigned long pnum, int nid);
 pgd_t *vmemmap_pgd_populate(unsigned long addr, int node);
 pud_t *vmemmap_pud_populate(pgd_t *pgd, unsigned long addr, int node);
 pmd_t *vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node);
 pte_t *vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node);
 void *vmemmap_alloc_block(unsigned long size, int node);
+void *vmemmap_alloc_block_buf(unsigned long size, int node);
 void vmemmap_verify(pte_t *, int, unsigned long, unsigned long);
 int vmemmap_populate_basepages(struct page *start_page,
 						unsigned long pages, int node);
diff --git a/mm/Kconfig b/mm/Kconfig
index 17b8947aa7da..e4a33b9479b2 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -115,6 +115,10 @@ config SPARSEMEM_EXTREME
 config SPARSEMEM_VMEMMAP_ENABLE
 	bool
 
+config SPARSEMEM_ALLOC_MEM_MAP_TOGETHER
+	def_bool y
+	depends on SPARSEMEM && X86_64
+
 config SPARSEMEM_VMEMMAP
 	bool "Sparse Memory virtual memmap"
 	depends on SPARSEMEM && SPARSEMEM_VMEMMAP_ENABLE
diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
index 9506c39942f6..392b9bb5bc01 100644
--- a/mm/sparse-vmemmap.c
+++ b/mm/sparse-vmemmap.c
@@ -43,6 +43,8 @@ static void * __init_refok __earlyonly_bootmem_alloc(int node,
 	return __alloc_bootmem_node_high(NODE_DATA(node), size, align, goal);
 }
 
+static void *vmemmap_buf;
+static void *vmemmap_buf_end;
 
 void * __meminit vmemmap_alloc_block(unsigned long size, int node)
 {
@@ -64,6 +66,24 @@ void * __meminit vmemmap_alloc_block(unsigned long size, int node)
 				__pa(MAX_DMA_ADDRESS));
 }
 
+/* need to make sure size is all the same during early stage */
+void * __meminit vmemmap_alloc_block_buf(unsigned long size, int node)
+{
+	void *ptr;
+
+	if (!vmemmap_buf)
+		return vmemmap_alloc_block(size, node);
+
+	/* take the from buf */
+	ptr = (void *)ALIGN((unsigned long)vmemmap_buf, size);
+	if (ptr + size > vmemmap_buf_end)
+		return vmemmap_alloc_block(size, node);
+
+	vmemmap_buf = ptr + size;
+
+	return ptr;
+}
+
 void __meminit vmemmap_verify(pte_t *pte, int node,
 				unsigned long start, unsigned long end)
 {
@@ -80,7 +100,7 @@ pte_t * __meminit vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node)
 	pte_t *pte = pte_offset_kernel(pmd, addr);
 	if (pte_none(*pte)) {
 		pte_t entry;
-		void *p = vmemmap_alloc_block(PAGE_SIZE, node);
+		void *p = vmemmap_alloc_block_buf(PAGE_SIZE, node);
 		if (!p)
 			return NULL;
 		entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL);
@@ -163,3 +183,55 @@ struct page * __meminit sparse_mem_map_populate(unsigned long pnum, int nid)
 
 	return map;
 }
+
+void __init sparse_mem_maps_populate_node(struct page **map_map,
+					  unsigned long pnum_begin,
+					  unsigned long pnum_end,
+					  unsigned long map_count, int nodeid)
+{
+	unsigned long pnum;
+	unsigned long size = sizeof(struct page) * PAGES_PER_SECTION;
+	void *vmemmap_buf_start;
+
+	size = ALIGN(size, PMD_SIZE);
+	vmemmap_buf_start = __earlyonly_bootmem_alloc(nodeid, size * map_count,
+			 PMD_SIZE, __pa(MAX_DMA_ADDRESS));
+
+	if (vmemmap_buf_start) {
+		vmemmap_buf = vmemmap_buf_start;
+		vmemmap_buf_end = vmemmap_buf_start + size * map_count;
+	}
+
+	for (pnum = pnum_begin; pnum < pnum_end; pnum++) {
+		struct mem_section *ms;
+
+		if (!present_section_nr(pnum))
+			continue;
+
+		map_map[pnum] = sparse_mem_map_populate(pnum, nodeid);
+		if (map_map[pnum])
+			continue;
+		ms = __nr_to_section(pnum);
+		printk(KERN_ERR "%s: sparsemem memory map backing failed "
+			"some memory will not be available.\n", __func__);
+		ms->section_mem_map = 0;
+	}
+
+	if (vmemmap_buf_start) {
+		/* need to free left buf */
+#ifdef CONFIG_NO_BOOTMEM
+		free_early(__pa(vmemmap_buf_start), __pa(vmemmap_buf_end));
+		if (vmemmap_buf_start < vmemmap_buf) {
+			char name[15];
+
+			snprintf(name, sizeof(name), "MEMMAP %d", nodeid);
+			reserve_early_without_check(__pa(vmemmap_buf_start),
+						    __pa(vmemmap_buf), name);
+		}
+#else
+		free_bootmem(__pa(vmemmap_buf), vmemmap_buf_end - vmemmap_buf);
+#endif
+		vmemmap_buf = NULL;
+		vmemmap_buf_end = NULL;
+	}
+}
diff --git a/mm/sparse.c b/mm/sparse.c
index 0cdaf0b58457..9b6b93a4d78d 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -390,8 +390,65 @@ struct page __init *sparse_mem_map_populate(unsigned long pnum, int nid)
 		       PAGE_ALIGN(sizeof(struct page) * PAGES_PER_SECTION));
 	return map;
 }
+void __init sparse_mem_maps_populate_node(struct page **map_map,
+					  unsigned long pnum_begin,
+					  unsigned long pnum_end,
+					  unsigned long map_count, int nodeid)
+{
+	void *map;
+	unsigned long pnum;
+	unsigned long size = sizeof(struct page) * PAGES_PER_SECTION;
+
+	map = alloc_remap(nodeid, size * map_count);
+	if (map) {
+		for (pnum = pnum_begin; pnum < pnum_end; pnum++) {
+			if (!present_section_nr(pnum))
+				continue;
+			map_map[pnum] = map;
+			map += size;
+		}
+		return;
+	}
+
+	size = PAGE_ALIGN(size);
+	map = alloc_bootmem_pages_node(NODE_DATA(nodeid), size * map_count);
+	if (map) {
+		for (pnum = pnum_begin; pnum < pnum_end; pnum++) {
+			if (!present_section_nr(pnum))
+				continue;
+			map_map[pnum] = map;
+			map += size;
+		}
+		return;
+	}
+
+	/* fallback */
+	for (pnum = pnum_begin; pnum < pnum_end; pnum++) {
+		struct mem_section *ms;
+
+		if (!present_section_nr(pnum))
+			continue;
+		map_map[pnum] = sparse_mem_map_populate(pnum, nodeid);
+		if (map_map[pnum])
+			continue;
+		ms = __nr_to_section(pnum);
+		printk(KERN_ERR "%s: sparsemem memory map backing failed "
+			"some memory will not be available.\n", __func__);
+		ms->section_mem_map = 0;
+	}
+}
 #endif /* !CONFIG_SPARSEMEM_VMEMMAP */
 
+static void __init sparse_early_mem_maps_alloc_node(struct page **map_map,
+				 unsigned long pnum_begin,
+				 unsigned long pnum_end,
+				 unsigned long map_count, int nodeid)
+{
+	sparse_mem_maps_populate_node(map_map, pnum_begin, pnum_end,
+					 map_count, nodeid);
+}
+
+#ifndef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER
 static struct page __init *sparse_early_mem_map_alloc(unsigned long pnum)
 {
 	struct page *map;
@@ -407,6 +464,7 @@ static struct page __init *sparse_early_mem_map_alloc(unsigned long pnum)
 	ms->section_mem_map = 0;
 	return NULL;
 }
+#endif
 
 void __attribute__((weak)) __meminit vmemmap_populate_print_last(void)
 {
@@ -420,12 +478,14 @@ void __init sparse_init(void)
 {
 	unsigned long pnum;
 	struct page *map;
+	struct page **map_map;
 	unsigned long *usemap;
 	unsigned long **usemap_map;
-	int size;
+	int size, size2;
 	int nodeid_begin = 0;
 	unsigned long pnum_begin = 0;
 	unsigned long usemap_count;
+	unsigned long map_count;
 
 	/*
 	 * map is using big page (aka 2M in x86 64 bit)
@@ -478,6 +538,48 @@ void __init sparse_init(void)
 	sparse_early_usemaps_alloc_node(usemap_map, pnum_begin, NR_MEM_SECTIONS,
 					 usemap_count, nodeid_begin);
 
+#ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER
+	size2 = sizeof(struct page *) * NR_MEM_SECTIONS;
+	map_map = alloc_bootmem(size2);
+	if (!map_map)
+		panic("can not allocate map_map\n");
+
+	for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) {
+		struct mem_section *ms;
+
+		if (!present_section_nr(pnum))
+			continue;
+		ms = __nr_to_section(pnum);
+		nodeid_begin = sparse_early_nid(ms);
+		pnum_begin = pnum;
+		break;
+	}
+	map_count = 1;
+	for (pnum = pnum_begin + 1; pnum < NR_MEM_SECTIONS; pnum++) {
+		struct mem_section *ms;
+		int nodeid;
+
+		if (!present_section_nr(pnum))
+			continue;
+		ms = __nr_to_section(pnum);
+		nodeid = sparse_early_nid(ms);
+		if (nodeid == nodeid_begin) {
+			map_count++;
+			continue;
+		}
+		/* ok, we need to take cake of from pnum_begin to pnum - 1*/
+		sparse_early_mem_maps_alloc_node(map_map, pnum_begin, pnum,
+						 map_count, nodeid_begin);
+		/* new start, update count etc*/
+		nodeid_begin = nodeid;
+		pnum_begin = pnum;
+		map_count = 1;
+	}
+	/* ok, last chunk */
+	sparse_early_mem_maps_alloc_node(map_map, pnum_begin, NR_MEM_SECTIONS,
+					 map_count, nodeid_begin);
+#endif
+
 	for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) {
 		if (!present_section_nr(pnum))
 			continue;
@@ -486,7 +588,11 @@ void __init sparse_init(void)
 		if (!usemap)
 			continue;
 
+#ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER
+		map = map_map[pnum];
+#else
 		map = sparse_early_mem_map_alloc(pnum);
+#endif
 		if (!map)
 			continue;
 
@@ -496,6 +602,9 @@ void __init sparse_init(void)
 
 	vmemmap_populate_print_last();
 
+#ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER
+	free_bootmem(__pa(map_map), size2);
+#endif
 	free_bootmem(__pa(usemap_map), size);
 }
 
-- 
cgit v1.2.3


From 9b3be9f99203d9a400e8547f0e80f1d8f8e5738c Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Wed, 10 Feb 2010 01:20:29 -0800
Subject: Move round_up/down to kernel.h

... in preparation of moving early_res to kernel/.

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
LKML-Reference: <1265793639-15071-26-git-send-email-yinghai@kernel.org>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/include/asm/proto.h | 10 ----------
 include/linux/kernel.h       | 10 ++++++++++
 2 files changed, 10 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h
index 4009f6534f52..6f414ed88620 100644
--- a/arch/x86/include/asm/proto.h
+++ b/arch/x86/include/asm/proto.h
@@ -23,14 +23,4 @@ extern int reboot_force;
 
 long do_arch_prctl(struct task_struct *task, int code, unsigned long addr);
 
-/*
- * This looks more complex than it should be. But we need to
- * get the type for the ~ right in round_down (it needs to be
- * as wide as the result!), and we want to evaluate the macro
- * arguments just once each.
- */
-#define __round_mask(x,y) ((__typeof__(x))((y)-1))
-#define round_up(x,y) ((((x)-1) | __round_mask(x,y))+1)
-#define round_down(x,y) ((x) & ~__round_mask(x,y))
-
 #endif /* _ASM_X86_PROTO_H */
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 328bca609b9b..d45e372fad81 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -44,6 +44,16 @@ extern const char linux_proc_banner[];
 
 #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]) + __must_be_array(arr))
 
+/*
+ * This looks more complex than it should be. But we need to
+ * get the type for the ~ right in round_down (it needs to be
+ * as wide as the result!), and we want to evaluate the macro
+ * arguments just once each.
+ */
+#define __round_mask(x, y) ((__typeof__(x))((y)-1))
+#define round_up(x, y) ((((x)-1) | __round_mask(x, y))+1)
+#define round_down(x, y) ((x) & ~__round_mask(x, y))
+
 #define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f))
 #define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d))
 #define roundup(x, y) ((((x) + ((y) - 1)) / (y)) * (y))
-- 
cgit v1.2.3


From 571ba42303813106d533bf6bda929d8e289f51bf Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Tue, 9 Feb 2010 11:49:47 +0000
Subject: netdevice.h: Add netdev_printk helpers like dev_printk

These netdev_printk routines take a struct net_device * and emit
dev_printk logging messages adding "%s: " ... netdev->dev.parent
to the dev_printk format and arguments.

This can create some uniformity in the output message log.

These helpers should not be used until a successful alloc_netdev.

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 71 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 71 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index cdf53a8d9ff5..a51228ac9331 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2095,6 +2095,77 @@ static inline u32 dev_ethtool_get_flags(struct net_device *dev)
 		return 0;
 	return dev->ethtool_ops->get_flags(dev);
 }
+
+/* Logging, debugging and troubleshooting/diagnostic helpers. */
+
+/* netdev_printk helpers, similar to dev_printk */
+
+static inline const char *netdev_name(const struct net_device *dev)
+{
+	if (dev->reg_state != NETREG_REGISTERED)
+		return "(unregistered net_device)";
+	return dev->name;
+}
+
+#define netdev_printk(level, netdev, format, args...)		\
+	dev_printk(level, (netdev)->dev.parent,			\
+		   "%s: " format,				\
+		   netdev_name(netdev), ##args)
+
+#define netdev_emerg(dev, format, args...)			\
+	netdev_printk(KERN_EMERG, dev, format, ##args)
+#define netdev_alert(dev, format, args...)			\
+	netdev_printk(KERN_ALERT, dev, format, ##args)
+#define netdev_crit(dev, format, args...)			\
+	netdev_printk(KERN_CRIT, dev, format, ##args)
+#define netdev_err(dev, format, args...)			\
+	netdev_printk(KERN_ERR, dev, format, ##args)
+#define netdev_warn(dev, format, args...)			\
+	netdev_printk(KERN_WARNING, dev, format, ##args)
+#define netdev_notice(dev, format, args...)			\
+	netdev_printk(KERN_NOTICE, dev, format, ##args)
+#define netdev_info(dev, format, args...)			\
+	netdev_printk(KERN_INFO, dev, format, ##args)
+
+#if defined(DEBUG)
+#define netdev_dbg(__dev, format, args...)			\
+	netdev_printk(KERN_DEBUG, __dev, format, ##args)
+#elif defined(CONFIG_DYNAMIC_DEBUG)
+#define netdev_dbg(__dev, format, args...)			\
+do {								\
+	dynamic_dev_dbg((__dev)->dev.parent, "%s: " format,	\
+			netdev_name(__dev), ##args);		\
+} while (0)
+#else
+#define netdev_dbg(__dev, format, args...)			\
+({								\
+	if (0)							\
+		netdev_printk(KERN_DEBUG, __dev, format, ##args); \
+	0;							\
+})
+#endif
+
+#if defined(VERBOSE_DEBUG)
+#define netdev_vdbg	netdev_dbg
+#else
+
+#define netdev_vdbg(dev, format, args...)			\
+({								\
+	if (0)							\
+		netdev_printk(KERN_DEBUG, dev, format, ##args);	\
+	0;							\
+})
+#endif
+
+/*
+ * netdev_WARN() acts like dev_printk(), but with the key difference
+ * of using a WARN/WARN_ON to get the message out, including the
+ * file/line information and a backtrace.
+ */
+#define netdev_WARN(dev, format, args...)			\
+	WARN(1, "netdevice: %s\n" format, netdev_name(dev), ##args);
+
+
 #endif /* __KERNEL__ */
 
 #endif	/* _LINUX_NETDEVICE_H */
-- 
cgit v1.2.3


From b3d95c5c93d4b57eaea0ad3f582b08a6b5fb3eb1 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Tue, 9 Feb 2010 11:49:49 +0000
Subject: include/linux/netdevice.h: Add netif_printk helpers

Add macros to test a private structure for msg_enable bits
and the netif_msg_##bit to test and call netdev_printk if set

Simplifies logic in callers and adds message logging consistency

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 53 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 53 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index a51228ac9331..1412dde16d00 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2165,6 +2165,59 @@ do {								\
 #define netdev_WARN(dev, format, args...)			\
 	WARN(1, "netdevice: %s\n" format, netdev_name(dev), ##args);
 
+/* netif printk helpers, similar to netdev_printk */
+
+#define netif_printk(priv, type, level, dev, fmt, args...)	\
+do {					  			\
+	if (netif_msg_##type(priv))				\
+		netdev_printk(level, (dev), fmt, ##args);	\
+} while (0)
+
+#define netif_emerg(priv, type, dev, fmt, args...)		\
+	netif_printk(priv, type, KERN_EMERG, dev, fmt, ##args)
+#define netif_alert(priv, type, dev, fmt, args...)		\
+	netif_printk(priv, type, KERN_ALERT, dev, fmt, ##args)
+#define netif_crit(priv, type, dev, fmt, args...)		\
+	netif_printk(priv, type, KERN_CRIT, dev, fmt, ##args)
+#define netif_err(priv, type, dev, fmt, args...)		\
+	netif_printk(priv, type, KERN_ERR, dev, fmt, ##args)
+#define netif_warn(priv, type, dev, fmt, args...)		\
+	netif_printk(priv, type, KERN_WARNING, dev, fmt, ##args)
+#define netif_notice(priv, type, dev, fmt, args...)		\
+	netif_printk(priv, type, KERN_NOTICE, dev, fmt, ##args)
+#define netif_info(priv, type, dev, fmt, args...)		\
+	netif_printk(priv, type, KERN_INFO, (dev), fmt, ##args)
+
+#if defined(DEBUG)
+#define netif_dbg(priv, type, dev, format, args...)		\
+	netif_printk(priv, type, KERN_DEBUG, dev, format, ##args)
+#elif defined(CONFIG_DYNAMIC_DEBUG)
+#define netif_dbg(priv, type, netdev, format, args...)		\
+do {								\
+	if (netif_msg_##type(priv))				\
+		dynamic_dev_dbg((netdev)->dev.parent,		\
+				"%s: " format,			\
+				netdev_name(netdev), ##args);	\
+} while (0)
+#else
+#define netif_dbg(priv, type, dev, format, args...)			\
+({									\
+	if (0)								\
+		netif_printk(priv, type, KERN_DEBUG, dev, format, ##args); \
+	0;								\
+})
+#endif
+
+#if defined(VERBOSE_DEBUG)
+#define netif_vdbg	netdev_dbg
+#else
+#define netif_vdbg(priv, type, dev, format, args...)		\
+({								\
+	if (0)							\
+		netif_printk(KERN_DEBUG, dev, format, ##args);	\
+	0;							\
+})
+#endif
 
 #endif /* __KERNEL__ */
 
-- 
cgit v1.2.3


From fb8a0d9d1bfd1e4355f307e86a6da7209eefd5f3 Mon Sep 17 00:00:00 2001
From: "Williams, Mitch A" <mitch.a.williams@intel.com>
Date: Wed, 10 Feb 2010 01:43:04 +0000
Subject: pci: Add SR-IOV convenience functions and macros

Add and export pci_num_vf to allow other subsystems to determine how many
virtual function devices are associated with an SR-IOV physical function
device.
Add macros dev_is_pci, dev_is_ps, and dev_num_vf to make it easier for
non-PCI specific code to determine SR-IOV capabilities.

Signed-off-by: Mitch Williams <mitch.a.williams@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/pci/iov.c   | 15 +++++++++++++++
 include/linux/pci.h | 11 +++++++++++
 2 files changed, 26 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c
index b2a448e19fe6..3e5ab2bf6a5c 100644
--- a/drivers/pci/iov.c
+++ b/drivers/pci/iov.c
@@ -706,6 +706,21 @@ irqreturn_t pci_sriov_migration(struct pci_dev *dev)
 }
 EXPORT_SYMBOL_GPL(pci_sriov_migration);
 
+/**
+ * pci_num_vf - return number of VFs associated with a PF device_release_driver
+ * @dev: the PCI device
+ *
+ * Returns number of VFs, or 0 if SR-IOV is not enabled.
+ */
+int pci_num_vf(struct pci_dev *dev)
+{
+	if (!dev || !dev->is_physfn)
+		return 0;
+	else
+		return dev->sriov->nr_virtfn;
+}
+EXPORT_SYMBOL_GPL(pci_num_vf);
+
 static int ats_alloc_one(struct pci_dev *dev, int ps)
 {
 	int pos;
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 174e5392e51e..59a98e2ee2c6 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -612,6 +612,9 @@ extern void pci_remove_bus_device(struct pci_dev *dev);
 extern void pci_stop_bus_device(struct pci_dev *dev);
 void pci_setup_cardbus(struct pci_bus *bus);
 extern void pci_sort_breadthfirst(void);
+#define dev_is_pci(d) ((d)->bus == &pci_bus_type)
+#define dev_is_pf(d) ((dev_is_pci(d) ? to_pci_dev(d)->is_physfn : false))
+#define dev_num_vf(d) ((dev_is_pci(d) ? pci_num_vf(to_pci_dev(d)) : 0))
 
 /* Generic PCI functions exported to card drivers */
 
@@ -1129,6 +1132,9 @@ static inline struct pci_dev *pci_get_bus_and_slot(unsigned int bus,
 						unsigned int devfn)
 { return NULL; }
 
+#define dev_is_pci(d) (false)
+#define dev_is_pf(d) (false)
+#define dev_num_vf(d) (0)
 #endif /* CONFIG_PCI */
 
 /* Include architecture-dependent settings and functions */
@@ -1286,6 +1292,7 @@ void __iomem *pci_ioremap_bar(struct pci_dev *pdev, int bar);
 extern int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn);
 extern void pci_disable_sriov(struct pci_dev *dev);
 extern irqreturn_t pci_sriov_migration(struct pci_dev *dev);
+extern int pci_num_vf(struct pci_dev *dev);
 #else
 static inline int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn)
 {
@@ -1298,6 +1305,10 @@ static inline irqreturn_t pci_sriov_migration(struct pci_dev *dev)
 {
 	return IRQ_NONE;
 }
+static inline int pci_num_vf(struct pci_dev *dev)
+{
+	return 0;
+}
 #endif
 
 #if defined(CONFIG_HOTPLUG_PCI) || defined(CONFIG_HOTPLUG_PCI_MODULE)
-- 
cgit v1.2.3


From b280da8d54b8d82b52f368a8703b7ada6c1744d5 Mon Sep 17 00:00:00 2001
From: "Williams, Mitch A" <mitch.a.williams@intel.com>
Date: Wed, 10 Feb 2010 01:43:24 +0000
Subject: if_link: Add SR-IOV configuration methods

Add SR-IOV VF management methods to IFLA_LINKINFO. This allows userspace to
use rtnetlink to configure VF network devices.

Signed-off-by: Mitch Williams <mitch.a.williams@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_link.h | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/if_link.h b/include/linux/if_link.h
index 6674791622ca..c9bf92cd7653 100644
--- a/include/linux/if_link.h
+++ b/include/linux/if_link.h
@@ -78,6 +78,11 @@ enum {
 #define IFLA_LINKINFO IFLA_LINKINFO
 	IFLA_NET_NS_PID,
 	IFLA_IFALIAS,
+	IFLA_NUM_VF,		/* Number of VFs if device is SR-IOV PF */
+	IFLA_VF_MAC,		/* Hardware queue specific attributes */
+	IFLA_VF_VLAN,
+	IFLA_VF_TX_RATE,	/* TX Bandwidth Allocation */
+	IFLA_VFINFO,
 	__IFLA_MAX
 };
 
@@ -196,4 +201,29 @@ enum macvlan_mode {
 	MACVLAN_MODE_BRIDGE  = 4, /* talk to bridge ports directly */
 };
 
+/* SR-IOV virtual function managment section */
+
+struct ifla_vf_mac {
+	__u32 vf;
+	__u8 mac[32]; /* MAX_ADDR_LEN */
+};
+
+struct ifla_vf_vlan {
+	__u32 vf;
+	__u32 vlan; /* 0 - 4095, 0 disables VLAN filter */
+	__u32 qos;
+};
+
+struct ifla_vf_tx_rate {
+	__u32 vf;
+	__u32 rate; /* Max TX bandwidth in Mbps, 0 disables throttling */
+};
+
+struct ifla_vf_info {
+	__u32 vf;
+	__u8 mac[32];
+	__u32 vlan;
+	__u32 qos;
+	__u32 tx_rate;
+};
 #endif /* _LINUX_IF_LINK_H */
-- 
cgit v1.2.3


From 95c26df829ba4a25f40595cadb4c9433883cbe28 Mon Sep 17 00:00:00 2001
From: "Williams, Mitch A" <mitch.a.williams@intel.com>
Date: Wed, 10 Feb 2010 01:43:46 +0000
Subject: net: Add netdev ops for SR-IOV configuration

Add netdev ops for configuring SR-IOV VF devices through the PF driver.

Signed-off-by: Mitch Williams <mitch.a.williams@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 1412dde16d00..682d02521bbc 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -28,6 +28,7 @@
 #include <linux/if.h>
 #include <linux/if_ether.h>
 #include <linux/if_packet.h>
+#include <linux/if_link.h>
 
 #ifdef __KERNEL__
 #include <linux/timer.h>
@@ -621,6 +622,13 @@ struct netdev_queue {
  *	this function is called when a VLAN id is unregistered.
  *
  * void (*ndo_poll_controller)(struct net_device *dev);
+ *
+ *	SR-IOV management functions.
+ * int (*ndo_set_vf_mac)(struct net_device *dev, int vf, u8* mac);
+ * int (*ndo_set_vf_vlan)(struct net_device *dev, int vf, u16 vlan, u8 qos);
+ * int (*ndo_set_vf_tx_rate)(struct net_device *dev, int vf, int rate);
+ * int (*ndo_get_vf_config)(struct net_device *dev,
+ *			    int vf, struct ifla_vf_info *ivf);
  */
 #define HAVE_NET_DEVICE_OPS
 struct net_device_ops {
@@ -660,6 +668,15 @@ struct net_device_ops {
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	void                    (*ndo_poll_controller)(struct net_device *dev);
 #endif
+	int			(*ndo_set_vf_mac)(struct net_device *dev,
+						  int queue, u8 *mac);
+	int			(*ndo_set_vf_vlan)(struct net_device *dev,
+						   int queue, u16 vlan, u8 qos);
+	int			(*ndo_set_vf_tx_rate)(struct net_device *dev,
+						      int vf, int rate);
+	int			(*ndo_get_vf_config)(struct net_device *dev,
+						     int vf,
+						     struct ifla_vf_info *ivf);
 #if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
 	int			(*ndo_fcoe_enable)(struct net_device *dev);
 	int			(*ndo_fcoe_disable)(struct net_device *dev);
-- 
cgit v1.2.3


From e902ec9906e844f4613fa6190c6fa65f162dc86e Mon Sep 17 00:00:00 2001
From: Jiro SEKIBA <jir@unicus.jp>
Date: Sat, 30 Jan 2010 18:06:35 +0900
Subject: nilfs2: issue discard request after cleaning segments

This adds a function to send discard requests for given array of
segment numbers, and calls the function when garbage collection
succeeded.

Signed-off-by: Jiro SEKIBA <jir@unicus.jp>
Signed-off-by: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
---
 Documentation/filesystems/nilfs2.txt |  3 +++
 fs/nilfs2/segment.c                  | 10 ++++++++++
 fs/nilfs2/super.c                    |  8 +++++++-
 fs/nilfs2/the_nilfs.c                | 38 ++++++++++++++++++++++++++++++++++++
 fs/nilfs2/the_nilfs.h                |  1 +
 include/linux/nilfs2_fs.h            |  1 +
 6 files changed, 60 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/Documentation/filesystems/nilfs2.txt b/Documentation/filesystems/nilfs2.txt
index 839efd8a8a8c..cf6d0d85ca82 100644
--- a/Documentation/filesystems/nilfs2.txt
+++ b/Documentation/filesystems/nilfs2.txt
@@ -74,6 +74,9 @@ norecovery		Disable recovery of the filesystem on mount.
 			This disables every write access on the device for
 			read-only mounts or snapshots.  This option will fail
 			for r/w mounts on an unclean volume.
+discard			Issue discard/TRIM commands to the underlying block
+			device when blocks are freed.  This is useful for SSD
+			devices and sparse/thinly-provisioned LUNs.
 
 NILFS2 usage
 ============
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index 105b508b47a8..9280b0f10792 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -2560,6 +2560,16 @@ int nilfs_clean_segments(struct super_block *sb, struct nilfs_argv *argv,
 		set_current_state(TASK_INTERRUPTIBLE);
 		schedule_timeout(sci->sc_interval);
 	}
+	if (nilfs_test_opt(sbi, DISCARD)) {
+		int ret = nilfs_discard_segments(nilfs, sci->sc_freesegs,
+						 sci->sc_nfreesegs);
+		if (ret) {
+			printk(KERN_WARNING
+			       "NILFS warning: error %d on discard request, "
+			       "turning discards off for the device\n", ret);
+			nilfs_clear_opt(sbi, DISCARD);
+		}
+	}
 
  out_unlock:
 	sci->sc_freesegs = NULL;
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 8173faee31e6..3f88401a375b 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -481,6 +481,8 @@ static int nilfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
 		seq_printf(seq, ",order=strict");
 	if (nilfs_test_opt(sbi, NORECOVERY))
 		seq_printf(seq, ",norecovery");
+	if (nilfs_test_opt(sbi, DISCARD))
+		seq_printf(seq, ",discard");
 
 	return 0;
 }
@@ -550,7 +552,7 @@ static const struct export_operations nilfs_export_ops = {
 enum {
 	Opt_err_cont, Opt_err_panic, Opt_err_ro,
 	Opt_nobarrier, Opt_snapshot, Opt_order, Opt_norecovery,
-	Opt_err,
+	Opt_discard, Opt_err,
 };
 
 static match_table_t tokens = {
@@ -561,6 +563,7 @@ static match_table_t tokens = {
 	{Opt_snapshot, "cp=%u"},
 	{Opt_order, "order=%s"},
 	{Opt_norecovery, "norecovery"},
+	{Opt_discard, "discard"},
 	{Opt_err, NULL}
 };
 
@@ -614,6 +617,9 @@ static int parse_options(char *options, struct super_block *sb)
 		case Opt_norecovery:
 			nilfs_set_opt(sbi, NORECOVERY);
 			break;
+		case Opt_discard:
+			nilfs_set_opt(sbi, DISCARD);
+			break;
 		default:
 			printk(KERN_ERR
 			       "NILFS: Unrecognized mount option \"%s\"\n", p);
diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c
index 6241e1722efc..92733d5651d2 100644
--- a/fs/nilfs2/the_nilfs.c
+++ b/fs/nilfs2/the_nilfs.c
@@ -646,6 +646,44 @@ int init_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, char *data)
 	goto out;
 }
 
+int nilfs_discard_segments(struct the_nilfs *nilfs, __u64 *segnump,
+			    size_t nsegs)
+{
+	sector_t seg_start, seg_end;
+	sector_t start = 0, nblocks = 0;
+	unsigned int sects_per_block;
+	__u64 *sn;
+	int ret = 0;
+
+	sects_per_block = (1 << nilfs->ns_blocksize_bits) /
+		bdev_logical_block_size(nilfs->ns_bdev);
+	for (sn = segnump; sn < segnump + nsegs; sn++) {
+		nilfs_get_segment_range(nilfs, *sn, &seg_start, &seg_end);
+
+		if (!nblocks) {
+			start = seg_start;
+			nblocks = seg_end - seg_start + 1;
+		} else if (start + nblocks == seg_start) {
+			nblocks += seg_end - seg_start + 1;
+		} else {
+			ret = blkdev_issue_discard(nilfs->ns_bdev,
+						   start * sects_per_block,
+						   nblocks * sects_per_block,
+						   GFP_NOFS,
+						   DISCARD_FL_BARRIER);
+			if (ret < 0)
+				return ret;
+			nblocks = 0;
+		}
+	}
+	if (nblocks)
+		ret = blkdev_issue_discard(nilfs->ns_bdev,
+					   start * sects_per_block,
+					   nblocks * sects_per_block,
+					   GFP_NOFS, DISCARD_FL_BARRIER);
+	return ret;
+}
+
 int nilfs_count_free_blocks(struct the_nilfs *nilfs, sector_t *nblocks)
 {
 	struct inode *dat = nilfs_dat_inode(nilfs);
diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h
index 589786e33464..fd057f8ad439 100644
--- a/fs/nilfs2/the_nilfs.h
+++ b/fs/nilfs2/the_nilfs.h
@@ -221,6 +221,7 @@ struct the_nilfs *find_or_create_nilfs(struct block_device *);
 void put_nilfs(struct the_nilfs *);
 int init_nilfs(struct the_nilfs *, struct nilfs_sb_info *, char *);
 int load_nilfs(struct the_nilfs *, struct nilfs_sb_info *);
+int nilfs_discard_segments(struct the_nilfs *, __u64 *, size_t);
 int nilfs_count_free_blocks(struct the_nilfs *, sector_t *);
 struct nilfs_sb_info *nilfs_find_sbinfo(struct the_nilfs *, int, __u64);
 int nilfs_checkpoint_is_mounted(struct the_nilfs *, __u64, int);
diff --git a/include/linux/nilfs2_fs.h b/include/linux/nilfs2_fs.h
index 3fe02cf8b65a..640702e97457 100644
--- a/include/linux/nilfs2_fs.h
+++ b/include/linux/nilfs2_fs.h
@@ -153,6 +153,7 @@ struct nilfs_super_root {
 						   semantics also for data */
 #define NILFS_MOUNT_NORECOVERY		0x4000  /* Disable write access during
 						   mount-time recovery */
+#define NILFS_MOUNT_DISCARD		0x8000  /* Issue DISCARD requests */
 
 
 /**
-- 
cgit v1.2.3


From 21b082ecdd7e6b8a5eba2cc013cae41b24de7f51 Mon Sep 17 00:00:00 2001
From: Grant Likely <grant.likely@secretlab.ca>
Date: Sun, 14 Feb 2010 07:13:38 -0700
Subject: of: Remove old and misplaced function declarations

The following functions don't exist:
  finish_device_tree()
  print_properties()
  prom_n_intr_cells()
  prom_get_irq_senses()

The following functions are in drivers/of/base.c, so the declaration
belongs in of.h instead of of_fdt.h
  of_machine_is_compatible()
  prom_add_property()
  prom_remove_property()
  prom_update_property()

Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Acked-by: Michal Simek <monstr@monstr.eu>
---
 include/linux/of.h     |  8 ++++++++
 include/linux/of_fdt.h | 10 ----------
 2 files changed, 8 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/of.h b/include/linux/of.h
index fa5571b6e219..5c7b6a64acd9 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -180,6 +180,14 @@ extern int of_parse_phandles_with_args(struct device_node *np,
 	const char *list_name, const char *cells_name, int index,
 	struct device_node **out_node, const void **out_args);
 
+extern int of_machine_is_compatible(const char *compat);
+
+extern int prom_add_property(struct device_node* np, struct property* prop);
+extern int prom_remove_property(struct device_node *np, struct property *prop);
+extern int prom_update_property(struct device_node *np,
+				struct property *newprop,
+				struct property *oldprop);
+
 #if defined(CONFIG_OF_DYNAMIC)
 /* For updating the device tree at runtime */
 extern void of_attach_node(struct device_node *);
diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h
index 0007187ab59b..c9cb8a7bc065 100644
--- a/include/linux/of_fdt.h
+++ b/include/linux/of_fdt.h
@@ -95,18 +95,8 @@ extern int early_init_dt_scan_root(unsigned long node, const char *uname,
 				   int depth, void *data);
 
 /* Other Prototypes */
-extern void finish_device_tree(void);
 extern void unflatten_device_tree(void);
 extern void early_init_devtree(void *);
-extern int of_machine_is_compatible(const char *compat);
-extern void print_properties(struct device_node *node);
-extern int prom_n_intr_cells(struct device_node* np);
-extern void prom_get_irq_senses(unsigned char *senses, int off, int max);
-extern int prom_add_property(struct device_node* np, struct property* prop);
-extern int prom_remove_property(struct device_node *np, struct property *prop);
-extern int prom_update_property(struct device_node *np,
-				struct property *newprop,
-				struct property *oldprop);
 
 #endif /* __ASSEMBLY__ */
 #endif /* _LINUX_OF_FDT_H */
-- 
cgit v1.2.3


From 9dfbf207802c7e8cda9d081a8d750b50633c82d2 Mon Sep 17 00:00:00 2001
From: Jeremy Kerr <jeremy.kerr@canonical.com>
Date: Sun, 14 Feb 2010 07:13:43 -0700
Subject: of: protect linux/of.h with CONFIG_OF

For platforms that have CONFIG_OF optional, we need to make the contents
of linux/of.h conditional on CONFIG_OF.

Signed-off-by: Jeremy Kerr <jeremy.kerr@canonical.com>
Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Acked-by: Michal Simek <monstr@monstr.eu>
---
 include/linux/of.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/of.h b/include/linux/of.h
index 5c7b6a64acd9..48b0ee6d0f76 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -22,6 +22,8 @@
 
 #include <asm/byteorder.h>
 
+#ifdef CONFIG_OF
+
 typedef u32 phandle;
 typedef u32 ihandle;
 
@@ -194,4 +196,5 @@ extern void of_attach_node(struct device_node *);
 extern void of_detach_node(struct device_node *);
 #endif
 
+#endif /* CONFIG_OF */
 #endif /* _LINUX_OF_H */
-- 
cgit v1.2.3


From 4ef7b373df330bc0ff037dc4792d373c9346375f Mon Sep 17 00:00:00 2001
From: Jeremy Kerr <jeremy.kerr@canonical.com>
Date: Sun, 14 Feb 2010 07:13:47 -0700
Subject: of/flattree: Don't assume HAVE_LMB

We don't always have lmb available, so make arches provide an
early_init_dt_alloc_memory_arch() to handle the allocation of
memory in the fdt code.

When we don't have lmb.h included, we need asm/page.h for __va.

Signed-off-by: Jeremy Kerr <jeremy.kerr@canonical.com>
Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Acked-by: Michal Simek <monstr@monstr.eu>
---
 arch/microblaze/kernel/prom.c | 5 +++++
 arch/powerpc/kernel/prom.c    | 5 +++++
 drivers/of/fdt.c              | 9 ++++++---
 include/linux/of_fdt.h        | 1 +
 4 files changed, 17 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/arch/microblaze/kernel/prom.c b/arch/microblaze/kernel/prom.c
index 050b7993c51c..a7dcaf092200 100644
--- a/arch/microblaze/kernel/prom.c
+++ b/arch/microblaze/kernel/prom.c
@@ -55,6 +55,11 @@ void __init early_init_dt_add_memory_arch(u64 base, u64 size)
 	lmb_add(base, size);
 }
 
+u64 __init early_init_dt_alloc_memory_arch(u64 size, u64 align)
+{
+	return lmb_alloc(size, align);
+}
+
 #ifdef CONFIG_EARLY_PRINTK
 /* MS this is Microblaze specifig function */
 static int __init early_init_dt_scan_serial(unsigned long node,
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 43c78d74ddcb..5bbbdb29f603 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -510,6 +510,11 @@ void __init early_init_dt_add_memory_arch(u64 base, u64 size)
 	memstart_addr = min((u64)memstart_addr, base);
 }
 
+u64 __init early_init_dt_alloc_memory_arch(u64 size, u64 align)
+{
+	return lmb_alloc(size, align);
+}
+
 #ifdef CONFIG_BLK_DEV_INITRD
 void __init early_init_dt_setup_initrd_arch(unsigned long start,
 		unsigned long end)
diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
index b51f797d9d9d..406757a9d7ea 100644
--- a/drivers/of/fdt.c
+++ b/drivers/of/fdt.c
@@ -10,16 +10,18 @@
  */
 
 #include <linux/kernel.h>
-#include <linux/lmb.h>
 #include <linux/initrd.h>
 #include <linux/of.h>
 #include <linux/of_fdt.h>
-
+#include <linux/string.h>
+#include <linux/errno.h>
 
 #ifdef CONFIG_PPC
 #include <asm/machdep.h>
 #endif /* CONFIG_PPC */
 
+#include <asm/page.h>
+
 int __initdata dt_root_addr_cells;
 int __initdata dt_root_size_cells;
 
@@ -560,7 +562,8 @@ void __init unflatten_device_tree(void)
 	pr_debug("  size is %lx, allocating...\n", size);
 
 	/* Allocate memory for the expanded device tree */
-	mem = lmb_alloc(size + 4, __alignof__(struct device_node));
+	mem = early_init_dt_alloc_memory_arch(size + 4,
+			__alignof__(struct device_node));
 	mem = (unsigned long) __va(mem);
 
 	((__be32 *)mem)[size / 4] = cpu_to_be32(0xdeadbeef);
diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h
index c9cb8a7bc065..a1ca92ccb0ff 100644
--- a/include/linux/of_fdt.h
+++ b/include/linux/of_fdt.h
@@ -78,6 +78,7 @@ extern void early_init_dt_check_for_initrd(unsigned long node);
 extern int early_init_dt_scan_memory(unsigned long node, const char *uname,
 				     int depth, void *data);
 extern void early_init_dt_add_memory_arch(u64 base, u64 size);
+extern u64 early_init_dt_alloc_memory_arch(u64 size, u64 align);
 extern u64 dt_mem_next_cell(int s, __be32 **cellp);
 
 /*
-- 
cgit v1.2.3


From 7c7b60cb87547b1664a4385c187f029bf514a737 Mon Sep 17 00:00:00 2001
From: Grant Likely <grant.likely@secretlab.ca>
Date: Sun, 14 Feb 2010 07:13:50 -0700
Subject: of: put default string compare and #a/s-cell values into common
 header

Most architectures don't need to change these.  Put them into common
code to eliminate some duplication

Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Acked-by: Michal Simek <monstr@monstr.eu>
---
 arch/microblaze/include/asm/prom.h |  7 -------
 arch/powerpc/include/asm/prom.h    |  7 -------
 include/linux/of.h                 | 13 +++++++++++++
 3 files changed, 13 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/arch/microblaze/include/asm/prom.h b/arch/microblaze/include/asm/prom.h
index 6c6b386cf3c6..ddc5c57eb240 100644
--- a/arch/microblaze/include/asm/prom.h
+++ b/arch/microblaze/include/asm/prom.h
@@ -26,13 +26,6 @@
 #include <asm/irq.h>
 #include <asm/atomic.h>
 
-#define OF_ROOT_NODE_ADDR_CELLS_DEFAULT	1
-#define OF_ROOT_NODE_SIZE_CELLS_DEFAULT	1
-
-#define of_compat_cmp(s1, s2, l)	strncasecmp((s1), (s2), (l))
-#define of_prop_cmp(s1, s2)		strcmp((s1), (s2))
-#define of_node_cmp(s1, s2)		strcasecmp((s1), (s2))
-
 extern struct device_node *of_chosen;
 
 #define HAVE_ARCH_DEVTREE_FIXUPS
diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h
index f384db815ea8..4a5070edb4d3 100644
--- a/arch/powerpc/include/asm/prom.h
+++ b/arch/powerpc/include/asm/prom.h
@@ -23,13 +23,6 @@
 #include <asm/irq.h>
 #include <asm/atomic.h>
 
-#define OF_ROOT_NODE_ADDR_CELLS_DEFAULT	1
-#define OF_ROOT_NODE_SIZE_CELLS_DEFAULT	1
-
-#define of_compat_cmp(s1, s2, l)	strcasecmp((s1), (s2))
-#define of_prop_cmp(s1, s2)		strcmp((s1), (s2))
-#define of_node_cmp(s1, s2)		strcasecmp((s1), (s2))
-
 extern struct device_node *of_chosen;
 
 #define HAVE_ARCH_DEVTREE_FIXUPS
diff --git a/include/linux/of.h b/include/linux/of.h
index 48b0ee6d0f76..5cd284002bf1 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -116,6 +116,19 @@ static inline unsigned long of_read_ulong(const __be32 *cell, int size)
 
 #include <asm/prom.h>
 
+/* Default #address and #size cells.  Allow arch asm/prom.h to override */
+#if !defined(OF_ROOT_NODE_ADDR_CELLS_DEFAULT)
+#define OF_ROOT_NODE_ADDR_CELLS_DEFAULT 1
+#define OF_ROOT_NODE_SIZE_CELLS_DEFAULT 1
+#endif
+
+/* Default string compare functions, Allow arch asm/prom.h to override */
+#if !defined(of_compat_cmp)
+#define of_compat_cmp(s1, s2, l)	strncasecmp((s1), (s2), (l))
+#define of_prop_cmp(s1, s2)		strcmp((s1), (s2))
+#define of_node_cmp(s1, s2)		strcasecmp((s1), (s2))
+#endif
+
 /* flag descriptions */
 #define OF_DYNAMIC	1 /* node and properties were allocated via kmalloc */
 #define OF_DETACHED	2 /* node has been detached from the device tree */
-- 
cgit v1.2.3


From fc0bdae49d810e4cb32d7b547bc6d4dfb08f9e2e Mon Sep 17 00:00:00 2001
From: Grant Likely <grant.likely@secretlab.ca>
Date: Sun, 14 Feb 2010 07:13:55 -0700
Subject: of: move definition of of_chosen into common code.

Rather than defining of_chosen in each arch, it can be defined for all
in driver/of/base.c

Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Acked-by: Michal Simek <monstr@monstr.eu>
---
 arch/microblaze/include/asm/prom.h | 2 --
 arch/microblaze/kernel/prom.c      | 3 ---
 arch/powerpc/include/asm/prom.h    | 2 --
 arch/powerpc/kernel/prom.c         | 3 ---
 drivers/of/base.c                  | 1 +
 include/linux/of.h                 | 1 +
 6 files changed, 2 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/arch/microblaze/include/asm/prom.h b/arch/microblaze/include/asm/prom.h
index 8b1ebd39971a..aa1a437ac87d 100644
--- a/arch/microblaze/include/asm/prom.h
+++ b/arch/microblaze/include/asm/prom.h
@@ -26,8 +26,6 @@
 #include <asm/irq.h>
 #include <asm/atomic.h>
 
-extern struct device_node *of_chosen;
-
 #define HAVE_ARCH_DEVTREE_FIXUPS
 
 /* Other Prototypes */
diff --git a/arch/microblaze/kernel/prom.c b/arch/microblaze/kernel/prom.c
index a7dcaf092200..a15ef6d67ca9 100644
--- a/arch/microblaze/kernel/prom.c
+++ b/arch/microblaze/kernel/prom.c
@@ -42,9 +42,6 @@
 #include <asm/sections.h>
 #include <asm/pci-bridge.h>
 
-/* export that to outside world */
-struct device_node *of_chosen;
-
 void __init early_init_dt_scan_chosen_arch(unsigned long node)
 {
 	/* No Microblaze specific code here */
diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h
index 4a5070edb4d3..7f9a50aab157 100644
--- a/arch/powerpc/include/asm/prom.h
+++ b/arch/powerpc/include/asm/prom.h
@@ -23,8 +23,6 @@
 #include <asm/irq.h>
 #include <asm/atomic.h>
 
-extern struct device_node *of_chosen;
-
 #define HAVE_ARCH_DEVTREE_FIXUPS
 
 #ifdef CONFIG_PPC32
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 4869c937b6cf..43238b2054b6 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -67,9 +67,6 @@ int __initdata iommu_force_on;
 unsigned long tce_alloc_start, tce_alloc_end;
 #endif
 
-/* export that to outside world */
-struct device_node *of_chosen;
-
 static int __init early_parse_mem(char *p)
 {
 	if (!p)
diff --git a/drivers/of/base.c b/drivers/of/base.c
index 873479a21c80..cb96888d1427 100644
--- a/drivers/of/base.c
+++ b/drivers/of/base.c
@@ -23,6 +23,7 @@
 #include <linux/proc_fs.h>
 
 struct device_node *allnodes;
+struct device_node *of_chosen;
 
 /* use when traversing tree through the allnext, child, sibling,
  * or parent members of struct device_node.
diff --git a/include/linux/of.h b/include/linux/of.h
index 5cd284002bf1..d34cc5d9d81e 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -66,6 +66,7 @@ struct device_node {
 
 /* Pointer for first entry in chain of all nodes. */
 extern struct device_node *allnodes;
+extern struct device_node *of_chosen;
 
 static inline int of_node_check_flag(struct device_node *n, unsigned long flag)
 {
-- 
cgit v1.2.3


From 0d351c3e932c2e155ef5e4c3f5b87223abd4eea6 Mon Sep 17 00:00:00 2001
From: Grant Likely <grant.likely@secretlab.ca>
Date: Sun, 14 Feb 2010 14:13:57 -0700
Subject: of/sparc: Remove sparc-local declaration of allnodes and devtree_lock

Both allnodes and devtree_lock are defined in common code.  The
extern declaration should be in the common header too so that the
compiler can type check.  allnodes is already in of.h, but
devtree_lock should be declared there too.

This patch removes the SPARC declarations and uses decls in of.h instead.

Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Acked-by: Michal Simek <monstr@monstr.eu>
Acked-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc/kernel/prom.h | 3 ---
 include/linux/of.h       | 2 ++
 2 files changed, 2 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/arch/sparc/kernel/prom.h b/arch/sparc/kernel/prom.h
index 453397fe5e14..a8591ef2636d 100644
--- a/arch/sparc/kernel/prom.h
+++ b/arch/sparc/kernel/prom.h
@@ -4,9 +4,6 @@
 #include <linux/spinlock.h>
 #include <asm/prom.h>
 
-extern struct device_node *allnodes;	/* temporary while merging */
-extern rwlock_t devtree_lock;	/* temporary while merging */
-
 extern void * prom_early_alloc(unsigned long size);
 extern void irq_trans_init(struct device_node *dp);
 
diff --git a/include/linux/of.h b/include/linux/of.h
index d34cc5d9d81e..f6d9cbc39c9c 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -19,6 +19,7 @@
 #include <linux/bitops.h>
 #include <linux/kref.h>
 #include <linux/mod_devicetable.h>
+#include <linux/spinlock.h>
 
 #include <asm/byteorder.h>
 
@@ -67,6 +68,7 @@ struct device_node {
 /* Pointer for first entry in chain of all nodes. */
 extern struct device_node *allnodes;
 extern struct device_node *of_chosen;
+extern rwlock_t devtree_lock;
 
 static inline int of_node_check_flag(struct device_node *n, unsigned long flag)
 {
-- 
cgit v1.2.3


From 1a5778aa000ebfec7f07eed0ffa2852ffb5d16bb Mon Sep 17 00:00:00 2001
From: Ben Hutchings <ben@decadent.org.uk>
Date: Sun, 14 Feb 2010 22:35:47 -0800
Subject: net: Fix first line of kernel-doc for a few functions

The function name must be followed by a space, hypen, space, and a
short description.

Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 4 ++--
 include/net/sock.h     | 2 +-
 net/sunrpc/rpc_pipe.c  | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index ae836fded530..ba0f8e3a9cda 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -738,7 +738,7 @@ static inline struct sk_buff *skb_unshare(struct sk_buff *skb,
 }
 
 /**
- *	skb_peek
+ *	skb_peek - peek at the head of an &sk_buff_head
  *	@list_: list to peek at
  *
  *	Peek an &sk_buff. Unlike most other operations you _MUST_
@@ -759,7 +759,7 @@ static inline struct sk_buff *skb_peek(struct sk_buff_head *list_)
 }
 
 /**
- *	skb_peek_tail
+ *	skb_peek_tail - peek at the tail of an &sk_buff_head
  *	@list_: list to peek at
  *
  *	Peek an &sk_buff. Unlike most other operations you _MUST_
diff --git a/include/net/sock.h b/include/net/sock.h
index c8d400063c16..580d51fa28e9 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1049,7 +1049,7 @@ extern void sk_common_release(struct sock *sk);
 extern void sock_init_data(struct socket *sock, struct sock *sk);
 
 /**
- *	sk_filter_release: Release a socket filter
+ *	sk_filter_release - release a socket filter
  *	@fp: filter to remove
  *
  *	Remove a filter from a socket and release its resources.
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 49278f830367..9ea45383480e 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -78,7 +78,7 @@ rpc_timeout_upcall_queue(struct work_struct *work)
 }
 
 /**
- * rpc_queue_upcall
+ * rpc_queue_upcall - queue an upcall message to userspace
  * @inode: inode of upcall pipe on which to queue given message
  * @msg: message to queue
  *
-- 
cgit v1.2.3


From a1467085dcad8214bbb1d7edafbaa295cbd8c0e7 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Sun, 14 Feb 2010 22:38:54 -0800
Subject: ethtool: Fix includes build break

Based upon a patch by Oliver Hartkopp <oliver@hartkopp.net>.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ethtool.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index a3cac53a0766..cca1c3de140d 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -14,7 +14,6 @@
 #define _LINUX_ETHTOOL_H
 
 #include <linux/types.h>
-#include <linux/rculist.h>
 
 /* This should work for both 32 and 64 bit userland. */
 struct ethtool_cmd {
@@ -409,6 +408,8 @@ struct ethtool_flash {
 
 #ifdef __KERNEL__
 
+#include <linux/rculist.h>
+
 struct ethtool_rx_ntuple_flow_spec_container {
 	struct ethtool_rx_ntuple_flow_spec fs;
 	struct list_head list;
-- 
cgit v1.2.3


From 6dd2e42bd892b2e16080ceba451fd9c3ed633145 Mon Sep 17 00:00:00 2001
From: Tomi Valkeinen <tomi.valkeinen@nokia.com>
Date: Thu, 14 Jan 2010 17:32:13 +0200
Subject: OMAP: DSS2: OMAPFB: implement OMAPFB_GET_DISPLAY_INFO

Previously the only place to get the size of the display was from the
DSS's sysfs interface, making, for example, configuring overlays and doing
updates on manual displays more difficult.

Signed-off-by: Tomi Valkeinen <tomi.valkeinen@nokia.com>
---
 drivers/video/omap2/omapfb/omapfb-ioctl.c | 24 ++++++++++++++++++++++++
 include/linux/omapfb.h                    |  9 +++++++++
 2 files changed, 33 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/video/omap2/omapfb/omapfb-ioctl.c b/drivers/video/omap2/omapfb/omapfb-ioctl.c
index 4c4bafdfaa43..33fc1459a7c9 100644
--- a/drivers/video/omap2/omapfb/omapfb-ioctl.c
+++ b/drivers/video/omap2/omapfb/omapfb-ioctl.c
@@ -483,6 +483,7 @@ int omapfb_ioctl(struct fb_info *fbi, unsigned int cmd, unsigned long arg)
 		struct omapfb_memory_read	memory_read;
 		struct omapfb_vram_info		vram_info;
 		struct omapfb_tearsync_info	tearsync_info;
+		struct omapfb_display_info	display_info;
 	} p;
 
 	int r = 0;
@@ -741,6 +742,29 @@ int omapfb_ioctl(struct fb_info *fbi, unsigned int cmd, unsigned long arg)
 		break;
 	}
 
+	case OMAPFB_GET_DISPLAY_INFO: {
+		u16 xres, yres;
+
+		DBG("ioctl GET_DISPLAY_INFO\n");
+
+		if (display == NULL) {
+			r = -ENODEV;
+			break;
+		}
+
+		display->get_resolution(display, &xres, &yres);
+
+		p.display_info.xres = xres;
+		p.display_info.yres = yres;
+		p.display_info.width = 0;
+		p.display_info.height = 0;
+
+		if (copy_to_user((void __user *)arg, &p.display_info,
+					sizeof(p.display_info)))
+			r = -EFAULT;
+		break;
+	}
+
 	default:
 		dev_err(fbdev->dev, "Unknown ioctl 0x%x\n", cmd);
 		r = -EINVAL;
diff --git a/include/linux/omapfb.h b/include/linux/omapfb.h
index f46c40ac6d45..9bdd91486b49 100644
--- a/include/linux/omapfb.h
+++ b/include/linux/omapfb.h
@@ -57,6 +57,7 @@
 #define OMAPFB_WAITFORGO	OMAP_IO(60)
 #define OMAPFB_GET_VRAM_INFO	OMAP_IOR(61, struct omapfb_vram_info)
 #define OMAPFB_SET_TEARSYNC	OMAP_IOW(62, struct omapfb_tearsync_info)
+#define OMAPFB_GET_DISPLAY_INFO	OMAP_IOR(63, struct omapfb_display_info)
 
 #define OMAPFB_CAPS_GENERIC_MASK	0x00000fff
 #define OMAPFB_CAPS_LCDC_MASK		0x00fff000
@@ -206,6 +207,14 @@ struct omapfb_tearsync_info {
 	__u16 reserved2;
 };
 
+struct omapfb_display_info {
+	__u16 xres;
+	__u16 yres;
+	__u32 width;	/* phys width of the display in micrometers */
+	__u32 height;	/* phys height of the display in micrometers */
+	__u32 reserved[5];
+};
+
 #ifdef __KERNEL__
 
 #include <plat/board.h>
-- 
cgit v1.2.3


From 23f3733d440b918ccb7746718f77256334cf6176 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Fri, 5 Jun 2009 17:31:46 +0200
Subject: netfilter: reduce NF_HOOK by one argument

No changes in vmlinux filesize.

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
---
 include/linux/netfilter.h | 14 +++++---------
 1 file changed, 5 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index 78f33d223680..2f22816a5514 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -163,11 +163,8 @@ static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook,
 				 struct sk_buff *skb,
 				 struct net_device *indev,
 				 struct net_device *outdev,
-				 int (*okfn)(struct sk_buff *), int thresh,
-				 int cond)
+				 int (*okfn)(struct sk_buff *), int thresh)
 {
-	if (!cond)
-		return 1;
 #ifndef CONFIG_NETFILTER_DEBUG
 	if (list_empty(&nf_hooks[pf][hook]))
 		return 1;
@@ -179,7 +176,7 @@ static inline int nf_hook(u_int8_t pf, unsigned int hook, struct sk_buff *skb,
 			  struct net_device *indev, struct net_device *outdev,
 			  int (*okfn)(struct sk_buff *))
 {
-	return nf_hook_thresh(pf, hook, skb, indev, outdev, okfn, INT_MIN, 1);
+	return nf_hook_thresh(pf, hook, skb, indev, outdev, okfn, INT_MIN);
 }
                    
 /* Activate hook; either okfn or kfree_skb called, unless a hook
@@ -206,13 +203,13 @@ static inline int nf_hook(u_int8_t pf, unsigned int hook, struct sk_buff *skb,
 
 #define NF_HOOK_THRESH(pf, hook, skb, indev, outdev, okfn, thresh)	       \
 ({int __ret;								       \
-if ((__ret=nf_hook_thresh(pf, hook, (skb), indev, outdev, okfn, thresh, 1)) == 1)\
+if ((__ret=nf_hook_thresh(pf, hook, (skb), indev, outdev, okfn, thresh)) == 1)\
 	__ret = (okfn)(skb);						       \
 __ret;})
 
 #define NF_HOOK_COND(pf, hook, skb, indev, outdev, okfn, cond)		       \
 ({int __ret;								       \
-if ((__ret=nf_hook_thresh(pf, hook, (skb), indev, outdev, okfn, INT_MIN, cond)) == 1)\
+if ((cond) || (__ret = nf_hook_thresh(pf, hook, (skb), indev, outdev, okfn, INT_MIN)) == 1)\
 	__ret = (okfn)(skb);						       \
 __ret;})
 
@@ -328,8 +325,7 @@ static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook,
 				 struct sk_buff *skb,
 				 struct net_device *indev,
 				 struct net_device *outdev,
-				 int (*okfn)(struct sk_buff *), int thresh,
-				 int cond)
+				 int (*okfn)(struct sk_buff *), int thresh)
 {
 	return okfn(skb);
 }
-- 
cgit v1.2.3


From 2249065f4b22b493bae2caf549b86f175f33188e Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Sat, 13 Jun 2009 04:13:26 +0200
Subject: netfilter: get rid of the grossness in netfilter.h

GCC is now smart enough to follow the inline trail correctly.
vmlinux size remain the same.

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
---
 include/linux/netfilter.h | 45 ++++++++++++++++++++++++++++-----------------
 1 file changed, 28 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index 2f22816a5514..70079454ffd0 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -196,25 +196,36 @@ static inline int nf_hook(u_int8_t pf, unsigned int hook, struct sk_buff *skb,
    coders :)
 */
 
-/* This is gross, but inline doesn't cut it for avoiding the function
-   call in fast path: gcc doesn't inline (needs value tracking?). --RR */
-
-/* HX: It's slightly less gross now. */
-
-#define NF_HOOK_THRESH(pf, hook, skb, indev, outdev, okfn, thresh)	       \
-({int __ret;								       \
-if ((__ret=nf_hook_thresh(pf, hook, (skb), indev, outdev, okfn, thresh)) == 1)\
-	__ret = (okfn)(skb);						       \
-__ret;})
+static inline int
+NF_HOOK_THRESH(uint8_t pf, unsigned int hook, struct sk_buff *skb,
+	       struct net_device *in, struct net_device *out,
+	       int (*okfn)(struct sk_buff *), int thresh)
+{
+	int ret = nf_hook_thresh(pf, hook, skb, in, out, okfn, thresh);
+	if (ret == 1)
+		ret = okfn(skb);
+	return ret;
+}
 
-#define NF_HOOK_COND(pf, hook, skb, indev, outdev, okfn, cond)		       \
-({int __ret;								       \
-if ((cond) || (__ret = nf_hook_thresh(pf, hook, (skb), indev, outdev, okfn, INT_MIN)) == 1)\
-	__ret = (okfn)(skb);						       \
-__ret;})
+static inline int
+NF_HOOK_COND(uint8_t pf, unsigned int hook, struct sk_buff *skb,
+	     struct net_device *in, struct net_device *out,
+	     int (*okfn)(struct sk_buff *), bool cond)
+{
+	int ret = 1;
+	if (cond ||
+	    (ret = nf_hook_thresh(pf, hook, skb, in, out, okfn, INT_MIN) == 1))
+		ret = okfn(skb);
+	return ret;
+}
 
-#define NF_HOOK(pf, hook, skb, indev, outdev, okfn) \
-	NF_HOOK_THRESH(pf, hook, skb, indev, outdev, okfn, INT_MIN)
+static inline int
+NF_HOOK(uint8_t pf, unsigned int hook, struct sk_buff *skb,
+	struct net_device *in, struct net_device *out,
+	int (*okfn)(struct sk_buff *))
+{
+	return NF_HOOK_THRESH(pf, hook, skb, in, out, okfn, INT_MIN);
+}
 
 /* Call setsockopt() */
 int nf_setsockopt(struct sock *sk, u_int8_t pf, int optval, char __user *opt,
-- 
cgit v1.2.3


From 739674fb7febf116e7d647031fab16989a08a965 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Fri, 26 Jun 2009 08:23:19 +0200
Subject: netfilter: xtables: constify args in compat copying functions

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
---
 include/linux/netfilter/x_tables.h | 12 ++++++------
 net/ipv4/netfilter/arp_tables.c    |  4 ++--
 net/ipv4/netfilter/ip_tables.c     |  4 ++--
 net/ipv4/netfilter/ipt_ULOG.c      |  4 ++--
 net/ipv6/netfilter/ip6_tables.c    |  4 ++--
 net/netfilter/x_tables.c           |  8 ++++----
 net/netfilter/xt_hashlimit.c       |  4 ++--
 net/netfilter/xt_limit.c           |  4 ++--
 8 files changed, 22 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index a0a144a6c6d9..c61758f4be31 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -286,8 +286,8 @@ struct xt_match {
 	void (*destroy)(const struct xt_mtdtor_param *);
 #ifdef CONFIG_COMPAT
 	/* Called when userspace align differs from kernel space one */
-	void (*compat_from_user)(void *dst, void *src);
-	int (*compat_to_user)(void __user *dst, void *src);
+	void (*compat_from_user)(void *dst, const void *src);
+	int (*compat_to_user)(void __user *dst, const void *src);
 #endif
 	/* Set this to THIS_MODULE if you are a module, otherwise NULL */
 	struct module *me;
@@ -328,8 +328,8 @@ struct xt_target {
 	void (*destroy)(const struct xt_tgdtor_param *);
 #ifdef CONFIG_COMPAT
 	/* Called when userspace align differs from kernel space one */
-	void (*compat_from_user)(void *dst, void *src);
-	int (*compat_to_user)(void __user *dst, void *src);
+	void (*compat_from_user)(void *dst, const void *src);
+	int (*compat_to_user)(void __user *dst, const void *src);
 #endif
 	/* Set this to THIS_MODULE if you are a module, otherwise NULL */
 	struct module *me;
@@ -593,13 +593,13 @@ extern short xt_compat_calc_jump(u_int8_t af, unsigned int offset);
 extern int xt_compat_match_offset(const struct xt_match *match);
 extern int xt_compat_match_from_user(struct xt_entry_match *m,
 				     void **dstptr, unsigned int *size);
-extern int xt_compat_match_to_user(struct xt_entry_match *m,
+extern int xt_compat_match_to_user(const struct xt_entry_match *m,
 				   void __user **dstptr, unsigned int *size);
 
 extern int xt_compat_target_offset(const struct xt_target *target);
 extern void xt_compat_target_from_user(struct xt_entry_target *t,
 				       void **dstptr, unsigned int *size);
-extern int xt_compat_target_to_user(struct xt_entry_target *t,
+extern int xt_compat_target_to_user(const struct xt_entry_target *t,
 				    void __user **dstptr, unsigned int *size);
 
 #endif /* CONFIG_COMPAT */
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 72723ea1054b..2303dc92a277 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -842,7 +842,7 @@ static int copy_entries_to_user(unsigned int total_size,
 }
 
 #ifdef CONFIG_COMPAT
-static void compat_standard_from_user(void *dst, void *src)
+static void compat_standard_from_user(void *dst, const void *src)
 {
 	int v = *(compat_int_t *)src;
 
@@ -851,7 +851,7 @@ static void compat_standard_from_user(void *dst, void *src)
 	memcpy(dst, &v, sizeof(v));
 }
 
-static int compat_standard_to_user(void __user *dst, void *src)
+static int compat_standard_to_user(void __user *dst, const void *src)
 {
 	compat_int_t cv = *(int *)src;
 
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 2057b1bb6178..2a4f745ce36e 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -1047,7 +1047,7 @@ copy_entries_to_user(unsigned int total_size,
 }
 
 #ifdef CONFIG_COMPAT
-static void compat_standard_from_user(void *dst, void *src)
+static void compat_standard_from_user(void *dst, const void *src)
 {
 	int v = *(compat_int_t *)src;
 
@@ -1056,7 +1056,7 @@ static void compat_standard_from_user(void *dst, void *src)
 	memcpy(dst, &v, sizeof(v));
 }
 
-static int compat_standard_to_user(void __user *dst, void *src)
+static int compat_standard_to_user(void __user *dst, const void *src)
 {
 	compat_int_t cv = *(int *)src;
 
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c
index 399061c3fd7d..09a5d3f7cc41 100644
--- a/net/ipv4/netfilter/ipt_ULOG.c
+++ b/net/ipv4/netfilter/ipt_ULOG.c
@@ -338,7 +338,7 @@ struct compat_ipt_ulog_info {
 	char		prefix[ULOG_PREFIX_LEN];
 };
 
-static void ulog_tg_compat_from_user(void *dst, void *src)
+static void ulog_tg_compat_from_user(void *dst, const void *src)
 {
 	const struct compat_ipt_ulog_info *cl = src;
 	struct ipt_ulog_info l = {
@@ -351,7 +351,7 @@ static void ulog_tg_compat_from_user(void *dst, void *src)
 	memcpy(dst, &l, sizeof(l));
 }
 
-static int ulog_tg_compat_to_user(void __user *dst, void *src)
+static int ulog_tg_compat_to_user(void __user *dst, const void *src)
 {
 	const struct ipt_ulog_info *l = src;
 	struct compat_ipt_ulog_info cl = {
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index dcd7825fe7b6..3ff4fd50e96e 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -1079,7 +1079,7 @@ copy_entries_to_user(unsigned int total_size,
 }
 
 #ifdef CONFIG_COMPAT
-static void compat_standard_from_user(void *dst, void *src)
+static void compat_standard_from_user(void *dst, const void *src)
 {
 	int v = *(compat_int_t *)src;
 
@@ -1088,7 +1088,7 @@ static void compat_standard_from_user(void *dst, void *src)
 	memcpy(dst, &v, sizeof(v));
 }
 
-static int compat_standard_to_user(void __user *dst, void *src)
+static int compat_standard_to_user(void __user *dst, const void *src)
 {
 	compat_int_t cv = *(int *)src;
 
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 12503199826f..69c56287d518 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -485,8 +485,8 @@ int xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr,
 }
 EXPORT_SYMBOL_GPL(xt_compat_match_from_user);
 
-int xt_compat_match_to_user(struct xt_entry_match *m, void __user **dstptr,
-			    unsigned int *size)
+int xt_compat_match_to_user(const struct xt_entry_match *m,
+			    void __user **dstptr, unsigned int *size)
 {
 	const struct xt_match *match = m->u.kernel.match;
 	struct compat_xt_entry_match __user *cm = *dstptr;
@@ -588,8 +588,8 @@ void xt_compat_target_from_user(struct xt_entry_target *t, void **dstptr,
 }
 EXPORT_SYMBOL_GPL(xt_compat_target_from_user);
 
-int xt_compat_target_to_user(struct xt_entry_target *t, void __user **dstptr,
-			     unsigned int *size)
+int xt_compat_target_to_user(const struct xt_entry_target *t,
+			     void __user **dstptr, unsigned int *size)
 {
 	const struct xt_target *target = t->u.kernel.target;
 	struct compat_xt_entry_target __user *ct = *dstptr;
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index 017c95966aa8..e47fb805ffb4 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -775,7 +775,7 @@ struct compat_xt_hashlimit_info {
 	compat_uptr_t master;
 };
 
-static void hashlimit_mt_compat_from_user(void *dst, void *src)
+static void hashlimit_mt_compat_from_user(void *dst, const void *src)
 {
 	int off = offsetof(struct compat_xt_hashlimit_info, hinfo);
 
@@ -783,7 +783,7 @@ static void hashlimit_mt_compat_from_user(void *dst, void *src)
 	memset(dst + off, 0, sizeof(struct compat_xt_hashlimit_info) - off);
 }
 
-static int hashlimit_mt_compat_to_user(void __user *dst, void *src)
+static int hashlimit_mt_compat_to_user(void __user *dst, const void *src)
 {
 	int off = offsetof(struct compat_xt_hashlimit_info, hinfo);
 
diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c
index 2773be6a71dd..a0ca5339af41 100644
--- a/net/netfilter/xt_limit.c
+++ b/net/netfilter/xt_limit.c
@@ -148,7 +148,7 @@ struct compat_xt_rateinfo {
 
 /* To keep the full "prev" timestamp, the upper 32 bits are stored in the
  * master pointer, which does not need to be preserved. */
-static void limit_mt_compat_from_user(void *dst, void *src)
+static void limit_mt_compat_from_user(void *dst, const void *src)
 {
 	const struct compat_xt_rateinfo *cm = src;
 	struct xt_rateinfo m = {
@@ -162,7 +162,7 @@ static void limit_mt_compat_from_user(void *dst, void *src)
 	memcpy(dst, &m, sizeof(m));
 }
 
-static int limit_mt_compat_to_user(void __user *dst, void *src)
+static int limit_mt_compat_to_user(void __user *dst, const void *src)
 {
 	const struct xt_rateinfo *m = src;
 	struct compat_xt_rateinfo cm = {
-- 
cgit v1.2.3


From 5d0aa2ccd4699a01cfdf14886191c249d7b45a01 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 15 Feb 2010 18:13:33 +0100
Subject: netfilter: nf_conntrack: add support for "conntrack zones"

Normally, each connection needs a unique identity. Conntrack zones allow
to specify a numerical zone using the CT target, connections in different
zones can use the same identity.

Example:

iptables -t raw -A PREROUTING -i veth0 -j CT --zone 1
iptables -t raw -A OUTPUT -o veth1 -j CT --zone 1

Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/netfilter/xt_CT.h                |   2 +-
 include/net/ip.h                               |   3 +
 include/net/ipv6.h                             |   3 +
 include/net/netfilter/nf_conntrack.h           |   5 +-
 include/net/netfilter/nf_conntrack_core.h      |   3 +-
 include/net/netfilter/nf_conntrack_expect.h    |   9 +-
 include/net/netfilter/nf_conntrack_extend.h    |   2 +
 include/net/netfilter/nf_conntrack_zones.h     |  23 ++++++
 net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c |   3 +-
 net/ipv4/netfilter/nf_conntrack_proto_icmp.c   |   8 +-
 net/ipv4/netfilter/nf_defrag_ipv4.c            |  12 ++-
 net/ipv4/netfilter/nf_nat_core.c               |  24 +++---
 net/ipv4/netfilter/nf_nat_pptp.c               |   3 +-
 net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c |  12 ++-
 net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c |   8 +-
 net/netfilter/Kconfig                          |  13 +++
 net/netfilter/nf_conntrack_core.c              | 109 +++++++++++++++++++------
 net/netfilter/nf_conntrack_expect.c            |  21 +++--
 net/netfilter/nf_conntrack_h323_main.c         |   3 +-
 net/netfilter/nf_conntrack_netlink.c           |  20 ++---
 net/netfilter/nf_conntrack_pptp.c              |  14 ++--
 net/netfilter/nf_conntrack_sip.c               |   3 +-
 net/netfilter/nf_conntrack_standalone.c        |   6 ++
 net/netfilter/xt_CT.c                          |   8 +-
 net/netfilter/xt_connlimit.c                   |   4 +-
 25 files changed, 236 insertions(+), 85 deletions(-)
 create mode 100644 include/net/netfilter/nf_conntrack_zones.h

(limited to 'include/linux')

diff --git a/include/linux/netfilter/xt_CT.h b/include/linux/netfilter/xt_CT.h
index 7fd0effe1316..1b564106891d 100644
--- a/include/linux/netfilter/xt_CT.h
+++ b/include/linux/netfilter/xt_CT.h
@@ -5,7 +5,7 @@
 
 struct xt_ct_target_info {
 	u_int16_t	flags;
-	u_int16_t	__unused;
+	u_int16_t	zone;
 	u_int32_t	ct_events;
 	u_int32_t	exp_events;
 	char		helper[16];
diff --git a/include/net/ip.h b/include/net/ip.h
index fb63371c07a8..7bc47873e3fc 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -352,8 +352,11 @@ enum ip_defrag_users {
 	IP_DEFRAG_LOCAL_DELIVER,
 	IP_DEFRAG_CALL_RA_CHAIN,
 	IP_DEFRAG_CONNTRACK_IN,
+	__IP_DEFRAG_CONNTRACK_IN_END	= IP_DEFRAG_CONNTRACK_IN + USHORT_MAX,
 	IP_DEFRAG_CONNTRACK_OUT,
+	__IP_DEFRAG_CONNTRACK_OUT_END	= IP_DEFRAG_CONNTRACK_OUT + USHORT_MAX,
 	IP_DEFRAG_CONNTRACK_BRIDGE_IN,
+	__IP_DEFRAG_CONNTRACK_BRIDGE_IN = IP_DEFRAG_CONNTRACK_BRIDGE_IN + USHORT_MAX,
 	IP_DEFRAG_VS_IN,
 	IP_DEFRAG_VS_OUT,
 	IP_DEFRAG_VS_FWD
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 299bbf5adfb6..639ec53ea081 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -355,8 +355,11 @@ struct inet_frag_queue;
 enum ip6_defrag_users {
 	IP6_DEFRAG_LOCAL_DELIVER,
 	IP6_DEFRAG_CONNTRACK_IN,
+	__IP6_DEFRAG_CONNTRACK_IN	= IP6_DEFRAG_CONNTRACK_IN + USHORT_MAX,
 	IP6_DEFRAG_CONNTRACK_OUT,
+	__IP6_DEFRAG_CONNTRACK_OUT	= IP6_DEFRAG_CONNTRACK_OUT + USHORT_MAX,
 	IP6_DEFRAG_CONNTRACK_BRIDGE_IN,
+	__IP6_DEFRAG_CONNTRACK_BRIDGE_IN = IP6_DEFRAG_CONNTRACK_BRIDGE_IN + USHORT_MAX,
 };
 
 struct ip6_create_arg {
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index 5b7d8835523f..bde095f7e845 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -198,7 +198,8 @@ extern void *nf_ct_alloc_hashtable(unsigned int *sizep, int *vmalloced, int null
 extern void nf_ct_free_hashtable(void *hash, int vmalloced, unsigned int size);
 
 extern struct nf_conntrack_tuple_hash *
-__nf_conntrack_find(struct net *net, const struct nf_conntrack_tuple *tuple);
+__nf_conntrack_find(struct net *net, u16 zone,
+		    const struct nf_conntrack_tuple *tuple);
 
 extern void nf_conntrack_hash_insert(struct nf_conn *ct);
 extern void nf_ct_delete_from_lists(struct nf_conn *ct);
@@ -267,7 +268,7 @@ extern void
 nf_ct_iterate_cleanup(struct net *net, int (*iter)(struct nf_conn *i, void *data), void *data);
 extern void nf_conntrack_free(struct nf_conn *ct);
 extern struct nf_conn *
-nf_conntrack_alloc(struct net *net,
+nf_conntrack_alloc(struct net *net, u16 zone,
 		   const struct nf_conntrack_tuple *orig,
 		   const struct nf_conntrack_tuple *repl,
 		   gfp_t gfp);
diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h
index 5a449b44ba33..dffde8e6920e 100644
--- a/include/net/netfilter/nf_conntrack_core.h
+++ b/include/net/netfilter/nf_conntrack_core.h
@@ -49,7 +49,8 @@ nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse,
 
 /* Find a connection corresponding to a tuple. */
 extern struct nf_conntrack_tuple_hash *
-nf_conntrack_find_get(struct net *net, const struct nf_conntrack_tuple *tuple);
+nf_conntrack_find_get(struct net *net, u16 zone,
+		      const struct nf_conntrack_tuple *tuple);
 
 extern int __nf_conntrack_confirm(struct sk_buff *skb);
 
diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h
index 4b47ec19ef39..11e815084fcf 100644
--- a/include/net/netfilter/nf_conntrack_expect.h
+++ b/include/net/netfilter/nf_conntrack_expect.h
@@ -74,13 +74,16 @@ int nf_conntrack_expect_init(struct net *net);
 void nf_conntrack_expect_fini(struct net *net);
 
 struct nf_conntrack_expect *
-__nf_ct_expect_find(struct net *net, const struct nf_conntrack_tuple *tuple);
+__nf_ct_expect_find(struct net *net, u16 zone,
+		    const struct nf_conntrack_tuple *tuple);
 
 struct nf_conntrack_expect *
-nf_ct_expect_find_get(struct net *net, const struct nf_conntrack_tuple *tuple);
+nf_ct_expect_find_get(struct net *net, u16 zone,
+		      const struct nf_conntrack_tuple *tuple);
 
 struct nf_conntrack_expect *
-nf_ct_find_expectation(struct net *net, const struct nf_conntrack_tuple *tuple);
+nf_ct_find_expectation(struct net *net, u16 zone,
+		       const struct nf_conntrack_tuple *tuple);
 
 void nf_ct_unlink_expect(struct nf_conntrack_expect *exp);
 void nf_ct_remove_expectations(struct nf_conn *ct);
diff --git a/include/net/netfilter/nf_conntrack_extend.h b/include/net/netfilter/nf_conntrack_extend.h
index e192dc17c583..2d2a1f9a61d8 100644
--- a/include/net/netfilter/nf_conntrack_extend.h
+++ b/include/net/netfilter/nf_conntrack_extend.h
@@ -8,6 +8,7 @@ enum nf_ct_ext_id {
 	NF_CT_EXT_NAT,
 	NF_CT_EXT_ACCT,
 	NF_CT_EXT_ECACHE,
+	NF_CT_EXT_ZONE,
 	NF_CT_EXT_NUM,
 };
 
@@ -15,6 +16,7 @@ enum nf_ct_ext_id {
 #define NF_CT_EXT_NAT_TYPE struct nf_conn_nat
 #define NF_CT_EXT_ACCT_TYPE struct nf_conn_counter
 #define NF_CT_EXT_ECACHE_TYPE struct nf_conntrack_ecache
+#define NF_CT_EXT_ZONE_TYPE struct nf_conntrack_zone
 
 /* Extensions: optional stuff which isn't permanently in struct. */
 struct nf_ct_ext {
diff --git a/include/net/netfilter/nf_conntrack_zones.h b/include/net/netfilter/nf_conntrack_zones.h
new file mode 100644
index 000000000000..0bbb2bd51e89
--- /dev/null
+++ b/include/net/netfilter/nf_conntrack_zones.h
@@ -0,0 +1,23 @@
+#ifndef _NF_CONNTRACK_ZONES_H
+#define _NF_CONNTRACK_ZONES_H
+
+#include <net/netfilter/nf_conntrack_extend.h>
+
+#define NF_CT_DEFAULT_ZONE	0
+
+struct nf_conntrack_zone {
+	u16	id;
+};
+
+static inline u16 nf_ct_zone(const struct nf_conn *ct)
+{
+#ifdef CONFIG_NF_CONNTRACK_ZONES
+	struct nf_conntrack_zone *nf_ct_zone;
+	nf_ct_zone = nf_ct_ext_find(ct, NF_CT_EXT_ZONE);
+	if (nf_ct_zone)
+		return nf_ct_zone->id;
+#endif
+	return NF_CT_DEFAULT_ZONE;
+}
+
+#endif /* _NF_CONNTRACK_ZONES_H */
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index d1ea38a7c490..2bb1f87051c4 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -22,6 +22,7 @@
 #include <net/netfilter/nf_conntrack_helper.h>
 #include <net/netfilter/nf_conntrack_l4proto.h>
 #include <net/netfilter/nf_conntrack_l3proto.h>
+#include <net/netfilter/nf_conntrack_zones.h>
 #include <net/netfilter/nf_conntrack_core.h>
 #include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
 #include <net/netfilter/nf_nat_helper.h>
@@ -266,7 +267,7 @@ getorigdst(struct sock *sk, int optval, void __user *user, int *len)
 		return -EINVAL;
 	}
 
-	h = nf_conntrack_find_get(sock_net(sk), &tuple);
+	h = nf_conntrack_find_get(sock_net(sk), NF_CT_DEFAULT_ZONE, &tuple);
 	if (h) {
 		struct sockaddr_in sin;
 		struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index 327826a968a8..7404bde95994 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -18,6 +18,7 @@
 #include <net/netfilter/nf_conntrack_tuple.h>
 #include <net/netfilter/nf_conntrack_l4proto.h>
 #include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_zones.h>
 #include <net/netfilter/nf_log.h>
 
 static unsigned int nf_ct_icmp_timeout __read_mostly = 30*HZ;
@@ -114,13 +115,14 @@ static bool icmp_new(struct nf_conn *ct, const struct sk_buff *skb,
 
 /* Returns conntrack if it dealt with ICMP, and filled in skb fields */
 static int
-icmp_error_message(struct net *net, struct sk_buff *skb,
+icmp_error_message(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb,
 		 enum ip_conntrack_info *ctinfo,
 		 unsigned int hooknum)
 {
 	struct nf_conntrack_tuple innertuple, origtuple;
 	const struct nf_conntrack_l4proto *innerproto;
 	const struct nf_conntrack_tuple_hash *h;
+	u16 zone = tmpl ? nf_ct_zone(tmpl) : NF_CT_DEFAULT_ZONE;
 
 	NF_CT_ASSERT(skb->nfct == NULL);
 
@@ -146,7 +148,7 @@ icmp_error_message(struct net *net, struct sk_buff *skb,
 
 	*ctinfo = IP_CT_RELATED;
 
-	h = nf_conntrack_find_get(net, &innertuple);
+	h = nf_conntrack_find_get(net, zone, &innertuple);
 	if (!h) {
 		pr_debug("icmp_error_message: no match\n");
 		return -NF_ACCEPT;
@@ -209,7 +211,7 @@ icmp_error(struct net *net, struct nf_conn *tmpl,
 	    icmph->type != ICMP_REDIRECT)
 		return NF_ACCEPT;
 
-	return icmp_error_message(net, skb, ctinfo, hooknum);
+	return icmp_error_message(net, tmpl, skb, ctinfo, hooknum);
 }
 
 #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c
index f6f46686cbc0..d498a704d456 100644
--- a/net/ipv4/netfilter/nf_defrag_ipv4.c
+++ b/net/ipv4/netfilter/nf_defrag_ipv4.c
@@ -16,6 +16,7 @@
 
 #include <linux/netfilter_bridge.h>
 #include <linux/netfilter_ipv4.h>
+#include <net/netfilter/nf_conntrack_zones.h>
 #include <net/netfilter/ipv4/nf_defrag_ipv4.h>
 #include <net/netfilter/nf_conntrack.h>
 
@@ -39,15 +40,20 @@ static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user)
 static enum ip_defrag_users nf_ct_defrag_user(unsigned int hooknum,
 					      struct sk_buff *skb)
 {
+	u16 zone = NF_CT_DEFAULT_ZONE;
+
+	if (skb->nfct)
+		zone = nf_ct_zone((struct nf_conn *)skb->nfct);
+
 #ifdef CONFIG_BRIDGE_NETFILTER
 	if (skb->nf_bridge &&
 	    skb->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING)
-		return IP_DEFRAG_CONNTRACK_BRIDGE_IN;
+		return IP_DEFRAG_CONNTRACK_BRIDGE_IN + zone;
 #endif
 	if (hooknum == NF_INET_PRE_ROUTING)
-		return IP_DEFRAG_CONNTRACK_IN;
+		return IP_DEFRAG_CONNTRACK_IN + zone;
 	else
-		return IP_DEFRAG_CONNTRACK_OUT;
+		return IP_DEFRAG_CONNTRACK_OUT + zone;
 }
 
 static unsigned int ipv4_conntrack_defrag(unsigned int hooknum,
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index 26066a2327ad..4595281c2863 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -30,6 +30,7 @@
 #include <net/netfilter/nf_conntrack_helper.h>
 #include <net/netfilter/nf_conntrack_l3proto.h>
 #include <net/netfilter/nf_conntrack_l4proto.h>
+#include <net/netfilter/nf_conntrack_zones.h>
 
 static DEFINE_SPINLOCK(nf_nat_lock);
 
@@ -69,13 +70,14 @@ EXPORT_SYMBOL_GPL(nf_nat_proto_put);
 
 /* We keep an extra hash for each conntrack, for fast searching. */
 static inline unsigned int
-hash_by_src(const struct net *net, const struct nf_conntrack_tuple *tuple)
+hash_by_src(const struct net *net, u16 zone,
+	    const struct nf_conntrack_tuple *tuple)
 {
 	unsigned int hash;
 
 	/* Original src, to ensure we map it consistently if poss. */
 	hash = jhash_3words((__force u32)tuple->src.u3.ip,
-			    (__force u32)tuple->src.u.all,
+			    (__force u32)tuple->src.u.all ^ zone,
 			    tuple->dst.protonum, 0);
 	return ((u64)hash * net->ipv4.nat_htable_size) >> 32;
 }
@@ -139,12 +141,12 @@ same_src(const struct nf_conn *ct,
 
 /* Only called for SRC manip */
 static int
-find_appropriate_src(struct net *net,
+find_appropriate_src(struct net *net, u16 zone,
 		     const struct nf_conntrack_tuple *tuple,
 		     struct nf_conntrack_tuple *result,
 		     const struct nf_nat_range *range)
 {
-	unsigned int h = hash_by_src(net, tuple);
+	unsigned int h = hash_by_src(net, zone, tuple);
 	const struct nf_conn_nat *nat;
 	const struct nf_conn *ct;
 	const struct hlist_node *n;
@@ -152,7 +154,7 @@ find_appropriate_src(struct net *net,
 	rcu_read_lock();
 	hlist_for_each_entry_rcu(nat, n, &net->ipv4.nat_bysource[h], bysource) {
 		ct = nat->ct;
-		if (same_src(ct, tuple)) {
+		if (same_src(ct, tuple) && nf_ct_zone(ct) == zone) {
 			/* Copy source part from reply tuple. */
 			nf_ct_invert_tuplepr(result,
 				       &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
@@ -175,7 +177,7 @@ find_appropriate_src(struct net *net,
    the ip with the lowest src-ip/dst-ip/proto usage.
 */
 static void
-find_best_ips_proto(struct nf_conntrack_tuple *tuple,
+find_best_ips_proto(u16 zone, struct nf_conntrack_tuple *tuple,
 		    const struct nf_nat_range *range,
 		    const struct nf_conn *ct,
 		    enum nf_nat_manip_type maniptype)
@@ -209,7 +211,7 @@ find_best_ips_proto(struct nf_conntrack_tuple *tuple,
 	maxip = ntohl(range->max_ip);
 	j = jhash_2words((__force u32)tuple->src.u3.ip,
 			 range->flags & IP_NAT_RANGE_PERSISTENT ?
-				0 : (__force u32)tuple->dst.u3.ip, 0);
+				0 : (__force u32)tuple->dst.u3.ip ^ zone, 0);
 	j = ((u64)j * (maxip - minip + 1)) >> 32;
 	*var_ipp = htonl(minip + j);
 }
@@ -229,6 +231,7 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
 {
 	struct net *net = nf_ct_net(ct);
 	const struct nf_nat_protocol *proto;
+	u16 zone = nf_ct_zone(ct);
 
 	/* 1) If this srcip/proto/src-proto-part is currently mapped,
 	   and that same mapping gives a unique tuple within the given
@@ -239,7 +242,7 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
 	   manips not an issue.  */
 	if (maniptype == IP_NAT_MANIP_SRC &&
 	    !(range->flags & IP_NAT_RANGE_PROTO_RANDOM)) {
-		if (find_appropriate_src(net, orig_tuple, tuple, range)) {
+		if (find_appropriate_src(net, zone, orig_tuple, tuple, range)) {
 			pr_debug("get_unique_tuple: Found current src map\n");
 			if (!nf_nat_used_tuple(tuple, ct))
 				return;
@@ -249,7 +252,7 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
 	/* 2) Select the least-used IP/proto combination in the given
 	   range. */
 	*tuple = *orig_tuple;
-	find_best_ips_proto(tuple, range, ct, maniptype);
+	find_best_ips_proto(zone, tuple, range, ct, maniptype);
 
 	/* 3) The per-protocol part of the manip is made to map into
 	   the range to make a unique tuple. */
@@ -327,7 +330,8 @@ nf_nat_setup_info(struct nf_conn *ct,
 	if (have_to_hash) {
 		unsigned int srchash;
 
-		srchash = hash_by_src(net, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+		srchash = hash_by_src(net, nf_ct_zone(ct),
+				      &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
 		spin_lock_bh(&nf_nat_lock);
 		/* nf_conntrack_alter_reply might re-allocate exntension aera */
 		nat = nfct_nat(ct);
diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c
index 9eb171056c63..4c060038d29f 100644
--- a/net/ipv4/netfilter/nf_nat_pptp.c
+++ b/net/ipv4/netfilter/nf_nat_pptp.c
@@ -25,6 +25,7 @@
 #include <net/netfilter/nf_nat_rule.h>
 #include <net/netfilter/nf_conntrack_helper.h>
 #include <net/netfilter/nf_conntrack_expect.h>
+#include <net/netfilter/nf_conntrack_zones.h>
 #include <linux/netfilter/nf_conntrack_proto_gre.h>
 #include <linux/netfilter/nf_conntrack_pptp.h>
 
@@ -74,7 +75,7 @@ static void pptp_nat_expected(struct nf_conn *ct,
 
 	pr_debug("trying to unexpect other dir: ");
 	nf_ct_dump_tuple_ip(&t);
-	other_exp = nf_ct_expect_find_get(net, &t);
+	other_exp = nf_ct_expect_find_get(net, nf_ct_zone(ct), &t);
 	if (other_exp) {
 		nf_ct_unexpect_related(other_exp);
 		nf_ct_expect_put(other_exp);
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index 55ce22e5de49..996c3f41fecd 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -27,6 +27,7 @@
 #include <net/netfilter/nf_conntrack_l4proto.h>
 #include <net/netfilter/nf_conntrack_l3proto.h>
 #include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_zones.h>
 #include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
 #include <net/netfilter/nf_log.h>
 
@@ -191,15 +192,20 @@ out:
 static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum,
 						struct sk_buff *skb)
 {
+	u16 zone = NF_CT_DEFAULT_ZONE;
+
+	if (skb->nfct)
+		zone = nf_ct_zone((struct nf_conn *)skb->nfct);
+
 #ifdef CONFIG_BRIDGE_NETFILTER
 	if (skb->nf_bridge &&
 	    skb->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING)
-		return IP6_DEFRAG_CONNTRACK_BRIDGE_IN;
+		return IP6_DEFRAG_CONNTRACK_BRIDGE_IN + zone;
 #endif
 	if (hooknum == NF_INET_PRE_ROUTING)
-		return IP6_DEFRAG_CONNTRACK_IN;
+		return IP6_DEFRAG_CONNTRACK_IN + zone;
 	else
-		return IP6_DEFRAG_CONNTRACK_OUT;
+		return IP6_DEFRAG_CONNTRACK_OUT + zone;
 
 }
 
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index d772dc21857f..9be81776415e 100644
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -23,6 +23,7 @@
 #include <net/netfilter/nf_conntrack_tuple.h>
 #include <net/netfilter/nf_conntrack_l4proto.h>
 #include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_zones.h>
 #include <net/netfilter/ipv6/nf_conntrack_icmpv6.h>
 #include <net/netfilter/nf_log.h>
 
@@ -128,7 +129,7 @@ static bool icmpv6_new(struct nf_conn *ct, const struct sk_buff *skb,
 }
 
 static int
-icmpv6_error_message(struct net *net,
+icmpv6_error_message(struct net *net, struct nf_conn *tmpl,
 		     struct sk_buff *skb,
 		     unsigned int icmp6off,
 		     enum ip_conntrack_info *ctinfo,
@@ -137,6 +138,7 @@ icmpv6_error_message(struct net *net,
 	struct nf_conntrack_tuple intuple, origtuple;
 	const struct nf_conntrack_tuple_hash *h;
 	const struct nf_conntrack_l4proto *inproto;
+	u16 zone = tmpl ? nf_ct_zone(tmpl) : NF_CT_DEFAULT_ZONE;
 
 	NF_CT_ASSERT(skb->nfct == NULL);
 
@@ -163,7 +165,7 @@ icmpv6_error_message(struct net *net,
 
 	*ctinfo = IP_CT_RELATED;
 
-	h = nf_conntrack_find_get(net, &intuple);
+	h = nf_conntrack_find_get(net, zone, &intuple);
 	if (!h) {
 		pr_debug("icmpv6_error: no match\n");
 		return -NF_ACCEPT;
@@ -216,7 +218,7 @@ icmpv6_error(struct net *net, struct nf_conn *tmpl,
 	if (icmp6h->icmp6_type >= 128)
 		return NF_ACCEPT;
 
-	return icmpv6_error_message(net, skb, dataoff, ctinfo, hooknum);
+	return icmpv6_error_message(net, tmpl, skb, dataoff, ctinfo, hooknum);
 }
 
 #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 4469d45261f4..18d77b5c351a 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -83,6 +83,19 @@ config NF_CONNTRACK_SECMARK
 
 	  If unsure, say 'N'.
 
+config NF_CONNTRACK_ZONES
+	bool  'Connection tracking zones'
+	depends on NETFILTER_ADVANCED
+	depends on NETFILTER_XT_TARGET_CT
+	help
+	  This option enables support for connection tracking zones.
+	  Normally, each connection needs to have a unique system wide
+	  identity. Connection tracking zones allow to have multiple
+	  connections using the same identity, as long as they are
+	  contained in different zones.
+
+	  If unsure, say `N'.
+
 config NF_CONNTRACK_EVENTS
 	bool "Connection tracking events"
 	depends on NETFILTER_ADVANCED
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 65351ed5d815..0c9bbe93cc16 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -42,6 +42,7 @@
 #include <net/netfilter/nf_conntrack_extend.h>
 #include <net/netfilter/nf_conntrack_acct.h>
 #include <net/netfilter/nf_conntrack_ecache.h>
+#include <net/netfilter/nf_conntrack_zones.h>
 #include <net/netfilter/nf_nat.h>
 #include <net/netfilter/nf_nat_core.h>
 
@@ -68,7 +69,7 @@ static int nf_conntrack_hash_rnd_initted;
 static unsigned int nf_conntrack_hash_rnd;
 
 static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple,
-				  unsigned int size, unsigned int rnd)
+				  u16 zone, unsigned int size, unsigned int rnd)
 {
 	unsigned int n;
 	u_int32_t h;
@@ -79,16 +80,16 @@ static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple,
 	 */
 	n = (sizeof(tuple->src) + sizeof(tuple->dst.u3)) / sizeof(u32);
 	h = jhash2((u32 *)tuple, n,
-		   rnd ^ (((__force __u16)tuple->dst.u.all << 16) |
-			  tuple->dst.protonum));
+		   zone ^ rnd ^ (((__force __u16)tuple->dst.u.all << 16) |
+				 tuple->dst.protonum));
 
 	return ((u64)h * size) >> 32;
 }
 
-static inline u_int32_t hash_conntrack(const struct net *net,
+static inline u_int32_t hash_conntrack(const struct net *net, u16 zone,
 				       const struct nf_conntrack_tuple *tuple)
 {
-	return __hash_conntrack(tuple, net->ct.htable_size,
+	return __hash_conntrack(tuple, zone, net->ct.htable_size,
 				nf_conntrack_hash_rnd);
 }
 
@@ -292,11 +293,12 @@ static void death_by_timeout(unsigned long ul_conntrack)
  * - Caller must lock nf_conntrack_lock before calling this function
  */
 struct nf_conntrack_tuple_hash *
-__nf_conntrack_find(struct net *net, const struct nf_conntrack_tuple *tuple)
+__nf_conntrack_find(struct net *net, u16 zone,
+		    const struct nf_conntrack_tuple *tuple)
 {
 	struct nf_conntrack_tuple_hash *h;
 	struct hlist_nulls_node *n;
-	unsigned int hash = hash_conntrack(net, tuple);
+	unsigned int hash = hash_conntrack(net, zone, tuple);
 
 	/* Disable BHs the entire time since we normally need to disable them
 	 * at least once for the stats anyway.
@@ -304,7 +306,8 @@ __nf_conntrack_find(struct net *net, const struct nf_conntrack_tuple *tuple)
 	local_bh_disable();
 begin:
 	hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[hash], hnnode) {
-		if (nf_ct_tuple_equal(tuple, &h->tuple)) {
+		if (nf_ct_tuple_equal(tuple, &h->tuple) &&
+		    nf_ct_zone(nf_ct_tuplehash_to_ctrack(h)) == zone) {
 			NF_CT_STAT_INC(net, found);
 			local_bh_enable();
 			return h;
@@ -326,21 +329,23 @@ EXPORT_SYMBOL_GPL(__nf_conntrack_find);
 
 /* Find a connection corresponding to a tuple. */
 struct nf_conntrack_tuple_hash *
-nf_conntrack_find_get(struct net *net, const struct nf_conntrack_tuple *tuple)
+nf_conntrack_find_get(struct net *net, u16 zone,
+		      const struct nf_conntrack_tuple *tuple)
 {
 	struct nf_conntrack_tuple_hash *h;
 	struct nf_conn *ct;
 
 	rcu_read_lock();
 begin:
-	h = __nf_conntrack_find(net, tuple);
+	h = __nf_conntrack_find(net, zone, tuple);
 	if (h) {
 		ct = nf_ct_tuplehash_to_ctrack(h);
 		if (unlikely(nf_ct_is_dying(ct) ||
 			     !atomic_inc_not_zero(&ct->ct_general.use)))
 			h = NULL;
 		else {
-			if (unlikely(!nf_ct_tuple_equal(tuple, &h->tuple))) {
+			if (unlikely(!nf_ct_tuple_equal(tuple, &h->tuple) ||
+				     nf_ct_zone(ct) != zone)) {
 				nf_ct_put(ct);
 				goto begin;
 			}
@@ -368,9 +373,11 @@ void nf_conntrack_hash_insert(struct nf_conn *ct)
 {
 	struct net *net = nf_ct_net(ct);
 	unsigned int hash, repl_hash;
+	u16 zone;
 
-	hash = hash_conntrack(net, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
-	repl_hash = hash_conntrack(net, &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+	zone = nf_ct_zone(ct);
+	hash = hash_conntrack(net, zone, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+	repl_hash = hash_conntrack(net, zone, &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
 
 	__nf_conntrack_hash_insert(ct, hash, repl_hash);
 }
@@ -387,6 +394,7 @@ __nf_conntrack_confirm(struct sk_buff *skb)
 	struct hlist_nulls_node *n;
 	enum ip_conntrack_info ctinfo;
 	struct net *net;
+	u16 zone;
 
 	ct = nf_ct_get(skb, &ctinfo);
 	net = nf_ct_net(ct);
@@ -398,8 +406,9 @@ __nf_conntrack_confirm(struct sk_buff *skb)
 	if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
 		return NF_ACCEPT;
 
-	hash = hash_conntrack(net, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
-	repl_hash = hash_conntrack(net, &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+	zone = nf_ct_zone(ct);
+	hash = hash_conntrack(net, zone, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+	repl_hash = hash_conntrack(net, zone, &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
 
 	/* We're not in hash table, and we refuse to set up related
 	   connections for unconfirmed conns.  But packet copies and
@@ -418,11 +427,13 @@ __nf_conntrack_confirm(struct sk_buff *skb)
 	   not in the hash.  If there is, we lost race. */
 	hlist_nulls_for_each_entry(h, n, &net->ct.hash[hash], hnnode)
 		if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
-				      &h->tuple))
+				      &h->tuple) &&
+		    zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h)))
 			goto out;
 	hlist_nulls_for_each_entry(h, n, &net->ct.hash[repl_hash], hnnode)
 		if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple,
-				      &h->tuple))
+				      &h->tuple) &&
+		    zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h)))
 			goto out;
 
 	/* Remove from unconfirmed list */
@@ -469,15 +480,19 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple,
 	struct net *net = nf_ct_net(ignored_conntrack);
 	struct nf_conntrack_tuple_hash *h;
 	struct hlist_nulls_node *n;
-	unsigned int hash = hash_conntrack(net, tuple);
+	struct nf_conn *ct;
+	u16 zone = nf_ct_zone(ignored_conntrack);
+	unsigned int hash = hash_conntrack(net, zone, tuple);
 
 	/* Disable BHs the entire time since we need to disable them at
 	 * least once for the stats anyway.
 	 */
 	rcu_read_lock_bh();
 	hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[hash], hnnode) {
-		if (nf_ct_tuplehash_to_ctrack(h) != ignored_conntrack &&
-		    nf_ct_tuple_equal(tuple, &h->tuple)) {
+		ct = nf_ct_tuplehash_to_ctrack(h);
+		if (ct != ignored_conntrack &&
+		    nf_ct_tuple_equal(tuple, &h->tuple) &&
+		    nf_ct_zone(ct) == zone) {
 			NF_CT_STAT_INC(net, found);
 			rcu_read_unlock_bh();
 			return 1;
@@ -540,7 +555,7 @@ static noinline int early_drop(struct net *net, unsigned int hash)
 	return dropped;
 }
 
-struct nf_conn *nf_conntrack_alloc(struct net *net,
+struct nf_conn *nf_conntrack_alloc(struct net *net, u16 zone,
 				   const struct nf_conntrack_tuple *orig,
 				   const struct nf_conntrack_tuple *repl,
 				   gfp_t gfp)
@@ -558,7 +573,7 @@ struct nf_conn *nf_conntrack_alloc(struct net *net,
 
 	if (nf_conntrack_max &&
 	    unlikely(atomic_read(&net->ct.count) > nf_conntrack_max)) {
-		unsigned int hash = hash_conntrack(net, orig);
+		unsigned int hash = hash_conntrack(net, zone, orig);
 		if (!early_drop(net, hash)) {
 			atomic_dec(&net->ct.count);
 			if (net_ratelimit())
@@ -595,13 +610,28 @@ struct nf_conn *nf_conntrack_alloc(struct net *net,
 #ifdef CONFIG_NET_NS
 	ct->ct_net = net;
 #endif
-
+#ifdef CONFIG_NF_CONNTRACK_ZONES
+	if (zone) {
+		struct nf_conntrack_zone *nf_ct_zone;
+
+		nf_ct_zone = nf_ct_ext_add(ct, NF_CT_EXT_ZONE, GFP_ATOMIC);
+		if (!nf_ct_zone)
+			goto out_free;
+		nf_ct_zone->id = zone;
+	}
+#endif
 	/*
 	 * changes to lookup keys must be done before setting refcnt to 1
 	 */
 	smp_wmb();
 	atomic_set(&ct->ct_general.use, 1);
 	return ct;
+
+#ifdef CONFIG_NF_CONNTRACK_ZONES
+out_free:
+	kmem_cache_free(net->ct.nf_conntrack_cachep, ct);
+	return ERR_PTR(-ENOMEM);
+#endif
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_alloc);
 
@@ -631,13 +661,14 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
 	struct nf_conntrack_tuple repl_tuple;
 	struct nf_conntrack_ecache *ecache;
 	struct nf_conntrack_expect *exp;
+	u16 zone = tmpl ? nf_ct_zone(tmpl) : NF_CT_DEFAULT_ZONE;
 
 	if (!nf_ct_invert_tuple(&repl_tuple, tuple, l3proto, l4proto)) {
 		pr_debug("Can't invert tuple.\n");
 		return NULL;
 	}
 
-	ct = nf_conntrack_alloc(net, tuple, &repl_tuple, GFP_ATOMIC);
+	ct = nf_conntrack_alloc(net, zone, tuple, &repl_tuple, GFP_ATOMIC);
 	if (IS_ERR(ct)) {
 		pr_debug("Can't allocate conntrack.\n");
 		return (struct nf_conntrack_tuple_hash *)ct;
@@ -657,7 +688,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
 			     GFP_ATOMIC);
 
 	spin_lock_bh(&nf_conntrack_lock);
-	exp = nf_ct_find_expectation(net, tuple);
+	exp = nf_ct_find_expectation(net, zone, tuple);
 	if (exp) {
 		pr_debug("conntrack: expectation arrives ct=%p exp=%p\n",
 			 ct, exp);
@@ -713,6 +744,7 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
 	struct nf_conntrack_tuple tuple;
 	struct nf_conntrack_tuple_hash *h;
 	struct nf_conn *ct;
+	u16 zone = tmpl ? nf_ct_zone(tmpl) : NF_CT_DEFAULT_ZONE;
 
 	if (!nf_ct_get_tuple(skb, skb_network_offset(skb),
 			     dataoff, l3num, protonum, &tuple, l3proto,
@@ -722,7 +754,7 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
 	}
 
 	/* look for tuple match */
-	h = nf_conntrack_find_get(net, &tuple);
+	h = nf_conntrack_find_get(net, zone, &tuple);
 	if (!h) {
 		h = init_conntrack(net, tmpl, &tuple, l3proto, l4proto,
 				   skb, dataoff);
@@ -958,6 +990,14 @@ bool __nf_ct_kill_acct(struct nf_conn *ct,
 }
 EXPORT_SYMBOL_GPL(__nf_ct_kill_acct);
 
+#ifdef CONFIG_NF_CONNTRACK_ZONES
+static struct nf_ct_ext_type nf_ct_zone_extend __read_mostly = {
+	.len	= sizeof(struct nf_conntrack_zone),
+	.align	= __alignof__(struct nf_conntrack_zone),
+	.id	= NF_CT_EXT_ZONE,
+};
+#endif
+
 #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
 
 #include <linux/netfilter/nfnetlink.h>
@@ -1139,6 +1179,9 @@ static void nf_conntrack_cleanup_init_net(void)
 
 	nf_conntrack_helper_fini();
 	nf_conntrack_proto_fini();
+#ifdef CONFIG_NF_CONNTRACK_ZONES
+	nf_ct_extend_unregister(&nf_ct_zone_extend);
+#endif
 }
 
 static void nf_conntrack_cleanup_net(struct net *net)
@@ -1214,6 +1257,7 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
 	unsigned int hashsize, old_size;
 	struct hlist_nulls_head *hash, *old_hash;
 	struct nf_conntrack_tuple_hash *h;
+	struct nf_conn *ct;
 
 	if (current->nsproxy->net_ns != &init_net)
 		return -EOPNOTSUPP;
@@ -1240,8 +1284,10 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
 		while (!hlist_nulls_empty(&init_net.ct.hash[i])) {
 			h = hlist_nulls_entry(init_net.ct.hash[i].first,
 					struct nf_conntrack_tuple_hash, hnnode);
+			ct = nf_ct_tuplehash_to_ctrack(h);
 			hlist_nulls_del_rcu(&h->hnnode);
-			bucket = __hash_conntrack(&h->tuple, hashsize,
+			bucket = __hash_conntrack(&h->tuple, nf_ct_zone(ct),
+						  hashsize,
 						  nf_conntrack_hash_rnd);
 			hlist_nulls_add_head_rcu(&h->hnnode, &hash[bucket]);
 		}
@@ -1299,6 +1345,11 @@ static int nf_conntrack_init_init_net(void)
 	if (ret < 0)
 		goto err_helper;
 
+#ifdef CONFIG_NF_CONNTRACK_ZONES
+	ret = nf_ct_extend_register(&nf_ct_zone_extend);
+	if (ret < 0)
+		goto err_extend;
+#endif
 	/* Set up fake conntrack: to never be deleted, not in any hashes */
 #ifdef CONFIG_NET_NS
 	nf_conntrack_untracked.ct_net = &init_net;
@@ -1309,6 +1360,10 @@ static int nf_conntrack_init_init_net(void)
 
 	return 0;
 
+#ifdef CONFIG_NF_CONNTRACK_ZONES
+err_extend:
+	nf_conntrack_helper_fini();
+#endif
 err_helper:
 	nf_conntrack_proto_fini();
 err_proto:
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 6182fb1b55de..acb29ccaa41f 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -27,6 +27,7 @@
 #include <net/netfilter/nf_conntrack_expect.h>
 #include <net/netfilter/nf_conntrack_helper.h>
 #include <net/netfilter/nf_conntrack_tuple.h>
+#include <net/netfilter/nf_conntrack_zones.h>
 
 unsigned int nf_ct_expect_hsize __read_mostly;
 EXPORT_SYMBOL_GPL(nf_ct_expect_hsize);
@@ -84,7 +85,8 @@ static unsigned int nf_ct_expect_dst_hash(const struct nf_conntrack_tuple *tuple
 }
 
 struct nf_conntrack_expect *
-__nf_ct_expect_find(struct net *net, const struct nf_conntrack_tuple *tuple)
+__nf_ct_expect_find(struct net *net, u16 zone,
+		    const struct nf_conntrack_tuple *tuple)
 {
 	struct nf_conntrack_expect *i;
 	struct hlist_node *n;
@@ -95,7 +97,8 @@ __nf_ct_expect_find(struct net *net, const struct nf_conntrack_tuple *tuple)
 
 	h = nf_ct_expect_dst_hash(tuple);
 	hlist_for_each_entry_rcu(i, n, &net->ct.expect_hash[h], hnode) {
-		if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask))
+		if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask) &&
+		    nf_ct_zone(i->master) == zone)
 			return i;
 	}
 	return NULL;
@@ -104,12 +107,13 @@ EXPORT_SYMBOL_GPL(__nf_ct_expect_find);
 
 /* Just find a expectation corresponding to a tuple. */
 struct nf_conntrack_expect *
-nf_ct_expect_find_get(struct net *net, const struct nf_conntrack_tuple *tuple)
+nf_ct_expect_find_get(struct net *net, u16 zone,
+		      const struct nf_conntrack_tuple *tuple)
 {
 	struct nf_conntrack_expect *i;
 
 	rcu_read_lock();
-	i = __nf_ct_expect_find(net, tuple);
+	i = __nf_ct_expect_find(net, zone, tuple);
 	if (i && !atomic_inc_not_zero(&i->use))
 		i = NULL;
 	rcu_read_unlock();
@@ -121,7 +125,8 @@ EXPORT_SYMBOL_GPL(nf_ct_expect_find_get);
 /* If an expectation for this connection is found, it gets delete from
  * global list then returned. */
 struct nf_conntrack_expect *
-nf_ct_find_expectation(struct net *net, const struct nf_conntrack_tuple *tuple)
+nf_ct_find_expectation(struct net *net, u16 zone,
+		       const struct nf_conntrack_tuple *tuple)
 {
 	struct nf_conntrack_expect *i, *exp = NULL;
 	struct hlist_node *n;
@@ -133,7 +138,8 @@ nf_ct_find_expectation(struct net *net, const struct nf_conntrack_tuple *tuple)
 	h = nf_ct_expect_dst_hash(tuple);
 	hlist_for_each_entry(i, n, &net->ct.expect_hash[h], hnode) {
 		if (!(i->flags & NF_CT_EXPECT_INACTIVE) &&
-		    nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)) {
+		    nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask) &&
+		    nf_ct_zone(i->master) == zone) {
 			exp = i;
 			break;
 		}
@@ -204,7 +210,8 @@ static inline int expect_matches(const struct nf_conntrack_expect *a,
 {
 	return a->master == b->master && a->class == b->class &&
 		nf_ct_tuple_equal(&a->tuple, &b->tuple) &&
-		nf_ct_tuple_mask_equal(&a->mask, &b->mask);
+		nf_ct_tuple_mask_equal(&a->mask, &b->mask) &&
+		nf_ct_zone(a->master) == nf_ct_zone(b->master);
 }
 
 /* Generally a bad idea to call this: could have matched already. */
diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c
index 66369490230e..a1c8dd917e12 100644
--- a/net/netfilter/nf_conntrack_h323_main.c
+++ b/net/netfilter/nf_conntrack_h323_main.c
@@ -29,6 +29,7 @@
 #include <net/netfilter/nf_conntrack_expect.h>
 #include <net/netfilter/nf_conntrack_ecache.h>
 #include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_zones.h>
 #include <linux/netfilter/nf_conntrack_h323.h>
 
 /* Parameters */
@@ -1216,7 +1217,7 @@ static struct nf_conntrack_expect *find_expect(struct nf_conn *ct,
 	tuple.dst.u.tcp.port = port;
 	tuple.dst.protonum = IPPROTO_TCP;
 
-	exp = __nf_ct_expect_find(net, &tuple);
+	exp = __nf_ct_expect_find(net, nf_ct_zone(ct), &tuple);
 	if (exp && exp->master == ct)
 		return exp;
 	return NULL;
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index db35edac307b..51089cfe1167 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -811,7 +811,7 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb,
 	if (err < 0)
 		return err;
 
-	h = nf_conntrack_find_get(net, &tuple);
+	h = nf_conntrack_find_get(net, 0, &tuple);
 	if (!h)
 		return -ENOENT;
 
@@ -872,7 +872,7 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb,
 	if (err < 0)
 		return err;
 
-	h = nf_conntrack_find_get(net, &tuple);
+	h = nf_conntrack_find_get(net, 0, &tuple);
 	if (!h)
 		return -ENOENT;
 
@@ -1221,7 +1221,7 @@ ctnetlink_create_conntrack(struct net *net,
 	int err = -EINVAL;
 	struct nf_conntrack_helper *helper;
 
-	ct = nf_conntrack_alloc(net, otuple, rtuple, GFP_ATOMIC);
+	ct = nf_conntrack_alloc(net, 0, otuple, rtuple, GFP_ATOMIC);
 	if (IS_ERR(ct))
 		return ERR_PTR(-ENOMEM);
 
@@ -1325,7 +1325,7 @@ ctnetlink_create_conntrack(struct net *net,
 		if (err < 0)
 			goto err2;
 
-		master_h = nf_conntrack_find_get(net, &master);
+		master_h = nf_conntrack_find_get(net, 0, &master);
 		if (master_h == NULL) {
 			err = -ENOENT;
 			goto err2;
@@ -1374,9 +1374,9 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
 
 	spin_lock_bh(&nf_conntrack_lock);
 	if (cda[CTA_TUPLE_ORIG])
-		h = __nf_conntrack_find(net, &otuple);
+		h = __nf_conntrack_find(net, 0, &otuple);
 	else if (cda[CTA_TUPLE_REPLY])
-		h = __nf_conntrack_find(net, &rtuple);
+		h = __nf_conntrack_find(net, 0, &rtuple);
 
 	if (h == NULL) {
 		err = -ENOENT;
@@ -1714,7 +1714,7 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
 	if (err < 0)
 		return err;
 
-	exp = nf_ct_expect_find_get(net, &tuple);
+	exp = nf_ct_expect_find_get(net, 0, &tuple);
 	if (!exp)
 		return -ENOENT;
 
@@ -1770,7 +1770,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
 			return err;
 
 		/* bump usage count to 2 */
-		exp = nf_ct_expect_find_get(net, &tuple);
+		exp = nf_ct_expect_find_get(net, 0, &tuple);
 		if (!exp)
 			return -ENOENT;
 
@@ -1855,7 +1855,7 @@ ctnetlink_create_expect(struct net *net, const struct nlattr * const cda[],
 		return err;
 
 	/* Look for master conntrack of this expectation */
-	h = nf_conntrack_find_get(net, &master_tuple);
+	h = nf_conntrack_find_get(net, 0, &master_tuple);
 	if (!h)
 		return -ENOENT;
 	ct = nf_ct_tuplehash_to_ctrack(h);
@@ -1912,7 +1912,7 @@ ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb,
 		return err;
 
 	spin_lock_bh(&nf_conntrack_lock);
-	exp = __nf_ct_expect_find(net, &tuple);
+	exp = __nf_ct_expect_find(net, 0, &tuple);
 
 	if (!exp) {
 		spin_unlock_bh(&nf_conntrack_lock);
diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c
index 3807ac7faf4c..088944824e13 100644
--- a/net/netfilter/nf_conntrack_pptp.c
+++ b/net/netfilter/nf_conntrack_pptp.c
@@ -28,6 +28,7 @@
 #include <net/netfilter/nf_conntrack.h>
 #include <net/netfilter/nf_conntrack_core.h>
 #include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_zones.h>
 #include <linux/netfilter/nf_conntrack_proto_gre.h>
 #include <linux/netfilter/nf_conntrack_pptp.h>
 
@@ -123,7 +124,7 @@ static void pptp_expectfn(struct nf_conn *ct,
 		pr_debug("trying to unexpect other dir: ");
 		nf_ct_dump_tuple(&inv_t);
 
-		exp_other = nf_ct_expect_find_get(net, &inv_t);
+		exp_other = nf_ct_expect_find_get(net, nf_ct_zone(ct), &inv_t);
 		if (exp_other) {
 			/* delete other expectation.  */
 			pr_debug("found\n");
@@ -136,17 +137,18 @@ static void pptp_expectfn(struct nf_conn *ct,
 	rcu_read_unlock();
 }
 
-static int destroy_sibling_or_exp(struct net *net,
+static int destroy_sibling_or_exp(struct net *net, struct nf_conn *ct,
 				  const struct nf_conntrack_tuple *t)
 {
 	const struct nf_conntrack_tuple_hash *h;
 	struct nf_conntrack_expect *exp;
 	struct nf_conn *sibling;
+	u16 zone = nf_ct_zone(ct);
 
 	pr_debug("trying to timeout ct or exp for tuple ");
 	nf_ct_dump_tuple(t);
 
-	h = nf_conntrack_find_get(net, t);
+	h = nf_conntrack_find_get(net, zone, t);
 	if (h)  {
 		sibling = nf_ct_tuplehash_to_ctrack(h);
 		pr_debug("setting timeout of conntrack %p to 0\n", sibling);
@@ -157,7 +159,7 @@ static int destroy_sibling_or_exp(struct net *net,
 		nf_ct_put(sibling);
 		return 1;
 	} else {
-		exp = nf_ct_expect_find_get(net, t);
+		exp = nf_ct_expect_find_get(net, zone, t);
 		if (exp) {
 			pr_debug("unexpect_related of expect %p\n", exp);
 			nf_ct_unexpect_related(exp);
@@ -182,7 +184,7 @@ static void pptp_destroy_siblings(struct nf_conn *ct)
 	t.dst.protonum = IPPROTO_GRE;
 	t.src.u.gre.key = help->help.ct_pptp_info.pns_call_id;
 	t.dst.u.gre.key = help->help.ct_pptp_info.pac_call_id;
-	if (!destroy_sibling_or_exp(net, &t))
+	if (!destroy_sibling_or_exp(net, ct, &t))
 		pr_debug("failed to timeout original pns->pac ct/exp\n");
 
 	/* try reply (pac->pns) tuple */
@@ -190,7 +192,7 @@ static void pptp_destroy_siblings(struct nf_conn *ct)
 	t.dst.protonum = IPPROTO_GRE;
 	t.src.u.gre.key = help->help.ct_pptp_info.pac_call_id;
 	t.dst.u.gre.key = help->help.ct_pptp_info.pns_call_id;
-	if (!destroy_sibling_or_exp(net, &t))
+	if (!destroy_sibling_or_exp(net, ct, &t))
 		pr_debug("failed to timeout reply pac->pns ct/exp\n");
 }
 
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index fbe8ff5a420a..8dd75d90efc0 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -23,6 +23,7 @@
 #include <net/netfilter/nf_conntrack_core.h>
 #include <net/netfilter/nf_conntrack_expect.h>
 #include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_zones.h>
 #include <linux/netfilter/nf_conntrack_sip.h>
 
 MODULE_LICENSE("GPL");
@@ -836,7 +837,7 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int dataoff,
 
 	rcu_read_lock();
 	do {
-		exp = __nf_ct_expect_find(net, &tuple);
+		exp = __nf_ct_expect_find(net, nf_ct_zone(ct), &tuple);
 
 		if (!exp || exp->master == ct ||
 		    nfct_help(exp->master)->helper != nfct_help(ct)->helper ||
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index e310f1561bb2..24a42efe62ef 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -26,6 +26,7 @@
 #include <net/netfilter/nf_conntrack_expect.h>
 #include <net/netfilter/nf_conntrack_helper.h>
 #include <net/netfilter/nf_conntrack_acct.h>
+#include <net/netfilter/nf_conntrack_zones.h>
 
 MODULE_LICENSE("GPL");
 
@@ -171,6 +172,11 @@ static int ct_seq_show(struct seq_file *s, void *v)
 		goto release;
 #endif
 
+#ifdef CONFIG_NF_CONNTRACK_ZONES
+	if (seq_printf(s, "zone=%u ", nf_ct_zone(ct)))
+		goto release;
+#endif
+
 	if (seq_printf(s, "use=%u\n", atomic_read(&ct->ct_general.use)))
 		goto release;
 
diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c
index 8183a054256f..61c50fa84703 100644
--- a/net/netfilter/xt_CT.c
+++ b/net/netfilter/xt_CT.c
@@ -16,6 +16,7 @@
 #include <net/netfilter/nf_conntrack.h>
 #include <net/netfilter/nf_conntrack_helper.h>
 #include <net/netfilter/nf_conntrack_ecache.h>
+#include <net/netfilter/nf_conntrack_zones.h>
 
 static unsigned int xt_ct_target(struct sk_buff *skb,
 				 const struct xt_target_param *par)
@@ -69,11 +70,16 @@ static bool xt_ct_tg_check(const struct xt_tgchk_param *par)
 		goto out;
 	}
 
+#ifndef CONFIG_NF_CONNTRACK_ZONES
+	if (info->zone)
+		goto err1;
+#endif
+
 	if (nf_ct_l3proto_try_module_get(par->family) < 0)
 		goto err1;
 
 	memset(&t, 0, sizeof(t));
-	ct = nf_conntrack_alloc(par->net, &t, &t, GFP_KERNEL);
+	ct = nf_conntrack_alloc(par->net, info->zone, &t, &t, GFP_KERNEL);
 	if (IS_ERR(ct))
 		goto err2;
 
diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c
index 0d9d18ea2b09..26997ce90e48 100644
--- a/net/netfilter/xt_connlimit.c
+++ b/net/netfilter/xt_connlimit.c
@@ -28,6 +28,7 @@
 #include <net/netfilter/nf_conntrack.h>
 #include <net/netfilter/nf_conntrack_core.h>
 #include <net/netfilter/nf_conntrack_tuple.h>
+#include <net/netfilter/nf_conntrack_zones.h>
 
 /* we will save the tuples of all connections we care about */
 struct xt_connlimit_conn {
@@ -114,7 +115,8 @@ static int count_them(struct net *net,
 
 	/* check the saved connections */
 	list_for_each_entry_safe(conn, tmp, hash, list) {
-		found    = nf_conntrack_find_get(net, &conn->tuple);
+		found    = nf_conntrack_find_get(net, NF_CT_DEFAULT_ZONE,
+						 &conn->tuple);
 		found_ct = NULL;
 
 		if (found != NULL)
-- 
cgit v1.2.3


From ef00f89f1eb7e056aab9dfe068521e6f2320c94a Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 15 Feb 2010 18:14:57 +0100
Subject: netfilter: ctnetlink: add zone support

Parse and dump the conntrack zone in ctnetlink.

Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/netfilter/nfnetlink_conntrack.h |  2 +
 net/netfilter/nf_conntrack_netlink.c          | 92 +++++++++++++++++++++------
 2 files changed, 75 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/nfnetlink_conntrack.h b/include/linux/netfilter/nfnetlink_conntrack.h
index ed4ef8d0b11b..9ed534c991b9 100644
--- a/include/linux/netfilter/nfnetlink_conntrack.h
+++ b/include/linux/netfilter/nfnetlink_conntrack.h
@@ -40,6 +40,7 @@ enum ctattr_type {
 	CTA_NAT_SEQ_ADJ_ORIG,
 	CTA_NAT_SEQ_ADJ_REPLY,
 	CTA_SECMARK,
+	CTA_ZONE,
 	__CTA_MAX
 };
 #define CTA_MAX (__CTA_MAX - 1)
@@ -159,6 +160,7 @@ enum ctattr_expect {
 	CTA_EXPECT_TIMEOUT,
 	CTA_EXPECT_ID,
 	CTA_EXPECT_HELP_NAME,
+	CTA_EXPECT_ZONE,
 	__CTA_EXPECT_MAX
 };
 #define CTA_EXPECT_MAX (__CTA_EXPECT_MAX - 1)
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 51089cfe1167..8b05f364b2f2 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -39,6 +39,7 @@
 #include <net/netfilter/nf_conntrack_l4proto.h>
 #include <net/netfilter/nf_conntrack_tuple.h>
 #include <net/netfilter/nf_conntrack_acct.h>
+#include <net/netfilter/nf_conntrack_zones.h>
 #ifdef CONFIG_NF_NAT_NEEDED
 #include <net/netfilter/nf_nat_core.h>
 #include <net/netfilter/nf_nat_protocol.h>
@@ -379,6 +380,9 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
 		goto nla_put_failure;
 	nla_nest_end(skb, nest_parms);
 
+	if (nf_ct_zone(ct))
+		NLA_PUT_BE16(skb, CTA_ZONE, htons(nf_ct_zone(ct)));
+
 	if (ctnetlink_dump_status(skb, ct) < 0 ||
 	    ctnetlink_dump_timeout(skb, ct) < 0 ||
 	    ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 ||
@@ -517,6 +521,9 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
 		goto nla_put_failure;
 	nla_nest_end(skb, nest_parms);
 
+	if (nf_ct_zone(ct))
+		NLA_PUT_BE16(skb, CTA_ZONE, htons(nf_ct_zone(ct)));
+
 	if (ctnetlink_dump_id(skb, ct) < 0)
 		goto nla_put_failure;
 
@@ -750,6 +757,21 @@ ctnetlink_parse_tuple(const struct nlattr * const cda[],
 	return 0;
 }
 
+static int
+ctnetlink_parse_zone(const struct nlattr *attr, u16 *zone)
+{
+	if (attr)
+#ifdef CONFIG_NF_CONNTRACK_ZONES
+		*zone = ntohs(nla_get_be16(attr));
+#else
+		return -EOPNOTSUPP;
+#endif
+	else
+		*zone = 0;
+
+	return 0;
+}
+
 static const struct nla_policy help_nla_policy[CTA_HELP_MAX+1] = {
 	[CTA_HELP_NAME]		= { .type = NLA_NUL_STRING },
 };
@@ -781,6 +803,7 @@ static const struct nla_policy ct_nla_policy[CTA_MAX+1] = {
 	[CTA_ID]		= { .type = NLA_U32 },
 	[CTA_NAT_DST]		= { .type = NLA_NESTED },
 	[CTA_TUPLE_MASTER]	= { .type = NLA_NESTED },
+	[CTA_ZONE]		= { .type = NLA_U16 },
 };
 
 static int
@@ -794,7 +817,12 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb,
 	struct nf_conn *ct;
 	struct nfgenmsg *nfmsg = nlmsg_data(nlh);
 	u_int8_t u3 = nfmsg->nfgen_family;
-	int err = 0;
+	u16 zone;
+	int err;
+
+	err = ctnetlink_parse_zone(cda[CTA_ZONE], &zone);
+	if (err < 0)
+		return err;
 
 	if (cda[CTA_TUPLE_ORIG])
 		err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG, u3);
@@ -811,7 +839,7 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb,
 	if (err < 0)
 		return err;
 
-	h = nf_conntrack_find_get(net, 0, &tuple);
+	h = nf_conntrack_find_get(net, zone, &tuple);
 	if (!h)
 		return -ENOENT;
 
@@ -856,12 +884,17 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb,
 	struct sk_buff *skb2 = NULL;
 	struct nfgenmsg *nfmsg = nlmsg_data(nlh);
 	u_int8_t u3 = nfmsg->nfgen_family;
-	int err = 0;
+	u16 zone;
+	int err;
 
 	if (nlh->nlmsg_flags & NLM_F_DUMP)
 		return netlink_dump_start(ctnl, skb, nlh, ctnetlink_dump_table,
 					  ctnetlink_done);
 
+	err = ctnetlink_parse_zone(cda[CTA_ZONE], &zone);
+	if (err < 0)
+		return err;
+
 	if (cda[CTA_TUPLE_ORIG])
 		err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG, u3);
 	else if (cda[CTA_TUPLE_REPLY])
@@ -872,7 +905,7 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb,
 	if (err < 0)
 		return err;
 
-	h = nf_conntrack_find_get(net, 0, &tuple);
+	h = nf_conntrack_find_get(net, zone, &tuple);
 	if (!h)
 		return -ENOENT;
 
@@ -1211,7 +1244,7 @@ ctnetlink_change_conntrack(struct nf_conn *ct,
 }
 
 static struct nf_conn *
-ctnetlink_create_conntrack(struct net *net,
+ctnetlink_create_conntrack(struct net *net, u16 zone,
 			   const struct nlattr * const cda[],
 			   struct nf_conntrack_tuple *otuple,
 			   struct nf_conntrack_tuple *rtuple,
@@ -1221,7 +1254,7 @@ ctnetlink_create_conntrack(struct net *net,
 	int err = -EINVAL;
 	struct nf_conntrack_helper *helper;
 
-	ct = nf_conntrack_alloc(net, 0, otuple, rtuple, GFP_ATOMIC);
+	ct = nf_conntrack_alloc(net, zone, otuple, rtuple, GFP_ATOMIC);
 	if (IS_ERR(ct))
 		return ERR_PTR(-ENOMEM);
 
@@ -1325,7 +1358,7 @@ ctnetlink_create_conntrack(struct net *net,
 		if (err < 0)
 			goto err2;
 
-		master_h = nf_conntrack_find_get(net, 0, &master);
+		master_h = nf_conntrack_find_get(net, zone, &master);
 		if (master_h == NULL) {
 			err = -ENOENT;
 			goto err2;
@@ -1358,7 +1391,12 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
 	struct nf_conntrack_tuple_hash *h = NULL;
 	struct nfgenmsg *nfmsg = nlmsg_data(nlh);
 	u_int8_t u3 = nfmsg->nfgen_family;
-	int err = 0;
+	u16 zone;
+	int err;
+
+	err = ctnetlink_parse_zone(cda[CTA_ZONE], &zone);
+	if (err < 0)
+		return err;
 
 	if (cda[CTA_TUPLE_ORIG]) {
 		err = ctnetlink_parse_tuple(cda, &otuple, CTA_TUPLE_ORIG, u3);
@@ -1374,9 +1412,9 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
 
 	spin_lock_bh(&nf_conntrack_lock);
 	if (cda[CTA_TUPLE_ORIG])
-		h = __nf_conntrack_find(net, 0, &otuple);
+		h = __nf_conntrack_find(net, zone, &otuple);
 	else if (cda[CTA_TUPLE_REPLY])
-		h = __nf_conntrack_find(net, 0, &rtuple);
+		h = __nf_conntrack_find(net, zone, &rtuple);
 
 	if (h == NULL) {
 		err = -ENOENT;
@@ -1384,7 +1422,7 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
 			struct nf_conn *ct;
 			enum ip_conntrack_events events;
 
-			ct = ctnetlink_create_conntrack(net, cda, &otuple,
+			ct = ctnetlink_create_conntrack(net, zone, cda, &otuple,
 							&rtuple, u3);
 			if (IS_ERR(ct)) {
 				err = PTR_ERR(ct);
@@ -1698,7 +1736,8 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
 	struct sk_buff *skb2;
 	struct nfgenmsg *nfmsg = nlmsg_data(nlh);
 	u_int8_t u3 = nfmsg->nfgen_family;
-	int err = 0;
+	u16 zone;
+	int err;
 
 	if (nlh->nlmsg_flags & NLM_F_DUMP) {
 		return netlink_dump_start(ctnl, skb, nlh,
@@ -1706,6 +1745,10 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
 					  ctnetlink_exp_done);
 	}
 
+	err = ctnetlink_parse_zone(cda[CTA_EXPECT_ZONE], &zone);
+	if (err < 0)
+		return err;
+
 	if (cda[CTA_EXPECT_MASTER])
 		err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_MASTER, u3);
 	else
@@ -1714,7 +1757,7 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
 	if (err < 0)
 		return err;
 
-	exp = nf_ct_expect_find_get(net, 0, &tuple);
+	exp = nf_ct_expect_find_get(net, zone, &tuple);
 	if (!exp)
 		return -ENOENT;
 
@@ -1761,16 +1804,21 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
 	struct hlist_node *n, *next;
 	u_int8_t u3 = nfmsg->nfgen_family;
 	unsigned int i;
+	u16 zone;
 	int err;
 
 	if (cda[CTA_EXPECT_TUPLE]) {
 		/* delete a single expect by tuple */
+		err = ctnetlink_parse_zone(cda[CTA_EXPECT_ZONE], &zone);
+		if (err < 0)
+			return err;
+
 		err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE, u3);
 		if (err < 0)
 			return err;
 
 		/* bump usage count to 2 */
-		exp = nf_ct_expect_find_get(net, 0, &tuple);
+		exp = nf_ct_expect_find_get(net, zone, &tuple);
 		if (!exp)
 			return -ENOENT;
 
@@ -1832,7 +1880,8 @@ ctnetlink_change_expect(struct nf_conntrack_expect *x,
 }
 
 static int
-ctnetlink_create_expect(struct net *net, const struct nlattr * const cda[],
+ctnetlink_create_expect(struct net *net, u16 zone,
+			const struct nlattr * const cda[],
 			u_int8_t u3,
 			u32 pid, int report)
 {
@@ -1855,7 +1904,7 @@ ctnetlink_create_expect(struct net *net, const struct nlattr * const cda[],
 		return err;
 
 	/* Look for master conntrack of this expectation */
-	h = nf_conntrack_find_get(net, 0, &master_tuple);
+	h = nf_conntrack_find_get(net, zone, &master_tuple);
 	if (!h)
 		return -ENOENT;
 	ct = nf_ct_tuplehash_to_ctrack(h);
@@ -1900,25 +1949,30 @@ ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb,
 	struct nf_conntrack_expect *exp;
 	struct nfgenmsg *nfmsg = nlmsg_data(nlh);
 	u_int8_t u3 = nfmsg->nfgen_family;
-	int err = 0;
+	u16 zone;
+	int err;
 
 	if (!cda[CTA_EXPECT_TUPLE]
 	    || !cda[CTA_EXPECT_MASK]
 	    || !cda[CTA_EXPECT_MASTER])
 		return -EINVAL;
 
+	err = ctnetlink_parse_zone(cda[CTA_EXPECT_ZONE], &zone);
+	if (err < 0)
+		return err;
+
 	err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE, u3);
 	if (err < 0)
 		return err;
 
 	spin_lock_bh(&nf_conntrack_lock);
-	exp = __nf_ct_expect_find(net, 0, &tuple);
+	exp = __nf_ct_expect_find(net, zone, &tuple);
 
 	if (!exp) {
 		spin_unlock_bh(&nf_conntrack_lock);
 		err = -ENOENT;
 		if (nlh->nlmsg_flags & NLM_F_CREATE) {
-			err = ctnetlink_create_expect(net, cda,
+			err = ctnetlink_create_expect(net, zone, cda,
 						      u3,
 						      NETLINK_CB(skb).pid,
 						      nlmsg_report(nlh));
-- 
cgit v1.2.3


From 3e5e524ffb5fcf2447eb5dd9f8e54ad22dd9baa7 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fwestphal@astaro.com>
Date: Mon, 15 Feb 2010 18:17:10 +0100
Subject: netfilter: CONFIG_COMPAT: allow delta to exceed 32767

with 32 bit userland and 64 bit kernels, it is unlikely but possible
that insertion of new rules fails even tough there are only about 2000
iptables rules.

This happens because the compat delta is using a short int.
Easily reproducible via "iptables -m limit" ; after about 2050
rules inserting new ones fails with -ELOOP.

Note that compat_delta included 2 bytes of padding on x86_64, so
structure size remains the same.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/netfilter/x_tables.h | 2 +-
 net/netfilter/x_tables.c           | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index c61758f4be31..a18119fb88f0 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -588,7 +588,7 @@ extern void xt_compat_unlock(u_int8_t af);
 
 extern int xt_compat_add_offset(u_int8_t af, unsigned int offset, short delta);
 extern void xt_compat_flush_offsets(u_int8_t af);
-extern short xt_compat_calc_jump(u_int8_t af, unsigned int offset);
+extern int xt_compat_calc_jump(u_int8_t af, unsigned int offset);
 
 extern int xt_compat_match_offset(const struct xt_match *match);
 extern int xt_compat_match_from_user(struct xt_entry_match *m,
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 69c56287d518..0a12cedfe9e3 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -39,7 +39,7 @@ MODULE_DESCRIPTION("{ip,ip6,arp,eb}_tables backend module");
 struct compat_delta {
 	struct compat_delta *next;
 	unsigned int offset;
-	short delta;
+	int delta;
 };
 
 struct xt_af {
@@ -439,10 +439,10 @@ void xt_compat_flush_offsets(u_int8_t af)
 }
 EXPORT_SYMBOL_GPL(xt_compat_flush_offsets);
 
-short xt_compat_calc_jump(u_int8_t af, unsigned int offset)
+int xt_compat_calc_jump(u_int8_t af, unsigned int offset)
 {
 	struct compat_delta *tmp;
-	short delta;
+	int delta;
 
 	for (tmp = xt[af].compat_offsets, delta = 0; tmp; tmp = tmp->next)
 		if (tmp->offset < offset)
-- 
cgit v1.2.3


From 026331c4d9b526561ea96f95fac4bfc52b69e316 Mon Sep 17 00:00:00 2001
From: Jouni Malinen <jouni.malinen@atheros.com>
Date: Mon, 15 Feb 2010 12:53:10 +0200
Subject: cfg80211/mac80211: allow registering for and sending action frames

This implements a new command to register for action frames
that userspace wants to handle instead of the in-kernel
rejection. It is then responsible for rejecting ones that
it decided not to handle. There is no unregistration, but
the socket can be closed for that.

Frames that are not registered for will not be forwarded
to userspace and will be rejected by the kernel, the
cfg80211 API helps implementing that.

Additionally, this patch adds a new command that allows
doing action frame transmission from userspace. It can be
used either to exchange action frames on the current
operational channel (e.g., with the AP with which we are
currently associated) or to exchange off-channel Public
Action frames with the remain-on-channel command.

Signed-off-by: Jouni Malinen <jouni.malinen@atheros.com>
Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h    |  41 ++++++-
 include/net/cfg80211.h     |  47 +++++++-
 include/net/mac80211.h     |   6 +-
 net/mac80211/cfg.c         |  12 ++-
 net/mac80211/ieee80211_i.h |   6 +-
 net/mac80211/mlme.c        |  35 ++++++
 net/mac80211/rx.c          |  42 ++++++--
 net/mac80211/status.c      |   7 +-
 net/wireless/core.c        |   4 +
 net/wireless/core.h        |   9 ++
 net/wireless/mlme.c        | 166 +++++++++++++++++++++++++++++
 net/wireless/nl80211.c     | 260 ++++++++++++++++++++++++++++++++++++++++++++-
 net/wireless/nl80211.h     |   8 ++
 13 files changed, 625 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index 127a73015760..8e6384f8fda6 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -3,7 +3,7 @@
 /*
  * 802.11 netlink interface public header
  *
- * Copyright 2006, 2007, 2008 Johannes Berg <johannes@sipsolutions.net>
+ * Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net>
  * Copyright 2008 Michael Wu <flamingice@sourmilk.net>
  * Copyright 2008 Luis Carlos Cobo <luisca@cozybit.com>
  * Copyright 2008 Michael Buesch <mb@bu3sch.de>
@@ -299,6 +299,31 @@
  *	rate selection. %NL80211_ATTR_IFINDEX is used to specify the interface
  *	and @NL80211_ATTR_TX_RATES the set of allowed rates.
  *
+ * @NL80211_CMD_REGISTER_ACTION: Register for receiving certain action frames
+ *	(via @NL80211_CMD_ACTION) for processing in userspace. This command
+ *	requires an interface index and a match attribute containing the first
+ *	few bytes of the frame that should match, e.g. a single byte for only
+ *	a category match or four bytes for vendor frames including the OUI.
+ *	The registration cannot be dropped, but is removed automatically
+ *	when the netlink socket is closed. Multiple registrations can be made.
+ * @NL80211_CMD_ACTION: Action frame TX request and RX notification. This
+ *	command is used both as a request to transmit an Action frame and as an
+ *	event indicating reception of an Action frame that was not processed in
+ *	kernel code, but is for us (i.e., which may need to be processed in a
+ *	user space application). %NL80211_ATTR_FRAME is used to specify the
+ *	frame contents (including header). %NL80211_ATTR_WIPHY_FREQ (and
+ *	optionally %NL80211_ATTR_WIPHY_CHANNEL_TYPE) is used to indicate on
+ *	which channel the frame is to be transmitted or was received. This
+ *	channel has to be the current channel (remain-on-channel or the
+ *	operational channel). When called, this operation returns a cookie
+ *	(%NL80211_ATTR_COOKIE) that will be included with the TX status event
+ *	pertaining to the TX request.
+ * @NL80211_CMD_ACTION_TX_STATUS: Report TX status of an Action frame
+ *	transmitted with %NL80211_CMD_ACTION. %NL80211_ATTR_COOKIE identifies
+ *	the TX command and %NL80211_ATTR_FRAME includes the contents of the
+ *	frame. %NL80211_ATTR_ACK flag is included if the recipient acknowledged
+ *	the frame.
+ *
  * @NL80211_CMD_MAX: highest used command number
  * @__NL80211_CMD_AFTER_LAST: internal use
  */
@@ -387,6 +412,10 @@ enum nl80211_commands {
 
 	NL80211_CMD_SET_TX_BITRATE_MASK,
 
+	NL80211_CMD_REGISTER_ACTION,
+	NL80211_CMD_ACTION,
+	NL80211_CMD_ACTION_TX_STATUS,
+
 	/* add new commands above here */
 
 	/* used to define NL80211_CMD_MAX below */
@@ -653,6 +682,12 @@ enum nl80211_commands {
  *	rates based on negotiated supported rates information. This attribute
  *	is used with %NL80211_CMD_SET_TX_BITRATE_MASK.
  *
+ * @NL80211_ATTR_FRAME_MATCH: A binary attribute which typically must contain
+ *	at least one byte, currently used with @NL80211_CMD_REGISTER_ACTION.
+ *
+ * @NL80211_ATTR_ACK: Flag attribute indicating that the frame was
+ *	acknowledged by the recipient.
+ *
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
  */
@@ -798,6 +833,10 @@ enum nl80211_attrs {
 
 	NL80211_ATTR_TX_RATES,
 
+	NL80211_ATTR_FRAME_MATCH,
+
+	NL80211_ATTR_ACK,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 5b3569b2a74c..7188934b64d3 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -3,7 +3,7 @@
 /*
  * 802.11 device and configuration interface
  *
- * Copyright 2006-2009	Johannes Berg <johannes@sipsolutions.net>
+ * Copyright 2006-2010	Johannes Berg <johannes@sipsolutions.net>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -998,6 +998,7 @@ struct cfg80211_pmksa {
  * @cancel_remain_on_channel: Cancel an on-going remain-on-channel operation.
  *	This allows the operation to be terminated prior to timeout based on
  *	the duration value.
+ * @action: Transmit an action frame
  *
  * @testmode_cmd: run a test mode command
  *
@@ -1144,6 +1145,11 @@ struct cfg80211_ops {
 					    struct net_device *dev,
 					    u64 cookie);
 
+	int	(*action)(struct wiphy *wiphy, struct net_device *dev,
+			  struct ieee80211_channel *chan,
+			  enum nl80211_channel_type channel_type,
+			  const u8 *buf, size_t len, u64 *cookie);
+
 	/* some temporary stuff to finish wext */
 	int	(*set_power_mgmt)(struct wiphy *wiphy, struct net_device *dev,
 				  bool enabled, int timeout);
@@ -1445,6 +1451,8 @@ struct cfg80211_cached_keys;
  *	set by driver (if supported) on add_interface BEFORE registering the
  *	netdev and may otherwise be used by driver read-only, will be update
  *	by cfg80211 on change_interface
+ * @action_registrations: list of registrations for action frames
+ * @action_registrations_lock: lock for the list
  */
 struct wireless_dev {
 	struct wiphy *wiphy;
@@ -1454,6 +1462,9 @@ struct wireless_dev {
 	struct list_head list;
 	struct net_device *netdev;
 
+	struct list_head action_registrations;
+	spinlock_t action_registrations_lock;
+
 	struct mutex mtx;
 
 	struct work_struct cleanup_work;
@@ -2291,4 +2302,38 @@ void cfg80211_remain_on_channel_expired(struct net_device *dev,
 void cfg80211_new_sta(struct net_device *dev, const u8 *mac_addr,
 		      struct station_info *sinfo, gfp_t gfp);
 
+/**
+ * cfg80211_rx_action - notification of received, unprocessed Action frame
+ * @dev: network device
+ * @freq: Frequency on which the frame was received in MHz
+ * @buf: Action frame (header + body)
+ * @len: length of the frame data
+ * @gfp: context flags
+ * Returns %true if a user space application is responsible for rejecting the
+ *	unrecognized Action frame; %false if no such application is registered
+ *	(i.e., the driver is responsible for rejecting the unrecognized Action
+ *	frame)
+ *
+ * This function is called whenever an Action frame is received for a station
+ * mode interface, but is not processed in kernel.
+ */
+bool cfg80211_rx_action(struct net_device *dev, int freq, const u8 *buf,
+			size_t len, gfp_t gfp);
+
+/**
+ * cfg80211_action_tx_status - notification of TX status for Action frame
+ * @dev: network device
+ * @cookie: Cookie returned by cfg80211_ops::action()
+ * @buf: Action frame (header + body)
+ * @len: length of the frame data
+ * @ack: Whether frame was acknowledged
+ * @gfp: context flags
+ *
+ * This function is called whenever an Action frame was requested to be
+ * transmitted with cfg80211_ops::action() to report the TX status of the
+ * transmission attempt.
+ */
+void cfg80211_action_tx_status(struct net_device *dev, u64 cookie,
+			       const u8 *buf, size_t len, bool ack, gfp_t gfp);
+
 #endif /* __NET_CFG80211_H */
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 314e98173166..80eb7cc42ce9 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -3,7 +3,7 @@
  *
  * Copyright 2002-2005, Devicescape Software, Inc.
  * Copyright 2006-2007	Jiri Benc <jbenc@suse.cz>
- * Copyright 2007-2008	Johannes Berg <johannes@sipsolutions.net>
+ * Copyright 2007-2010	Johannes Berg <johannes@sipsolutions.net>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -264,6 +264,9 @@ struct ieee80211_bss_conf {
  *	be modified again (no seqno assignment, crypto, etc.)
  * @IEEE80211_TX_INTFL_HAS_RADIOTAP: This frame was injected and still
  *	has a radiotap header at skb->data.
+ * @IEEE80211_TX_INTFL_NL80211_FRAME_TX: Frame was requested through nl80211
+ *	MLME command (internal to mac80211 to figure out whether to send TX
+ *	status to user space)
  */
 enum mac80211_tx_control_flags {
 	IEEE80211_TX_CTL_REQ_TX_STATUS		= BIT(0),
@@ -286,6 +289,7 @@ enum mac80211_tx_control_flags {
 	IEEE80211_TX_CTL_MORE_FRAMES		= BIT(18),
 	IEEE80211_TX_INTFL_RETRANSMISSION	= BIT(19),
 	IEEE80211_TX_INTFL_HAS_RADIOTAP		= BIT(20),
+	IEEE80211_TX_INTFL_NL80211_FRAME_TX	= BIT(21),
 };
 
 /**
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index e1731b7c2523..b7116ef84a3b 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -1,7 +1,7 @@
 /*
  * mac80211 configuration hooks for cfg80211
  *
- * Copyright 2006, 2007	Johannes Berg <johannes@sipsolutions.net>
+ * Copyright 2006-2010	Johannes Berg <johannes@sipsolutions.net>
  *
  * This file is GPLv2 as found in COPYING.
  */
@@ -1448,6 +1448,15 @@ static int ieee80211_cancel_remain_on_channel(struct wiphy *wiphy,
 	return ieee80211_wk_cancel_remain_on_channel(sdata, cookie);
 }
 
+static int ieee80211_action(struct wiphy *wiphy, struct net_device *dev,
+			    struct ieee80211_channel *chan,
+			    enum nl80211_channel_type channel_type,
+			    const u8 *buf, size_t len, u64 *cookie)
+{
+	return ieee80211_mgd_action(IEEE80211_DEV_TO_SUB_IF(dev), chan,
+				    channel_type, buf, len, cookie);
+}
+
 struct cfg80211_ops mac80211_config_ops = {
 	.add_virtual_intf = ieee80211_add_iface,
 	.del_virtual_intf = ieee80211_del_iface,
@@ -1496,4 +1505,5 @@ struct cfg80211_ops mac80211_config_ops = {
 	.set_bitrate_mask = ieee80211_set_bitrate_mask,
 	.remain_on_channel = ieee80211_remain_on_channel,
 	.cancel_remain_on_channel = ieee80211_cancel_remain_on_channel,
+	.action = ieee80211_action,
 };
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 9dd98b674cbc..241533e1bc03 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -2,7 +2,7 @@
  * Copyright 2002-2005, Instant802 Networks, Inc.
  * Copyright 2005, Devicescape Software, Inc.
  * Copyright 2006-2007	Jiri Benc <jbenc@suse.cz>
- * Copyright 2007-2008	Johannes Berg <johannes@sipsolutions.net>
+ * Copyright 2007-2010	Johannes Berg <johannes@sipsolutions.net>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -966,6 +966,10 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata,
 int ieee80211_mgd_disassoc(struct ieee80211_sub_if_data *sdata,
 			   struct cfg80211_disassoc_request *req,
 			   void *cookie);
+int ieee80211_mgd_action(struct ieee80211_sub_if_data *sdata,
+			 struct ieee80211_channel *chan,
+			 enum nl80211_channel_type channel_type,
+			 const u8 *buf, size_t len, u64 *cookie);
 ieee80211_rx_result ieee80211_sta_rx_mgmt(struct ieee80211_sub_if_data *sdata,
 					  struct sk_buff *skb);
 void ieee80211_send_pspoll(struct ieee80211_local *local,
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index bfc4a5070013..41812a15eea0 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -2084,3 +2084,38 @@ int ieee80211_mgd_disassoc(struct ieee80211_sub_if_data *sdata,
 
 	return 0;
 }
+
+int ieee80211_mgd_action(struct ieee80211_sub_if_data *sdata,
+			 struct ieee80211_channel *chan,
+			 enum nl80211_channel_type channel_type,
+			 const u8 *buf, size_t len, u64 *cookie)
+{
+	struct ieee80211_local *local = sdata->local;
+	struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
+	struct sk_buff *skb;
+
+	/* Check that we are on the requested channel for transmission */
+	if ((chan != local->tmp_channel ||
+	     channel_type != local->tmp_channel_type) &&
+	    (chan != local->oper_channel ||
+	     channel_type != local->oper_channel_type))
+		return -EBUSY;
+
+	skb = dev_alloc_skb(local->hw.extra_tx_headroom + len);
+	if (!skb)
+		return -ENOMEM;
+	skb_reserve(skb, local->hw.extra_tx_headroom);
+
+	memcpy(skb_put(skb, len), buf, len);
+
+	if (!(ifmgd->flags & IEEE80211_STA_MFP_ENABLED))
+		IEEE80211_SKB_CB(skb)->flags |=
+			IEEE80211_TX_INTFL_DONT_ENCRYPT;
+	IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_NL80211_FRAME_TX |
+		IEEE80211_TX_CTL_REQ_TX_STATUS;
+	skb->dev = sdata->dev;
+	ieee80211_tx_skb(sdata, skb);
+
+	*cookie = (unsigned long) skb;
+	return 0;
+}
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index a177472adc13..a6080d8d72bb 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -1856,28 +1856,25 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
 	struct ieee80211_sub_if_data *sdata = rx->sdata;
 	struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *) rx->skb->data;
 	struct sk_buff *nskb;
+	struct ieee80211_rx_status *status;
 	int len = rx->skb->len;
 
 	if (!ieee80211_is_action(mgmt->frame_control))
 		return RX_CONTINUE;
 
-	if (!rx->sta)
+	/* drop too small frames */
+	if (len < IEEE80211_MIN_ACTION_SIZE)
 		return RX_DROP_UNUSABLE;
 
-	if (!(rx->flags & IEEE80211_RX_RA_MATCH))
+	if (!rx->sta && mgmt->u.action.category != WLAN_CATEGORY_PUBLIC)
 		return RX_DROP_UNUSABLE;
 
-	if (ieee80211_drop_unencrypted(rx, mgmt->frame_control))
+	if (!(rx->flags & IEEE80211_RX_RA_MATCH))
 		return RX_DROP_UNUSABLE;
 
-	/* drop too small frames */
-	if (len < IEEE80211_MIN_ACTION_SIZE)
+	if (ieee80211_drop_unencrypted(rx, mgmt->frame_control))
 		return RX_DROP_UNUSABLE;
 
-	/* return action frames that have *only* category */
-	if (len < IEEE80211_MIN_ACTION_SIZE + 1)
-		goto return_frame;
-
 	switch (mgmt->u.action.category) {
 	case WLAN_CATEGORY_BACK:
 		/*
@@ -1891,6 +1888,10 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
 		    sdata->vif.type != NL80211_IFTYPE_AP)
 			break;
 
+		/* verify action_code is present */
+		if (len < IEEE80211_MIN_ACTION_SIZE + 1)
+			break;
+
 		switch (mgmt->u.action.u.addba_req.action_code) {
 		case WLAN_ACTION_ADDBA_REQ:
 			if (len < (IEEE80211_MIN_ACTION_SIZE +
@@ -1919,6 +1920,10 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
 		if (sdata->vif.type != NL80211_IFTYPE_STATION)
 			break;
 
+		/* verify action_code is present */
+		if (len < IEEE80211_MIN_ACTION_SIZE + 1)
+			break;
+
 		switch (mgmt->u.action.u.measurement.action_code) {
 		case WLAN_ACTION_SPCT_MSR_REQ:
 			if (len < (IEEE80211_MIN_ACTION_SIZE +
@@ -1954,7 +1959,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
 		}
 		break;
 	}
- return_frame:
+
 	/*
 	 * For AP mode, hostapd is responsible for handling any action
 	 * frames that we didn't handle, including returning unknown
@@ -1966,6 +1971,20 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
 	    sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
 		return RX_DROP_MONITOR;
 
+	/*
+	 * Getting here means the kernel doesn't know how to handle
+	 * it, but maybe userspace does ... include returned frames
+	 * so userspace can register for those to know whether ones
+	 * it transmitted were processed or returned.
+	 */
+	status = IEEE80211_SKB_RXCB(rx->skb);
+
+	if (sdata->vif.type == NL80211_IFTYPE_STATION &&
+	    cfg80211_rx_action(rx->sdata->dev, status->freq,
+			       rx->skb->data, rx->skb->len,
+			       GFP_ATOMIC))
+		goto handled;
+
 	/* do not return rejected action frames */
 	if (mgmt->u.action.category & 0x80)
 		return RX_DROP_UNUSABLE;
@@ -1985,7 +2004,8 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
 	}
 
  handled:
-	rx->sta->rx_packets++;
+	if (rx->sta)
+		rx->sta->rx_packets++;
 	dev_kfree_skb(rx->skb);
 	return RX_QUEUED;
 }
diff --git a/net/mac80211/status.c b/net/mac80211/status.c
index ded98730c111..56d5b9a6ec5b 100644
--- a/net/mac80211/status.c
+++ b/net/mac80211/status.c
@@ -2,7 +2,7 @@
  * Copyright 2002-2005, Instant802 Networks, Inc.
  * Copyright 2005-2006, Devicescape Software, Inc.
  * Copyright 2006-2007	Jiri Benc <jbenc@suse.cz>
- * Copyright 2008-2009	Johannes Berg <johannes@sipsolutions.net>
+ * Copyright 2008-2010	Johannes Berg <johannes@sipsolutions.net>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -288,6 +288,11 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
 					msecs_to_jiffies(10));
 	}
 
+	if (info->flags & IEEE80211_TX_INTFL_NL80211_FRAME_TX)
+		cfg80211_action_tx_status(
+			skb->dev, (unsigned long) skb, skb->data, skb->len,
+			!!(info->flags & IEEE80211_TX_STAT_ACK), GFP_ATOMIC);
+
 	/* this was a transmitted frame, but now we want to reuse it */
 	skb_orphan(skb);
 
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 71b6b3a9cf1f..51908dc2ea00 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -677,6 +677,9 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb,
 		INIT_WORK(&wdev->cleanup_work, wdev_cleanup_work);
 		INIT_LIST_HEAD(&wdev->event_list);
 		spin_lock_init(&wdev->event_lock);
+		INIT_LIST_HEAD(&wdev->action_registrations);
+		spin_lock_init(&wdev->action_registrations_lock);
+
 		mutex_lock(&rdev->devlist_mtx);
 		list_add_rcu(&wdev->list, &rdev->netdev_list);
 		rdev->devlist_generation++;
@@ -792,6 +795,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb,
 			sysfs_remove_link(&dev->dev.kobj, "phy80211");
 			list_del_rcu(&wdev->list);
 			rdev->devlist_generation++;
+			cfg80211_mlme_purge_actions(wdev);
 #ifdef CONFIG_CFG80211_WEXT
 			kfree(wdev->wext.keys);
 #endif
diff --git a/net/wireless/core.h b/net/wireless/core.h
index c326a667022a..d52da913145a 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -329,6 +329,15 @@ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid,
 			       const u8 *resp_ie, size_t resp_ie_len,
 			       u16 status, bool wextev,
 			       struct cfg80211_bss *bss);
+int cfg80211_mlme_register_action(struct wireless_dev *wdev, u32 snd_pid,
+				  const u8 *match_data, int match_len);
+void cfg80211_mlme_unregister_actions(struct wireless_dev *wdev, u32 nlpid);
+void cfg80211_mlme_purge_actions(struct wireless_dev *wdev);
+int cfg80211_mlme_action(struct cfg80211_registered_device *rdev,
+			 struct net_device *dev,
+			 struct ieee80211_channel *chan,
+			 enum nl80211_channel_type channel_type,
+			 const u8 *buf, size_t len, u64 *cookie);
 
 /* SME */
 int __cfg80211_connect(struct cfg80211_registered_device *rdev,
diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c
index 94d151f6f73e..62bc8855e123 100644
--- a/net/wireless/mlme.c
+++ b/net/wireless/mlme.c
@@ -728,3 +728,169 @@ void cfg80211_new_sta(struct net_device *dev, const u8 *mac_addr,
 	nl80211_send_sta_event(rdev, dev, mac_addr, sinfo, gfp);
 }
 EXPORT_SYMBOL(cfg80211_new_sta);
+
+struct cfg80211_action_registration {
+	struct list_head list;
+
+	u32 nlpid;
+
+	int match_len;
+
+	u8 match[];
+};
+
+int cfg80211_mlme_register_action(struct wireless_dev *wdev, u32 snd_pid,
+				  const u8 *match_data, int match_len)
+{
+	struct cfg80211_action_registration *reg, *nreg;
+	int err = 0;
+
+	nreg = kzalloc(sizeof(*reg) + match_len, GFP_KERNEL);
+	if (!nreg)
+		return -ENOMEM;
+
+	spin_lock_bh(&wdev->action_registrations_lock);
+
+	list_for_each_entry(reg, &wdev->action_registrations, list) {
+		int mlen = min(match_len, reg->match_len);
+
+		if (memcmp(reg->match, match_data, mlen) == 0) {
+			err = -EALREADY;
+			break;
+		}
+	}
+
+	if (err) {
+		kfree(nreg);
+		goto out;
+	}
+
+	memcpy(nreg->match, match_data, match_len);
+	nreg->match_len = match_len;
+	nreg->nlpid = snd_pid;
+	list_add(&nreg->list, &wdev->action_registrations);
+
+ out:
+	spin_unlock_bh(&wdev->action_registrations_lock);
+	return err;
+}
+
+void cfg80211_mlme_unregister_actions(struct wireless_dev *wdev, u32 nlpid)
+{
+	struct cfg80211_action_registration *reg, *tmp;
+
+	spin_lock_bh(&wdev->action_registrations_lock);
+
+	list_for_each_entry_safe(reg, tmp, &wdev->action_registrations, list) {
+		if (reg->nlpid == nlpid) {
+			list_del(&reg->list);
+			kfree(reg);
+		}
+	}
+
+	spin_unlock_bh(&wdev->action_registrations_lock);
+}
+
+void cfg80211_mlme_purge_actions(struct wireless_dev *wdev)
+{
+	struct cfg80211_action_registration *reg, *tmp;
+
+	spin_lock_bh(&wdev->action_registrations_lock);
+
+	list_for_each_entry_safe(reg, tmp, &wdev->action_registrations, list) {
+		list_del(&reg->list);
+		kfree(reg);
+	}
+
+	spin_unlock_bh(&wdev->action_registrations_lock);
+}
+
+int cfg80211_mlme_action(struct cfg80211_registered_device *rdev,
+			 struct net_device *dev,
+			 struct ieee80211_channel *chan,
+			 enum nl80211_channel_type channel_type,
+			 const u8 *buf, size_t len, u64 *cookie)
+{
+	struct wireless_dev *wdev = dev->ieee80211_ptr;
+	const struct ieee80211_mgmt *mgmt;
+
+	if (rdev->ops->action == NULL)
+		return -EOPNOTSUPP;
+	if (len < 24 + 1)
+		return -EINVAL;
+
+	mgmt = (const struct ieee80211_mgmt *) buf;
+	if (!ieee80211_is_action(mgmt->frame_control))
+		return -EINVAL;
+	if (mgmt->u.action.category != WLAN_CATEGORY_PUBLIC) {
+		/* Verify that we are associated with the destination AP */
+		if (!wdev->current_bss ||
+		    memcmp(wdev->current_bss->pub.bssid, mgmt->bssid,
+			   ETH_ALEN) != 0 ||
+		    memcmp(wdev->current_bss->pub.bssid, mgmt->da,
+			   ETH_ALEN) != 0)
+			return -ENOTCONN;
+	}
+
+	if (memcmp(mgmt->sa, dev->dev_addr, ETH_ALEN) != 0)
+		return -EINVAL;
+
+	/* Transmit the Action frame as requested by user space */
+	return rdev->ops->action(&rdev->wiphy, dev, chan, channel_type,
+				 buf, len, cookie);
+}
+
+bool cfg80211_rx_action(struct net_device *dev, int freq, const u8 *buf,
+			size_t len, gfp_t gfp)
+{
+	struct wireless_dev *wdev = dev->ieee80211_ptr;
+	struct wiphy *wiphy = wdev->wiphy;
+	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
+	struct cfg80211_action_registration *reg;
+	const u8 *action_data;
+	int action_data_len;
+	bool result = false;
+
+	/* frame length - min size excluding category */
+	action_data_len = len - (IEEE80211_MIN_ACTION_SIZE - 1);
+
+	/* action data starts with category */
+	action_data = buf + IEEE80211_MIN_ACTION_SIZE - 1;
+
+	spin_lock_bh(&wdev->action_registrations_lock);
+
+	list_for_each_entry(reg, &wdev->action_registrations, list) {
+		if (reg->match_len > action_data_len)
+			continue;
+
+		if (memcmp(reg->match, action_data, reg->match_len))
+			continue;
+
+		/* found match! */
+
+		/* Indicate the received Action frame to user space */
+		if (nl80211_send_action(rdev, dev, reg->nlpid, freq,
+					buf, len, gfp))
+			continue;
+
+		result = true;
+		break;
+	}
+
+	spin_unlock_bh(&wdev->action_registrations_lock);
+
+	return result;
+}
+EXPORT_SYMBOL(cfg80211_rx_action);
+
+void cfg80211_action_tx_status(struct net_device *dev, u64 cookie,
+			       const u8 *buf, size_t len, bool ack, gfp_t gfp)
+{
+	struct wireless_dev *wdev = dev->ieee80211_ptr;
+	struct wiphy *wiphy = wdev->wiphy;
+	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
+
+	/* Indicate TX status of the Action frame to user space */
+	nl80211_send_action_tx_status(rdev, dev, cookie, buf, len, ack, gfp);
+}
+EXPORT_SYMBOL(cfg80211_action_tx_status);
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index a95ab9e4c19e..328112081358 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -1,7 +1,7 @@
 /*
  * This is the new netlink-based wireless configuration interface.
  *
- * Copyright 2006-2009	Johannes Berg <johannes@sipsolutions.net>
+ * Copyright 2006-2010	Johannes Berg <johannes@sipsolutions.net>
  */
 
 #include <linux/if.h>
@@ -145,6 +145,9 @@ static struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] __read_mostly = {
 	[NL80211_ATTR_DURATION] = { .type = NLA_U32 },
 	[NL80211_ATTR_COOKIE] = { .type = NLA_U64 },
 	[NL80211_ATTR_TX_RATES] = { .type = NLA_NESTED },
+	[NL80211_ATTR_FRAME] = { .type = NLA_BINARY,
+				 .len = IEEE80211_MAX_DATA_LEN },
+	[NL80211_ATTR_FRAME_MATCH] = { .type = NLA_BINARY, },
 };
 
 /* policy for the attributes */
@@ -577,6 +580,7 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags,
 	CMD(flush_pmksa, FLUSH_PMKSA);
 	CMD(remain_on_channel, REMAIN_ON_CHANNEL);
 	CMD(set_bitrate_mask, SET_TX_BITRATE_MASK);
+	CMD(action, ACTION);
 	if (dev->wiphy.flags & WIPHY_FLAG_NETNS_OK) {
 		i++;
 		NLA_PUT_U32(msg, i, NL80211_CMD_SET_WIPHY_NETNS);
@@ -4526,6 +4530,139 @@ static int nl80211_set_tx_bitrate_mask(struct sk_buff *skb,
 	return err;
 }
 
+static int nl80211_register_action(struct sk_buff *skb, struct genl_info *info)
+{
+	struct cfg80211_registered_device *rdev;
+	struct net_device *dev;
+	int err;
+
+	if (!info->attrs[NL80211_ATTR_FRAME_MATCH])
+		return -EINVAL;
+
+	if (nla_len(info->attrs[NL80211_ATTR_FRAME_MATCH]) < 1)
+		return -EINVAL;
+
+	rtnl_lock();
+
+	err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev);
+	if (err)
+		goto unlock_rtnl;
+
+	if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION) {
+		err = -EOPNOTSUPP;
+		goto out;
+	}
+
+	/* not much point in registering if we can't reply */
+	if (!rdev->ops->action) {
+		err = -EOPNOTSUPP;
+		goto out;
+	}
+
+	err = cfg80211_mlme_register_action(dev->ieee80211_ptr, info->snd_pid,
+			nla_data(info->attrs[NL80211_ATTR_FRAME_MATCH]),
+			nla_len(info->attrs[NL80211_ATTR_FRAME_MATCH]));
+ out:
+	cfg80211_unlock_rdev(rdev);
+	dev_put(dev);
+ unlock_rtnl:
+	rtnl_unlock();
+	return err;
+}
+
+static int nl80211_action(struct sk_buff *skb, struct genl_info *info)
+{
+	struct cfg80211_registered_device *rdev;
+	struct net_device *dev;
+	struct ieee80211_channel *chan;
+	enum nl80211_channel_type channel_type = NL80211_CHAN_NO_HT;
+	u32 freq;
+	int err;
+	void *hdr;
+	u64 cookie;
+	struct sk_buff *msg;
+
+	if (!info->attrs[NL80211_ATTR_FRAME] ||
+	    !info->attrs[NL80211_ATTR_WIPHY_FREQ])
+		return -EINVAL;
+
+	rtnl_lock();
+
+	err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev);
+	if (err)
+		goto unlock_rtnl;
+
+	if (!rdev->ops->action) {
+		err = -EOPNOTSUPP;
+		goto out;
+	}
+
+	if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION) {
+		err = -EOPNOTSUPP;
+		goto out;
+	}
+
+	if (!netif_running(dev)) {
+		err = -ENETDOWN;
+		goto out;
+	}
+
+	if (info->attrs[NL80211_ATTR_WIPHY_CHANNEL_TYPE]) {
+		channel_type = nla_get_u32(
+			info->attrs[NL80211_ATTR_WIPHY_CHANNEL_TYPE]);
+		if (channel_type != NL80211_CHAN_NO_HT &&
+		    channel_type != NL80211_CHAN_HT20 &&
+		    channel_type != NL80211_CHAN_HT40PLUS &&
+		    channel_type != NL80211_CHAN_HT40MINUS)
+			err = -EINVAL;
+			goto out;
+	}
+
+	freq = nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ]);
+	chan = rdev_freq_to_chan(rdev, freq, channel_type);
+	if (chan == NULL) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0,
+			     NL80211_CMD_ACTION);
+
+	if (IS_ERR(hdr)) {
+		err = PTR_ERR(hdr);
+		goto free_msg;
+	}
+	err = cfg80211_mlme_action(rdev, dev, chan, channel_type,
+				   nla_data(info->attrs[NL80211_ATTR_FRAME]),
+				   nla_len(info->attrs[NL80211_ATTR_FRAME]),
+				   &cookie);
+	if (err)
+		goto free_msg;
+
+	NLA_PUT_U64(msg, NL80211_ATTR_COOKIE, cookie);
+
+	genlmsg_end(msg, hdr);
+	err = genlmsg_reply(msg, info);
+	goto out;
+
+ nla_put_failure:
+	err = -ENOBUFS;
+ free_msg:
+	nlmsg_free(msg);
+ out:
+	cfg80211_unlock_rdev(rdev);
+	dev_put(dev);
+unlock_rtnl:
+	rtnl_unlock();
+	return err;
+}
+
 static struct genl_ops nl80211_ops[] = {
 	{
 		.cmd = NL80211_CMD_GET_WIPHY,
@@ -4806,6 +4943,18 @@ static struct genl_ops nl80211_ops[] = {
 		.policy = nl80211_policy,
 		.flags = GENL_ADMIN_PERM,
 	},
+	{
+		.cmd = NL80211_CMD_REGISTER_ACTION,
+		.doit = nl80211_register_action,
+		.policy = nl80211_policy,
+		.flags = GENL_ADMIN_PERM,
+	},
+	{
+		.cmd = NL80211_CMD_ACTION,
+		.doit = nl80211_action,
+		.policy = nl80211_policy,
+		.flags = GENL_ADMIN_PERM,
+	},
 };
 
 static struct genl_multicast_group nl80211_mlme_mcgrp = {
@@ -5478,6 +5627,110 @@ void nl80211_send_sta_event(struct cfg80211_registered_device *rdev,
 				nl80211_mlme_mcgrp.id, gfp);
 }
 
+int nl80211_send_action(struct cfg80211_registered_device *rdev,
+			struct net_device *netdev, u32 nlpid,
+			int freq, const u8 *buf, size_t len, gfp_t gfp)
+{
+	struct sk_buff *msg;
+	void *hdr;
+	int err;
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp);
+	if (!msg)
+		return -ENOMEM;
+
+	hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_ACTION);
+	if (!hdr) {
+		nlmsg_free(msg);
+		return -ENOMEM;
+	}
+
+	NLA_PUT_U32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx);
+	NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex);
+	NLA_PUT_U32(msg, NL80211_ATTR_WIPHY_FREQ, freq);
+	NLA_PUT(msg, NL80211_ATTR_FRAME, len, buf);
+
+	err = genlmsg_end(msg, hdr);
+	if (err < 0) {
+		nlmsg_free(msg);
+		return err;
+	}
+
+	err = genlmsg_unicast(wiphy_net(&rdev->wiphy), msg, nlpid);
+	if (err < 0)
+		return err;
+	return 0;
+
+ nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+	nlmsg_free(msg);
+	return -ENOBUFS;
+}
+
+void nl80211_send_action_tx_status(struct cfg80211_registered_device *rdev,
+				   struct net_device *netdev, u64 cookie,
+				   const u8 *buf, size_t len, bool ack,
+				   gfp_t gfp)
+{
+	struct sk_buff *msg;
+	void *hdr;
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp);
+	if (!msg)
+		return;
+
+	hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_ACTION_TX_STATUS);
+	if (!hdr) {
+		nlmsg_free(msg);
+		return;
+	}
+
+	NLA_PUT_U32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx);
+	NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex);
+	NLA_PUT(msg, NL80211_ATTR_FRAME, len, buf);
+	NLA_PUT_U64(msg, NL80211_ATTR_COOKIE, cookie);
+	if (ack)
+		NLA_PUT_FLAG(msg, NL80211_ATTR_ACK);
+
+	if (genlmsg_end(msg, hdr) < 0) {
+		nlmsg_free(msg);
+		return;
+	}
+
+	genlmsg_multicast(msg, 0, nl80211_mlme_mcgrp.id, gfp);
+	return;
+
+ nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+	nlmsg_free(msg);
+}
+
+static int nl80211_netlink_notify(struct notifier_block * nb,
+				  unsigned long state,
+				  void *_notify)
+{
+	struct netlink_notify *notify = _notify;
+	struct cfg80211_registered_device *rdev;
+	struct wireless_dev *wdev;
+
+	if (state != NETLINK_URELEASE)
+		return NOTIFY_DONE;
+
+	rcu_read_lock();
+
+	list_for_each_entry_rcu(rdev, &cfg80211_rdev_list, list)
+		list_for_each_entry_rcu(wdev, &rdev->netdev_list, list)
+			cfg80211_mlme_unregister_actions(wdev, notify->pid);
+
+	rcu_read_unlock();
+
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block nl80211_netlink_notifier = {
+	.notifier_call = nl80211_netlink_notify,
+};
+
 /* initialisation/exit functions */
 
 int nl80211_init(void)
@@ -5511,6 +5764,10 @@ int nl80211_init(void)
 		goto err_out;
 #endif
 
+	err = netlink_register_notifier(&nl80211_netlink_notifier);
+	if (err)
+		goto err_out;
+
 	return 0;
  err_out:
 	genl_unregister_family(&nl80211_fam);
@@ -5519,5 +5776,6 @@ int nl80211_init(void)
 
 void nl80211_exit(void)
 {
+	netlink_unregister_notifier(&nl80211_netlink_notifier);
 	genl_unregister_family(&nl80211_fam);
 }
diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h
index 14855b8fb430..4ca511102c6c 100644
--- a/net/wireless/nl80211.h
+++ b/net/wireless/nl80211.h
@@ -74,4 +74,12 @@ void nl80211_send_sta_event(struct cfg80211_registered_device *rdev,
 			    struct net_device *dev, const u8 *mac_addr,
 			    struct station_info *sinfo, gfp_t gfp);
 
+int nl80211_send_action(struct cfg80211_registered_device *rdev,
+			struct net_device *netdev, u32 nlpid, int freq,
+			const u8 *buf, size_t len, gfp_t gfp);
+void nl80211_send_action_tx_status(struct cfg80211_registered_device *rdev,
+				   struct net_device *netdev, u64 cookie,
+				   const u8 *buf, size_t len, bool ack,
+				   gfp_t gfp);
+
 #endif /* __NET_WIRELESS_NL80211_H */
-- 
cgit v1.2.3


From d85429a31790361b9e952be3817134c23b3b758a Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@opensource.se>
Date: Mon, 15 Feb 2010 11:40:25 +0000
Subject: sh: extend INTC with force_disable

Extend the shared INTC code with force_disable support to
allow keeping mask bits statically disabled. Needed for
SDHI support to mask out unsupported interrupt sources.

Signed-off-by: Magnus Damm <damm@opensource.se>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 drivers/sh/intc.c       | 3 +++
 include/linux/sh_intc.h | 1 +
 2 files changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/sh/intc.c b/drivers/sh/intc.c
index 66594eb4477d..1da1edef2221 100644
--- a/drivers/sh/intc.c
+++ b/drivers/sh/intc.c
@@ -851,6 +851,9 @@ void __init register_intc_controller(struct intc_desc *desc)
 		d->chip.mask_ack = intc_mask_ack;
 	}
 
+	/* disable bits matching force_disable before registering irqs */
+	if (desc->force_disable)
+		intc_enable_disable_enum(desc, d, desc->force_disable, 0);
 
 	/* disable bits matching force_enable before registering irqs */
 	if (desc->force_enable)
diff --git a/include/linux/sh_intc.h b/include/linux/sh_intc.h
index 66b4b0c45e71..51d288d8ac88 100644
--- a/include/linux/sh_intc.h
+++ b/include/linux/sh_intc.h
@@ -72,6 +72,7 @@ struct intc_hw_desc {
 struct intc_desc {
 	char *name;
 	intc_enum force_enable;
+	intc_enum force_disable;
 	struct intc_hw_desc hw;
 };
 
-- 
cgit v1.2.3


From 28f5318167adf23b16c844b9c2253f355cb21796 Mon Sep 17 00:00:00 2001
From: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>
Date: Mon, 8 Feb 2010 15:35:55 +0530
Subject: sched: Fix sched_mv_power_savings for !SMT

Fix for sched_mc_powersavigs for pre-Nehalem platforms.
Child sched domain should clear SD_PREFER_SIBLING if parent will have
SD_POWERSAVINGS_BALANCE because they are contradicting.

Sets the flags correctly based on sched_mc_power_savings.

Signed-off-by: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <20100208100555.GD2931@dirshya.in.ibm.com>
Cc: stable@kernel.org [2.6.32.x]
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/sched.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 78efe7c485ac..1f5fa53b46b1 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -878,7 +878,10 @@ static inline int sd_balance_for_mc_power(void)
 	if (sched_smt_power_savings)
 		return SD_POWERSAVINGS_BALANCE;
 
-	return SD_PREFER_SIBLING;
+	if (!sched_mc_power_savings)
+		return SD_PREFER_SIBLING;
+
+	return 0;
 }
 
 static inline int sd_balance_for_package_power(void)
-- 
cgit v1.2.3


From 5a5e0f4c7038168e38d1db6af09d1ac715ee9888 Mon Sep 17 00:00:00 2001
From: Anton Vorontsov <avorontsov@ru.mvista.com>
Date: Wed, 27 Jan 2010 17:09:38 +0300
Subject: kfifo: Don't use integer as NULL pointer

This patch fixes following sparse warnings:

include/linux/kfifo.h:127:25: warning: Using plain integer as NULL pointer
kernel/kfifo.c:83:21: warning: Using plain integer as NULL pointer

Signed-off-by: Anton Vorontsov <avorontsov@ru.mvista.com>
Acked-by: Stefani Seibold <stefani@seibold.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/kfifo.h | 2 +-
 kernel/kfifo.c        | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h
index 6f6c5f300af6..bc0fc795bd35 100644
--- a/include/linux/kfifo.h
+++ b/include/linux/kfifo.h
@@ -124,7 +124,7 @@ extern __must_check unsigned int kfifo_out_peek(struct kfifo *fifo,
  */
 static inline bool kfifo_initialized(struct kfifo *fifo)
 {
-	return fifo->buffer != 0;
+	return fifo->buffer != NULL;
 }
 
 /**
diff --git a/kernel/kfifo.c b/kernel/kfifo.c
index 559fb5582b60..35edbe22e9a9 100644
--- a/kernel/kfifo.c
+++ b/kernel/kfifo.c
@@ -80,7 +80,7 @@ int kfifo_alloc(struct kfifo *fifo, unsigned int size, gfp_t gfp_mask)
 
 	buffer = kmalloc(size, gfp_mask);
 	if (!buffer) {
-		_kfifo_init(fifo, 0, 0);
+		_kfifo_init(fifo, NULL, 0);
 		return -ENOMEM;
 	}
 
-- 
cgit v1.2.3


From 02291680ffba92e5b5865bc0c5e7d1f3056b80ec Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Sun, 14 Feb 2010 03:25:51 +0000
Subject: net ipv4: Decouple ipv4 interface parameters from binary sysctl
 numbers

Stop using the binary sysctl enumeartion in sysctl.h as an index into
a per interface array.  This leads to unnecessary binary sysctl number
allocation, and a fragility in data structure and implementation
because of unnecessary coupling.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/inetdevice.h | 42 ++++++++++++++++++++++++++++++++++++------
 include/linux/sysctl.h     |  4 ----
 net/ipv4/devinet.c         | 26 +++++++++++++-------------
 3 files changed, 49 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h
index cf257809771b..2be1a1a2beb9 100644
--- a/include/linux/inetdevice.h
+++ b/include/linux/inetdevice.h
@@ -10,10 +10,40 @@
 #include <linux/timer.h>
 #include <linux/sysctl.h>
 
+enum
+{
+	IPV4_DEVCONF_FORWARDING=1,
+	IPV4_DEVCONF_MC_FORWARDING,
+	IPV4_DEVCONF_PROXY_ARP,
+	IPV4_DEVCONF_ACCEPT_REDIRECTS,
+	IPV4_DEVCONF_SECURE_REDIRECTS,
+	IPV4_DEVCONF_SEND_REDIRECTS,
+	IPV4_DEVCONF_SHARED_MEDIA,
+	IPV4_DEVCONF_RP_FILTER,
+	IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE,
+	IPV4_DEVCONF_BOOTP_RELAY,
+	IPV4_DEVCONF_LOG_MARTIANS,
+	IPV4_DEVCONF_TAG,
+	IPV4_DEVCONF_ARPFILTER,
+	IPV4_DEVCONF_MEDIUM_ID,
+	IPV4_DEVCONF_NOXFRM,
+	IPV4_DEVCONF_NOPOLICY,
+	IPV4_DEVCONF_FORCE_IGMP_VERSION,
+	IPV4_DEVCONF_ARP_ANNOUNCE,
+	IPV4_DEVCONF_ARP_IGNORE,
+	IPV4_DEVCONF_PROMOTE_SECONDARIES,
+	IPV4_DEVCONF_ARP_ACCEPT,
+	IPV4_DEVCONF_ARP_NOTIFY,
+	IPV4_DEVCONF_ACCEPT_LOCAL,
+	IPV4_DEVCONF_SRC_VMARK,
+	IPV4_DEVCONF_PROXY_ARP_PVLAN,
+	__IPV4_DEVCONF_MAX
+};
+
 struct ipv4_devconf {
 	void	*sysctl;
-	int	data[__NET_IPV4_CONF_MAX - 1];
-	DECLARE_BITMAP(state, __NET_IPV4_CONF_MAX - 1);
+	int	data[__IPV4_DEVCONF_MAX - 1];
+	DECLARE_BITMAP(state, __IPV4_DEVCONF_MAX - 1);
 };
 
 struct in_device {
@@ -40,7 +70,7 @@ struct in_device {
 	struct rcu_head		rcu_head;
 };
 
-#define IPV4_DEVCONF(cnf, attr) ((cnf).data[NET_IPV4_CONF_ ## attr - 1])
+#define IPV4_DEVCONF(cnf, attr) ((cnf).data[IPV4_DEVCONF_ ## attr - 1])
 #define IPV4_DEVCONF_ALL(net, attr) \
 	IPV4_DEVCONF((*(net)->ipv4.devconf_all), attr)
 
@@ -60,13 +90,13 @@ static inline void ipv4_devconf_set(struct in_device *in_dev, int index,
 
 static inline void ipv4_devconf_setall(struct in_device *in_dev)
 {
-	bitmap_fill(in_dev->cnf.state, __NET_IPV4_CONF_MAX - 1);
+	bitmap_fill(in_dev->cnf.state, __IPV4_DEVCONF_MAX - 1);
 }
 
 #define IN_DEV_CONF_GET(in_dev, attr) \
-	ipv4_devconf_get((in_dev), NET_IPV4_CONF_ ## attr)
+	ipv4_devconf_get((in_dev), IPV4_DEVCONF_ ## attr)
 #define IN_DEV_CONF_SET(in_dev, attr, val) \
-	ipv4_devconf_set((in_dev), NET_IPV4_CONF_ ## attr, (val))
+	ipv4_devconf_set((in_dev), IPV4_DEVCONF_ ## attr, (val))
 
 #define IN_DEV_ANDCONF(in_dev, attr) \
 	(IPV4_DEVCONF_ALL(dev_net(in_dev->dev), attr) && \
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 9f236cdcf3fe..7c74e919cabe 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -481,10 +481,6 @@ enum
 	NET_IPV4_CONF_PROMOTE_SECONDARIES=20,
 	NET_IPV4_CONF_ARP_ACCEPT=21,
 	NET_IPV4_CONF_ARP_NOTIFY=22,
-	NET_IPV4_CONF_ACCEPT_LOCAL=23,
-	NET_IPV4_CONF_SRC_VMARK=24,
-	NET_IPV4_CONF_PROXY_ARP_PVLAN=25,
-	__NET_IPV4_CONF_MAX
 };
 
 /* /proc/sys/net/ipv4/netfilter */
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index cd71a3908391..b1eddee9bf94 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -64,20 +64,20 @@
 
 static struct ipv4_devconf ipv4_devconf = {
 	.data = {
-		[NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1,
-		[NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1,
-		[NET_IPV4_CONF_SECURE_REDIRECTS - 1] = 1,
-		[NET_IPV4_CONF_SHARED_MEDIA - 1] = 1,
+		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
+		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
+		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
+		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
 	},
 };
 
 static struct ipv4_devconf ipv4_devconf_dflt = {
 	.data = {
-		[NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1,
-		[NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1,
-		[NET_IPV4_CONF_SECURE_REDIRECTS - 1] = 1,
-		[NET_IPV4_CONF_SHARED_MEDIA - 1] = 1,
-		[NET_IPV4_CONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
+		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
+		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
+		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
+		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
+		[IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
 	},
 };
 
@@ -1360,7 +1360,7 @@ int ipv4_doint_and_flush(ctl_table *ctl, int write,
 	{ \
 		.procname	= name, \
 		.data		= ipv4_devconf.data + \
-				  NET_IPV4_CONF_ ## attr - 1, \
+				  IPV4_DEVCONF_ ## attr - 1, \
 		.maxlen		= sizeof(int), \
 		.mode		= mval, \
 		.proc_handler	= proc, \
@@ -1381,7 +1381,7 @@ int ipv4_doint_and_flush(ctl_table *ctl, int write,
 
 static struct devinet_sysctl_table {
 	struct ctl_table_header *sysctl_header;
-	struct ctl_table devinet_vars[__NET_IPV4_CONF_MAX];
+	struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
 	char *dev_name;
 } devinet_sysctl = {
 	.devinet_vars = {
@@ -1503,7 +1503,7 @@ static struct ctl_table ctl_forward_entry[] = {
 	{
 		.procname	= "ip_forward",
 		.data		= &ipv4_devconf.data[
-					NET_IPV4_CONF_FORWARDING - 1],
+					IPV4_DEVCONF_FORWARDING - 1],
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= devinet_sysctl_forward,
@@ -1547,7 +1547,7 @@ static __net_init int devinet_init_net(struct net *net)
 		if (tbl == NULL)
 			goto err_alloc_ctl;
 
-		tbl[0].data = &all->data[NET_IPV4_CONF_FORWARDING - 1];
+		tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
 		tbl[0].extra1 = all;
 		tbl[0].extra2 = net;
 #endif
-- 
cgit v1.2.3


From 54716e3beb0ab20c49471348dfe399a71bfc8fd3 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Sun, 14 Feb 2010 03:27:03 +0000
Subject: net neigh: Decouple per interface neighbour table controls from
 binary sysctls

Stop computing the number of neighbour table settings we have by
counting the number of binary sysctls.  This behaviour was silly
and meant that we could not add another neighbour table setting
without also adding another binary sysctl.

Don't pass the binary sysctl path for neighour table entries
into neigh_sysctl_register.  These parameters are no longer
used and so are just dead code.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/sysctl.h  | 1 -
 include/net/neighbour.h | 1 -
 net/core/neighbour.c    | 7 ++++---
 net/ipv4/arp.c          | 3 +--
 net/ipv4/devinet.c      | 3 +--
 net/ipv6/addrconf.c     | 3 +--
 net/ipv6/ndisc.c        | 3 +--
 7 files changed, 8 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 7c74e919cabe..f66014c90c9f 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -596,7 +596,6 @@ enum {
 	NET_NEIGH_GC_THRESH3=16,
 	NET_NEIGH_RETRANS_TIME_MS=17,
 	NET_NEIGH_REACHABLE_TIME_MS=18,
-	__NET_NEIGH_MAX
 };
 
 /* /proc/sys/net/dccp */
diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index b0173202cad9..7834f470d14a 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -251,7 +251,6 @@ extern void neigh_seq_stop(struct seq_file *, void *);
 
 extern int			neigh_sysctl_register(struct net_device *dev, 
 						      struct neigh_parms *p,
-						      int p_id, int pdev_id,
 						      char *p_name,
 						      proc_handler *proc_handler);
 extern void			neigh_sysctl_unregister(struct neigh_parms *p);
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index f2efd72da799..d102f6d9abdc 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -2556,9 +2556,11 @@ EXPORT_SYMBOL(neigh_app_ns);
 
 #ifdef CONFIG_SYSCTL
 
+#define NEIGH_VARS_MAX 19
+
 static struct neigh_sysctl_table {
 	struct ctl_table_header *sysctl_header;
-	struct ctl_table neigh_vars[__NET_NEIGH_MAX];
+	struct ctl_table neigh_vars[NEIGH_VARS_MAX];
 	char *dev_name;
 } neigh_sysctl_template __read_mostly = {
 	.neigh_vars = {
@@ -2675,8 +2677,7 @@ static struct neigh_sysctl_table {
 };
 
 int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
-			  int p_id, int pdev_id, char *p_name,
-			  proc_handler *handler)
+			  char *p_name, proc_handler *handler)
 {
 	struct neigh_sysctl_table *t;
 	const char *dev_name_source = NULL;
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 1940b4df7699..c4dd13542802 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -1284,8 +1284,7 @@ void __init arp_init(void)
 	dev_add_pack(&arp_packet_type);
 	arp_proc_init();
 #ifdef CONFIG_SYSCTL
-	neigh_sysctl_register(NULL, &arp_tbl.parms, NET_IPV4,
-			      NET_IPV4_NEIGH, "ipv4", NULL);
+	neigh_sysctl_register(NULL, &arp_tbl.parms, "ipv4", NULL);
 #endif
 	register_netdevice_notifier(&arp_netdev_notifier);
 }
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index b1eddee9bf94..014982b61564 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1487,8 +1487,7 @@ static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
 
 static void devinet_sysctl_register(struct in_device *idev)
 {
-	neigh_sysctl_register(idev->dev, idev->arp_parms, NET_IPV4,
-			NET_IPV4_NEIGH, "ipv4", NULL);
+	neigh_sysctl_register(idev->dev, idev->arp_parms, "ipv4", NULL);
 	__devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
 					&idev->cnf);
 }
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 764ad37ca070..c79cbff54370 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -4416,8 +4416,7 @@ static void __addrconf_sysctl_unregister(struct ipv6_devconf *p)
 
 static void addrconf_sysctl_register(struct inet6_dev *idev)
 {
-	neigh_sysctl_register(idev->dev, idev->nd_parms, NET_IPV6,
-			      NET_IPV6_NEIGH, "ipv6",
+	neigh_sysctl_register(idev->dev, idev->nd_parms, "ipv6",
 			      &ndisc_ifinfo_sysctl_change);
 	__addrconf_sysctl_register(dev_net(idev->dev), idev->dev->name,
 					idev, &idev->cnf);
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 2dfec6bb8ada..8bcc4b7db3bf 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1820,8 +1820,7 @@ int __init ndisc_init(void)
 	neigh_table_init(&nd_tbl);
 
 #ifdef CONFIG_SYSCTL
-	err = neigh_sysctl_register(NULL, &nd_tbl.parms, NET_IPV6,
-				    NET_IPV6_NEIGH, "ipv6",
+	err = neigh_sysctl_register(NULL, &nd_tbl.parms, "ipv6",
 				    &ndisc_ifinfo_sysctl_change);
 	if (err)
 		goto out_unregister_pernet;
-- 
cgit v1.2.3


From 522530311b35ec8fc4785062441dd2d63967ac55 Mon Sep 17 00:00:00 2001
From: Jan Kiszka <jan.kiszka@web.de>
Date: Mon, 8 Feb 2010 10:12:10 +0000
Subject: CAPI: Call a controller 'controller', not 'card'

At least for our internal use, fix the misnomers that refer to a CAPI
controller as 'card'. No functional changes.

Signed-off-by: Jan Kiszka <jan.kiszka@web.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/isdn/capi/kcapi.c      | 320 +++++++++++++++++++++--------------------
 drivers/isdn/capi/kcapi.h      |   8 +-
 drivers/isdn/capi/kcapi_proc.c |  17 ++-
 include/linux/isdn/capilli.h   |   2 +-
 4 files changed, 178 insertions(+), 169 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/isdn/capi/kcapi.c b/drivers/isdn/capi/kcapi.c
index ef564ee1c06c..f37c13b4ddc7 100644
--- a/drivers/isdn/capi/kcapi.c
+++ b/drivers/isdn/capi/kcapi.c
@@ -67,24 +67,24 @@ static DEFINE_RWLOCK(application_lock);
 static DEFINE_MUTEX(controller_mutex);
 
 struct capi20_appl *capi_applications[CAPI_MAXAPPL];
-struct capi_ctr *capi_cards[CAPI_MAXCONTR];
+struct capi_ctr *capi_controller[CAPI_MAXCONTR];
 
-static int ncards;
+static int ncontrollers;
 
 /* -------- controller ref counting -------------------------------------- */
 
 static inline struct capi_ctr *
-capi_ctr_get(struct capi_ctr *card)
+capi_ctr_get(struct capi_ctr *ctr)
 {
-	if (!try_module_get(card->owner))
+	if (!try_module_get(ctr->owner))
 		return NULL;
-	return card;
+	return ctr;
 }
 
 static inline void
-capi_ctr_put(struct capi_ctr *card)
+capi_ctr_put(struct capi_ctr *ctr)
 {
-	module_put(card->owner);
+	module_put(ctr->owner);
 }
 
 /* ------------------------------------------------------------- */
@@ -94,7 +94,7 @@ static inline struct capi_ctr *get_capi_ctr_by_nr(u16 contr)
 	if (contr - 1 >= CAPI_MAXCONTR)
 		return NULL;
 
-	return capi_cards[contr - 1];
+	return capi_controller[contr - 1];
 }
 
 static inline struct capi20_appl *get_capi_appl_by_nr(u16 applid)
@@ -144,46 +144,48 @@ static inline int capi_subcmd_valid(u8 subcmd)
 
 /* ------------------------------------------------------------ */
 
-static void register_appl(struct capi_ctr *card, u16 applid, capi_register_params *rparam)
+static void
+register_appl(struct capi_ctr *ctr, u16 applid, capi_register_params *rparam)
 {
-	card = capi_ctr_get(card);
+	ctr = capi_ctr_get(ctr);
 
-	if (card)
-		card->register_appl(card, applid, rparam);
+	if (ctr)
+		ctr->register_appl(ctr, applid, rparam);
 	else
-		printk(KERN_WARNING "%s: cannot get card resources\n", __func__);
+		printk(KERN_WARNING "%s: cannot get controller resources\n",
+		       __func__);
 }
 
 
-static void release_appl(struct capi_ctr *card, u16 applid)
+static void release_appl(struct capi_ctr *ctr, u16 applid)
 {
 	DBG("applid %#x", applid);
 	
-	card->release_appl(card, applid);
-	capi_ctr_put(card);
+	ctr->release_appl(ctr, applid);
+	capi_ctr_put(ctr);
 }
 
 /* -------- KCI_CONTRUP --------------------------------------- */
 
 static void notify_up(u32 contr)
 {
-	struct capi_ctr *card = get_capi_ctr_by_nr(contr);
+	struct capi_ctr *ctr = get_capi_ctr_by_nr(contr);
 	struct capi20_appl *ap;
 	u16 applid;
 
 	if (showcapimsgs & 1) {
 	        printk(KERN_DEBUG "kcapi: notify up contr %d\n", contr);
 	}
-	if (!card) {
+	if (!ctr) {
 		printk(KERN_WARNING "%s: invalid contr %d\n", __func__, contr);
 		return;
 	}
 	for (applid = 1; applid <= CAPI_MAXAPPL; applid++) {
 		ap = get_capi_appl_by_nr(applid);
 		if (!ap || ap->release_in_progress) continue;
-		register_appl(card, applid, &ap->rparam);
+		register_appl(ctr, applid, &ap->rparam);
 		if (ap->callback && !ap->release_in_progress)
-			ap->callback(KCI_CONTRUP, contr, &card->profile);
+			ap->callback(KCI_CONTRUP, contr, &ctr->profile);
 	}
 }
 
@@ -269,14 +271,15 @@ static void recv_handler(struct work_struct *work)
 
 /**
  * capi_ctr_handle_message() - handle incoming CAPI message
- * @card:	controller descriptor structure.
+ * @ctr:	controller descriptor structure.
  * @appl:	application ID.
  * @skb:	message.
  *
  * Called by hardware driver to pass a CAPI message to the application.
  */
 
-void capi_ctr_handle_message(struct capi_ctr * card, u16 appl, struct sk_buff *skb)
+void capi_ctr_handle_message(struct capi_ctr *ctr, u16 appl,
+			     struct sk_buff *skb)
 {
 	struct capi20_appl *ap;
 	int showctl = 0;
@@ -284,43 +287,45 @@ void capi_ctr_handle_message(struct capi_ctr * card, u16 appl, struct sk_buff *s
 	unsigned long flags;
 	_cdebbuf *cdb;
 
-	if (card->cardstate != CARD_RUNNING) {
+	if (ctr->state != CAPI_CTR_RUNNING) {
 		cdb = capi_message2str(skb->data);
 		if (cdb) {
 			printk(KERN_INFO "kcapi: controller [%03d] not active, got: %s",
-				card->cnr, cdb->buf);
+				ctr->cnr, cdb->buf);
 			cdebbuf_free(cdb);
 		} else
 			printk(KERN_INFO "kcapi: controller [%03d] not active, cannot trace\n",
-				card->cnr);
+				ctr->cnr);
 		goto error;
 	}
 
 	cmd = CAPIMSG_COMMAND(skb->data);
         subcmd = CAPIMSG_SUBCOMMAND(skb->data);
 	if (cmd == CAPI_DATA_B3 && subcmd == CAPI_IND) {
-		card->nrecvdatapkt++;
-	        if (card->traceflag > 2) showctl |= 2;
+		ctr->nrecvdatapkt++;
+		if (ctr->traceflag > 2)
+			showctl |= 2;
 	} else {
-		card->nrecvctlpkt++;
-	        if (card->traceflag) showctl |= 2;
+		ctr->nrecvctlpkt++;
+		if (ctr->traceflag)
+			showctl |= 2;
 	}
-	showctl |= (card->traceflag & 1);
+	showctl |= (ctr->traceflag & 1);
 	if (showctl & 2) {
 		if (showctl & 1) {
 			printk(KERN_DEBUG "kcapi: got [%03d] id#%d %s len=%u\n",
-			       card->cnr, CAPIMSG_APPID(skb->data),
+			       ctr->cnr, CAPIMSG_APPID(skb->data),
 			       capi_cmd2str(cmd, subcmd),
 			       CAPIMSG_LEN(skb->data));
 		} else {
 			cdb = capi_message2str(skb->data);
 			if (cdb) {
 				printk(KERN_DEBUG "kcapi: got [%03d] %s\n",
-					card->cnr, cdb->buf);
+					ctr->cnr, cdb->buf);
 				cdebbuf_free(cdb);
 			} else
 				printk(KERN_DEBUG "kcapi: got [%03d] id#%d %s len=%u, cannot trace\n",
-					card->cnr, CAPIMSG_APPID(skb->data),
+					ctr->cnr, CAPIMSG_APPID(skb->data),
 					capi_cmd2str(cmd, subcmd),
 					CAPIMSG_LEN(skb->data));
 		}
@@ -356,74 +361,75 @@ EXPORT_SYMBOL(capi_ctr_handle_message);
 
 /**
  * capi_ctr_ready() - signal CAPI controller ready
- * @card:	controller descriptor structure.
+ * @ctr:	controller descriptor structure.
  *
  * Called by hardware driver to signal that the controller is up and running.
  */
 
-void capi_ctr_ready(struct capi_ctr * card)
+void capi_ctr_ready(struct capi_ctr *ctr)
 {
-	card->cardstate = CARD_RUNNING;
+	ctr->state = CAPI_CTR_RUNNING;
 
-        printk(KERN_NOTICE "kcapi: card [%03d] \"%s\" ready.\n",
-	       card->cnr, card->name);
+	printk(KERN_NOTICE "kcapi: controller [%03d] \"%s\" ready.\n",
+	       ctr->cnr, ctr->name);
 
-	notify_push(KCI_CONTRUP, card->cnr, 0, 0);
+	notify_push(KCI_CONTRUP, ctr->cnr, 0, 0);
 }
 
 EXPORT_SYMBOL(capi_ctr_ready);
 
 /**
  * capi_ctr_down() - signal CAPI controller not ready
- * @card:	controller descriptor structure.
+ * @ctr:	controller descriptor structure.
  *
  * Called by hardware driver to signal that the controller is down and
  * unavailable for use.
  */
 
-void capi_ctr_down(struct capi_ctr * card)
+void capi_ctr_down(struct capi_ctr *ctr)
 {
 	u16 appl;
 
 	DBG("");
 
-        if (card->cardstate == CARD_DETECTED)
+	if (ctr->state == CAPI_CTR_DETECTED)
 		return;
 
-        card->cardstate = CARD_DETECTED;
+	ctr->state = CAPI_CTR_DETECTED;
 
-	memset(card->manu, 0, sizeof(card->manu));
-	memset(&card->version, 0, sizeof(card->version));
-	memset(&card->profile, 0, sizeof(card->profile));
-	memset(card->serial, 0, sizeof(card->serial));
+	memset(ctr->manu, 0, sizeof(ctr->manu));
+	memset(&ctr->version, 0, sizeof(ctr->version));
+	memset(&ctr->profile, 0, sizeof(ctr->profile));
+	memset(ctr->serial, 0, sizeof(ctr->serial));
 
 	for (appl = 1; appl <= CAPI_MAXAPPL; appl++) {
 		struct capi20_appl *ap = get_capi_appl_by_nr(appl);
 		if (!ap || ap->release_in_progress)
 			continue;
 
-		capi_ctr_put(card);
+		capi_ctr_put(ctr);
 	}
 
-	printk(KERN_NOTICE "kcapi: card [%03d] down.\n", card->cnr);
+	printk(KERN_NOTICE "kcapi: controller [%03d] down.\n", ctr->cnr);
 
-	notify_push(KCI_CONTRDOWN, card->cnr, 0, 0);
+	notify_push(KCI_CONTRDOWN, ctr->cnr, 0, 0);
 }
 
 EXPORT_SYMBOL(capi_ctr_down);
 
 /**
  * capi_ctr_suspend_output() - suspend controller
- * @card:	controller descriptor structure.
+ * @ctr:	controller descriptor structure.
  *
  * Called by hardware driver to stop data flow.
  */
 
-void capi_ctr_suspend_output(struct capi_ctr *card)
+void capi_ctr_suspend_output(struct capi_ctr *ctr)
 {
-	if (!card->blocked) {
-		printk(KERN_DEBUG "kcapi: card [%03d] suspend\n", card->cnr);
-		card->blocked = 1;
+	if (!ctr->blocked) {
+		printk(KERN_DEBUG "kcapi: controller [%03d] suspend\n",
+		       ctr->cnr);
+		ctr->blocked = 1;
 	}
 }
 
@@ -431,16 +437,17 @@ EXPORT_SYMBOL(capi_ctr_suspend_output);
 
 /**
  * capi_ctr_resume_output() - resume controller
- * @card:	controller descriptor structure.
+ * @ctr:	controller descriptor structure.
  *
  * Called by hardware driver to resume data flow.
  */
 
-void capi_ctr_resume_output(struct capi_ctr *card)
+void capi_ctr_resume_output(struct capi_ctr *ctr)
 {
-	if (card->blocked) {
-		printk(KERN_DEBUG "kcapi: card [%03d] resume\n", card->cnr);
-		card->blocked = 0;
+	if (ctr->blocked) {
+		printk(KERN_DEBUG "kcapi: controller [%03d] resumed\n",
+		       ctr->cnr);
+		ctr->blocked = 0;
 	}
 }
 
@@ -450,21 +457,20 @@ EXPORT_SYMBOL(capi_ctr_resume_output);
 
 /**
  * attach_capi_ctr() - register CAPI controller
- * @card:	controller descriptor structure.
+ * @ctr:	controller descriptor structure.
  *
  * Called by hardware driver to register a controller with the CAPI subsystem.
  * Return value: 0 on success, error code < 0 on error
  */
 
-int
-attach_capi_ctr(struct capi_ctr *card)
+int attach_capi_ctr(struct capi_ctr *ctr)
 {
 	int i;
 
 	mutex_lock(&controller_mutex);
 
 	for (i = 0; i < CAPI_MAXCONTR; i++) {
-		if (capi_cards[i] == NULL)
+		if (!capi_controller[i])
 			break;
 	}
 	if (i == CAPI_MAXCONTR) {
@@ -472,25 +478,25 @@ attach_capi_ctr(struct capi_ctr *card)
 		printk(KERN_ERR "kcapi: out of controller slots\n");
 	   	return -EBUSY;
 	}
-	capi_cards[i] = card;
+	capi_controller[i] = ctr;
 
 	mutex_unlock(&controller_mutex);
 
-	card->nrecvctlpkt = 0;
-	card->nrecvdatapkt = 0;
-	card->nsentctlpkt = 0;
-	card->nsentdatapkt = 0;
-	card->cnr = i + 1;
-	card->cardstate = CARD_DETECTED;
-	card->blocked = 0;
-	card->traceflag = showcapimsgs;
-
-	sprintf(card->procfn, "capi/controllers/%d", card->cnr);
-	card->procent = proc_create_data(card->procfn, 0, NULL, card->proc_fops, card);
-
-	ncards++;
-	printk(KERN_NOTICE "kcapi: Controller [%03d]: %s attached\n",
-			card->cnr, card->name);
+	ctr->nrecvctlpkt = 0;
+	ctr->nrecvdatapkt = 0;
+	ctr->nsentctlpkt = 0;
+	ctr->nsentdatapkt = 0;
+	ctr->cnr = i + 1;
+	ctr->state = CAPI_CTR_DETECTED;
+	ctr->blocked = 0;
+	ctr->traceflag = showcapimsgs;
+
+	sprintf(ctr->procfn, "capi/controllers/%d", ctr->cnr);
+	ctr->procent = proc_create_data(ctr->procfn, 0, NULL, ctr->proc_fops, ctr);
+
+	ncontrollers++;
+	printk(KERN_NOTICE "kcapi: controller [%03d]: %s attached\n",
+			ctr->cnr, ctr->name);
 	return 0;
 }
 
@@ -498,27 +504,27 @@ EXPORT_SYMBOL(attach_capi_ctr);
 
 /**
  * detach_capi_ctr() - unregister CAPI controller
- * @card:	controller descriptor structure.
+ * @ctr:	controller descriptor structure.
  *
  * Called by hardware driver to remove the registration of a controller
  * with the CAPI subsystem.
  * Return value: 0 on success, error code < 0 on error
  */
 
-int detach_capi_ctr(struct capi_ctr *card)
+int detach_capi_ctr(struct capi_ctr *ctr)
 {
-        if (card->cardstate != CARD_DETECTED)
-		capi_ctr_down(card);
+	if (ctr->state != CAPI_CTR_DETECTED)
+		capi_ctr_down(ctr);
 
-	ncards--;
+	ncontrollers--;
 
-	if (card->procent) {
-	   remove_proc_entry(card->procfn, NULL);
-	   card->procent = NULL;
+	if (ctr->procent) {
+		remove_proc_entry(ctr->procfn, NULL);
+		ctr->procent = NULL;
 	}
-	capi_cards[card->cnr - 1] = NULL;
-	printk(KERN_NOTICE "kcapi: Controller [%03d]: %s unregistered\n",
-			card->cnr, card->name);
+	capi_controller[ctr->cnr - 1] = NULL;
+	printk(KERN_NOTICE "kcapi: controller [%03d]: %s unregistered\n",
+	       ctr->cnr, ctr->name);
 
 	return 0;
 }
@@ -576,7 +582,8 @@ u16 capi20_isinstalled(void)
 {
 	int i;
 	for (i = 0; i < CAPI_MAXCONTR; i++) {
-		if (capi_cards[i] && capi_cards[i]->cardstate == CARD_RUNNING)
+		if (capi_controller[i] &&
+		    capi_controller[i]->state == CAPI_CTR_RUNNING)
 			return CAPI_NOERROR;
 	}
 	return CAPI_REGNOTINSTALLED;
@@ -635,9 +642,10 @@ u16 capi20_register(struct capi20_appl *ap)
 	
 	mutex_lock(&controller_mutex);
 	for (i = 0; i < CAPI_MAXCONTR; i++) {
-		if (!capi_cards[i] || capi_cards[i]->cardstate != CARD_RUNNING)
+		if (!capi_controller[i] ||
+		    capi_controller[i]->state != CAPI_CTR_RUNNING)
 			continue;
-		register_appl(capi_cards[i], applid, &ap->rparam);
+		register_appl(capi_controller[i], applid, &ap->rparam);
 	}
 	mutex_unlock(&controller_mutex);
 
@@ -674,9 +682,10 @@ u16 capi20_release(struct capi20_appl *ap)
 
 	mutex_lock(&controller_mutex);
 	for (i = 0; i < CAPI_MAXCONTR; i++) {
-		if (!capi_cards[i] || capi_cards[i]->cardstate != CARD_RUNNING)
+		if (!capi_controller[i] ||
+		    capi_controller[i]->state != CAPI_CTR_RUNNING)
 			continue;
-		release_appl(capi_cards[i], ap->applid);
+		release_appl(capi_controller[i], ap->applid);
 	}
 	mutex_unlock(&controller_mutex);
 
@@ -703,13 +712,13 @@ EXPORT_SYMBOL(capi20_release);
 
 u16 capi20_put_message(struct capi20_appl *ap, struct sk_buff *skb)
 {
-	struct capi_ctr *card;
+	struct capi_ctr *ctr;
 	int showctl = 0;
 	u8 cmd, subcmd;
 
 	DBG("applid %#x", ap->applid);
  
-	if (ncards == 0)
+	if (ncontrollers == 0)
 		return CAPI_REGNOTINSTALLED;
 	if ((ap->applid == 0) || ap->release_in_progress)
 		return CAPI_ILLAPPNR;
@@ -717,28 +726,30 @@ u16 capi20_put_message(struct capi20_appl *ap, struct sk_buff *skb)
 	    || !capi_cmd_valid(CAPIMSG_COMMAND(skb->data))
 	    || !capi_subcmd_valid(CAPIMSG_SUBCOMMAND(skb->data)))
 		return CAPI_ILLCMDORSUBCMDORMSGTOSMALL;
-	card = get_capi_ctr_by_nr(CAPIMSG_CONTROLLER(skb->data));
-	if (!card || card->cardstate != CARD_RUNNING) {
-		card = get_capi_ctr_by_nr(1); // XXX why?
-	        if (!card || card->cardstate != CARD_RUNNING) 
+	ctr = get_capi_ctr_by_nr(CAPIMSG_CONTROLLER(skb->data));
+	if (!ctr || ctr->state != CAPI_CTR_RUNNING) {
+		ctr = get_capi_ctr_by_nr(1); /* XXX why? */
+		if (!ctr || ctr->state != CAPI_CTR_RUNNING)
 			return CAPI_REGNOTINSTALLED;
 	}
-	if (card->blocked)
+	if (ctr->blocked)
 		return CAPI_SENDQUEUEFULL;
 
 	cmd = CAPIMSG_COMMAND(skb->data);
         subcmd = CAPIMSG_SUBCOMMAND(skb->data);
 
 	if (cmd == CAPI_DATA_B3 && subcmd== CAPI_REQ) {
-		card->nsentdatapkt++;
+		ctr->nsentdatapkt++;
 		ap->nsentdatapkt++;
-	        if (card->traceflag > 2) showctl |= 2;
+		if (ctr->traceflag > 2)
+			showctl |= 2;
 	} else {
-		card->nsentctlpkt++;
+		ctr->nsentctlpkt++;
 		ap->nsentctlpkt++;
-	        if (card->traceflag) showctl |= 2;
+		if (ctr->traceflag)
+			showctl |= 2;
 	}
-	showctl |= (card->traceflag & 1);
+	showctl |= (ctr->traceflag & 1);
 	if (showctl & 2) {
 		if (showctl & 1) {
 			printk(KERN_DEBUG "kcapi: put [%03d] id#%d %s len=%u\n",
@@ -761,7 +772,7 @@ u16 capi20_put_message(struct capi20_appl *ap, struct sk_buff *skb)
 					CAPIMSG_LEN(skb->data));
 		}
 	}
-	return card->send_message(card, skb);
+	return ctr->send_message(ctr, skb);
 }
 
 EXPORT_SYMBOL(capi20_put_message);
@@ -778,16 +789,16 @@ EXPORT_SYMBOL(capi20_put_message);
 
 u16 capi20_get_manufacturer(u32 contr, u8 *buf)
 {
-	struct capi_ctr *card;
+	struct capi_ctr *ctr;
 
 	if (contr == 0) {
 		strlcpy(buf, capi_manufakturer, CAPI_MANUFACTURER_LEN);
 		return CAPI_NOERROR;
 	}
-	card = get_capi_ctr_by_nr(contr);
-	if (!card || card->cardstate != CARD_RUNNING) 
+	ctr = get_capi_ctr_by_nr(contr);
+	if (!ctr || ctr->state != CAPI_CTR_RUNNING)
 		return CAPI_REGNOTINSTALLED;
-	strlcpy(buf, card->manu, CAPI_MANUFACTURER_LEN);
+	strlcpy(buf, ctr->manu, CAPI_MANUFACTURER_LEN);
 	return CAPI_NOERROR;
 }
 
@@ -805,17 +816,17 @@ EXPORT_SYMBOL(capi20_get_manufacturer);
 
 u16 capi20_get_version(u32 contr, struct capi_version *verp)
 {
-	struct capi_ctr *card;
+	struct capi_ctr *ctr;
 
 	if (contr == 0) {
 		*verp = driver_version;
 		return CAPI_NOERROR;
 	}
-	card = get_capi_ctr_by_nr(contr);
-	if (!card || card->cardstate != CARD_RUNNING) 
+	ctr = get_capi_ctr_by_nr(contr);
+	if (!ctr || ctr->state != CAPI_CTR_RUNNING)
 		return CAPI_REGNOTINSTALLED;
 
-	memcpy((void *) verp, &card->version, sizeof(capi_version));
+	memcpy(verp, &ctr->version, sizeof(capi_version));
 	return CAPI_NOERROR;
 }
 
@@ -833,17 +844,17 @@ EXPORT_SYMBOL(capi20_get_version);
 
 u16 capi20_get_serial(u32 contr, u8 *serial)
 {
-	struct capi_ctr *card;
+	struct capi_ctr *ctr;
 
 	if (contr == 0) {
 		strlcpy(serial, driver_serial, CAPI_SERIAL_LEN);
 		return CAPI_NOERROR;
 	}
-	card = get_capi_ctr_by_nr(contr);
-	if (!card || card->cardstate != CARD_RUNNING) 
+	ctr = get_capi_ctr_by_nr(contr);
+	if (!ctr || ctr->state != CAPI_CTR_RUNNING)
 		return CAPI_REGNOTINSTALLED;
 
-	strlcpy((void *) serial, card->serial, CAPI_SERIAL_LEN);
+	strlcpy(serial, ctr->serial, CAPI_SERIAL_LEN);
 	return CAPI_NOERROR;
 }
 
@@ -861,18 +872,17 @@ EXPORT_SYMBOL(capi20_get_serial);
 
 u16 capi20_get_profile(u32 contr, struct capi_profile *profp)
 {
-	struct capi_ctr *card;
+	struct capi_ctr *ctr;
 
 	if (contr == 0) {
-		profp->ncontroller = ncards;
+		profp->ncontroller = ncontrollers;
 		return CAPI_NOERROR;
 	}
-	card = get_capi_ctr_by_nr(contr);
-	if (!card || card->cardstate != CARD_RUNNING) 
+	ctr = get_capi_ctr_by_nr(contr);
+	if (!ctr || ctr->state != CAPI_CTR_RUNNING)
 		return CAPI_REGNOTINSTALLED;
 
-	memcpy((void *) profp, &card->profile,
-			sizeof(struct capi_profile));
+	memcpy(profp, &ctr->profile, sizeof(struct capi_profile));
 	return CAPI_NOERROR;
 }
 
@@ -885,7 +895,7 @@ static int old_capi_manufacturer(unsigned int cmd, void __user *data)
 	avmb1_extcarddef cdef;
 	avmb1_resetdef rdef;
 	capicardparams cparams;
-	struct capi_ctr *card;
+	struct capi_ctr *ctr;
 	struct capi_driver *driver = NULL;
 	capiloaddata ldata;
 	struct list_head *l;
@@ -958,26 +968,26 @@ static int old_capi_manufacturer(unsigned int cmd, void __user *data)
 					   sizeof(avmb1_loadandconfigdef)))
 				return -EFAULT;
 		}
-		card = get_capi_ctr_by_nr(ldef.contr);
-		if (!card)
+		ctr = get_capi_ctr_by_nr(ldef.contr);
+		if (!ctr)
 			return -EINVAL;
-		card = capi_ctr_get(card);
-		if (!card)
+		ctr = capi_ctr_get(ctr);
+		if (!ctr)
 			return -ESRCH;
-		if (card->load_firmware == NULL) {
+		if (ctr->load_firmware == NULL) {
 			printk(KERN_DEBUG "kcapi: load: no load function\n");
-			capi_ctr_put(card);
+			capi_ctr_put(ctr);
 			return -ESRCH;
 		}
 
 		if (ldef.t4file.len <= 0) {
 			printk(KERN_DEBUG "kcapi: load: invalid parameter: length of t4file is %d ?\n", ldef.t4file.len);
-			capi_ctr_put(card);
+			capi_ctr_put(ctr);
 			return -EINVAL;
 		}
 		if (ldef.t4file.data == NULL) {
 			printk(KERN_DEBUG "kcapi: load: invalid parameter: dataptr is 0\n");
-			capi_ctr_put(card);
+			capi_ctr_put(ctr);
 			return -EINVAL;
 		}
 
@@ -988,46 +998,46 @@ static int old_capi_manufacturer(unsigned int cmd, void __user *data)
 		ldata.configuration.data = ldef.t4config.data;
 		ldata.configuration.len = ldef.t4config.len;
 
-		if (card->cardstate != CARD_DETECTED) {
+		if (ctr->state != CAPI_CTR_DETECTED) {
 			printk(KERN_INFO "kcapi: load: contr=%d not in detect state\n", ldef.contr);
-			capi_ctr_put(card);
+			capi_ctr_put(ctr);
 			return -EBUSY;
 		}
-		card->cardstate = CARD_LOADING;
+		ctr->state = CAPI_CTR_LOADING;
 
-		retval = card->load_firmware(card, &ldata);
+		retval = ctr->load_firmware(ctr, &ldata);
 
 		if (retval) {
-			card->cardstate = CARD_DETECTED;
-			capi_ctr_put(card);
+			ctr->state = CAPI_CTR_DETECTED;
+			capi_ctr_put(ctr);
 			return retval;
 		}
 
-		while (card->cardstate != CARD_RUNNING) {
+		while (ctr->state != CAPI_CTR_RUNNING) {
 
 			msleep_interruptible(100);	/* 0.1 sec */
 
 			if (signal_pending(current)) {
-				capi_ctr_put(card);
+				capi_ctr_put(ctr);
 				return -EINTR;
 			}
 		}
-		capi_ctr_put(card);
+		capi_ctr_put(ctr);
 		return 0;
 
 	case AVMB1_RESETCARD:
 		if (copy_from_user(&rdef, data, sizeof(avmb1_resetdef)))
 			return -EFAULT;
-		card = get_capi_ctr_by_nr(rdef.contr);
-		if (!card)
+		ctr = get_capi_ctr_by_nr(rdef.contr);
+		if (!ctr)
 			return -ESRCH;
 
-		if (card->cardstate == CARD_DETECTED)
+		if (ctr->state == CAPI_CTR_DETECTED)
 			return 0;
 
-		card->reset_ctr(card);
+		ctr->reset_ctr(ctr);
 
-		while (card->cardstate > CARD_DETECTED) {
+		while (ctr->state > CAPI_CTR_DETECTED) {
 
 			msleep_interruptible(100);	/* 0.1 sec */
 
@@ -1052,7 +1062,7 @@ static int old_capi_manufacturer(unsigned int cmd, void __user *data)
 
 int capi20_manufacturer(unsigned int cmd, void __user *data)
 {
-        struct capi_ctr *card;
+	struct capi_ctr *ctr;
 
 	switch (cmd) {
 #ifdef AVMB1_COMPAT
@@ -1070,13 +1080,13 @@ int capi20_manufacturer(unsigned int cmd, void __user *data)
 		if (copy_from_user(&fdef, data, sizeof(kcapi_flagdef)))
 			return -EFAULT;
 
-		card = get_capi_ctr_by_nr(fdef.contr);
-		if (!card)
+		ctr = get_capi_ctr_by_nr(fdef.contr);
+		if (!ctr)
 			return -ESRCH;
 
-		card->traceflag = fdef.flag;
+		ctr->traceflag = fdef.flag;
 		printk(KERN_INFO "kcapi: contr [%03d] set trace=%d\n",
-			card->cnr, card->traceflag);
+		       ctr->cnr, ctr->traceflag);
 		return 0;
 	}
 	case KCAPI_CMD_ADDCARD:
diff --git a/drivers/isdn/capi/kcapi.h b/drivers/isdn/capi/kcapi.h
index 244711f7f838..f62c53bd6a97 100644
--- a/drivers/isdn/capi/kcapi.h
+++ b/drivers/isdn/capi/kcapi.h
@@ -24,16 +24,16 @@ printk(KERN_DEBUG "%s: " format "\n" , __func__ , ## arg); \
 #endif
 
 enum {
-	CARD_DETECTED = 1,
-	CARD_LOADING  =	2,
-	CARD_RUNNING  = 3,
+	CAPI_CTR_DETECTED = 1,
+	CAPI_CTR_LOADING  = 2,
+	CAPI_CTR_RUNNING  = 3,
 };
 
 extern struct list_head capi_drivers;
 extern rwlock_t capi_drivers_list_lock;
 
 extern struct capi20_appl *capi_applications[CAPI_MAXAPPL];
-extern struct capi_ctr *capi_cards[CAPI_MAXCONTR];
+extern struct capi_ctr *capi_controller[CAPI_MAXCONTR];
 
 #ifdef CONFIG_PROC_FS
 
diff --git a/drivers/isdn/capi/kcapi_proc.c b/drivers/isdn/capi/kcapi_proc.c
index 09d4db764d22..59fd16a4ad8e 100644
--- a/drivers/isdn/capi/kcapi_proc.c
+++ b/drivers/isdn/capi/kcapi_proc.c
@@ -15,13 +15,12 @@
 #include <linux/seq_file.h>
 #include <linux/init.h>
 
-static char *
-cardstate2str(unsigned short cardstate)
+static char *state2str(unsigned short state)
 {
-	switch (cardstate) {
-	case CARD_DETECTED:	return "detected";
-	case CARD_LOADING:	return "loading";
-	case CARD_RUNNING:	return "running";
+	switch (state) {
+	case CAPI_CTR_DETECTED:	return "detected";
+	case CAPI_CTR_LOADING:	return "loading";
+	case CAPI_CTR_RUNNING:	return "running";
 	default:	        return "???";
 	}
 }
@@ -38,7 +37,7 @@ cardstate2str(unsigned short cardstate)
 static void *controller_start(struct seq_file *seq, loff_t *pos)
 {
 	if (*pos < CAPI_MAXCONTR)
-		return &capi_cards[*pos];
+		return &capi_controller[*pos];
 
 	return NULL;
 }
@@ -47,7 +46,7 @@ static void *controller_next(struct seq_file *seq, void *v, loff_t *pos)
 {
 	++*pos;
 	if (*pos < CAPI_MAXCONTR)
-		return &capi_cards[*pos];
+		return &capi_controller[*pos];
 
 	return NULL;
 }
@@ -65,7 +64,7 @@ static int controller_show(struct seq_file *seq, void *v)
 
 	seq_printf(seq, "%d %-10s %-8s %-16s %s\n",
 		   ctr->cnr, ctr->driver_name,
-		   cardstate2str(ctr->cardstate),
+		   state2str(ctr->state),
 		   ctr->name,
 		   ctr->procinfo ?  ctr->procinfo(ctr) : "");
 
diff --git a/include/linux/isdn/capilli.h b/include/linux/isdn/capilli.h
index d3e5e9da0c82..856f38eddd78 100644
--- a/include/linux/isdn/capilli.h
+++ b/include/linux/isdn/capilli.h
@@ -66,7 +66,7 @@ struct capi_ctr {
 	unsigned long nsentdatapkt;
 
 	int cnr;				/* controller number */
-	volatile unsigned short cardstate;	/* controller state */
+	volatile unsigned short state;		/* controller state */
 	volatile int blocked;			/* output blocked */
 	int traceflag;				/* capi trace */
 
-- 
cgit v1.2.3


From ef69bb2ec6036945da1d3d3f07b75253f484f693 Mon Sep 17 00:00:00 2001
From: Jan Kiszka <jan.kiszka@web.de>
Date: Mon, 8 Feb 2010 10:12:13 +0000
Subject: CAPI: Rework controller state notifier

Another step towards proper locking: Rework the callback provided to
capidrv for controller state changes. This is so far attached to an
application, which would require us to hold the corresponding lock
across notification calls.

But there is no direct relation between a controller up/down event and
an application, so let's decouple them and provide a notifier call chain
for those events instead. This notifier chain is first of all used
internally. Here we request the highest priority to unsure that
housekeeping work is done before any other notifications. The chain is
exported via [un]register_capictr_notifier to our only user, capidrv, to
replace the racy and unfixable capi20_set_callback.

Signed-off-by: Jan Kiszka <jan.kiszka@web.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/isdn/capi/capidrv.c |  22 ++++++---
 drivers/isdn/capi/kcapi.c   | 112 ++++++++++++++++++++------------------------
 include/linux/kernelcapi.h  |  17 ++-----
 3 files changed, 72 insertions(+), 79 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/isdn/capi/capidrv.c b/drivers/isdn/capi/capidrv.c
index 7d8899ad5796..bf55ed5f38e3 100644
--- a/drivers/isdn/capi/capidrv.c
+++ b/drivers/isdn/capi/capidrv.c
@@ -2210,19 +2210,24 @@ static int capidrv_delcontr(u16 contr)
 }
 
 
-static void lower_callback(unsigned int cmd, u32 contr, void *data)
+static int
+lower_callback(struct notifier_block *nb, unsigned long val, void *v)
 {
+	capi_profile profile;
+	u32 contr = (long)v;
 
-	switch (cmd) {
-	case KCI_CONTRUP:
+	switch (val) {
+	case CAPICTR_UP:
 		printk(KERN_INFO "capidrv: controller %hu up\n", contr);
-		(void) capidrv_addcontr(contr, (capi_profile *) data);
+		if (capi20_get_profile(contr, &profile) == CAPI_NOERROR)
+			(void) capidrv_addcontr(contr, &profile);
 		break;
-	case KCI_CONTRDOWN:
+	case CAPICTR_DOWN:
 		printk(KERN_INFO "capidrv: controller %hu down\n", contr);
 		(void) capidrv_delcontr(contr);
 		break;
 	}
+	return NOTIFY_OK;
 }
 
 /*
@@ -2262,6 +2267,10 @@ static void __exit proc_exit(void)
 	remove_proc_entry("capi/capidrv", NULL);
 }
 
+static struct notifier_block capictr_nb = {
+	.notifier_call = lower_callback,
+};
+
 static int __init capidrv_init(void)
 {
 	capi_profile profile;
@@ -2278,7 +2287,7 @@ static int __init capidrv_init(void)
 		return -EIO;
 	}
 
-	capi20_set_callback(&global.ap, lower_callback);
+	register_capictr_notifier(&capictr_nb);
 
 	errcode = capi20_get_profile(0, &profile);
 	if (errcode != CAPI_NOERROR) {
@@ -2300,6 +2309,7 @@ static int __init capidrv_init(void)
 
 static void __exit capidrv_exit(void)
 {
+	unregister_capictr_notifier(&capictr_nb);
 	capi20_release(&global.ap);
 
 	proc_exit();
diff --git a/drivers/isdn/capi/kcapi.c b/drivers/isdn/capi/kcapi.c
index 9362a7a66aa1..e08914d33be1 100644
--- a/drivers/isdn/capi/kcapi.c
+++ b/drivers/isdn/capi/kcapi.c
@@ -44,12 +44,10 @@ module_param(showcapimsgs, uint, 0);
 
 /* ------------------------------------------------------------- */
 
-struct capi_notifier {
+struct capictr_event {
 	struct work_struct work;
-	unsigned int cmd;
+	unsigned int type;
 	u32 controller;
-	u16 applid;
-	u32 ncci;
 };
 
 /* ------------------------------------------------------------- */
@@ -71,6 +69,8 @@ struct capi_ctr *capi_controller[CAPI_MAXCONTR];
 
 static int ncontrollers;
 
+static BLOCKING_NOTIFIER_HEAD(ctr_notifier_list);
+
 /* -------- controller ref counting -------------------------------------- */
 
 static inline struct capi_ctr *
@@ -165,8 +165,6 @@ static void release_appl(struct capi_ctr *ctr, u16 applid)
 	capi_ctr_put(ctr);
 }
 
-/* -------- KCI_CONTRUP --------------------------------------- */
-
 static void notify_up(u32 contr)
 {
 	struct capi20_appl *ap;
@@ -188,16 +186,11 @@ static void notify_up(u32 contr)
 			if (!ap || ap->release_in_progress)
 				continue;
 			register_appl(ctr, applid, &ap->rparam);
-			if (ap->callback && !ap->release_in_progress)
-				ap->callback(KCI_CONTRUP, contr,
-					     &ctr->profile);
 		}
 	} else
 		printk(KERN_WARNING "%s: invalid contr %d\n", __func__, contr);
 }
 
-/* -------- KCI_CONTRDOWN ------------------------------------- */
-
 static void ctr_down(struct capi_ctr *ctr)
 {
 	struct capi20_appl *ap;
@@ -215,11 +208,8 @@ static void ctr_down(struct capi_ctr *ctr)
 
 	for (applid = 1; applid <= CAPI_MAXAPPL; applid++) {
 		ap = get_capi_appl_by_nr(applid);
-		if (ap && !ap->release_in_progress) {
-			if (ap->callback)
-				ap->callback(KCI_CONTRDOWN, ctr->cnr, NULL);
+		if (ap && !ap->release_in_progress)
 			capi_ctr_put(ctr);
-		}
 	}
 }
 
@@ -237,45 +227,63 @@ static void notify_down(u32 contr)
 		printk(KERN_WARNING "%s: invalid contr %d\n", __func__, contr);
 }
 
-static void notify_handler(struct work_struct *work)
+static int
+notify_handler(struct notifier_block *nb, unsigned long val, void *v)
 {
-	struct capi_notifier *np =
-		container_of(work, struct capi_notifier, work);
+	u32 contr = (long)v;
 
-	switch (np->cmd) {
-	case KCI_CONTRUP:
-		notify_up(np->controller);
+	switch (val) {
+	case CAPICTR_UP:
+		notify_up(contr);
 		break;
-	case KCI_CONTRDOWN:
-		notify_down(np->controller);
+	case CAPICTR_DOWN:
+		notify_down(contr);
 		break;
 	}
+	return NOTIFY_OK;
+}
+
+static void do_notify_work(struct work_struct *work)
+{
+	struct capictr_event *event =
+		container_of(work, struct capictr_event, work);
 
-	kfree(np);
+	blocking_notifier_call_chain(&ctr_notifier_list, event->type,
+				     (void *)(long)event->controller);
+	kfree(event);
 }
 
 /*
  * The notifier will result in adding/deleteing of devices. Devices can
  * only removed in user process, not in bh.
  */
-static int notify_push(unsigned int cmd, u32 controller, u16 applid, u32 ncci)
+static int notify_push(unsigned int event_type, u32 controller)
 {
-	struct capi_notifier *np = kmalloc(sizeof(*np), GFP_ATOMIC);
+	struct capictr_event *event = kmalloc(sizeof(*event), GFP_ATOMIC);
 
-	if (!np)
+	if (!event)
 		return -ENOMEM;
 
-	INIT_WORK(&np->work, notify_handler);
-	np->cmd = cmd;
-	np->controller = controller;
-	np->applid = applid;
-	np->ncci = ncci;
+	INIT_WORK(&event->work, do_notify_work);
+	event->type = event_type;
+	event->controller = controller;
 
-	schedule_work(&np->work);
+	schedule_work(&event->work);
 	return 0;
 }
 
-	
+int register_capictr_notifier(struct notifier_block *nb)
+{
+	return blocking_notifier_chain_register(&ctr_notifier_list, nb);
+}
+EXPORT_SYMBOL_GPL(register_capictr_notifier);
+
+int unregister_capictr_notifier(struct notifier_block *nb)
+{
+	return blocking_notifier_chain_unregister(&ctr_notifier_list, nb);
+}
+EXPORT_SYMBOL_GPL(unregister_capictr_notifier);
+
 /* -------- Receiver ------------------------------------------ */
 
 static void recv_handler(struct work_struct *work)
@@ -401,7 +409,7 @@ void capi_ctr_ready(struct capi_ctr *ctr)
 	printk(KERN_NOTICE "kcapi: controller [%03d] \"%s\" ready.\n",
 	       ctr->cnr, ctr->name);
 
-	notify_push(KCI_CONTRUP, ctr->cnr, 0, 0);
+	notify_push(CAPICTR_UP, ctr->cnr);
 }
 
 EXPORT_SYMBOL(capi_ctr_ready);
@@ -418,7 +426,7 @@ void capi_ctr_down(struct capi_ctr *ctr)
 {
 	printk(KERN_NOTICE "kcapi: controller [%03d] down.\n", ctr->cnr);
 
-	notify_push(KCI_CONTRDOWN, ctr->cnr, 0, 0);
+	notify_push(CAPICTR_DOWN, ctr->cnr);
 }
 
 EXPORT_SYMBOL(capi_ctr_down);
@@ -633,7 +641,6 @@ u16 capi20_register(struct capi20_appl *ap)
 	ap->nrecvdatapkt = 0;
 	ap->nsentctlpkt = 0;
 	ap->nsentdatapkt = 0;
-	ap->callback = NULL;
 	mutex_init(&ap->recv_mtx);
 	skb_queue_head_init(&ap->recv_queue);
 	INIT_WORK(&ap->recv_work, recv_handler);
@@ -1137,30 +1144,6 @@ int capi20_manufacturer(unsigned int cmd, void __user *data)
 
 EXPORT_SYMBOL(capi20_manufacturer);
 
-/* temporary hack */
-
-/**
- * capi20_set_callback() - set CAPI application notification callback function
- * @ap:		CAPI application descriptor structure.
- * @callback:	callback function (NULL to remove).
- *
- * If not NULL, the callback function will be called to notify the
- * application of the addition or removal of a controller.
- * The first argument (cmd) will tell whether the controller was added
- * (KCI_CONTRUP) or removed (KCI_CONTRDOWN).
- * The second argument (contr) will be the controller number.
- * For cmd==KCI_CONTRUP the third argument (data) will be a pointer to the
- * new controller's capability profile structure.
- */
-
-void capi20_set_callback(struct capi20_appl *ap,
-			 void (*callback) (unsigned int cmd, __u32 contr, void *data))
-{
-	ap->callback = callback;
-}
-
-EXPORT_SYMBOL(capi20_set_callback);
-
 /* ------------------------------------------------------------- */
 /* -------- Init & Cleanup ------------------------------------- */
 /* ------------------------------------------------------------- */
@@ -1169,10 +1152,17 @@ EXPORT_SYMBOL(capi20_set_callback);
  * init / exit functions
  */
 
+static struct notifier_block capictr_nb = {
+	.notifier_call = notify_handler,
+	.priority = INT_MAX,
+};
+
 static int __init kcapi_init(void)
 {
 	int err;
 
+	register_capictr_notifier(&capictr_nb);
+
 	err = cdebug_init();
 	if (!err)
 		kcapi_proc_init();
diff --git a/include/linux/kernelcapi.h b/include/linux/kernelcapi.h
index a53e932f80fb..9c2683929fd3 100644
--- a/include/linux/kernelcapi.h
+++ b/include/linux/kernelcapi.h
@@ -48,9 +48,7 @@ typedef struct kcapi_carddef {
 #include <linux/list.h>
 #include <linux/skbuff.h>
 #include <linux/workqueue.h>
-
-#define	KCI_CONTRUP	0	/* arg: struct capi_profile */
-#define	KCI_CONTRDOWN	1	/* arg: NULL */
+#include <linux/notifier.h>
 
 struct capi20_appl {
 	u16 applid;
@@ -67,11 +65,6 @@ struct capi20_appl {
 	struct sk_buff_head recv_queue;
 	struct work_struct recv_work;
 	int release_in_progress;
-
-	/* ugly hack to allow for notification of added/removed
-	 * controllers. The Right Way (tm) is known. XXX
-	 */
-	void (*callback) (unsigned int cmd, __u32 contr, void *data);
 };
 
 u16 capi20_isinstalled(void);
@@ -84,11 +77,11 @@ u16 capi20_get_serial(u32 contr, u8 serial[CAPI_SERIAL_LEN]);
 u16 capi20_get_profile(u32 contr, struct capi_profile *profp);
 int capi20_manufacturer(unsigned int cmd, void __user *data);
 
-/* temporary hack XXX */
-void capi20_set_callback(struct capi20_appl *ap, 
-			 void (*callback) (unsigned int cmd, __u32 contr, void *data));
-
+#define CAPICTR_UP			0
+#define CAPICTR_DOWN			1
 
+int register_capictr_notifier(struct notifier_block *nb);
+int unregister_capictr_notifier(struct notifier_block *nb);
 
 #define CAPI_NOERROR                      0x0000
 
-- 
cgit v1.2.3


From 0ca3a017a7373a4545dd7b345a8a0cecc16bc7e2 Mon Sep 17 00:00:00 2001
From: Jan Kiszka <jan.kiszka@web.de>
Date: Mon, 8 Feb 2010 10:12:14 +0000
Subject: CAPI: Rework locking of controller data structures

This patch applies the mutex so far only protecting the controller list
to (almost) all accesses of controller data structures. It also reworks
waiting on state changes in old_capi_manufacturer so that it no longer
poll and holds a module reference to the controller owner while waiting
(the latter was partly done already). Modification and checking of the
blocked state remains racy by design, the caller is responsible for
dealing with this.

Signed-off-by: Jan Kiszka <jan.kiszka@web.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/isdn/capi/kcapi.c      | 291 +++++++++++++++++++++++++++++------------
 drivers/isdn/capi/kcapi.h      |   5 +-
 drivers/isdn/capi/kcapi_proc.c |   5 +
 include/linux/isdn/capilli.h   |   5 +-
 4 files changed, 217 insertions(+), 89 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/isdn/capi/kcapi.c b/drivers/isdn/capi/kcapi.c
index e08914d33be1..a99f7e3f8f51 100644
--- a/drivers/isdn/capi/kcapi.c
+++ b/drivers/isdn/capi/kcapi.c
@@ -61,11 +61,12 @@ static char capi_manufakturer[64] = "AVM Berlin";
 LIST_HEAD(capi_drivers);
 DEFINE_MUTEX(capi_drivers_lock);
 
+struct capi_ctr *capi_controller[CAPI_MAXCONTR];
+DEFINE_MUTEX(capi_controller_lock);
+
 static DEFINE_RWLOCK(application_lock);
-static DEFINE_MUTEX(controller_mutex);
 
 struct capi20_appl *capi_applications[CAPI_MAXAPPL];
-struct capi_ctr *capi_controller[CAPI_MAXCONTR];
 
 static int ncontrollers;
 
@@ -171,13 +172,15 @@ static void notify_up(u32 contr)
 	struct capi_ctr *ctr;
 	u16 applid;
 
+	mutex_lock(&capi_controller_lock);
+
 	if (showcapimsgs & 1)
 	        printk(KERN_DEBUG "kcapi: notify up contr %d\n", contr);
 
 	ctr = get_capi_ctr_by_nr(contr);
 	if (ctr) {
 		if (ctr->state == CAPI_CTR_RUNNING)
-			return;
+			goto unlock_out;
 
 		ctr->state = CAPI_CTR_RUNNING;
 
@@ -187,19 +190,24 @@ static void notify_up(u32 contr)
 				continue;
 			register_appl(ctr, applid, &ap->rparam);
 		}
+
+		wake_up_interruptible_all(&ctr->state_wait_queue);
 	} else
 		printk(KERN_WARNING "%s: invalid contr %d\n", __func__, contr);
+
+unlock_out:
+	mutex_unlock(&capi_controller_lock);
 }
 
-static void ctr_down(struct capi_ctr *ctr)
+static void ctr_down(struct capi_ctr *ctr, int new_state)
 {
 	struct capi20_appl *ap;
 	u16 applid;
 
-	if (ctr->state == CAPI_CTR_DETECTED)
+	if (ctr->state == CAPI_CTR_DETECTED || ctr->state == CAPI_CTR_DETACHED)
 		return;
 
-	ctr->state = CAPI_CTR_DETECTED;
+	ctr->state = new_state;
 
 	memset(ctr->manu, 0, sizeof(ctr->manu));
 	memset(&ctr->version, 0, sizeof(ctr->version));
@@ -211,20 +219,26 @@ static void ctr_down(struct capi_ctr *ctr)
 		if (ap && !ap->release_in_progress)
 			capi_ctr_put(ctr);
 	}
+
+	wake_up_interruptible_all(&ctr->state_wait_queue);
 }
 
 static void notify_down(u32 contr)
 {
 	struct capi_ctr *ctr;
 
+	mutex_lock(&capi_controller_lock);
+
 	if (showcapimsgs & 1)
 		printk(KERN_DEBUG "kcapi: notify down contr %d\n", contr);
 
 	ctr = get_capi_ctr_by_nr(contr);
 	if (ctr)
-		ctr_down(ctr);
+		ctr_down(ctr, CAPI_CTR_DETECTED);
 	else
 		printk(KERN_WARNING "%s: invalid contr %d\n", __func__, contr);
+
+	mutex_unlock(&capi_controller_lock);
 }
 
 static int
@@ -436,6 +450,9 @@ EXPORT_SYMBOL(capi_ctr_down);
  * @ctr:	controller descriptor structure.
  *
  * Called by hardware driver to stop data flow.
+ *
+ * Note: The caller is responsible for synchronizing concurrent state changes
+ * as well as invocations of capi_ctr_handle_message.
  */
 
 void capi_ctr_suspend_output(struct capi_ctr *ctr)
@@ -454,6 +471,9 @@ EXPORT_SYMBOL(capi_ctr_suspend_output);
  * @ctr:	controller descriptor structure.
  *
  * Called by hardware driver to resume data flow.
+ *
+ * Note: The caller is responsible for synchronizing concurrent state changes
+ * as well as invocations of capi_ctr_handle_message.
  */
 
 void capi_ctr_resume_output(struct capi_ctr *ctr)
@@ -481,21 +501,19 @@ int attach_capi_ctr(struct capi_ctr *ctr)
 {
 	int i;
 
-	mutex_lock(&controller_mutex);
+	mutex_lock(&capi_controller_lock);
 
 	for (i = 0; i < CAPI_MAXCONTR; i++) {
 		if (!capi_controller[i])
 			break;
 	}
 	if (i == CAPI_MAXCONTR) {
-		mutex_unlock(&controller_mutex);
+		mutex_unlock(&capi_controller_lock);
 		printk(KERN_ERR "kcapi: out of controller slots\n");
 	   	return -EBUSY;
 	}
 	capi_controller[i] = ctr;
 
-	mutex_unlock(&controller_mutex);
-
 	ctr->nrecvctlpkt = 0;
 	ctr->nrecvdatapkt = 0;
 	ctr->nsentctlpkt = 0;
@@ -504,11 +522,15 @@ int attach_capi_ctr(struct capi_ctr *ctr)
 	ctr->state = CAPI_CTR_DETECTED;
 	ctr->blocked = 0;
 	ctr->traceflag = showcapimsgs;
+	init_waitqueue_head(&ctr->state_wait_queue);
 
 	sprintf(ctr->procfn, "capi/controllers/%d", ctr->cnr);
 	ctr->procent = proc_create_data(ctr->procfn, 0, NULL, ctr->proc_fops, ctr);
 
 	ncontrollers++;
+
+	mutex_unlock(&capi_controller_lock);
+
 	printk(KERN_NOTICE "kcapi: controller [%03d]: %s attached\n",
 			ctr->cnr, ctr->name);
 	return 0;
@@ -527,19 +549,29 @@ EXPORT_SYMBOL(attach_capi_ctr);
 
 int detach_capi_ctr(struct capi_ctr *ctr)
 {
-	ctr_down(ctr);
+	int err = 0;
 
-	ncontrollers--;
+	mutex_lock(&capi_controller_lock);
 
-	if (ctr->procent) {
-		remove_proc_entry(ctr->procfn, NULL);
-		ctr->procent = NULL;
+	ctr_down(ctr, CAPI_CTR_DETACHED);
+
+	if (capi_controller[ctr->cnr - 1] != ctr) {
+		err = -EINVAL;
+		goto unlock_out;
 	}
 	capi_controller[ctr->cnr - 1] = NULL;
+	ncontrollers--;
+
+	if (ctr->procent)
+		remove_proc_entry(ctr->procfn, NULL);
+
 	printk(KERN_NOTICE "kcapi: controller [%03d]: %s unregistered\n",
 	       ctr->cnr, ctr->name);
 
-	return 0;
+unlock_out:
+	mutex_unlock(&capi_controller_lock);
+
+	return err;
 }
 
 EXPORT_SYMBOL(detach_capi_ctr);
@@ -589,13 +621,21 @@ EXPORT_SYMBOL(unregister_capi_driver);
 
 u16 capi20_isinstalled(void)
 {
+	u16 ret = CAPI_REGNOTINSTALLED;
 	int i;
-	for (i = 0; i < CAPI_MAXCONTR; i++) {
+
+	mutex_lock(&capi_controller_lock);
+
+	for (i = 0; i < CAPI_MAXCONTR; i++)
 		if (capi_controller[i] &&
-		    capi_controller[i]->state == CAPI_CTR_RUNNING)
-			return CAPI_NOERROR;
-	}
-	return CAPI_REGNOTINSTALLED;
+		    capi_controller[i]->state == CAPI_CTR_RUNNING) {
+			ret = CAPI_NOERROR;
+			break;
+		}
+
+	mutex_unlock(&capi_controller_lock);
+
+	return ret;
 }
 
 EXPORT_SYMBOL(capi20_isinstalled);
@@ -648,14 +688,16 @@ u16 capi20_register(struct capi20_appl *ap)
 
 	write_unlock_irqrestore(&application_lock, flags);
 	
-	mutex_lock(&controller_mutex);
+	mutex_lock(&capi_controller_lock);
+
 	for (i = 0; i < CAPI_MAXCONTR; i++) {
 		if (!capi_controller[i] ||
 		    capi_controller[i]->state != CAPI_CTR_RUNNING)
 			continue;
 		register_appl(capi_controller[i], applid, &ap->rparam);
 	}
-	mutex_unlock(&controller_mutex);
+
+	mutex_unlock(&capi_controller_lock);
 
 	if (showcapimsgs & 1) {
 		printk(KERN_DEBUG "kcapi: appl %d up\n", applid);
@@ -688,14 +730,16 @@ u16 capi20_release(struct capi20_appl *ap)
 	capi_applications[ap->applid - 1] = NULL;
 	write_unlock_irqrestore(&application_lock, flags);
 
-	mutex_lock(&controller_mutex);
+	mutex_lock(&capi_controller_lock);
+
 	for (i = 0; i < CAPI_MAXCONTR; i++) {
 		if (!capi_controller[i] ||
 		    capi_controller[i]->state != CAPI_CTR_RUNNING)
 			continue;
 		release_appl(capi_controller[i], ap->applid);
 	}
-	mutex_unlock(&controller_mutex);
+
+	mutex_unlock(&capi_controller_lock);
 
 	flush_scheduled_work();
 	skb_queue_purge(&ap->recv_queue);
@@ -734,6 +778,12 @@ u16 capi20_put_message(struct capi20_appl *ap, struct sk_buff *skb)
 	    || !capi_cmd_valid(CAPIMSG_COMMAND(skb->data))
 	    || !capi_subcmd_valid(CAPIMSG_SUBCOMMAND(skb->data)))
 		return CAPI_ILLCMDORSUBCMDORMSGTOSMALL;
+
+	/*
+	 * The controller reference is protected by the existence of the
+	 * application passed to us. We assume that the caller properly
+	 * synchronizes this service with capi20_release.
+	 */
 	ctr = get_capi_ctr_by_nr(CAPIMSG_CONTROLLER(skb->data));
 	if (!ctr || ctr->state != CAPI_CTR_RUNNING) {
 		ctr = get_capi_ctr_by_nr(1); /* XXX why? */
@@ -798,16 +848,24 @@ EXPORT_SYMBOL(capi20_put_message);
 u16 capi20_get_manufacturer(u32 contr, u8 *buf)
 {
 	struct capi_ctr *ctr;
+	u16 ret;
 
 	if (contr == 0) {
 		strlcpy(buf, capi_manufakturer, CAPI_MANUFACTURER_LEN);
 		return CAPI_NOERROR;
 	}
+
+	mutex_lock(&capi_controller_lock);
+
 	ctr = get_capi_ctr_by_nr(contr);
-	if (!ctr || ctr->state != CAPI_CTR_RUNNING)
-		return CAPI_REGNOTINSTALLED;
-	strlcpy(buf, ctr->manu, CAPI_MANUFACTURER_LEN);
-	return CAPI_NOERROR;
+	if (ctr && ctr->state == CAPI_CTR_RUNNING) {
+		strlcpy(buf, ctr->manu, CAPI_MANUFACTURER_LEN);
+		ret = CAPI_NOERROR;
+	} else
+		ret = CAPI_REGNOTINSTALLED;
+
+	mutex_unlock(&capi_controller_lock);
+	return ret;
 }
 
 EXPORT_SYMBOL(capi20_get_manufacturer);
@@ -825,17 +883,24 @@ EXPORT_SYMBOL(capi20_get_manufacturer);
 u16 capi20_get_version(u32 contr, struct capi_version *verp)
 {
 	struct capi_ctr *ctr;
+	u16 ret;
 
 	if (contr == 0) {
 		*verp = driver_version;
 		return CAPI_NOERROR;
 	}
+
+	mutex_lock(&capi_controller_lock);
+
 	ctr = get_capi_ctr_by_nr(contr);
-	if (!ctr || ctr->state != CAPI_CTR_RUNNING)
-		return CAPI_REGNOTINSTALLED;
+	if (ctr && ctr->state == CAPI_CTR_RUNNING) {
+		memcpy(verp, &ctr->version, sizeof(capi_version));
+		ret = CAPI_NOERROR;
+	} else
+		ret = CAPI_REGNOTINSTALLED;
 
-	memcpy(verp, &ctr->version, sizeof(capi_version));
-	return CAPI_NOERROR;
+	mutex_unlock(&capi_controller_lock);
+	return ret;
 }
 
 EXPORT_SYMBOL(capi20_get_version);
@@ -853,17 +918,24 @@ EXPORT_SYMBOL(capi20_get_version);
 u16 capi20_get_serial(u32 contr, u8 *serial)
 {
 	struct capi_ctr *ctr;
+	u16 ret;
 
 	if (contr == 0) {
 		strlcpy(serial, driver_serial, CAPI_SERIAL_LEN);
 		return CAPI_NOERROR;
 	}
+
+	mutex_lock(&capi_controller_lock);
+
 	ctr = get_capi_ctr_by_nr(contr);
-	if (!ctr || ctr->state != CAPI_CTR_RUNNING)
-		return CAPI_REGNOTINSTALLED;
+	if (ctr && ctr->state == CAPI_CTR_RUNNING) {
+		strlcpy(serial, ctr->serial, CAPI_SERIAL_LEN);
+		ret = CAPI_NOERROR;
+	} else
+		ret = CAPI_REGNOTINSTALLED;
 
-	strlcpy(serial, ctr->serial, CAPI_SERIAL_LEN);
-	return CAPI_NOERROR;
+	mutex_unlock(&capi_controller_lock);
+	return ret;
 }
 
 EXPORT_SYMBOL(capi20_get_serial);
@@ -881,21 +953,64 @@ EXPORT_SYMBOL(capi20_get_serial);
 u16 capi20_get_profile(u32 contr, struct capi_profile *profp)
 {
 	struct capi_ctr *ctr;
+	u16 ret;
 
 	if (contr == 0) {
 		profp->ncontroller = ncontrollers;
 		return CAPI_NOERROR;
 	}
+
+	mutex_lock(&capi_controller_lock);
+
 	ctr = get_capi_ctr_by_nr(contr);
-	if (!ctr || ctr->state != CAPI_CTR_RUNNING)
-		return CAPI_REGNOTINSTALLED;
+	if (ctr && ctr->state == CAPI_CTR_RUNNING) {
+		memcpy(profp, &ctr->profile, sizeof(struct capi_profile));
+		ret = CAPI_NOERROR;
+	} else
+		ret = CAPI_REGNOTINSTALLED;
 
-	memcpy(profp, &ctr->profile, sizeof(struct capi_profile));
-	return CAPI_NOERROR;
+	mutex_unlock(&capi_controller_lock);
+	return ret;
 }
 
 EXPORT_SYMBOL(capi20_get_profile);
 
+/* Must be called with capi_controller_lock held. */
+static int wait_on_ctr_state(struct capi_ctr *ctr, unsigned int state)
+{
+	DEFINE_WAIT(wait);
+	int retval = 0;
+
+	ctr = capi_ctr_get(ctr);
+	if (!ctr)
+		return -ESRCH;
+
+	for (;;) {
+		prepare_to_wait(&ctr->state_wait_queue, &wait,
+				TASK_INTERRUPTIBLE);
+
+		if (ctr->state == state)
+			break;
+		if (ctr->state == CAPI_CTR_DETACHED) {
+			retval = -ESRCH;
+			break;
+		}
+		if (signal_pending(current)) {
+			retval = -EINTR;
+			break;
+		}
+
+		mutex_unlock(&capi_controller_lock);
+		schedule();
+		mutex_lock(&capi_controller_lock);
+	}
+	finish_wait(&ctr->state_wait_queue, &wait);
+
+	capi_ctr_put(ctr);
+
+	return retval;
+}
+
 #ifdef AVMB1_COMPAT
 static int old_capi_manufacturer(unsigned int cmd, void __user *data)
 {
@@ -973,27 +1088,30 @@ static int old_capi_manufacturer(unsigned int cmd, void __user *data)
 					   sizeof(avmb1_loadandconfigdef)))
 				return -EFAULT;
 		}
+
+		mutex_lock(&capi_controller_lock);
+
 		ctr = get_capi_ctr_by_nr(ldef.contr);
-		if (!ctr)
-			return -EINVAL;
-		ctr = capi_ctr_get(ctr);
-		if (!ctr)
-			return -ESRCH;
+		if (!ctr) {
+			retval = -EINVAL;
+			goto load_unlock_out;
+		}
+
 		if (ctr->load_firmware == NULL) {
 			printk(KERN_DEBUG "kcapi: load: no load function\n");
-			capi_ctr_put(ctr);
-			return -ESRCH;
+			retval = -ESRCH;
+			goto load_unlock_out;
 		}
 
 		if (ldef.t4file.len <= 0) {
 			printk(KERN_DEBUG "kcapi: load: invalid parameter: length of t4file is %d ?\n", ldef.t4file.len);
-			capi_ctr_put(ctr);
-			return -EINVAL;
+			retval = -EINVAL;
+			goto load_unlock_out;
 		}
 		if (ldef.t4file.data == NULL) {
 			printk(KERN_DEBUG "kcapi: load: invalid parameter: dataptr is 0\n");
-			capi_ctr_put(ctr);
-			return -EINVAL;
+			retval = -EINVAL;
+			goto load_unlock_out;
 		}
 
 		ldata.firmware.user = 1;
@@ -1005,52 +1123,47 @@ static int old_capi_manufacturer(unsigned int cmd, void __user *data)
 
 		if (ctr->state != CAPI_CTR_DETECTED) {
 			printk(KERN_INFO "kcapi: load: contr=%d not in detect state\n", ldef.contr);
-			capi_ctr_put(ctr);
-			return -EBUSY;
+			retval = -EBUSY;
+			goto load_unlock_out;
 		}
 		ctr->state = CAPI_CTR_LOADING;
 
 		retval = ctr->load_firmware(ctr, &ldata);
-
 		if (retval) {
 			ctr->state = CAPI_CTR_DETECTED;
-			capi_ctr_put(ctr);
-			return retval;
+			goto load_unlock_out;
 		}
 
-		while (ctr->state != CAPI_CTR_RUNNING) {
-
-			msleep_interruptible(100);	/* 0.1 sec */
+		retval = wait_on_ctr_state(ctr, CAPI_CTR_RUNNING);
 
-			if (signal_pending(current)) {
-				capi_ctr_put(ctr);
-				return -EINTR;
-			}
-		}
-		capi_ctr_put(ctr);
-		return 0;
+load_unlock_out:
+		mutex_unlock(&capi_controller_lock);
+		return retval;
 
 	case AVMB1_RESETCARD:
 		if (copy_from_user(&rdef, data, sizeof(avmb1_resetdef)))
 			return -EFAULT;
+
+		retval = 0;
+
+		mutex_lock(&capi_controller_lock);
+
 		ctr = get_capi_ctr_by_nr(rdef.contr);
-		if (!ctr)
-			return -ESRCH;
+		if (!ctr) {
+			retval = -ESRCH;
+			goto reset_unlock_out;
+		}
 
 		if (ctr->state == CAPI_CTR_DETECTED)
-			return 0;
+			goto reset_unlock_out;
 
 		ctr->reset_ctr(ctr);
 
-		while (ctr->state > CAPI_CTR_DETECTED) {
-
-			msleep_interruptible(100);	/* 0.1 sec */
-
-			if (signal_pending(current))
-				return -EINTR;
-		}
-		return 0;
+		retval = wait_on_ctr_state(ctr, CAPI_CTR_DETECTED);
 
+reset_unlock_out:
+		mutex_unlock(&capi_controller_lock);
+		return retval;
 	}
 	return -EINVAL;
 }
@@ -1068,6 +1181,7 @@ static int old_capi_manufacturer(unsigned int cmd, void __user *data)
 int capi20_manufacturer(unsigned int cmd, void __user *data)
 {
 	struct capi_ctr *ctr;
+	int retval;
 
 	switch (cmd) {
 #ifdef AVMB1_COMPAT
@@ -1085,14 +1199,20 @@ int capi20_manufacturer(unsigned int cmd, void __user *data)
 		if (copy_from_user(&fdef, data, sizeof(kcapi_flagdef)))
 			return -EFAULT;
 
+		mutex_lock(&capi_controller_lock);
+
 		ctr = get_capi_ctr_by_nr(fdef.contr);
-		if (!ctr)
-			return -ESRCH;
+		if (ctr) {
+			ctr->traceflag = fdef.flag;
+			printk(KERN_INFO "kcapi: contr [%03d] set trace=%d\n",
+			       ctr->cnr, ctr->traceflag);
+			retval = 0;
+		} else
+			retval = -ESRCH;
+
+		mutex_unlock(&capi_controller_lock);
 
-		ctr->traceflag = fdef.flag;
-		printk(KERN_INFO "kcapi: contr [%03d] set trace=%d\n",
-		       ctr->cnr, ctr->traceflag);
-		return 0;
+		return retval;
 	}
 	case KCAPI_CMD_ADDCARD:
 	{
@@ -1100,7 +1220,6 @@ int capi20_manufacturer(unsigned int cmd, void __user *data)
 		struct capi_driver *driver = NULL;
 		capicardparams cparams;
 		kcapi_carddef cdef;
-		int retval;
 
 		if ((retval = copy_from_user(&cdef, data, sizeof(cdef))))
 			return retval;
diff --git a/drivers/isdn/capi/kcapi.h b/drivers/isdn/capi/kcapi.h
index 07c58500fe48..f4620b38ec51 100644
--- a/drivers/isdn/capi/kcapi.h
+++ b/drivers/isdn/capi/kcapi.h
@@ -24,6 +24,7 @@ printk(KERN_DEBUG "%s: " format "\n" , __func__ , ## arg); \
 #endif
 
 enum {
+	CAPI_CTR_DETACHED = 0,
 	CAPI_CTR_DETECTED = 1,
 	CAPI_CTR_LOADING  = 2,
 	CAPI_CTR_RUNNING  = 3,
@@ -32,8 +33,10 @@ enum {
 extern struct list_head capi_drivers;
 extern struct mutex capi_drivers_lock;
 
-extern struct capi20_appl *capi_applications[CAPI_MAXAPPL];
 extern struct capi_ctr *capi_controller[CAPI_MAXCONTR];
+extern struct mutex capi_controller_lock;
+
+extern struct capi20_appl *capi_applications[CAPI_MAXAPPL];
 
 #ifdef CONFIG_PROC_FS
 
diff --git a/drivers/isdn/capi/kcapi_proc.c b/drivers/isdn/capi/kcapi_proc.c
index 71b07610ff31..3e6e17a24389 100644
--- a/drivers/isdn/capi/kcapi_proc.c
+++ b/drivers/isdn/capi/kcapi_proc.c
@@ -35,7 +35,10 @@ static char *state2str(unsigned short state)
 // ---------------------------------------------------------------------------
 
 static void *controller_start(struct seq_file *seq, loff_t *pos)
+	__acquires(capi_controller_lock)
 {
+	mutex_lock(&capi_controller_lock);
+
 	if (*pos < CAPI_MAXCONTR)
 		return &capi_controller[*pos];
 
@@ -52,7 +55,9 @@ static void *controller_next(struct seq_file *seq, void *v, loff_t *pos)
 }
 
 static void controller_stop(struct seq_file *seq, void *v)
+	__releases(capi_controller_lock)
 {
+	mutex_unlock(&capi_controller_lock);
 }
 
 static int controller_show(struct seq_file *seq, void *v)
diff --git a/include/linux/isdn/capilli.h b/include/linux/isdn/capilli.h
index 856f38eddd78..11b57c485854 100644
--- a/include/linux/isdn/capilli.h
+++ b/include/linux/isdn/capilli.h
@@ -66,9 +66,10 @@ struct capi_ctr {
 	unsigned long nsentdatapkt;
 
 	int cnr;				/* controller number */
-	volatile unsigned short state;		/* controller state */
-	volatile int blocked;			/* output blocked */
+	unsigned short state;			/* controller state */
+	int blocked;				/* output blocked */
 	int traceflag;				/* capi trace */
+	wait_queue_head_t state_wait_queue;
 
 	struct proc_dir_entry *procent;
         char procfn[128];
-- 
cgit v1.2.3


From 43cf38eb5cea91245502df3fcee4dbfc1c74dd1c Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 2 Feb 2010 14:38:57 +0900
Subject: percpu: add __percpu sparse annotations to core kernel subsystems

Add __percpu sparse annotations to core subsystems.

These annotations are to make sparse consider percpu variables to be
in a different address space and warn if accessed without going
through percpu accessors.  This patch doesn't affect normal builds.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Christoph Lameter <cl@linux-foundation.org>
Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: linux-mm@kvack.org
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Dipankar Sarma <dipankar@in.ibm.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Eric Biederman <ebiederm@xmission.com>
---
 include/linux/blktrace_api.h   |  4 ++--
 include/linux/genhd.h          |  2 +-
 include/linux/kexec.h          |  2 +-
 include/linux/mmzone.h         |  2 +-
 include/linux/module.h         |  2 +-
 include/linux/percpu_counter.h |  2 +-
 include/linux/srcu.h           |  2 +-
 kernel/kexec.c                 |  2 +-
 kernel/sched.c                 |  4 ++--
 kernel/stop_machine.c          |  2 +-
 mm/percpu.c                    | 18 ++++++++++--------
 11 files changed, 22 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h
index 3b73b9992b26..416bf62d6d46 100644
--- a/include/linux/blktrace_api.h
+++ b/include/linux/blktrace_api.h
@@ -150,8 +150,8 @@ struct blk_user_trace_setup {
 struct blk_trace {
 	int trace_state;
 	struct rchan *rchan;
-	unsigned long *sequence;
-	unsigned char *msg_data;
+	unsigned long __percpu *sequence;
+	unsigned char __percpu *msg_data;
 	u16 act_mask;
 	u64 start_lba;
 	u64 end_lba;
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 9717081c75ad..56b50514ab25 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -101,7 +101,7 @@ struct hd_struct {
 	unsigned long stamp;
 	int in_flight[2];
 #ifdef	CONFIG_SMP
-	struct disk_stats *dkstats;
+	struct disk_stats __percpu *dkstats;
 #else
 	struct disk_stats dkstats;
 #endif
diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index c356b6914ffd..03e8e8dbc577 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -199,7 +199,7 @@ extern struct kimage *kexec_crash_image;
  */
 extern struct resource crashk_res;
 typedef u32 note_buf_t[KEXEC_NOTE_BYTES/4];
-extern note_buf_t *crash_notes;
+extern note_buf_t __percpu *crash_notes;
 extern u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4];
 extern size_t vmcoreinfo_size;
 extern size_t vmcoreinfo_max_size;
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 7874201a3556..41acd4bf7664 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -301,7 +301,7 @@ struct zone {
 	unsigned long		min_unmapped_pages;
 	unsigned long		min_slab_pages;
 #endif
-	struct per_cpu_pageset	*pageset;
+	struct per_cpu_pageset __percpu *pageset;
 	/*
 	 * free areas of different sizes
 	 */
diff --git a/include/linux/module.h b/include/linux/module.h
index 7e74ae0051cc..dd618eb026aa 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -365,7 +365,7 @@ struct module
 
 	struct module_ref {
 		int count;
-	} *refptr;
+	} __percpu *refptr;
 #endif
 
 #ifdef CONFIG_CONSTRUCTORS
diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h
index a7684a513994..9bd103c844ee 100644
--- a/include/linux/percpu_counter.h
+++ b/include/linux/percpu_counter.h
@@ -21,7 +21,7 @@ struct percpu_counter {
 #ifdef CONFIG_HOTPLUG_CPU
 	struct list_head list;	/* All percpu_counters are on a list */
 #endif
-	s32 *counters;
+	s32 __percpu *counters;
 };
 
 extern int percpu_counter_batch;
diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index 4765d97dcafb..41eedccc962c 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -33,7 +33,7 @@ struct srcu_struct_array {
 
 struct srcu_struct {
 	int completed;
-	struct srcu_struct_array *per_cpu_ref;
+	struct srcu_struct_array __percpu *per_cpu_ref;
 	struct mutex mutex;
 };
 
diff --git a/kernel/kexec.c b/kernel/kexec.c
index ef077fb73155..87ebe8adc474 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -41,7 +41,7 @@
 #include <asm/sections.h>
 
 /* Per cpu memory for storing cpu states in case of system crash. */
-note_buf_t* crash_notes;
+note_buf_t __percpu *crash_notes;
 
 /* vmcoreinfo stuff */
 static unsigned char vmcoreinfo_data[VMCOREINFO_BYTES];
diff --git a/kernel/sched.c b/kernel/sched.c
index 3a8fb30a91b1..978edfd35a96 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1566,7 +1566,7 @@ static unsigned long cpu_avg_load_per_task(int cpu)
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
 
-static __read_mostly unsigned long *update_shares_data;
+static __read_mostly unsigned long __percpu *update_shares_data;
 
 static void __set_se_shares(struct sched_entity *se, unsigned long shares);
 
@@ -10683,7 +10683,7 @@ struct cgroup_subsys cpu_cgroup_subsys = {
 struct cpuacct {
 	struct cgroup_subsys_state css;
 	/* cpuusage holds pointer to a u64-type object on every cpu */
-	u64 *cpuusage;
+	u64 __percpu *cpuusage;
 	struct percpu_counter cpustat[CPUACCT_STAT_NSTATS];
 	struct cpuacct *parent;
 };
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index 912823e2a11b..9bb9fb1bd79c 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -45,7 +45,7 @@ static int refcount;
 static struct workqueue_struct *stop_machine_wq;
 static struct stop_machine_data active, idle;
 static const struct cpumask *active_cpus;
-static void *stop_machine_work;
+static void __percpu *stop_machine_work;
 
 static void set_state(enum stopmachine_state newstate)
 {
diff --git a/mm/percpu.c b/mm/percpu.c
index b336638d20e7..768419d44ad7 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -80,13 +80,15 @@
 /* default addr <-> pcpu_ptr mapping, override in asm/percpu.h if necessary */
 #ifndef __addr_to_pcpu_ptr
 #define __addr_to_pcpu_ptr(addr)					\
-	(void *)((unsigned long)(addr) - (unsigned long)pcpu_base_addr	\
-		 + (unsigned long)__per_cpu_start)
+	(void __percpu *)((unsigned long)(addr) -			\
+			  (unsigned long)pcpu_base_addr	+		\
+			  (unsigned long)__per_cpu_start)
 #endif
 #ifndef __pcpu_ptr_to_addr
 #define __pcpu_ptr_to_addr(ptr)						\
-	(void *)((unsigned long)(ptr) + (unsigned long)pcpu_base_addr	\
-		 - (unsigned long)__per_cpu_start)
+	(void __force *)((unsigned long)(ptr) +				\
+			 (unsigned long)pcpu_base_addr -		\
+			 (unsigned long)__per_cpu_start)
 #endif
 
 struct pcpu_chunk {
@@ -1065,7 +1067,7 @@ static struct pcpu_chunk *alloc_pcpu_chunk(void)
  * RETURNS:
  * Percpu pointer to the allocated area on success, NULL on failure.
  */
-static void *pcpu_alloc(size_t size, size_t align, bool reserved)
+static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved)
 {
 	static int warn_limit = 10;
 	struct pcpu_chunk *chunk;
@@ -1194,7 +1196,7 @@ fail_unlock_mutex:
  * RETURNS:
  * Percpu pointer to the allocated area on success, NULL on failure.
  */
-void *__alloc_percpu(size_t size, size_t align)
+void __percpu *__alloc_percpu(size_t size, size_t align)
 {
 	return pcpu_alloc(size, align, false);
 }
@@ -1215,7 +1217,7 @@ EXPORT_SYMBOL_GPL(__alloc_percpu);
  * RETURNS:
  * Percpu pointer to the allocated area on success, NULL on failure.
  */
-void *__alloc_reserved_percpu(size_t size, size_t align)
+void __percpu *__alloc_reserved_percpu(size_t size, size_t align)
 {
 	return pcpu_alloc(size, align, true);
 }
@@ -1267,7 +1269,7 @@ static void pcpu_reclaim(struct work_struct *work)
  * CONTEXT:
  * Can be called from atomic context.
  */
-void free_percpu(void *ptr)
+void free_percpu(void __percpu *ptr)
 {
 	void *addr;
 	struct pcpu_chunk *chunk;
-- 
cgit v1.2.3


From 003cb608a2533d0927a83bc4e07e46d7a622eda9 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 2 Feb 2010 14:39:01 +0900
Subject: percpu: add __percpu sparse annotations to fs

Add __percpu sparse annotations to fs.

These annotations are to make sparse consider percpu variables to be
in a different address space and warn if accessed without going
through percpu accessors.  This patch doesn't affect normal builds.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: "Theodore Ts'o" <tytso@mit.edu>
Cc: Trond Myklebust <Trond.Myklebust@netapp.com>
Cc: Alex Elder <aelder@sgi.com>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
---
 fs/ext4/ext4.h            | 2 +-
 fs/nfs/iostat.h           | 4 ++--
 fs/xfs/xfs_mount.h        | 2 +-
 include/linux/mount.h     | 2 +-
 include/linux/nfs_fs_sb.h | 2 +-
 5 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 874d169a193e..4cedc91ec59d 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1014,7 +1014,7 @@ struct ext4_sb_info {
 	atomic_t s_lock_busy;
 
 	/* locality groups */
-	struct ext4_locality_group *s_locality_groups;
+	struct ext4_locality_group __percpu *s_locality_groups;
 
 	/* for write statistics */
 	unsigned long s_sectors_written_start;
diff --git a/fs/nfs/iostat.h b/fs/nfs/iostat.h
index 46d779abafd3..1d8d5c813b01 100644
--- a/fs/nfs/iostat.h
+++ b/fs/nfs/iostat.h
@@ -57,12 +57,12 @@ static inline void nfs_add_fscache_stats(struct inode *inode,
 }
 #endif
 
-static inline struct nfs_iostats *nfs_alloc_iostats(void)
+static inline struct nfs_iostats __percpu *nfs_alloc_iostats(void)
 {
 	return alloc_percpu(struct nfs_iostats);
 }
 
-static inline void nfs_free_iostats(struct nfs_iostats *stats)
+static inline void nfs_free_iostats(struct nfs_iostats __percpu *stats)
 {
 	if (stats != NULL)
 		free_percpu(stats);
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 1df7e4502967..24c88870cdb2 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -243,7 +243,7 @@ typedef struct xfs_mount {
 	struct xfs_qmops	*m_qm_ops;	/* vector of XQM ops */
 	atomic_t		m_active_trans;	/* number trans frozen */
 #ifdef HAVE_PERCPU_SB
-	xfs_icsb_cnts_t		*m_sb_cnts;	/* per-cpu superblock counters */
+	xfs_icsb_cnts_t __percpu *m_sb_cnts;	/* per-cpu superblock counters */
 	unsigned long		m_icsb_counters; /* disabled per-cpu counters */
 	struct notifier_block	m_icsb_notifier; /* hotplug cpu notifier */
 	struct mutex		m_icsb_mutex;	/* balancer sync lock */
diff --git a/include/linux/mount.h b/include/linux/mount.h
index 5d5275364867..b5f43a34ef88 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -66,7 +66,7 @@ struct vfsmount {
 	int mnt_pinned;
 	int mnt_ghosts;
 #ifdef CONFIG_SMP
-	int *mnt_writers;
+	int __percpu *mnt_writers;
 #else
 	int mnt_writers;
 #endif
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 34fc6be5bfcf..6a2e44fd75e2 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -105,7 +105,7 @@ struct nfs_server {
 	struct rpc_clnt *	client;		/* RPC client handle */
 	struct rpc_clnt *	client_acl;	/* ACL RPC client handle */
 	struct nlm_host		*nlm_host;	/* NLM client handle */
-	struct nfs_iostats *	io_stats;	/* I/O statistics */
+	struct nfs_iostats __percpu *io_stats;	/* I/O statistics */
 	struct backing_dev_info	backing_dev_info;
 	atomic_long_t		writeback;	/* number of writeback pages */
 	int			flags;		/* various flags */
-- 
cgit v1.2.3


From a29d8b8e2d811a24bbe49215a0f0c536b72ebc18 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 2 Feb 2010 14:39:15 +0900
Subject: percpu: add __percpu sparse annotations to what's left

Add __percpu sparse annotations to places which didn't make it in one
of the previous patches.  All converions are trivial.

These annotations are to make sparse consider percpu variables to be
in a different address space and warn if accessed without going
through percpu accessors.  This patch doesn't affect normal builds.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Borislav Petkov <borislav.petkov@amd.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Huang Ying <ying.huang@intel.com>
Cc: Len Brown <lenb@kernel.org>
Cc: Neil Brown <neilb@suse.de>
---
 crypto/cryptd.c                  | 2 +-
 drivers/acpi/processor_perflib.c | 2 +-
 drivers/dma/dmaengine.c          | 2 +-
 drivers/edac/amd64_edac.c        | 2 +-
 drivers/md/raid5.c               | 2 +-
 drivers/md/raid5.h               | 2 +-
 include/acpi/processor.h         | 2 +-
 include/linux/dmaengine.h        | 2 +-
 8 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/crypto/cryptd.c b/crypto/cryptd.c
index 704c14115323..ef71318976c7 100644
--- a/crypto/cryptd.c
+++ b/crypto/cryptd.c
@@ -31,7 +31,7 @@ struct cryptd_cpu_queue {
 };
 
 struct cryptd_queue {
-	struct cryptd_cpu_queue *cpu_queue;
+	struct cryptd_cpu_queue __percpu *cpu_queue;
 };
 
 struct cryptd_instance_ctx {
diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c
index 2cabadcc4d8c..8c6a6497d7f3 100644
--- a/drivers/acpi/processor_perflib.c
+++ b/drivers/acpi/processor_perflib.c
@@ -557,7 +557,7 @@ end:
 }
 
 int acpi_processor_preregister_performance(
-		struct acpi_processor_performance *performance)
+		struct acpi_processor_performance __percpu *performance)
 {
 	int count, count_target;
 	int retval = 0;
diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
index 6f51a0a7a8bb..4eadd98cea53 100644
--- a/drivers/dma/dmaengine.c
+++ b/drivers/dma/dmaengine.c
@@ -284,7 +284,7 @@ struct dma_chan_tbl_ent {
 /**
  * channel_table - percpu lookup table for memory-to-memory offload providers
  */
-static struct dma_chan_tbl_ent *channel_table[DMA_TX_TYPE_END];
+static struct dma_chan_tbl_ent __percpu *channel_table[DMA_TX_TYPE_END];
 
 static int __init dma_channel_table_init(void)
 {
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index 000dc67b85b7..7b36c8838b2f 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -13,7 +13,7 @@ module_param(report_gart_errors, int, 0644);
 static int ecc_enable_override;
 module_param(ecc_enable_override, int, 0644);
 
-static struct msr *msrs;
+static struct msr __percpu *msrs;
 
 /* Lookup table for all possible MC control instances */
 struct amd64_pvt;
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index e84204eb12df..77cb3ab4bf45 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -4680,7 +4680,7 @@ static int raid5_alloc_percpu(raid5_conf_t *conf)
 {
 	unsigned long cpu;
 	struct page *spare_page;
-	struct raid5_percpu *allcpus;
+	struct raid5_percpu __percpu *allcpus;
 	void *scribble;
 	int err;
 
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h
index dd708359b451..0f86f5e36724 100644
--- a/drivers/md/raid5.h
+++ b/drivers/md/raid5.h
@@ -405,7 +405,7 @@ struct raid5_private_data {
 					      * lists and performing address
 					      * conversions
 					      */
-	} *percpu;
+	} __percpu *percpu;
 	size_t			scribble_len; /* size of scribble region must be
 					       * associated with conf to handle
 					       * cpu hotplug while reshaping
diff --git a/include/acpi/processor.h b/include/acpi/processor.h
index 0ea5ef4eb6a9..477544fd8e9e 100644
--- a/include/acpi/processor.h
+++ b/include/acpi/processor.h
@@ -238,7 +238,7 @@ struct acpi_processor_errata {
 
 extern int acpi_processor_preregister_performance(struct
 						  acpi_processor_performance
-						  *performance);
+						  __percpu *performance);
 
 extern int acpi_processor_register_performance(struct acpi_processor_performance
 					       *performance, unsigned int cpu);
diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index 78784982b33e..21fd9b7c6a40 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -162,7 +162,7 @@ struct dma_chan {
 	struct dma_chan_dev *dev;
 
 	struct list_head device_node;
-	struct dma_chan_percpu *local;
+	struct dma_chan_percpu __percpu *local;
 	int client_count;
 	int table_count;
 	void *private;
-- 
cgit v1.2.3


From 580e0ad21d6d6f932461d24b47041e3dd499c23f Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Tue, 16 Feb 2010 18:40:35 -0800
Subject: core: Move early_res from arch/x86 to kernel/

This makes the range reservation feature available to other
architectures.

-v2: add get_max_mapped, max_pfn_mapped only defined in x86...
     to fix PPC compiling
-v3: according to hpa, add CONFIG_HAVE_EARLY_RES
-v4: fix typo about EARLY_RES in config

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
LKML-Reference: <4B7B5723.4070009@kernel.org>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/Kconfig                 |   3 +
 arch/x86/include/asm/e820.h      |   2 +-
 arch/x86/include/asm/early_res.h |  21 --
 arch/x86/kernel/Makefile         |   2 +-
 arch/x86/kernel/e820.c           |  10 +-
 arch/x86/kernel/early_res.c      | 521 ---------------------------------------
 include/linux/early_res.h        |  22 ++
 kernel/Makefile                  |   1 +
 kernel/early_res.c               | 513 ++++++++++++++++++++++++++++++++++++++
 9 files changed, 550 insertions(+), 545 deletions(-)
 delete mode 100644 arch/x86/include/asm/early_res.h
 delete mode 100644 arch/x86/kernel/early_res.c
 create mode 100644 include/linux/early_res.h
 create mode 100644 kernel/early_res.c

(limited to 'include/linux')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 29f9efb74fc7..0e9f8b10de52 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -183,6 +183,9 @@ config ARCH_SUPPORTS_OPTIMIZED_INLINING
 config ARCH_SUPPORTS_DEBUG_PAGEALLOC
 	def_bool y
 
+config HAVE_EARLY_RES
+	def_bool y
+
 config HAVE_INTEL_TXT
 	def_bool y
 	depends on EXPERIMENTAL && DMAR && ACPI
diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h
index a8299e134437..0e22296790d3 100644
--- a/arch/x86/include/asm/e820.h
+++ b/arch/x86/include/asm/e820.h
@@ -112,7 +112,7 @@ extern unsigned long end_user_pfn;
 extern u64 find_e820_area(u64 start, u64 end, u64 size, u64 align);
 extern u64 find_e820_area_size(u64 start, u64 *sizep, u64 align);
 extern u64 early_reserve_e820(u64 startt, u64 sizet, u64 align);
-#include <asm/early_res.h>
+#include <linux/early_res.h>
 
 extern unsigned long e820_end_of_ram_pfn(void);
 extern unsigned long e820_end_of_low_ram_pfn(void);
diff --git a/arch/x86/include/asm/early_res.h b/arch/x86/include/asm/early_res.h
deleted file mode 100644
index 9758f3df9dad..000000000000
--- a/arch/x86/include/asm/early_res.h
+++ /dev/null
@@ -1,21 +0,0 @@
-#ifndef _ASM_X86_EARLY_RES_H
-#define _ASM_X86_EARLY_RES_H
-#ifdef __KERNEL__
-
-extern void reserve_early(u64 start, u64 end, char *name);
-extern void reserve_early_overlap_ok(u64 start, u64 end, char *name);
-extern void free_early(u64 start, u64 end);
-extern void early_res_to_bootmem(u64 start, u64 end);
-
-void reserve_early_without_check(u64 start, u64 end, char *name);
-u64 find_early_area(u64 ei_start, u64 ei_last, u64 start, u64 end,
-			 u64 size, u64 align);
-u64 find_early_area_size(u64 ei_start, u64 ei_last, u64 start,
-			 u64 *sizep, u64 align);
-u64 find_fw_memmap_area(u64 start, u64 end, u64 size, u64 align);
-#include <linux/range.h>
-int get_free_all_memory_range(struct range **rangep, int nodeid);
-
-#endif /* __KERNEL__ */
-
-#endif /* _ASM_X86_EARLY_RES_H */
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index f5fb9f0b6277..d87f09bc5a52 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -38,7 +38,7 @@ obj-$(CONFIG_X86_32)	+= probe_roms_32.o
 obj-$(CONFIG_X86_32)	+= sys_i386_32.o i386_ksyms_32.o
 obj-$(CONFIG_X86_64)	+= sys_x86_64.o x8664_ksyms_64.o
 obj-$(CONFIG_X86_64)	+= syscall_64.o vsyscall_64.o
-obj-y			+= bootflag.o e820.o early_res.o
+obj-y			+= bootflag.o e820.o
 obj-y			+= pci-dma.o quirks.o i8237.o topology.o kdebugfs.o
 obj-y			+= alternative.o i8253.o pci-nommu.o hw_breakpoint.o
 obj-y			+= tsc.o io_delay.o rtc.o
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 36918d8463ab..740b440fbd73 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -17,7 +17,6 @@
 #include <linux/firmware-map.h>
 
 #include <asm/e820.h>
-#include <asm/early_res.h>
 #include <asm/proto.h>
 #include <asm/setup.h>
 
@@ -752,6 +751,15 @@ u64 __init find_fw_memmap_area(u64 start, u64 end, u64 size, u64 align)
 {
 	return find_e820_area(start, end, size, align);
 }
+
+u64 __init get_max_mapped(void)
+{
+	u64 end = max_pfn_mapped;
+
+	end <<= PAGE_SHIFT;
+
+	return end;
+}
 /*
  * Find next free range after *start
  */
diff --git a/arch/x86/kernel/early_res.c b/arch/x86/kernel/early_res.c
deleted file mode 100644
index 1458dc022343..000000000000
--- a/arch/x86/kernel/early_res.c
+++ /dev/null
@@ -1,521 +0,0 @@
-/*
- * early_res, could be used to replace bootmem
- */
-#include <linux/kernel.h>
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/bootmem.h>
-#include <linux/mm.h>
-
-#include <asm/early_res.h>
-
-/*
- * Early reserved memory areas.
- */
-/*
- * need to make sure this one is bigger enough before
- * find_fw_memmap_area could be used
- */
-#define MAX_EARLY_RES_X 32
-
-struct early_res {
-	u64 start, end;
-	char name[15];
-	char overlap_ok;
-};
-static struct early_res early_res_x[MAX_EARLY_RES_X] __initdata;
-
-static int max_early_res __initdata = MAX_EARLY_RES_X;
-static struct early_res *early_res __initdata = &early_res_x[0];
-static int early_res_count __initdata;
-
-static int __init find_overlapped_early(u64 start, u64 end)
-{
-	int i;
-	struct early_res *r;
-
-	for (i = 0; i < max_early_res && early_res[i].end; i++) {
-		r = &early_res[i];
-		if (end > r->start && start < r->end)
-			break;
-	}
-
-	return i;
-}
-
-/*
- * Drop the i-th range from the early reservation map,
- * by copying any higher ranges down one over it, and
- * clearing what had been the last slot.
- */
-static void __init drop_range(int i)
-{
-	int j;
-
-	for (j = i + 1; j < max_early_res && early_res[j].end; j++)
-		;
-
-	memmove(&early_res[i], &early_res[i + 1],
-	       (j - 1 - i) * sizeof(struct early_res));
-
-	early_res[j - 1].end = 0;
-	early_res_count--;
-}
-
-/*
- * Split any existing ranges that:
- *  1) are marked 'overlap_ok', and
- *  2) overlap with the stated range [start, end)
- * into whatever portion (if any) of the existing range is entirely
- * below or entirely above the stated range.  Drop the portion
- * of the existing range that overlaps with the stated range,
- * which will allow the caller of this routine to then add that
- * stated range without conflicting with any existing range.
- */
-static void __init drop_overlaps_that_are_ok(u64 start, u64 end)
-{
-	int i;
-	struct early_res *r;
-	u64 lower_start, lower_end;
-	u64 upper_start, upper_end;
-	char name[15];
-
-	for (i = 0; i < max_early_res && early_res[i].end; i++) {
-		r = &early_res[i];
-
-		/* Continue past non-overlapping ranges */
-		if (end <= r->start || start >= r->end)
-			continue;
-
-		/*
-		 * Leave non-ok overlaps as is; let caller
-		 * panic "Overlapping early reservations"
-		 * when it hits this overlap.
-		 */
-		if (!r->overlap_ok)
-			return;
-
-		/*
-		 * We have an ok overlap.  We will drop it from the early
-		 * reservation map, and add back in any non-overlapping
-		 * portions (lower or upper) as separate, overlap_ok,
-		 * non-overlapping ranges.
-		 */
-
-		/* 1. Note any non-overlapping (lower or upper) ranges. */
-		strncpy(name, r->name, sizeof(name) - 1);
-
-		lower_start = lower_end = 0;
-		upper_start = upper_end = 0;
-		if (r->start < start) {
-			lower_start = r->start;
-			lower_end = start;
-		}
-		if (r->end > end) {
-			upper_start = end;
-			upper_end = r->end;
-		}
-
-		/* 2. Drop the original ok overlapping range */
-		drop_range(i);
-
-		i--;		/* resume for-loop on copied down entry */
-
-		/* 3. Add back in any non-overlapping ranges. */
-		if (lower_end)
-			reserve_early_overlap_ok(lower_start, lower_end, name);
-		if (upper_end)
-			reserve_early_overlap_ok(upper_start, upper_end, name);
-	}
-}
-
-static void __init __reserve_early(u64 start, u64 end, char *name,
-						int overlap_ok)
-{
-	int i;
-	struct early_res *r;
-
-	i = find_overlapped_early(start, end);
-	if (i >= max_early_res)
-		panic("Too many early reservations");
-	r = &early_res[i];
-	if (r->end)
-		panic("Overlapping early reservations "
-		      "%llx-%llx %s to %llx-%llx %s\n",
-		      start, end - 1, name ? name : "", r->start,
-		      r->end - 1, r->name);
-	r->start = start;
-	r->end = end;
-	r->overlap_ok = overlap_ok;
-	if (name)
-		strncpy(r->name, name, sizeof(r->name) - 1);
-	early_res_count++;
-}
-
-/*
- * A few early reservtations come here.
- *
- * The 'overlap_ok' in the name of this routine does -not- mean it
- * is ok for these reservations to overlap an earlier reservation.
- * Rather it means that it is ok for subsequent reservations to
- * overlap this one.
- *
- * Use this entry point to reserve early ranges when you are doing
- * so out of "Paranoia", reserving perhaps more memory than you need,
- * just in case, and don't mind a subsequent overlapping reservation
- * that is known to be needed.
- *
- * The drop_overlaps_that_are_ok() call here isn't really needed.
- * It would be needed if we had two colliding 'overlap_ok'
- * reservations, so that the second such would not panic on the
- * overlap with the first.  We don't have any such as of this
- * writing, but might as well tolerate such if it happens in
- * the future.
- */
-void __init reserve_early_overlap_ok(u64 start, u64 end, char *name)
-{
-	drop_overlaps_that_are_ok(start, end);
-	__reserve_early(start, end, name, 1);
-}
-
-u64 __init __weak find_fw_memmap_area(u64 start, u64 end, u64 size, u64 align)
-{
-	panic("should have find_fw_memmap_area defined with arch");
-
-	return -1ULL;
-}
-
-static void __init __check_and_double_early_res(u64 ex_start, u64 ex_end)
-{
-	u64 start, end, size, mem;
-	struct early_res *new;
-
-	/* do we have enough slots left ? */
-	if ((max_early_res - early_res_count) > max(max_early_res/8, 2))
-		return;
-
-	/* double it */
-	mem = -1ULL;
-	size = sizeof(struct early_res) * max_early_res * 2;
-	if (early_res == early_res_x)
-		start = 0;
-	else
-		start = early_res[0].end;
-	end = ex_start;
-	if (start + size < end)
-		mem = find_fw_memmap_area(start, end, size,
-					 sizeof(struct early_res));
-	if (mem == -1ULL) {
-		start = ex_end;
-		end = max_pfn_mapped << PAGE_SHIFT;
-		if (start + size < end)
-			mem = find_fw_memmap_area(start, end, size,
-						 sizeof(struct early_res));
-	}
-	if (mem == -1ULL)
-		panic("can not find more space for early_res array");
-
-	new = __va(mem);
-	/* save the first one for own */
-	new[0].start = mem;
-	new[0].end = mem + size;
-	new[0].overlap_ok = 0;
-	/* copy old to new */
-	if (early_res == early_res_x) {
-		memcpy(&new[1], &early_res[0],
-			 sizeof(struct early_res) * max_early_res);
-		memset(&new[max_early_res+1], 0,
-			 sizeof(struct early_res) * (max_early_res - 1));
-		early_res_count++;
-	} else {
-		memcpy(&new[1], &early_res[1],
-			 sizeof(struct early_res) * (max_early_res - 1));
-		memset(&new[max_early_res], 0,
-			 sizeof(struct early_res) * max_early_res);
-	}
-	memset(&early_res[0], 0, sizeof(struct early_res) * max_early_res);
-	early_res = new;
-	max_early_res *= 2;
-	printk(KERN_DEBUG "early_res array is doubled to %d at [%llx - %llx]\n",
-		max_early_res, mem, mem + size - 1);
-}
-
-/*
- * Most early reservations come here.
- *
- * We first have drop_overlaps_that_are_ok() drop any pre-existing
- * 'overlap_ok' ranges, so that we can then reserve this memory
- * range without risk of panic'ing on an overlapping overlap_ok
- * early reservation.
- */
-void __init reserve_early(u64 start, u64 end, char *name)
-{
-	if (start >= end)
-		return;
-
-	__check_and_double_early_res(start, end);
-
-	drop_overlaps_that_are_ok(start, end);
-	__reserve_early(start, end, name, 0);
-}
-
-void __init reserve_early_without_check(u64 start, u64 end, char *name)
-{
-	struct early_res *r;
-
-	if (start >= end)
-		return;
-
-	__check_and_double_early_res(start, end);
-
-	r = &early_res[early_res_count];
-
-	r->start = start;
-	r->end = end;
-	r->overlap_ok = 0;
-	if (name)
-		strncpy(r->name, name, sizeof(r->name) - 1);
-	early_res_count++;
-}
-
-void __init free_early(u64 start, u64 end)
-{
-	struct early_res *r;
-	int i;
-
-	i = find_overlapped_early(start, end);
-	r = &early_res[i];
-	if (i >= max_early_res || r->end != end || r->start != start)
-		panic("free_early on not reserved area: %llx-%llx!",
-			 start, end - 1);
-
-	drop_range(i);
-}
-
-#ifdef CONFIG_NO_BOOTMEM
-static void __init subtract_early_res(struct range *range, int az)
-{
-	int i, count;
-	u64 final_start, final_end;
-	int idx = 0;
-
-	count  = 0;
-	for (i = 0; i < max_early_res && early_res[i].end; i++)
-		count++;
-
-	/* need to skip first one ?*/
-	if (early_res != early_res_x)
-		idx = 1;
-
-#define DEBUG_PRINT_EARLY_RES 1
-
-#if DEBUG_PRINT_EARLY_RES
-	printk(KERN_INFO "Subtract (%d early reservations)\n", count);
-#endif
-	for (i = idx; i < count; i++) {
-		struct early_res *r = &early_res[i];
-#if DEBUG_PRINT_EARLY_RES
-		printk(KERN_INFO "  #%d [%010llx - %010llx] %15s\n", i,
-			r->start, r->end, r->name);
-#endif
-		final_start = PFN_DOWN(r->start);
-		final_end = PFN_UP(r->end);
-		if (final_start >= final_end)
-			continue;
-		subtract_range(range, az, final_start, final_end);
-	}
-
-}
-
-int __init get_free_all_memory_range(struct range **rangep, int nodeid)
-{
-	int i, count;
-	u64 start = 0, end;
-	u64 size;
-	u64 mem;
-	struct range *range;
-	int nr_range;
-
-	count  = 0;
-	for (i = 0; i < max_early_res && early_res[i].end; i++)
-		count++;
-
-	count *= 2;
-
-	size = sizeof(struct range) * count;
-#ifdef MAX_DMA32_PFN
-	if (max_pfn_mapped > MAX_DMA32_PFN)
-		start = MAX_DMA32_PFN << PAGE_SHIFT;
-#endif
-	end = max_pfn_mapped << PAGE_SHIFT;
-	mem = find_fw_memmap_area(start, end, size, sizeof(struct range));
-	if (mem == -1ULL)
-		panic("can not find more space for range free");
-
-	range = __va(mem);
-	/* use early_node_map[] and early_res to get range array at first */
-	memset(range, 0, size);
-	nr_range = 0;
-
-	/* need to go over early_node_map to find out good range for node */
-	nr_range = add_from_early_node_map(range, count, nr_range, nodeid);
-#ifdef CONFIG_X86_32
-	subtract_range(range, count, max_low_pfn, -1ULL);
-#endif
-	subtract_early_res(range, count);
-	nr_range = clean_sort_range(range, count);
-
-	/* need to clear it ? */
-	if (nodeid == MAX_NUMNODES) {
-		memset(&early_res[0], 0,
-			 sizeof(struct early_res) * max_early_res);
-		early_res = NULL;
-		max_early_res = 0;
-	}
-
-	*rangep = range;
-	return nr_range;
-}
-#else
-void __init early_res_to_bootmem(u64 start, u64 end)
-{
-	int i, count;
-	u64 final_start, final_end;
-	int idx = 0;
-
-	count  = 0;
-	for (i = 0; i < max_early_res && early_res[i].end; i++)
-		count++;
-
-	/* need to skip first one ?*/
-	if (early_res != early_res_x)
-		idx = 1;
-
-	printk(KERN_INFO "(%d/%d early reservations) ==> bootmem [%010llx - %010llx]\n",
-			 count - idx, max_early_res, start, end);
-	for (i = idx; i < count; i++) {
-		struct early_res *r = &early_res[i];
-		printk(KERN_INFO "  #%d [%010llx - %010llx] %16s", i,
-			r->start, r->end, r->name);
-		final_start = max(start, r->start);
-		final_end = min(end, r->end);
-		if (final_start >= final_end) {
-			printk(KERN_CONT "\n");
-			continue;
-		}
-		printk(KERN_CONT " ==> [%010llx - %010llx]\n",
-			final_start, final_end);
-		reserve_bootmem_generic(final_start, final_end - final_start,
-				BOOTMEM_DEFAULT);
-	}
-	/* clear them */
-	memset(&early_res[0], 0, sizeof(struct early_res) * max_early_res);
-	early_res = NULL;
-	max_early_res = 0;
-	early_res_count = 0;
-}
-#endif
-
-/* Check for already reserved areas */
-static inline int __init bad_addr(u64 *addrp, u64 size, u64 align)
-{
-	int i;
-	u64 addr = *addrp;
-	int changed = 0;
-	struct early_res *r;
-again:
-	i = find_overlapped_early(addr, addr + size);
-	r = &early_res[i];
-	if (i < max_early_res && r->end) {
-		*addrp = addr = round_up(r->end, align);
-		changed = 1;
-		goto again;
-	}
-	return changed;
-}
-
-/* Check for already reserved areas */
-static inline int __init bad_addr_size(u64 *addrp, u64 *sizep, u64 align)
-{
-	int i;
-	u64 addr = *addrp, last;
-	u64 size = *sizep;
-	int changed = 0;
-again:
-	last = addr + size;
-	for (i = 0; i < max_early_res && early_res[i].end; i++) {
-		struct early_res *r = &early_res[i];
-		if (last > r->start && addr < r->start) {
-			size = r->start - addr;
-			changed = 1;
-			goto again;
-		}
-		if (last > r->end && addr < r->end) {
-			addr = round_up(r->end, align);
-			size = last - addr;
-			changed = 1;
-			goto again;
-		}
-		if (last <= r->end && addr >= r->start) {
-			(*sizep)++;
-			return 0;
-		}
-	}
-	if (changed) {
-		*addrp = addr;
-		*sizep = size;
-	}
-	return changed;
-}
-
-/*
- * Find a free area with specified alignment in a specific range.
- * only with the area.between start to end is active range from early_node_map
- * so they are good as RAM
- */
-u64 __init find_early_area(u64 ei_start, u64 ei_last, u64 start, u64 end,
-			 u64 size, u64 align)
-{
-	u64 addr, last;
-
-	addr = round_up(ei_start, align);
-	if (addr < start)
-		addr = round_up(start, align);
-	if (addr >= ei_last)
-		goto out;
-	while (bad_addr(&addr, size, align) && addr+size <= ei_last)
-		;
-	last = addr + size;
-	if (last > ei_last)
-		goto out;
-	if (last > end)
-		goto out;
-
-	return addr;
-
-out:
-	return -1ULL;
-}
-
-u64 __init find_early_area_size(u64 ei_start, u64 ei_last, u64 start,
-			 u64 *sizep, u64 align)
-{
-	u64 addr, last;
-
-	addr = round_up(ei_start, align);
-	if (addr < start)
-		addr = round_up(start, align);
-	if (addr >= ei_last)
-		goto out;
-	*sizep = ei_last - addr;
-	while (bad_addr_size(&addr, sizep, align) && addr + *sizep <= ei_last)
-		;
-	last = addr + *sizep;
-	if (last > ei_last)
-		goto out;
-
-	return addr;
-
-out:
-	return -1ULL;
-}
diff --git a/include/linux/early_res.h b/include/linux/early_res.h
new file mode 100644
index 000000000000..50f7663bb8b1
--- /dev/null
+++ b/include/linux/early_res.h
@@ -0,0 +1,22 @@
+#ifndef _LINUX_EARLY_RES_H
+#define _LINUX_EARLY_RES_H
+#ifdef __KERNEL__
+
+extern void reserve_early(u64 start, u64 end, char *name);
+extern void reserve_early_overlap_ok(u64 start, u64 end, char *name);
+extern void free_early(u64 start, u64 end);
+extern void early_res_to_bootmem(u64 start, u64 end);
+
+void reserve_early_without_check(u64 start, u64 end, char *name);
+u64 find_early_area(u64 ei_start, u64 ei_last, u64 start, u64 end,
+			 u64 size, u64 align);
+u64 find_early_area_size(u64 ei_start, u64 ei_last, u64 start,
+			 u64 *sizep, u64 align);
+u64 find_fw_memmap_area(u64 start, u64 end, u64 size, u64 align);
+u64 get_max_mapped(void);
+#include <linux/range.h>
+int get_free_all_memory_range(struct range **rangep, int nodeid);
+
+#endif /* __KERNEL__ */
+
+#endif /* _LINUX_EARLY_RES_H */
diff --git a/kernel/Makefile b/kernel/Makefile
index ad47330ccf32..1292b863d667 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -11,6 +11,7 @@ obj-y     = sched.o fork.o exec_domain.o panic.o printk.o \
 	    hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \
 	    notifier.o ksysfs.o pm_qos_params.o sched_clock.o cred.o \
 	    async.o range.o
+obj-$(CONFIG_HAVE_EARLY_RES) += early_res.o
 obj-y += groups.o
 
 ifdef CONFIG_FUNCTION_TRACER
diff --git a/kernel/early_res.c b/kernel/early_res.c
new file mode 100644
index 000000000000..aa5494ac4462
--- /dev/null
+++ b/kernel/early_res.c
@@ -0,0 +1,513 @@
+/*
+ * early_res, could be used to replace bootmem
+ */
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/bootmem.h>
+#include <linux/mm.h>
+#include <linux/early_res.h>
+
+/*
+ * Early reserved memory areas.
+ */
+/*
+ * need to make sure this one is bigger enough before
+ * find_fw_memmap_area could be used
+ */
+#define MAX_EARLY_RES_X 32
+
+struct early_res {
+	u64 start, end;
+	char name[15];
+	char overlap_ok;
+};
+static struct early_res early_res_x[MAX_EARLY_RES_X] __initdata;
+
+static int max_early_res __initdata = MAX_EARLY_RES_X;
+static struct early_res *early_res __initdata = &early_res_x[0];
+static int early_res_count __initdata;
+
+static int __init find_overlapped_early(u64 start, u64 end)
+{
+	int i;
+	struct early_res *r;
+
+	for (i = 0; i < max_early_res && early_res[i].end; i++) {
+		r = &early_res[i];
+		if (end > r->start && start < r->end)
+			break;
+	}
+
+	return i;
+}
+
+/*
+ * Drop the i-th range from the early reservation map,
+ * by copying any higher ranges down one over it, and
+ * clearing what had been the last slot.
+ */
+static void __init drop_range(int i)
+{
+	int j;
+
+	for (j = i + 1; j < max_early_res && early_res[j].end; j++)
+		;
+
+	memmove(&early_res[i], &early_res[i + 1],
+	       (j - 1 - i) * sizeof(struct early_res));
+
+	early_res[j - 1].end = 0;
+	early_res_count--;
+}
+
+/*
+ * Split any existing ranges that:
+ *  1) are marked 'overlap_ok', and
+ *  2) overlap with the stated range [start, end)
+ * into whatever portion (if any) of the existing range is entirely
+ * below or entirely above the stated range.  Drop the portion
+ * of the existing range that overlaps with the stated range,
+ * which will allow the caller of this routine to then add that
+ * stated range without conflicting with any existing range.
+ */
+static void __init drop_overlaps_that_are_ok(u64 start, u64 end)
+{
+	int i;
+	struct early_res *r;
+	u64 lower_start, lower_end;
+	u64 upper_start, upper_end;
+	char name[15];
+
+	for (i = 0; i < max_early_res && early_res[i].end; i++) {
+		r = &early_res[i];
+
+		/* Continue past non-overlapping ranges */
+		if (end <= r->start || start >= r->end)
+			continue;
+
+		/*
+		 * Leave non-ok overlaps as is; let caller
+		 * panic "Overlapping early reservations"
+		 * when it hits this overlap.
+		 */
+		if (!r->overlap_ok)
+			return;
+
+		/*
+		 * We have an ok overlap.  We will drop it from the early
+		 * reservation map, and add back in any non-overlapping
+		 * portions (lower or upper) as separate, overlap_ok,
+		 * non-overlapping ranges.
+		 */
+
+		/* 1. Note any non-overlapping (lower or upper) ranges. */
+		strncpy(name, r->name, sizeof(name) - 1);
+
+		lower_start = lower_end = 0;
+		upper_start = upper_end = 0;
+		if (r->start < start) {
+			lower_start = r->start;
+			lower_end = start;
+		}
+		if (r->end > end) {
+			upper_start = end;
+			upper_end = r->end;
+		}
+
+		/* 2. Drop the original ok overlapping range */
+		drop_range(i);
+
+		i--;		/* resume for-loop on copied down entry */
+
+		/* 3. Add back in any non-overlapping ranges. */
+		if (lower_end)
+			reserve_early_overlap_ok(lower_start, lower_end, name);
+		if (upper_end)
+			reserve_early_overlap_ok(upper_start, upper_end, name);
+	}
+}
+
+static void __init __reserve_early(u64 start, u64 end, char *name,
+						int overlap_ok)
+{
+	int i;
+	struct early_res *r;
+
+	i = find_overlapped_early(start, end);
+	if (i >= max_early_res)
+		panic("Too many early reservations");
+	r = &early_res[i];
+	if (r->end)
+		panic("Overlapping early reservations "
+		      "%llx-%llx %s to %llx-%llx %s\n",
+		      start, end - 1, name ? name : "", r->start,
+		      r->end - 1, r->name);
+	r->start = start;
+	r->end = end;
+	r->overlap_ok = overlap_ok;
+	if (name)
+		strncpy(r->name, name, sizeof(r->name) - 1);
+	early_res_count++;
+}
+
+/*
+ * A few early reservtations come here.
+ *
+ * The 'overlap_ok' in the name of this routine does -not- mean it
+ * is ok for these reservations to overlap an earlier reservation.
+ * Rather it means that it is ok for subsequent reservations to
+ * overlap this one.
+ *
+ * Use this entry point to reserve early ranges when you are doing
+ * so out of "Paranoia", reserving perhaps more memory than you need,
+ * just in case, and don't mind a subsequent overlapping reservation
+ * that is known to be needed.
+ *
+ * The drop_overlaps_that_are_ok() call here isn't really needed.
+ * It would be needed if we had two colliding 'overlap_ok'
+ * reservations, so that the second such would not panic on the
+ * overlap with the first.  We don't have any such as of this
+ * writing, but might as well tolerate such if it happens in
+ * the future.
+ */
+void __init reserve_early_overlap_ok(u64 start, u64 end, char *name)
+{
+	drop_overlaps_that_are_ok(start, end);
+	__reserve_early(start, end, name, 1);
+}
+
+static void __init __check_and_double_early_res(u64 ex_start, u64 ex_end)
+{
+	u64 start, end, size, mem;
+	struct early_res *new;
+
+	/* do we have enough slots left ? */
+	if ((max_early_res - early_res_count) > max(max_early_res/8, 2))
+		return;
+
+	/* double it */
+	mem = -1ULL;
+	size = sizeof(struct early_res) * max_early_res * 2;
+	if (early_res == early_res_x)
+		start = 0;
+	else
+		start = early_res[0].end;
+	end = ex_start;
+	if (start + size < end)
+		mem = find_fw_memmap_area(start, end, size,
+					 sizeof(struct early_res));
+	if (mem == -1ULL) {
+		start = ex_end;
+		end = get_max_mapped();
+		if (start + size < end)
+			mem = find_fw_memmap_area(start, end, size,
+						 sizeof(struct early_res));
+	}
+	if (mem == -1ULL)
+		panic("can not find more space for early_res array");
+
+	new = __va(mem);
+	/* save the first one for own */
+	new[0].start = mem;
+	new[0].end = mem + size;
+	new[0].overlap_ok = 0;
+	/* copy old to new */
+	if (early_res == early_res_x) {
+		memcpy(&new[1], &early_res[0],
+			 sizeof(struct early_res) * max_early_res);
+		memset(&new[max_early_res+1], 0,
+			 sizeof(struct early_res) * (max_early_res - 1));
+		early_res_count++;
+	} else {
+		memcpy(&new[1], &early_res[1],
+			 sizeof(struct early_res) * (max_early_res - 1));
+		memset(&new[max_early_res], 0,
+			 sizeof(struct early_res) * max_early_res);
+	}
+	memset(&early_res[0], 0, sizeof(struct early_res) * max_early_res);
+	early_res = new;
+	max_early_res *= 2;
+	printk(KERN_DEBUG "early_res array is doubled to %d at [%llx - %llx]\n",
+		max_early_res, mem, mem + size - 1);
+}
+
+/*
+ * Most early reservations come here.
+ *
+ * We first have drop_overlaps_that_are_ok() drop any pre-existing
+ * 'overlap_ok' ranges, so that we can then reserve this memory
+ * range without risk of panic'ing on an overlapping overlap_ok
+ * early reservation.
+ */
+void __init reserve_early(u64 start, u64 end, char *name)
+{
+	if (start >= end)
+		return;
+
+	__check_and_double_early_res(start, end);
+
+	drop_overlaps_that_are_ok(start, end);
+	__reserve_early(start, end, name, 0);
+}
+
+void __init reserve_early_without_check(u64 start, u64 end, char *name)
+{
+	struct early_res *r;
+
+	if (start >= end)
+		return;
+
+	__check_and_double_early_res(start, end);
+
+	r = &early_res[early_res_count];
+
+	r->start = start;
+	r->end = end;
+	r->overlap_ok = 0;
+	if (name)
+		strncpy(r->name, name, sizeof(r->name) - 1);
+	early_res_count++;
+}
+
+void __init free_early(u64 start, u64 end)
+{
+	struct early_res *r;
+	int i;
+
+	i = find_overlapped_early(start, end);
+	r = &early_res[i];
+	if (i >= max_early_res || r->end != end || r->start != start)
+		panic("free_early on not reserved area: %llx-%llx!",
+			 start, end - 1);
+
+	drop_range(i);
+}
+
+#ifdef CONFIG_NO_BOOTMEM
+static void __init subtract_early_res(struct range *range, int az)
+{
+	int i, count;
+	u64 final_start, final_end;
+	int idx = 0;
+
+	count  = 0;
+	for (i = 0; i < max_early_res && early_res[i].end; i++)
+		count++;
+
+	/* need to skip first one ?*/
+	if (early_res != early_res_x)
+		idx = 1;
+
+#define DEBUG_PRINT_EARLY_RES 1
+
+#if DEBUG_PRINT_EARLY_RES
+	printk(KERN_INFO "Subtract (%d early reservations)\n", count);
+#endif
+	for (i = idx; i < count; i++) {
+		struct early_res *r = &early_res[i];
+#if DEBUG_PRINT_EARLY_RES
+		printk(KERN_INFO "  #%d [%010llx - %010llx] %15s\n", i,
+			r->start, r->end, r->name);
+#endif
+		final_start = PFN_DOWN(r->start);
+		final_end = PFN_UP(r->end);
+		if (final_start >= final_end)
+			continue;
+		subtract_range(range, az, final_start, final_end);
+	}
+
+}
+
+int __init get_free_all_memory_range(struct range **rangep, int nodeid)
+{
+	int i, count;
+	u64 start = 0, end;
+	u64 size;
+	u64 mem;
+	struct range *range;
+	int nr_range;
+
+	count  = 0;
+	for (i = 0; i < max_early_res && early_res[i].end; i++)
+		count++;
+
+	count *= 2;
+
+	size = sizeof(struct range) * count;
+	end = get_max_mapped();
+#ifdef MAX_DMA32_PFN
+	if (end > (MAX_DMA32_PFN << PAGE_SHIFT))
+		start = MAX_DMA32_PFN << PAGE_SHIFT;
+#endif
+	mem = find_fw_memmap_area(start, end, size, sizeof(struct range));
+	if (mem == -1ULL)
+		panic("can not find more space for range free");
+
+	range = __va(mem);
+	/* use early_node_map[] and early_res to get range array at first */
+	memset(range, 0, size);
+	nr_range = 0;
+
+	/* need to go over early_node_map to find out good range for node */
+	nr_range = add_from_early_node_map(range, count, nr_range, nodeid);
+#ifdef CONFIG_X86_32
+	subtract_range(range, count, max_low_pfn, -1ULL);
+#endif
+	subtract_early_res(range, count);
+	nr_range = clean_sort_range(range, count);
+
+	/* need to clear it ? */
+	if (nodeid == MAX_NUMNODES) {
+		memset(&early_res[0], 0,
+			 sizeof(struct early_res) * max_early_res);
+		early_res = NULL;
+		max_early_res = 0;
+	}
+
+	*rangep = range;
+	return nr_range;
+}
+#else
+void __init early_res_to_bootmem(u64 start, u64 end)
+{
+	int i, count;
+	u64 final_start, final_end;
+	int idx = 0;
+
+	count  = 0;
+	for (i = 0; i < max_early_res && early_res[i].end; i++)
+		count++;
+
+	/* need to skip first one ?*/
+	if (early_res != early_res_x)
+		idx = 1;
+
+	printk(KERN_INFO "(%d/%d early reservations) ==> bootmem [%010llx - %010llx]\n",
+			 count - idx, max_early_res, start, end);
+	for (i = idx; i < count; i++) {
+		struct early_res *r = &early_res[i];
+		printk(KERN_INFO "  #%d [%010llx - %010llx] %16s", i,
+			r->start, r->end, r->name);
+		final_start = max(start, r->start);
+		final_end = min(end, r->end);
+		if (final_start >= final_end) {
+			printk(KERN_CONT "\n");
+			continue;
+		}
+		printk(KERN_CONT " ==> [%010llx - %010llx]\n",
+			final_start, final_end);
+		reserve_bootmem_generic(final_start, final_end - final_start,
+				BOOTMEM_DEFAULT);
+	}
+	/* clear them */
+	memset(&early_res[0], 0, sizeof(struct early_res) * max_early_res);
+	early_res = NULL;
+	max_early_res = 0;
+	early_res_count = 0;
+}
+#endif
+
+/* Check for already reserved areas */
+static inline int __init bad_addr(u64 *addrp, u64 size, u64 align)
+{
+	int i;
+	u64 addr = *addrp;
+	int changed = 0;
+	struct early_res *r;
+again:
+	i = find_overlapped_early(addr, addr + size);
+	r = &early_res[i];
+	if (i < max_early_res && r->end) {
+		*addrp = addr = round_up(r->end, align);
+		changed = 1;
+		goto again;
+	}
+	return changed;
+}
+
+/* Check for already reserved areas */
+static inline int __init bad_addr_size(u64 *addrp, u64 *sizep, u64 align)
+{
+	int i;
+	u64 addr = *addrp, last;
+	u64 size = *sizep;
+	int changed = 0;
+again:
+	last = addr + size;
+	for (i = 0; i < max_early_res && early_res[i].end; i++) {
+		struct early_res *r = &early_res[i];
+		if (last > r->start && addr < r->start) {
+			size = r->start - addr;
+			changed = 1;
+			goto again;
+		}
+		if (last > r->end && addr < r->end) {
+			addr = round_up(r->end, align);
+			size = last - addr;
+			changed = 1;
+			goto again;
+		}
+		if (last <= r->end && addr >= r->start) {
+			(*sizep)++;
+			return 0;
+		}
+	}
+	if (changed) {
+		*addrp = addr;
+		*sizep = size;
+	}
+	return changed;
+}
+
+/*
+ * Find a free area with specified alignment in a specific range.
+ * only with the area.between start to end is active range from early_node_map
+ * so they are good as RAM
+ */
+u64 __init find_early_area(u64 ei_start, u64 ei_last, u64 start, u64 end,
+			 u64 size, u64 align)
+{
+	u64 addr, last;
+
+	addr = round_up(ei_start, align);
+	if (addr < start)
+		addr = round_up(start, align);
+	if (addr >= ei_last)
+		goto out;
+	while (bad_addr(&addr, size, align) && addr+size <= ei_last)
+		;
+	last = addr + size;
+	if (last > ei_last)
+		goto out;
+	if (last > end)
+		goto out;
+
+	return addr;
+
+out:
+	return -1ULL;
+}
+
+u64 __init find_early_area_size(u64 ei_start, u64 ei_last, u64 start,
+			 u64 *sizep, u64 align)
+{
+	u64 addr, last;
+
+	addr = round_up(ei_start, align);
+	if (addr < start)
+		addr = round_up(start, align);
+	if (addr >= ei_last)
+		goto out;
+	*sizep = ei_last - addr;
+	while (bad_addr_size(&addr, sizep, align) && addr + *sizep <= ei_last)
+		;
+	last = addr + *sizep;
+	if (last > ei_last)
+		goto out;
+
+	return addr;
+
+out:
+	return -1ULL;
+}
-- 
cgit v1.2.3


From 47d742752df4c1088589d4424840bc761613ab2a Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 16 Feb 2010 15:21:08 +0000
Subject: percpu: add __percpu sparse annotations to net drivers

Add __percpu sparse annotations to net drivers.

These annotations are to make sparse consider percpu variables to be
in a different address space and warn if accessed without going
through percpu accessors.  This patch doesn't affect normal builds.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: David S. Miller <davem@davemloft.net>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/chelsio/sge.c  |  2 +-
 drivers/net/loopback.c     | 16 +++++++++-------
 drivers/net/veth.c         |  4 ++--
 include/linux/if_macvlan.h |  2 +-
 4 files changed, 13 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/chelsio/sge.c b/drivers/net/chelsio/sge.c
index 109d2783e4d8..8e12505ac3aa 100644
--- a/drivers/net/chelsio/sge.c
+++ b/drivers/net/chelsio/sge.c
@@ -267,7 +267,7 @@ struct sge {
 	struct sk_buff	*espibug_skb[MAX_NPORTS];
 	u32		sge_control;	/* shadow value of sge control reg */
 	struct sge_intr_counts stats;
-	struct sge_port_stats *port_stats[MAX_NPORTS];
+	struct sge_port_stats __percpu *port_stats[MAX_NPORTS];
 	struct sched	*tx_sched;
 	struct cmdQ cmdQ[SGE_CMDQ_N] ____cacheline_aligned_in_smp;
 };
diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c
index b9fcc9819837..72b7949c91b1 100644
--- a/drivers/net/loopback.c
+++ b/drivers/net/loopback.c
@@ -72,7 +72,8 @@ struct pcpu_lstats {
 static netdev_tx_t loopback_xmit(struct sk_buff *skb,
 				 struct net_device *dev)
 {
-	struct pcpu_lstats *pcpu_lstats, *lb_stats;
+	struct pcpu_lstats __percpu *pcpu_lstats;
+	struct pcpu_lstats *lb_stats;
 	int len;
 
 	skb_orphan(skb);
@@ -80,7 +81,7 @@ static netdev_tx_t loopback_xmit(struct sk_buff *skb,
 	skb->protocol = eth_type_trans(skb, dev);
 
 	/* it's OK to use per_cpu_ptr() because BHs are off */
-	pcpu_lstats = dev->ml_priv;
+	pcpu_lstats = (void __percpu __force *)dev->ml_priv;
 	lb_stats = this_cpu_ptr(pcpu_lstats);
 
 	len = skb->len;
@@ -95,14 +96,14 @@ static netdev_tx_t loopback_xmit(struct sk_buff *skb,
 
 static struct net_device_stats *loopback_get_stats(struct net_device *dev)
 {
-	const struct pcpu_lstats *pcpu_lstats;
+	const struct pcpu_lstats __percpu *pcpu_lstats;
 	struct net_device_stats *stats = &dev->stats;
 	unsigned long bytes = 0;
 	unsigned long packets = 0;
 	unsigned long drops = 0;
 	int i;
 
-	pcpu_lstats = dev->ml_priv;
+	pcpu_lstats = (void __percpu __force *)dev->ml_priv;
 	for_each_possible_cpu(i) {
 		const struct pcpu_lstats *lb_stats;
 
@@ -135,19 +136,20 @@ static const struct ethtool_ops loopback_ethtool_ops = {
 
 static int loopback_dev_init(struct net_device *dev)
 {
-	struct pcpu_lstats *lstats;
+	struct pcpu_lstats __percpu *lstats;
 
 	lstats = alloc_percpu(struct pcpu_lstats);
 	if (!lstats)
 		return -ENOMEM;
 
-	dev->ml_priv = lstats;
+	dev->ml_priv = (void __force *)lstats;
 	return 0;
 }
 
 static void loopback_dev_free(struct net_device *dev)
 {
-	struct pcpu_lstats *lstats = dev->ml_priv;
+	struct pcpu_lstats __percpu *lstats =
+		(void __percpu __force *)dev->ml_priv;
 
 	free_percpu(lstats);
 	free_netdev(dev);
diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index 3a15de56df9c..35609e64f6fd 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -34,7 +34,7 @@ struct veth_net_stats {
 
 struct veth_priv {
 	struct net_device *peer;
-	struct veth_net_stats *stats;
+	struct veth_net_stats __percpu *stats;
 	unsigned ip_summed;
 };
 
@@ -263,7 +263,7 @@ static int veth_change_mtu(struct net_device *dev, int new_mtu)
 
 static int veth_dev_init(struct net_device *dev)
 {
-	struct veth_net_stats *stats;
+	struct veth_net_stats __percpu *stats;
 	struct veth_priv *priv;
 
 	stats = alloc_percpu(struct veth_net_stats);
diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h
index 51f1512045e9..f9cb9ba1475d 100644
--- a/include/linux/if_macvlan.h
+++ b/include/linux/if_macvlan.h
@@ -30,7 +30,7 @@ struct macvlan_dev {
 	struct hlist_node	hlist;
 	struct macvlan_port	*port;
 	struct net_device	*lowerdev;
-	struct macvlan_rx_stats *rx_stats;
+	struct macvlan_rx_stats __percpu *rx_stats;
 	enum macvlan_mode	mode;
 	int (*receive)(struct sk_buff *skb);
 	int (*forward)(struct net_device *dev, struct sk_buff *skb);
-- 
cgit v1.2.3


From e7b8e675d9c71b868b66f62f725a948047514719 Mon Sep 17 00:00:00 2001
From: Mike Frysinger <vapier@gentoo.org>
Date: Tue, 26 Jan 2010 04:40:03 -0500
Subject: tracing: Unify arch_syscall_addr() implementations

Most implementations of arch_syscall_addr() are the same, so create a
default version in common code and move the one piece that differs (the
syscall table) to asm/syscall.h.  New arch ports don't have to waste
time copying & pasting this simple function.

The s390/sparc versions need to be different, so document why.

Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Acked-by: David S. Miller <davem@davemloft.net>
Acked-by: Paul Mundt <lethal@linux-sh.org>
Acked-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
LKML-Reference: <1264498803-17278-1-git-send-email-vapier@gentoo.org>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
---
 Documentation/trace/ftrace-design.txt |  5 ++---
 arch/s390/include/asm/syscall.h       |  7 +++++++
 arch/s390/kernel/ftrace.c             | 10 ----------
 arch/sh/include/asm/syscall.h         |  2 ++
 arch/sh/kernel/ftrace.c               |  9 ---------
 arch/sparc/include/asm/syscall.h      |  7 +++++++
 arch/sparc/kernel/ftrace.c            | 11 -----------
 arch/x86/include/asm/syscall.h        |  2 ++
 arch/x86/kernel/ftrace.c              | 10 ----------
 include/linux/ftrace.h                |  6 ++++++
 kernel/trace/trace_syscalls.c         |  5 +++++
 11 files changed, 31 insertions(+), 43 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/trace/ftrace-design.txt b/Documentation/trace/ftrace-design.txt
index 239f14b2b55a..99df1101d2a5 100644
--- a/Documentation/trace/ftrace-design.txt
+++ b/Documentation/trace/ftrace-design.txt
@@ -218,11 +218,10 @@ HAVE_SYSCALL_TRACEPOINTS
 
 You need very few things to get the syscalls tracing in an arch.
 
+- Support HAVE_ARCH_TRACEHOOK (see arch/Kconfig).
 - Have a NR_syscalls variable in <asm/unistd.h> that provides the number
   of syscalls supported by the arch.
-- Implement arch_syscall_addr() that resolves a syscall address from a
-  syscall number.
-- Support the TIF_SYSCALL_TRACEPOINT thread flags
+- Support the TIF_SYSCALL_TRACEPOINT thread flags.
 - Put the trace_sys_enter() and trace_sys_exit() tracepoints calls from ptrace
   in the ptrace syscalls tracing path.
 - Tag this arch as HAVE_SYSCALL_TRACEPOINTS.
diff --git a/arch/s390/include/asm/syscall.h b/arch/s390/include/asm/syscall.h
index e0a73d3eb837..8429686951f9 100644
--- a/arch/s390/include/asm/syscall.h
+++ b/arch/s390/include/asm/syscall.h
@@ -15,6 +15,13 @@
 #include <linux/sched.h>
 #include <asm/ptrace.h>
 
+/*
+ * The syscall table always contains 32 bit pointers since we know that the
+ * address of the function to be called is (way) below 4GB.  So the "int"
+ * type here is what we want [need] for both 32 bit and 64 bit systems.
+ */
+extern const unsigned int sys_call_table[];
+
 static inline long syscall_get_nr(struct task_struct *task,
 				  struct pt_regs *regs)
 {
diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c
index 5a82bc68193e..9e69449e77ad 100644
--- a/arch/s390/kernel/ftrace.c
+++ b/arch/s390/kernel/ftrace.c
@@ -200,13 +200,3 @@ out:
 	return parent;
 }
 #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
-
-#ifdef CONFIG_FTRACE_SYSCALLS
-
-extern unsigned int sys_call_table[];
-
-unsigned long __init arch_syscall_addr(int nr)
-{
-	return (unsigned long)sys_call_table[nr];
-}
-#endif
diff --git a/arch/sh/include/asm/syscall.h b/arch/sh/include/asm/syscall.h
index 6a381429ee9d..aa7777bdc370 100644
--- a/arch/sh/include/asm/syscall.h
+++ b/arch/sh/include/asm/syscall.h
@@ -1,6 +1,8 @@
 #ifndef __ASM_SH_SYSCALL_H
 #define __ASM_SH_SYSCALL_H
 
+extern const unsigned long sys_call_table[];
+
 #ifdef CONFIG_SUPERH32
 # include "syscall_32.h"
 #else
diff --git a/arch/sh/kernel/ftrace.c b/arch/sh/kernel/ftrace.c
index a48cdedc73b5..30e13196d35b 100644
--- a/arch/sh/kernel/ftrace.c
+++ b/arch/sh/kernel/ftrace.c
@@ -399,12 +399,3 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
 	}
 }
 #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
-
-#ifdef CONFIG_FTRACE_SYSCALLS
-extern unsigned long *sys_call_table;
-
-unsigned long __init arch_syscall_addr(int nr)
-{
-	return (unsigned long)sys_call_table[nr];
-}
-#endif /* CONFIG_FTRACE_SYSCALLS */
diff --git a/arch/sparc/include/asm/syscall.h b/arch/sparc/include/asm/syscall.h
index 7486c605e23c..025a02ad2e31 100644
--- a/arch/sparc/include/asm/syscall.h
+++ b/arch/sparc/include/asm/syscall.h
@@ -5,6 +5,13 @@
 #include <linux/sched.h>
 #include <asm/ptrace.h>
 
+/*
+ * The syscall table always contains 32 bit pointers since we know that the
+ * address of the function to be called is (way) below 4GB.  So the "int"
+ * type here is what we want [need] for both 32 bit and 64 bit systems.
+ */
+extern const unsigned int sys_call_table[];
+
 /* The system call number is given by the user in %g1 */
 static inline long syscall_get_nr(struct task_struct *task,
 				  struct pt_regs *regs)
diff --git a/arch/sparc/kernel/ftrace.c b/arch/sparc/kernel/ftrace.c
index 29973daa9930..9103a56b39e8 100644
--- a/arch/sparc/kernel/ftrace.c
+++ b/arch/sparc/kernel/ftrace.c
@@ -91,14 +91,3 @@ int __init ftrace_dyn_arch_init(void *data)
 	return 0;
 }
 #endif
-
-#ifdef CONFIG_FTRACE_SYSCALLS
-
-extern unsigned int sys_call_table[];
-
-unsigned long __init arch_syscall_addr(int nr)
-{
-	return (unsigned long)sys_call_table[nr];
-}
-
-#endif
diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h
index 8d33bc5462d1..c4a348f7bd43 100644
--- a/arch/x86/include/asm/syscall.h
+++ b/arch/x86/include/asm/syscall.h
@@ -16,6 +16,8 @@
 #include <linux/sched.h>
 #include <linux/err.h>
 
+extern const unsigned long sys_call_table[];
+
 /*
  * Only the low 32 bits of orig_ax are meaningful, so we return int.
  * This importantly ignores the high bits on 64-bit, so comparisons
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 309689245431..0d93a941934c 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -484,13 +484,3 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
 	}
 }
 #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
-
-#ifdef CONFIG_FTRACE_SYSCALLS
-
-extern unsigned long *sys_call_table;
-
-unsigned long __init arch_syscall_addr(int nr)
-{
-	return (unsigned long)(&sys_call_table)[nr];
-}
-#endif
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 0b4f97d24d7f..1cbb36f2759c 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -511,4 +511,10 @@ static inline void trace_hw_branch_oops(void) {}
 
 #endif /* CONFIG_HW_BRANCH_TRACER */
 
+#ifdef CONFIG_FTRACE_SYSCALLS
+
+unsigned long arch_syscall_addr(int nr);
+
+#endif /* CONFIG_FTRACE_SYSCALLS */
+
 #endif /* _LINUX_FTRACE_H */
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 49cea70fbf6d..ecf00782b46c 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -394,6 +394,11 @@ int init_syscall_trace(struct ftrace_event_call *call)
 	return id;
 }
 
+unsigned long __init arch_syscall_addr(int nr)
+{
+	return (unsigned long)sys_call_table[nr];
+}
+
 int __init init_ftrace_syscalls(void)
 {
 	struct syscall_metadata *meta;
-- 
cgit v1.2.3


From 60b86755929e1a7e9038c8d860a8491cfdf8d93a Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Wed, 17 Feb 2010 10:30:23 +0000
Subject: usbnet: Convert dev(dbg|err|warn|info) macros to netdev_<level>

These macros are too similar to the dev_<level> equivalents
but take a usbnet * argument.  Convert them to the recently
introduced netdev_<level> macros and remove the old macros.

The old macros had "\n" appended to the format string.
Add the "\n" to the converted uses.

Some existing uses of the dev<foo> macros in cdc_eem.c
probably mistakenly had trailing "\n".  No "\n" added there.

Fix net1080 this/other log message inversion.

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/asix.c            |  97 ++++++------
 drivers/net/usb/cdc_eem.c         |  10 +-
 drivers/net/usb/cdc_ether.c       |  18 +--
 drivers/net/usb/dm9601.c          |  49 +++---
 drivers/net/usb/int51x1.c         |  15 +-
 drivers/net/usb/net1080.c         | 101 ++++++------
 drivers/net/usb/rndis_host.c      |  10 +-
 drivers/net/usb/smsc95xx.c        | 188 +++++++++++-----------
 drivers/net/usb/usbnet.c          | 162 +++++++++----------
 drivers/net/wireless/rndis_wlan.c | 325 ++++++++++++++++++++------------------
 include/linux/usb/usbnet.h        |  21 ---
 11 files changed, 507 insertions(+), 489 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/usb/asix.c b/drivers/net/usb/asix.c
index f02551713b13..f605204de3e5 100644
--- a/drivers/net/usb/asix.c
+++ b/drivers/net/usb/asix.c
@@ -184,8 +184,8 @@ static int asix_read_cmd(struct usbnet *dev, u8 cmd, u16 value, u16 index,
 	void *buf;
 	int err = -ENOMEM;
 
-	devdbg(dev,"asix_read_cmd() cmd=0x%02x value=0x%04x index=0x%04x size=%d",
-		cmd, value, index, size);
+	netdev_dbg(dev->net, "asix_read_cmd() cmd=0x%02x value=0x%04x index=0x%04x size=%d\n",
+		   cmd, value, index, size);
 
 	buf = kmalloc(size, GFP_KERNEL);
 	if (!buf)
@@ -217,8 +217,8 @@ static int asix_write_cmd(struct usbnet *dev, u8 cmd, u16 value, u16 index,
 	void *buf = NULL;
 	int err = -ENOMEM;
 
-	devdbg(dev,"asix_write_cmd() cmd=0x%02x value=0x%04x index=0x%04x size=%d",
-		cmd, value, index, size);
+	netdev_dbg(dev->net, "asix_write_cmd() cmd=0x%02x value=0x%04x index=0x%04x size=%d\n",
+		   cmd, value, index, size);
 
 	if (data) {
 		buf = kmalloc(size, GFP_KERNEL);
@@ -264,15 +264,15 @@ asix_write_cmd_async(struct usbnet *dev, u8 cmd, u16 value, u16 index,
 	int status;
 	struct urb *urb;
 
-	devdbg(dev,"asix_write_cmd_async() cmd=0x%02x value=0x%04x index=0x%04x size=%d",
-		cmd, value, index, size);
+	netdev_dbg(dev->net, "asix_write_cmd_async() cmd=0x%02x value=0x%04x index=0x%04x size=%d\n",
+		   cmd, value, index, size);
 	if ((urb = usb_alloc_urb(0, GFP_ATOMIC)) == NULL) {
-		deverr(dev, "Error allocating URB in write_cmd_async!");
+		netdev_err(dev->net, "Error allocating URB in write_cmd_async!\n");
 		return;
 	}
 
 	if ((req = kmalloc(sizeof(struct usb_ctrlrequest), GFP_ATOMIC)) == NULL) {
-		deverr(dev, "Failed to allocate memory for control request");
+		netdev_err(dev->net, "Failed to allocate memory for control request\n");
 		usb_free_urb(urb);
 		return;
 	}
@@ -289,8 +289,8 @@ asix_write_cmd_async(struct usbnet *dev, u8 cmd, u16 value, u16 index,
 			     asix_async_cmd_callback, req);
 
 	if((status = usb_submit_urb(urb, GFP_ATOMIC)) < 0) {
-		deverr(dev, "Error submitting the control message: status=%d",
-				status);
+		netdev_err(dev->net, "Error submitting the control message: status=%d\n",
+			   status);
 		kfree(req);
 		usb_free_urb(urb);
 	}
@@ -314,7 +314,7 @@ static int asix_rx_fixup(struct usbnet *dev, struct sk_buff *skb)
 	while (skb->len > 0) {
 		if ((short)(header & 0x0000ffff) !=
 		    ~((short)((header & 0xffff0000) >> 16))) {
-			deverr(dev,"asix_rx_fixup() Bad Header Length");
+			netdev_err(dev->net, "asix_rx_fixup() Bad Header Length\n");
 		}
 		/* get the packet length */
 		size = (u16) (header & 0x0000ffff);
@@ -322,7 +322,8 @@ static int asix_rx_fixup(struct usbnet *dev, struct sk_buff *skb)
 		if ((skb->len) - ((size + 1) & 0xfffe) == 0)
 			return 2;
 		if (size > ETH_FRAME_LEN) {
-			deverr(dev,"asix_rx_fixup() Bad RX Length %d", size);
+			netdev_err(dev->net, "asix_rx_fixup() Bad RX Length %d\n",
+				   size);
 			return 0;
 		}
 		ax_skb = skb_clone(skb, GFP_ATOMIC);
@@ -348,7 +349,8 @@ static int asix_rx_fixup(struct usbnet *dev, struct sk_buff *skb)
 	}
 
 	if (skb->len < 0) {
-		deverr(dev,"asix_rx_fixup() Bad SKB Length %d", skb->len);
+		netdev_err(dev->net, "asix_rx_fixup() Bad SKB Length %d\n",
+			   skb->len);
 		return 0;
 	}
 	return 1;
@@ -409,7 +411,7 @@ static void asix_status(struct usbnet *dev, struct urb *urb)
 			usbnet_defer_kevent (dev, EVENT_LINK_RESET );
 		} else
 			netif_carrier_off(dev->net);
-		devdbg(dev, "Link Status is: %d", link);
+		netdev_dbg(dev->net, "Link Status is: %d\n", link);
 	}
 }
 
@@ -418,7 +420,7 @@ static inline int asix_set_sw_mii(struct usbnet *dev)
 	int ret;
 	ret = asix_write_cmd(dev, AX_CMD_SET_SW_MII, 0x0000, 0, 0, NULL);
 	if (ret < 0)
-		deverr(dev, "Failed to enable software MII access");
+		netdev_err(dev->net, "Failed to enable software MII access\n");
 	return ret;
 }
 
@@ -427,7 +429,7 @@ static inline int asix_set_hw_mii(struct usbnet *dev)
 	int ret;
 	ret = asix_write_cmd(dev, AX_CMD_SET_HW_MII, 0x0000, 0, 0, NULL);
 	if (ret < 0)
-		deverr(dev, "Failed to enable hardware MII access");
+		netdev_err(dev->net, "Failed to enable hardware MII access\n");
 	return ret;
 }
 
@@ -436,13 +438,14 @@ static inline int asix_get_phy_addr(struct usbnet *dev)
 	u8 buf[2];
 	int ret = asix_read_cmd(dev, AX_CMD_READ_PHY_ID, 0, 0, 2, buf);
 
-	devdbg(dev, "asix_get_phy_addr()");
+	netdev_dbg(dev->net, "asix_get_phy_addr()\n");
 
 	if (ret < 0) {
-		deverr(dev, "Error reading PHYID register: %02x", ret);
+		netdev_err(dev->net, "Error reading PHYID register: %02x\n", ret);
 		goto out;
 	}
-	devdbg(dev, "asix_get_phy_addr() returning 0x%04x", *((__le16 *)buf));
+	netdev_dbg(dev->net, "asix_get_phy_addr() returning 0x%04x\n",
+		   *((__le16 *)buf));
 	ret = buf[1];
 
 out:
@@ -455,7 +458,7 @@ static int asix_sw_reset(struct usbnet *dev, u8 flags)
 
         ret = asix_write_cmd(dev, AX_CMD_SW_RESET, flags, 0, 0, NULL);
 	if (ret < 0)
-		deverr(dev,"Failed to send software reset: %02x", ret);
+		netdev_err(dev->net, "Failed to send software reset: %02x\n", ret);
 
 	return ret;
 }
@@ -466,7 +469,7 @@ static u16 asix_read_rx_ctl(struct usbnet *dev)
 	int ret = asix_read_cmd(dev, AX_CMD_READ_RX_CTL, 0, 0, 2, &v);
 
 	if (ret < 0) {
-		deverr(dev, "Error reading RX_CTL register: %02x", ret);
+		netdev_err(dev->net, "Error reading RX_CTL register: %02x\n", ret);
 		goto out;
 	}
 	ret = le16_to_cpu(v);
@@ -478,11 +481,11 @@ static int asix_write_rx_ctl(struct usbnet *dev, u16 mode)
 {
 	int ret;
 
-	devdbg(dev,"asix_write_rx_ctl() - mode = 0x%04x", mode);
+	netdev_dbg(dev->net, "asix_write_rx_ctl() - mode = 0x%04x\n", mode);
 	ret = asix_write_cmd(dev, AX_CMD_WRITE_RX_CTL, mode, 0, 0, NULL);
 	if (ret < 0)
-		deverr(dev, "Failed to write RX_CTL mode to 0x%04x: %02x",
-		       mode, ret);
+		netdev_err(dev->net, "Failed to write RX_CTL mode to 0x%04x: %02x\n",
+			   mode, ret);
 
 	return ret;
 }
@@ -493,7 +496,8 @@ static u16 asix_read_medium_status(struct usbnet *dev)
 	int ret = asix_read_cmd(dev, AX_CMD_READ_MEDIUM_STATUS, 0, 0, 2, &v);
 
 	if (ret < 0) {
-		deverr(dev, "Error reading Medium Status register: %02x", ret);
+		netdev_err(dev->net, "Error reading Medium Status register: %02x\n",
+			   ret);
 		goto out;
 	}
 	ret = le16_to_cpu(v);
@@ -505,11 +509,11 @@ static int asix_write_medium_mode(struct usbnet *dev, u16 mode)
 {
 	int ret;
 
-	devdbg(dev,"asix_write_medium_mode() - mode = 0x%04x", mode);
+	netdev_dbg(dev->net, "asix_write_medium_mode() - mode = 0x%04x\n", mode);
 	ret = asix_write_cmd(dev, AX_CMD_WRITE_MEDIUM_MODE, mode, 0, 0, NULL);
 	if (ret < 0)
-		deverr(dev, "Failed to write Medium Mode mode to 0x%04x: %02x",
-			mode, ret);
+		netdev_err(dev->net, "Failed to write Medium Mode mode to 0x%04x: %02x\n",
+			   mode, ret);
 
 	return ret;
 }
@@ -518,11 +522,11 @@ static int asix_write_gpio(struct usbnet *dev, u16 value, int sleep)
 {
 	int ret;
 
-	devdbg(dev,"asix_write_gpio() - value = 0x%04x", value);
+	netdev_dbg(dev->net, "asix_write_gpio() - value = 0x%04x\n", value);
 	ret = asix_write_cmd(dev, AX_CMD_WRITE_GPIOS, value, 0, 0, NULL);
 	if (ret < 0)
-		deverr(dev, "Failed to write GPIO value 0x%04x: %02x",
-			value, ret);
+		netdev_err(dev->net, "Failed to write GPIO value 0x%04x: %02x\n",
+			   value, ret);
 
 	if (sleep)
 		msleep(sleep);
@@ -588,7 +592,8 @@ static int asix_mdio_read(struct net_device *netdev, int phy_id, int loc)
 	asix_set_hw_mii(dev);
 	mutex_unlock(&dev->phy_mutex);
 
-	devdbg(dev, "asix_mdio_read() phy_id=0x%02x, loc=0x%02x, returns=0x%04x", phy_id, loc, le16_to_cpu(res));
+	netdev_dbg(dev->net, "asix_mdio_read() phy_id=0x%02x, loc=0x%02x, returns=0x%04x\n",
+		   phy_id, loc, le16_to_cpu(res));
 
 	return le16_to_cpu(res);
 }
@@ -599,7 +604,8 @@ asix_mdio_write(struct net_device *netdev, int phy_id, int loc, int val)
 	struct usbnet *dev = netdev_priv(netdev);
 	__le16 res = cpu_to_le16(val);
 
-	devdbg(dev, "asix_mdio_write() phy_id=0x%02x, loc=0x%02x, val=0x%04x", phy_id, loc, val);
+	netdev_dbg(dev->net, "asix_mdio_write() phy_id=0x%02x, loc=0x%02x, val=0x%04x\n",
+		   phy_id, loc, val);
 	mutex_lock(&dev->phy_mutex);
 	asix_set_sw_mii(dev);
 	asix_write_cmd(dev, AX_CMD_WRITE_MII_REG, phy_id, (__u16)loc, 2, &res);
@@ -800,7 +806,8 @@ static int ax88172_link_reset(struct usbnet *dev)
 	if (ecmd.duplex != DUPLEX_FULL)
 		mode |= ~AX88172_MEDIUM_FD;
 
-	devdbg(dev, "ax88172_link_reset() speed: %d duplex: %d setting mode to 0x%04x", ecmd.speed, ecmd.duplex, mode);
+	netdev_dbg(dev->net, "ax88172_link_reset() speed: %d duplex: %d setting mode to 0x%04x\n",
+		   ecmd.speed, ecmd.duplex, mode);
 
 	asix_write_medium_mode(dev, mode);
 
@@ -902,7 +909,8 @@ static int ax88772_link_reset(struct usbnet *dev)
 	if (ecmd.duplex != DUPLEX_FULL)
 		mode &= ~AX_MEDIUM_FD;
 
-	devdbg(dev, "ax88772_link_reset() speed: %d duplex: %d setting mode to 0x%04x", ecmd.speed, ecmd.duplex, mode);
+	netdev_dbg(dev->net, "ax88772_link_reset() speed: %d duplex: %d setting mode to 0x%04x\n",
+		   ecmd.speed, ecmd.duplex, mode);
 
 	asix_write_medium_mode(dev, mode);
 
@@ -1059,10 +1067,10 @@ static int marvell_phy_init(struct usbnet *dev)
 	struct asix_data *data = (struct asix_data *)&dev->data;
 	u16 reg;
 
-	devdbg(dev,"marvell_phy_init()");
+	netdev_dbg(dev->net, "marvell_phy_init()\n");
 
 	reg = asix_mdio_read(dev->net, dev->mii.phy_id, MII_MARVELL_STATUS);
-	devdbg(dev,"MII_MARVELL_STATUS = 0x%04x", reg);
+	netdev_dbg(dev->net, "MII_MARVELL_STATUS = 0x%04x\n", reg);
 
 	asix_mdio_write(dev->net, dev->mii.phy_id, MII_MARVELL_CTRL,
 			MARVELL_CTRL_RXDELAY | MARVELL_CTRL_TXDELAY);
@@ -1070,7 +1078,7 @@ static int marvell_phy_init(struct usbnet *dev)
 	if (data->ledmode) {
 		reg = asix_mdio_read(dev->net, dev->mii.phy_id,
 			MII_MARVELL_LED_CTRL);
-		devdbg(dev,"MII_MARVELL_LED_CTRL (1) = 0x%04x", reg);
+		netdev_dbg(dev->net, "MII_MARVELL_LED_CTRL (1) = 0x%04x\n", reg);
 
 		reg &= 0xf8ff;
 		reg |= (1 + 0x0100);
@@ -1079,7 +1087,7 @@ static int marvell_phy_init(struct usbnet *dev)
 
 		reg = asix_mdio_read(dev->net, dev->mii.phy_id,
 			MII_MARVELL_LED_CTRL);
-		devdbg(dev,"MII_MARVELL_LED_CTRL (2) = 0x%04x", reg);
+		netdev_dbg(dev->net, "MII_MARVELL_LED_CTRL (2) = 0x%04x\n", reg);
 		reg &= 0xfc0f;
 	}
 
@@ -1090,7 +1098,7 @@ static int marvell_led_status(struct usbnet *dev, u16 speed)
 {
 	u16 reg = asix_mdio_read(dev->net, dev->mii.phy_id, MARVELL_LED_MANUAL);
 
-	devdbg(dev, "marvell_led_status() read 0x%04x", reg);
+	netdev_dbg(dev->net, "marvell_led_status() read 0x%04x\n", reg);
 
 	/* Clear out the center LED bits - 0x03F0 */
 	reg &= 0xfc0f;
@@ -1106,7 +1114,7 @@ static int marvell_led_status(struct usbnet *dev, u16 speed)
 			reg |= 0x02f0;
 	}
 
-	devdbg(dev, "marvell_led_status() writing 0x%04x", reg);
+	netdev_dbg(dev->net, "marvell_led_status() writing 0x%04x\n", reg);
 	asix_mdio_write(dev->net, dev->mii.phy_id, MARVELL_LED_MANUAL, reg);
 
 	return 0;
@@ -1118,7 +1126,7 @@ static int ax88178_link_reset(struct usbnet *dev)
 	struct ethtool_cmd ecmd;
 	struct asix_data *data = (struct asix_data *)&dev->data;
 
-	devdbg(dev,"ax88178_link_reset()");
+	netdev_dbg(dev->net, "ax88178_link_reset()\n");
 
 	mii_check_media(&dev->mii, 1, 1);
 	mii_ethtool_gset(&dev->mii, &ecmd);
@@ -1138,7 +1146,8 @@ static int ax88178_link_reset(struct usbnet *dev)
 	else
 		mode &= ~AX_MEDIUM_FD;
 
-	devdbg(dev, "ax88178_link_reset() speed: %d duplex: %d setting mode to 0x%04x", ecmd.speed, ecmd.duplex, mode);
+	netdev_dbg(dev->net, "ax88178_link_reset() speed: %d duplex: %d setting mode to 0x%04x\n",
+		   ecmd.speed, ecmd.duplex, mode);
 
 	asix_write_medium_mode(dev, mode);
 
@@ -1188,7 +1197,7 @@ static int ax88178_change_mtu(struct net_device *net, int new_mtu)
 	struct usbnet *dev = netdev_priv(net);
 	int ll_mtu = new_mtu + net->hard_header_len + 4;
 
-	devdbg(dev, "ax88178_change_mtu() new_mtu=%d", new_mtu);
+	netdev_dbg(dev->net, "ax88178_change_mtu() new_mtu=%d\n", new_mtu);
 
 	if (new_mtu <= 0 || ll_mtu > 16384)
 		return -EINVAL;
diff --git a/drivers/net/usb/cdc_eem.c b/drivers/net/usb/cdc_eem.c
index c337ffc3304a..a4a85a6ed86d 100644
--- a/drivers/net/usb/cdc_eem.c
+++ b/drivers/net/usb/cdc_eem.c
@@ -73,7 +73,7 @@ static void eem_linkcmd(struct usbnet *dev, struct sk_buff *skb)
 		usb_free_urb(urb);
 fail:
 		dev_kfree_skb(skb);
-		devwarn(dev, "link cmd failure\n");
+		netdev_warn(dev->net, "link cmd failure\n");
 		return;
 	}
 }
@@ -212,7 +212,8 @@ static int eem_rx_fixup(struct usbnet *dev, struct sk_buff *skb)
 			 * b15:		1 (EEM command)
 			 */
 			if (header & BIT(14)) {
-				devdbg(dev, "reserved command %04x\n", header);
+				netdev_dbg(dev->net, "reserved command %04x\n",
+					   header);
 				continue;
 			}
 
@@ -255,8 +256,9 @@ static int eem_rx_fixup(struct usbnet *dev, struct sk_buff *skb)
 			case 1:		/* Echo response */
 			case 5:		/* Tickle */
 			default:	/* reserved */
-				devwarn(dev, "unexpected link command %d\n",
-						bmEEMCmd);
+				netdev_warn(dev->net,
+					    "unexpected link command %d\n",
+					    bmEEMCmd);
 				continue;
 			}
 
diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c
index 4f27f022fbf7..7e5a75269cc5 100644
--- a/drivers/net/usb/cdc_ether.c
+++ b/drivers/net/usb/cdc_ether.c
@@ -340,9 +340,9 @@ EXPORT_SYMBOL_GPL(usbnet_cdc_unbind);
 static void dumpspeed(struct usbnet *dev, __le32 *speeds)
 {
 	if (netif_msg_timer(dev))
-		devinfo(dev, "link speeds: %u kbps up, %u kbps down",
-			__le32_to_cpu(speeds[0]) / 1000,
-		__le32_to_cpu(speeds[1]) / 1000);
+		netdev_info(dev->net, "link speeds: %u kbps up, %u kbps down\n",
+			    __le32_to_cpu(speeds[0]) / 1000,
+			    __le32_to_cpu(speeds[1]) / 1000);
 }
 
 static void cdc_status(struct usbnet *dev, struct urb *urb)
@@ -362,8 +362,8 @@ static void cdc_status(struct usbnet *dev, struct urb *urb)
 	switch (event->bNotificationType) {
 	case USB_CDC_NOTIFY_NETWORK_CONNECTION:
 		if (netif_msg_timer(dev))
-			devdbg(dev, "CDC: carrier %s",
-					event->wValue ? "on" : "off");
+			netdev_dbg(dev->net, "CDC: carrier %s\n",
+				   event->wValue ? "on" : "off");
 		if (event->wValue)
 			netif_carrier_on(dev->net);
 		else
@@ -371,8 +371,8 @@ static void cdc_status(struct usbnet *dev, struct urb *urb)
 		break;
 	case USB_CDC_NOTIFY_SPEED_CHANGE:	/* tx/rx rates */
 		if (netif_msg_timer(dev))
-			devdbg(dev, "CDC: speed change (len %d)",
-					urb->actual_length);
+			netdev_dbg(dev->net, "CDC: speed change (len %d)\n",
+				   urb->actual_length);
 		if (urb->actual_length != (sizeof *event + 8))
 			set_bit(EVENT_STS_SPLIT, &dev->flags);
 		else
@@ -382,8 +382,8 @@ static void cdc_status(struct usbnet *dev, struct urb *urb)
 	 * but there are no standard formats for the response data.
 	 */
 	default:
-		deverr(dev, "CDC: unexpected notification %02x!",
-				 event->bNotificationType);
+		netdev_err(dev->net, "CDC: unexpected notification %02x!\n",
+			   event->bNotificationType);
 		break;
 	}
 }
diff --git a/drivers/net/usb/dm9601.c b/drivers/net/usb/dm9601.c
index c820fec62041..269339769f47 100644
--- a/drivers/net/usb/dm9601.c
+++ b/drivers/net/usb/dm9601.c
@@ -58,7 +58,7 @@ static int dm_read(struct usbnet *dev, u8 reg, u16 length, void *data)
 	void *buf;
 	int err = -ENOMEM;
 
-	devdbg(dev, "dm_read() reg=0x%02x length=%d", reg, length);
+	netdev_dbg(dev->net, "dm_read() reg=0x%02x length=%d\n", reg, length);
 
 	buf = kmalloc(length, GFP_KERNEL);
 	if (!buf)
@@ -89,7 +89,7 @@ static int dm_write(struct usbnet *dev, u8 reg, u16 length, void *data)
 	void *buf = NULL;
 	int err = -ENOMEM;
 
-	devdbg(dev, "dm_write() reg=0x%02x, length=%d", reg, length);
+	netdev_dbg(dev->net, "dm_write() reg=0x%02x, length=%d\n", reg, length);
 
 	if (data) {
 		buf = kmalloc(length, GFP_KERNEL);
@@ -112,7 +112,8 @@ static int dm_write(struct usbnet *dev, u8 reg, u16 length, void *data)
 
 static int dm_write_reg(struct usbnet *dev, u8 reg, u8 value)
 {
-	devdbg(dev, "dm_write_reg() reg=0x%02x, value=0x%02x", reg, value);
+	netdev_dbg(dev->net, "dm_write_reg() reg=0x%02x, value=0x%02x\n",
+		   reg, value);
 	return usb_control_msg(dev->udev,
 			       usb_sndctrlpipe(dev->udev, 0),
 			       DM_WRITE_REG,
@@ -142,13 +143,13 @@ static void dm_write_async_helper(struct usbnet *dev, u8 reg, u8 value,
 
 	urb = usb_alloc_urb(0, GFP_ATOMIC);
 	if (!urb) {
-		deverr(dev, "Error allocating URB in dm_write_async_helper!");
+		netdev_err(dev->net, "Error allocating URB in dm_write_async_helper!\n");
 		return;
 	}
 
 	req = kmalloc(sizeof(struct usb_ctrlrequest), GFP_ATOMIC);
 	if (!req) {
-		deverr(dev, "Failed to allocate memory for control request");
+		netdev_err(dev->net, "Failed to allocate memory for control request\n");
 		usb_free_urb(urb);
 		return;
 	}
@@ -166,8 +167,8 @@ static void dm_write_async_helper(struct usbnet *dev, u8 reg, u8 value,
 
 	status = usb_submit_urb(urb, GFP_ATOMIC);
 	if (status < 0) {
-		deverr(dev, "Error submitting the control message: status=%d",
-		       status);
+		netdev_err(dev->net, "Error submitting the control message: status=%d\n",
+			   status);
 		kfree(req);
 		usb_free_urb(urb);
 	}
@@ -175,15 +176,15 @@ static void dm_write_async_helper(struct usbnet *dev, u8 reg, u8 value,
 
 static void dm_write_async(struct usbnet *dev, u8 reg, u16 length, void *data)
 {
-	devdbg(dev, "dm_write_async() reg=0x%02x length=%d", reg, length);
+	netdev_dbg(dev->net, "dm_write_async() reg=0x%02x length=%d\n", reg, length);
 
 	dm_write_async_helper(dev, reg, 0, length, data);
 }
 
 static void dm_write_reg_async(struct usbnet *dev, u8 reg, u8 value)
 {
-	devdbg(dev, "dm_write_reg_async() reg=0x%02x value=0x%02x",
-	       reg, value);
+	netdev_dbg(dev->net, "dm_write_reg_async() reg=0x%02x value=0x%02x\n",
+		   reg, value);
 
 	dm_write_async_helper(dev, reg, value, 0, NULL);
 }
@@ -211,7 +212,7 @@ static int dm_read_shared_word(struct usbnet *dev, int phy, u8 reg, __le16 *valu
 	}
 
 	if (i == DM_TIMEOUT) {
-		deverr(dev, "%s read timed out!", phy ? "phy" : "eeprom");
+		netdev_err(dev->net, "%s read timed out!\n", phy ? "phy" : "eeprom");
 		ret = -EIO;
 		goto out;
 	}
@@ -219,8 +220,8 @@ static int dm_read_shared_word(struct usbnet *dev, int phy, u8 reg, __le16 *valu
 	dm_write_reg(dev, DM_SHARED_CTRL, 0x0);
 	ret = dm_read(dev, DM_SHARED_DATA, 2, value);
 
-	devdbg(dev, "read shared %d 0x%02x returned 0x%04x, %d",
-	       phy, reg, *value, ret);
+	netdev_dbg(dev->net, "read shared %d 0x%02x returned 0x%04x, %d\n",
+		   phy, reg, *value, ret);
 
  out:
 	mutex_unlock(&dev->phy_mutex);
@@ -254,7 +255,7 @@ static int dm_write_shared_word(struct usbnet *dev, int phy, u8 reg, __le16 valu
 	}
 
 	if (i == DM_TIMEOUT) {
-		deverr(dev, "%s write timed out!", phy ? "phy" : "eeprom");
+		netdev_err(dev->net, "%s write timed out!\n", phy ? "phy" : "eeprom");
 		ret = -EIO;
 		goto out;
 	}
@@ -304,15 +305,15 @@ static int dm9601_mdio_read(struct net_device *netdev, int phy_id, int loc)
 	__le16 res;
 
 	if (phy_id) {
-		devdbg(dev, "Only internal phy supported");
+		netdev_dbg(dev->net, "Only internal phy supported\n");
 		return 0;
 	}
 
 	dm_read_shared_word(dev, 1, loc, &res);
 
-	devdbg(dev,
-	       "dm9601_mdio_read() phy_id=0x%02x, loc=0x%02x, returns=0x%04x",
-	       phy_id, loc, le16_to_cpu(res));
+	netdev_dbg(dev->net,
+		   "dm9601_mdio_read() phy_id=0x%02x, loc=0x%02x, returns=0x%04x\n",
+		   phy_id, loc, le16_to_cpu(res));
 
 	return le16_to_cpu(res);
 }
@@ -324,12 +325,12 @@ static void dm9601_mdio_write(struct net_device *netdev, int phy_id, int loc,
 	__le16 res = cpu_to_le16(val);
 
 	if (phy_id) {
-		devdbg(dev, "Only internal phy supported");
+		netdev_dbg(dev->net, "Only internal phy supported\n");
 		return;
 	}
 
-	devdbg(dev,"dm9601_mdio_write() phy_id=0x%02x, loc=0x%02x, val=0x%04x",
-	       phy_id, loc, val);
+	netdev_dbg(dev->net, "dm9601_mdio_write() phy_id=0x%02x, loc=0x%02x, val=0x%04x\n",
+		   phy_id, loc, val);
 
 	dm_write_shared_word(dev, 1, loc, res);
 }
@@ -592,7 +593,7 @@ static void dm9601_status(struct usbnet *dev, struct urb *urb)
 		}
 		else
 			netif_carrier_off(dev->net);
-		devdbg(dev, "Link Status is: %d", link);
+		netdev_dbg(dev->net, "Link Status is: %d\n", link);
 	}
 }
 
@@ -603,8 +604,8 @@ static int dm9601_link_reset(struct usbnet *dev)
 	mii_check_media(&dev->mii, 1, 1);
 	mii_ethtool_gset(&dev->mii, &ecmd);
 
-	devdbg(dev, "link_reset() speed: %d duplex: %d",
-	       ecmd.speed, ecmd.duplex);
+	netdev_dbg(dev->net, "link_reset() speed: %d duplex: %d\n",
+		   ecmd.speed, ecmd.duplex);
 
 	return 0;
 }
diff --git a/drivers/net/usb/int51x1.c b/drivers/net/usb/int51x1.c
index 9ab5c1983a7d..3c228df57062 100644
--- a/drivers/net/usb/int51x1.c
+++ b/drivers/net/usb/int51x1.c
@@ -51,7 +51,7 @@ static int int51x1_rx_fixup(struct usbnet *dev, struct sk_buff *skb)
 	int len;
 
 	if (!(pskb_may_pull(skb, INT51X1_HEADER_SIZE))) {
-		deverr(dev, "unexpected tiny rx frame");
+		netdev_err(dev->net, "unexpected tiny rx frame\n");
 		return 0;
 	}
 
@@ -138,25 +138,25 @@ static void int51x1_set_multicast(struct net_device *netdev)
 	if (netdev->flags & IFF_PROMISC) {
 		/* do not expect to see traffic of other PLCs */
 		filter |= PACKET_TYPE_PROMISCUOUS;
-		devinfo(dev, "promiscuous mode enabled");
+		netdev_info(dev->net, "promiscuous mode enabled\n");
 	} else if (!netdev_mc_empty(netdev) ||
 		  (netdev->flags & IFF_ALLMULTI)) {
 		filter |= PACKET_TYPE_ALL_MULTICAST;
-		devdbg(dev, "receive all multicast enabled");
+		netdev_dbg(dev->net, "receive all multicast enabled\n");
 	} else {
 		/* ~PROMISCUOUS, ~MULTICAST */
-		devdbg(dev, "receive own packets only");
+		netdev_dbg(dev->net, "receive own packets only\n");
 	}
 
 	urb = usb_alloc_urb(0, GFP_ATOMIC);
 	if (!urb) {
-		devwarn(dev, "Error allocating URB");
+		netdev_warn(dev->net, "Error allocating URB\n");
 		return;
 	}
 
 	req = kmalloc(sizeof(*req), GFP_ATOMIC);
 	if (!req) {
-		devwarn(dev, "Error allocating control msg");
+		netdev_warn(dev->net, "Error allocating control msg\n");
 		goto out;
 	}
 
@@ -173,7 +173,8 @@ static void int51x1_set_multicast(struct net_device *netdev)
 
 	status = usb_submit_urb(urb, GFP_ATOMIC);
 	if (status < 0) {
-		devwarn(dev, "Error submitting control msg, sts=%d", status);
+		netdev_warn(dev->net, "Error submitting control msg, sts=%d\n",
+			    status);
 		goto out1;
 	}
 	return;
diff --git a/drivers/net/usb/net1080.c b/drivers/net/usb/net1080.c
index aeb1ab03a9ee..f6994053c101 100644
--- a/drivers/net/usb/net1080.c
+++ b/drivers/net/usb/net1080.c
@@ -205,23 +205,24 @@ static inline void nc_dump_usbctl(struct usbnet *dev, u16 usbctl)
 {
 	if (!netif_msg_link(dev))
 		return;
-	devdbg(dev, "net1080 %s-%s usbctl 0x%x:%s%s%s%s%s;"
-			" this%s%s;"
-			" other%s%s; r/o 0x%x",
-		dev->udev->bus->bus_name, dev->udev->devpath,
-		usbctl,
-		(usbctl & USBCTL_ENABLE_LANG) ? " lang" : "",
-		(usbctl & USBCTL_ENABLE_MFGR) ? " mfgr" : "",
-		(usbctl & USBCTL_ENABLE_PROD) ? " prod" : "",
-		(usbctl & USBCTL_ENABLE_SERIAL) ? " serial" : "",
-		(usbctl & USBCTL_ENABLE_DEFAULTS) ? " defaults" : "",
-
-		(usbctl & USBCTL_FLUSH_OTHER) ? " FLUSH" : "",
-		(usbctl & USBCTL_DISCONN_OTHER) ? " DIS" : "",
-		(usbctl & USBCTL_FLUSH_THIS) ? " FLUSH" : "",
-		(usbctl & USBCTL_DISCONN_THIS) ? " DIS" : "",
-		usbctl & ~USBCTL_WRITABLE_MASK
-		);
+	netdev_dbg(dev->net, "net1080 %s-%s usbctl 0x%x:%s%s%s%s%s;"
+		   " this%s%s;"
+		   " other%s%s; r/o 0x%x\n",
+		   dev->udev->bus->bus_name, dev->udev->devpath,
+		   usbctl,
+		   (usbctl & USBCTL_ENABLE_LANG) ? " lang" : "",
+		   (usbctl & USBCTL_ENABLE_MFGR) ? " mfgr" : "",
+		   (usbctl & USBCTL_ENABLE_PROD) ? " prod" : "",
+		   (usbctl & USBCTL_ENABLE_SERIAL) ? " serial" : "",
+		   (usbctl & USBCTL_ENABLE_DEFAULTS) ? " defaults" : "",
+
+		   (usbctl & USBCTL_FLUSH_THIS) ? " FLUSH" : "",
+		   (usbctl & USBCTL_DISCONN_THIS) ? " DIS" : "",
+
+		   (usbctl & USBCTL_FLUSH_OTHER) ? " FLUSH" : "",
+		   (usbctl & USBCTL_DISCONN_OTHER) ? " DIS" : "",
+
+		   usbctl & ~USBCTL_WRITABLE_MASK);
 }
 
 /*-------------------------------------------------------------------------*/
@@ -250,28 +251,25 @@ static inline void nc_dump_status(struct usbnet *dev, u16 status)
 {
 	if (!netif_msg_link(dev))
 		return;
-	devdbg(dev, "net1080 %s-%s status 0x%x:"
-			" this (%c) PKT=%d%s%s%s;"
-			" other PKT=%d%s%s%s; unspec 0x%x",
-		dev->udev->bus->bus_name, dev->udev->devpath,
-		status,
-
-		// XXX the packet counts don't seem right
-		// (1 at reset, not 0); maybe UNSPEC too
-
-		(status & STATUS_PORT_A) ? 'A' : 'B',
-		STATUS_PACKETS_THIS(status),
-		(status & STATUS_CONN_THIS) ? " CON" : "",
-		(status & STATUS_SUSPEND_THIS) ? " SUS" : "",
-		(status & STATUS_MAILBOX_THIS) ? " MBOX" : "",
-
-		STATUS_PACKETS_OTHER(status),
-		(status & STATUS_CONN_OTHER) ? " CON" : "",
-		(status & STATUS_SUSPEND_OTHER) ? " SUS" : "",
-		(status & STATUS_MAILBOX_OTHER) ? " MBOX" : "",
-
-		status & STATUS_UNSPEC_MASK
-		);
+	netdev_dbg(dev->net, "net1080 %s-%s status 0x%x: this (%c) PKT=%d%s%s%s; other PKT=%d%s%s%s; unspec 0x%x\n",
+		   dev->udev->bus->bus_name, dev->udev->devpath,
+		   status,
+
+		   // XXX the packet counts don't seem right
+		   // (1 at reset, not 0); maybe UNSPEC too
+
+		   (status & STATUS_PORT_A) ? 'A' : 'B',
+		   STATUS_PACKETS_THIS(status),
+		   (status & STATUS_CONN_THIS) ? " CON" : "",
+		   (status & STATUS_SUSPEND_THIS) ? " SUS" : "",
+		   (status & STATUS_MAILBOX_THIS) ? " MBOX" : "",
+
+		   STATUS_PACKETS_OTHER(status),
+		   (status & STATUS_CONN_OTHER) ? " CON" : "",
+		   (status & STATUS_SUSPEND_OTHER) ? " SUS" : "",
+		   (status & STATUS_MAILBOX_OTHER) ? " MBOX" : "",
+
+		   status & STATUS_UNSPEC_MASK);
 }
 
 /*-------------------------------------------------------------------------*/
@@ -287,9 +285,9 @@ static inline void nc_dump_status(struct usbnet *dev, u16 status)
 static inline void nc_dump_ttl(struct usbnet *dev, u16 ttl)
 {
 	if (netif_msg_link(dev))
-		devdbg(dev, "net1080 %s-%s ttl 0x%x this = %d, other = %d",
-			dev->udev->bus->bus_name, dev->udev->devpath,
-			ttl, TTL_THIS(ttl), TTL_OTHER(ttl));
+		netdev_dbg(dev->net, "net1080 %s-%s ttl 0x%x this = %d, other = %d\n",
+			   dev->udev->bus->bus_name, dev->udev->devpath,
+			   ttl, TTL_THIS(ttl), TTL_OTHER(ttl));
 }
 
 /*-------------------------------------------------------------------------*/
@@ -335,10 +333,9 @@ static int net1080_reset(struct usbnet *dev)
 	dbg("%s: assigned TTL, %d ms", dev->net->name, NC_READ_TTL_MS);
 
 	if (netif_msg_link(dev))
-		devinfo(dev, "port %c, peer %sconnected",
-			(status & STATUS_PORT_A) ? 'A' : 'B',
-			(status & STATUS_CONN_OTHER) ? "" : "dis"
-			);
+		netdev_info(dev->net, "port %c, peer %sconnected\n",
+			    (status & STATUS_PORT_A) ? 'A' : 'B',
+			    (status & STATUS_CONN_OTHER) ? "" : "dis");
 	retval = 0;
 
 done:
@@ -416,7 +413,7 @@ static void nc_ensure_sync(struct usbnet *dev)
 		}
 
 		if (netif_msg_rx_err(dev))
-			devdbg(dev, "flush net1080; too many framing errors");
+			netdev_dbg(dev->net, "flush net1080; too many framing errors\n");
 		dev->frame_errors = 0;
 	}
 }
@@ -486,8 +483,8 @@ static int net1080_rx_fixup(struct usbnet *dev, struct sk_buff *skb)
 		return 0;
 	}
 #if 0
-	devdbg(dev, "frame <rx h %d p %d id %d", header->hdr_len,
-		header->packet_len, header->packet_id);
+	netdev_dbg(dev->net, "frame <rx h %d p %d id %d\n", header->hdr_len,
+		   header->packet_len, header->packet_id);
 #endif
 	dev->frame_errors = 0;
 	return 1;
@@ -547,9 +544,9 @@ encapsulate:
 	trailer = (struct nc_trailer *) skb_put(skb, sizeof *trailer);
 	put_unaligned(header->packet_id, &trailer->packet_id);
 #if 0
-	devdbg(dev, "frame >tx h %d p %d id %d",
-		header->hdr_len, header->packet_len,
-		header->packet_id);
+	netdev_dbg(dev->net, "frame >tx h %d p %d id %d\n",
+		   header->hdr_len, header->packet_len,
+		   header->packet_id);
 #endif
 	return skb;
 }
diff --git a/drivers/net/usb/rndis_host.c b/drivers/net/usb/rndis_host.c
index 490fa8f55424..f01f02401856 100644
--- a/drivers/net/usb/rndis_host.c
+++ b/drivers/net/usb/rndis_host.c
@@ -57,8 +57,8 @@
  */
 void rndis_status(struct usbnet *dev, struct urb *urb)
 {
-	devdbg(dev, "rndis status urb, len %d stat %d",
-		urb->actual_length, urb->status);
+	netdev_dbg(dev->net, "rndis status urb, len %d stat %d\n",
+		   urb->actual_length, urb->status);
 	// FIXME for keepalives, respond immediately (asynchronously)
 	// if not an RNDIS status, do like cdc_status(dev,urb) does
 }
@@ -497,9 +497,9 @@ int rndis_rx_fixup(struct usbnet *dev, struct sk_buff *skb)
 			     skb->len < msg_len ||
 			     (data_offset + data_len + 8) > msg_len)) {
 			dev->net->stats.rx_frame_errors++;
-			devdbg(dev, "bad rndis message %d/%d/%d/%d, len %d",
-				le32_to_cpu(hdr->msg_type),
-				msg_len, data_offset, data_len, skb->len);
+			netdev_dbg(dev->net, "bad rndis message %d/%d/%d/%d, len %d\n",
+				   le32_to_cpu(hdr->msg_type),
+				   msg_len, data_offset, data_len, skb->len);
 			return 0;
 		}
 		skb_pull(skb, 8 + data_offset);
diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c
index 48555d0e374d..d4cbcefbff38 100644
--- a/drivers/net/usb/smsc95xx.c
+++ b/drivers/net/usb/smsc95xx.c
@@ -78,7 +78,7 @@ static int smsc95xx_read_reg(struct usbnet *dev, u32 index, u32 *data)
 		00, index, buf, 4, USB_CTRL_GET_TIMEOUT);
 
 	if (unlikely(ret < 0))
-		devwarn(dev, "Failed to read register index 0x%08x", index);
+		netdev_warn(dev->net, "Failed to read register index 0x%08x\n", index);
 
 	le32_to_cpus(buf);
 	*data = *buf;
@@ -106,7 +106,7 @@ static int smsc95xx_write_reg(struct usbnet *dev, u32 index, u32 data)
 		00, index, buf, 4, USB_CTRL_SET_TIMEOUT);
 
 	if (unlikely(ret < 0))
-		devwarn(dev, "Failed to write register index 0x%08x", index);
+		netdev_warn(dev->net, "Failed to write register index 0x%08x\n", index);
 
 	kfree(buf);
 
@@ -138,7 +138,7 @@ static int smsc95xx_mdio_read(struct net_device *netdev, int phy_id, int idx)
 
 	/* confirm MII not busy */
 	if (smsc95xx_phy_wait_not_busy(dev)) {
-		devwarn(dev, "MII is busy in smsc95xx_mdio_read");
+		netdev_warn(dev->net, "MII is busy in smsc95xx_mdio_read\n");
 		mutex_unlock(&dev->phy_mutex);
 		return -EIO;
 	}
@@ -150,7 +150,7 @@ static int smsc95xx_mdio_read(struct net_device *netdev, int phy_id, int idx)
 	smsc95xx_write_reg(dev, MII_ADDR, addr);
 
 	if (smsc95xx_phy_wait_not_busy(dev)) {
-		devwarn(dev, "Timed out reading MII reg %02X", idx);
+		netdev_warn(dev->net, "Timed out reading MII reg %02X\n", idx);
 		mutex_unlock(&dev->phy_mutex);
 		return -EIO;
 	}
@@ -172,7 +172,7 @@ static void smsc95xx_mdio_write(struct net_device *netdev, int phy_id, int idx,
 
 	/* confirm MII not busy */
 	if (smsc95xx_phy_wait_not_busy(dev)) {
-		devwarn(dev, "MII is busy in smsc95xx_mdio_write");
+		netdev_warn(dev->net, "MII is busy in smsc95xx_mdio_write\n");
 		mutex_unlock(&dev->phy_mutex);
 		return;
 	}
@@ -187,7 +187,7 @@ static void smsc95xx_mdio_write(struct net_device *netdev, int phy_id, int idx,
 	smsc95xx_write_reg(dev, MII_ADDR, addr);
 
 	if (smsc95xx_phy_wait_not_busy(dev))
-		devwarn(dev, "Timed out writing MII reg %02X", idx);
+		netdev_warn(dev->net, "Timed out writing MII reg %02X\n", idx);
 
 	mutex_unlock(&dev->phy_mutex);
 }
@@ -205,7 +205,7 @@ static int smsc95xx_wait_eeprom(struct usbnet *dev)
 	} while (!time_after(jiffies, start_time + HZ));
 
 	if (val & (E2P_CMD_TIMEOUT_ | E2P_CMD_BUSY_)) {
-		devwarn(dev, "EEPROM read operation timeout");
+		netdev_warn(dev->net, "EEPROM read operation timeout\n");
 		return -EIO;
 	}
 
@@ -226,7 +226,7 @@ static int smsc95xx_eeprom_confirm_not_busy(struct usbnet *dev)
 		udelay(40);
 	} while (!time_after(jiffies, start_time + HZ));
 
-	devwarn(dev, "EEPROM is busy");
+	netdev_warn(dev->net, "EEPROM is busy\n");
 	return -EIO;
 }
 
@@ -308,7 +308,7 @@ static void smsc95xx_async_cmd_callback(struct urb *urb)
 	int status = urb->status;
 
 	if (status < 0)
-		devwarn(dev, "async callback failed with %d", status);
+		netdev_warn(dev->net, "async callback failed with %d\n", status);
 
 	kfree(usb_context);
 	usb_free_urb(urb);
@@ -323,13 +323,13 @@ static int smsc95xx_write_reg_async(struct usbnet *dev, u16 index, u32 *data)
 
 	urb = usb_alloc_urb(0, GFP_ATOMIC);
 	if (!urb) {
-		devwarn(dev, "Error allocating URB");
+		netdev_warn(dev->net, "Error allocating URB\n");
 		return -ENOMEM;
 	}
 
 	usb_context = kmalloc(sizeof(struct usb_context), GFP_ATOMIC);
 	if (usb_context == NULL) {
-		devwarn(dev, "Error allocating control msg");
+		netdev_warn(dev->net, "Error allocating control msg\n");
 		usb_free_urb(urb);
 		return -ENOMEM;
 	}
@@ -348,7 +348,8 @@ static int smsc95xx_write_reg_async(struct usbnet *dev, u16 index, u32 *data)
 
 	status = usb_submit_urb(urb, GFP_ATOMIC);
 	if (status < 0) {
-		devwarn(dev, "Error submitting control msg, sts=%d", status);
+		netdev_warn(dev->net, "Error submitting control msg, sts=%d\n",
+			    status);
 		kfree(usb_context);
 		usb_free_urb(urb);
 	}
@@ -376,12 +377,12 @@ static void smsc95xx_set_multicast(struct net_device *netdev)
 
 	if (dev->net->flags & IFF_PROMISC) {
 		if (netif_msg_drv(dev))
-			devdbg(dev, "promiscuous mode enabled");
+			netdev_dbg(dev->net, "promiscuous mode enabled\n");
 		pdata->mac_cr |= MAC_CR_PRMS_;
 		pdata->mac_cr &= ~(MAC_CR_MCPAS_ | MAC_CR_HPFILT_);
 	} else if (dev->net->flags & IFF_ALLMULTI) {
 		if (netif_msg_drv(dev))
-			devdbg(dev, "receive all multicast enabled");
+			netdev_dbg(dev->net, "receive all multicast enabled\n");
 		pdata->mac_cr |= MAC_CR_MCPAS_;
 		pdata->mac_cr &= ~(MAC_CR_PRMS_ | MAC_CR_HPFILT_);
 	} else if (!netdev_mc_empty(dev->net)) {
@@ -401,20 +402,20 @@ static void smsc95xx_set_multicast(struct net_device *netdev)
 				else
 					hash_lo |= mask;
 			} else {
-				devwarn(dev, "dmi_addrlen != 6");
+				netdev_warn(dev->net, "dmi_addrlen != 6\n");
 			}
 			mc_list = mc_list->next;
 		}
 
 		if (count != ((u32) netdev_mc_count(dev->net)))
-			devwarn(dev, "mc_count != dev->mc_count");
+			netdev_warn(dev->net, "mc_count != dev->mc_count\n");
 
 		if (netif_msg_drv(dev))
-			devdbg(dev, "HASHH=0x%08X, HASHL=0x%08X", hash_hi,
-				hash_lo);
+			netdev_dbg(dev->net, "HASHH=0x%08X, HASHL=0x%08X\n",
+				   hash_hi, hash_lo);
 	} else {
 		if (netif_msg_drv(dev))
-			devdbg(dev, "receive own packets only");
+			netdev_dbg(dev->net, "receive own packets only\n");
 		pdata->mac_cr &=
 			~(MAC_CR_PRMS_ | MAC_CR_MCPAS_ | MAC_CR_HPFILT_);
 	}
@@ -434,7 +435,7 @@ static void smsc95xx_phy_update_flowcontrol(struct usbnet *dev, u8 duplex,
 
 	int ret = smsc95xx_read_reg(dev, AFC_CFG, &afc_cfg);
 	if (ret < 0) {
-		devwarn(dev, "error reading AFC_CFG");
+		netdev_warn(dev->net, "error reading AFC_CFG\n");
 		return;
 	}
 
@@ -452,12 +453,12 @@ static void smsc95xx_phy_update_flowcontrol(struct usbnet *dev, u8 duplex,
 			afc_cfg &= ~0xF;
 
 		if (netif_msg_link(dev))
-			devdbg(dev, "rx pause %s, tx pause %s",
-				(cap & FLOW_CTRL_RX ? "enabled" : "disabled"),
-				(cap & FLOW_CTRL_TX ? "enabled" : "disabled"));
+			netdev_dbg(dev->net, "rx pause %s, tx pause %s\n",
+				   cap & FLOW_CTRL_RX ? "enabled" : "disabled",
+				   cap & FLOW_CTRL_TX ? "enabled" : "disabled");
 	} else {
 		if (netif_msg_link(dev))
-			devdbg(dev, "half duplex");
+			netdev_dbg(dev->net, "half duplex\n");
 		flow = 0;
 		afc_cfg |= 0xF;
 	}
@@ -486,8 +487,8 @@ static int smsc95xx_link_reset(struct usbnet *dev)
 	rmtadv = smsc95xx_mdio_read(dev->net, mii->phy_id, MII_LPA);
 
 	if (netif_msg_link(dev))
-		devdbg(dev, "speed: %d duplex: %d lcladv: %04x rmtadv: %04x",
-			ecmd.speed, ecmd.duplex, lcladv, rmtadv);
+		netdev_dbg(dev->net, "speed: %d duplex: %d lcladv: %04x rmtadv: %04x\n",
+			   ecmd.speed, ecmd.duplex, lcladv, rmtadv);
 
 	spin_lock_irqsave(&pdata->mac_cr_lock, flags);
 	if (ecmd.duplex != DUPLEX_FULL) {
@@ -511,7 +512,8 @@ static void smsc95xx_status(struct usbnet *dev, struct urb *urb)
 	u32 intdata;
 
 	if (urb->actual_length != 4) {
-		devwarn(dev, "unexpected urb length %d", urb->actual_length);
+		netdev_warn(dev->net, "unexpected urb length %d\n",
+			    urb->actual_length);
 		return;
 	}
 
@@ -519,12 +521,13 @@ static void smsc95xx_status(struct usbnet *dev, struct urb *urb)
 	le32_to_cpus(&intdata);
 
 	if (netif_msg_link(dev))
-		devdbg(dev, "intdata: 0x%08X", intdata);
+		netdev_dbg(dev->net, "intdata: 0x%08X\n", intdata);
 
 	if (intdata & INT_ENP_PHY_INT_)
 		usbnet_defer_kevent(dev, EVENT_LINK_RESET);
 	else
-		devwarn(dev, "unexpected interrupt, intdata=0x%08X", intdata);
+		netdev_warn(dev->net, "unexpected interrupt, intdata=0x%08X\n",
+			    intdata);
 }
 
 /* Enable or disable Tx & Rx checksum offload engines */
@@ -534,7 +537,7 @@ static int smsc95xx_set_csums(struct usbnet *dev)
 	u32 read_buf;
 	int ret = smsc95xx_read_reg(dev, COE_CR, &read_buf);
 	if (ret < 0) {
-		devwarn(dev, "Failed to read COE_CR: %d", ret);
+		netdev_warn(dev->net, "Failed to read COE_CR: %d\n", ret);
 		return ret;
 	}
 
@@ -550,12 +553,12 @@ static int smsc95xx_set_csums(struct usbnet *dev)
 
 	ret = smsc95xx_write_reg(dev, COE_CR, read_buf);
 	if (ret < 0) {
-		devwarn(dev, "Failed to write COE_CR: %d", ret);
+		netdev_warn(dev->net, "Failed to write COE_CR: %d\n", ret);
 		return ret;
 	}
 
 	if (netif_msg_hw(dev))
-		devdbg(dev, "COE_CR = 0x%08x", read_buf);
+		netdev_dbg(dev->net, "COE_CR = 0x%08x\n", read_buf);
 	return 0;
 }
 
@@ -580,8 +583,8 @@ static int smsc95xx_ethtool_set_eeprom(struct net_device *netdev,
 	struct usbnet *dev = netdev_priv(netdev);
 
 	if (ee->magic != LAN95XX_EEPROM_MAGIC) {
-		devwarn(dev, "EEPROM: magic value mismatch, magic = 0x%x",
-			ee->magic);
+		netdev_warn(dev->net, "EEPROM: magic value mismatch, magic = 0x%x\n",
+			    ee->magic);
 		return -EINVAL;
 	}
 
@@ -660,7 +663,7 @@ static void smsc95xx_init_mac_address(struct usbnet *dev)
 		if (is_valid_ether_addr(dev->net->dev_addr)) {
 			/* eeprom values are valid so use them */
 			if (netif_msg_ifup(dev))
-				devdbg(dev, "MAC address read from EEPROM");
+				netdev_dbg(dev->net, "MAC address read from EEPROM\n");
 			return;
 		}
 	}
@@ -668,7 +671,7 @@ static void smsc95xx_init_mac_address(struct usbnet *dev)
 	/* no eeprom, or eeprom values are invalid. generate random MAC */
 	random_ether_addr(dev->net->dev_addr);
 	if (netif_msg_ifup(dev))
-		devdbg(dev, "MAC address set to random_ether_addr");
+		netdev_dbg(dev->net, "MAC address set to random_ether_addr\n");
 }
 
 static int smsc95xx_set_mac_address(struct usbnet *dev)
@@ -680,13 +683,13 @@ static int smsc95xx_set_mac_address(struct usbnet *dev)
 
 	ret = smsc95xx_write_reg(dev, ADDRL, addr_lo);
 	if (ret < 0) {
-		devwarn(dev, "Failed to write ADDRL: %d", ret);
+		netdev_warn(dev->net, "Failed to write ADDRL: %d\n", ret);
 		return ret;
 	}
 
 	ret = smsc95xx_write_reg(dev, ADDRH, addr_hi);
 	if (ret < 0) {
-		devwarn(dev, "Failed to write ADDRH: %d", ret);
+		netdev_warn(dev->net, "Failed to write ADDRH: %d\n", ret);
 		return ret;
 	}
 
@@ -748,7 +751,7 @@ static int smsc95xx_phy_initialize(struct usbnet *dev)
 	mii_nway_restart(&dev->mii);
 
 	if (netif_msg_ifup(dev))
-		devdbg(dev, "phy initialised successfully");
+		netdev_dbg(dev->net, "phy initialised successfully\n");
 	return 0;
 }
 
@@ -760,13 +763,13 @@ static int smsc95xx_reset(struct usbnet *dev)
 	int ret = 0, timeout;
 
 	if (netif_msg_ifup(dev))
-		devdbg(dev, "entering smsc95xx_reset");
+		netdev_dbg(dev->net, "entering smsc95xx_reset\n");
 
 	write_buf = HW_CFG_LRST_;
 	ret = smsc95xx_write_reg(dev, HW_CFG, write_buf);
 	if (ret < 0) {
-		devwarn(dev, "Failed to write HW_CFG_LRST_ bit in HW_CFG "
-			"register, ret = %d", ret);
+		netdev_warn(dev->net, "Failed to write HW_CFG_LRST_ bit in HW_CFG register, ret = %d\n",
+			    ret);
 		return ret;
 	}
 
@@ -774,7 +777,7 @@ static int smsc95xx_reset(struct usbnet *dev)
 	do {
 		ret = smsc95xx_read_reg(dev, HW_CFG, &read_buf);
 		if (ret < 0) {
-			devwarn(dev, "Failed to read HW_CFG: %d", ret);
+			netdev_warn(dev->net, "Failed to read HW_CFG: %d\n", ret);
 			return ret;
 		}
 		msleep(10);
@@ -782,14 +785,14 @@ static int smsc95xx_reset(struct usbnet *dev)
 	} while ((read_buf & HW_CFG_LRST_) && (timeout < 100));
 
 	if (timeout >= 100) {
-		devwarn(dev, "timeout waiting for completion of Lite Reset");
+		netdev_warn(dev->net, "timeout waiting for completion of Lite Reset\n");
 		return ret;
 	}
 
 	write_buf = PM_CTL_PHY_RST_;
 	ret = smsc95xx_write_reg(dev, PM_CTRL, write_buf);
 	if (ret < 0) {
-		devwarn(dev, "Failed to write PM_CTRL: %d", ret);
+		netdev_warn(dev->net, "Failed to write PM_CTRL: %d\n", ret);
 		return ret;
 	}
 
@@ -797,7 +800,7 @@ static int smsc95xx_reset(struct usbnet *dev)
 	do {
 		ret = smsc95xx_read_reg(dev, PM_CTRL, &read_buf);
 		if (ret < 0) {
-			devwarn(dev, "Failed to read PM_CTRL: %d", ret);
+			netdev_warn(dev->net, "Failed to read PM_CTRL: %d\n", ret);
 			return ret;
 		}
 		msleep(10);
@@ -805,7 +808,7 @@ static int smsc95xx_reset(struct usbnet *dev)
 	} while ((read_buf & PM_CTL_PHY_RST_) && (timeout < 100));
 
 	if (timeout >= 100) {
-		devwarn(dev, "timeout waiting for PHY Reset");
+		netdev_warn(dev->net, "timeout waiting for PHY Reset\n");
 		return ret;
 	}
 
@@ -816,34 +819,34 @@ static int smsc95xx_reset(struct usbnet *dev)
 		return ret;
 
 	if (netif_msg_ifup(dev))
-		devdbg(dev, "MAC Address: %pM", dev->net->dev_addr);
+		netdev_dbg(dev->net, "MAC Address: %pM\n", dev->net->dev_addr);
 
 	ret = smsc95xx_read_reg(dev, HW_CFG, &read_buf);
 	if (ret < 0) {
-		devwarn(dev, "Failed to read HW_CFG: %d", ret);
+		netdev_warn(dev->net, "Failed to read HW_CFG: %d\n", ret);
 		return ret;
 	}
 
 	if (netif_msg_ifup(dev))
-		devdbg(dev, "Read Value from HW_CFG : 0x%08x", read_buf);
+		netdev_dbg(dev->net, "Read Value from HW_CFG : 0x%08x\n", read_buf);
 
 	read_buf |= HW_CFG_BIR_;
 
 	ret = smsc95xx_write_reg(dev, HW_CFG, read_buf);
 	if (ret < 0) {
-		devwarn(dev, "Failed to write HW_CFG_BIR_ bit in HW_CFG "
-			"register, ret = %d", ret);
+		netdev_warn(dev->net, "Failed to write HW_CFG_BIR_ bit in HW_CFG register, ret = %d\n",
+			    ret);
 		return ret;
 	}
 
 	ret = smsc95xx_read_reg(dev, HW_CFG, &read_buf);
 	if (ret < 0) {
-		devwarn(dev, "Failed to read HW_CFG: %d", ret);
+		netdev_warn(dev->net, "Failed to read HW_CFG: %d\n", ret);
 		return ret;
 	}
 	if (netif_msg_ifup(dev))
-		devdbg(dev, "Read Value from HW_CFG after writing "
-			"HW_CFG_BIR_: 0x%08x", read_buf);
+		netdev_dbg(dev->net, "Read Value from HW_CFG after writing HW_CFG_BIR_: 0x%08x\n",
+			   read_buf);
 
 	if (!turbo_mode) {
 		burst_cap = 0;
@@ -857,46 +860,46 @@ static int smsc95xx_reset(struct usbnet *dev)
 	}
 
 	if (netif_msg_ifup(dev))
-		devdbg(dev, "rx_urb_size=%ld", (ulong)dev->rx_urb_size);
+		netdev_dbg(dev->net, "rx_urb_size=%ld\n", (ulong)dev->rx_urb_size);
 
 	ret = smsc95xx_write_reg(dev, BURST_CAP, burst_cap);
 	if (ret < 0) {
-		devwarn(dev, "Failed to write BURST_CAP: %d", ret);
+		netdev_warn(dev->net, "Failed to write BURST_CAP: %d\n", ret);
 		return ret;
 	}
 
 	ret = smsc95xx_read_reg(dev, BURST_CAP, &read_buf);
 	if (ret < 0) {
-		devwarn(dev, "Failed to read BURST_CAP: %d", ret);
+		netdev_warn(dev->net, "Failed to read BURST_CAP: %d\n", ret);
 		return ret;
 	}
 	if (netif_msg_ifup(dev))
-		devdbg(dev, "Read Value from BURST_CAP after writing: 0x%08x",
-			read_buf);
+		netdev_dbg(dev->net, "Read Value from BURST_CAP after writing: 0x%08x\n",
+			   read_buf);
 
 	read_buf = DEFAULT_BULK_IN_DELAY;
 	ret = smsc95xx_write_reg(dev, BULK_IN_DLY, read_buf);
 	if (ret < 0) {
-		devwarn(dev, "ret = %d", ret);
+		netdev_warn(dev->net, "ret = %d\n", ret);
 		return ret;
 	}
 
 	ret = smsc95xx_read_reg(dev, BULK_IN_DLY, &read_buf);
 	if (ret < 0) {
-		devwarn(dev, "Failed to read BULK_IN_DLY: %d", ret);
+		netdev_warn(dev->net, "Failed to read BULK_IN_DLY: %d\n", ret);
 		return ret;
 	}
 	if (netif_msg_ifup(dev))
-		devdbg(dev, "Read Value from BULK_IN_DLY after writing: "
-			"0x%08x", read_buf);
+		netdev_dbg(dev->net, "Read Value from BULK_IN_DLY after writing: 0x%08x\n",
+			   read_buf);
 
 	ret = smsc95xx_read_reg(dev, HW_CFG, &read_buf);
 	if (ret < 0) {
-		devwarn(dev, "Failed to read HW_CFG: %d", ret);
+		netdev_warn(dev->net, "Failed to read HW_CFG: %d\n", ret);
 		return ret;
 	}
 	if (netif_msg_ifup(dev))
-		devdbg(dev, "Read Value from HW_CFG: 0x%08x", read_buf);
+		netdev_dbg(dev->net, "Read Value from HW_CFG: 0x%08x\n", read_buf);
 
 	if (turbo_mode)
 		read_buf |= (HW_CFG_MEF_ | HW_CFG_BCE_);
@@ -908,41 +911,43 @@ static int smsc95xx_reset(struct usbnet *dev)
 
 	ret = smsc95xx_write_reg(dev, HW_CFG, read_buf);
 	if (ret < 0) {
-		devwarn(dev, "Failed to write HW_CFG register, ret=%d", ret);
+		netdev_warn(dev->net, "Failed to write HW_CFG register, ret=%d\n",
+			    ret);
 		return ret;
 	}
 
 	ret = smsc95xx_read_reg(dev, HW_CFG, &read_buf);
 	if (ret < 0) {
-		devwarn(dev, "Failed to read HW_CFG: %d", ret);
+		netdev_warn(dev->net, "Failed to read HW_CFG: %d\n", ret);
 		return ret;
 	}
 	if (netif_msg_ifup(dev))
-		devdbg(dev, "Read Value from HW_CFG after writing: 0x%08x",
-			read_buf);
+		netdev_dbg(dev->net, "Read Value from HW_CFG after writing: 0x%08x\n",
+			   read_buf);
 
 	write_buf = 0xFFFFFFFF;
 	ret = smsc95xx_write_reg(dev, INT_STS, write_buf);
 	if (ret < 0) {
-		devwarn(dev, "Failed to write INT_STS register, ret=%d", ret);
+		netdev_warn(dev->net, "Failed to write INT_STS register, ret=%d\n",
+			    ret);
 		return ret;
 	}
 
 	ret = smsc95xx_read_reg(dev, ID_REV, &read_buf);
 	if (ret < 0) {
-		devwarn(dev, "Failed to read ID_REV: %d", ret);
+		netdev_warn(dev->net, "Failed to read ID_REV: %d\n", ret);
 		return ret;
 	}
 	if (netif_msg_ifup(dev))
-		devdbg(dev, "ID_REV = 0x%08x", read_buf);
+		netdev_dbg(dev->net, "ID_REV = 0x%08x\n", read_buf);
 
 	/* Configure GPIO pins as LED outputs */
 	write_buf = LED_GPIO_CFG_SPD_LED | LED_GPIO_CFG_LNK_LED |
 		LED_GPIO_CFG_FDX_LED;
 	ret = smsc95xx_write_reg(dev, LED_GPIO_CFG, write_buf);
 	if (ret < 0) {
-		devwarn(dev, "Failed to write LED_GPIO_CFG register, ret=%d",
-			ret);
+		netdev_warn(dev->net, "Failed to write LED_GPIO_CFG register, ret=%d\n",
+			    ret);
 		return ret;
 	}
 
@@ -950,21 +955,21 @@ static int smsc95xx_reset(struct usbnet *dev)
 	write_buf = 0;
 	ret = smsc95xx_write_reg(dev, FLOW, write_buf);
 	if (ret < 0) {
-		devwarn(dev, "Failed to write FLOW: %d", ret);
+		netdev_warn(dev->net, "Failed to write FLOW: %d\n", ret);
 		return ret;
 	}
 
 	read_buf = AFC_CFG_DEFAULT;
 	ret = smsc95xx_write_reg(dev, AFC_CFG, read_buf);
 	if (ret < 0) {
-		devwarn(dev, "Failed to write AFC_CFG: %d", ret);
+		netdev_warn(dev->net, "Failed to write AFC_CFG: %d\n", ret);
 		return ret;
 	}
 
 	/* Don't need mac_cr_lock during initialisation */
 	ret = smsc95xx_read_reg(dev, MAC_CR, &pdata->mac_cr);
 	if (ret < 0) {
-		devwarn(dev, "Failed to read MAC_CR: %d", ret);
+		netdev_warn(dev->net, "Failed to read MAC_CR: %d\n", ret);
 		return ret;
 	}
 
@@ -973,7 +978,7 @@ static int smsc95xx_reset(struct usbnet *dev)
 	write_buf = (u32)ETH_P_8021Q;
 	ret = smsc95xx_write_reg(dev, VLAN1, write_buf);
 	if (ret < 0) {
-		devwarn(dev, "Failed to write VAN1: %d", ret);
+		netdev_warn(dev->net, "Failed to write VAN1: %d\n", ret);
 		return ret;
 	}
 
@@ -981,7 +986,7 @@ static int smsc95xx_reset(struct usbnet *dev)
 	ethtool_op_set_tx_hw_csum(netdev, pdata->use_tx_csum);
 	ret = smsc95xx_set_csums(dev);
 	if (ret < 0) {
-		devwarn(dev, "Failed to set csum offload: %d", ret);
+		netdev_warn(dev->net, "Failed to set csum offload: %d\n", ret);
 		return ret;
 	}
 
@@ -992,7 +997,7 @@ static int smsc95xx_reset(struct usbnet *dev)
 
 	ret = smsc95xx_read_reg(dev, INT_EP_CTL, &read_buf);
 	if (ret < 0) {
-		devwarn(dev, "Failed to read INT_EP_CTL: %d", ret);
+		netdev_warn(dev->net, "Failed to read INT_EP_CTL: %d\n", ret);
 		return ret;
 	}
 
@@ -1001,7 +1006,7 @@ static int smsc95xx_reset(struct usbnet *dev)
 
 	ret = smsc95xx_write_reg(dev, INT_EP_CTL, read_buf);
 	if (ret < 0) {
-		devwarn(dev, "Failed to write INT_EP_CTL: %d", ret);
+		netdev_warn(dev->net, "Failed to write INT_EP_CTL: %d\n", ret);
 		return ret;
 	}
 
@@ -1009,7 +1014,7 @@ static int smsc95xx_reset(struct usbnet *dev)
 	smsc95xx_start_rx_path(dev);
 
 	if (netif_msg_ifup(dev))
-		devdbg(dev, "smsc95xx_reset, return 0");
+		netdev_dbg(dev->net, "smsc95xx_reset, return 0\n");
 	return 0;
 }
 
@@ -1034,7 +1039,7 @@ static int smsc95xx_bind(struct usbnet *dev, struct usb_interface *intf)
 
 	ret = usbnet_get_endpoints(dev, intf);
 	if (ret < 0) {
-		devwarn(dev, "usbnet_get_endpoints failed: %d", ret);
+		netdev_warn(dev->net, "usbnet_get_endpoints failed: %d\n", ret);
 		return ret;
 	}
 
@@ -1043,7 +1048,7 @@ static int smsc95xx_bind(struct usbnet *dev, struct usb_interface *intf)
 
 	pdata = (struct smsc95xx_priv *)(dev->data[0]);
 	if (!pdata) {
-		devwarn(dev, "Unable to allocate struct smsc95xx_priv");
+		netdev_warn(dev->net, "Unable to allocate struct smsc95xx_priv\n");
 		return -ENOMEM;
 	}
 
@@ -1067,7 +1072,7 @@ static void smsc95xx_unbind(struct usbnet *dev, struct usb_interface *intf)
 	struct smsc95xx_priv *pdata = (struct smsc95xx_priv *)(dev->data[0]);
 	if (pdata) {
 		if (netif_msg_ifdown(dev))
-			devdbg(dev, "free pdata");
+			netdev_dbg(dev->net, "free pdata\n");
 		kfree(pdata);
 		pdata = NULL;
 		dev->data[0] = 0;
@@ -1102,7 +1107,8 @@ static int smsc95xx_rx_fixup(struct usbnet *dev, struct sk_buff *skb)
 
 		if (unlikely(header & RX_STS_ES_)) {
 			if (netif_msg_rx_err(dev))
-				devdbg(dev, "Error header=0x%08x", header);
+				netdev_dbg(dev->net, "Error header=0x%08x\n",
+					   header);
 			dev->net->stats.rx_errors++;
 			dev->net->stats.rx_dropped++;
 
@@ -1120,8 +1126,8 @@ static int smsc95xx_rx_fixup(struct usbnet *dev, struct sk_buff *skb)
 			/* ETH_FRAME_LEN + 4(CRC) + 2(COE) + 4(Vlan) */
 			if (unlikely(size > (ETH_FRAME_LEN + 12))) {
 				if (netif_msg_rx_err(dev))
-					devdbg(dev, "size err header=0x%08x",
-						header);
+					netdev_dbg(dev->net, "size err header=0x%08x\n",
+						   header);
 				return 0;
 			}
 
@@ -1137,7 +1143,7 @@ static int smsc95xx_rx_fixup(struct usbnet *dev, struct sk_buff *skb)
 
 			ax_skb = skb_clone(skb, GFP_ATOMIC);
 			if (unlikely(!ax_skb)) {
-				devwarn(dev, "Error allocating skb");
+				netdev_warn(dev->net, "Error allocating skb\n");
 				return 0;
 			}
 
@@ -1161,7 +1167,7 @@ static int smsc95xx_rx_fixup(struct usbnet *dev, struct sk_buff *skb)
 	}
 
 	if (unlikely(skb->len < 0)) {
-		devwarn(dev, "invalid rx length<0 %d", skb->len);
+		netdev_warn(dev->net, "invalid rx length<0 %d\n", skb->len);
 		return 0;
 	}
 
diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c
index 035fab04c0a0..8e732930d249 100644
--- a/drivers/net/usb/usbnet.c
+++ b/drivers/net/usb/usbnet.c
@@ -243,12 +243,12 @@ void usbnet_skb_return (struct usbnet *dev, struct sk_buff *skb)
 	dev->net->stats.rx_bytes += skb->len;
 
 	if (netif_msg_rx_status (dev))
-		devdbg (dev, "< rx, len %zu, type 0x%x",
-			skb->len + sizeof (struct ethhdr), skb->protocol);
+		netdev_dbg(dev->net, "< rx, len %zu, type 0x%x\n",
+			   skb->len + sizeof (struct ethhdr), skb->protocol);
 	memset (skb->cb, 0, sizeof (struct skb_data));
 	status = netif_rx (skb);
 	if (status != NET_RX_SUCCESS && netif_msg_rx_err (dev))
-		devdbg (dev, "netif_rx status %d", status);
+		netdev_dbg(dev->net, "netif_rx status %d\n", status);
 }
 EXPORT_SYMBOL_GPL(usbnet_skb_return);
 
@@ -313,9 +313,9 @@ void usbnet_defer_kevent (struct usbnet *dev, int work)
 {
 	set_bit (work, &dev->flags);
 	if (!schedule_work (&dev->kevent))
-		deverr (dev, "kevent %d may have been dropped", work);
+		netdev_err(dev->net, "kevent %d may have been dropped\n", work);
 	else
-		devdbg (dev, "kevent %d scheduled", work);
+		netdev_dbg(dev->net, "kevent %d scheduled\n", work);
 }
 EXPORT_SYMBOL_GPL(usbnet_defer_kevent);
 
@@ -333,7 +333,7 @@ static void rx_submit (struct usbnet *dev, struct urb *urb, gfp_t flags)
 
 	if ((skb = alloc_skb (size + NET_IP_ALIGN, flags)) == NULL) {
 		if (netif_msg_rx_err (dev))
-			devdbg (dev, "no rx skb");
+			netdev_dbg(dev->net, "no rx skb\n");
 		usbnet_defer_kevent (dev, EVENT_RX_MEMORY);
 		usb_free_urb (urb);
 		return;
@@ -364,12 +364,12 @@ static void rx_submit (struct usbnet *dev, struct urb *urb, gfp_t flags)
 			break;
 		case -ENODEV:
 			if (netif_msg_ifdown (dev))
-				devdbg (dev, "device gone");
+				netdev_dbg(dev->net, "device gone\n");
 			netif_device_detach (dev->net);
 			break;
 		default:
 			if (netif_msg_rx_err (dev))
-				devdbg (dev, "rx submit, %d", retval);
+				netdev_dbg(dev->net, "rx submit, %d\n", retval);
 			tasklet_schedule (&dev->bh);
 			break;
 		case 0:
@@ -377,7 +377,7 @@ static void rx_submit (struct usbnet *dev, struct urb *urb, gfp_t flags)
 		}
 	} else {
 		if (netif_msg_ifdown (dev))
-			devdbg (dev, "rx: stopped");
+			netdev_dbg(dev->net, "rx: stopped\n");
 		retval = -ENOLINK;
 	}
 	spin_unlock_irqrestore (&dev->rxq.lock, lockflags);
@@ -401,7 +401,7 @@ static inline void rx_process (struct usbnet *dev, struct sk_buff *skb)
 		usbnet_skb_return (dev, skb);
 	else {
 		if (netif_msg_rx_err (dev))
-			devdbg (dev, "drop");
+			netdev_dbg(dev->net, "drop\n");
 error:
 		dev->net->stats.rx_errors++;
 		skb_queue_tail (&dev->done, skb);
@@ -429,7 +429,7 @@ static void rx_complete (struct urb *urb)
 			dev->net->stats.rx_errors++;
 			dev->net->stats.rx_length_errors++;
 			if (netif_msg_rx_err (dev))
-				devdbg (dev, "rx length %d", skb->len);
+				netdev_dbg(dev->net, "rx length %d\n", skb->len);
 		}
 		break;
 
@@ -447,7 +447,7 @@ static void rx_complete (struct urb *urb)
 	case -ECONNRESET:		/* async unlink */
 	case -ESHUTDOWN:		/* hardware gone */
 		if (netif_msg_ifdown (dev))
-			devdbg (dev, "rx shutdown, code %d", urb_status);
+			netdev_dbg(dev->net, "rx shutdown, code %d\n", urb_status);
 		goto block;
 
 	/* we get controller i/o faults during khubd disconnect() delays.
@@ -461,7 +461,7 @@ static void rx_complete (struct urb *urb)
 		if (!timer_pending (&dev->delay)) {
 			mod_timer (&dev->delay, jiffies + THROTTLE_JIFFIES);
 			if (netif_msg_link (dev))
-				devdbg (dev, "rx throttle %d", urb_status);
+				netdev_dbg(dev->net, "rx throttle %d\n", urb_status);
 		}
 block:
 		entry->state = rx_cleanup;
@@ -478,7 +478,7 @@ block:
 		entry->state = rx_cleanup;
 		dev->net->stats.rx_errors++;
 		if (netif_msg_rx_err (dev))
-			devdbg (dev, "rx status %d", urb_status);
+			netdev_dbg(dev->net, "rx status %d\n", urb_status);
 		break;
 	}
 
@@ -493,7 +493,7 @@ block:
 		usb_free_urb (urb);
 	}
 	if (netif_msg_rx_err (dev))
-		devdbg (dev, "no read resubmitted");
+		netdev_dbg(dev->net, "no read resubmitted\n");
 }
 
 static void intr_complete (struct urb *urb)
@@ -511,14 +511,14 @@ static void intr_complete (struct urb *urb)
 	case -ENOENT:		/* urb killed */
 	case -ESHUTDOWN:	/* hardware gone */
 		if (netif_msg_ifdown (dev))
-			devdbg (dev, "intr shutdown, code %d", status);
+			netdev_dbg(dev->net, "intr shutdown, code %d\n", status);
 		return;
 
 	/* NOTE:  not throttling like RX/TX, since this endpoint
 	 * already polls infrequently
 	 */
 	default:
-		devdbg (dev, "intr status %d", status);
+		netdev_dbg(dev->net, "intr status %d\n", status);
 		break;
 	}
 
@@ -528,7 +528,7 @@ static void intr_complete (struct urb *urb)
 	memset(urb->transfer_buffer, 0, urb->transfer_buffer_length);
 	status = usb_submit_urb (urb, GFP_ATOMIC);
 	if (status != 0 && netif_msg_timer (dev))
-		deverr(dev, "intr resubmit --> %d", status);
+		netdev_err(dev->net, "intr resubmit --> %d\n", status);
 }
 
 /*-------------------------------------------------------------------------*/
@@ -537,7 +537,7 @@ void usbnet_pause_rx(struct usbnet *dev)
 	set_bit(EVENT_RX_PAUSED, &dev->flags);
 
 	if (netif_msg_rx_status(dev))
-		devdbg(dev, "paused rx queue enabled");
+		netdev_dbg(dev->net, "paused rx queue enabled\n");
 }
 EXPORT_SYMBOL_GPL(usbnet_pause_rx);
 
@@ -556,7 +556,8 @@ void usbnet_resume_rx(struct usbnet *dev)
 	tasklet_schedule(&dev->bh);
 
 	if (netif_msg_rx_status(dev))
-		devdbg(dev, "paused rx queue disabled, %d skbs requeued", num);
+		netdev_dbg(dev->net, "paused rx queue disabled, %d skbs requeued\n",
+			   num);
 }
 EXPORT_SYMBOL_GPL(usbnet_resume_rx);
 
@@ -589,7 +590,7 @@ static int unlink_urbs (struct usbnet *dev, struct sk_buff_head *q)
 		// these (async) unlinks complete immediately
 		retval = usb_unlink_urb (urb);
 		if (retval != -EINPROGRESS && retval != 0)
-			devdbg (dev, "unlink urb err, %d", retval);
+			netdev_dbg(dev->net, "unlink urb err, %d\n", retval);
 		else
 			count++;
 	}
@@ -632,8 +633,8 @@ static void usbnet_terminate_urbs(struct usbnet *dev)
 			schedule_timeout(UNLINK_TIMEOUT_MS);
 			set_current_state(TASK_UNINTERRUPTIBLE);
 			if (netif_msg_ifdown(dev))
-				devdbg(dev, "waited for %d urb completions",
-					temp);
+				netdev_dbg(dev->net, "waited for %d urb completions\n",
+					   temp);
 	}
 	set_current_state(TASK_RUNNING);
 	dev->wait = NULL;
@@ -649,21 +650,20 @@ int usbnet_stop (struct net_device *net)
 	netif_stop_queue (net);
 
 	if (netif_msg_ifdown (dev))
-		devinfo (dev, "stop stats: rx/tx %ld/%ld, errs %ld/%ld",
-			net->stats.rx_packets, net->stats.tx_packets,
-			net->stats.rx_errors, net->stats.tx_errors
-			);
+		netdev_info(dev->net, "stop stats: rx/tx %ld/%ld, errs %ld/%ld\n",
+			    net->stats.rx_packets, net->stats.tx_packets,
+			    net->stats.rx_errors, net->stats.tx_errors);
 
 	/* allow minidriver to stop correctly (wireless devices to turn off
 	 * radio etc) */
 	if (info->stop) {
 		retval = info->stop(dev);
 		if (retval < 0 && netif_msg_ifdown(dev))
-			devinfo(dev,
-				"stop fail (%d) usbnet usb-%s-%s, %s",
-				retval,
-				dev->udev->bus->bus_name, dev->udev->devpath,
-				info->description);
+			netdev_info(dev->net,
+				    "stop fail (%d) usbnet usb-%s-%s, %s\n",
+				    retval,
+				    dev->udev->bus->bus_name, dev->udev->devpath,
+				    info->description);
 	}
 
 	if (!(info->flags & FLAG_AVOID_UNLINK_URBS))
@@ -703,29 +703,31 @@ int usbnet_open (struct net_device *net)
 
 	if ((retval = usb_autopm_get_interface(dev->intf)) < 0) {
 		if (netif_msg_ifup (dev))
-			devinfo (dev,
-				"resumption fail (%d) usbnet usb-%s-%s, %s",
-				retval,
-				dev->udev->bus->bus_name, dev->udev->devpath,
-			info->description);
+			netdev_info(dev->net,
+				    "resumption fail (%d) usbnet usb-%s-%s, %s\n",
+				    retval,
+				    dev->udev->bus->bus_name,
+				    dev->udev->devpath,
+				    info->description);
 		goto done_nopm;
 	}
 
 	// put into "known safe" state
 	if (info->reset && (retval = info->reset (dev)) < 0) {
 		if (netif_msg_ifup (dev))
-			devinfo (dev,
-				"open reset fail (%d) usbnet usb-%s-%s, %s",
-				retval,
-				dev->udev->bus->bus_name, dev->udev->devpath,
-			info->description);
+			netdev_info(dev->net,
+				    "open reset fail (%d) usbnet usb-%s-%s, %s\n",
+				    retval,
+				    dev->udev->bus->bus_name,
+				    dev->udev->devpath,
+				    info->description);
 		goto done;
 	}
 
 	// insist peer be connected
 	if (info->check_connect && (retval = info->check_connect (dev)) < 0) {
 		if (netif_msg_ifup (dev))
-			devdbg (dev, "can't open; %d", retval);
+			netdev_dbg(dev->net, "can't open; %d\n", retval);
 		goto done;
 	}
 
@@ -734,7 +736,7 @@ int usbnet_open (struct net_device *net)
 		retval = usb_submit_urb (dev->interrupt, GFP_KERNEL);
 		if (retval < 0) {
 			if (netif_msg_ifup (dev))
-				deverr (dev, "intr submit %d", retval);
+				netdev_err(dev->net, "intr submit %d\n", retval);
 			goto done;
 		}
 	}
@@ -756,10 +758,9 @@ int usbnet_open (struct net_device *net)
 		else
 			framing = "simple";
 
-		devinfo (dev, "open: enable queueing "
-				"(rx %d, tx %d) mtu %d %s framing",
-			(int)RX_QLEN (dev), (int)TX_QLEN (dev), dev->net->mtu,
-			framing);
+		netdev_info(dev->net, "open: enable queueing (rx %d, tx %d) mtu %d %s framing\n",
+			    (int)RX_QLEN(dev), (int)TX_QLEN(dev),
+			    dev->net->mtu, framing);
 	}
 
 	// delay posting reads until we're fully open
@@ -908,8 +909,8 @@ kevent (struct work_struct *work)
 		    status != -ESHUTDOWN) {
 			if (netif_msg_tx_err (dev))
 fail_pipe:
-				deverr (dev, "can't clear tx halt, status %d",
-					status);
+				netdev_err(dev->net, "can't clear tx halt, status %d\n",
+					   status);
 		} else {
 			clear_bit (EVENT_TX_HALT, &dev->flags);
 			if (status != -ESHUTDOWN)
@@ -928,8 +929,8 @@ fail_pipe:
 		    status != -ESHUTDOWN) {
 			if (netif_msg_rx_err (dev))
 fail_halt:
-				deverr (dev, "can't clear rx halt, status %d",
-					status);
+				netdev_err(dev->net, "can't clear rx halt, status %d\n",
+					   status);
 		} else {
 			clear_bit (EVENT_RX_HALT, &dev->flags);
 			tasklet_schedule (&dev->bh);
@@ -967,18 +968,18 @@ fail_lowmem:
 		if(info->link_reset && (retval = info->link_reset(dev)) < 0) {
 			usb_autopm_put_interface(dev->intf);
 skip_reset:
-			devinfo(dev, "link reset failed (%d) usbnet usb-%s-%s, %s",
-				retval,
-				dev->udev->bus->bus_name, dev->udev->devpath,
-				info->description);
+			netdev_info(dev->net, "link reset failed (%d) usbnet usb-%s-%s, %s\n",
+				    retval,
+				    dev->udev->bus->bus_name,
+				    dev->udev->devpath,
+				    info->description);
 		} else {
 			usb_autopm_put_interface(dev->intf);
 		}
 	}
 
 	if (dev->flags)
-		devdbg (dev, "kevent done, flags = 0x%lx",
-			dev->flags);
+		netdev_dbg(dev->net, "kevent done, flags = 0x%lx\n", dev->flags);
 }
 
 /*-------------------------------------------------------------------------*/
@@ -1015,14 +1016,15 @@ static void tx_complete (struct urb *urb)
 				mod_timer (&dev->delay,
 					jiffies + THROTTLE_JIFFIES);
 				if (netif_msg_link (dev))
-					devdbg (dev, "tx throttle %d",
-							urb->status);
+					netdev_dbg(dev->net, "tx throttle %d\n",
+						   urb->status);
 			}
 			netif_stop_queue (dev->net);
 			break;
 		default:
 			if (netif_msg_tx_err (dev))
-				devdbg (dev, "tx err %d", entry->urb->status);
+				netdev_dbg(dev->net, "tx err %d\n",
+					   entry->urb->status);
 			break;
 		}
 	}
@@ -1065,7 +1067,7 @@ netdev_tx_t usbnet_start_xmit (struct sk_buff *skb,
 		skb = info->tx_fixup (dev, skb, GFP_ATOMIC);
 		if (!skb) {
 			if (netif_msg_tx_err (dev))
-				devdbg (dev, "can't tx_fixup skb");
+				netdev_dbg(dev->net, "can't tx_fixup skb\n");
 			goto drop;
 		}
 	}
@@ -1073,7 +1075,7 @@ netdev_tx_t usbnet_start_xmit (struct sk_buff *skb,
 
 	if (!(urb = usb_alloc_urb (0, GFP_ATOMIC))) {
 		if (netif_msg_tx_err (dev))
-			devdbg (dev, "no urb");
+			netdev_dbg(dev->net, "no urb\n");
 		goto drop;
 	}
 
@@ -1113,7 +1115,7 @@ netdev_tx_t usbnet_start_xmit (struct sk_buff *skb,
 		/* no use to process more packets */
 		netif_stop_queue(net);
 		spin_unlock_irqrestore(&dev->txq.lock, flags);
-		devdbg(dev, "Delaying transmission for resumption");
+		netdev_dbg(dev->net, "Delaying transmission for resumption\n");
 		goto deferred;
 	}
 #endif
@@ -1127,7 +1129,7 @@ netdev_tx_t usbnet_start_xmit (struct sk_buff *skb,
 	default:
 		usb_autopm_put_interface_async(dev->intf);
 		if (netif_msg_tx_err (dev))
-			devdbg (dev, "tx: submit urb err %d", retval);
+			netdev_dbg(dev->net, "tx: submit urb err %d\n", retval);
 		break;
 	case 0:
 		net->trans_start = jiffies;
@@ -1139,15 +1141,15 @@ netdev_tx_t usbnet_start_xmit (struct sk_buff *skb,
 
 	if (retval) {
 		if (netif_msg_tx_err (dev))
-			devdbg (dev, "drop, code %d", retval);
+			netdev_dbg(dev->net, "drop, code %d\n", retval);
 drop:
 		dev->net->stats.tx_dropped++;
 		if (skb)
 			dev_kfree_skb_any (skb);
 		usb_free_urb (urb);
 	} else if (netif_msg_tx_queued (dev)) {
-		devdbg (dev, "> tx, len %d, type 0x%x",
-			length, skb->protocol);
+		netdev_dbg(dev->net, "> tx, len %d, type 0x%x\n",
+			   length, skb->protocol);
 	}
 #ifdef CONFIG_PM
 deferred:
@@ -1179,7 +1181,7 @@ static void usbnet_bh (unsigned long param)
 			dev_kfree_skb (skb);
 			continue;
 		default:
-			devdbg (dev, "bogus skb state %d", entry->state);
+			netdev_dbg(dev->net, "bogus skb state %d\n", entry->state);
 		}
 	}
 
@@ -1208,8 +1210,8 @@ static void usbnet_bh (unsigned long param)
 					rx_submit (dev, urb, GFP_ATOMIC);
 			}
 			if (temp != dev->rxq.qlen && netif_msg_link (dev))
-				devdbg (dev, "rxqlen %d --> %d",
-						temp, dev->rxq.qlen);
+				netdev_dbg(dev->net, "rxqlen %d --> %d\n",
+					   temp, dev->rxq.qlen);
 			if (dev->rxq.qlen < qlen)
 				tasklet_schedule (&dev->bh);
 		}
@@ -1241,10 +1243,10 @@ void usbnet_disconnect (struct usb_interface *intf)
 	xdev = interface_to_usbdev (intf);
 
 	if (netif_msg_probe (dev))
-		devinfo (dev, "unregister '%s' usb-%s-%s, %s",
-			intf->dev.driver->name,
-			xdev->bus->bus_name, xdev->devpath,
-			dev->driver_info->description);
+		netdev_info(dev->net, "unregister '%s' usb-%s-%s, %s\n",
+			    intf->dev.driver->name,
+			    xdev->bus->bus_name, xdev->devpath,
+			    dev->driver_info->description);
 
 	net = dev->net;
 	unregister_netdev (net);
@@ -1408,11 +1410,11 @@ usbnet_probe (struct usb_interface *udev, const struct usb_device_id *prod)
 	if (status)
 		goto out3;
 	if (netif_msg_probe (dev))
-		devinfo (dev, "register '%s' at usb-%s-%s, %s, %pM",
-			udev->dev.driver->name,
-			xdev->bus->bus_name, xdev->devpath,
-			dev->driver_info->description,
-			net->dev_addr);
+		netdev_info(dev->net, "register '%s' at usb-%s-%s, %s, %pM\n",
+			    udev->dev.driver->name,
+			    xdev->bus->bus_name, xdev->devpath,
+			    dev->driver_info->description,
+			    net->dev_addr);
 
 	// ok, it's ready to go.
 	usb_set_intfdata (udev, dev);
diff --git a/drivers/net/wireless/rndis_wlan.c b/drivers/net/wireless/rndis_wlan.c
index 14692bc51b51..65cbd060cc30 100644
--- a/drivers/net/wireless/rndis_wlan.c
+++ b/drivers/net/wireless/rndis_wlan.c
@@ -728,9 +728,9 @@ static int rndis_query_oid(struct usbnet *dev, __le32 oid, void *data, int *len)
 	ret = rndis_command(dev, u.header, buflen);
 	priv->current_command_oid = 0;
 	if (ret < 0)
-		devdbg(dev, "rndis_query_oid(%s): rndis_command() failed, %d "
-			"(%08x)", oid_to_string(oid), ret,
-			le32_to_cpu(u.get_c->status));
+		netdev_dbg(dev->net, "%s(%s): rndis_command() failed, %d (%08x)\n",
+			   __func__, oid_to_string(oid), ret,
+			   le32_to_cpu(u.get_c->status));
 
 	if (ret == 0) {
 		memcpy(data, u.buf + le32_to_cpu(u.get_c->offset) + 8, *len);
@@ -741,9 +741,9 @@ static int rndis_query_oid(struct usbnet *dev, __le32 oid, void *data, int *len)
 
 		ret = rndis_error_status(u.get_c->status);
 		if (ret < 0)
-			devdbg(dev, "rndis_query_oid(%s): device returned "
-				"error,  0x%08x (%d)", oid_to_string(oid),
-				le32_to_cpu(u.get_c->status), ret);
+			netdev_dbg(dev->net, "%s(%s): device returned error,  0x%08x (%d)\n",
+				   __func__, oid_to_string(oid),
+				   le32_to_cpu(u.get_c->status), ret);
 	}
 
 	mutex_unlock(&priv->command_lock);
@@ -791,17 +791,17 @@ static int rndis_set_oid(struct usbnet *dev, __le32 oid, void *data, int len)
 	ret = rndis_command(dev, u.header, buflen);
 	priv->current_command_oid = 0;
 	if (ret < 0)
-		devdbg(dev, "rndis_set_oid(%s): rndis_command() failed, %d "
-			"(%08x)", oid_to_string(oid), ret,
-			le32_to_cpu(u.set_c->status));
+		netdev_dbg(dev->net, "%s(%s): rndis_command() failed, %d (%08x)\n",
+			   __func__, oid_to_string(oid), ret,
+			   le32_to_cpu(u.set_c->status));
 
 	if (ret == 0) {
 		ret = rndis_error_status(u.set_c->status);
 
 		if (ret < 0)
-			devdbg(dev, "rndis_set_oid(%s): device returned error, "
-				"0x%08x (%d)", oid_to_string(oid),
-				le32_to_cpu(u.set_c->status), ret);
+			netdev_dbg(dev->net, "%s(%s): device returned error, 0x%08x (%d)\n",
+				   __func__, oid_to_string(oid),
+				   le32_to_cpu(u.set_c->status), ret);
 	}
 
 	mutex_unlock(&priv->command_lock);
@@ -870,11 +870,11 @@ static int rndis_set_config_parameter(struct usbnet *dev, char *param,
 #endif
 
 	if (value_type == 2)
-		devdbg(dev, "setting config parameter: %s, value: %s",
-						param, (u8 *)value);
+		netdev_dbg(dev->net, "setting config parameter: %s, value: %s\n",
+			   param, (u8 *)value);
 	else
-		devdbg(dev, "setting config parameter: %s, value: %d",
-						param, *(u32 *)value);
+		netdev_dbg(dev->net, "setting config parameter: %s, value: %d\n",
+			   param, *(u32 *)value);
 
 	infobuf->name_offs = cpu_to_le32(sizeof(*infobuf));
 	infobuf->name_length = cpu_to_le32(param_len);
@@ -897,20 +897,21 @@ static int rndis_set_config_parameter(struct usbnet *dev, char *param,
 	}
 
 #ifdef DEBUG
-	devdbg(dev, "info buffer (len: %d):", info_len);
+	netdev_dbg(dev->net, "info buffer (len: %d)\n", info_len);
 	for (i = 0; i < info_len; i += 12) {
 		u32 *tmp = (u32 *)((u8 *)infobuf + i);
-		devdbg(dev, "%08X:%08X:%08X",
-			cpu_to_be32(tmp[0]),
-			cpu_to_be32(tmp[1]),
-			cpu_to_be32(tmp[2]));
+		netdev_dbg(dev->net, "%08X:%08X:%08X\n",
+			   cpu_to_be32(tmp[0]),
+			   cpu_to_be32(tmp[1]),
+			   cpu_to_be32(tmp[2]));
 	}
 #endif
 
 	ret = rndis_set_oid(dev, OID_GEN_RNDIS_CONFIG_PARAMETER,
 							infobuf, info_len);
 	if (ret != 0)
-		devdbg(dev, "setting rndis config parameter failed, %d.", ret);
+		netdev_dbg(dev->net, "setting rndis config parameter failed, %d\n",
+			   ret);
 
 	kfree(infobuf);
 	return ret;
@@ -945,13 +946,13 @@ static int set_essid(struct usbnet *usbdev, struct ndis_80211_ssid *ssid)
 
 	ret = rndis_set_oid(usbdev, OID_802_11_SSID, ssid, sizeof(*ssid));
 	if (ret < 0) {
-		devwarn(usbdev, "setting SSID failed (%08X)", ret);
+		netdev_warn(usbdev->net, "setting SSID failed (%08X)\n", ret);
 		return ret;
 	}
 	if (ret == 0) {
 		memcpy(&priv->essid, ssid, sizeof(priv->essid));
 		priv->radio_on = true;
-		devdbg(usbdev, "set_essid: radio_on = true");
+		netdev_dbg(usbdev->net, "%s(): radio_on = true\n", __func__);
 	}
 
 	return ret;
@@ -963,7 +964,8 @@ static int set_bssid(struct usbnet *usbdev, u8 bssid[ETH_ALEN])
 
 	ret = rndis_set_oid(usbdev, OID_802_11_BSSID, bssid, ETH_ALEN);
 	if (ret < 0) {
-		devwarn(usbdev, "setting BSSID[%pM] failed (%08X)", bssid, ret);
+		netdev_warn(usbdev->net, "setting BSSID[%pM] failed (%08X)\n",
+			    bssid, ret);
 		return ret;
 	}
 
@@ -1021,7 +1023,8 @@ static int disassociate(struct usbnet *usbdev, bool reset_ssid)
 		ret = rndis_set_oid(usbdev, OID_802_11_DISASSOCIATE, NULL, 0);
 		if (ret == 0) {
 			priv->radio_on = false;
-			devdbg(usbdev, "disassociate: radio_on = false");
+			netdev_dbg(usbdev->net, "%s(): radio_on = false\n",
+				   __func__);
 
 			if (reset_ssid)
 				msleep(100);
@@ -1054,8 +1057,8 @@ static int set_auth_mode(struct usbnet *usbdev, u32 wpa_version,
 	__le32 tmp;
 	int auth_mode, ret;
 
-	devdbg(usbdev, "set_auth_mode: wpa_version=0x%x authalg=0x%x "
-		"keymgmt=0x%x", wpa_version, auth_type, keymgmt);
+	netdev_dbg(usbdev->net, "%s(): wpa_version=0x%x authalg=0x%x keymgmt=0x%x\n",
+		   __func__, wpa_version, auth_type, keymgmt);
 
 	if (wpa_version & NL80211_WPA_VERSION_2) {
 		if (keymgmt & RNDIS_WLAN_KEY_MGMT_802_1X)
@@ -1082,7 +1085,8 @@ static int set_auth_mode(struct usbnet *usbdev, u32 wpa_version,
 	ret = rndis_set_oid(usbdev, OID_802_11_AUTHENTICATION_MODE, &tmp,
 								sizeof(tmp));
 	if (ret != 0) {
-		devwarn(usbdev, "setting auth mode failed (%08X)", ret);
+		netdev_warn(usbdev->net, "setting auth mode failed (%08X)\n",
+			    ret);
 		return ret;
 	}
 
@@ -1098,7 +1102,8 @@ static int set_priv_filter(struct usbnet *usbdev)
 	struct rndis_wlan_private *priv = get_rndis_wlan_priv(usbdev);
 	__le32 tmp;
 
-	devdbg(usbdev, "set_priv_filter: wpa_version=0x%x", priv->wpa_version);
+	netdev_dbg(usbdev->net, "%s(): wpa_version=0x%x\n",
+		   __func__, priv->wpa_version);
 
 	if (priv->wpa_version & NL80211_WPA_VERSION_2 ||
 	    priv->wpa_version & NL80211_WPA_VERSION_1)
@@ -1116,8 +1121,8 @@ static int set_encr_mode(struct usbnet *usbdev, int pairwise, int groupwise)
 	__le32 tmp;
 	int encr_mode, ret;
 
-	devdbg(usbdev, "set_encr_mode: cipher_pair=0x%x cipher_group=0x%x",
-		pairwise, groupwise);
+	netdev_dbg(usbdev->net, "%s(): cipher_pair=0x%x cipher_group=0x%x\n",
+		   __func__, pairwise, groupwise);
 
 	if (pairwise & RNDIS_WLAN_ALG_CCMP)
 		encr_mode = NDIS_80211_ENCR_CCMP_ENABLED;
@@ -1136,7 +1141,8 @@ static int set_encr_mode(struct usbnet *usbdev, int pairwise, int groupwise)
 	ret = rndis_set_oid(usbdev, OID_802_11_ENCRYPTION_STATUS, &tmp,
 								sizeof(tmp));
 	if (ret != 0) {
-		devwarn(usbdev, "setting encr mode failed (%08X)", ret);
+		netdev_warn(usbdev->net, "setting encr mode failed (%08X)\n",
+			    ret);
 		return ret;
 	}
 
@@ -1151,13 +1157,15 @@ static int set_infra_mode(struct usbnet *usbdev, int mode)
 	__le32 tmp;
 	int ret;
 
-	devdbg(usbdev, "set_infra_mode: infra_mode=0x%x", priv->infra_mode);
+	netdev_dbg(usbdev->net, "%s(): infra_mode=0x%x\n",
+		   __func__, priv->infra_mode);
 
 	tmp = cpu_to_le32(mode);
 	ret = rndis_set_oid(usbdev, OID_802_11_INFRASTRUCTURE_MODE, &tmp,
 								sizeof(tmp));
 	if (ret != 0) {
-		devwarn(usbdev, "setting infra mode failed (%08X)", ret);
+		netdev_warn(usbdev->net, "setting infra mode failed (%08X)\n",
+			    ret);
 		return ret;
 	}
 
@@ -1174,7 +1182,7 @@ static int set_rts_threshold(struct usbnet *usbdev, u32 rts_threshold)
 {
 	__le32 tmp;
 
-	devdbg(usbdev, "set_rts_threshold %i", rts_threshold);
+	netdev_dbg(usbdev->net, "%s(): %i\n", __func__, rts_threshold);
 
 	if (rts_threshold < 0 || rts_threshold > 2347)
 		rts_threshold = 2347;
@@ -1188,7 +1196,7 @@ static int set_frag_threshold(struct usbnet *usbdev, u32 frag_threshold)
 {
 	__le32 tmp;
 
-	devdbg(usbdev, "set_frag_threshold %i", frag_threshold);
+	netdev_dbg(usbdev->net, "%s(): %i\n", __func__, frag_threshold);
 
 	if (frag_threshold < 256 || frag_threshold > 2346)
 		frag_threshold = 2346;
@@ -1222,7 +1230,7 @@ static int set_channel(struct usbnet *usbdev, int channel)
 	unsigned int dsconfig;
 	int len, ret;
 
-	devdbg(usbdev, "set_channel(%d)", channel);
+	netdev_dbg(usbdev->net, "%s(%d)\n", __func__, channel);
 
 	/* this OID is valid only when not associated */
 	if (is_associated(usbdev))
@@ -1233,7 +1241,8 @@ static int set_channel(struct usbnet *usbdev, int channel)
 	len = sizeof(config);
 	ret = rndis_query_oid(usbdev, OID_802_11_CONFIGURATION, &config, &len);
 	if (ret < 0) {
-		devdbg(usbdev, "set_channel: querying configuration failed");
+		netdev_dbg(usbdev->net, "%s(): querying configuration failed\n",
+			   __func__);
 		return ret;
 	}
 
@@ -1241,7 +1250,7 @@ static int set_channel(struct usbnet *usbdev, int channel)
 	ret = rndis_set_oid(usbdev, OID_802_11_CONFIGURATION, &config,
 								sizeof(config));
 
-	devdbg(usbdev, "set_channel: %d -> %d", channel, ret);
+	netdev_dbg(usbdev->net, "%s(): %d -> %d\n", __func__, channel, ret);
 
 	return ret;
 }
@@ -1255,7 +1264,8 @@ static int add_wep_key(struct usbnet *usbdev, const u8 *key, int key_len,
 	u32 cipher;
 	int ret;
 
-	devdbg(usbdev, "add_wep_key(idx: %d, len: %d)", index, key_len);
+	netdev_dbg(usbdev->net, "%s(idx: %d, len: %d)\n",
+		   __func__, index, key_len);
 
 	if ((key_len != 5 && key_len != 13) || index < 0 || index > 3)
 		return -EINVAL;
@@ -1277,15 +1287,15 @@ static int add_wep_key(struct usbnet *usbdev, const u8 *key, int key_len,
 		ret = set_encr_mode(usbdev, RNDIS_WLAN_ALG_WEP,
 							RNDIS_WLAN_ALG_NONE);
 		if (ret)
-			devwarn(usbdev, "encryption couldn't be enabled (%08X)",
-									ret);
+			netdev_warn(usbdev->net, "encryption couldn't be enabled (%08X)\n",
+				    ret);
 	}
 
 	ret = rndis_set_oid(usbdev, OID_802_11_ADD_WEP, &ndis_key,
 							sizeof(ndis_key));
 	if (ret != 0) {
-		devwarn(usbdev, "adding encryption key %d failed (%08X)",
-							index+1, ret);
+		netdev_warn(usbdev->net, "adding encryption key %d failed (%08X)\n",
+			    index + 1, ret);
 		return ret;
 	}
 
@@ -1307,22 +1317,23 @@ static int add_wpa_key(struct usbnet *usbdev, const u8 *key, int key_len,
 	int ret;
 
 	if (index < 0 || index >= 4) {
-		devdbg(usbdev, "add_wpa_key: index out of range (%i)", index);
+		netdev_dbg(usbdev->net, "%s(): index out of range (%i)\n",
+			   __func__, index);
 		return -EINVAL;
 	}
 	if (key_len > sizeof(ndis_key.material) || key_len < 0) {
-		devdbg(usbdev, "add_wpa_key: key length out of range (%i)",
-			key_len);
+		netdev_dbg(usbdev->net, "%s(): key length out of range (%i)\n",
+			   __func__, key_len);
 		return -EINVAL;
 	}
 	if (flags & NDIS_80211_ADDKEY_SET_INIT_RECV_SEQ) {
 		if (!rx_seq || seq_len <= 0) {
-			devdbg(usbdev, "add_wpa_key: recv seq flag without"
-					"buffer");
+			netdev_dbg(usbdev->net, "%s(): recv seq flag without buffer\n",
+				   __func__);
 			return -EINVAL;
 		}
 		if (rx_seq && seq_len > sizeof(ndis_key.rsc)) {
-			devdbg(usbdev, "add_wpa_key: too big recv seq buffer");
+			netdev_dbg(usbdev->net, "%s(): too big recv seq buffer\n", __func__);
 			return -EINVAL;
 		}
 	}
@@ -1330,15 +1341,16 @@ static int add_wpa_key(struct usbnet *usbdev, const u8 *key, int key_len,
 	is_addr_ok = addr && !is_zero_ether_addr(addr) &&
 					!is_broadcast_ether_addr(addr);
 	if ((flags & NDIS_80211_ADDKEY_PAIRWISE_KEY) && !is_addr_ok) {
-		devdbg(usbdev, "add_wpa_key: pairwise but bssid invalid (%pM)",
-			addr);
+		netdev_dbg(usbdev->net, "%s(): pairwise but bssid invalid (%pM)\n",
+			   __func__, addr);
 		return -EINVAL;
 	}
 
-	devdbg(usbdev, "add_wpa_key(%i): flags:%i%i%i", index,
-			!!(flags & NDIS_80211_ADDKEY_TRANSMIT_KEY),
-			!!(flags & NDIS_80211_ADDKEY_PAIRWISE_KEY),
-			!!(flags & NDIS_80211_ADDKEY_SET_INIT_RECV_SEQ));
+	netdev_dbg(usbdev->net, "%s(%i): flags:%i%i%i\n",
+		   __func__, index,
+		   !!(flags & NDIS_80211_ADDKEY_TRANSMIT_KEY),
+		   !!(flags & NDIS_80211_ADDKEY_PAIRWISE_KEY),
+		   !!(flags & NDIS_80211_ADDKEY_SET_INIT_RECV_SEQ));
 
 	memset(&ndis_key, 0, sizeof(ndis_key));
 
@@ -1372,7 +1384,8 @@ static int add_wpa_key(struct usbnet *usbdev, const u8 *key, int key_len,
 
 	ret = rndis_set_oid(usbdev, OID_802_11_ADD_KEY, &ndis_key,
 					le32_to_cpu(ndis_key.size));
-	devdbg(usbdev, "add_wpa_key: OID_802_11_ADD_KEY -> %08X", ret);
+	netdev_dbg(usbdev->net, "%s(): OID_802_11_ADD_KEY -> %08X\n",
+		   __func__, ret);
 	if (ret != 0)
 		return ret;
 
@@ -1401,7 +1414,7 @@ static int restore_key(struct usbnet *usbdev, int key_idx)
 
 	key = priv->encr_keys[key_idx];
 
-	devdbg(usbdev, "restore_key: %i:%i", key_idx, key.len);
+	netdev_dbg(usbdev->net, "%s(): %i:%i\n", __func__, key_idx, key.len);
 
 	if (key.len == 0)
 		return 0;
@@ -1436,8 +1449,9 @@ static int remove_key(struct usbnet *usbdev, int index, const u8 *bssid)
 
 	is_wpa = is_wpa_key(priv, index);
 
-	devdbg(usbdev, "remove_key: %i:%s:%i", index, is_wpa ? "wpa" : "wep",
-		priv->encr_keys[index].len);
+	netdev_dbg(usbdev->net, "%s(): %i:%s:%i\n",
+		   __func__, index, is_wpa ? "wpa" : "wep",
+		   priv->encr_keys[index].len);
 
 	clear_key(priv, index);
 
@@ -1464,9 +1478,9 @@ static int remove_key(struct usbnet *usbdev, int index, const u8 *bssid)
 		ret = rndis_set_oid(usbdev, OID_802_11_REMOVE_WEP, &keyindex,
 							sizeof(keyindex));
 		if (ret != 0) {
-			devwarn(usbdev,
-				"removing encryption key %d failed (%08X)",
-				index, ret);
+			netdev_warn(usbdev->net,
+				    "removing encryption key %d failed (%08X)\n",
+				    index, ret);
 			return ret;
 		}
 	}
@@ -1498,9 +1512,9 @@ static void set_multicast_list(struct usbnet *usbdev)
 		size = min(priv->multicast_size, netdev_mc_count(usbdev->net));
 		buf = kmalloc(size * ETH_ALEN, GFP_KERNEL);
 		if (!buf) {
-			devwarn(usbdev,
-				"couldn't alloc %d bytes of memory",
-				size * ETH_ALEN);
+			netdev_warn(usbdev->net,
+				    "couldn't alloc %d bytes of memory\n",
+				    size * ETH_ALEN);
 			return;
 		}
 
@@ -1520,8 +1534,8 @@ static void set_multicast_list(struct usbnet *usbdev)
 		else
 			filter |= RNDIS_PACKET_TYPE_ALL_MULTICAST;
 
-		devdbg(usbdev, "OID_802_3_MULTICAST_LIST(%d, max: %d) -> %d",
-						i, priv->multicast_size, ret);
+		netdev_dbg(usbdev->net, "OID_802_3_MULTICAST_LIST(%d, max: %d) -> %d\n",
+			   i, priv->multicast_size, ret);
 
 		kfree(buf);
 	}
@@ -1529,12 +1543,12 @@ static void set_multicast_list(struct usbnet *usbdev)
 	ret = rndis_set_oid(usbdev, OID_GEN_CURRENT_PACKET_FILTER, &filter,
 							sizeof(filter));
 	if (ret < 0) {
-		devwarn(usbdev, "couldn't set packet filter: %08x",
-							le32_to_cpu(filter));
+		netdev_warn(usbdev->net, "couldn't set packet filter: %08x\n",
+			    le32_to_cpu(filter));
 	}
 
-	devdbg(usbdev, "OID_GEN_CURRENT_PACKET_FILTER(%08x) -> %d",
-						le32_to_cpu(filter), ret);
+	netdev_dbg(usbdev->net, "OID_GEN_CURRENT_PACKET_FILTER(%08x) -> %d\n",
+		   le32_to_cpu(filter), ret);
 }
 
 /*
@@ -1592,7 +1606,8 @@ static int rndis_set_tx_power(struct wiphy *wiphy, enum tx_power_setting type,
 	struct rndis_wlan_private *priv = wiphy_priv(wiphy);
 	struct usbnet *usbdev = priv->usbdev;
 
-	devdbg(usbdev, "rndis_set_tx_power type:0x%x dbm:%i", type, dbm);
+	netdev_dbg(usbdev->net, "%s(): type:0x%x dbm:%i\n",
+		   __func__, type, dbm);
 
 	/* Device doesn't support changing txpower after initialization, only
 	 * turn off/on radio. Support 'auto' mode and setting same dBm that is
@@ -1615,7 +1630,7 @@ static int rndis_get_tx_power(struct wiphy *wiphy, int *dbm)
 
 	*dbm = get_bcm4320_power_dbm(priv);
 
-	devdbg(usbdev, "rndis_get_tx_power dbm:%i", *dbm);
+	netdev_dbg(usbdev->net, "%s(): dbm:%i\n", __func__, *dbm);
 
 	return 0;
 }
@@ -1629,7 +1644,7 @@ static int rndis_scan(struct wiphy *wiphy, struct net_device *dev,
 	int ret;
 	__le32 tmp;
 
-	devdbg(usbdev, "cfg80211.scan");
+	netdev_dbg(usbdev->net, "cfg80211.scan\n");
 
 	/* Get current bssid list from device before new scan, as new scan
 	 * clears internal bssid list.
@@ -1669,8 +1684,8 @@ static struct cfg80211_bss *rndis_bss_info_update(struct usbnet *usbdev,
 	int ie_len, bssid_len;
 	u8 *ie;
 
-	devdbg(usbdev, " found bssid: '%.32s' [%pM]", bssid->ssid.essid,
-							bssid->mac);
+	netdev_dbg(usbdev->net, " found bssid: '%.32s' [%pM]\n",
+		   bssid->ssid.essid, bssid->mac);
 
 	/* parse bssid structure */
 	bssid_len = le32_to_cpu(bssid->length);
@@ -1712,7 +1727,7 @@ static int rndis_check_bssid_list(struct usbnet *usbdev)
 	int ret = -EINVAL, len, count, bssid_len;
 	bool resized = false;
 
-	devdbg(usbdev, "check_bssid_list");
+	netdev_dbg(usbdev->net, "check_bssid_list\n");
 
 	len = CONTROL_BUFFER_SIZE;
 resize_buf:
@@ -1736,8 +1751,8 @@ resize_buf:
 	bssid = bssid_list->bssid;
 	bssid_len = le32_to_cpu(bssid->length);
 	count = le32_to_cpu(bssid_list->num_items);
-	devdbg(usbdev, "check_bssid_list: %d BSSIDs found (buflen: %d)", count,
-									len);
+	netdev_dbg(usbdev->net, "check_bssid_list: %d BSSIDs found (buflen: %d)\n",
+		   count, len);
 
 	while (count && ((void *)bssid + bssid_len) <= (buf + len)) {
 		rndis_bss_info_update(usbdev, bssid);
@@ -1759,7 +1774,7 @@ static void rndis_get_scan_results(struct work_struct *work)
 	struct usbnet *usbdev = priv->usbdev;
 	int ret;
 
-	devdbg(usbdev, "get_scan_results");
+	netdev_dbg(usbdev->net, "get_scan_results\n");
 
 	if (!priv->scan_request)
 		return;
@@ -1793,7 +1808,7 @@ static int rndis_connect(struct wiphy *wiphy, struct net_device *dev,
 
 	if (sme->crypto.n_ciphers_pairwise > 0 &&
 			pairwise == RNDIS_WLAN_ALG_NONE) {
-		deverr(usbdev, "Unsupported pairwise cipher");
+		netdev_err(usbdev->net, "Unsupported pairwise cipher\n");
 		return -ENOTSUPP;
 	}
 
@@ -1803,28 +1818,30 @@ static int rndis_connect(struct wiphy *wiphy, struct net_device *dev,
 
 	if (sme->crypto.n_akm_suites > 0 &&
 			keymgmt == RNDIS_WLAN_KEY_MGMT_NONE) {
-		deverr(usbdev, "Invalid keymgmt");
+		netdev_err(usbdev->net, "Invalid keymgmt\n");
 		return -ENOTSUPP;
 	}
 
-	devdbg(usbdev, "cfg80211.connect('%.32s':[%pM]:%d:[%d,0x%x:0x%x]:[0x%x:"
-			"0x%x]:0x%x)", sme->ssid, sme->bssid, chan,
-			sme->privacy, sme->crypto.wpa_versions, sme->auth_type,
-			groupwise, pairwise, keymgmt);
+	netdev_dbg(usbdev->net, "cfg80211.connect('%.32s':[%pM]:%d:[%d,0x%x:0x%x]:[0x%x:0x%x]:0x%x)\n",
+		   sme->ssid, sme->bssid, chan,
+		   sme->privacy, sme->crypto.wpa_versions, sme->auth_type,
+		   groupwise, pairwise, keymgmt);
 
 	if (is_associated(usbdev))
 		disassociate(usbdev, false);
 
 	ret = set_infra_mode(usbdev, NDIS_80211_INFRA_INFRA);
 	if (ret < 0) {
-		devdbg(usbdev, "connect: set_infra_mode failed, %d", ret);
+		netdev_dbg(usbdev->net, "connect: set_infra_mode failed, %d\n",
+			   ret);
 		goto err_turn_radio_on;
 	}
 
 	ret = set_auth_mode(usbdev, sme->crypto.wpa_versions, sme->auth_type,
 								keymgmt);
 	if (ret < 0) {
-		devdbg(usbdev, "connect: set_auth_mode failed, %d", ret);
+		netdev_dbg(usbdev->net, "connect: set_auth_mode failed, %d\n",
+			   ret);
 		goto err_turn_radio_on;
 	}
 
@@ -1832,14 +1849,16 @@ static int rndis_connect(struct wiphy *wiphy, struct net_device *dev,
 
 	ret = set_encr_mode(usbdev, pairwise, groupwise);
 	if (ret < 0) {
-		devdbg(usbdev, "connect: set_encr_mode failed, %d", ret);
+		netdev_dbg(usbdev->net, "connect: set_encr_mode failed, %d\n",
+			   ret);
 		goto err_turn_radio_on;
 	}
 
 	if (channel) {
 		ret = set_channel(usbdev, chan);
 		if (ret < 0) {
-			devdbg(usbdev, "connect: set_channel failed, %d", ret);
+			netdev_dbg(usbdev->net, "connect: set_channel failed, %d\n",
+				   ret);
 			goto err_turn_radio_on;
 		}
 	}
@@ -1848,8 +1867,8 @@ static int rndis_connect(struct wiphy *wiphy, struct net_device *dev,
 		priv->encr_tx_key_index = sme->key_idx;
 		ret = add_wep_key(usbdev, sme->key, sme->key_len, sme->key_idx);
 		if (ret < 0) {
-			devdbg(usbdev, "connect: add_wep_key failed, %d "
-				"(%d, %d)", ret, sme->key_len, sme->key_idx);
+			netdev_dbg(usbdev->net, "connect: add_wep_key failed, %d (%d, %d)\n",
+				   ret, sme->key_len, sme->key_idx);
 			goto err_turn_radio_on;
 		}
 	}
@@ -1858,7 +1877,8 @@ static int rndis_connect(struct wiphy *wiphy, struct net_device *dev,
 				!is_broadcast_ether_addr(sme->bssid)) {
 		ret = set_bssid(usbdev, sme->bssid);
 		if (ret < 0) {
-			devdbg(usbdev, "connect: set_bssid failed, %d", ret);
+			netdev_dbg(usbdev->net, "connect: set_bssid failed, %d\n",
+				   ret);
 			goto err_turn_radio_on;
 		}
 	} else
@@ -1880,7 +1900,7 @@ static int rndis_connect(struct wiphy *wiphy, struct net_device *dev,
 
 	ret = set_essid(usbdev, &ssid);
 	if (ret < 0)
-		devdbg(usbdev, "connect: set_essid failed, %d", ret);
+		netdev_dbg(usbdev->net, "connect: set_essid failed, %d\n", ret);
 	return ret;
 
 err_turn_radio_on:
@@ -1895,7 +1915,7 @@ static int rndis_disconnect(struct wiphy *wiphy, struct net_device *dev,
 	struct rndis_wlan_private *priv = wiphy_priv(wiphy);
 	struct usbnet *usbdev = priv->usbdev;
 
-	devdbg(usbdev, "cfg80211.disconnect(%d)", reason_code);
+	netdev_dbg(usbdev->net, "cfg80211.disconnect(%d)\n", reason_code);
 
 	priv->connected = false;
 	memset(priv->bssid, 0, ETH_ALEN);
@@ -1929,21 +1949,23 @@ static int rndis_join_ibss(struct wiphy *wiphy, struct net_device *dev,
 		alg = RNDIS_WLAN_ALG_NONE;
 	}
 
-	devdbg(usbdev, "cfg80211.join_ibss('%.32s':[%pM]:%d:%d)", params->ssid,
-					params->bssid, chan, params->privacy);
+	netdev_dbg(usbdev->net, "cfg80211.join_ibss('%.32s':[%pM]:%d:%d)\n",
+		   params->ssid, params->bssid, chan, params->privacy);
 
 	if (is_associated(usbdev))
 		disassociate(usbdev, false);
 
 	ret = set_infra_mode(usbdev, NDIS_80211_INFRA_ADHOC);
 	if (ret < 0) {
-		devdbg(usbdev, "join_ibss: set_infra_mode failed, %d", ret);
+		netdev_dbg(usbdev->net, "join_ibss: set_infra_mode failed, %d\n",
+			   ret);
 		goto err_turn_radio_on;
 	}
 
 	ret = set_auth_mode(usbdev, 0, auth_type, RNDIS_WLAN_KEY_MGMT_NONE);
 	if (ret < 0) {
-		devdbg(usbdev, "join_ibss: set_auth_mode failed, %d", ret);
+		netdev_dbg(usbdev->net, "join_ibss: set_auth_mode failed, %d\n",
+			   ret);
 		goto err_turn_radio_on;
 	}
 
@@ -1951,15 +1973,16 @@ static int rndis_join_ibss(struct wiphy *wiphy, struct net_device *dev,
 
 	ret = set_encr_mode(usbdev, alg, RNDIS_WLAN_ALG_NONE);
 	if (ret < 0) {
-		devdbg(usbdev, "join_ibss: set_encr_mode failed, %d", ret);
+		netdev_dbg(usbdev->net, "join_ibss: set_encr_mode failed, %d\n",
+			   ret);
 		goto err_turn_radio_on;
 	}
 
 	if (channel) {
 		ret = set_channel(usbdev, chan);
 		if (ret < 0) {
-			devdbg(usbdev, "join_ibss: set_channel failed, %d",
-				ret);
+			netdev_dbg(usbdev->net, "join_ibss: set_channel failed, %d\n",
+				   ret);
 			goto err_turn_radio_on;
 		}
 	}
@@ -1968,7 +1991,8 @@ static int rndis_join_ibss(struct wiphy *wiphy, struct net_device *dev,
 				!is_broadcast_ether_addr(params->bssid)) {
 		ret = set_bssid(usbdev, params->bssid);
 		if (ret < 0) {
-			devdbg(usbdev, "join_ibss: set_bssid failed, %d", ret);
+			netdev_dbg(usbdev->net, "join_ibss: set_bssid failed, %d\n",
+				   ret);
 			goto err_turn_radio_on;
 		}
 	} else
@@ -1988,7 +2012,8 @@ static int rndis_join_ibss(struct wiphy *wiphy, struct net_device *dev,
 
 	ret = set_essid(usbdev, &ssid);
 	if (ret < 0)
-		devdbg(usbdev, "join_ibss: set_essid failed, %d", ret);
+		netdev_dbg(usbdev->net, "join_ibss: set_essid failed, %d\n",
+			   ret);
 	return ret;
 
 err_turn_radio_on:
@@ -2002,7 +2027,7 @@ static int rndis_leave_ibss(struct wiphy *wiphy, struct net_device *dev)
 	struct rndis_wlan_private *priv = wiphy_priv(wiphy);
 	struct usbnet *usbdev = priv->usbdev;
 
-	devdbg(usbdev, "cfg80211.leave_ibss()");
+	netdev_dbg(usbdev->net, "cfg80211.leave_ibss()\n");
 
 	priv->connected = false;
 	memset(priv->bssid, 0, ETH_ALEN);
@@ -2028,8 +2053,8 @@ static int rndis_add_key(struct wiphy *wiphy, struct net_device *netdev,
 	struct usbnet *usbdev = priv->usbdev;
 	__le32 flags;
 
-	devdbg(usbdev, "rndis_add_key(%i, %pM, %08x)", key_index, mac_addr,
-							params->cipher);
+	netdev_dbg(usbdev->net, "%s(%i, %pM, %08x)\n",
+		   __func__, key_index, mac_addr, params->cipher);
 
 	switch (params->cipher) {
 	case WLAN_CIPHER_SUITE_WEP40:
@@ -2050,8 +2075,8 @@ static int rndis_add_key(struct wiphy *wiphy, struct net_device *netdev,
 				key_index, mac_addr, params->seq,
 				params->seq_len, params->cipher, flags);
 	default:
-		devdbg(usbdev, "rndis_add_key: unsupported cipher %08x",
-							params->cipher);
+		netdev_dbg(usbdev->net, "%s(): unsupported cipher %08x\n",
+			   __func__, params->cipher);
 		return -ENOTSUPP;
 	}
 }
@@ -2062,7 +2087,7 @@ static int rndis_del_key(struct wiphy *wiphy, struct net_device *netdev,
 	struct rndis_wlan_private *priv = wiphy_priv(wiphy);
 	struct usbnet *usbdev = priv->usbdev;
 
-	devdbg(usbdev, "rndis_del_key(%i, %pM)", key_index, mac_addr);
+	netdev_dbg(usbdev->net, "%s(%i, %pM)\n", __func__, key_index, mac_addr);
 
 	return remove_key(usbdev, key_index, mac_addr);
 }
@@ -2074,7 +2099,7 @@ static int rndis_set_default_key(struct wiphy *wiphy, struct net_device *netdev,
 	struct usbnet *usbdev = priv->usbdev;
 	struct rndis_wlan_encr_key key;
 
-	devdbg(usbdev, "rndis_set_default_key(%i)", key_index);
+	netdev_dbg(usbdev->net, "%s(%i)\n", __func__, key_index);
 
 	priv->encr_tx_key_index = key_index;
 
@@ -2188,7 +2213,8 @@ static void rndis_wlan_do_link_up_work(struct usbnet *usbdev)
 	if (ret < 0)
 		memset(bssid, 0, sizeof(bssid));
 
-	devdbg(usbdev, "link up work: [%pM] %s", bssid, roamed ? "roamed" : "");
+	netdev_dbg(usbdev->net, "link up work: [%pM]%s\n",
+		   bssid, roamed ? " roamed" : "");
 
 	/* Internal bss list in device always contains at least the currently
 	 * connected bss and we can get it to cfg80211 with
@@ -2270,8 +2296,8 @@ static void rndis_wlan_auth_indication(struct usbnet *usbdev,
 	/* must have at least one array entry */
 	if (len < offsetof(struct ndis_80211_status_indication, u) +
 				sizeof(struct ndis_80211_auth_request)) {
-		devinfo(usbdev, "authentication indication: "
-				"too short message (%i)", len);
+		netdev_info(usbdev->net, "authentication indication: too short message (%i)\n",
+			    len);
 		return;
 	}
 
@@ -2298,8 +2324,8 @@ static void rndis_wlan_auth_indication(struct usbnet *usbdev,
 			type = "group_error";
 		}
 
-		devinfo(usbdev, "authentication indication: %s (0x%08x)", type,
-				le32_to_cpu(auth_req->flags));
+		netdev_info(usbdev->net, "authentication indication: %s (0x%08x)\n",
+			    type, le32_to_cpu(auth_req->flags));
 
 		if (pairwise_error) {
 			key_type = NL80211_KEYTYPE_PAIRWISE;
@@ -2335,8 +2361,8 @@ static void rndis_wlan_pmkid_cand_list_indication(struct usbnet *usbdev,
 
 	if (len < offsetof(struct ndis_80211_status_indication, u) +
 				sizeof(struct ndis_80211_pmkid_cand_list)) {
-		devinfo(usbdev, "pmkid candidate list indication: "
-				"too short message (%i)", len);
+		netdev_info(usbdev->net, "pmkid candidate list indication: too short message (%i)\n",
+			    len);
 		return;
 	}
 
@@ -2346,18 +2372,16 @@ static void rndis_wlan_pmkid_cand_list_indication(struct usbnet *usbdev,
 			offsetof(struct ndis_80211_status_indication, u);
 
 	if (len < expected_len) {
-		devinfo(usbdev, "pmkid candidate list indication: "
-				"list larger than buffer (%i < %i)",
-				len, expected_len);
+		netdev_info(usbdev->net, "pmkid candidate list indication: list larger than buffer (%i < %i)\n",
+			    len, expected_len);
 		return;
 	}
 
 	cand_list = &indication->u.cand_list;
 
-	devinfo(usbdev, "pmkid candidate list indication: "
-			"version %i, candidates %i",
-			le32_to_cpu(cand_list->version),
-			le32_to_cpu(cand_list->num_candidates));
+	netdev_info(usbdev->net, "pmkid candidate list indication: version %i, candidates %i\n",
+		    le32_to_cpu(cand_list->version),
+		    le32_to_cpu(cand_list->num_candidates));
 
 	if (le32_to_cpu(cand_list->version) != 1)
 		return;
@@ -2366,8 +2390,8 @@ static void rndis_wlan_pmkid_cand_list_indication(struct usbnet *usbdev,
 		struct ndis_80211_pmkid_candidate *cand =
 						&cand_list->candidate_list[i];
 
-		devdbg(usbdev, "cand[%i]: flags: 0x%08x, bssid: %pM",
-				i, le32_to_cpu(cand->flags), cand->bssid);
+		netdev_dbg(usbdev->net, "cand[%i]: flags: 0x%08x, bssid: %pM\n",
+			   i, le32_to_cpu(cand->flags), cand->bssid);
 
 #if 0
 		struct iw_pmkid_cand pcand;
@@ -2398,15 +2422,14 @@ static void rndis_wlan_media_specific_indication(struct usbnet *usbdev,
 	len = le32_to_cpu(msg->length);
 
 	if (len < 8) {
-		devinfo(usbdev, "media specific indication, "
-				"ignore too short message (%i < 8)", len);
+		netdev_info(usbdev->net, "media specific indication, ignore too short message (%i < 8)\n",
+			    len);
 		return;
 	}
 
 	if (offset + len > buflen) {
-		devinfo(usbdev, "media specific indication, "
-				"too large to fit to buffer (%i > %i)",
-				offset + len, buflen);
+		netdev_info(usbdev->net, "media specific indication, too large to fit to buffer (%i > %i)\n",
+			    offset + len, buflen);
 		return;
 	}
 
@@ -2414,13 +2437,13 @@ static void rndis_wlan_media_specific_indication(struct usbnet *usbdev,
 
 	switch (le32_to_cpu(indication->status_type)) {
 	case NDIS_80211_STATUSTYPE_RADIOSTATE:
-		devinfo(usbdev, "radio state indication: %i",
-			le32_to_cpu(indication->u.radio_status));
+		netdev_info(usbdev->net, "radio state indication: %i\n",
+			    le32_to_cpu(indication->u.radio_status));
 		return;
 
 	case NDIS_80211_STATUSTYPE_MEDIASTREAMMODE:
-		devinfo(usbdev, "media stream mode indication: %i",
-			le32_to_cpu(indication->u.media_stream_mode));
+		netdev_info(usbdev->net, "media stream mode indication: %i\n",
+			    le32_to_cpu(indication->u.media_stream_mode));
 		return;
 
 	case NDIS_80211_STATUSTYPE_AUTHENTICATION:
@@ -2432,9 +2455,8 @@ static void rndis_wlan_media_specific_indication(struct usbnet *usbdev,
 		return;
 
 	default:
-		devinfo(usbdev, "media specific indication: "
-				"unknown status type 0x%08x",
-				le32_to_cpu(indication->status_type));
+		netdev_info(usbdev->net, "media specific indication: unknown status type 0x%08x\n",
+			    le32_to_cpu(indication->status_type));
 	}
 }
 
@@ -2451,14 +2473,13 @@ static void rndis_wlan_indication(struct usbnet *usbdev, void *ind, int buflen)
 			 * and userspace to think that device is
 			 * roaming/reassociating when it isn't.
 			 */
-			devdbg(usbdev, "ignored OID_802_11_ADD_KEY triggered "
-					"'media connect'");
+			netdev_dbg(usbdev->net, "ignored OID_802_11_ADD_KEY triggered 'media connect'\n");
 			return;
 		}
 
 		usbnet_pause_rx(usbdev);
 
-		devinfo(usbdev, "media connect");
+		netdev_info(usbdev->net, "media connect\n");
 
 		/* queue work to avoid recursive calls into rndis_command */
 		set_bit(WORK_LINK_UP, &priv->work_pending);
@@ -2466,7 +2487,7 @@ static void rndis_wlan_indication(struct usbnet *usbdev, void *ind, int buflen)
 		break;
 
 	case RNDIS_STATUS_MEDIA_DISCONNECT:
-		devinfo(usbdev, "media disconnect");
+		netdev_info(usbdev->net, "media disconnect\n");
 
 		/* queue work to avoid recursive calls into rndis_command */
 		set_bit(WORK_LINK_DOWN, &priv->work_pending);
@@ -2478,8 +2499,8 @@ static void rndis_wlan_indication(struct usbnet *usbdev, void *ind, int buflen)
 		break;
 
 	default:
-		devinfo(usbdev, "indication: 0x%08x",
-				le32_to_cpu(msg->status));
+		netdev_info(usbdev->net, "indication: 0x%08x\n",
+			    le32_to_cpu(msg->status));
 		break;
 	}
 }
@@ -2544,8 +2565,8 @@ static void rndis_device_poller(struct work_struct *work)
 	if (ret == 0)
 		priv->last_qual = level_to_qual(le32_to_cpu(rssi));
 
-	devdbg(usbdev, "dev-poller: OID_802_11_RSSI -> %d, rssi:%d, qual: %d",
-		ret, le32_to_cpu(rssi), level_to_qual(le32_to_cpu(rssi)));
+	netdev_dbg(usbdev->net, "dev-poller: OID_802_11_RSSI -> %d, rssi:%d, qual: %d\n",
+		   ret, le32_to_cpu(rssi), level_to_qual(le32_to_cpu(rssi)));
 
 	/* Workaround transfer stalls on poor quality links.
 	 * TODO: find right way to fix these stalls (as stalls do not happen
@@ -2838,11 +2859,11 @@ static int rndis_wlan_reset(struct usbnet *usbdev)
 	struct rndis_wlan_private *priv = get_rndis_wlan_priv(usbdev);
 	int retval;
 
-	devdbg(usbdev, "rndis_wlan_reset");
+	netdev_dbg(usbdev->net, "%s()\n", __func__);
 
 	retval = rndis_reset(usbdev);
 	if (retval)
-		devwarn(usbdev, "rndis_reset() failed: %d", retval);
+		netdev_warn(usbdev->net, "rndis_reset failed: %d\n", retval);
 
 	/* rndis_reset cleared multicast list, so restore here.
 	   (set_multicast_list() also turns on current packet filter) */
@@ -2860,7 +2881,7 @@ static int rndis_wlan_stop(struct usbnet *usbdev)
 	int retval;
 	__le32 filter;
 
-	devdbg(usbdev, "rndis_wlan_stop");
+	netdev_dbg(usbdev->net, "%s()\n", __func__);
 
 	retval = disassociate(usbdev, false);
 
diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h
index 8ce61359bf73..df1e83dd9a54 100644
--- a/include/linux/usb/usbnet.h
+++ b/include/linux/usb/usbnet.h
@@ -214,25 +214,4 @@ extern void usbnet_set_msglevel (struct net_device *, u32);
 extern void usbnet_get_drvinfo (struct net_device *, struct ethtool_drvinfo *);
 extern int usbnet_nway_reset(struct net_device *net);
 
-/* messaging support includes the interface name, so it must not be
- * used before it has one ... notably, in minidriver bind() calls.
- */
-#ifdef DEBUG
-#define devdbg(usbnet, fmt, arg...) \
-	printk(KERN_DEBUG "%s: " fmt "\n" , (usbnet)->net->name , ## arg)
-#else
-#define devdbg(usbnet, fmt, arg...) \
-	({ if (0) printk(KERN_DEBUG "%s: " fmt "\n" , (usbnet)->net->name , \
-		## arg); 0; })
-#endif
-
-#define deverr(usbnet, fmt, arg...) \
-	printk(KERN_ERR "%s: " fmt "\n" , (usbnet)->net->name , ## arg)
-#define devwarn(usbnet, fmt, arg...) \
-	printk(KERN_WARNING "%s: " fmt "\n" , (usbnet)->net->name , ## arg)
-
-#define devinfo(usbnet, fmt, arg...) \
-	printk(KERN_INFO "%s: " fmt "\n" , (usbnet)->net->name , ## arg); \
-
-
 #endif /* __LINUX_USB_USBNET_H */
-- 
cgit v1.2.3


From 99405162598176e830d17ae6d4f3d9e070ad900c Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Sun, 14 Feb 2010 01:01:10 +0000
Subject: tun: socket filter support

This patch adds Linux Socket Filter support to
tun driver.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/tun.c      | 26 ++++++++++++++++++++++++++
 include/linux/if_tun.h |  3 +++
 2 files changed, 29 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 5adb3d150552..ce1efa4c0b0d 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -61,6 +61,7 @@
 #include <linux/crc32.h>
 #include <linux/nsproxy.h>
 #include <linux/virtio_net.h>
+#include <linux/rcupdate.h>
 #include <net/net_namespace.h>
 #include <net/netns/generic.h>
 #include <net/rtnetlink.h>
@@ -366,6 +367,10 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
 	if (!check_filter(&tun->txflt, skb))
 		goto drop;
 
+	if (tun->socket.sk->sk_filter &&
+	    sk_filter(tun->socket.sk, skb))
+		goto drop;
+
 	if (skb_queue_len(&tun->socket.sk->sk_receive_queue) >= dev->tx_queue_len) {
 		if (!(tun->flags & TUN_ONE_QUEUE)) {
 			/* Normal queueing mode. */
@@ -1162,6 +1167,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
 	struct tun_file *tfile = file->private_data;
 	struct tun_struct *tun;
 	void __user* argp = (void __user*)arg;
+	struct sock_fprog fprog;
 	struct ifreq ifr;
 	int sndbuf;
 	int ret;
@@ -1309,6 +1315,26 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
 		tun->socket.sk->sk_sndbuf = sndbuf;
 		break;
 
+	case TUNATTACHFILTER:
+		/* Can be set only for TAPs */
+		ret = -EINVAL;
+		if ((tun->flags & TUN_TYPE_MASK) != TUN_TAP_DEV)
+			break;
+		ret = -EFAULT;
+		if (copy_from_user(&fprog, argp, sizeof(fprog)))
+			break;
+
+		ret = sk_attach_filter(&fprog, tun->socket.sk);
+		break;
+
+	case TUNDETACHFILTER:
+		/* Can be set only for TAPs */
+		ret = -EINVAL;
+		if ((tun->flags & TUN_TYPE_MASK) != TUN_TAP_DEV)
+			break;
+		ret = sk_detach_filter(tun->socket.sk);
+		break;
+
 	default:
 		ret = -EINVAL;
 		break;
diff --git a/include/linux/if_tun.h b/include/linux/if_tun.h
index 404abe00162c..1350a246893a 100644
--- a/include/linux/if_tun.h
+++ b/include/linux/if_tun.h
@@ -18,6 +18,7 @@
 
 #include <linux/types.h>
 #include <linux/if_ether.h>
+#include <linux/filter.h>
 
 /* Read queue size */
 #define TUN_READQ_SIZE	500
@@ -48,6 +49,8 @@
 #define TUNGETIFF      _IOR('T', 210, unsigned int)
 #define TUNGETSNDBUF   _IOR('T', 211, int)
 #define TUNSETSNDBUF   _IOW('T', 212, int)
+#define TUNATTACHFILTER _IOW('T', 213, struct sock_fprog)
+#define TUNDETACHFILTER _IOW('T', 214, struct sock_fprog)
 
 /* TUNSETIFF ifr flags */
 #define IFF_TUN		0x0001
-- 
cgit v1.2.3


From 6a443a0f72ad7706345412dbd2e4d4981fdfce39 Mon Sep 17 00:00:00 2001
From: Matt Carlson <mcarlson@broadcom.com>
Date: Wed, 17 Feb 2010 15:17:04 +0000
Subject: tg3: Push phylib definitions to phylib

This patch pushes phylib definitions out to phylib headers.  For phy
IDs, this removes some code duplication.

Signed-off-by: Matt Carlson <mcarlson@broadcom.com>
Reviewed-by: Michael Chan <mchan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/broadcom.c |  5 +----
 drivers/net/tg3.c          | 32 ++++++++++++++++----------------
 drivers/net/tg3.h          | 14 +++-----------
 include/linux/brcmphy.h    | 11 +++++++++++
 4 files changed, 31 insertions(+), 31 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c
index 33c4b12a63ba..f482fc4f8cf1 100644
--- a/drivers/net/phy/broadcom.c
+++ b/drivers/net/phy/broadcom.c
@@ -18,9 +18,6 @@
 #include <linux/phy.h>
 #include <linux/brcmphy.h>
 
-#define PHY_ID_BCM50610		0x0143bd60
-#define PHY_ID_BCM50610M	0x0143bd70
-#define PHY_ID_BCM57780		0x03625d90
 
 #define BRCM_PHY_MODEL(phydev) \
 	((phydev)->drv->phy_id & (phydev)->drv->phy_id_mask)
@@ -823,7 +820,7 @@ static struct phy_driver bcm57780_driver = {
 };
 
 static struct phy_driver bcmac131_driver = {
-	.phy_id		= 0x0143bc70,
+	.phy_id		= PHY_ID_BCMAC131,
 	.phy_id_mask	= 0xfffffff0,
 	.name		= "Broadcom BCMAC131",
 	.features	= PHY_BASIC_FEATURES |
diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index 965efa92b020..994bac0b018b 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -955,17 +955,17 @@ static void tg3_mdio_config_5785(struct tg3 *tp)
 
 	phydev = tp->mdio_bus->phy_map[TG3_PHY_MII_ADDR];
 	switch (phydev->drv->phy_id & phydev->drv->phy_id_mask) {
-	case TG3_PHY_ID_BCM50610:
-	case TG3_PHY_ID_BCM50610M:
+	case PHY_ID_BCM50610:
+	case PHY_ID_BCM50610M:
 		val = MAC_PHYCFG2_50610_LED_MODES;
 		break;
-	case TG3_PHY_ID_BCMAC131:
+	case PHY_ID_BCMAC131:
 		val = MAC_PHYCFG2_AC131_LED_MODES;
 		break;
-	case TG3_PHY_ID_RTL8211C:
+	case PHY_ID_RTL8211C:
 		val = MAC_PHYCFG2_RTL8211C_LED_MODES;
 		break;
-	case TG3_PHY_ID_RTL8201E:
+	case PHY_ID_RTL8201E:
 		val = MAC_PHYCFG2_RTL8201E_LED_MODES;
 		break;
 	default:
@@ -1115,12 +1115,12 @@ static int tg3_mdio_init(struct tg3 *tp)
 	}
 
 	switch (phydev->drv->phy_id & phydev->drv->phy_id_mask) {
-	case TG3_PHY_ID_BCM57780:
+	case PHY_ID_BCM57780:
 		phydev->interface = PHY_INTERFACE_MODE_GMII;
 		phydev->dev_flags |= PHY_BRCM_AUTO_PWRDWN_ENABLE;
 		break;
-	case TG3_PHY_ID_BCM50610:
-	case TG3_PHY_ID_BCM50610M:
+	case PHY_ID_BCM50610:
+	case PHY_ID_BCM50610M:
 		phydev->dev_flags |= PHY_BRCM_CLEAR_RGMII_MODE |
 				     PHY_BRCM_RX_REFCLK_UNUSED |
 				     PHY_BRCM_DIS_TXCRXC_NOENRGY |
@@ -1132,11 +1132,11 @@ static int tg3_mdio_init(struct tg3 *tp)
 		if (tp->tg3_flags3 & TG3_FLG3_RGMII_EXT_IBND_TX_EN)
 			phydev->dev_flags |= PHY_BRCM_EXT_IBND_TX_ENABLE;
 		/* fallthru */
-	case TG3_PHY_ID_RTL8211C:
+	case PHY_ID_RTL8211C:
 		phydev->interface = PHY_INTERFACE_MODE_RGMII;
 		break;
-	case TG3_PHY_ID_RTL8201E:
-	case TG3_PHY_ID_BCMAC131:
+	case PHY_ID_RTL8201E:
+	case PHY_ID_BCMAC131:
 		phydev->interface = PHY_INTERFACE_MODE_MII;
 		phydev->dev_flags |= PHY_BRCM_AUTO_PWRDWN_ENABLE;
 		tp->tg3_flags3 |= TG3_FLG3_PHY_IS_FET;
@@ -2563,11 +2563,11 @@ static int tg3_set_power_state(struct tg3 *tp, pci_power_t state)
 			phy_start_aneg(phydev);
 
 			phyid = phydev->drv->phy_id & phydev->drv->phy_id_mask;
-			if (phyid != TG3_PHY_ID_BCMAC131) {
-				phyid &= TG3_PHY_OUI_MASK;
-				if (phyid == TG3_PHY_OUI_1 ||
-				    phyid == TG3_PHY_OUI_2 ||
-				    phyid == TG3_PHY_OUI_3)
+			if (phyid != PHY_ID_BCMAC131) {
+				phyid &= PHY_BCM_OUI_MASK;
+				if (phyid == PHY_BCM_OUI_1 ||
+				    phyid == PHY_BCM_OUI_2 ||
+				    phyid == PHY_BCM_OUI_3)
 					do_low_power = true;
 			}
 		}
diff --git a/drivers/net/tg3.h b/drivers/net/tg3.h
index 800dec463e98..574a1cc4d353 100644
--- a/drivers/net/tg3.h
+++ b/drivers/net/tg3.h
@@ -2943,22 +2943,14 @@ struct tg3 {
 #define TG3_PHY_ID_BCM57765		0x5c0d8a40
 #define TG3_PHY_ID_BCM5906		0xdc00ac40
 #define TG3_PHY_ID_BCM8002		0x60010140
-#define TG3_PHY_ID_BCM50610		0x0143bd60
-#define TG3_PHY_ID_BCM50610M		0x0143bd70
-#define TG3_PHY_ID_BCMAC131		0x0143bc70
-#define TG3_PHY_ID_RTL8211C		0x001cc910
-#define TG3_PHY_ID_RTL8201E		0x00008200
-#define TG3_PHY_ID_BCM57780		0x03625d90
 #define TG3_PHY_ID_INVALID		0xffffffff
 
+#define PHY_ID_RTL8211C			0x001cc910
+#define PHY_ID_RTL8201E			0x00008200
+
 #define TG3_PHY_ID_REV_MASK		0x0000000f
 #define TG3_PHY_REV_BCM5401_B0		0x1
 
-#define TG3_PHY_OUI_MASK		0xfffffc00
-#define TG3_PHY_OUI_1			0x00206000
-#define TG3_PHY_OUI_2			0x0143bc00
-#define TG3_PHY_OUI_3			0x03625c00
-
 	/* This macro assumes the passed PHY ID is
 	 * already masked with TG3_PHY_ID_MASK.
 	 */
diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h
index 2b31b91f5871..7f437ca1ed44 100644
--- a/include/linux/brcmphy.h
+++ b/include/linux/brcmphy.h
@@ -1,3 +1,14 @@
+#define PHY_ID_BCM50610			0x0143bd60
+#define PHY_ID_BCM50610M		0x0143bd70
+#define PHY_ID_BCMAC131			0x0143bc70
+#define PHY_ID_BCM57780			0x03625d90
+
+#define PHY_BCM_OUI_MASK		0xfffffc00
+#define PHY_BCM_OUI_1			0x00206000
+#define PHY_BCM_OUI_2			0x0143bc00
+#define PHY_BCM_OUI_3			0x03625c00
+
+
 #define PHY_BCM_FLAGS_MODE_COPPER	0x00000001
 #define PHY_BCM_FLAGS_MODE_1000BX	0x00000002
 #define PHY_BCM_FLAGS_INTF_SGMII	0x00000010
-- 
cgit v1.2.3


From 3b77fd8ee6a8ae34e349651e9d5f5000d1cc206e Mon Sep 17 00:00:00 2001
From: Matthew Garrett <mjg@redhat.com>
Date: Wed, 17 Feb 2010 12:21:45 -0800
Subject: Input: add KEY_RFKILL

Most laptops have keys that are intended to toggle all device state, not
just wifi. These are currently generally mapped to KEY_WLAN. As a result,
rfkill will only kill or enable wifi in response to the key press. This
confuses users and can make it difficult for them to enable bluetooth
and wwan devices.

This patch adds a new keycode, KEY_RFKILL. It indicates that the system
should toggle the state of all rfkillable devices.

Signed-off-by: Matthew Garrett <mjg@redhat.com>
Acked-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 include/linux/input.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/input.h b/include/linux/input.h
index 735ceaf1bc2d..663208afb64c 100644
--- a/include/linux/input.h
+++ b/include/linux/input.h
@@ -376,6 +376,7 @@ struct input_absinfo {
 #define KEY_DISPLAY_OFF		245	/* display device to off state */
 
 #define KEY_WIMAX		246
+#define KEY_RFKILL		247	/* Key that controls all radios */
 
 /* Range 248 - 255 is reserved for special needs of AT keyboard driver */
 
-- 
cgit v1.2.3


From 501c774cb13c3ef8fb7fc5f08fa19473f7d9a0db Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 18 Feb 2010 05:46:50 +0000
Subject: net/macvtap: add vhost support

This adds support for passing a macvtap file descriptor into
vhost-net, much like we already do for tun/tap.

Most of the new code is taken from the respective patch
in the tun driver and may get consolidated in the future.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Sridhar Samudrala <sri@us.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/macvtap.c      | 98 ++++++++++++++++++++++++++++++++++++----------
 drivers/vhost/Kconfig      |  2 +-
 drivers/vhost/net.c        |  8 +++-
 include/linux/if_macvlan.h | 13 ++++++
 4 files changed, 97 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c
index 705099749766..e354501ab297 100644
--- a/drivers/net/macvtap.c
+++ b/drivers/net/macvtap.c
@@ -58,6 +58,8 @@ static unsigned int macvtap_major;
 static struct class *macvtap_class;
 static struct cdev macvtap_cdev;
 
+static const struct proto_ops macvtap_socket_ops;
+
 /*
  * RCU usage:
  * The macvtap_queue and the macvlan_dev are loosely coupled, the
@@ -176,7 +178,7 @@ static int macvtap_forward(struct net_device *dev, struct sk_buff *skb)
 		return -ENOLINK;
 
 	skb_queue_tail(&q->sk.sk_receive_queue, skb);
-	wake_up(q->sk.sk_sleep);
+	wake_up_interruptible_poll(q->sk.sk_sleep, POLLIN | POLLRDNORM | POLLRDBAND);
 	return 0;
 }
 
@@ -242,7 +244,7 @@ static void macvtap_sock_write_space(struct sock *sk)
 		return;
 
 	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
-		wake_up_interruptible_sync(sk->sk_sleep);
+		wake_up_interruptible_poll(sk->sk_sleep, POLLOUT | POLLWRNORM | POLLWRBAND);
 }
 
 static int macvtap_open(struct inode *inode, struct file *file)
@@ -270,6 +272,8 @@ static int macvtap_open(struct inode *inode, struct file *file)
 	init_waitqueue_head(&q->sock.wait);
 	q->sock.type = SOCK_RAW;
 	q->sock.state = SS_CONNECTED;
+	q->sock.file = file;
+	q->sock.ops = &macvtap_socket_ops;
 	sock_init_data(&q->sock, &q->sk);
 	q->sk.sk_write_space = macvtap_sock_write_space;
 
@@ -387,32 +391,20 @@ static ssize_t macvtap_put_user(struct macvtap_queue *q,
 
 	rcu_read_lock_bh();
 	vlan = rcu_dereference(q->vlan);
-	macvlan_count_rx(vlan, len, ret == 0, 0);
+	if (vlan)
+		macvlan_count_rx(vlan, len, ret == 0, 0);
 	rcu_read_unlock_bh();
 
 	return ret ? ret : len;
 }
 
-static ssize_t macvtap_aio_read(struct kiocb *iocb, const struct iovec *iv,
-				unsigned long count, loff_t pos)
+static ssize_t macvtap_do_read(struct macvtap_queue *q, struct kiocb *iocb,
+			       const struct iovec *iv, unsigned long len,
+			       int noblock)
 {
-	struct file *file = iocb->ki_filp;
-	struct macvtap_queue *q = file->private_data;
-
 	DECLARE_WAITQUEUE(wait, current);
 	struct sk_buff *skb;
-	ssize_t len, ret = 0;
-
-	if (!q) {
-		ret = -ENOLINK;
-		goto out;
-	}
-
-	len = iov_length(iv, count);
-	if (len < 0) {
-		ret = -EINVAL;
-		goto out;
-	}
+	ssize_t ret = 0;
 
 	add_wait_queue(q->sk.sk_sleep, &wait);
 	while (len) {
@@ -421,7 +413,7 @@ static ssize_t macvtap_aio_read(struct kiocb *iocb, const struct iovec *iv,
 		/* Read frames from the queue */
 		skb = skb_dequeue(&q->sk.sk_receive_queue);
 		if (!skb) {
-			if (file->f_flags & O_NONBLOCK) {
+			if (noblock) {
 				ret = -EAGAIN;
 				break;
 			}
@@ -440,7 +432,24 @@ static ssize_t macvtap_aio_read(struct kiocb *iocb, const struct iovec *iv,
 
 	current->state = TASK_RUNNING;
 	remove_wait_queue(q->sk.sk_sleep, &wait);
+	return ret;
+}
+
+static ssize_t macvtap_aio_read(struct kiocb *iocb, const struct iovec *iv,
+				unsigned long count, loff_t pos)
+{
+	struct file *file = iocb->ki_filp;
+	struct macvtap_queue *q = file->private_data;
+	ssize_t len, ret = 0;
 
+	len = iov_length(iv, count);
+	if (len < 0) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	ret = macvtap_do_read(q, iocb, iv, len, file->f_flags & O_NONBLOCK);
+	ret = min_t(ssize_t, ret, len); /* XXX copied from tun.c. Why? */
 out:
 	return ret;
 }
@@ -538,6 +547,53 @@ static const struct file_operations macvtap_fops = {
 #endif
 };
 
+static int macvtap_sendmsg(struct kiocb *iocb, struct socket *sock,
+			   struct msghdr *m, size_t total_len)
+{
+	struct macvtap_queue *q = container_of(sock, struct macvtap_queue, sock);
+	return macvtap_get_user(q, m->msg_iov, total_len,
+			    m->msg_flags & MSG_DONTWAIT);
+}
+
+static int macvtap_recvmsg(struct kiocb *iocb, struct socket *sock,
+			   struct msghdr *m, size_t total_len,
+			   int flags)
+{
+	struct macvtap_queue *q = container_of(sock, struct macvtap_queue, sock);
+	int ret;
+	if (flags & ~(MSG_DONTWAIT|MSG_TRUNC))
+		return -EINVAL;
+	ret = macvtap_do_read(q, iocb, m->msg_iov, total_len,
+			  flags & MSG_DONTWAIT);
+	if (ret > total_len) {
+		m->msg_flags |= MSG_TRUNC;
+		ret = flags & MSG_TRUNC ? ret : total_len;
+	}
+	return ret;
+}
+
+/* Ops structure to mimic raw sockets with tun */
+static const struct proto_ops macvtap_socket_ops = {
+	.sendmsg = macvtap_sendmsg,
+	.recvmsg = macvtap_recvmsg,
+};
+
+/* Get an underlying socket object from tun file.  Returns error unless file is
+ * attached to a device.  The returned object works like a packet socket, it
+ * can be used for sock_sendmsg/sock_recvmsg.  The caller is responsible for
+ * holding a reference to the file for as long as the socket is in use. */
+struct socket *macvtap_get_socket(struct file *file)
+{
+	struct macvtap_queue *q;
+	if (file->f_op != &macvtap_fops)
+		return ERR_PTR(-EINVAL);
+	q = file->private_data;
+	if (!q)
+		return ERR_PTR(-EBADFD);
+	return &q->sock;
+}
+EXPORT_SYMBOL_GPL(macvtap_get_socket);
+
 static int macvtap_init(void)
 {
 	int err;
diff --git a/drivers/vhost/Kconfig b/drivers/vhost/Kconfig
index 9e9355367bb3..e4e2fd1b5107 100644
--- a/drivers/vhost/Kconfig
+++ b/drivers/vhost/Kconfig
@@ -1,6 +1,6 @@
 config VHOST_NET
 	tristate "Host kernel accelerator for virtio net (EXPERIMENTAL)"
-	depends on NET && EVENTFD && (TUN || !TUN) && EXPERIMENTAL
+	depends on NET && EVENTFD && (TUN || !TUN) && (MACVTAP || !MACVTAP) && EXPERIMENTAL
 	---help---
 	  This kernel module can be loaded in host kernel to accelerate
 	  guest networking with virtio_net. Not to be confused with virtio_net
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 4c8928319e1d..91a324cc2298 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -22,6 +22,7 @@
 #include <linux/if_packet.h>
 #include <linux/if_arp.h>
 #include <linux/if_tun.h>
+#include <linux/if_macvlan.h>
 
 #include <net/sock.h>
 
@@ -452,13 +453,16 @@ err:
 	return ERR_PTR(r);
 }
 
-static struct socket *get_tun_socket(int fd)
+static struct socket *get_tap_socket(int fd)
 {
 	struct file *file = fget(fd);
 	struct socket *sock;
 	if (!file)
 		return ERR_PTR(-EBADF);
 	sock = tun_get_socket(file);
+	if (!IS_ERR(sock))
+		return sock;
+	sock = macvtap_get_socket(file);
 	if (IS_ERR(sock))
 		fput(file);
 	return sock;
@@ -473,7 +477,7 @@ static struct socket *get_socket(int fd)
 	sock = get_raw_socket(fd);
 	if (!IS_ERR(sock))
 		return sock;
-	sock = get_tun_socket(fd);
+	sock = get_tap_socket(fd);
 	if (!IS_ERR(sock))
 		return sock;
 	return ERR_PTR(-ENOTSOCK);
diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h
index f9cb9ba1475d..b78a712247da 100644
--- a/include/linux/if_macvlan.h
+++ b/include/linux/if_macvlan.h
@@ -7,6 +7,19 @@
 #include <linux/netlink.h>
 #include <net/netlink.h>
 
+#if defined(CONFIG_MACVTAP) || defined(CONFIG_MACVTAP_MODULE)
+struct socket *macvtap_get_socket(struct file *);
+#else
+#include <linux/err.h>
+#include <linux/errno.h>
+struct file;
+struct socket;
+static inline struct socket *macvtap_get_socket(struct file *f)
+{
+	return ERR_PTR(-EINVAL);
+}
+#endif /* CONFIG_MACVTAP */
+
 struct macvlan_port;
 struct macvtap_queue;
 
-- 
cgit v1.2.3


From 3ffe533c87281b68d469b279ff3a5056f9c75862 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Thu, 18 Feb 2010 08:25:24 +0000
Subject: ipv6: drop unused "dev" arg of icmpv6_send()

Dunno, what was the idea, it wasn't used for a long time.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/infiniband/ulp/ipoib/ipoib_cm.c |  2 +-
 include/linux/icmpv6.h                  |  3 +--
 net/ipv4/ip_gre.c                       |  2 +-
 net/ipv6/exthdrs.c                      |  2 +-
 net/ipv6/icmp.c                         |  5 ++---
 net/ipv6/ip6_input.c                    |  3 +--
 net/ipv6/ip6_output.c                   | 11 +++++------
 net/ipv6/ip6_tunnel.c                   |  6 +++---
 net/ipv6/mip6.c                         |  2 +-
 net/ipv6/netfilter/ip6t_REJECT.c        |  2 +-
 net/ipv6/reassembly.c                   |  2 +-
 net/ipv6/route.c                        |  4 ++--
 net/ipv6/sit.c                          |  2 +-
 net/ipv6/tunnel6.c                      |  4 ++--
 net/ipv6/udp.c                          |  5 ++---
 net/ipv6/xfrm6_output.c                 |  2 +-
 net/netfilter/ipvs/ip_vs_core.c         |  5 ++---
 net/netfilter/ipvs/ip_vs_xmit.c         | 10 +++++-----
 18 files changed, 33 insertions(+), 39 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 30bdf427ee6d..83a7751c38d6 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -1374,7 +1374,7 @@ static void ipoib_cm_skb_reap(struct work_struct *work)
 			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
 		else if (skb->protocol == htons(ETH_P_IPV6))
-			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, priv->dev);
+			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
 #endif
 		dev_kfree_skb_any(skb);
 
diff --git a/include/linux/icmpv6.h b/include/linux/icmpv6.h
index c0d8357917e2..4c4c74ec5987 100644
--- a/include/linux/icmpv6.h
+++ b/include/linux/icmpv6.h
@@ -174,8 +174,7 @@ struct icmp6_filter {
 
 extern void				icmpv6_send(struct sk_buff *skb,
 						    u8 type, u8 code,
-						    __u32 info, 
-						    struct net_device *dev);
+						    __u32 info);
 
 extern int				icmpv6_init(void);
 extern int				icmpv6_err_convert(u8 type, u8 code,
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index a2a5983dbf03..c0c5274d0271 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -793,7 +793,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
 		}
 
 		if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) {
-			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
+			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
 			ip_rt_put(rt);
 			goto tx_error;
 		}
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 4bac362b1335..074f2c084f9f 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -481,7 +481,7 @@ looped_back:
 			IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
 					 IPSTATS_MIB_INHDRERRORS);
 			icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
-				    0, skb->dev);
+				    0);
 			kfree_skb(skb);
 			return -1;
 		}
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 9a37379d741a..eb9abe24bdf0 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -114,7 +114,7 @@ static __inline__ void icmpv6_xmit_unlock(struct sock *sk)
  */
 void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos)
 {
-	icmpv6_send(skb, ICMPV6_PARAMPROB, code, pos, skb->dev);
+	icmpv6_send(skb, ICMPV6_PARAMPROB, code, pos);
 	kfree_skb(skb);
 }
 
@@ -300,8 +300,7 @@ static inline void mip6_addr_swap(struct sk_buff *skb) {}
 /*
  *	Send an ICMP message in response to a packet in error
  */
-void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
-		 struct net_device *dev)
+void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
 {
 	struct net *net = dev_net(skb->dev);
 	struct inet6_dev *idev = NULL;
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index 237e2dba6e94..e28f9203deca 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -216,8 +216,7 @@ resubmit:
 				IP6_INC_STATS_BH(net, idev,
 						 IPSTATS_MIB_INUNKNOWNPROTOS);
 				icmpv6_send(skb, ICMPV6_PARAMPROB,
-					    ICMPV6_UNK_NEXTHDR, nhoff,
-					    skb->dev);
+					    ICMPV6_UNK_NEXTHDR, nhoff);
 			}
 		} else
 			IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INDELIVERS);
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index eb6d09728633..1a5fe9ad1947 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -267,7 +267,7 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
 	if (net_ratelimit())
 		printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
 	skb->dev = dst->dev;
-	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
+	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
 	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
 	kfree_skb(skb);
 	return -EMSGSIZE;
@@ -441,8 +441,7 @@ int ip6_forward(struct sk_buff *skb)
 	if (hdr->hop_limit <= 1) {
 		/* Force OUTPUT device used as source address */
 		skb->dev = dst->dev;
-		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
-			    0, skb->dev);
+		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
 		IP6_INC_STATS_BH(net,
 				 ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);
 
@@ -504,7 +503,7 @@ int ip6_forward(struct sk_buff *skb)
 			goto error;
 		if (addrtype & IPV6_ADDR_LINKLOCAL) {
 			icmpv6_send(skb, ICMPV6_DEST_UNREACH,
-				ICMPV6_NOT_NEIGHBOUR, 0, skb->dev);
+				    ICMPV6_NOT_NEIGHBOUR, 0);
 			goto error;
 		}
 	}
@@ -512,7 +511,7 @@ int ip6_forward(struct sk_buff *skb)
 	if (skb->len > dst_mtu(dst)) {
 		/* Again, force OUTPUT device used as source address */
 		skb->dev = dst->dev;
-		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst_mtu(dst), skb->dev);
+		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst_mtu(dst));
 		IP6_INC_STATS_BH(net,
 				 ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
 		IP6_INC_STATS_BH(net,
@@ -627,7 +626,7 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 	 */
 	if (!skb->local_df) {
 		skb->dev = skb_dst(skb)->dev;
-		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
+		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
 		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
 			      IPSTATS_MIB_FRAGFAILS);
 		kfree_skb(skb);
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 9b02492d8706..138980eec214 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -622,7 +622,7 @@ ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 		if (rt && rt->rt6i_dev)
 			skb2->dev = rt->rt6i_dev;
 
-		icmpv6_send(skb2, rel_type, rel_code, rel_info, skb2->dev);
+		icmpv6_send(skb2, rel_type, rel_code, rel_info);
 
 		if (rt)
 			dst_release(&rt->u.dst);
@@ -1014,7 +1014,7 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
 		tel = (struct ipv6_tlv_tnl_enc_lim *)&skb_network_header(skb)[offset];
 		if (tel->encap_limit == 0) {
 			icmpv6_send(skb, ICMPV6_PARAMPROB,
-				    ICMPV6_HDR_FIELD, offset + 2, skb->dev);
+				    ICMPV6_HDR_FIELD, offset + 2);
 			return -1;
 		}
 		encap_limit = tel->encap_limit - 1;
@@ -1033,7 +1033,7 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
 	err = ip6_tnl_xmit2(skb, dev, dsfield, &fl, encap_limit, &mtu);
 	if (err != 0) {
 		if (err == -EMSGSIZE)
-			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
+			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
 		return -1;
 	}
 
diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c
index f797e8c6f3b3..2794b6002836 100644
--- a/net/ipv6/mip6.c
+++ b/net/ipv6/mip6.c
@@ -56,7 +56,7 @@ static inline void *mip6_padn(__u8 *data, __u8 padlen)
 
 static inline void mip6_param_prob(struct sk_buff *skb, u8 code, int pos)
 {
-	icmpv6_send(skb, ICMPV6_PARAMPROB, code, pos, skb->dev);
+	icmpv6_send(skb, ICMPV6_PARAMPROB, code, pos);
 }
 
 static int mip6_mh_len(int type)
diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c
index 8311ca31816a..dd8afbaf00a8 100644
--- a/net/ipv6/netfilter/ip6t_REJECT.c
+++ b/net/ipv6/netfilter/ip6t_REJECT.c
@@ -169,7 +169,7 @@ send_unreach(struct net *net, struct sk_buff *skb_in, unsigned char code,
 	if (hooknum == NF_INET_LOCAL_OUT && skb_in->dev == NULL)
 		skb_in->dev = net->loopback_dev;
 
-	icmpv6_send(skb_in, ICMPV6_DEST_UNREACH, code, 0, NULL);
+	icmpv6_send(skb_in, ICMPV6_DEST_UNREACH, code, 0);
 }
 
 static unsigned int
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index b2847ed6a7d9..a555156e9779 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -228,7 +228,7 @@ static void ip6_frag_expire(unsigned long data)
 	   pointer directly, device might already disappeared.
 	 */
 	fq->q.fragments->dev = dev;
-	icmpv6_send(fq->q.fragments, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0, dev);
+	icmpv6_send(fq->q.fragments, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0);
 out_rcu_unlock:
 	rcu_read_unlock();
 out:
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 8500156f2637..88c0a5c49ae8 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -909,7 +909,7 @@ static void ip6_link_failure(struct sk_buff *skb)
 {
 	struct rt6_info *rt;
 
-	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
+	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
 
 	rt = (struct rt6_info *) skb_dst(skb);
 	if (rt) {
@@ -1884,7 +1884,7 @@ static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
 			      ipstats_mib_noroutes);
 		break;
 	}
-	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0, skb->dev);
+	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
 	kfree_skb(skb);
 	return 0;
 }
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 96eb2d4641c4..b1eea811be48 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -743,7 +743,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
 			skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
 
 		if (skb->len > mtu) {
-			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
+			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
 			ip_rt_put(rt);
 			goto tx_error;
 		}
diff --git a/net/ipv6/tunnel6.c b/net/ipv6/tunnel6.c
index 51e2832d13a6..e17bc1dfc1a4 100644
--- a/net/ipv6/tunnel6.c
+++ b/net/ipv6/tunnel6.c
@@ -98,7 +98,7 @@ static int tunnel6_rcv(struct sk_buff *skb)
 		if (!handler->handler(skb))
 			return 0;
 
-	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0, skb->dev);
+	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0);
 
 drop:
 	kfree_skb(skb);
@@ -116,7 +116,7 @@ static int tunnel46_rcv(struct sk_buff *skb)
 		if (!handler->handler(skb))
 			return 0;
 
-	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0, skb->dev);
+	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0);
 
 drop:
 	kfree_skb(skb);
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index a7af9d68cd6c..52b8347ae3b2 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -680,12 +680,11 @@ static inline int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh,
 int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
 		   int proto)
 {
+	struct net *net = dev_net(skb->dev);
 	struct sock *sk;
 	struct udphdr *uh;
-	struct net_device *dev = skb->dev;
 	struct in6_addr *saddr, *daddr;
 	u32 ulen = 0;
-	struct net *net = dev_net(skb->dev);
 
 	if (!pskb_may_pull(skb, sizeof(struct udphdr)))
 		goto short_packet;
@@ -744,7 +743,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
 		UDP6_INC_STATS_BH(net, UDP_MIB_NOPORTS,
 				proto == IPPROTO_UDPLITE);
 
-		icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0, dev);
+		icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0);
 
 		kfree_skb(skb);
 		return 0;
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index c4f4eef032a3..0c92112dcba3 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -38,7 +38,7 @@ static int xfrm6_tunnel_check_size(struct sk_buff *skb)
 
 	if (!skb->local_df && skb->len > mtu) {
 		skb->dev = dst->dev;
-		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
+		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
 		ret = -EMSGSIZE;
 	}
 
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 72e96d823ebf..44590887a92c 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -515,8 +515,7 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
 	 */
 #ifdef CONFIG_IP_VS_IPV6
 	if (svc->af == AF_INET6)
-		icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0,
-			    skb->dev);
+		icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0);
 	else
 #endif
 		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
@@ -1048,7 +1047,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb,
 						icmpv6_send(skb,
 							    ICMPV6_DEST_UNREACH,
 							    ICMPV6_PORT_UNREACH,
-							    0, skb->dev);
+							    0);
 					else
 #endif
 						icmp_send(skb,
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index 30b3189bd29c..223b5018c7dc 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -311,7 +311,7 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 	mtu = dst_mtu(&rt->u.dst);
 	if (skb->len > mtu) {
 		dst_release(&rt->u.dst);
-		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
+		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
 		IP_VS_DBG_RL("%s(): frag needed\n", __func__);
 		goto tx_error;
 	}
@@ -454,7 +454,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 	mtu = dst_mtu(&rt->u.dst);
 	if (skb->len > mtu) {
 		dst_release(&rt->u.dst);
-		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
+		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
 		IP_VS_DBG_RL_PKT(0, pp, skb, 0,
 				 "ip_vs_nat_xmit_v6(): frag needed for");
 		goto tx_error;
@@ -672,7 +672,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
 
 	if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr)) {
-		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
+		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
 		dst_release(&rt->u.dst);
 		IP_VS_DBG_RL("%s(): frag needed\n", __func__);
 		goto tx_error;
@@ -814,7 +814,7 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 	/* MTU checking */
 	mtu = dst_mtu(&rt->u.dst);
 	if (skb->len > mtu) {
-		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
+		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
 		dst_release(&rt->u.dst);
 		IP_VS_DBG_RL("%s(): frag needed\n", __func__);
 		goto tx_error;
@@ -965,7 +965,7 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 	mtu = dst_mtu(&rt->u.dst);
 	if (skb->len > mtu) {
 		dst_release(&rt->u.dst);
-		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
+		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
 		IP_VS_DBG_RL("%s(): frag needed\n", __func__);
 		goto tx_error;
 	}
-- 
cgit v1.2.3


From 36e31b0af58728071e8023cf8e20c5166b700717 Mon Sep 17 00:00:00 2001
From: Andreas Petlund <apetlund@simula.no>
Date: Thu, 18 Feb 2010 02:47:01 +0000
Subject: net: TCP thin linear timeouts

This patch will make TCP use only linear timeouts if the
stream is thin. This will help to avoid the very high latencies
that thin stream suffer because of exponential backoff. This
mechanism is only active if enabled by iocontrol or syscontrol
and the stream is identified as thin. A maximum of 6 linear
timeouts is tried before exponential backoff is resumed.

Signed-off-by: Andreas Petlund <apetlund@simula.no>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/ip-sysctl.txt | 12 ++++++++++++
 include/linux/tcp.h                    |  5 ++++-
 include/net/tcp.h                      |  4 ++++
 net/ipv4/sysctl_net_ipv4.c             |  7 +++++++
 net/ipv4/tcp.c                         |  7 +++++++
 net/ipv4/tcp_timer.c                   | 21 ++++++++++++++++++++-
 6 files changed, 54 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index 2dc7a1d97686..f147310d9af4 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -487,6 +487,18 @@ tcp_dma_copybreak - INTEGER
 	and CONFIG_NET_DMA is enabled.
 	Default: 4096
 
+tcp_thin_linear_timeouts - BOOLEAN
+	Enable dynamic triggering of linear timeouts for thin streams.
+	If set, a check is performed upon retransmission by timeout to
+	determine if the stream is thin (less than 4 packets in flight).
+	As long as the stream is found to be thin, up to 6 linear
+	timeouts may be performed before exponential backoff mode is
+	initiated. This improves retransmission latency for
+	non-aggressive thin streams, often found to be time-dependent.
+	For more information on thin streams, see
+	Documentation/networking/tcp-thin.txt
+	Default: 0
+
 UDP variables:
 
 udp_mem - vector of 3 INTEGERs: min, pressure, max
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 7fee8a4df931..3ba8b074612f 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -103,6 +103,7 @@ enum {
 #define TCP_CONGESTION		13	/* Congestion control algorithm */
 #define TCP_MD5SIG		14	/* TCP MD5 Signature (RFC2385) */
 #define TCP_COOKIE_TRANSACTIONS	15	/* TCP Cookie Transactions */
+#define TCP_THIN_LINEAR_TIMEOUTS 16      /* Use linear timeouts for thin streams*/
 
 /* for TCP_INFO socket option */
 #define TCPI_OPT_TIMESTAMPS	1
@@ -340,7 +341,9 @@ struct tcp_sock {
 	u32	frto_highmark;	/* snd_nxt when RTO occurred */
 	u16	advmss;		/* Advertised MSS			*/
 	u8	frto_counter;	/* Number of new acks after RTO */
-	u8	nonagle;	/* Disable Nagle algorithm?             */
+	u8	nonagle     : 4,/* Disable Nagle algorithm?             */
+		thin_lto    : 1,/* Use linear timeouts for thin streams */
+		unused      : 3;
 
 /* RTT measurement */
 	u32	srtt;		/* smoothed round trip time << 3	*/
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 0bdc3f640247..6278fc734abd 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -196,6 +196,9 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo);
 #define TCP_NAGLE_CORK		2	/* Socket is corked	    */
 #define TCP_NAGLE_PUSH		4	/* Cork is overridden for already queued data */
 
+/* TCP thin-stream limits */
+#define TCP_THIN_LINEAR_RETRIES 6       /* After 6 linear retries, do exp. backoff */
+
 extern struct inet_timewait_death_row tcp_death_row;
 
 /* sysctl variables for tcp */
@@ -241,6 +244,7 @@ extern int sysctl_tcp_workaround_signed_windows;
 extern int sysctl_tcp_slow_start_after_idle;
 extern int sysctl_tcp_max_ssthresh;
 extern int sysctl_tcp_cookie_size;
+extern int sysctl_tcp_thin_linear_timeouts;
 
 extern atomic_t tcp_memory_allocated;
 extern struct percpu_counter tcp_sockets_allocated;
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 7e3712ce3994..e6a2460587d4 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -575,6 +575,13 @@ static struct ctl_table ipv4_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec
 	},
+	{
+		.procname       = "tcp_thin_linear_timeouts",
+		.data           = &sysctl_tcp_thin_linear_timeouts,
+		.maxlen         = sizeof(int),
+		.mode           = 0644,
+		.proc_handler   = proc_dointvec
+	},
 	{
 		.procname	= "udp_mem",
 		.data		= &sysctl_udp_mem,
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index e471d037fcc9..21bae9afefea 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2229,6 +2229,13 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
 		}
 		break;
 
+	case TCP_THIN_LINEAR_TIMEOUTS:
+		if (val < 0 || val > 1)
+			err = -EINVAL;
+		else
+			tp->thin_lto = val;
+		break;
+
 	case TCP_CORK:
 		/* When set indicates to always queue non-full frames.
 		 * Later the user clears this option and we transmit
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index de7d1bf9114f..a17629b8912e 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -29,6 +29,7 @@ int sysctl_tcp_keepalive_intvl __read_mostly = TCP_KEEPALIVE_INTVL;
 int sysctl_tcp_retries1 __read_mostly = TCP_RETR1;
 int sysctl_tcp_retries2 __read_mostly = TCP_RETR2;
 int sysctl_tcp_orphan_retries __read_mostly;
+int sysctl_tcp_thin_linear_timeouts __read_mostly;
 
 static void tcp_write_timer(unsigned long);
 static void tcp_delack_timer(unsigned long);
@@ -415,7 +416,25 @@ void tcp_retransmit_timer(struct sock *sk)
 	icsk->icsk_retransmits++;
 
 out_reset_timer:
-	icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX);
+	/* If stream is thin, use linear timeouts. Since 'icsk_backoff' is
+	 * used to reset timer, set to 0. Recalculate 'icsk_rto' as this
+	 * might be increased if the stream oscillates between thin and thick,
+	 * thus the old value might already be too high compared to the value
+	 * set by 'tcp_set_rto' in tcp_input.c which resets the rto without
+	 * backoff. Limit to TCP_THIN_LINEAR_RETRIES before initiating
+	 * exponential backoff behaviour to avoid continue hammering
+	 * linear-timeout retransmissions into a black hole
+	 */
+	if (sk->sk_state == TCP_ESTABLISHED &&
+	    (tp->thin_lto || sysctl_tcp_thin_linear_timeouts) &&
+	    tcp_stream_is_thin(tp) &&
+	    icsk->icsk_retransmits <= TCP_THIN_LINEAR_RETRIES) {
+		icsk->icsk_backoff = 0;
+		icsk->icsk_rto = min(__tcp_set_rto(tp), TCP_RTO_MAX);
+	} else {
+		/* Use normal (exponential) backoff */
+		icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX);
+	}
 	inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX);
 	if (retransmits_timed_out(sk, sysctl_tcp_retries1 + 1))
 		__sk_dst_reset(sk);
-- 
cgit v1.2.3


From 7e38017557bc0b87434d184f8804cadb102bb903 Mon Sep 17 00:00:00 2001
From: Andreas Petlund <apetlund@simula.no>
Date: Thu, 18 Feb 2010 04:48:19 +0000
Subject: net: TCP thin dupack

This patch enables fast retransmissions after one dupACK for
TCP if the stream is identified as thin. This will reduce
latencies for thin streams that are not able to trigger fast
retransmissions due to high packet interarrival time. This
mechanism is only active if enabled by iocontrol or syscontrol
and the stream is identified as thin.

Signed-off-by: Andreas Petlund <apetlund@simula.no>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/ip-sysctl.txt | 12 ++++++++++++
 include/linux/tcp.h                    |  4 +++-
 include/net/tcp.h                      |  1 +
 net/ipv4/sysctl_net_ipv4.c             |  7 +++++++
 net/ipv4/tcp.c                         |  7 +++++++
 net/ipv4/tcp_input.c                   | 12 ++++++++++++
 6 files changed, 42 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index f147310d9af4..2571a62d923e 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -499,6 +499,18 @@ tcp_thin_linear_timeouts - BOOLEAN
 	Documentation/networking/tcp-thin.txt
 	Default: 0
 
+tcp_thin_dupack - BOOLEAN
+	Enable dynamic triggering of retransmissions after one dupACK
+	for thin streams. If set, a check is performed upon reception
+	of a dupACK to determine if the stream is thin (less than 4
+	packets in flight). As long as the stream is found to be thin,
+	data is retransmitted on the first received dupACK. This
+	improves retransmission latency for non-aggressive thin
+	streams, often found to be time-dependent.
+	For more information on thin streams, see
+	Documentation/networking/tcp-thin.txt
+	Default: 0
+
 UDP variables:
 
 udp_mem - vector of 3 INTEGERs: min, pressure, max
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 3ba8b074612f..a778ee024590 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -104,6 +104,7 @@ enum {
 #define TCP_MD5SIG		14	/* TCP MD5 Signature (RFC2385) */
 #define TCP_COOKIE_TRANSACTIONS	15	/* TCP Cookie Transactions */
 #define TCP_THIN_LINEAR_TIMEOUTS 16      /* Use linear timeouts for thin streams*/
+#define TCP_THIN_DUPACK         17      /* Fast retrans. after 1 dupack */
 
 /* for TCP_INFO socket option */
 #define TCPI_OPT_TIMESTAMPS	1
@@ -343,7 +344,8 @@ struct tcp_sock {
 	u8	frto_counter;	/* Number of new acks after RTO */
 	u8	nonagle     : 4,/* Disable Nagle algorithm?             */
 		thin_lto    : 1,/* Use linear timeouts for thin streams */
-		unused      : 3;
+		thin_dupack : 1,/* Fast retransmit on first dupack      */
+		unused      : 2;
 
 /* RTT measurement */
 	u32	srtt;		/* smoothed round trip time << 3	*/
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 6278fc734abd..56f0aec40ed6 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -245,6 +245,7 @@ extern int sysctl_tcp_slow_start_after_idle;
 extern int sysctl_tcp_max_ssthresh;
 extern int sysctl_tcp_cookie_size;
 extern int sysctl_tcp_thin_linear_timeouts;
+extern int sysctl_tcp_thin_dupack;
 
 extern atomic_t tcp_memory_allocated;
 extern struct percpu_counter tcp_sockets_allocated;
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index e6a2460587d4..c1bc074f61b7 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -582,6 +582,13 @@ static struct ctl_table ipv4_table[] = {
 		.mode           = 0644,
 		.proc_handler   = proc_dointvec
 	},
+        {
+		.procname       = "tcp_thin_dupack",
+		.data           = &sysctl_tcp_thin_dupack,
+		.maxlen         = sizeof(int),
+		.mode           = 0644,
+		.proc_handler   = proc_dointvec
+	},
 	{
 		.procname	= "udp_mem",
 		.data		= &sysctl_udp_mem,
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 21bae9afefea..5901010fad55 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2236,6 +2236,13 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
 			tp->thin_lto = val;
 		break;
 
+	case TCP_THIN_DUPACK:
+		if (val < 0 || val > 1)
+			err = -EINVAL;
+		else
+			tp->thin_dupack = val;
+		break;
+
 	case TCP_CORK:
 		/* When set indicates to always queue non-full frames.
 		 * Later the user clears this option and we transmit
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 3fddc69ccccc..788851ca8c5d 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -89,6 +89,8 @@ int sysctl_tcp_frto __read_mostly = 2;
 int sysctl_tcp_frto_response __read_mostly;
 int sysctl_tcp_nometrics_save __read_mostly;
 
+int sysctl_tcp_thin_dupack __read_mostly;
+
 int sysctl_tcp_moderate_rcvbuf __read_mostly = 1;
 int sysctl_tcp_abc __read_mostly;
 
@@ -2447,6 +2449,16 @@ static int tcp_time_to_recover(struct sock *sk)
 		return 1;
 	}
 
+	/* If a thin stream is detected, retransmit after first
+	 * received dupack. Employ only if SACK is supported in order
+	 * to avoid possible corner-case series of spurious retransmissions
+	 * Use only if there are no unsent data.
+	 */
+	if ((tp->thin_dupack || sysctl_tcp_thin_dupack) &&
+	    tcp_stream_is_thin(tp) && tcp_dupack_heuristics(tp) > 1 &&
+	    tcp_is_sack(tp) && !tcp_send_head(sk))
+		return 1;
+
 	return 0;
 }
 
-- 
cgit v1.2.3


From 72032fdbcde8b333e65b3430e1bcb4358e2d6716 Mon Sep 17 00:00:00 2001
From: jamal <hadi@cyberus.ca>
Date: Thu, 18 Feb 2010 03:35:07 +0000
Subject: xfrm: Introduce LINUX_MIB_XFRMFWDHDRERROR

XFRMINHDRERROR counter is ambigous when validating forwarding
path. It makes it tricky to debug when you have both in and fwd
validation.

Signed-off-by: Jamal Hadi Salim <hadi@cyberus.ca>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/snmp.h   | 1 +
 net/xfrm/xfrm_policy.c | 3 +--
 net/xfrm/xfrm_proc.c   | 1 +
 3 files changed, 3 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/snmp.h b/include/linux/snmp.h
index 0f953fe40413..e28f5a0182e8 100644
--- a/include/linux/snmp.h
+++ b/include/linux/snmp.h
@@ -257,6 +257,7 @@ enum
 	LINUX_MIB_XFRMOUTPOLBLOCK,		/* XfrmOutPolBlock */
 	LINUX_MIB_XFRMOUTPOLDEAD,		/* XfrmOutPolDead */
 	LINUX_MIB_XFRMOUTPOLERROR,		/* XfrmOutPolError */
+	LINUX_MIB_XFRMFWDHDRERROR,		/* XfrmFwdHdrError*/
 	__LINUX_MIB_XFRMMAX
 };
 
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 2c5d93181f13..4368e7b88469 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -2045,8 +2045,7 @@ int __xfrm_route_forward(struct sk_buff *skb, unsigned short family)
 	int res;
 
 	if (xfrm_decode_session(skb, &fl, family) < 0) {
-		/* XXX: we should have something like FWDHDRERROR here. */
-		XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR);
+		XFRM_INC_STATS(net, LINUX_MIB_XFRMFWDHDRERROR);
 		return 0;
 	}
 
diff --git a/net/xfrm/xfrm_proc.c b/net/xfrm/xfrm_proc.c
index 003f2c437ac3..58d9ae005597 100644
--- a/net/xfrm/xfrm_proc.c
+++ b/net/xfrm/xfrm_proc.c
@@ -41,6 +41,7 @@ static const struct snmp_mib xfrm_mib_list[] = {
 	SNMP_MIB_ITEM("XfrmOutPolBlock", LINUX_MIB_XFRMOUTPOLBLOCK),
 	SNMP_MIB_ITEM("XfrmOutPolDead", LINUX_MIB_XFRMOUTPOLDEAD),
 	SNMP_MIB_ITEM("XfrmOutPolError", LINUX_MIB_XFRMOUTPOLERROR),
+	SNMP_MIB_ITEM("XfrmFwdHdrError", LINUX_MIB_XFRMFWDHDRERROR),
 	SNMP_MIB_SENTINEL
 };
 
-- 
cgit v1.2.3


From 4bac6b180771f7ef5275b1a6d88e630ca3a3d6f0 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Fri, 19 Feb 2010 08:03:28 +0100
Subject: netfilter: restore POST_ROUTING hook in NF_HOOK_COND

Commit 2249065 ("netfilter: get rid of the grossness in netfilter.h")
inverted the logic for conditional hook invocation, breaking the
POST_ROUTING hook invoked by ip_output().

Correct the logic and remove an unnecessary initialization.

Reported-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/netfilter.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index 70079454ffd0..89341c32631a 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -212,8 +212,9 @@ NF_HOOK_COND(uint8_t pf, unsigned int hook, struct sk_buff *skb,
 	     struct net_device *in, struct net_device *out,
 	     int (*okfn)(struct sk_buff *), bool cond)
 {
-	int ret = 1;
-	if (cond ||
+	int ret;
+
+	if (!cond ||
 	    (ret = nf_hook_thresh(pf, hook, skb, in, out, okfn, INT_MIN) == 1))
 		ret = okfn(skb);
 	return ret;
-- 
cgit v1.2.3


From c44dcc56d2b5c79ba3063d20f76e5347e2e418f6 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 11 Feb 2010 02:24:46 -0500
Subject: switch inotify_user to anon_inode

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/notify/inotify/inotify_user.c | 59 ++++------------------------------------
 include/linux/magic.h            |  1 -
 2 files changed, 6 insertions(+), 54 deletions(-)

(limited to 'include/linux')

diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index a94e8bd8eb1f..472cdf29ef82 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -29,14 +29,12 @@
 #include <linux/init.h> /* module_init */
 #include <linux/inotify.h>
 #include <linux/kernel.h> /* roundup() */
-#include <linux/magic.h> /* superblock magic number */
-#include <linux/mount.h> /* mntget */
 #include <linux/namei.h> /* LOOKUP_FOLLOW */
-#include <linux/path.h> /* struct path */
 #include <linux/sched.h> /* struct user */
 #include <linux/slab.h> /* struct kmem_cache */
 #include <linux/syscalls.h>
 #include <linux/types.h>
+#include <linux/anon_inodes.h>
 #include <linux/uaccess.h>
 #include <linux/poll.h>
 #include <linux/wait.h>
@@ -45,8 +43,6 @@
 
 #include <asm/ioctls.h>
 
-static struct vfsmount *inotify_mnt __read_mostly;
-
 /* these are configurable via /proc/sys/fs/inotify/ */
 static int inotify_max_user_instances __read_mostly;
 static int inotify_max_queued_events __read_mostly;
@@ -645,9 +641,7 @@ SYSCALL_DEFINE1(inotify_init1, int, flags)
 {
 	struct fsnotify_group *group;
 	struct user_struct *user;
-	struct file *filp;
-	struct path path;
-	int fd, ret;
+	int ret;
 
 	/* Check the IN_* constants for consistency.  */
 	BUILD_BUG_ON(IN_CLOEXEC != O_CLOEXEC);
@@ -656,10 +650,6 @@ SYSCALL_DEFINE1(inotify_init1, int, flags)
 	if (flags & ~(IN_CLOEXEC | IN_NONBLOCK))
 		return -EINVAL;
 
-	fd = get_unused_fd_flags(flags & O_CLOEXEC);
-	if (fd < 0)
-		return fd;
-
 	user = get_current_user();
 	if (unlikely(atomic_read(&user->inotify_devs) >=
 			inotify_max_user_instances)) {
@@ -676,27 +666,14 @@ SYSCALL_DEFINE1(inotify_init1, int, flags)
 
 	atomic_inc(&user->inotify_devs);
 
-	path.mnt = inotify_mnt;
-	path.dentry = inotify_mnt->mnt_root;
-	path_get(&path);
-	filp = alloc_file(&path, FMODE_READ, &inotify_fops);
-	if (!filp)
-		goto Enfile;
+	ret = anon_inode_getfd("inotify", &inotify_fops, group,
+				  O_RDONLY | flags);
+	if (ret >= 0)
+		return ret;
 
-	filp->f_flags = O_RDONLY | (flags & O_NONBLOCK);
-	filp->private_data = group;
-
-	fd_install(fd, filp);
-
-	return fd;
-
-Enfile:
-	ret = -ENFILE;
-	path_put(&path);
 	atomic_dec(&user->inotify_devs);
 out_free_uid:
 	free_uid(user);
-	put_unused_fd(fd);
 	return ret;
 }
 
@@ -783,20 +760,6 @@ out:
 	return ret;
 }
 
-static int
-inotify_get_sb(struct file_system_type *fs_type, int flags,
-	       const char *dev_name, void *data, struct vfsmount *mnt)
-{
-	return get_sb_pseudo(fs_type, "inotify", NULL,
-			INOTIFYFS_SUPER_MAGIC, mnt);
-}
-
-static struct file_system_type inotify_fs_type = {
-    .name	= "inotifyfs",
-    .get_sb	= inotify_get_sb,
-    .kill_sb	= kill_anon_super,
-};
-
 /*
  * inotify_user_setup - Our initialization function.  Note that we cannnot return
  * error because we have compiled-in VFS hooks.  So an (unlikely) failure here
@@ -804,16 +767,6 @@ static struct file_system_type inotify_fs_type = {
  */
 static int __init inotify_user_setup(void)
 {
-	int ret;
-
-	ret = register_filesystem(&inotify_fs_type);
-	if (unlikely(ret))
-		panic("inotify: register_filesystem returned %d!\n", ret);
-
-	inotify_mnt = kern_mount(&inotify_fs_type);
-	if (IS_ERR(inotify_mnt))
-		panic("inotify: kern_mount ret %ld!\n", PTR_ERR(inotify_mnt));
-
 	inotify_inode_mark_cachep = KMEM_CACHE(inotify_inode_mark_entry, SLAB_PANIC);
 	event_priv_cachep = KMEM_CACHE(inotify_event_private_data, SLAB_PANIC);
 
diff --git a/include/linux/magic.h b/include/linux/magic.h
index 76285e01b39e..eb9800f05782 100644
--- a/include/linux/magic.h
+++ b/include/linux/magic.h
@@ -52,7 +52,6 @@
 #define CGROUP_SUPER_MAGIC	0x27e0eb
 
 #define FUTEXFS_SUPER_MAGIC	0xBAD1DEA
-#define INOTIFYFS_SUPER_MAGIC	0x2BAD1DEA
 
 #define STACK_END_MAGIC		0x57AC6E9D
 
-- 
cgit v1.2.3


From 4e70af56319e56423d6eb1ce25fc321cdf8cd41d Mon Sep 17 00:00:00 2001
From: Richard Kennedy <richard@rsk.demon.co.uk>
Date: Mon, 15 Feb 2010 11:16:11 +0000
Subject: fs: inode - remove 8 bytes of padding on 64bits allowing 1 more
 objects/slab under slub

This removes 8 bytes of padding from struct inode on 64bit builds, and
so allows 1 more object/slab in the inode_cache when using slub.

Signed-off-by: Richard Kennedy <richard@rsk.demon.co.uk>
----
patch against 2.6.33-rc8
compiled & tested on x86_64 AMDX2

I've been running this patch for over a week with no obvious problems
regards
Richard
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/fs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index b1bcb275b596..ebb1cd5bc241 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -729,6 +729,7 @@ struct inode {
 	uid_t			i_uid;
 	gid_t			i_gid;
 	dev_t			i_rdev;
+	unsigned int		i_blkbits;
 	u64			i_version;
 	loff_t			i_size;
 #ifdef __NEED_I_SIZE_ORDERED
@@ -738,7 +739,6 @@ struct inode {
 	struct timespec		i_mtime;
 	struct timespec		i_ctime;
 	blkcnt_t		i_blocks;
-	unsigned int		i_blkbits;
 	unsigned short          i_bytes;
 	umode_t			i_mode;
 	spinlock_t		i_lock;	/* i_blocks, i_bytes, maybe i_size */
-- 
cgit v1.2.3


From ffb9eb3d8b450c22bbbc688c6b630141ac476fd9 Mon Sep 17 00:00:00 2001
From: Kalle Valo <kalle.valo@nokia.com>
Date: Wed, 17 Feb 2010 17:58:10 +0200
Subject: nl80211: add power save commands

The most needed command from nl80211, which Wireless Extensions had,
is support for power save mode. Add a simple command to make it possible
to enable and disable power save via nl80211.

I was also planning about extending the interface, for example adding the
timeout value, but after thinking more about this I decided not to do it.
Basically there were three reasons:

Firstly, the parameters for power save are very much hardware dependent.
Trying to find a unified interface which would work with all hardware, and
still make sense to users, will be very difficult.

Secondly, IEEE 802.11 power save implementation in Linux is still in state
of flux. We have a long way to still to go and there is no way to predict
what kind of implementation we will have after few years. And because we
need to support nl80211 interface a long time, practically forever, adding
now parameters to nl80211 might create maintenance problems later on.

Third issue are the users. Power save parameters are mostly used for
debugging, so debugfs is better, more flexible, interface for this.
For example, wpa_supplicant currently doesn't configure anything related
to power save mode. It's better to strive that kernel can automatically
optimise the power save parameters, like with help of pm qos network
and other traffic parameters.

Later on, when we have better understanding of power save, we can extend
this command with more features, if there's a need for that.

Signed-off-by: Kalle Valo <kalle.valo@nokia.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h    |  10 ++++
 include/net/cfg80211.h     |   7 +--
 net/wireless/core.c        |  16 +++---
 net/wireless/nl80211.c     | 131 +++++++++++++++++++++++++++++++++++++++++++++
 net/wireless/wext-compat.c |  10 ++--
 5 files changed, 159 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index 8e6384f8fda6..28ba20fda3e2 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -416,6 +416,9 @@ enum nl80211_commands {
 	NL80211_CMD_ACTION,
 	NL80211_CMD_ACTION_TX_STATUS,
 
+	NL80211_CMD_SET_POWER_SAVE,
+	NL80211_CMD_GET_POWER_SAVE,
+
 	/* add new commands above here */
 
 	/* used to define NL80211_CMD_MAX below */
@@ -837,6 +840,8 @@ enum nl80211_attrs {
 
 	NL80211_ATTR_ACK,
 
+	NL80211_ATTR_PS_STATE,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
@@ -1573,4 +1578,9 @@ enum nl80211_band {
 	NL80211_BAND_5GHZ,
 };
 
+enum nl80211_ps_state {
+	NL80211_PS_DISABLED,
+	NL80211_PS_ENABLED,
+};
+
 #endif /* __LINUX_NL80211_H */
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 7188934b64d3..3d134a1fb96b 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1150,7 +1150,6 @@ struct cfg80211_ops {
 			  enum nl80211_channel_type channel_type,
 			  const u8 *buf, size_t len, u64 *cookie);
 
-	/* some temporary stuff to finish wext */
 	int	(*set_power_mgmt)(struct wiphy *wiphy, struct net_device *dev,
 				  bool enabled, int timeout);
 };
@@ -1489,6 +1488,9 @@ struct wireless_dev {
 	struct cfg80211_internal_bss *auth_bsses[MAX_AUTH_BSSES];
 	struct cfg80211_internal_bss *current_bss; /* associated / joined */
 
+	bool ps;
+	int ps_timeout;
+
 #ifdef CONFIG_CFG80211_WEXT
 	/* wext data */
 	struct {
@@ -1500,8 +1502,7 @@ struct wireless_dev {
 		u8 bssid[ETH_ALEN], prev_bssid[ETH_ALEN];
 		u8 ssid[IEEE80211_MAX_SSID_LEN];
 		s8 default_key, default_mgmt_key;
-		bool ps, prev_bssid_valid;
-		int ps_timeout;
+		bool prev_bssid_valid;
 	} wext;
 #endif
 };
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 51908dc2ea00..7fdb9409ad2a 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -698,19 +698,21 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb,
 		wdev->wext.default_key = -1;
 		wdev->wext.default_mgmt_key = -1;
 		wdev->wext.connect.auth_type = NL80211_AUTHTYPE_AUTOMATIC;
+#endif
+
 		if (wdev->wiphy->flags & WIPHY_FLAG_PS_ON_BY_DEFAULT)
-			wdev->wext.ps = true;
+			wdev->ps = true;
 		else
-			wdev->wext.ps = false;
-		wdev->wext.ps_timeout = 100;
+			wdev->ps = false;
+		wdev->ps_timeout = 100;
 		if (rdev->ops->set_power_mgmt)
 			if (rdev->ops->set_power_mgmt(wdev->wiphy, dev,
-						      wdev->wext.ps,
-						      wdev->wext.ps_timeout)) {
+						      wdev->ps,
+						      wdev->ps_timeout)) {
 				/* assume this means it's off */
-				wdev->wext.ps = false;
+				wdev->ps = false;
 			}
-#endif
+
 		if (!dev->ethtool_ops)
 			dev->ethtool_ops = &cfg80211_ethtool_ops;
 
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 328112081358..b0495a1da22e 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -148,6 +148,7 @@ static struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] __read_mostly = {
 	[NL80211_ATTR_FRAME] = { .type = NLA_BINARY,
 				 .len = IEEE80211_MAX_DATA_LEN },
 	[NL80211_ATTR_FRAME_MATCH] = { .type = NLA_BINARY, },
+	[NL80211_ATTR_PS_STATE] = { .type = NLA_U32 },
 };
 
 /* policy for the attributes */
@@ -4663,6 +4664,124 @@ unlock_rtnl:
 	return err;
 }
 
+static int nl80211_set_power_save(struct sk_buff *skb, struct genl_info *info)
+{
+	struct cfg80211_registered_device *rdev;
+	struct wireless_dev *wdev;
+	struct net_device *dev;
+	u8 ps_state;
+	bool state;
+	int err;
+
+	if (!info->attrs[NL80211_ATTR_PS_STATE]) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	ps_state = nla_get_u32(info->attrs[NL80211_ATTR_PS_STATE]);
+
+	if (ps_state != NL80211_PS_DISABLED && ps_state != NL80211_PS_ENABLED) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	rtnl_lock();
+
+	err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev);
+	if (err)
+		goto unlock_rdev;
+
+	wdev = dev->ieee80211_ptr;
+
+	if (!rdev->ops->set_power_mgmt) {
+		err = -EOPNOTSUPP;
+		goto unlock_rdev;
+	}
+
+	state = (ps_state == NL80211_PS_ENABLED) ? true : false;
+
+	if (state == wdev->ps)
+		goto unlock_rdev;
+
+	wdev->ps = state;
+
+	if (rdev->ops->set_power_mgmt(wdev->wiphy, dev, wdev->ps,
+				      wdev->ps_timeout))
+		/* assume this means it's off */
+		wdev->ps = false;
+
+unlock_rdev:
+	cfg80211_unlock_rdev(rdev);
+	dev_put(dev);
+	rtnl_unlock();
+
+out:
+	return err;
+}
+
+static int nl80211_get_power_save(struct sk_buff *skb, struct genl_info *info)
+{
+	struct cfg80211_registered_device *rdev;
+	enum nl80211_ps_state ps_state;
+	struct wireless_dev *wdev;
+	struct net_device *dev;
+	struct sk_buff *msg;
+	void *hdr;
+	int err;
+
+	rtnl_lock();
+
+	err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev);
+	if (err)
+		goto unlock_rtnl;
+
+	wdev = dev->ieee80211_ptr;
+
+	if (!rdev->ops->set_power_mgmt) {
+		err = -EOPNOTSUPP;
+		goto out;
+	}
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0,
+			     NL80211_CMD_GET_POWER_SAVE);
+	if (!hdr) {
+		err = -ENOMEM;
+		goto free_msg;
+	}
+
+	if (wdev->ps)
+		ps_state = NL80211_PS_ENABLED;
+	else
+		ps_state = NL80211_PS_DISABLED;
+
+	NLA_PUT_U32(msg, NL80211_ATTR_PS_STATE, ps_state);
+
+	genlmsg_end(msg, hdr);
+	err = genlmsg_reply(msg, info);
+	goto out;
+
+nla_put_failure:
+	err = -ENOBUFS;
+
+free_msg:
+	nlmsg_free(msg);
+
+out:
+	cfg80211_unlock_rdev(rdev);
+	dev_put(dev);
+
+unlock_rtnl:
+	rtnl_unlock();
+
+	return err;
+}
+
 static struct genl_ops nl80211_ops[] = {
 	{
 		.cmd = NL80211_CMD_GET_WIPHY,
@@ -4955,6 +5074,18 @@ static struct genl_ops nl80211_ops[] = {
 		.policy = nl80211_policy,
 		.flags = GENL_ADMIN_PERM,
 	},
+	{
+		.cmd = NL80211_CMD_SET_POWER_SAVE,
+		.doit = nl80211_set_power_save,
+		.policy = nl80211_policy,
+		.flags = GENL_ADMIN_PERM,
+	},
+	{
+		.cmd = NL80211_CMD_GET_POWER_SAVE,
+		.doit = nl80211_get_power_save,
+		.policy = nl80211_policy,
+		/* can be retrieved by unprivileged users */
+	},
 };
 
 static struct genl_multicast_group nl80211_mlme_mcgrp = {
diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c
index b17eeae448d5..9ab51838849e 100644
--- a/net/wireless/wext-compat.c
+++ b/net/wireless/wext-compat.c
@@ -1099,8 +1099,8 @@ int cfg80211_wext_siwpower(struct net_device *dev,
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
 	struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
-	bool ps = wdev->wext.ps;
-	int timeout = wdev->wext.ps_timeout;
+	bool ps = wdev->ps;
+	int timeout = wdev->ps_timeout;
 	int err;
 
 	if (wdev->iftype != NL80211_IFTYPE_STATION)
@@ -1133,8 +1133,8 @@ int cfg80211_wext_siwpower(struct net_device *dev,
 	if (err)
 		return err;
 
-	wdev->wext.ps = ps;
-	wdev->wext.ps_timeout = timeout;
+	wdev->ps = ps;
+	wdev->ps_timeout = timeout;
 
 	return 0;
 
@@ -1147,7 +1147,7 @@ int cfg80211_wext_giwpower(struct net_device *dev,
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
 
-	wrq->disabled = !wdev->wext.ps;
+	wrq->disabled = !wdev->ps;
 
 	return 0;
 }
-- 
cgit v1.2.3


From 224e1542b6ca2d38dc0c7ea65fb6760c082b1309 Mon Sep 17 00:00:00 2001
From: Maulik Mankad <x0082077@ti.com>
Date: Wed, 17 Feb 2010 14:09:29 -0800
Subject: USB: Add empty functions in otg.h

Add empty functions for usb_nop_xceiv_register()
and usb_nop_xceiv_unregister() in otg.h so that
these functions can be called even when
CONFIG_NOP_USB_XCEIV is not enabled.

It allows to remove ifdef's from board file.

Signed-off-by: Maulik Mankad <x0082077@ti.com>
Cc: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Cc: Ajay Kumar Gupta <ajay.gupta@ti.com>
Acked-by: Olof Johansson <olof@lixom.net>
Acked-by: Felipe Balbi <felipe.balbi@nokia.com>
Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 include/linux/usb/otg.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/usb/otg.h b/include/linux/usb/otg.h
index 52bb917641f0..fef0972c8146 100644
--- a/include/linux/usb/otg.h
+++ b/include/linux/usb/otg.h
@@ -110,9 +110,19 @@ struct otg_transceiver {
 /* for board-specific init logic */
 extern int otg_set_transceiver(struct otg_transceiver *);
 
+#if defined(CONFIG_NOP_USB_XCEIV) || defined(CONFIG_NOP_USB_XCEIV_MODULE)
 /* sometimes transceivers are accessed only through e.g. ULPI */
 extern void usb_nop_xceiv_register(void);
 extern void usb_nop_xceiv_unregister(void);
+#else
+static inline void usb_nop_xceiv_register(void)
+{
+}
+
+static inline void usb_nop_xceiv_unregister(void)
+{
+}
+#endif
 
 /* helpers for direct access thru low-level io interface */
 static inline int otg_io_read(struct otg_transceiver *otg, u32 reg)
-- 
cgit v1.2.3


From 884b8369ee78c081b5e5a99d1d09a95815d13c28 Mon Sep 17 00:00:00 2001
From: Maulik Mankad <x0082077@ti.com>
Date: Wed, 17 Feb 2010 14:09:30 -0800
Subject: omap: musb: Pass board specific data from board file

Pass board specific data for MUSB (like interface_type,
mode etc) from board file by defining board
specific structure.

Each board file can define this structure based on
its requirement and pass this information to the
driver.

Signed-off-by: Maulik Mankad <x0082077@ti.com>
Cc: Tony Lindgren <tony@atomide.com>
Cc: Felipe Balbi <felipe.balbi@nokia.com>
Cc: David Brownell <david-b@pacbell.net>
Cc: Greg Kroah-Hartman <gregkh@suse.de>
Cc: Gupta Ajay Kumar <ajay.gupta@ti.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/mach-omap2/board-2430sdp.c          |  8 +++++++-
 arch/arm/mach-omap2/board-3430sdp.c          |  8 +++++++-
 arch/arm/mach-omap2/board-cm-t35.c           |  8 +++++++-
 arch/arm/mach-omap2/board-devkit8000.c       |  8 +++++++-
 arch/arm/mach-omap2/board-igep0020.c         |  8 +++++++-
 arch/arm/mach-omap2/board-ldp.c              |  8 +++++++-
 arch/arm/mach-omap2/board-omap3beagle.c      |  8 +++++++-
 arch/arm/mach-omap2/board-omap3evm.c         |  8 +++++++-
 arch/arm/mach-omap2/board-omap3pandora.c     |  8 +++++++-
 arch/arm/mach-omap2/board-omap3touchbook.c   |  8 +++++++-
 arch/arm/mach-omap2/board-overo.c            |  8 +++++++-
 arch/arm/mach-omap2/board-rx51.c             |  8 +++++++-
 arch/arm/mach-omap2/board-zoom-peripherals.c |  8 +++++++-
 arch/arm/mach-omap2/usb-musb.c               |  7 +++++--
 arch/arm/plat-omap/include/plat/usb.h        | 11 ++++++++++-
 include/linux/usb/musb.h                     |  3 +++
 16 files changed, 109 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-omap2/board-2430sdp.c b/arch/arm/mach-omap2/board-2430sdp.c
index d6f55ef9059d..01d113ff9fcf 100644
--- a/arch/arm/mach-omap2/board-2430sdp.c
+++ b/arch/arm/mach-omap2/board-2430sdp.c
@@ -193,6 +193,12 @@ static struct omap2_hsmmc_info mmc[] __initdata = {
 	{}	/* Terminator */
 };
 
+static struct omap_musb_board_data musb_board_data = {
+	.interface_type		= MUSB_INTERFACE_ULPI,
+	.mode			= MUSB_OTG,
+	.power			= 100,
+};
+
 static void __init omap_2430sdp_init(void)
 {
 	int ret;
@@ -202,7 +208,7 @@ static void __init omap_2430sdp_init(void)
 	platform_add_devices(sdp2430_devices, ARRAY_SIZE(sdp2430_devices));
 	omap_serial_init();
 	omap2_hsmmc_init(mmc);
-	usb_musb_init();
+	usb_musb_init(&musb_board_data);
 	board_smc91x_init();
 
 	/* Turn off secondary LCD backlight */
diff --git a/arch/arm/mach-omap2/board-3430sdp.c b/arch/arm/mach-omap2/board-3430sdp.c
index 5adef517a2b3..a10e96f48aea 100644
--- a/arch/arm/mach-omap2/board-3430sdp.c
+++ b/arch/arm/mach-omap2/board-3430sdp.c
@@ -760,6 +760,12 @@ static struct flash_partitions sdp_flash_partitions[] = {
 	},
 };
 
+static struct omap_musb_board_data musb_board_data = {
+	.interface_type		= MUSB_INTERFACE_ULPI,
+	.mode			= MUSB_OTG,
+	.power			= 100,
+};
+
 static void __init omap_3430sdp_init(void)
 {
 	omap3_mux_init(board_mux, OMAP_PACKAGE_CBB);
@@ -774,7 +780,7 @@ static void __init omap_3430sdp_init(void)
 				ARRAY_SIZE(sdp3430_spi_board_info));
 	ads7846_dev_init();
 	omap_serial_init();
-	usb_musb_init();
+	usb_musb_init(&musb_board_data);
 	board_smc91x_init();
 	sdp_flash_init(sdp_flash_partitions);
 	sdp3430_display_init();
diff --git a/arch/arm/mach-omap2/board-cm-t35.c b/arch/arm/mach-omap2/board-cm-t35.c
index 8659c3e2ef6e..afa77caaff4d 100644
--- a/arch/arm/mach-omap2/board-cm-t35.c
+++ b/arch/arm/mach-omap2/board-cm-t35.c
@@ -811,6 +811,12 @@ static struct omap_board_mux board_mux[] __initdata = {
 	{ .reg_offset = OMAP_MUX_TERMINATOR },
 };
 
+static struct omap_musb_board_data musb_board_data = {
+	.interface_type		= MUSB_INTERFACE_ULPI,
+	.mode			= MUSB_OTG,
+	.power			= 100,
+};
+
 static void __init cm_t35_init(void)
 {
 	omap3_mux_init(board_mux, OMAP_PACKAGE_CUS);
@@ -822,7 +828,7 @@ static void __init cm_t35_init(void)
 	cm_t35_init_led();
 	cm_t35_init_display();
 
-	usb_musb_init();
+	usb_musb_init(&musb_board_data);
 }
 
 MACHINE_START(CM_T35, "Compulab CM-T35")
diff --git a/arch/arm/mach-omap2/board-devkit8000.c b/arch/arm/mach-omap2/board-devkit8000.c
index 89a55f1d07e3..371019054b49 100644
--- a/arch/arm/mach-omap2/board-devkit8000.c
+++ b/arch/arm/mach-omap2/board-devkit8000.c
@@ -630,6 +630,12 @@ static void __init devkit8000_flash_init(void)
 	}
 }
 
+static struct omap_musb_board_data musb_board_data = {
+	.interface_type		= MUSB_INTERFACE_ULPI,
+	.mode			= MUSB_OTG,
+	.power			= 100,
+};
+
 static struct ehci_hcd_omap_platform_data ehci_pdata __initconst = {
 
 	.port_mode[0] = EHCI_HCD_OMAP_MODE_PHY,
@@ -665,7 +671,7 @@ static void __init devkit8000_init(void)
 	/* REVISIT leave DVI powered down until it's needed ... */
 	gpio_direction_output(170, true);
 
-	usb_musb_init();
+	usb_musb_init(&musb_board_data);
 	usb_ehci_init(&ehci_pdata);
 	devkit8000_flash_init();
 
diff --git a/arch/arm/mach-omap2/board-igep0020.c b/arch/arm/mach-omap2/board-igep0020.c
index 748f418ba442..9958987a3d0a 100644
--- a/arch/arm/mach-omap2/board-igep0020.c
+++ b/arch/arm/mach-omap2/board-igep0020.c
@@ -436,6 +436,12 @@ static int __init igep2_i2c_init(void)
 	return 0;
 }
 
+static struct omap_musb_board_data musb_board_data = {
+	.interface_type		= MUSB_INTERFACE_ULPI,
+	.mode			= MUSB_OTG,
+	.power			= 100,
+};
+
 static struct ehci_hcd_omap_platform_data ehci_pdata __initconst = {
 	.port_mode[0] = EHCI_HCD_OMAP_MODE_UNKNOWN,
 	.port_mode[1] = EHCI_HCD_OMAP_MODE_PHY,
@@ -461,7 +467,7 @@ static void __init igep2_init(void)
 	igep2_i2c_init();
 	platform_add_devices(igep2_devices, ARRAY_SIZE(igep2_devices));
 	omap_serial_init();
-	usb_musb_init();
+	usb_musb_init(&musb_board_data);
 	usb_ehci_init(&ehci_pdata);
 
 	igep2_flash_init();
diff --git a/arch/arm/mach-omap2/board-ldp.c b/arch/arm/mach-omap2/board-ldp.c
index 095adcb642b8..5fcb52e71298 100644
--- a/arch/arm/mach-omap2/board-ldp.c
+++ b/arch/arm/mach-omap2/board-ldp.c
@@ -383,6 +383,12 @@ static struct omap_board_mux board_mux[] __initdata = {
 #define board_mux	NULL
 #endif
 
+static struct omap_musb_board_data musb_board_data = {
+	.interface_type		= MUSB_INTERFACE_ULPI,
+	.mode			= MUSB_OTG,
+	.power			= 100,
+};
+
 static void __init omap_ldp_init(void)
 {
 	omap3_mux_init(board_mux, OMAP_PACKAGE_CBB);
@@ -394,7 +400,7 @@ static void __init omap_ldp_init(void)
 				ARRAY_SIZE(ldp_spi_board_info));
 	ads7846_dev_init();
 	omap_serial_init();
-	usb_musb_init();
+	usb_musb_init(&musb_board_data);
 
 	omap2_hsmmc_init(mmc);
 	/* link regulators to MMC adapters */
diff --git a/arch/arm/mach-omap2/board-omap3beagle.c b/arch/arm/mach-omap2/board-omap3beagle.c
index 1bae69913376..6eb77e1f7c82 100644
--- a/arch/arm/mach-omap2/board-omap3beagle.c
+++ b/arch/arm/mach-omap2/board-omap3beagle.c
@@ -430,6 +430,12 @@ static struct omap_board_mux board_mux[] __initdata = {
 #define board_mux	NULL
 #endif
 
+static struct omap_musb_board_data musb_board_data = {
+	.interface_type		= MUSB_INTERFACE_ULPI,
+	.mode			= MUSB_OTG,
+	.power			= 100,
+};
+
 static void __init omap3_beagle_init(void)
 {
 	omap3_mux_init(board_mux, OMAP_PACKAGE_CBB);
@@ -443,7 +449,7 @@ static void __init omap3_beagle_init(void)
 	/* REVISIT leave DVI powered down until it's needed ... */
 	gpio_direction_output(170, true);
 
-	usb_musb_init();
+	usb_musb_init(&musb_board_data);
 	usb_ehci_init(&ehci_pdata);
 	omap3beagle_flash_init();
 
diff --git a/arch/arm/mach-omap2/board-omap3evm.c b/arch/arm/mach-omap2/board-omap3evm.c
index 57506da4e433..d6bc88c426b5 100644
--- a/arch/arm/mach-omap2/board-omap3evm.c
+++ b/arch/arm/mach-omap2/board-omap3evm.c
@@ -662,6 +662,12 @@ static struct omap_board_mux board_mux[] __initdata = {
 #define board_mux	NULL
 #endif
 
+static struct omap_musb_board_data musb_board_data = {
+	.interface_type		= MUSB_INTERFACE_ULPI,
+	.mode			= MUSB_OTG,
+	.power			= 100,
+};
+
 static void __init omap3_evm_init(void)
 {
 	omap3_evm_get_revision();
@@ -701,7 +707,7 @@ static void __init omap3_evm_init(void)
 		omap_mux_init_gpio(135, OMAP_PIN_OUTPUT);
 		ehci_pdata.reset_gpio_port[1] = 135;
 	}
-	usb_musb_init();
+	usb_musb_init(&musb_board_data);
 	usb_ehci_init(&ehci_pdata);
 	ads7846_dev_init();
 	omap3evm_init_smsc911x();
diff --git a/arch/arm/mach-omap2/board-omap3pandora.c b/arch/arm/mach-omap2/board-omap3pandora.c
index a0acf8d1e806..4827f4658df3 100644
--- a/arch/arm/mach-omap2/board-omap3pandora.c
+++ b/arch/arm/mach-omap2/board-omap3pandora.c
@@ -557,6 +557,12 @@ static struct omap_board_mux board_mux[] __initdata = {
 #define board_mux	NULL
 #endif
 
+static struct omap_musb_board_data musb_board_data = {
+	.interface_type		= MUSB_INTERFACE_ULPI,
+	.mode			= MUSB_OTG,
+	.power			= 100,
+};
+
 static void __init omap3pandora_init(void)
 {
 	omap3_mux_init(board_mux, OMAP_PACKAGE_CBB);
@@ -569,7 +575,7 @@ static void __init omap3pandora_init(void)
 	omap3pandora_ads7846_init();
 	usb_ehci_init(&ehci_pdata);
 	pandora_keys_gpio_init();
-	usb_musb_init();
+	usb_musb_init(&musb_board_data);
 
 	/* Ensure SDRC pins are mux'd for self-refresh */
 	omap_mux_init_signal("sdrc_cke0", OMAP_PIN_OUTPUT);
diff --git a/arch/arm/mach-omap2/board-omap3touchbook.c b/arch/arm/mach-omap2/board-omap3touchbook.c
index 8252ba49a664..3943d0f8322c 100644
--- a/arch/arm/mach-omap2/board-omap3touchbook.c
+++ b/arch/arm/mach-omap2/board-omap3touchbook.c
@@ -527,6 +527,12 @@ static void __init early_touchbook_revision(char **p)
 }
 __early_param("tbr=", early_touchbook_revision);
 
+static struct omap_musb_board_data musb_board_data = {
+	.interface_type		= MUSB_INTERFACE_ULPI,
+	.mode			= MUSB_OTG,
+	.power			= 100,
+};
+
 static void __init omap3_touchbook_init(void)
 {
 	pm_power_off = omap3_touchbook_poweroff;
@@ -545,7 +551,7 @@ static void __init omap3_touchbook_init(void)
 	spi_register_board_info(omap3_ads7846_spi_board_info,
 				ARRAY_SIZE(omap3_ads7846_spi_board_info));
 	omap3_ads7846_init();
-	usb_musb_init();
+	usb_musb_init(&musb_board_data);
 	usb_ehci_init(&ehci_pdata);
 	omap3touchbook_flash_init();
 
diff --git a/arch/arm/mach-omap2/board-overo.c b/arch/arm/mach-omap2/board-overo.c
index 7e6aa8292746..50872a42bec7 100644
--- a/arch/arm/mach-omap2/board-overo.c
+++ b/arch/arm/mach-omap2/board-overo.c
@@ -413,6 +413,12 @@ static struct omap_board_mux board_mux[] __initdata = {
 #define board_mux	NULL
 #endif
 
+static struct omap_musb_board_data musb_board_data = {
+	.interface_type		= MUSB_INTERFACE_ULPI,
+	.mode			= MUSB_OTG,
+	.power			= 100,
+};
+
 static void __init overo_init(void)
 {
 	omap3_mux_init(board_mux, OMAP_PACKAGE_CBB);
@@ -420,7 +426,7 @@ static void __init overo_init(void)
 	platform_add_devices(overo_devices, ARRAY_SIZE(overo_devices));
 	omap_serial_init();
 	overo_flash_init();
-	usb_musb_init();
+	usb_musb_init(&musb_board_data);
 	usb_ehci_init(&ehci_pdata);
 	overo_ads7846_init();
 	overo_init_smsc911x();
diff --git a/arch/arm/mach-omap2/board-rx51.c b/arch/arm/mach-omap2/board-rx51.c
index 6a49f916103d..0ccb8b46d157 100644
--- a/arch/arm/mach-omap2/board-rx51.c
+++ b/arch/arm/mach-omap2/board-rx51.c
@@ -78,11 +78,17 @@ static struct omap_board_mux board_mux[] __initdata = {
 #define board_mux	NULL
 #endif
 
+static struct omap_musb_board_data musb_board_data = {
+	.interface_type		= MUSB_INTERFACE_ULPI,
+	.mode			= MUSB_PERIPHERAL,
+	.power			= 0,
+};
+
 static void __init rx51_init(void)
 {
 	omap3_mux_init(board_mux, OMAP_PACKAGE_CBB);
 	omap_serial_init();
-	usb_musb_init();
+	usb_musb_init(&musb_board_data);
 	rx51_peripherals_init();
 
 	/* Ensure SDRC pins are mux'd for self-refresh */
diff --git a/arch/arm/mach-omap2/board-zoom-peripherals.c b/arch/arm/mach-omap2/board-zoom-peripherals.c
index 9a0821fb7ea0..ca95d8d64136 100755
--- a/arch/arm/mach-omap2/board-zoom-peripherals.c
+++ b/arch/arm/mach-omap2/board-zoom-peripherals.c
@@ -264,6 +264,12 @@ static int __init omap_i2c_init(void)
 	return 0;
 }
 
+static struct omap_musb_board_data musb_board_data = {
+	.interface_type		= MUSB_INTERFACE_ULPI,
+	.mode			= MUSB_OTG,
+	.power			= 100,
+};
+
 static void enable_board_wakeup_source(void)
 {
 	/* T2 interrupt line (keypad) */
@@ -275,6 +281,6 @@ void __init zoom_peripherals_init(void)
 {
 	omap_i2c_init();
 	omap_serial_init();
-	usb_musb_init();
+	usb_musb_init(&musb_board_data);
 	enable_board_wakeup_source();
 }
diff --git a/arch/arm/mach-omap2/usb-musb.c b/arch/arm/mach-omap2/usb-musb.c
index ba71f762db71..2ddc316d9394 100644
--- a/arch/arm/mach-omap2/usb-musb.c
+++ b/arch/arm/mach-omap2/usb-musb.c
@@ -146,7 +146,7 @@ static struct platform_device musb_device = {
 	.resource	= musb_resources,
 };
 
-void __init usb_musb_init(void)
+void __init usb_musb_init(struct omap_musb_board_data *board_data)
 {
 	if (cpu_is_omap243x())
 		musb_resources[0].start = OMAP243X_HS_BASE;
@@ -159,6 +159,9 @@ void __init usb_musb_init(void)
 	 * musb_core.c have been converted to use use clkdev.
 	 */
 	musb_plat.clock = "ick";
+	musb_plat.board_data = board_data;
+	musb_plat.power = board_data->power >> 1;
+	musb_plat.mode = board_data->mode;
 
 	if (platform_device_register(&musb_device) < 0) {
 		printk(KERN_ERR "Unable to register HS-USB (MUSB) device\n");
@@ -167,7 +170,7 @@ void __init usb_musb_init(void)
 }
 
 #else
-void __init usb_musb_init(void)
+void __init usb_musb_init(struct omap_musb_board_data *board_data)
 {
 }
 #endif /* CONFIG_USB_MUSB_SOC */
diff --git a/arch/arm/plat-omap/include/plat/usb.h b/arch/arm/plat-omap/include/plat/usb.h
index 33a500eb2f93..288e29e1c06f 100644
--- a/arch/arm/plat-omap/include/plat/usb.h
+++ b/arch/arm/plat-omap/include/plat/usb.h
@@ -3,6 +3,7 @@
 #ifndef	__ASM_ARCH_OMAP_USB_H
 #define	__ASM_ARCH_OMAP_USB_H
 
+#include <linux/usb/musb.h>
 #include <plat/board.h>
 
 #define OMAP3_HS_USB_PORTS	3
@@ -42,7 +43,15 @@ struct ehci_hcd_omap_platform_data {
 #define UDC_BASE			OMAP2_UDC_BASE
 #define OMAP_OHCI_BASE			OMAP2_OHCI_BASE
 
-extern void usb_musb_init(void);
+struct omap_musb_board_data {
+	u8	interface_type;
+	u8	mode;
+	u8	power;
+};
+
+enum musb_interface    {MUSB_INTERFACE_ULPI, MUSB_INTERFACE_UTMI};
+
+extern void usb_musb_init(struct omap_musb_board_data *board_data);
 
 extern void usb_ehci_init(struct ehci_hcd_omap_platform_data *pdata);
 
diff --git a/include/linux/usb/musb.h b/include/linux/usb/musb.h
index d43755669261..5dc2f227a0f1 100644
--- a/include/linux/usb/musb.h
+++ b/include/linux/usb/musb.h
@@ -84,6 +84,9 @@ struct musb_hdrc_platform_data {
 
 	/* MUSB configuration-specific details */
 	struct musb_hdrc_config	*config;
+
+	/* Architecture specific board data	*/
+	void		*board_data;
 };
 
 
-- 
cgit v1.2.3


From cf4c43dd439b90a1a876b3f836ebe745abb9a269 Mon Sep 17 00:00:00 2001
From: Jesse Barnes <jbarnes@virtuousgeek.org>
Date: Wed, 15 Jul 2009 13:13:00 -0700
Subject: PCI: Add pci_bus_find_ext_capability

For use by code that needs to walk extended capability lists before
pci_dev structures are set up.

Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
LKML-Reference: <43F901BD926A4E43B106BF17856F07559FB80CFD@orsmsx508.amr.corp.intel.com>
Signed-off-by: Jacob Pan <jacob.jun.pan@intel.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 drivers/pci/pci.c   | 43 +++++++++++++++++++++++++++++++++++++++++++
 include/linux/pci.h |  2 ++
 2 files changed, 45 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 315fea47e784..aad62af2b4c6 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -297,6 +297,49 @@ int pci_find_ext_capability(struct pci_dev *dev, int cap)
 }
 EXPORT_SYMBOL_GPL(pci_find_ext_capability);
 
+/**
+ * pci_bus_find_ext_capability - find an extended capability
+ * @bus:   the PCI bus to query
+ * @devfn: PCI device to query
+ * @cap:   capability code
+ *
+ * Like pci_find_ext_capability() but works for pci devices that do not have a
+ * pci_dev structure set up yet.
+ *
+ * Returns the address of the requested capability structure within the
+ * device's PCI configuration space or 0 in case the device does not
+ * support it.
+ */
+int pci_bus_find_ext_capability(struct pci_bus *bus, unsigned int devfn,
+				int cap)
+{
+	u32 header;
+	int ttl;
+	int pos = PCI_CFG_SPACE_SIZE;
+
+	/* minimum 8 bytes per capability */
+	ttl = (PCI_CFG_SPACE_EXP_SIZE - PCI_CFG_SPACE_SIZE) / 8;
+
+	if (!pci_bus_read_config_dword(bus, devfn, pos, &header))
+		return 0;
+	if (header == 0xffffffff || header == 0)
+		return 0;
+
+	while (ttl-- > 0) {
+		if (PCI_EXT_CAP_ID(header) == cap)
+			return pos;
+
+		pos = PCI_EXT_CAP_NEXT(header);
+		if (pos < PCI_CFG_SPACE_SIZE)
+			break;
+
+		if (!pci_bus_read_config_dword(bus, devfn, pos, &header))
+			break;
+	}
+
+	return 0;
+}
+
 static int __pci_find_next_ht_cap(struct pci_dev *dev, int pos, int ht_cap)
 {
 	int rc, ttl = PCI_FIND_CAP_TTL;
diff --git a/include/linux/pci.h b/include/linux/pci.h
index c1968f464c38..65f8a8f9d3e5 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -631,6 +631,8 @@ enum pci_lost_interrupt_reason pci_lost_interrupt(struct pci_dev *dev);
 int pci_find_capability(struct pci_dev *dev, int cap);
 int pci_find_next_capability(struct pci_dev *dev, u8 pos, int cap);
 int pci_find_ext_capability(struct pci_dev *dev, int cap);
+int pci_bus_find_ext_capability(struct pci_bus *bus, unsigned int devfn,
+				int cap);
 int pci_find_ht_capability(struct pci_dev *dev, int ht_cap);
 int pci_find_next_ht_capability(struct pci_dev *dev, int pos, int ht_cap);
 struct pci_bus *pci_find_next_bus(const struct pci_bus *from);
-- 
cgit v1.2.3


From 4e10ae11317b238609fc3ec9d50a5dee9473e045 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@stericsson.com>
Date: Sat, 20 Feb 2010 09:41:30 +0100
Subject: ARM: 5951/1: ARM: fix documentation of the PrimeCell bus

This fixes the filepath encoded in <linux/amba/bus.h> and adds
some documentation as to what this bus really means.

Signed-off-by: Linus Walleij <linus.walleij@stericsson.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 include/linux/amba/bus.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/amba/bus.h b/include/linux/amba/bus.h
index ab94335b4bb9..6816be6c3f77 100644
--- a/include/linux/amba/bus.h
+++ b/include/linux/amba/bus.h
@@ -1,5 +1,9 @@
 /*
- *  linux/include/asm-arm/hardware/amba.h
+ *  linux/include/amba/bus.h
+ *
+ *  This device type deals with ARM PrimeCells and anything else that
+ *  presents a proper CID (0xB105F00D) at the end of the I/O register
+ *  region or that is derived from a PrimeCell.
  *
  *  Copyright (C) 2003 Deep Blue Solutions Ltd, All Rights Reserved.
  *
-- 
cgit v1.2.3


From f501912a35c02eadc55ca9396ece55fe36f785d0 Mon Sep 17 00:00:00 2001
From: Ben Myers <bpm@sgi.com>
Date: Wed, 17 Feb 2010 14:05:11 -0600
Subject: commit_metadata export operation replacing nfsd_sync_dir

- Add commit_metadata export_operation to allow the underlying filesystem to
decide how to commit an inode most efficiently.

- Usage of nfsd_sync_dir and write_inode_now has been replaced with the
commit_metadata function that takes a svc_fh.

- The commit_metadata function calls the commit_metadata export_op if it's
there, or else falls back to sync_inode instead of fsync and write_inode_now
because only metadata need be synced here.

- nfsd4_sync_rec_dir now uses vfs_fsync so that commit_metadata can be static

Signed-off-by: Ben Myers <bpm@sgi.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 fs/nfsd/nfs4recover.c    |   4 +-
 fs/nfsd/vfs.c            | 106 +++++++++++++++++++++++------------------------
 include/linux/exportfs.h |   5 +++
 3 files changed, 58 insertions(+), 57 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 5a754f7b71ed..98fb98e330b4 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -119,9 +119,7 @@ out_no_tfm:
 static void
 nfsd4_sync_rec_dir(void)
 {
-	mutex_lock(&rec_dir.dentry->d_inode->i_mutex);
-	nfsd_sync_dir(rec_dir.dentry);
-	mutex_unlock(&rec_dir.dentry->d_inode->i_mutex);
+	vfs_fsync(NULL, rec_dir.dentry, 0);
 }
 
 int
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index ed024d329056..8afdba5082e8 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -27,6 +27,8 @@
 #include <linux/jhash.h>
 #include <linux/ima.h>
 #include <asm/uaccess.h>
+#include <linux/exportfs.h>
+#include <linux/writeback.h>
 
 #ifdef CONFIG_NFSD_V3
 #include "xdr3.h"
@@ -271,6 +273,32 @@ out:
 	return err;
 }
 
+/*
+ * Commit metadata changes to stable storage.
+ */
+static int
+commit_metadata(struct svc_fh *fhp)
+{
+	struct inode *inode = fhp->fh_dentry->d_inode;
+	const struct export_operations *export_ops = inode->i_sb->s_export_op;
+	int error = 0;
+
+	if (!EX_ISSYNC(fhp->fh_export))
+		return 0;
+
+	if (export_ops->commit_metadata) {
+		error = export_ops->commit_metadata(inode);
+	} else {
+		struct writeback_control wbc = {
+			.sync_mode = WB_SYNC_ALL,
+			.nr_to_write = 0, /* metadata only */
+		};
+
+		error = sync_inode(inode, &wbc);
+	}
+
+	return error;
+}
 
 /*
  * Set various file attributes.
@@ -768,28 +796,6 @@ nfsd_close(struct file *filp)
 	fput(filp);
 }
 
-/*
- * Sync a directory to disk.
- *
- * We can't just call vfs_fsync because our requirements are slightly odd:
- *
- *  a) we do not have a file struct available
- *  b) we expect to have i_mutex already held by the caller
- */
-int
-nfsd_sync_dir(struct dentry *dentry)
-{
-	struct inode *inode = dentry->d_inode;
-	int error;
-
-	WARN_ON(!mutex_is_locked(&inode->i_mutex));
-
-	error = filemap_write_and_wait(inode->i_mapping);
-	if (!error && inode->i_fop->fsync)
-		error = inode->i_fop->fsync(NULL, dentry, 0);
-	return error;
-}
-
 /*
  * Obtain the readahead parameters for the file
  * specified by (dev, ino).
@@ -1331,12 +1337,14 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
 		goto out_nfserr;
 	}
 
-	if (EX_ISSYNC(fhp->fh_export)) {
-		err = nfserrno(nfsd_sync_dir(dentry));
-		write_inode_now(dchild->d_inode, 1);
-	}
+	err = nfsd_create_setattr(rqstp, resfhp, iap);
 
-	err2 = nfsd_create_setattr(rqstp, resfhp, iap);
+	/*
+	 * nfsd_setattr already committed the child.  Transactional filesystems
+	 * had a chance to commit changes for both parent and child
+	 * simultaneously making the following commit_metadata a noop.
+	 */
+	err2 = nfserrno(commit_metadata(fhp));
 	if (err2)
 		err = err2;
 	mnt_drop_write(fhp->fh_export->ex_path.mnt);
@@ -1368,7 +1376,6 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
 	struct dentry	*dentry, *dchild = NULL;
 	struct inode	*dirp;
 	__be32		err;
-	__be32		err2;
 	int		host_err;
 	__u32		v_mtime=0, v_atime=0;
 
@@ -1463,11 +1470,6 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
 	if (created)
 		*created = 1;
 
-	if (EX_ISSYNC(fhp->fh_export)) {
-		err = nfserrno(nfsd_sync_dir(dentry));
-		/* setattr will sync the child (or not) */
-	}
-
 	nfsd_check_ignore_resizing(iap);
 
 	if (createmode == NFS3_CREATE_EXCLUSIVE) {
@@ -1482,9 +1484,13 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
 	}
 
  set_attr:
-	err2 = nfsd_create_setattr(rqstp, resfhp, iap);
-	if (err2)
-		err = err2;
+	err = nfsd_create_setattr(rqstp, resfhp, iap);
+
+	/*
+	 * nfsd_setattr already committed the child (and possibly also the parent).
+	 */
+	if (!err)
+		err = nfserrno(commit_metadata(fhp));
 
 	mnt_drop_write(fhp->fh_export->ex_path.mnt);
 	/*
@@ -1599,12 +1605,9 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp,
 		}
 	} else
 		host_err = vfs_symlink(dentry->d_inode, dnew, path);
-
-	if (!host_err) {
-		if (EX_ISSYNC(fhp->fh_export))
-			host_err = nfsd_sync_dir(dentry);
-	}
 	err = nfserrno(host_err);
+	if (!err)
+		err = nfserrno(commit_metadata(fhp));
 	fh_unlock(fhp);
 
 	mnt_drop_write(fhp->fh_export->ex_path.mnt);
@@ -1666,11 +1669,9 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
 	}
 	host_err = vfs_link(dold, dirp, dnew);
 	if (!host_err) {
-		if (EX_ISSYNC(ffhp->fh_export)) {
-			err = nfserrno(nfsd_sync_dir(ddir));
-			write_inode_now(dest, 1);
-		}
-		err = 0;
+		err = nfserrno(commit_metadata(ffhp));
+		if (!err)
+			err = nfserrno(commit_metadata(tfhp));
 	} else {
 		if (host_err == -EXDEV && rqstp->rq_vers == 2)
 			err = nfserr_acces;
@@ -1766,10 +1767,10 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
 		goto out_dput_new;
 
 	host_err = vfs_rename(fdir, odentry, tdir, ndentry);
-	if (!host_err && EX_ISSYNC(tfhp->fh_export)) {
-		host_err = nfsd_sync_dir(tdentry);
+	if (!host_err) {
+		host_err = commit_metadata(tfhp);
 		if (!host_err)
-			host_err = nfsd_sync_dir(fdentry);
+			host_err = commit_metadata(ffhp);
 	}
 
 	mnt_drop_write(ffhp->fh_export->ex_path.mnt);
@@ -1850,12 +1851,9 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
 
 	dput(rdentry);
 
-	if (host_err)
-		goto out_drop;
-	if (EX_ISSYNC(fhp->fh_export))
-		host_err = nfsd_sync_dir(dentry);
+	if (!host_err)
+		host_err = commit_metadata(fhp);
 
-out_drop:
 	mnt_drop_write(fhp->fh_export->ex_path.mnt);
 out_nfserr:
 	err = nfserrno(host_err);
diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h
index dc12f416a49f..a9cd507f8cd2 100644
--- a/include/linux/exportfs.h
+++ b/include/linux/exportfs.h
@@ -96,6 +96,7 @@ struct fid {
  * @fh_to_parent:   find the implied object's parent and get a dentry for it
  * @get_name:       find the name for a given inode in a given directory
  * @get_parent:     find the parent of a given directory
+ * @commit_metadata: commit metadata changes to stable storage
  *
  * See Documentation/filesystems/nfs/Exporting for details on how to use
  * this interface correctly.
@@ -137,6 +138,9 @@ struct fid {
  *    is also a directory.  In the event that it cannot be found, or storage
  *    space cannot be allocated, a %ERR_PTR should be returned.
  *
+ * commit_metadata:
+ *    @commit_metadata should commit metadata changes to stable storage.
+ *
  * Locking rules:
  *    get_parent is called with child->d_inode->i_mutex down
  *    get_name is not (which is possibly inconsistent)
@@ -152,6 +156,7 @@ struct export_operations {
 	int (*get_name)(struct dentry *parent, char *name,
 			struct dentry *child);
 	struct dentry * (*get_parent)(struct dentry *child);
+	int (*commit_metadata)(struct inode *inode);
 };
 
 extern int exportfs_encode_fh(struct dentry *dentry, struct fid *fid,
-- 
cgit v1.2.3


From b028461d66a4dc2754d4e5dab1b3974c44798c5d Mon Sep 17 00:00:00 2001
From: dann frazier <dannf@hp.com>
Date: Wed, 17 Feb 2010 16:53:31 -0700
Subject: cciss: remove C99-style comments

Some cleanup before the header file split-out so we don't propagate this style
into new files.

Acked-by: Stephen M. Cameron <scameron@beardog.cce.hp.com>
Signed-off-by: dann frazier <dannf@hp.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 drivers/block/cciss.c       | 37 ++++++++++++++-------------
 drivers/block/cciss.h       |  9 +++----
 drivers/block/cciss_cmd.h   | 61 +++++++++++++++++++++------------------------
 drivers/block/cciss_scsi.h  | 18 ++++++-------
 include/linux/cciss_ioctl.h | 33 ++++++++++++------------
 5 files changed, 78 insertions(+), 80 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index 873e594860d3..8df23c732c46 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -1341,26 +1341,27 @@ static int cciss_ioctl(struct block_device *bdev, fmode_t mode,
 				kfree(buff);
 				return -ENOMEM;
 			}
-			// Fill in the command type
+			/* Fill in the command type */
 			c->cmd_type = CMD_IOCTL_PEND;
-			// Fill in Command Header
-			c->Header.ReplyQueue = 0;	// unused in simple mode
-			if (iocommand.buf_size > 0)	// buffer to fill
+			/* Fill in Command Header */
+			c->Header.ReplyQueue = 0;   /* unused in simple mode */
+			if (iocommand.buf_size > 0) /* buffer to fill */
 			{
 				c->Header.SGList = 1;
 				c->Header.SGTotal = 1;
-			} else	// no buffers to fill
+			} else /* no buffers to fill */
 			{
 				c->Header.SGList = 0;
 				c->Header.SGTotal = 0;
 			}
 			c->Header.LUN = iocommand.LUN_info;
-			c->Header.Tag.lower = c->busaddr;	// use the kernel address the cmd block for tag
+			/* use the kernel address the cmd block for tag */
+			c->Header.Tag.lower = c->busaddr;
 
-			// Fill in Request block
+			/* Fill in Request block */
 			c->Request = iocommand.Request;
 
-			// Fill in the scatter gather information
+			/* Fill in the scatter gather information */
 			if (iocommand.buf_size > 0) {
 				temp64.val = pci_map_single(host->pdev, buff,
 					iocommand.buf_size,
@@ -1368,7 +1369,7 @@ static int cciss_ioctl(struct block_device *bdev, fmode_t mode,
 				c->SG[0].Addr.lower = temp64.val32.lower;
 				c->SG[0].Addr.upper = temp64.val32.upper;
 				c->SG[0].Len = iocommand.buf_size;
-				c->SG[0].Ext = 0;	// we are not chaining
+				c->SG[0].Ext = 0;  /* we are not chaining */
 			}
 			c->waiting = &wait;
 
@@ -2422,7 +2423,7 @@ static int fill_cmd(CommandList_struct *c, __u8 cmd, int ctlr, void *buff,
 			c->Request.Type.Direction = XFER_READ;
 			c->Request.Timeout = 0;
 			c->Request.CDB[0] = cmd;
-			c->Request.CDB[6] = (size >> 24) & 0xFF;	//MSB
+			c->Request.CDB[6] = (size >> 24) & 0xFF; /* MSB */
 			c->Request.CDB[7] = (size >> 16) & 0xFF;
 			c->Request.CDB[8] = (size >> 8) & 0xFF;
 			c->Request.CDB[9] = size & 0xFF;
@@ -2691,7 +2692,7 @@ static void cciss_geometry_inquiry(int ctlr, int logvol,
 			       "cciss: reading geometry failed, volume "
 			       "does not support reading geometry\n");
 			drv->heads = 255;
-			drv->sectors = 32;	// Sectors per track
+			drv->sectors = 32;	/* Sectors per track */
 			drv->cylinders = total_size + 1;
 			drv->raid_level = RAID_UNKNOWN;
 		} else {
@@ -3109,19 +3110,19 @@ static void do_cciss_request(struct request_queue *q)
 
 	/* fill in the request */
 	drv = creq->rq_disk->private_data;
-	c->Header.ReplyQueue = 0;	// unused in simple mode
+	c->Header.ReplyQueue = 0;	/* unused in simple mode */
 	/* got command from pool, so use the command block index instead */
 	/* for direct lookups. */
 	/* The first 2 bits are reserved for controller error reporting. */
 	c->Header.Tag.lower = (c->cmdindex << 3);
 	c->Header.Tag.lower |= 0x04;	/* flag for direct lookup. */
 	memcpy(&c->Header.LUN, drv->LunID, sizeof(drv->LunID));
-	c->Request.CDBLen = 10;	// 12 byte commands not in FW yet;
-	c->Request.Type.Type = TYPE_CMD;	// It is a command.
+	c->Request.CDBLen = 10;	/* 12 byte commands not in FW yet; */
+	c->Request.Type.Type = TYPE_CMD;	/* It is a command. */
 	c->Request.Type.Attribute = ATTR_SIMPLE;
 	c->Request.Type.Direction =
 	    (rq_data_dir(creq) == READ) ? XFER_READ : XFER_WRITE;
-	c->Request.Timeout = 0;	// Don't time out
+	c->Request.Timeout = 0;	/* Don't time out */
 	c->Request.CDB[0] =
 	    (rq_data_dir(creq) == READ) ? h->cciss_read : h->cciss_write;
 	start_blk = blk_rq_pos(creq);
@@ -3206,11 +3207,11 @@ static void do_cciss_request(struct request_queue *q)
 	if (likely(blk_fs_request(creq))) {
 		if(h->cciss_read == CCISS_READ_10) {
 			c->Request.CDB[1] = 0;
-			c->Request.CDB[2] = (start_blk >> 24) & 0xff;	//MSB
+			c->Request.CDB[2] = (start_blk >> 24) & 0xff; /* MSB */
 			c->Request.CDB[3] = (start_blk >> 16) & 0xff;
 			c->Request.CDB[4] = (start_blk >> 8) & 0xff;
 			c->Request.CDB[5] = start_blk & 0xff;
-			c->Request.CDB[6] = 0;	// (sect >> 24) & 0xff; MSB
+			c->Request.CDB[6] = 0; /* (sect >> 24) & 0xff; MSB */
 			c->Request.CDB[7] = (blk_rq_sectors(creq) >> 8) & 0xff;
 			c->Request.CDB[8] = blk_rq_sectors(creq) & 0xff;
 			c->Request.CDB[9] = c->Request.CDB[11] = c->Request.CDB[12] = 0;
@@ -3219,7 +3220,7 @@ static void do_cciss_request(struct request_queue *q)
 
 			c->Request.CDBLen = 16;
 			c->Request.CDB[1]= 0;
-			c->Request.CDB[2]= (upper32 >> 24) & 0xff;	//MSB
+			c->Request.CDB[2]= (upper32 >> 24) & 0xff; /* MSB */
 			c->Request.CDB[3]= (upper32 >> 16) & 0xff;
 			c->Request.CDB[4]= (upper32 >>  8) & 0xff;
 			c->Request.CDB[5]= upper32 & 0xff;
diff --git a/drivers/block/cciss.h b/drivers/block/cciss.h
index 1d95db254069..2b07bdacbd12 100644
--- a/drivers/block/cciss.h
+++ b/drivers/block/cciss.h
@@ -66,7 +66,7 @@ struct ctlr_info
 	int	ctlr;
 	char	devname[8];
 	char    *product_name;
-	char	firm_ver[4]; // Firmware version 
+	char	firm_ver[4]; /* Firmware version */
 	struct pci_dev *pdev;
 	__u32	board_id;
 	void __iomem *vaddr;
@@ -103,7 +103,7 @@ struct ctlr_info
 	BYTE	cciss_write;
 	BYTE	cciss_read_capacity;
 
-	// information about each logical volume
+	/* information about each logical volume */
 	drive_info_struct *drv[CISS_MAX_LUN];
 
 	struct access_method access;
@@ -116,7 +116,7 @@ struct ctlr_info
 	unsigned int maxSG;
 	spinlock_t lock;
 
-	//* pointers to command and error info pool */ 
+	/* pointers to command and error info pool */
 	CommandList_struct 	*cmd_pool;
 	dma_addr_t		cmd_pool_dhandle; 
 	ErrorInfo_struct 	*errinfo_pool;
@@ -134,7 +134,7 @@ struct ctlr_info
 	*/
 	int			next_to_run;
 
-	// Disk structures we need to pass back
+	/* Disk structures we need to pass back */
 	struct gendisk   *gendisk[CISS_MAX_LUN];
 #ifdef CONFIG_CISS_SCSI_TAPE
 	void *scsi_ctlr; /* ptr to structure containing scsi related stuff */
@@ -315,4 +315,3 @@ struct board_type {
 #define CCISS_LOCK(i)	(&hba[i]->lock)
 
 #endif /* CCISS_H */
-
diff --git a/drivers/block/cciss_cmd.h b/drivers/block/cciss_cmd.h
index 6afa700890ff..277422b7e060 100644
--- a/drivers/block/cciss_cmd.h
+++ b/drivers/block/cciss_cmd.h
@@ -1,17 +1,16 @@
 #ifndef CCISS_CMD_H
 #define CCISS_CMD_H
-//###########################################################################
-//DEFINES
-//###########################################################################
+/* DEFINES */
 #define CISS_VERSION "1.00"
 
-//general boundary definitions
-#define SENSEINFOBYTES          32//note that this value may vary between host implementations
+/* general boundary definitions */
+#define SENSEINFOBYTES          32 /* note that this value may vary
+				      between host implementations */
 #define MAXSGENTRIES            32
 #define CCISS_SG_CHAIN          0x80000000
 #define MAXREPLYQS              256
 
-//Command Status value
+/* Command Status value */
 #define CMD_SUCCESS             0x0000
 #define CMD_TARGET_STATUS       0x0001
 #define CMD_DATA_UNDERRUN       0x0002
@@ -49,30 +48,30 @@
 #define ASYM_ACCESS_CHANGED		0x06
 #define LUN_CAPACITY_CHANGED		0x09
 
-//transfer direction
+/* transfer direction */
 #define XFER_NONE               0x00
 #define XFER_WRITE              0x01
 #define XFER_READ               0x02
 #define XFER_RSVD               0x03
 
-//task attribute
+/* task attribute */
 #define ATTR_UNTAGGED           0x00
 #define ATTR_SIMPLE             0x04
 #define ATTR_HEADOFQUEUE        0x05
 #define ATTR_ORDERED            0x06
 #define ATTR_ACA                0x07
 
-//cdb type
+/* cdb type */
 #define TYPE_CMD				0x00
 #define TYPE_MSG				0x01
 
-//config space register offsets
+/* config space register offsets */
 #define CFG_VENDORID            0x00
 #define CFG_DEVICEID            0x02
 #define CFG_I2OBAR              0x10
 #define CFG_MEM1BAR             0x14
 
-//i2o space register offsets
+/* i2o space register offsets */
 #define I2O_IBDB_SET            0x20
 #define I2O_IBDB_CLEAR          0x70
 #define I2O_INT_STATUS          0x30
@@ -81,7 +80,7 @@
 #define I2O_OBPOST_Q            0x44
 #define I2O_DMA1_CFG		0x214
 
-//Configuration Table
+/* Configuration Table */
 #define CFGTBL_ChangeReq        0x00000001l
 #define CFGTBL_AccCmds          0x00000001l
 
@@ -103,24 +102,22 @@ typedef union _u64bit
    __u64	val;
 } u64bit;
 
-// Type defs used in the following structs
+/* Type defs used in the following structs */
 #define BYTE __u8
 #define WORD __u16
 #define HWORD __u16
 #define DWORD __u32
 #define QWORD vals32 
 
-//###########################################################################
-//STRUCTURES
-//###########################################################################
+/* STRUCTURES */
 #define CISS_MAX_LUN	1024
 #define CISS_MAX_PHYS_LUN	1024
-// SCSI-3 Cmmands 
+/* SCSI-3 Cmmands */
 
 #pragma pack(1)	
 
 #define CISS_INQUIRY 0x12
-//Date returned
+/* Date returned */
 typedef struct _InquiryData_struct
 {
   BYTE data_byte[36];
@@ -128,7 +125,7 @@ typedef struct _InquiryData_struct
 
 #define CISS_REPORT_LOG 0xc2    /* Report Logical LUNs */
 #define CISS_REPORT_PHYS 0xc3   /* Report Physical LUNs */
-// Data returned
+/* Data returned */
 typedef struct _ReportLUNdata_struct
 {
   BYTE LUNListLength[4];
@@ -139,8 +136,8 @@ typedef struct _ReportLUNdata_struct
 #define CCISS_READ_CAPACITY 0x25 /* Read Capacity */ 
 typedef struct _ReadCapdata_struct
 {
-  BYTE total_size[4];	// Total size in blocks
-  BYTE block_size[4];	// Size of blocks in bytes
+  BYTE total_size[4];	/* Total size in blocks */
+  BYTE block_size[4];	/* Size of blocks in bytes */
 } ReadCapdata_struct;
 
 #define CCISS_READ_CAPACITY_16 0x9e /* Read Capacity 16 */
@@ -172,29 +169,29 @@ typedef struct _ReadCapdata_struct_16
 #define CDB_LEN10	10
 #define CDB_LEN16	16
 
-// BMIC commands 
+/* BMIC commands */
 #define BMIC_READ 0x26
 #define BMIC_WRITE 0x27
 #define BMIC_CACHE_FLUSH 0xc2
-#define CCISS_CACHE_FLUSH 0x01	//C2 was already being used by CCISS
+#define CCISS_CACHE_FLUSH 0x01	/* C2 was already being used by CCISS */
 
-//Command List Structure
+/* Command List Structure */
 typedef union _SCSI3Addr_struct {
    struct {
     BYTE Dev;
     BYTE Bus:6;
-    BYTE Mode:2;        // b00
+    BYTE Mode:2;        /* b00 */
   } PeripDev;
    struct {
     BYTE DevLSB;
     BYTE DevMSB:6;
-    BYTE Mode:2;        // b01
+    BYTE Mode:2;        /* b01 */
   } LogDev;
    struct {
     BYTE Dev:5;
     BYTE Bus:3;
     BYTE Targ:6;
-    BYTE Mode:2;        // b10
+    BYTE Mode:2;        /* b10 */
   } LogUnit;
 } SCSI3Addr_struct;
 
@@ -202,7 +199,7 @@ typedef struct _PhysDevAddr_struct {
   DWORD             TargetId:24;
   DWORD             Bus:6;
   DWORD             Mode:2;
-  SCSI3Addr_struct  Target[2]; //2 level target device addr
+  SCSI3Addr_struct  Target[2]; /* 2 level target device addr */
 } PhysDevAddr_struct;
   
 typedef struct _LogDevAddr_struct {
@@ -255,8 +252,8 @@ typedef union _MoreErrInfo_struct{
   }Common_Info;
   struct{
     BYTE  Reserved[2];
-    BYTE  offense_size;//size of offending entry
-    BYTE  offense_num; //byte # of offense 0-base
+    BYTE  offense_size; /* size of offending entry */
+    BYTE  offense_num;  /* byte # of offense 0-base */
     DWORD offense_value;
   }Invalid_Cmd;
 }MoreErrInfo_struct;
@@ -300,7 +297,7 @@ typedef struct _CommandList_struct {
   char   pad[PADSIZE];
 } CommandList_struct;
 
-//Configuration Table Structure
+/* Configuration Table Structure */
 typedef struct _HostWrite_struct {
   DWORD TransportRequest;
   DWORD Reserved;
@@ -326,4 +323,4 @@ typedef struct _CfgTable_struct {
   DWORD            MaxPhysicalDrivesPerLogicalUnit;
 } CfgTable_struct;
 #pragma pack()	 
-#endif // CCISS_CMD_H
+#endif /* CCISS_CMD_H */
diff --git a/drivers/block/cciss_scsi.h b/drivers/block/cciss_scsi.h
index 7b750245ae76..6d5822fe851a 100644
--- a/drivers/block/cciss_scsi.h
+++ b/drivers/block/cciss_scsi.h
@@ -25,16 +25,16 @@
 
 #include <scsi/scsicam.h> /* possibly irrelevant, since we don't show disks */
 
-		// the scsi id of the adapter...
+		/* the scsi id of the adapter... */
 #define SELF_SCSI_ID 15
-		// 15 is somewhat arbitrary, since the scsi-2 bus
-		// that's presented by the driver to the OS is
-		// fabricated.  The "real" scsi-3 bus the 
-		// hardware presents is fabricated too.
-		// The actual, honest-to-goodness physical
-		// bus that the devices are attached to is not 
-		// addressible natively, and may in fact turn
-		// out to be not scsi at all.
+		/* 15 is somewhat arbitrary, since the scsi-2 bus
+		   that's presented by the driver to the OS is
+		   fabricated.  The "real" scsi-3 bus the
+		   hardware presents is fabricated too.
+		   The actual, honest-to-goodness physical
+		   bus that the devices are attached to is not
+		   addressible natively, and may in fact turn
+		   out to be not scsi at all. */
 
 #define SCSI_CCISS_CAN_QUEUE 2
 
diff --git a/include/linux/cciss_ioctl.h b/include/linux/cciss_ioctl.h
index eb130b4d8e72..e7343a1a9ea8 100644
--- a/include/linux/cciss_ioctl.h
+++ b/include/linux/cciss_ioctl.h
@@ -37,12 +37,13 @@ typedef __u32 DriverVer_type;
 #define MAX_KMALLOC_SIZE 128000
 
 #ifndef CCISS_CMD_H
-// This defines are duplicated in cciss_cmd.h in the driver directory 
+/* This defines are duplicated in cciss_cmd.h in the driver directory */
 
-//general boundary definitions
-#define SENSEINFOBYTES          32//note that this value may vary between host implementations
+/* general boundary definitions */
+#define SENSEINFOBYTES          32 /* note that this value may vary
+				      between host implementations */
 
-//Command Status value
+/* Command Status value */
 #define CMD_SUCCESS             0x0000
 #define CMD_TARGET_STATUS       0x0001
 #define CMD_DATA_UNDERRUN       0x0002
@@ -57,24 +58,24 @@ typedef __u32 DriverVer_type;
 #define CMD_TIMEOUT             0x000B
 #define CMD_UNABORTABLE		0x000C
 
-//transfer direction
+/* transfer direction */
 #define XFER_NONE               0x00
 #define XFER_WRITE              0x01
 #define XFER_READ               0x02
 #define XFER_RSVD               0x03
 
-//task attribute
+/* task attribute */
 #define ATTR_UNTAGGED           0x00
 #define ATTR_SIMPLE             0x04
 #define ATTR_HEADOFQUEUE        0x05
 #define ATTR_ORDERED            0x06
 #define ATTR_ACA                0x07
 
-//cdb type
+/* cdb type */
 #define TYPE_CMD				0x00
 #define TYPE_MSG				0x01
 
-// Type defs used in the following structs
+/* Type defs used in the following structs */
 #define BYTE __u8
 #define WORD __u16
 #define HWORD __u16
@@ -82,28 +83,28 @@ typedef __u32 DriverVer_type;
 
 #define CISS_MAX_LUN	1024
 
-#define LEVEL2LUN   1   // index into Target(x) structure, due to byte swapping
+#define LEVEL2LUN   1 /* index into Target(x) structure, due to byte swapping */
 #define LEVEL3LUN   0
 
 #pragma pack(1)
 
-//Command List Structure
+/* Command List Structure */
 typedef union _SCSI3Addr_struct {
    struct {
     BYTE Dev;
     BYTE Bus:6;
-    BYTE Mode:2;        // b00
+    BYTE Mode:2;        /* b00 */
   } PeripDev;
    struct {
     BYTE DevLSB;
     BYTE DevMSB:6;
-    BYTE Mode:2;        // b01
+    BYTE Mode:2;        /* b01 */
   } LogDev;
    struct {
     BYTE Dev:5;
     BYTE Bus:3;
     BYTE Targ:6;
-    BYTE Mode:2;        // b10
+    BYTE Mode:2;        /* b10 */
   } LogUnit;
 } SCSI3Addr_struct;
 
@@ -111,7 +112,7 @@ typedef struct _PhysDevAddr_struct {
   DWORD             TargetId:24;
   DWORD             Bus:6;
   DWORD             Mode:2;
-  SCSI3Addr_struct  Target[2]; //2 level target device addr
+  SCSI3Addr_struct  Target[2]; /* 2 level target device addr */
 } PhysDevAddr_struct;
   
 typedef struct _LogDevAddr_struct {
@@ -146,8 +147,8 @@ typedef union _MoreErrInfo_struct{
   }Common_Info;
   struct{
     BYTE  Reserved[2];
-    BYTE  offense_size;//size of offending entry
-    BYTE  offense_num; //byte # of offense 0-base
+    BYTE  offense_size; /* size of offending entry */
+    BYTE  offense_num;  /* byte # of offense 0-base */
     DWORD offense_value;
   }Invalid_Cmd;
 }MoreErrInfo_struct;
-- 
cgit v1.2.3


From 429c42c9d246f5bda868495c09974312a0177328 Mon Sep 17 00:00:00 2001
From: dann frazier <dannf@hp.com>
Date: Wed, 17 Feb 2010 16:55:11 -0700
Subject: cciss: Consolidate duplicate bits in cciss_cmd.h & cciss_ioctl.h

There are several duplicate definitions in cciss_cmd.h and cciss_ioctl.h.
Consolidate these into the new cciss_defs.h file. This patch doesn't change
the definitions exposed under include/linux, so userspace apps shouldn't
be affected.

Acked-by: Stephen M. Cameron <scameron@beardog.cce.hp.com>
Signed-off-by: dann frazier <dannf@hp.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 drivers/block/cciss_cmd.h   | 113 +-------------------------------------
 include/linux/cciss_defs.h  | 130 ++++++++++++++++++++++++++++++++++++++++++++
 include/linux/cciss_ioctl.h | 129 +------------------------------------------
 3 files changed, 134 insertions(+), 238 deletions(-)
 create mode 100644 include/linux/cciss_defs.h

(limited to 'include/linux')

diff --git a/drivers/block/cciss_cmd.h b/drivers/block/cciss_cmd.h
index 277422b7e060..25f97623bacf 100644
--- a/drivers/block/cciss_cmd.h
+++ b/drivers/block/cciss_cmd.h
@@ -1,30 +1,16 @@
 #ifndef CCISS_CMD_H
 #define CCISS_CMD_H
+
+#include <linux/cciss_defs.h>
+
 /* DEFINES */
 #define CISS_VERSION "1.00"
 
 /* general boundary definitions */
-#define SENSEINFOBYTES          32 /* note that this value may vary
-				      between host implementations */
 #define MAXSGENTRIES            32
 #define CCISS_SG_CHAIN          0x80000000
 #define MAXREPLYQS              256
 
-/* Command Status value */
-#define CMD_SUCCESS             0x0000
-#define CMD_TARGET_STATUS       0x0001
-#define CMD_DATA_UNDERRUN       0x0002
-#define CMD_DATA_OVERRUN        0x0003
-#define CMD_INVALID             0x0004
-#define CMD_PROTOCOL_ERR        0x0005
-#define CMD_HARDWARE_ERR        0x0006
-#define CMD_CONNECTION_LOST     0x0007
-#define CMD_ABORTED             0x0008
-#define CMD_ABORT_FAILED        0x0009
-#define CMD_UNSOLICITED_ABORT   0x000A
-#define CMD_TIMEOUT             0x000B
-#define CMD_UNABORTABLE		0x000C
-
 /* Unit Attentions ASC's as defined for the MSA2012sa */
 #define POWER_OR_RESET			0x29
 #define STATE_CHANGED			0x2a
@@ -48,23 +34,6 @@
 #define ASYM_ACCESS_CHANGED		0x06
 #define LUN_CAPACITY_CHANGED		0x09
 
-/* transfer direction */
-#define XFER_NONE               0x00
-#define XFER_WRITE              0x01
-#define XFER_READ               0x02
-#define XFER_RSVD               0x03
-
-/* task attribute */
-#define ATTR_UNTAGGED           0x00
-#define ATTR_SIMPLE             0x04
-#define ATTR_HEADOFQUEUE        0x05
-#define ATTR_ORDERED            0x06
-#define ATTR_ACA                0x07
-
-/* cdb type */
-#define TYPE_CMD				0x00
-#define TYPE_MSG				0x01
-
 /* config space register offsets */
 #define CFG_VENDORID            0x00
 #define CFG_DEVICEID            0x02
@@ -103,14 +72,9 @@ typedef union _u64bit
 } u64bit;
 
 /* Type defs used in the following structs */
-#define BYTE __u8
-#define WORD __u16
-#define HWORD __u16
-#define DWORD __u32
 #define QWORD vals32 
 
 /* STRUCTURES */
-#define CISS_MAX_LUN	1024
 #define CISS_MAX_PHYS_LUN	1024
 /* SCSI-3 Cmmands */
 
@@ -176,45 +140,6 @@ typedef struct _ReadCapdata_struct_16
 #define CCISS_CACHE_FLUSH 0x01	/* C2 was already being used by CCISS */
 
 /* Command List Structure */
-typedef union _SCSI3Addr_struct {
-   struct {
-    BYTE Dev;
-    BYTE Bus:6;
-    BYTE Mode:2;        /* b00 */
-  } PeripDev;
-   struct {
-    BYTE DevLSB;
-    BYTE DevMSB:6;
-    BYTE Mode:2;        /* b01 */
-  } LogDev;
-   struct {
-    BYTE Dev:5;
-    BYTE Bus:3;
-    BYTE Targ:6;
-    BYTE Mode:2;        /* b10 */
-  } LogUnit;
-} SCSI3Addr_struct;
-
-typedef struct _PhysDevAddr_struct {
-  DWORD             TargetId:24;
-  DWORD             Bus:6;
-  DWORD             Mode:2;
-  SCSI3Addr_struct  Target[2]; /* 2 level target device addr */
-} PhysDevAddr_struct;
-  
-typedef struct _LogDevAddr_struct {
-  DWORD            VolId:30;
-  DWORD            Mode:2;
-  BYTE             reserved[4];
-} LogDevAddr_struct;
-
-typedef union _LUNAddr_struct {
-  BYTE               LunAddrBytes[8];
-  SCSI3Addr_struct   SCSI3Lun[4];
-  PhysDevAddr_struct PhysDev;
-  LogDevAddr_struct  LogDev;
-} LUNAddr_struct;
-
 #define CTLR_LUNID "\0\0\0\0\0\0\0\0"
 
 typedef struct _CommandListHeader_struct {
@@ -224,16 +149,6 @@ typedef struct _CommandListHeader_struct {
   QWORD             Tag;
   LUNAddr_struct    LUN;
 } CommandListHeader_struct;
-typedef struct _RequestBlock_struct {
-  BYTE   CDBLen;
-  struct {
-    BYTE Type:3;
-    BYTE Attribute:3;
-    BYTE Direction:2;
-  } Type;
-  HWORD  Timeout;
-  BYTE   CDB[16];
-} RequestBlock_struct;
 typedef struct _ErrDescriptor_struct {
   QWORD  Addr;
   DWORD  Len;
@@ -244,28 +159,6 @@ typedef struct _SGDescriptor_struct {
   DWORD  Ext;
 } SGDescriptor_struct;
 
-typedef union _MoreErrInfo_struct{
-  struct {
-    BYTE  Reserved[3];
-    BYTE  Type;
-    DWORD ErrorInfo;
-  }Common_Info;
-  struct{
-    BYTE  Reserved[2];
-    BYTE  offense_size; /* size of offending entry */
-    BYTE  offense_num;  /* byte # of offense 0-base */
-    DWORD offense_value;
-  }Invalid_Cmd;
-}MoreErrInfo_struct;
-typedef struct _ErrorInfo_struct {
-  BYTE               ScsiStatus;
-  BYTE               SenseLen;
-  HWORD              CommandStatus;
-  DWORD              ResidualCnt;
-  MoreErrInfo_struct MoreErrInfo;
-  BYTE               SenseInfo[SENSEINFOBYTES];
-} ErrorInfo_struct;
-
 /* Command types */
 #define CMD_RWREQ       0x00
 #define CMD_IOCTL_PEND  0x01
diff --git a/include/linux/cciss_defs.h b/include/linux/cciss_defs.h
new file mode 100644
index 000000000000..316b670d4e33
--- /dev/null
+++ b/include/linux/cciss_defs.h
@@ -0,0 +1,130 @@
+#ifndef CCISS_DEFS_H
+#define CCISS_DEFS_H
+
+#include <linux/types.h>
+
+/* general boundary definitions */
+#define SENSEINFOBYTES          32 /* note that this value may vary
+				      between host implementations */
+
+/* Command Status value */
+#define CMD_SUCCESS             0x0000
+#define CMD_TARGET_STATUS       0x0001
+#define CMD_DATA_UNDERRUN       0x0002
+#define CMD_DATA_OVERRUN        0x0003
+#define CMD_INVALID             0x0004
+#define CMD_PROTOCOL_ERR        0x0005
+#define CMD_HARDWARE_ERR        0x0006
+#define CMD_CONNECTION_LOST     0x0007
+#define CMD_ABORTED             0x0008
+#define CMD_ABORT_FAILED        0x0009
+#define CMD_UNSOLICITED_ABORT   0x000A
+#define CMD_TIMEOUT             0x000B
+#define CMD_UNABORTABLE		0x000C
+
+/* transfer direction */
+#define XFER_NONE               0x00
+#define XFER_WRITE              0x01
+#define XFER_READ               0x02
+#define XFER_RSVD               0x03
+
+/* task attribute */
+#define ATTR_UNTAGGED           0x00
+#define ATTR_SIMPLE             0x04
+#define ATTR_HEADOFQUEUE        0x05
+#define ATTR_ORDERED            0x06
+#define ATTR_ACA                0x07
+
+/* cdb type */
+#define TYPE_CMD				0x00
+#define TYPE_MSG				0x01
+
+/* Type defs used in the following structs */
+#define BYTE __u8
+#define WORD __u16
+#define HWORD __u16
+#define DWORD __u32
+
+#define CISS_MAX_LUN	1024
+
+#define LEVEL2LUN   1 /* index into Target(x) structure, due to byte swapping */
+#define LEVEL3LUN   0
+
+#pragma pack(1)
+
+/* Command List Structure */
+typedef union _SCSI3Addr_struct {
+   struct {
+    BYTE Dev;
+    BYTE Bus:6;
+    BYTE Mode:2;        /* b00 */
+  } PeripDev;
+   struct {
+    BYTE DevLSB;
+    BYTE DevMSB:6;
+    BYTE Mode:2;        /* b01 */
+  } LogDev;
+   struct {
+    BYTE Dev:5;
+    BYTE Bus:3;
+    BYTE Targ:6;
+    BYTE Mode:2;        /* b10 */
+  } LogUnit;
+} SCSI3Addr_struct;
+
+typedef struct _PhysDevAddr_struct {
+  DWORD             TargetId:24;
+  DWORD             Bus:6;
+  DWORD             Mode:2;
+  SCSI3Addr_struct  Target[2]; /* 2 level target device addr */
+} PhysDevAddr_struct;
+
+typedef struct _LogDevAddr_struct {
+  DWORD            VolId:30;
+  DWORD            Mode:2;
+  BYTE             reserved[4];
+} LogDevAddr_struct;
+
+typedef union _LUNAddr_struct {
+  BYTE               LunAddrBytes[8];
+  SCSI3Addr_struct   SCSI3Lun[4];
+  PhysDevAddr_struct PhysDev;
+  LogDevAddr_struct  LogDev;
+} LUNAddr_struct;
+
+typedef struct _RequestBlock_struct {
+  BYTE   CDBLen;
+  struct {
+    BYTE Type:3;
+    BYTE Attribute:3;
+    BYTE Direction:2;
+  } Type;
+  HWORD  Timeout;
+  BYTE   CDB[16];
+} RequestBlock_struct;
+
+typedef union _MoreErrInfo_struct{
+  struct {
+    BYTE  Reserved[3];
+    BYTE  Type;
+    DWORD ErrorInfo;
+  } Common_Info;
+  struct{
+    BYTE  Reserved[2];
+    BYTE  offense_size; /* size of offending entry */
+    BYTE  offense_num;  /* byte # of offense 0-base */
+    DWORD offense_value;
+  } Invalid_Cmd;
+} MoreErrInfo_struct;
+typedef struct _ErrorInfo_struct {
+  BYTE               ScsiStatus;
+  BYTE               SenseLen;
+  HWORD              CommandStatus;
+  DWORD              ResidualCnt;
+  MoreErrInfo_struct MoreErrInfo;
+  BYTE               SenseInfo[SENSEINFOBYTES];
+} ErrorInfo_struct;
+
+#pragma pack()
+
+#endif /* CCISS_DEFS_H */
diff --git a/include/linux/cciss_ioctl.h b/include/linux/cciss_ioctl.h
index e7343a1a9ea8..986493f5b92b 100644
--- a/include/linux/cciss_ioctl.h
+++ b/include/linux/cciss_ioctl.h
@@ -3,6 +3,7 @@
 
 #include <linux/types.h>
 #include <linux/ioctl.h>
+#include <linux/cciss_defs.h>
 
 #define CCISS_IOC_MAGIC 'B'
 
@@ -36,134 +37,6 @@ typedef __u32 DriverVer_type;
 
 #define MAX_KMALLOC_SIZE 128000
 
-#ifndef CCISS_CMD_H
-/* This defines are duplicated in cciss_cmd.h in the driver directory */
-
-/* general boundary definitions */
-#define SENSEINFOBYTES          32 /* note that this value may vary
-				      between host implementations */
-
-/* Command Status value */
-#define CMD_SUCCESS             0x0000
-#define CMD_TARGET_STATUS       0x0001
-#define CMD_DATA_UNDERRUN       0x0002
-#define CMD_DATA_OVERRUN        0x0003
-#define CMD_INVALID             0x0004
-#define CMD_PROTOCOL_ERR        0x0005
-#define CMD_HARDWARE_ERR        0x0006
-#define CMD_CONNECTION_LOST     0x0007
-#define CMD_ABORTED             0x0008
-#define CMD_ABORT_FAILED        0x0009
-#define CMD_UNSOLICITED_ABORT   0x000A
-#define CMD_TIMEOUT             0x000B
-#define CMD_UNABORTABLE		0x000C
-
-/* transfer direction */
-#define XFER_NONE               0x00
-#define XFER_WRITE              0x01
-#define XFER_READ               0x02
-#define XFER_RSVD               0x03
-
-/* task attribute */
-#define ATTR_UNTAGGED           0x00
-#define ATTR_SIMPLE             0x04
-#define ATTR_HEADOFQUEUE        0x05
-#define ATTR_ORDERED            0x06
-#define ATTR_ACA                0x07
-
-/* cdb type */
-#define TYPE_CMD				0x00
-#define TYPE_MSG				0x01
-
-/* Type defs used in the following structs */
-#define BYTE __u8
-#define WORD __u16
-#define HWORD __u16
-#define DWORD __u32
-
-#define CISS_MAX_LUN	1024
-
-#define LEVEL2LUN   1 /* index into Target(x) structure, due to byte swapping */
-#define LEVEL3LUN   0
-
-#pragma pack(1)
-
-/* Command List Structure */
-typedef union _SCSI3Addr_struct {
-   struct {
-    BYTE Dev;
-    BYTE Bus:6;
-    BYTE Mode:2;        /* b00 */
-  } PeripDev;
-   struct {
-    BYTE DevLSB;
-    BYTE DevMSB:6;
-    BYTE Mode:2;        /* b01 */
-  } LogDev;
-   struct {
-    BYTE Dev:5;
-    BYTE Bus:3;
-    BYTE Targ:6;
-    BYTE Mode:2;        /* b10 */
-  } LogUnit;
-} SCSI3Addr_struct;
-
-typedef struct _PhysDevAddr_struct {
-  DWORD             TargetId:24;
-  DWORD             Bus:6;
-  DWORD             Mode:2;
-  SCSI3Addr_struct  Target[2]; /* 2 level target device addr */
-} PhysDevAddr_struct;
-  
-typedef struct _LogDevAddr_struct {
-  DWORD            VolId:30;
-  DWORD            Mode:2;
-  BYTE             reserved[4];
-} LogDevAddr_struct;
-
-typedef union _LUNAddr_struct {
-  BYTE               LunAddrBytes[8];
-  SCSI3Addr_struct   SCSI3Lun[4];
-  PhysDevAddr_struct PhysDev;
-  LogDevAddr_struct  LogDev;
-} LUNAddr_struct;
-
-typedef struct _RequestBlock_struct {
-  BYTE   CDBLen;
-  struct {
-    BYTE Type:3;
-    BYTE Attribute:3;
-    BYTE Direction:2;
-  } Type;
-  HWORD  Timeout;
-  BYTE   CDB[16];
-} RequestBlock_struct;
-
-typedef union _MoreErrInfo_struct{
-  struct {
-    BYTE  Reserved[3];
-    BYTE  Type;
-    DWORD ErrorInfo;
-  }Common_Info;
-  struct{
-    BYTE  Reserved[2];
-    BYTE  offense_size; /* size of offending entry */
-    BYTE  offense_num;  /* byte # of offense 0-base */
-    DWORD offense_value;
-  }Invalid_Cmd;
-}MoreErrInfo_struct;
-typedef struct _ErrorInfo_struct {
-  BYTE               ScsiStatus;
-  BYTE               SenseLen;
-  HWORD              CommandStatus;
-  DWORD              ResidualCnt;
-  MoreErrInfo_struct MoreErrInfo;
-  BYTE               SenseInfo[SENSEINFOBYTES];
-} ErrorInfo_struct;
-
-#pragma pack()
-#endif /* CCISS_CMD_H */ 
-
 typedef struct _IOCTL_Command_struct {
   LUNAddr_struct	   LUN_info;
   RequestBlock_struct      Request;
-- 
cgit v1.2.3


From 1cc523271ef0b6305c565a143e3d48f6fff826dd Mon Sep 17 00:00:00 2001
From: stephen hemminger <shemminger@vyatta.com>
Date: Mon, 22 Feb 2010 07:57:17 +0000
Subject: seq_file: add RCU versions of new hlist/list iterators (v3)

Many usages of seq_file use RCU protected lists, so non RCU
iterators will not work safely.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 fs/seq_file.c            | 71 ++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/rculist.h  |  5 ++++
 include/linux/seq_file.h | 15 +++++++---
 3 files changed, 87 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/fs/seq_file.c b/fs/seq_file.c
index f65b16f02da3..5afd554efad3 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -750,3 +750,74 @@ struct hlist_node *seq_hlist_next(void *v, struct hlist_head *head,
 		return node->next;
 }
 EXPORT_SYMBOL(seq_hlist_next);
+
+/**
+ * seq_hlist_start_rcu - start an iteration of a hlist protected by RCU
+ * @head: the head of the hlist
+ * @pos:  the start position of the sequence
+ *
+ * Called at seq_file->op->start().
+ *
+ * This list-traversal primitive may safely run concurrently with
+ * the _rcu list-mutation primitives such as hlist_add_head_rcu()
+ * as long as the traversal is guarded by rcu_read_lock().
+ */
+struct hlist_node *seq_hlist_start_rcu(struct hlist_head *head,
+				       loff_t pos)
+{
+	struct hlist_node *node;
+
+	__hlist_for_each_rcu(node, head)
+		if (pos-- == 0)
+			return node;
+	return NULL;
+}
+EXPORT_SYMBOL(seq_hlist_start_rcu);
+
+/**
+ * seq_hlist_start_head_rcu - start an iteration of a hlist protected by RCU
+ * @head: the head of the hlist
+ * @pos:  the start position of the sequence
+ *
+ * Called at seq_file->op->start(). Call this function if you want to
+ * print a header at the top of the output.
+ *
+ * This list-traversal primitive may safely run concurrently with
+ * the _rcu list-mutation primitives such as hlist_add_head_rcu()
+ * as long as the traversal is guarded by rcu_read_lock().
+ */
+struct hlist_node *seq_hlist_start_head_rcu(struct hlist_head *head,
+					    loff_t pos)
+{
+	if (!pos)
+		return SEQ_START_TOKEN;
+
+	return seq_hlist_start_rcu(head, pos - 1);
+}
+EXPORT_SYMBOL(seq_hlist_start_head_rcu);
+
+/**
+ * seq_hlist_next_rcu - move to the next position of the hlist protected by RCU
+ * @v:    the current iterator
+ * @head: the head of the hlist
+ * @pos:  the current posision
+ *
+ * Called at seq_file->op->next().
+ *
+ * This list-traversal primitive may safely run concurrently with
+ * the _rcu list-mutation primitives such as hlist_add_head_rcu()
+ * as long as the traversal is guarded by rcu_read_lock().
+ */
+struct hlist_node *seq_hlist_next_rcu(void *v,
+				      struct hlist_head *head,
+				      loff_t *ppos)
+{
+	struct hlist_node *node = v;
+
+	++*ppos;
+	if (v == SEQ_START_TOKEN)
+		return rcu_dereference(head->first);
+	else
+		return rcu_dereference(node->next);
+}
+EXPORT_SYMBOL(seq_hlist_next_rcu);
diff --git a/include/linux/rculist.h b/include/linux/rculist.h
index 1bf0f708c4fc..701fe9cb552a 100644
--- a/include/linux/rculist.h
+++ b/include/linux/rculist.h
@@ -406,6 +406,11 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev,
 		n->next->pprev = &n->next;
 }
 
+#define __hlist_for_each_rcu(pos, head)			\
+	for (pos = rcu_dereference((head)->first);	\
+	     pos && ({ prefetch(pos->next); 1; });	\
+	     pos = rcu_dereference(pos->next))
+
 /**
  * hlist_for_each_entry_rcu - iterate over rcu list of given type
  * @tpos:	the type * to use as a loop cursor.
diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h
index c95bcdc18f4c..03c0232b4169 100644
--- a/include/linux/seq_file.h
+++ b/include/linux/seq_file.h
@@ -140,10 +140,17 @@ extern struct list_head *seq_list_next(void *v, struct list_head *head,
  */
 
 extern struct hlist_node *seq_hlist_start(struct hlist_head *head,
-		loff_t pos);
+					  loff_t pos);
 extern struct hlist_node *seq_hlist_start_head(struct hlist_head *head,
-		loff_t pos);
+					       loff_t pos);
 extern struct hlist_node *seq_hlist_next(void *v, struct hlist_head *head,
-		loff_t *ppos);
-
+					 loff_t *ppos);
+
+extern struct hlist_node *seq_hlist_start_rcu(struct hlist_head *head,
+					      loff_t pos);
+extern struct hlist_node *seq_hlist_start_head_rcu(struct hlist_head *head,
+						   loff_t pos);
+extern struct hlist_node *seq_hlist_next_rcu(void *v,
+						   struct hlist_head *head,
+						   loff_t *ppos);
 #endif
-- 
cgit v1.2.3


From 536c8cb49eccd4f753b4782e7e975ef87359cb44 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <matthew@wil.cx>
Date: Sun, 13 Dec 2009 08:11:31 -0500
Subject: PCI: Unify pcie_link_speed and pci_bus_speed

These enums must not overlap anyway, since we only have a single
pci_bus_speed_strings array.  Use a single enum, and move it to
pci.h.  Add 'SPEED' to the pcie names to make it clear what they are.

Signed-off-by: Matthew Wilcox <willy@linux.intel.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/hotplug/pciehp_hpc.c | 16 ++++++++--------
 include/linux/pci.h              | 21 +++++++++++++++++++++
 include/linux/pci_hotplug.h      | 26 --------------------------
 3 files changed, 29 insertions(+), 34 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c
index 10040d58c8ef..6744ca1d8d01 100644
--- a/drivers/pci/hotplug/pciehp_hpc.c
+++ b/drivers/pci/hotplug/pciehp_hpc.c
@@ -613,7 +613,7 @@ static irqreturn_t pcie_isr(int irq, void *dev_id)
 int pciehp_get_max_link_speed(struct slot *slot, enum pci_bus_speed *value)
 {
 	struct controller *ctrl = slot->ctrl;
-	enum pcie_link_speed lnk_speed;
+	enum pci_bus_speed lnk_speed;
 	u32	lnk_cap;
 	int retval = 0;
 
@@ -625,13 +625,13 @@ int pciehp_get_max_link_speed(struct slot *slot, enum pci_bus_speed *value)
 
 	switch (lnk_cap & 0x000F) {
 	case 1:
-		lnk_speed = PCIE_2_5GB;
+		lnk_speed = PCIE_SPEED_2_5GT;
 		break;
 	case 2:
-		lnk_speed = PCIE_5_0GB;
+		lnk_speed = PCIE_SPEED_5_0GT;
 		break;
 	default:
-		lnk_speed = PCIE_LNK_SPEED_UNKNOWN;
+		lnk_speed = PCI_SPEED_UNKNOWN;
 		break;
 	}
 
@@ -694,7 +694,7 @@ int pciehp_get_max_lnk_width(struct slot *slot,
 int pciehp_get_cur_link_speed(struct slot *slot, enum pci_bus_speed *value)
 {
 	struct controller *ctrl = slot->ctrl;
-	enum pcie_link_speed lnk_speed = PCI_SPEED_UNKNOWN;
+	enum pci_bus_speed lnk_speed = PCI_SPEED_UNKNOWN;
 	int retval = 0;
 	u16 lnk_status;
 
@@ -707,13 +707,13 @@ int pciehp_get_cur_link_speed(struct slot *slot, enum pci_bus_speed *value)
 
 	switch (lnk_status & PCI_EXP_LNKSTA_CLS) {
 	case 1:
-		lnk_speed = PCIE_2_5GB;
+		lnk_speed = PCIE_SPEED_2_5GT;
 		break;
 	case 2:
-		lnk_speed = PCIE_5_0GB;
+		lnk_speed = PCIE_SPEED_5_0GT;
 		break;
 	default:
-		lnk_speed = PCIE_LNK_SPEED_UNKNOWN;
+		lnk_speed = PCI_SPEED_UNKNOWN;
 		break;
 	}
 
diff --git a/include/linux/pci.h b/include/linux/pci.h
index c1968f464c38..d76a8a0b6b53 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -187,6 +187,27 @@ enum pci_bus_flags {
 	PCI_BUS_FLAGS_NO_MMRBC = (__force pci_bus_flags_t) 2,
 };
 
+/* Based on the PCI Hotplug Spec, but some values are made up by us */
+enum pci_bus_speed {
+	PCI_SPEED_33MHz			= 0x00,
+	PCI_SPEED_66MHz			= 0x01,
+	PCI_SPEED_66MHz_PCIX		= 0x02,
+	PCI_SPEED_100MHz_PCIX		= 0x03,
+	PCI_SPEED_133MHz_PCIX		= 0x04,
+	PCI_SPEED_66MHz_PCIX_ECC	= 0x05,
+	PCI_SPEED_100MHz_PCIX_ECC	= 0x06,
+	PCI_SPEED_133MHz_PCIX_ECC	= 0x07,
+	PCI_SPEED_66MHz_PCIX_266	= 0x09,
+	PCI_SPEED_100MHz_PCIX_266	= 0x0a,
+	PCI_SPEED_133MHz_PCIX_266	= 0x0b,
+	PCI_SPEED_66MHz_PCIX_533	= 0x11,
+	PCI_SPEED_100MHz_PCIX_533	= 0x12,
+	PCI_SPEED_133MHz_PCIX_533	= 0x13,
+	PCIE_SPEED_2_5GT		= 0x14,
+	PCIE_SPEED_5_0GT		= 0x15,
+	PCI_SPEED_UNKNOWN		= 0xff,
+};
+
 struct pci_cap_saved_state {
 	struct hlist_node next;
 	char cap_nr;
diff --git a/include/linux/pci_hotplug.h b/include/linux/pci_hotplug.h
index 652ba797696d..56251ac385e9 100644
--- a/include/linux/pci_hotplug.h
+++ b/include/linux/pci_hotplug.h
@@ -28,26 +28,6 @@
 #ifndef _PCI_HOTPLUG_H
 #define _PCI_HOTPLUG_H
 
-
-/* These values come from the PCI Hotplug Spec */
-enum pci_bus_speed {
-	PCI_SPEED_33MHz			= 0x00,
-	PCI_SPEED_66MHz			= 0x01,
-	PCI_SPEED_66MHz_PCIX		= 0x02,
-	PCI_SPEED_100MHz_PCIX		= 0x03,
-	PCI_SPEED_133MHz_PCIX		= 0x04,
-	PCI_SPEED_66MHz_PCIX_ECC	= 0x05,
-	PCI_SPEED_100MHz_PCIX_ECC	= 0x06,
-	PCI_SPEED_133MHz_PCIX_ECC	= 0x07,
-	PCI_SPEED_66MHz_PCIX_266	= 0x09,
-	PCI_SPEED_100MHz_PCIX_266	= 0x0a,
-	PCI_SPEED_133MHz_PCIX_266	= 0x0b,
-	PCI_SPEED_66MHz_PCIX_533	= 0x11,
-	PCI_SPEED_100MHz_PCIX_533	= 0x12,
-	PCI_SPEED_133MHz_PCIX_533	= 0x13,
-	PCI_SPEED_UNKNOWN		= 0xff,
-};
-
 /* These values come from the PCI Express Spec */
 enum pcie_link_width {
 	PCIE_LNK_WIDTH_RESRV	= 0x00,
@@ -61,12 +41,6 @@ enum pcie_link_width {
 	PCIE_LNK_WIDTH_UNKNOWN  = 0xFF,
 };
 
-enum pcie_link_speed {
-	PCIE_2_5GB		= 0x14,
-	PCIE_5_0GB		= 0x15,
-	PCIE_LNK_SPEED_UNKNOWN	= 0xFF,
-};
-
 /**
  * struct hotplug_slot_ops -the callbacks that the hotplug pci core can use
  * @owner: The module owner of this structure
-- 
cgit v1.2.3


From 3749c51ac6c1560aa1cb1520066bed84c6f8152a Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <matthew@wil.cx>
Date: Sun, 13 Dec 2009 08:11:32 -0500
Subject: PCI: Make current and maximum bus speeds part of the PCI core

Move the max_bus_speed and cur_bus_speed into the pci_bus.  Expose the
values through the PCI slot driver instead of the hotplug slot driver.
Update all the hotplug drivers to use the pci_bus instead of their own
data structures.

Signed-off-by: Matthew Wilcox <willy@linux.intel.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/hotplug/acpiphp_core.c     |   2 -
 drivers/pci/hotplug/cpqphp.h           |   2 -
 drivers/pci/hotplug/cpqphp_core.c      |  57 ++++---------
 drivers/pci/hotplug/cpqphp_ctrl.c      |  27 +++---
 drivers/pci/hotplug/ibmphp_core.c      | 106 +++++++----------------
 drivers/pci/hotplug/pci_hotplug_core.c | 132 -----------------------------
 drivers/pci/hotplug/pciehp_core.c      |  25 ------
 drivers/pci/hotplug/pciehp_hpc.c       |  72 ++--------------
 drivers/pci/hotplug/rpaphp_core.c      |  23 ++---
 drivers/pci/hotplug/shpchp.h           |   2 -
 drivers/pci/hotplug/shpchp_core.c      |  35 --------
 drivers/pci/hotplug/shpchp_ctrl.c      |  13 +--
 drivers/pci/hotplug/shpchp_hpc.c       | 149 +++++++++++++++++----------------
 drivers/pci/probe.c                    |  27 ++++++
 drivers/pci/slot.c                     |  54 ++++++++++++
 include/linux/pci.h                    |   3 +
 include/linux/pci_hotplug.h            |  15 +---
 17 files changed, 246 insertions(+), 498 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/hotplug/acpiphp_core.c b/drivers/pci/hotplug/acpiphp_core.c
index 4dd7114964ac..efa9f2de51c1 100644
--- a/drivers/pci/hotplug/acpiphp_core.c
+++ b/drivers/pci/hotplug/acpiphp_core.c
@@ -332,8 +332,6 @@ int acpiphp_register_hotplug_slot(struct acpiphp_slot *acpiphp_slot)
 	slot->hotplug_slot->info->attention_status = 0;
 	slot->hotplug_slot->info->latch_status = acpiphp_get_latch_status(slot->acpi_slot);
 	slot->hotplug_slot->info->adapter_status = acpiphp_get_adapter_status(slot->acpi_slot);
-	slot->hotplug_slot->info->max_bus_speed = PCI_SPEED_UNKNOWN;
-	slot->hotplug_slot->info->cur_bus_speed = PCI_SPEED_UNKNOWN;
 
 	acpiphp_slot->slot = slot;
 	snprintf(name, SLOT_NAME_SIZE, "%llu", slot->acpi_slot->sun);
diff --git a/drivers/pci/hotplug/cpqphp.h b/drivers/pci/hotplug/cpqphp.h
index 9c6a9fd26812..d8ffc7366801 100644
--- a/drivers/pci/hotplug/cpqphp.h
+++ b/drivers/pci/hotplug/cpqphp.h
@@ -310,8 +310,6 @@ struct controller {
 	u8 first_slot;
 	u8 add_support;
 	u8 push_flag;
-	enum pci_bus_speed speed;
-	enum pci_bus_speed speed_capability;
 	u8 push_button;			/* 0 = no pushbutton, 1 = pushbutton present */
 	u8 slot_switch_type;		/* 0 = no switch, 1 = switch present */
 	u8 defeature_PHP;		/* 0 = PHP not supported, 1 = PHP supported */
diff --git a/drivers/pci/hotplug/cpqphp_core.c b/drivers/pci/hotplug/cpqphp_core.c
index 075b4f4b6e0d..f184d1d2ecbe 100644
--- a/drivers/pci/hotplug/cpqphp_core.c
+++ b/drivers/pci/hotplug/cpqphp_core.c
@@ -583,30 +583,6 @@ static int get_adapter_status(struct hotplug_slot *hotplug_slot, u8 *value)
 	return 0;
 }
 
-static int get_max_bus_speed (struct hotplug_slot *hotplug_slot, enum pci_bus_speed *value)
-{
-	struct slot *slot = hotplug_slot->private;
-	struct controller *ctrl = slot->ctrl;
-
-	dbg("%s - physical_slot = %s\n", __func__, slot_name(slot));
-
-	*value = ctrl->speed_capability;
-
-	return 0;
-}
-
-static int get_cur_bus_speed (struct hotplug_slot *hotplug_slot, enum pci_bus_speed *value)
-{
-	struct slot *slot = hotplug_slot->private;
-	struct controller *ctrl = slot->ctrl;
-
-	dbg("%s - physical_slot = %s\n", __func__, slot_name(slot));
-
-	*value = ctrl->speed;
-
-	return 0;
-}
-
 static struct hotplug_slot_ops cpqphp_hotplug_slot_ops = {
 	.set_attention_status =	set_attention_status,
 	.enable_slot =		process_SI,
@@ -616,8 +592,6 @@ static struct hotplug_slot_ops cpqphp_hotplug_slot_ops = {
 	.get_attention_status =	get_attention_status,
 	.get_latch_status =	get_latch_status,
 	.get_adapter_status =	get_adapter_status,
-	.get_max_bus_speed =	get_max_bus_speed,
-	.get_cur_bus_speed =	get_cur_bus_speed,
 };
 
 #define SLOT_NAME_SIZE 10
@@ -629,6 +603,7 @@ static int ctrl_slot_setup(struct controller *ctrl,
 	struct slot *slot;
 	struct hotplug_slot *hotplug_slot;
 	struct hotplug_slot_info *hotplug_slot_info;
+	struct pci_bus *bus = ctrl->pci_bus;
 	u8 number_of_slots;
 	u8 slot_device;
 	u8 slot_number;
@@ -694,7 +669,7 @@ static int ctrl_slot_setup(struct controller *ctrl,
 			slot->capabilities |= PCISLOT_64_BIT_SUPPORTED;
 		if (is_slot66mhz(slot))
 			slot->capabilities |= PCISLOT_66_MHZ_SUPPORTED;
-		if (ctrl->speed == PCI_SPEED_66MHz)
+		if (bus->cur_bus_speed == PCI_SPEED_66MHz)
 			slot->capabilities |= PCISLOT_66_MHZ_OPERATION;
 
 		ctrl_slot =
@@ -844,6 +819,7 @@ static int cpqhpc_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	u32 rc;
 	struct controller *ctrl;
 	struct pci_func *func;
+	struct pci_bus *bus;
 	int err;
 
 	err = pci_enable_device(pdev);
@@ -852,6 +828,7 @@ static int cpqhpc_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 			pci_name(pdev), err);
 		return err;
 	}
+	bus = pdev->subordinate;
 
 	/* Need to read VID early b/c it's used to differentiate CPQ and INTC
 	 * discovery
@@ -929,22 +906,22 @@ static int cpqhpc_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 			pci_read_config_byte(pdev, 0x41, &bus_cap);
 			if (bus_cap & 0x80) {
 				dbg("bus max supports 133MHz PCI-X\n");
-				ctrl->speed_capability = PCI_SPEED_133MHz_PCIX;
+				bus->max_bus_speed = PCI_SPEED_133MHz_PCIX;
 				break;
 			}
 			if (bus_cap & 0x40) {
 				dbg("bus max supports 100MHz PCI-X\n");
-				ctrl->speed_capability = PCI_SPEED_100MHz_PCIX;
+				bus->max_bus_speed = PCI_SPEED_100MHz_PCIX;
 				break;
 			}
 			if (bus_cap & 20) {
 				dbg("bus max supports 66MHz PCI-X\n");
-				ctrl->speed_capability = PCI_SPEED_66MHz_PCIX;
+				bus->max_bus_speed = PCI_SPEED_66MHz_PCIX;
 				break;
 			}
 			if (bus_cap & 10) {
 				dbg("bus max supports 66MHz PCI\n");
-				ctrl->speed_capability = PCI_SPEED_66MHz;
+				bus->max_bus_speed = PCI_SPEED_66MHz;
 				break;
 			}
 
@@ -955,7 +932,7 @@ static int cpqhpc_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 		case PCI_SUB_HPC_ID:
 			/* Original 6500/7000 implementation */
 			ctrl->slot_switch_type = 1;
-			ctrl->speed_capability = PCI_SPEED_33MHz;
+			bus->max_bus_speed = PCI_SPEED_33MHz;
 			ctrl->push_button = 0;
 			ctrl->pci_config_space = 1;
 			ctrl->defeature_PHP = 1;
@@ -966,7 +943,7 @@ static int cpqhpc_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 			/* First Pushbutton implementation */
 			ctrl->push_flag = 1;
 			ctrl->slot_switch_type = 1;
-			ctrl->speed_capability = PCI_SPEED_33MHz;
+			bus->max_bus_speed = PCI_SPEED_33MHz;
 			ctrl->push_button = 1;
 			ctrl->pci_config_space = 1;
 			ctrl->defeature_PHP = 1;
@@ -976,7 +953,7 @@ static int cpqhpc_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 		case PCI_SUB_HPC_ID_INTC:
 			/* Third party (6500/7000) */
 			ctrl->slot_switch_type = 1;
-			ctrl->speed_capability = PCI_SPEED_33MHz;
+			bus->max_bus_speed = PCI_SPEED_33MHz;
 			ctrl->push_button = 0;
 			ctrl->pci_config_space = 1;
 			ctrl->defeature_PHP = 1;
@@ -987,7 +964,7 @@ static int cpqhpc_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 			/* First 66 Mhz implementation */
 			ctrl->push_flag = 1;
 			ctrl->slot_switch_type = 1;
-			ctrl->speed_capability = PCI_SPEED_66MHz;
+			bus->max_bus_speed = PCI_SPEED_66MHz;
 			ctrl->push_button = 1;
 			ctrl->pci_config_space = 1;
 			ctrl->defeature_PHP = 1;
@@ -998,7 +975,7 @@ static int cpqhpc_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 			/* First PCI-X implementation, 100MHz */
 			ctrl->push_flag = 1;
 			ctrl->slot_switch_type = 1;
-			ctrl->speed_capability = PCI_SPEED_100MHz_PCIX;
+			bus->max_bus_speed = PCI_SPEED_100MHz_PCIX;
 			ctrl->push_button = 1;
 			ctrl->pci_config_space = 1;
 			ctrl->defeature_PHP = 1;
@@ -1015,9 +992,9 @@ static int cpqhpc_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	case PCI_VENDOR_ID_INTEL:
 		/* Check for speed capability (0=33, 1=66) */
 		if (subsystem_deviceid & 0x0001)
-			ctrl->speed_capability = PCI_SPEED_66MHz;
+			bus->max_bus_speed = PCI_SPEED_66MHz;
 		else
-			ctrl->speed_capability = PCI_SPEED_33MHz;
+			bus->max_bus_speed = PCI_SPEED_33MHz;
 
 		/* Check for push button */
 		if (subsystem_deviceid & 0x0002)
@@ -1079,7 +1056,7 @@ static int cpqhpc_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 					pdev->bus->number);
 
 	dbg("Hotplug controller capabilities:\n");
-	dbg("    speed_capability       %d\n", ctrl->speed_capability);
+	dbg("    speed_capability       %d\n", bus->max_bus_speed);
 	dbg("    slot_switch_type       %s\n", ctrl->slot_switch_type ?
 					"switch present" : "no switch");
 	dbg("    defeature_PHP          %s\n", ctrl->defeature_PHP ?
@@ -1142,7 +1119,7 @@ static int cpqhpc_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	}
 
 	/* Check for 66Mhz operation */
-	ctrl->speed = get_controller_speed(ctrl);
+	bus->cur_bus_speed = get_controller_speed(ctrl);
 
 
 	/********************************************************
diff --git a/drivers/pci/hotplug/cpqphp_ctrl.c b/drivers/pci/hotplug/cpqphp_ctrl.c
index 0ff689afa757..e43908d9b5df 100644
--- a/drivers/pci/hotplug/cpqphp_ctrl.c
+++ b/drivers/pci/hotplug/cpqphp_ctrl.c
@@ -1130,12 +1130,13 @@ static int is_bridge(struct pci_func * func)
 static u8 set_controller_speed(struct controller *ctrl, u8 adapter_speed, u8 hp_slot)
 {
 	struct slot *slot;
+	struct pci_bus *bus = ctrl->pci_bus;
 	u8 reg;
 	u8 slot_power = readb(ctrl->hpc_reg + SLOT_POWER);
 	u16 reg16;
 	u32 leds = readl(ctrl->hpc_reg + LED_CONTROL);
 
-	if (ctrl->speed == adapter_speed)
+	if (bus->cur_bus_speed == adapter_speed)
 		return 0;
 
 	/* We don't allow freq/mode changes if we find another adapter running
@@ -1152,7 +1153,7 @@ static u8 set_controller_speed(struct controller *ctrl, u8 adapter_speed, u8 hp_
 		 * lower speed/mode, we allow the new adapter to function at
 		 * this rate if supported
 		 */
-		if (ctrl->speed < adapter_speed)
+		if (bus->cur_bus_speed < adapter_speed)
 			return 0;
 
 		return 1;
@@ -1161,20 +1162,20 @@ static u8 set_controller_speed(struct controller *ctrl, u8 adapter_speed, u8 hp_
 	/* If the controller doesn't support freq/mode changes and the
 	 * controller is running at a higher mode, we bail
 	 */
-	if ((ctrl->speed > adapter_speed) && (!ctrl->pcix_speed_capability))
+	if ((bus->cur_bus_speed > adapter_speed) && (!ctrl->pcix_speed_capability))
 		return 1;
 
 	/* But we allow the adapter to run at a lower rate if possible */
-	if ((ctrl->speed < adapter_speed) && (!ctrl->pcix_speed_capability))
+	if ((bus->cur_bus_speed < adapter_speed) && (!ctrl->pcix_speed_capability))
 		return 0;
 
 	/* We try to set the max speed supported by both the adapter and
 	 * controller
 	 */
-	if (ctrl->speed_capability < adapter_speed) {
-		if (ctrl->speed == ctrl->speed_capability)
+	if (bus->max_bus_speed < adapter_speed) {
+		if (bus->cur_bus_speed == bus->max_bus_speed)
 			return 0;
-		adapter_speed = ctrl->speed_capability;
+		adapter_speed = bus->max_bus_speed;
 	}
 
 	writel(0x0L, ctrl->hpc_reg + LED_CONTROL);
@@ -1229,8 +1230,8 @@ static u8 set_controller_speed(struct controller *ctrl, u8 adapter_speed, u8 hp_
 	pci_write_config_byte(ctrl->pci_dev, 0x43, reg);
 
 	/* Only if mode change...*/
-	if (((ctrl->speed == PCI_SPEED_66MHz) && (adapter_speed == PCI_SPEED_66MHz_PCIX)) ||
-		((ctrl->speed == PCI_SPEED_66MHz_PCIX) && (adapter_speed == PCI_SPEED_66MHz))) 
+	if (((bus->cur_bus_speed == PCI_SPEED_66MHz) && (adapter_speed == PCI_SPEED_66MHz_PCIX)) ||
+		((bus->cur_bus_speed == PCI_SPEED_66MHz_PCIX) && (adapter_speed == PCI_SPEED_66MHz))) 
 			set_SOGO(ctrl);
 
 	wait_for_ctrl_irq(ctrl);
@@ -1243,7 +1244,7 @@ static u8 set_controller_speed(struct controller *ctrl, u8 adapter_speed, u8 hp_
 	set_SOGO(ctrl);
 	wait_for_ctrl_irq(ctrl);
 
-	ctrl->speed = adapter_speed;
+	bus->cur_bus_speed = adapter_speed;
 	slot = cpqhp_find_slot(ctrl, hp_slot + ctrl->slot_device_offset);
 
 	info("Successfully changed frequency/mode for adapter in slot %d\n",
@@ -1269,6 +1270,7 @@ static u8 set_controller_speed(struct controller *ctrl, u8 adapter_speed, u8 hp_
  */
 static u32 board_replaced(struct pci_func *func, struct controller *ctrl)
 {
+	struct pci_bus *bus = ctrl->pci_bus;
 	u8 hp_slot;
 	u8 temp_byte;
 	u8 adapter_speed;
@@ -1309,7 +1311,7 @@ static u32 board_replaced(struct pci_func *func, struct controller *ctrl)
 		wait_for_ctrl_irq (ctrl);
 
 		adapter_speed = get_adapter_speed(ctrl, hp_slot);
-		if (ctrl->speed != adapter_speed)
+		if (bus->cur_bus_speed != adapter_speed)
 			if (set_controller_speed(ctrl, adapter_speed, hp_slot))
 				rc = WRONG_BUS_FREQUENCY;
 
@@ -1426,6 +1428,7 @@ static u32 board_added(struct pci_func *func, struct controller *ctrl)
 	u32 temp_register = 0xFFFFFFFF;
 	u32 rc = 0;
 	struct pci_func *new_slot = NULL;
+	struct pci_bus *bus = ctrl->pci_bus;
 	struct slot *p_slot;
 	struct resource_lists res_lists;
 
@@ -1456,7 +1459,7 @@ static u32 board_added(struct pci_func *func, struct controller *ctrl)
 	wait_for_ctrl_irq (ctrl);
 
 	adapter_speed = get_adapter_speed(ctrl, hp_slot);
-	if (ctrl->speed != adapter_speed)
+	if (bus->cur_bus_speed != adapter_speed)
 		if (set_controller_speed(ctrl, adapter_speed, hp_slot))
 			rc = WRONG_BUS_FREQUENCY;
 
diff --git a/drivers/pci/hotplug/ibmphp_core.c b/drivers/pci/hotplug/ibmphp_core.c
index 7485ffda950c..d934dd4fa873 100644
--- a/drivers/pci/hotplug/ibmphp_core.c
+++ b/drivers/pci/hotplug/ibmphp_core.c
@@ -395,89 +395,40 @@ static int get_adapter_present(struct hotplug_slot *hotplug_slot, u8 * value)
 	return rc;
 }
 
-static int get_max_bus_speed(struct hotplug_slot *hotplug_slot, enum pci_bus_speed *value)
+static int get_max_bus_speed(struct slot *slot)
 {
-	int rc = -ENODEV;
-	struct slot *pslot;
+	int rc;
 	u8 mode = 0;
+	enum pci_bus_speed speed;
+	struct pci_bus *bus = slot->hotplug_slot->pci_slot->bus;
 
-	debug("%s - Entry hotplug_slot[%p] pvalue[%p]\n", __func__,
-		hotplug_slot, value);
+	debug("%s - Entry slot[%p]\n", __func__, slot);
 
 	ibmphp_lock_operations();
-
-	if (hotplug_slot) {
-		pslot = hotplug_slot->private;
-		if (pslot) {
-			rc = 0;
-			mode = pslot->supported_bus_mode;
-			*value = pslot->supported_speed; 
-			switch (*value) {
-			case BUS_SPEED_33:
-				break;
-			case BUS_SPEED_66:
-				if (mode == BUS_MODE_PCIX) 
-					*value += 0x01;
-				break;
-			case BUS_SPEED_100:
-			case BUS_SPEED_133:
-				*value = pslot->supported_speed + 0x01;
-				break;
-			default:
-				/* Note (will need to change): there would be soon 256, 512 also */
-				rc = -ENODEV;
-			}
-		}
-	}
-
+	mode = slot->supported_bus_mode;
+	speed = slot->supported_speed; 
 	ibmphp_unlock_operations();
-	debug("%s - Exit rc[%d] value[%x]\n", __func__, rc, *value);
-	return rc;
-}
 
-static int get_cur_bus_speed(struct hotplug_slot *hotplug_slot, enum pci_bus_speed *value)
-{
-	int rc = -ENODEV;
-	struct slot *pslot;
-	u8 mode = 0;
-
-	debug("%s - Entry hotplug_slot[%p] pvalue[%p]\n", __func__,
-		hotplug_slot, value);
-
-	ibmphp_lock_operations();
-
-	if (hotplug_slot) {
-		pslot = hotplug_slot->private;
-		if (pslot) {
-			rc = get_cur_bus_info(&pslot);
-			if (!rc) {
-				mode = pslot->bus_on->current_bus_mode;
-				*value = pslot->bus_on->current_speed;
-				switch (*value) {
-				case BUS_SPEED_33:
-					break;
-				case BUS_SPEED_66:
-					if (mode == BUS_MODE_PCIX) 
-						*value += 0x01;
-					else if (mode == BUS_MODE_PCI)
-						;
-					else
-						*value = PCI_SPEED_UNKNOWN;
-					break;
-				case BUS_SPEED_100:
-				case BUS_SPEED_133:
-					*value += 0x01;
-					break;
-				default:
-					/* Note of change: there would also be 256, 512 soon */
-					rc = -ENODEV;
-				}
-			}
-		}
+	switch (speed) {
+	case BUS_SPEED_33:
+		break;
+	case BUS_SPEED_66:
+		if (mode == BUS_MODE_PCIX) 
+			speed += 0x01;
+		break;
+	case BUS_SPEED_100:
+	case BUS_SPEED_133:
+		speed += 0x01;
+		break;
+	default:
+		/* Note (will need to change): there would be soon 256, 512 also */
+		rc = -ENODEV;
 	}
 
-	ibmphp_unlock_operations();
-	debug("%s - Exit rc[%d] value[%x]\n", __func__, rc, *value);
+	if (!rc)
+		bus->max_bus_speed = speed;
+
+	debug("%s - Exit rc[%d] speed[%x]\n", __func__, rc, speed);
 	return rc;
 }
 
@@ -572,6 +523,7 @@ static int __init init_ops(void)
 		if (slot_cur->bus_on->current_speed == 0xFF) 
 			if (get_cur_bus_info(&slot_cur)) 
 				return -1;
+		get_max_bus_speed(slot_cur);
 
 		if (slot_cur->ctrl->options == 0xFF)
 			if (get_hpc_options(slot_cur, &slot_cur->ctrl->options))
@@ -655,6 +607,7 @@ static int validate(struct slot *slot_cur, int opn)
 int ibmphp_update_slot_info(struct slot *slot_cur)
 {
 	struct hotplug_slot_info *info;
+	struct pci_bus *bus = slot_cur->hotplug_slot->pci_slot->bus;
 	int rc;
 	u8 bus_speed;
 	u8 mode;
@@ -700,8 +653,7 @@ int ibmphp_update_slot_info(struct slot *slot_cur)
 			bus_speed = PCI_SPEED_UNKNOWN;
 	}
 
-	info->cur_bus_speed = bus_speed;
-	info->max_bus_speed = slot_cur->hotplug_slot->info->max_bus_speed;
+	bus->cur_bus_speed = bus_speed;
 	// To do: bus_names 
 	
 	rc = pci_hp_change_slot_info(slot_cur->hotplug_slot, info);
@@ -1326,8 +1278,6 @@ struct hotplug_slot_ops ibmphp_hotplug_slot_ops = {
 	.get_attention_status =		get_attention_status,
 	.get_latch_status =		get_latch_status,
 	.get_adapter_status =		get_adapter_present,
-	.get_max_bus_speed =		get_max_bus_speed,
-	.get_cur_bus_speed =		get_cur_bus_speed,
 /*	.get_max_adapter_speed =	get_max_adapter_speed,
 	.get_bus_name_status =		get_bus_name,
 */
diff --git a/drivers/pci/hotplug/pci_hotplug_core.c b/drivers/pci/hotplug/pci_hotplug_core.c
index 38183a534b65..728b119f71ad 100644
--- a/drivers/pci/hotplug/pci_hotplug_core.c
+++ b/drivers/pci/hotplug/pci_hotplug_core.c
@@ -64,32 +64,6 @@ static int debug;
 static LIST_HEAD(pci_hotplug_slot_list);
 static DEFINE_MUTEX(pci_hp_mutex);
 
-/* these strings match up with the values in pci_bus_speed */
-static char *pci_bus_speed_strings[] = {
-	"33 MHz PCI",		/* 0x00 */
-	"66 MHz PCI",		/* 0x01 */
-	"66 MHz PCI-X",		/* 0x02 */
-	"100 MHz PCI-X",	/* 0x03 */
-	"133 MHz PCI-X",	/* 0x04 */
-	NULL,			/* 0x05 */
-	NULL,			/* 0x06 */
-	NULL,			/* 0x07 */
-	NULL,			/* 0x08 */
-	"66 MHz PCI-X 266",	/* 0x09 */
-	"100 MHz PCI-X 266",	/* 0x0a */
-	"133 MHz PCI-X 266",	/* 0x0b */
-	NULL,			/* 0x0c */
-	NULL,			/* 0x0d */
-	NULL,			/* 0x0e */
-	NULL,			/* 0x0f */
-	NULL,			/* 0x10 */
-	"66 MHz PCI-X 533",	/* 0x11 */
-	"100 MHz PCI-X 533",	/* 0x12 */
-	"133 MHz PCI-X 533",	/* 0x13 */
-	"2.5 GT/s PCIe",	/* 0x14 */
-	"5.0 GT/s PCIe",	/* 0x15 */
-};
-
 #ifdef CONFIG_HOTPLUG_PCI_CPCI
 extern int cpci_hotplug_init(int debug);
 extern void cpci_hotplug_exit(void);
@@ -118,8 +92,6 @@ GET_STATUS(power_status, u8)
 GET_STATUS(attention_status, u8)
 GET_STATUS(latch_status, u8)
 GET_STATUS(adapter_status, u8)
-GET_STATUS(max_bus_speed, enum pci_bus_speed)
-GET_STATUS(cur_bus_speed, enum pci_bus_speed)
 
 static ssize_t power_read_file(struct pci_slot *slot, char *buf)
 {
@@ -263,60 +235,6 @@ static struct pci_slot_attribute hotplug_slot_attr_presence = {
 	.show = presence_read_file,
 };
 
-static char *unknown_speed = "Unknown bus speed";
-
-static ssize_t max_bus_speed_read_file(struct pci_slot *slot, char *buf)
-{
-	char *speed_string;
-	int retval;
-	enum pci_bus_speed value;
-	
-	retval = get_max_bus_speed(slot->hotplug, &value);
-	if (retval)
-		goto exit;
-
-	if (value == PCI_SPEED_UNKNOWN)
-		speed_string = unknown_speed;
-	else
-		speed_string = pci_bus_speed_strings[value];
-	
-	retval = sprintf (buf, "%s\n", speed_string);
-
-exit:
-	return retval;
-}
-
-static struct pci_slot_attribute hotplug_slot_attr_max_bus_speed = {
-	.attr = {.name = "max_bus_speed", .mode = S_IFREG | S_IRUGO},
-	.show = max_bus_speed_read_file,
-};
-
-static ssize_t cur_bus_speed_read_file(struct pci_slot *slot, char *buf)
-{
-	char *speed_string;
-	int retval;
-	enum pci_bus_speed value;
-
-	retval = get_cur_bus_speed(slot->hotplug, &value);
-	if (retval)
-		goto exit;
-
-	if (value == PCI_SPEED_UNKNOWN)
-		speed_string = unknown_speed;
-	else
-		speed_string = pci_bus_speed_strings[value];
-	
-	retval = sprintf (buf, "%s\n", speed_string);
-
-exit:
-	return retval;
-}
-
-static struct pci_slot_attribute hotplug_slot_attr_cur_bus_speed = {
-	.attr = {.name = "cur_bus_speed", .mode = S_IFREG | S_IRUGO},
-	.show = cur_bus_speed_read_file,
-};
-
 static ssize_t test_write_file(struct pci_slot *pci_slot, const char *buf,
 		size_t count)
 {
@@ -391,26 +309,6 @@ static bool has_adapter_file(struct pci_slot *pci_slot)
 	return false;
 }
 
-static bool has_max_bus_speed_file(struct pci_slot *pci_slot)
-{
-	struct hotplug_slot *slot = pci_slot->hotplug;
-	if ((!slot) || (!slot->ops))
-		return false;
-	if (slot->ops->get_max_bus_speed)
-		return true;
-	return false;
-}
-
-static bool has_cur_bus_speed_file(struct pci_slot *pci_slot)
-{
-	struct hotplug_slot *slot = pci_slot->hotplug;
-	if ((!slot) || (!slot->ops))
-		return false;
-	if (slot->ops->get_cur_bus_speed)
-		return true;
-	return false;
-}
-
 static bool has_test_file(struct pci_slot *pci_slot)
 {
 	struct hotplug_slot *slot = pci_slot->hotplug;
@@ -456,20 +354,6 @@ static int fs_add_slot(struct pci_slot *slot)
 			goto exit_adapter;
 	}
 
-	if (has_max_bus_speed_file(slot)) {
-		retval = sysfs_create_file(&slot->kobj,
-					&hotplug_slot_attr_max_bus_speed.attr);
-		if (retval)
-			goto exit_max_speed;
-	}
-
-	if (has_cur_bus_speed_file(slot)) {
-		retval = sysfs_create_file(&slot->kobj,
-					&hotplug_slot_attr_cur_bus_speed.attr);
-		if (retval)
-			goto exit_cur_speed;
-	}
-
 	if (has_test_file(slot)) {
 		retval = sysfs_create_file(&slot->kobj,
 					   &hotplug_slot_attr_test.attr);
@@ -480,14 +364,6 @@ static int fs_add_slot(struct pci_slot *slot)
 	goto exit;
 
 exit_test:
-	if (has_cur_bus_speed_file(slot))
-		sysfs_remove_file(&slot->kobj,
-				  &hotplug_slot_attr_cur_bus_speed.attr);
-exit_cur_speed:
-	if (has_max_bus_speed_file(slot))
-		sysfs_remove_file(&slot->kobj,
-				  &hotplug_slot_attr_max_bus_speed.attr);
-exit_max_speed:
 	if (has_adapter_file(slot))
 		sysfs_remove_file(&slot->kobj,
 				  &hotplug_slot_attr_presence.attr);
@@ -523,14 +399,6 @@ static void fs_remove_slot(struct pci_slot *slot)
 		sysfs_remove_file(&slot->kobj,
 				  &hotplug_slot_attr_presence.attr);
 
-	if (has_max_bus_speed_file(slot))
-		sysfs_remove_file(&slot->kobj,
-				  &hotplug_slot_attr_max_bus_speed.attr);
-
-	if (has_cur_bus_speed_file(slot))
-		sysfs_remove_file(&slot->kobj,
-				  &hotplug_slot_attr_cur_bus_speed.attr);
-
 	if (has_test_file(slot))
 		sysfs_remove_file(&slot->kobj, &hotplug_slot_attr_test.attr);
 
diff --git a/drivers/pci/hotplug/pciehp_core.c b/drivers/pci/hotplug/pciehp_core.c
index 5674b2075bdc..920f820edf87 100644
--- a/drivers/pci/hotplug/pciehp_core.c
+++ b/drivers/pci/hotplug/pciehp_core.c
@@ -69,8 +69,6 @@ static int get_power_status	(struct hotplug_slot *slot, u8 *value);
 static int get_attention_status	(struct hotplug_slot *slot, u8 *value);
 static int get_latch_status	(struct hotplug_slot *slot, u8 *value);
 static int get_adapter_status	(struct hotplug_slot *slot, u8 *value);
-static int get_max_bus_speed	(struct hotplug_slot *slot, enum pci_bus_speed *value);
-static int get_cur_bus_speed	(struct hotplug_slot *slot, enum pci_bus_speed *value);
 
 /**
  * release_slot - free up the memory used by a slot
@@ -113,8 +111,6 @@ static int init_slot(struct controller *ctrl)
 	ops->disable_slot = disable_slot;
 	ops->get_power_status = get_power_status;
 	ops->get_adapter_status = get_adapter_status;
-	ops->get_max_bus_speed = get_max_bus_speed;
-	ops->get_cur_bus_speed = get_cur_bus_speed;
 	if (MRL_SENS(ctrl))
 		ops->get_latch_status = get_latch_status;
 	if (ATTN_LED(ctrl)) {
@@ -227,27 +223,6 @@ static int get_adapter_status(struct hotplug_slot *hotplug_slot, u8 *value)
 	return pciehp_get_adapter_status(slot, value);
 }
 
-static int get_max_bus_speed(struct hotplug_slot *hotplug_slot,
-				enum pci_bus_speed *value)
-{
-	struct slot *slot = hotplug_slot->private;
-
-	ctrl_dbg(slot->ctrl, "%s: physical_slot = %s\n",
-		 __func__, slot_name(slot));
-
-	return pciehp_get_max_link_speed(slot, value);
-}
-
-static int get_cur_bus_speed(struct hotplug_slot *hotplug_slot, enum pci_bus_speed *value)
-{
-	struct slot *slot = hotplug_slot->private;
-
-	ctrl_dbg(slot->ctrl, "%s: physical_slot = %s\n",
-		 __func__, slot_name(slot));
-
-	return pciehp_get_cur_link_speed(slot, value);
-}
-
 static int pciehp_probe(struct pcie_device *dev)
 {
 	int rc;
diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c
index 6744ca1d8d01..40b48f569b1e 100644
--- a/drivers/pci/hotplug/pciehp_hpc.c
+++ b/drivers/pci/hotplug/pciehp_hpc.c
@@ -492,6 +492,7 @@ int pciehp_power_on_slot(struct slot * slot)
 	u16 slot_cmd;
 	u16 cmd_mask;
 	u16 slot_status;
+	u16 lnk_status;
 	int retval = 0;
 
 	/* Clear sticky power-fault bit from previous power failures */
@@ -523,6 +524,14 @@ int pciehp_power_on_slot(struct slot * slot)
 	ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n", __func__,
 		 pci_pcie_cap(ctrl->pcie->port) + PCI_EXP_SLTCTL, slot_cmd);
 
+	retval = pciehp_readw(ctrl, PCI_EXP_LNKSTA, &lnk_status);
+	if (retval) {
+		ctrl_err(ctrl, "%s: Cannot read LNKSTA register\n",
+				__func__);
+		return retval;
+	}
+	pcie_update_link_speed(ctrl->pcie->port->subordinate, lnk_status);
+
 	return retval;
 }
 
@@ -610,37 +619,6 @@ static irqreturn_t pcie_isr(int irq, void *dev_id)
 	return IRQ_HANDLED;
 }
 
-int pciehp_get_max_link_speed(struct slot *slot, enum pci_bus_speed *value)
-{
-	struct controller *ctrl = slot->ctrl;
-	enum pci_bus_speed lnk_speed;
-	u32	lnk_cap;
-	int retval = 0;
-
-	retval = pciehp_readl(ctrl, PCI_EXP_LNKCAP, &lnk_cap);
-	if (retval) {
-		ctrl_err(ctrl, "%s: Cannot read LNKCAP register\n", __func__);
-		return retval;
-	}
-
-	switch (lnk_cap & 0x000F) {
-	case 1:
-		lnk_speed = PCIE_SPEED_2_5GT;
-		break;
-	case 2:
-		lnk_speed = PCIE_SPEED_5_0GT;
-		break;
-	default:
-		lnk_speed = PCI_SPEED_UNKNOWN;
-		break;
-	}
-
-	*value = lnk_speed;
-	ctrl_dbg(ctrl, "Max link speed = %d\n", lnk_speed);
-
-	return retval;
-}
-
 int pciehp_get_max_lnk_width(struct slot *slot,
 				 enum pcie_link_width *value)
 {
@@ -691,38 +669,6 @@ int pciehp_get_max_lnk_width(struct slot *slot,
 	return retval;
 }
 
-int pciehp_get_cur_link_speed(struct slot *slot, enum pci_bus_speed *value)
-{
-	struct controller *ctrl = slot->ctrl;
-	enum pci_bus_speed lnk_speed = PCI_SPEED_UNKNOWN;
-	int retval = 0;
-	u16 lnk_status;
-
-	retval = pciehp_readw(ctrl, PCI_EXP_LNKSTA, &lnk_status);
-	if (retval) {
-		ctrl_err(ctrl, "%s: Cannot read LNKSTATUS register\n",
-			 __func__);
-		return retval;
-	}
-
-	switch (lnk_status & PCI_EXP_LNKSTA_CLS) {
-	case 1:
-		lnk_speed = PCIE_SPEED_2_5GT;
-		break;
-	case 2:
-		lnk_speed = PCIE_SPEED_5_0GT;
-		break;
-	default:
-		lnk_speed = PCI_SPEED_UNKNOWN;
-		break;
-	}
-
-	*value = lnk_speed;
-	ctrl_dbg(ctrl, "Current link speed = %d\n", lnk_speed);
-
-	return retval;
-}
-
 int pciehp_get_cur_lnk_width(struct slot *slot,
 				 enum pcie_link_width *value)
 {
diff --git a/drivers/pci/hotplug/rpaphp_core.c b/drivers/pci/hotplug/rpaphp_core.c
index c159223389ec..b14e5e66a92c 100644
--- a/drivers/pci/hotplug/rpaphp_core.c
+++ b/drivers/pci/hotplug/rpaphp_core.c
@@ -130,10 +130,9 @@ static int get_adapter_status(struct hotplug_slot *hotplug_slot, u8 * value)
 	return 0;
 }
 
-static int get_max_bus_speed(struct hotplug_slot *hotplug_slot, enum pci_bus_speed *value)
+static enum pci_bus_speed get_max_bus_speed(struct slot *slot)
 {
-	struct slot *slot = (struct slot *)hotplug_slot->private;
-
+	enum pci_bus_speed speed;
 	switch (slot->type) {
 	case 1:
 	case 2:
@@ -141,30 +140,30 @@ static int get_max_bus_speed(struct hotplug_slot *hotplug_slot, enum pci_bus_spe
 	case 4:
 	case 5:
 	case 6:
-		*value = PCI_SPEED_33MHz;	/* speed for case 1-6 */
+		speed = PCI_SPEED_33MHz;	/* speed for case 1-6 */
 		break;
 	case 7:
 	case 8:
-		*value = PCI_SPEED_66MHz;
+		speed = PCI_SPEED_66MHz;
 		break;
 	case 11:
 	case 14:
-		*value = PCI_SPEED_66MHz_PCIX;
+		speed = PCI_SPEED_66MHz_PCIX;
 		break;
 	case 12:
 	case 15:
-		*value = PCI_SPEED_100MHz_PCIX;
+		speed = PCI_SPEED_100MHz_PCIX;
 		break;
 	case 13:
 	case 16:
-		*value = PCI_SPEED_133MHz_PCIX;
+		speed = PCI_SPEED_133MHz_PCIX;
 		break;
 	default:
-		*value = PCI_SPEED_UNKNOWN;
+		speed = PCI_SPEED_UNKNOWN;
 		break;
-
 	}
-	return 0;
+
+	return speed;
 }
 
 static int get_children_props(struct device_node *dn, const int **drc_indexes,
@@ -408,6 +407,8 @@ static int enable_slot(struct hotplug_slot *hotplug_slot)
 		slot->state = NOT_VALID;
 		return -EINVAL;
 	}
+
+	slot->bus->max_bus_speed = get_max_bus_speed(slot);
 	return 0;
 }
 
diff --git a/drivers/pci/hotplug/shpchp.h b/drivers/pci/hotplug/shpchp.h
index 8e210cd76e55..d2627e1c3ac1 100644
--- a/drivers/pci/hotplug/shpchp.h
+++ b/drivers/pci/hotplug/shpchp.h
@@ -333,8 +333,6 @@ struct hpc_ops {
 	int (*set_attention_status)(struct slot *slot, u8 status);
 	int (*get_latch_status)(struct slot *slot, u8 *status);
 	int (*get_adapter_status)(struct slot *slot, u8 *status);
-	int (*get_max_bus_speed)(struct slot *slot, enum pci_bus_speed *speed);
-	int (*get_cur_bus_speed)(struct slot *slot, enum pci_bus_speed *speed);
 	int (*get_adapter_speed)(struct slot *slot, enum pci_bus_speed *speed);
 	int (*get_mode1_ECC_cap)(struct slot *slot, u8 *mode);
 	int (*get_prog_int)(struct slot *slot, u8 *prog_int);
diff --git a/drivers/pci/hotplug/shpchp_core.c b/drivers/pci/hotplug/shpchp_core.c
index 8a520a3d0f59..a5062297f488 100644
--- a/drivers/pci/hotplug/shpchp_core.c
+++ b/drivers/pci/hotplug/shpchp_core.c
@@ -65,8 +65,6 @@ static int get_power_status	(struct hotplug_slot *slot, u8 *value);
 static int get_attention_status	(struct hotplug_slot *slot, u8 *value);
 static int get_latch_status	(struct hotplug_slot *slot, u8 *value);
 static int get_adapter_status	(struct hotplug_slot *slot, u8 *value);
-static int get_max_bus_speed	(struct hotplug_slot *slot, enum pci_bus_speed *value);
-static int get_cur_bus_speed	(struct hotplug_slot *slot, enum pci_bus_speed *value);
 
 static struct hotplug_slot_ops shpchp_hotplug_slot_ops = {
 	.set_attention_status =	set_attention_status,
@@ -76,8 +74,6 @@ static struct hotplug_slot_ops shpchp_hotplug_slot_ops = {
 	.get_attention_status =	get_attention_status,
 	.get_latch_status =	get_latch_status,
 	.get_adapter_status =	get_adapter_status,
-	.get_max_bus_speed =	get_max_bus_speed,
-	.get_cur_bus_speed =	get_cur_bus_speed,
 };
 
 /**
@@ -279,37 +275,6 @@ static int get_adapter_status (struct hotplug_slot *hotplug_slot, u8 *value)
 	return 0;
 }
 
-static int get_max_bus_speed(struct hotplug_slot *hotplug_slot,
-				enum pci_bus_speed *value)
-{
-	struct slot *slot = get_slot(hotplug_slot);
-	int retval;
-
-	ctrl_dbg(slot->ctrl, "%s: physical_slot = %s\n",
-		 __func__, slot_name(slot));
-
-	retval = slot->hpc_ops->get_max_bus_speed(slot, value);
-	if (retval < 0)
-		*value = PCI_SPEED_UNKNOWN;
-
-	return 0;
-}
-
-static int get_cur_bus_speed (struct hotplug_slot *hotplug_slot, enum pci_bus_speed *value)
-{
-	struct slot *slot = get_slot(hotplug_slot);
-	int retval;
-
-	ctrl_dbg(slot->ctrl, "%s: physical_slot = %s\n",
-		 __func__, slot_name(slot));
-
-	retval = slot->hpc_ops->get_cur_bus_speed(slot, value);
-	if (retval < 0)
-		*value = PCI_SPEED_UNKNOWN;
-
-	return 0;
-}
-
 static int is_shpc_capable(struct pci_dev *dev)
 {
 	if ((dev->vendor == PCI_VENDOR_ID_AMD) || (dev->device ==
diff --git a/drivers/pci/hotplug/shpchp_ctrl.c b/drivers/pci/hotplug/shpchp_ctrl.c
index b8ab2796e66a..179b1c1cb99b 100644
--- a/drivers/pci/hotplug/shpchp_ctrl.c
+++ b/drivers/pci/hotplug/shpchp_ctrl.c
@@ -285,17 +285,8 @@ static int board_added(struct slot *p_slot)
 		return WRONG_BUS_FREQUENCY;
 	}
 
-	rc = p_slot->hpc_ops->get_cur_bus_speed(p_slot, &bsp);
-	if (rc) {
-		ctrl_err(ctrl, "Can't get bus operation speed\n");
-		return WRONG_BUS_FREQUENCY;
-	}
-
-	rc = p_slot->hpc_ops->get_max_bus_speed(p_slot, &msp);
-	if (rc) {
-		ctrl_err(ctrl, "Can't get max bus operation speed\n");
-		msp = bsp;
-	}
+	bsp = ctrl->pci_dev->bus->cur_bus_speed;
+	msp = ctrl->pci_dev->bus->max_bus_speed;
 
 	/* Check if there are other slots or devices on the same bus */
 	if (!list_empty(&ctrl->pci_dev->subordinate->devices))
diff --git a/drivers/pci/hotplug/shpchp_hpc.c b/drivers/pci/hotplug/shpchp_hpc.c
index 86dc39847769..5f5e8d2e3552 100644
--- a/drivers/pci/hotplug/shpchp_hpc.c
+++ b/drivers/pci/hotplug/shpchp_hpc.c
@@ -660,6 +660,75 @@ static int hpc_slot_disable(struct slot * slot)
 	return retval;
 }
 
+static int shpc_get_cur_bus_speed(struct controller *ctrl)
+{
+	int retval = 0;
+	struct pci_bus *bus = ctrl->pci_dev->subordinate;
+	enum pci_bus_speed bus_speed = PCI_SPEED_UNKNOWN;
+	u16 sec_bus_reg = shpc_readw(ctrl, SEC_BUS_CONFIG);
+	u8 pi = shpc_readb(ctrl, PROG_INTERFACE);
+	u8 speed_mode = (pi == 2) ? (sec_bus_reg & 0xF) : (sec_bus_reg & 0x7);
+
+	if ((pi == 1) && (speed_mode > 4)) {
+		retval = -ENODEV;
+		goto out;
+	}
+
+	switch (speed_mode) {
+	case 0x0:
+		bus_speed = PCI_SPEED_33MHz;
+		break;
+	case 0x1:
+		bus_speed = PCI_SPEED_66MHz;
+		break;
+	case 0x2:
+		bus_speed = PCI_SPEED_66MHz_PCIX;
+		break;
+	case 0x3:
+		bus_speed = PCI_SPEED_100MHz_PCIX;
+		break;
+	case 0x4:
+		bus_speed = PCI_SPEED_133MHz_PCIX;
+		break;
+	case 0x5:
+		bus_speed = PCI_SPEED_66MHz_PCIX_ECC;
+		break;
+	case 0x6:
+		bus_speed = PCI_SPEED_100MHz_PCIX_ECC;
+		break;
+	case 0x7:
+		bus_speed = PCI_SPEED_133MHz_PCIX_ECC;
+		break;
+	case 0x8:
+		bus_speed = PCI_SPEED_66MHz_PCIX_266;
+		break;
+	case 0x9:
+		bus_speed = PCI_SPEED_100MHz_PCIX_266;
+		break;
+	case 0xa:
+		bus_speed = PCI_SPEED_133MHz_PCIX_266;
+		break;
+	case 0xb:
+		bus_speed = PCI_SPEED_66MHz_PCIX_533;
+		break;
+	case 0xc:
+		bus_speed = PCI_SPEED_100MHz_PCIX_533;
+		break;
+	case 0xd:
+		bus_speed = PCI_SPEED_133MHz_PCIX_533;
+		break;
+	default:
+		retval = -ENODEV;
+		break;
+	}
+
+ out:
+	bus->cur_bus_speed = bus_speed;
+	dbg("Current bus speed = %d\n", bus_speed);
+	return retval;
+}
+
+
 static int hpc_set_bus_speed_mode(struct slot * slot, enum pci_bus_speed value)
 {
 	int retval;
@@ -720,6 +789,8 @@ static int hpc_set_bus_speed_mode(struct slot * slot, enum pci_bus_speed value)
 	retval = shpc_write_cmd(slot, 0, cmd);
 	if (retval)
 		ctrl_err(ctrl, "%s: Write command failed!\n", __func__);
+	else
+		shpc_get_cur_bus_speed(ctrl);
 
 	return retval;
 }
@@ -803,10 +874,10 @@ static irqreturn_t shpc_isr(int irq, void *dev_id)
 	return IRQ_HANDLED;
 }
 
-static int hpc_get_max_bus_speed (struct slot *slot, enum pci_bus_speed *value)
+static int shpc_get_max_bus_speed(struct controller *ctrl)
 {
 	int retval = 0;
-	struct controller *ctrl = slot->ctrl;
+	struct pci_bus *bus = ctrl->pci_dev->subordinate;
 	enum pci_bus_speed bus_speed = PCI_SPEED_UNKNOWN;
 	u8 pi = shpc_readb(ctrl, PROG_INTERFACE);
 	u32 slot_avail1 = shpc_readl(ctrl, SLOT_AVAIL1);
@@ -842,79 +913,12 @@ static int hpc_get_max_bus_speed (struct slot *slot, enum pci_bus_speed *value)
 			retval = -ENODEV;
 	}
 
-	*value = bus_speed;
+	bus->max_bus_speed = bus_speed;
 	ctrl_dbg(ctrl, "Max bus speed = %d\n", bus_speed);
 
 	return retval;
 }
 
-static int hpc_get_cur_bus_speed (struct slot *slot, enum pci_bus_speed *value)
-{
-	int retval = 0;
-	struct controller *ctrl = slot->ctrl;
-	enum pci_bus_speed bus_speed = PCI_SPEED_UNKNOWN;
-	u16 sec_bus_reg = shpc_readw(ctrl, SEC_BUS_CONFIG);
-	u8 pi = shpc_readb(ctrl, PROG_INTERFACE);
-	u8 speed_mode = (pi == 2) ? (sec_bus_reg & 0xF) : (sec_bus_reg & 0x7);
-
-	if ((pi == 1) && (speed_mode > 4)) {
-		*value = PCI_SPEED_UNKNOWN;
-		return -ENODEV;
-	}
-
-	switch (speed_mode) {
-	case 0x0:
-		*value = PCI_SPEED_33MHz;
-		break;
-	case 0x1:
-		*value = PCI_SPEED_66MHz;
-		break;
-	case 0x2:
-		*value = PCI_SPEED_66MHz_PCIX;
-		break;
-	case 0x3:
-		*value = PCI_SPEED_100MHz_PCIX;
-		break;
-	case 0x4:
-		*value = PCI_SPEED_133MHz_PCIX;
-		break;
-	case 0x5:
-		*value = PCI_SPEED_66MHz_PCIX_ECC;
-		break;
-	case 0x6:
-		*value = PCI_SPEED_100MHz_PCIX_ECC;
-		break;
-	case 0x7:
-		*value = PCI_SPEED_133MHz_PCIX_ECC;
-		break;
-	case 0x8:
-		*value = PCI_SPEED_66MHz_PCIX_266;
-		break;
-	case 0x9:
-		*value = PCI_SPEED_100MHz_PCIX_266;
-		break;
-	case 0xa:
-		*value = PCI_SPEED_133MHz_PCIX_266;
-		break;
-	case 0xb:
-		*value = PCI_SPEED_66MHz_PCIX_533;
-		break;
-	case 0xc:
-		*value = PCI_SPEED_100MHz_PCIX_533;
-		break;
-	case 0xd:
-		*value = PCI_SPEED_133MHz_PCIX_533;
-		break;
-	default:
-		*value = PCI_SPEED_UNKNOWN;
-		retval = -ENODEV;
-		break;
-	}
-
-	ctrl_dbg(ctrl, "Current bus speed = %d\n", bus_speed);
-	return retval;
-}
-
 static struct hpc_ops shpchp_hpc_ops = {
 	.power_on_slot			= hpc_power_on_slot,
 	.slot_enable			= hpc_slot_enable,
@@ -926,8 +930,6 @@ static struct hpc_ops shpchp_hpc_ops = {
 	.get_latch_status		= hpc_get_latch_status,
 	.get_adapter_status		= hpc_get_adapter_status,
 
-	.get_max_bus_speed		= hpc_get_max_bus_speed,
-	.get_cur_bus_speed		= hpc_get_cur_bus_speed,
 	.get_adapter_speed		= hpc_get_adapter_speed,
 	.get_mode1_ECC_cap		= hpc_get_mode1_ECC_cap,
 	.get_prog_int			= hpc_get_prog_int,
@@ -1086,6 +1088,9 @@ int shpc_init(struct controller *ctrl, struct pci_dev *pdev)
 	}
 	ctrl_dbg(ctrl, "HPC at %s irq=%x\n", pci_name(pdev), pdev->irq);
 
+	shpc_get_max_bus_speed(ctrl);
+	shpc_get_cur_bus_speed(ctrl);
+
 	/*
 	 * If this is the first controller to be initialized,
 	 * initialize the shpchpd work queue
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index dd64310a735e..51cf8982fec7 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -387,10 +387,37 @@ static struct pci_bus * pci_alloc_bus(void)
 		INIT_LIST_HEAD(&b->children);
 		INIT_LIST_HEAD(&b->devices);
 		INIT_LIST_HEAD(&b->slots);
+		b->max_bus_speed = PCI_SPEED_UNKNOWN;
+		b->cur_bus_speed = PCI_SPEED_UNKNOWN;
 	}
 	return b;
 }
 
+static unsigned char pcie_link_speed[] = {
+	PCI_SPEED_UNKNOWN,		/* 0 */
+	PCIE_SPEED_2_5GT,		/* 1 */
+	PCIE_SPEED_5_0GT,		/* 2 */
+	PCI_SPEED_UNKNOWN,		/* 3 */
+	PCI_SPEED_UNKNOWN,		/* 4 */
+	PCI_SPEED_UNKNOWN,		/* 5 */
+	PCI_SPEED_UNKNOWN,		/* 6 */
+	PCI_SPEED_UNKNOWN,		/* 7 */
+	PCI_SPEED_UNKNOWN,		/* 8 */
+	PCI_SPEED_UNKNOWN,		/* 9 */
+	PCI_SPEED_UNKNOWN,		/* A */
+	PCI_SPEED_UNKNOWN,		/* B */
+	PCI_SPEED_UNKNOWN,		/* C */
+	PCI_SPEED_UNKNOWN,		/* D */
+	PCI_SPEED_UNKNOWN,		/* E */
+	PCI_SPEED_UNKNOWN		/* F */
+};
+
+void pcie_update_link_speed(struct pci_bus *bus, u16 linksta)
+{
+	bus->cur_bus_speed = pcie_link_speed[linksta & 0xf];
+}
+EXPORT_SYMBOL_GPL(pcie_update_link_speed);
+
 static struct pci_bus *pci_alloc_child_bus(struct pci_bus *parent,
 					   struct pci_dev *bridge, int busnr)
 {
diff --git a/drivers/pci/slot.c b/drivers/pci/slot.c
index 8c02b6c53bdb..6f6b8d24786a 100644
--- a/drivers/pci/slot.c
+++ b/drivers/pci/slot.c
@@ -47,6 +47,54 @@ static ssize_t address_read_file(struct pci_slot *slot, char *buf)
 				slot->number);
 }
 
+/* these strings match up with the values in pci_bus_speed */
+static char *pci_bus_speed_strings[] = {
+	"33 MHz PCI",		/* 0x00 */
+	"66 MHz PCI",		/* 0x01 */
+	"66 MHz PCI-X", 	/* 0x02 */
+	"100 MHz PCI-X",	/* 0x03 */
+	"133 MHz PCI-X",	/* 0x04 */
+	NULL,			/* 0x05 */
+	NULL,			/* 0x06 */
+	NULL,			/* 0x07 */
+	NULL,			/* 0x08 */
+	"66 MHz PCI-X 266",	/* 0x09 */
+	"100 MHz PCI-X 266",	/* 0x0a */
+	"133 MHz PCI-X 266",	/* 0x0b */
+	NULL,			/* 0x0c */
+	NULL,			/* 0x0d */
+	NULL,			/* 0x0e */
+	NULL,			/* 0x0f */
+	NULL,			/* 0x10 */
+	"66 MHz PCI-X 533",	/* 0x11 */
+	"100 MHz PCI-X 533",	/* 0x12 */
+	"133 MHz PCI-X 533",	/* 0x13 */
+	"2.5 GT/s PCIe",	/* 0x14 */
+	"5.0 GT/s PCIe",	/* 0x15 */
+};
+
+static ssize_t bus_speed_read(enum pci_bus_speed speed, char *buf)
+{
+	const char *speed_string;
+
+	if (speed < ARRAY_SIZE(pci_bus_speed_strings))
+		speed_string = pci_bus_speed_strings[speed];
+	else
+		speed_string = "Unknown";
+
+	return sprintf(buf, "%s\n", speed_string);
+}
+
+static ssize_t max_speed_read_file(struct pci_slot *slot, char *buf)
+{
+	return bus_speed_read(slot->bus->max_bus_speed, buf);
+}
+
+static ssize_t cur_speed_read_file(struct pci_slot *slot, char *buf)
+{
+	return bus_speed_read(slot->bus->cur_bus_speed, buf);
+}
+
 static void pci_slot_release(struct kobject *kobj)
 {
 	struct pci_dev *dev;
@@ -66,9 +114,15 @@ static void pci_slot_release(struct kobject *kobj)
 
 static struct pci_slot_attribute pci_slot_attr_address =
 	__ATTR(address, (S_IFREG | S_IRUGO), address_read_file, NULL);
+static struct pci_slot_attribute pci_slot_attr_max_speed =
+	__ATTR(max_bus_speed, (S_IFREG | S_IRUGO), max_speed_read_file, NULL);
+static struct pci_slot_attribute pci_slot_attr_cur_speed =
+	__ATTR(cur_bus_speed, (S_IFREG | S_IRUGO), cur_speed_read_file, NULL);
 
 static struct attribute *pci_slot_default_attrs[] = {
 	&pci_slot_attr_address.attr,
+	&pci_slot_attr_max_speed.attr,
+	&pci_slot_attr_cur_speed.attr,
 	NULL,
 };
 
diff --git a/include/linux/pci.h b/include/linux/pci.h
index d76a8a0b6b53..a446097a9f68 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -380,6 +380,8 @@ struct pci_bus {
 	unsigned char	primary;	/* number of primary bridge */
 	unsigned char	secondary;	/* number of secondary bridge */
 	unsigned char	subordinate;	/* max number of subordinate buses */
+	unsigned char	max_bus_speed;	/* enum pci_bus_speed */
+	unsigned char	cur_bus_speed;	/* enum pci_bus_speed */
 
 	char		name[48];
 
@@ -610,6 +612,7 @@ struct pci_bus *pci_create_bus(struct device *parent, int bus,
 			       struct pci_ops *ops, void *sysdata);
 struct pci_bus *pci_add_new_bus(struct pci_bus *parent, struct pci_dev *dev,
 				int busnr);
+void pcie_update_link_speed(struct pci_bus *bus, u16 link_status);
 struct pci_slot *pci_create_slot(struct pci_bus *parent, int slot_nr,
 				 const char *name,
 				 struct hotplug_slot *hotplug);
diff --git a/include/linux/pci_hotplug.h b/include/linux/pci_hotplug.h
index 56251ac385e9..5d09cbafa7db 100644
--- a/include/linux/pci_hotplug.h
+++ b/include/linux/pci_hotplug.h
@@ -63,12 +63,6 @@ enum pcie_link_width {
  * @get_adapter_status: Called to get see if an adapter is present in the slot or not.
  *	If this field is NULL, the value passed in the struct hotplug_slot_info
  *	will be used when this value is requested by a user.
- * @get_max_bus_speed: Called to get the max bus speed for a slot.
- *	If this field is NULL, the value passed in the struct hotplug_slot_info
- *	will be used when this value is requested by a user.
- * @get_cur_bus_speed: Called to get the current bus speed for a slot.
- *	If this field is NULL, the value passed in the struct hotplug_slot_info
- *	will be used when this value is requested by a user.
  *
  * The table of function pointers that is passed to the hotplug pci core by a
  * hotplug pci driver.  These functions are called by the hotplug pci core when
@@ -86,17 +80,14 @@ struct hotplug_slot_ops {
 	int (*get_attention_status)	(struct hotplug_slot *slot, u8 *value);
 	int (*get_latch_status)		(struct hotplug_slot *slot, u8 *value);
 	int (*get_adapter_status)	(struct hotplug_slot *slot, u8 *value);
-	int (*get_max_bus_speed)	(struct hotplug_slot *slot, enum pci_bus_speed *value);
-	int (*get_cur_bus_speed)	(struct hotplug_slot *slot, enum pci_bus_speed *value);
 };
 
 /**
  * struct hotplug_slot_info - used to notify the hotplug pci core of the state of the slot
- * @power: if power is enabled or not (1/0)
+ * @power_status: if power is enabled or not (1/0)
  * @attention_status: if the attention light is enabled or not (1/0)
  * @latch_status: if the latch (if any) is open or closed (1/0)
- * @adapter_present: if there is a pci board present in the slot or not (1/0)
- * @address: (domain << 16 | bus << 8 | dev)
+ * @adapter_status: if there is a pci board present in the slot or not (1/0)
  *
  * Used to notify the hotplug pci core of the status of a specific slot.
  */
@@ -105,8 +96,6 @@ struct hotplug_slot_info {
 	u8	attention_status;
 	u8	latch_status;
 	u8	adapter_status;
-	enum pci_bus_speed	max_bus_speed;
-	enum pci_bus_speed	cur_bus_speed;
 };
 
 /**
-- 
cgit v1.2.3


From 45b4cdd57ef0e57555b2ab61b584784819b39365 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <matthew@wil.cx>
Date: Sun, 13 Dec 2009 08:11:34 -0500
Subject: PCI: Add support for AGP in cur/max bus speed

Take advantage of some gaps in the table to fit in support for AGP speeds.

Signed-off-by: Matthew Wilcox <willy@linux.intel.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/probe.c | 45 +++++++++++++++++++++++++++++++++++++++++++++
 drivers/pci/slot.c  | 10 +++++-----
 include/linux/pci.h |  5 +++++
 3 files changed, 55 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 188ee9cf0605..2803ab96a98c 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -437,11 +437,56 @@ void pcie_update_link_speed(struct pci_bus *bus, u16 linksta)
 }
 EXPORT_SYMBOL_GPL(pcie_update_link_speed);
 
+static unsigned char agp_speeds[] = {
+	AGP_UNKNOWN,
+	AGP_1X,
+	AGP_2X,
+	AGP_4X,
+	AGP_8X
+};
+
+static enum pci_bus_speed agp_speed(int agp3, int agpstat)
+{
+	int index = 0;
+
+	if (agpstat & 4)
+		index = 3;
+	else if (agpstat & 2)
+		index = 2;
+	else if (agpstat & 1)
+		index = 1;
+	else
+		goto out;
+	
+	if (agp3) {
+		index += 2;
+		if (index == 5)
+			index = 0;
+	}
+
+ out:
+	return agp_speeds[index];
+}
+
+
 static void pci_set_bus_speed(struct pci_bus *bus)
 {
 	struct pci_dev *bridge = bus->self;
 	int pos;
 
+	pos = pci_find_capability(bridge, PCI_CAP_ID_AGP);
+	if (!pos)
+		pos = pci_find_capability(bridge, PCI_CAP_ID_AGP3);
+	if (pos) {
+		u32 agpstat, agpcmd;
+
+		pci_read_config_dword(bridge, pos + PCI_AGP_STATUS, &agpstat);
+		bus->max_bus_speed = agp_speed(agpstat & 8, agpstat & 7);
+
+		pci_read_config_dword(bridge, pos + PCI_AGP_COMMAND, &agpcmd);
+		bus->cur_bus_speed = agp_speed(agpstat & 8, agpcmd & 7);
+	}
+
 	pos = pci_find_capability(bridge, PCI_CAP_ID_PCIX);
 	if (pos) {
 		u16 status;
diff --git a/drivers/pci/slot.c b/drivers/pci/slot.c
index 6f6b8d24786a..c7260d4e339b 100644
--- a/drivers/pci/slot.c
+++ b/drivers/pci/slot.c
@@ -61,11 +61,11 @@ static char *pci_bus_speed_strings[] = {
 	"66 MHz PCI-X 266",	/* 0x09 */
 	"100 MHz PCI-X 266",	/* 0x0a */
 	"133 MHz PCI-X 266",	/* 0x0b */
-	NULL,			/* 0x0c */
-	NULL,			/* 0x0d */
-	NULL,			/* 0x0e */
-	NULL,			/* 0x0f */
-	NULL,			/* 0x10 */
+	"Unknown AGP",		/* 0x0c */
+	"1x AGP",		/* 0x0d */
+	"2x AGP",		/* 0x0e */
+	"4x AGP",		/* 0x0f */
+	"8x AGP",		/* 0x10 */
 	"66 MHz PCI-X 533",	/* 0x11 */
 	"100 MHz PCI-X 533",	/* 0x12 */
 	"133 MHz PCI-X 533",	/* 0x13 */
diff --git a/include/linux/pci.h b/include/linux/pci.h
index a446097a9f68..842adaad312e 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -200,6 +200,11 @@ enum pci_bus_speed {
 	PCI_SPEED_66MHz_PCIX_266	= 0x09,
 	PCI_SPEED_100MHz_PCIX_266	= 0x0a,
 	PCI_SPEED_133MHz_PCIX_266	= 0x0b,
+	AGP_UNKNOWN			= 0x0c,
+	AGP_1X				= 0x0d,
+	AGP_2X				= 0x0e,
+	AGP_4X				= 0x0f,
+	AGP_8X				= 0x10,
 	PCI_SPEED_66MHz_PCIX_533	= 0x11,
 	PCI_SPEED_100MHz_PCIX_533	= 0x12,
 	PCI_SPEED_133MHz_PCIX_533	= 0x13,
-- 
cgit v1.2.3


From 9dfd97fe12f79ec8b68feb63912a4ef2f31f571a Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <matthew@wil.cx>
Date: Sun, 13 Dec 2009 08:11:35 -0500
Subject: PCI: Add support for reporting PCIe 3.0 speeds

Add the 8.0 GT/s speed.

Signed-off-by: Matthew Wilcox <willy@linux.intel.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/probe.c | 2 +-
 drivers/pci/slot.c  | 1 +
 include/linux/pci.h | 1 +
 3 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 2803ab96a98c..9672760c7ade 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -416,7 +416,7 @@ static unsigned char pcie_link_speed[] = {
 	PCI_SPEED_UNKNOWN,		/* 0 */
 	PCIE_SPEED_2_5GT,		/* 1 */
 	PCIE_SPEED_5_0GT,		/* 2 */
-	PCI_SPEED_UNKNOWN,		/* 3 */
+	PCIE_SPEED_8_0GT,		/* 3 */
 	PCI_SPEED_UNKNOWN,		/* 4 */
 	PCI_SPEED_UNKNOWN,		/* 5 */
 	PCI_SPEED_UNKNOWN,		/* 6 */
diff --git a/drivers/pci/slot.c b/drivers/pci/slot.c
index c7260d4e339b..49c9e6c9779a 100644
--- a/drivers/pci/slot.c
+++ b/drivers/pci/slot.c
@@ -71,6 +71,7 @@ static char *pci_bus_speed_strings[] = {
 	"133 MHz PCI-X 533",	/* 0x13 */
 	"2.5 GT/s PCIe",	/* 0x14 */
 	"5.0 GT/s PCIe",	/* 0x15 */
+	"8.0 GT/s PCIe",	/* 0x16 */
 };
 
 static ssize_t bus_speed_read(enum pci_bus_speed speed, char *buf)
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 842adaad312e..0c3a29444f89 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -210,6 +210,7 @@ enum pci_bus_speed {
 	PCI_SPEED_133MHz_PCIX_533	= 0x13,
 	PCIE_SPEED_2_5GT		= 0x14,
 	PCIE_SPEED_5_0GT		= 0x15,
+	PCIE_SPEED_8_0GT		= 0x16,
 	PCI_SPEED_UNKNOWN		= 0xff,
 };
 
-- 
cgit v1.2.3


From 93177a748ba0d4f3d3e51c8e6c785773bf6a70df Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Sat, 2 Jan 2010 22:57:24 +0100
Subject: PCI: Clean up build for CONFIG_PCI_QUIRKS unset

Currently, drivers/pci/quirks.c is built unconditionally, but if
CONFIG_PCI_QUIRKS is unset, the only things actually built in this
file are definitions of global variables and empty functions (due to
the #ifdef CONFIG_PCI_QUIRKS embracing all of the code inside the
file).  This is not particularly nice and if someone overlooks
the #ifdef CONFIG_PCI_QUIRKS, build errors are introduced.

To clean that up, move the definitions of the global variables in
quirks.c that are always built to pci.c, move the definitions of
the empty functions (compiled when CONFIG_PCI_QUIRKS is unset) to
headers (additionally make these functions static inline) and modify
drivers/pci/Makefile so that quirks.c is only built if
CONFIG_PCI_QUIRKS is set.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/Makefile |  4 +++-
 drivers/pci/pci.c    |  7 ++++++-
 drivers/pci/pci.h    |  7 +++++++
 drivers/pci/quirks.c | 14 ++------------
 include/linux/pci.h  |  6 +++++-
 5 files changed, 23 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile
index 4df48d58eaa6..adb74253a996 100644
--- a/drivers/pci/Makefile
+++ b/drivers/pci/Makefile
@@ -2,12 +2,14 @@
 # Makefile for the PCI bus specific drivers.
 #
 
-obj-y		+= access.o bus.o probe.o remove.o pci.o quirks.o \
+obj-y		+= access.o bus.o probe.o remove.o pci.o \
 			pci-driver.o search.o pci-sysfs.o rom.o setup-res.o \
 			irq.o
 obj-$(CONFIG_PROC_FS) += proc.o
 obj-$(CONFIG_SYSFS) += slot.o
 
+obj-$(CONFIG_PCI_QUIRKS) += quirks.o
+
 obj-$(CONFIG_PCI_LEGACY) += legacy.o
 CFLAGS_legacy.o += -Wno-deprecated-declarations
 
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 315fea47e784..1f9be53c39ba 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -19,7 +19,6 @@
 #include <linux/pci-aspm.h>
 #include <linux/pm_wakeup.h>
 #include <linux/interrupt.h>
-#include <asm/dma.h>	/* isa_dma_bridge_buggy */
 #include <linux/device.h>
 #include <asm/setup.h>
 #include "pci.h"
@@ -29,6 +28,12 @@ const char *pci_power_names[] = {
 };
 EXPORT_SYMBOL_GPL(pci_power_names);
 
+int isa_dma_bridge_buggy;
+EXPORT_SYMBOL(isa_dma_bridge_buggy);
+
+int pci_pci_problems;
+EXPORT_SYMBOL(pci_pci_problems);
+
 unsigned int pci_pm_d3_delay;
 
 static void pci_dev_d3_sleep(struct pci_dev *dev)
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index fbd0e3adbca3..5d169bc3ccd1 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -319,6 +319,13 @@ struct pci_dev_reset_methods {
 	int (*reset)(struct pci_dev *dev, int probe);
 };
 
+#ifdef CONFIG_PCI_QUIRKS
 extern int pci_dev_specific_reset(struct pci_dev *dev, int probe);
+#else
+static inline int pci_dev_specific_reset(struct pci_dev *dev, int probe)
+{
+	return -ENOTTY;
+}
+#endif
 
 #endif /* DRIVERS_PCI_H */
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index d58b94030ef3..790eb69a4aa9 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -25,14 +25,9 @@
 #include <linux/dmi.h>
 #include <linux/pci-aspm.h>
 #include <linux/ioport.h>
+#include <asm/dma.h>	/* isa_dma_bridge_buggy */
 #include "pci.h"
 
-int isa_dma_bridge_buggy;
-EXPORT_SYMBOL(isa_dma_bridge_buggy);
-int pci_pci_problems;
-EXPORT_SYMBOL(pci_pci_problems);
-
-#ifdef CONFIG_PCI_QUIRKS
 /*
  * This quirk function disables memory decoding and releases memory resources
  * of the device specified by kernel's boot parameter 'pci=resource_alignment='.
@@ -2612,6 +2607,7 @@ void pci_fixup_device(enum pci_fixup_pass pass, struct pci_dev *dev)
 	}
 	pci_do_fixups(dev, start, end);
 }
+EXPORT_SYMBOL(pci_fixup_device);
 
 static int __init pci_apply_final_quirks(void)
 {
@@ -2723,9 +2719,3 @@ int pci_dev_specific_reset(struct pci_dev *dev, int probe)
 
 	return -ENOTTY;
 }
-
-#else
-void pci_fixup_device(enum pci_fixup_pass pass, struct pci_dev *dev) {}
-int pci_dev_specific_reset(struct pci_dev *dev, int probe) { return -ENOTTY; }
-#endif
-EXPORT_SYMBOL(pci_fixup_device);
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 0c3a29444f89..16f48e7506ed 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1271,8 +1271,12 @@ enum pci_fixup_pass {
 	DECLARE_PCI_FIXUP_SECTION(.pci_fixup_suspend,			\
 			suspend##vendor##device##hook, vendor, device, hook)
 
-
+#ifdef CONFIG_PCI_QUIRKS
 void pci_fixup_device(enum pci_fixup_pass pass, struct pci_dev *dev);
+#else
+static inline void pci_fixup_device(enum pci_fixup_pass pass,
+				    struct pci_dev *dev) {}
+#endif
 
 void __iomem *pcim_iomap(struct pci_dev *pdev, int bar, unsigned long maxlen);
 void pcim_iounmap(struct pci_dev *pdev, void __iomem *addr);
-- 
cgit v1.2.3


From 93da6202264ce1256b04db8008a43882ae62d060 Mon Sep 17 00:00:00 2001
From: Seth Heasley <seth.heasley@intel.com>
Date: Tue, 12 Jan 2010 16:56:37 -0800
Subject: x86/PCI: irq and pci_ids patch for Intel Cougar Point DeviceIDs

This patch adds the Intel Cougar Point (PCH) LPC and SMBus Controller DeviceIDs.

Signed-off-by: Seth Heasley <seth.heasley@intel.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 arch/x86/pci/irq.c      | 2 ++
 include/linux/pci_ids.h | 3 +++
 2 files changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c
index 0696d506c4ad..b02f6d8ac922 100644
--- a/arch/x86/pci/irq.c
+++ b/arch/x86/pci/irq.c
@@ -590,6 +590,8 @@ static __init int intel_router_probe(struct irq_router *r, struct pci_dev *route
 	case PCI_DEVICE_ID_INTEL_ICH10_1:
 	case PCI_DEVICE_ID_INTEL_ICH10_2:
 	case PCI_DEVICE_ID_INTEL_ICH10_3:
+	case PCI_DEVICE_ID_INTEL_CPT_LPC1:
+	case PCI_DEVICE_ID_INTEL_CPT_LPC2:
 		r->name = "PIIX/ICH";
 		r->get = pirq_piix_get;
 		r->set = pirq_piix_set;
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index cca8a044e2b6..0be824320580 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2417,6 +2417,9 @@
 #define PCI_DEVICE_ID_INTEL_82840_HB	0x1a21
 #define PCI_DEVICE_ID_INTEL_82845_HB	0x1a30
 #define PCI_DEVICE_ID_INTEL_IOAT	0x1a38
+#define PCI_DEVICE_ID_INTEL_CPT_SMBUS	0x1c22
+#define PCI_DEVICE_ID_INTEL_CPT_LPC1	0x1c42
+#define PCI_DEVICE_ID_INTEL_CPT_LPC2	0x1c43
 #define PCI_DEVICE_ID_INTEL_82801AA_0	0x2410
 #define PCI_DEVICE_ID_INTEL_82801AA_1	0x2411
 #define PCI_DEVICE_ID_INTEL_82801AA_3	0x2413
-- 
cgit v1.2.3


From b26b2d494b659f988b4d75eb394dfa0ddac415c9 Mon Sep 17 00:00:00 2001
From: Dominik Brodowski <linux@dominikbrodowski.net>
Date: Fri, 1 Jan 2010 17:40:49 +0100
Subject: resource/PCI: align functions now return start of resource

As suggested by Linus, align functions should return the start
of a resource, not void. An update of "res->start" is no longer
necessary.

Cc: Bjorn Helgaas <bjorn.helgaas@hp.com>
Cc: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Dominik Brodowski <linux@dominikbrodowski.net>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 arch/alpha/kernel/pci.c                 |  4 ++--
 arch/arm/kernel/bios32.c                |  8 +++++---
 arch/cris/arch-v32/drivers/pci/bios.c   | 14 ++++++--------
 arch/frv/mb93090-mb00/pci-frv.c         | 14 ++++++--------
 arch/ia64/pci/pci.c                     |  3 ++-
 arch/mips/pci/pci.c                     |  4 ++--
 arch/mips/pmc-sierra/yosemite/ht.c      | 10 +++++-----
 arch/mn10300/unit-asb2305/pci-asb2305.c | 16 +++++++---------
 arch/parisc/kernel/pci.c                | 10 +++++-----
 arch/powerpc/kernel/pci-common.c        | 13 ++++++-------
 arch/sh/drivers/pci/pci.c               |  6 +++---
 arch/sparc/kernel/pci.c                 |  5 +++--
 arch/sparc/kernel/pcic.c                |  5 +++--
 arch/x86/pci/i386.c                     | 12 +++++-------
 arch/xtensa/kernel/pci.c                | 11 +++++------
 drivers/pci/bus.c                       |  6 ++++--
 drivers/pcmcia/rsrc_mgr.c               | 12 ++++++------
 drivers/pcmcia/rsrc_nonstatic.c         | 16 +++++++++-------
 include/linux/ioport.h                  |  6 ++++--
 include/linux/pci.h                     |  8 +++++---
 kernel/resource.c                       | 14 +++++++++-----
 21 files changed, 102 insertions(+), 95 deletions(-)

(limited to 'include/linux')

diff --git a/arch/alpha/kernel/pci.c b/arch/alpha/kernel/pci.c
index a91ba28999b5..5cf111ed851c 100644
--- a/arch/alpha/kernel/pci.c
+++ b/arch/alpha/kernel/pci.c
@@ -126,7 +126,7 @@ DECLARE_PCI_FIXUP_FINAL(PCI_ANY_ID, PCI_ANY_ID, pcibios_fixup_final);
 #define MB			(1024*KB)
 #define GB			(1024*MB)
 
-void
+resource_size_t
 pcibios_align_resource(void *data, struct resource *res,
 		       resource_size_t size, resource_size_t align)
 {
@@ -184,7 +184,7 @@ pcibios_align_resource(void *data, struct resource *res,
 		}
 	}
 
-	res->start = start;
+	return start;
 }
 #undef KB
 #undef MB
diff --git a/arch/arm/kernel/bios32.c b/arch/arm/kernel/bios32.c
index 809681900ec8..a7c85f84f06c 100644
--- a/arch/arm/kernel/bios32.c
+++ b/arch/arm/kernel/bios32.c
@@ -616,15 +616,17 @@ char * __init pcibios_setup(char *str)
  * but we want to try to avoid allocating at 0x2900-0x2bff
  * which might be mirrored at 0x0100-0x03ff..
  */
-void pcibios_align_resource(void *data, struct resource *res,
-			    resource_size_t size, resource_size_t align)
+resource_size_t pcibios_align_resource(void *data, struct resource *res,
+				resource_size_t size, resource_size_t align)
 {
 	resource_size_t start = res->start;
 
 	if (res->flags & IORESOURCE_IO && start & 0x300)
 		start = (start + 0x3ff) & ~0x3ff;
 
-	res->start = (start + align - 1) & ~(align - 1);
+	start = (start + align - 1) & ~(align - 1);
+
+	return start;
 }
 
 /**
diff --git a/arch/cris/arch-v32/drivers/pci/bios.c b/arch/cris/arch-v32/drivers/pci/bios.c
index 77ee319193c3..5811e2f8945c 100644
--- a/arch/cris/arch-v32/drivers/pci/bios.c
+++ b/arch/cris/arch-v32/drivers/pci/bios.c
@@ -41,18 +41,16 @@ int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
 	return 0;
 }
 
-void
+resource_size_t
 pcibios_align_resource(void *data, struct resource *res,
 		       resource_size_t size, resource_size_t align)
 {
-	if (res->flags & IORESOURCE_IO) {
-		resource_size_t start = res->start;
+	resource_size_t start = res->start;
 
-		if (start & 0x300) {
-			start = (start + 0x3ff) & ~0x3ff;
-			res->start = start;
-		}
-	}
+	if ((res->flags & IORESOURCE_IO) && (start & 0x300))
+		start = (start + 0x3ff) & ~0x3ff;
+
+	return start
 }
 
 int pcibios_enable_resources(struct pci_dev *dev, int mask)
diff --git a/arch/frv/mb93090-mb00/pci-frv.c b/arch/frv/mb93090-mb00/pci-frv.c
index 566bdeb499d1..c947aa43f2a9 100644
--- a/arch/frv/mb93090-mb00/pci-frv.c
+++ b/arch/frv/mb93090-mb00/pci-frv.c
@@ -32,18 +32,16 @@
  * but we want to try to avoid allocating at 0x2900-0x2bff
  * which might have be mirrored at 0x0100-0x03ff..
  */
-void
+resource_size_t
 pcibios_align_resource(void *data, struct resource *res,
 		       resource_size_t size, resource_size_t align)
 {
-	if (res->flags & IORESOURCE_IO) {
-		resource_size_t start = res->start;
+	resource_size_t start = res->start;
 
-		if (start & 0x300) {
-			start = (start + 0x3ff) & ~0x3ff;
-			res->start = start;
-		}
-	}
+	if ((res->flags & IORESOURCE_IO) && (start & 0x300))
+		start = (start + 0x3ff) & ~0x3ff;
+
+	return start
 }
 
 
diff --git a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c
index df639db779f9..ef574cd311d1 100644
--- a/arch/ia64/pci/pci.c
+++ b/arch/ia64/pci/pci.c
@@ -547,10 +547,11 @@ pcibios_disable_device (struct pci_dev *dev)
 		acpi_pci_irq_disable(dev);
 }
 
-void
+resource_size_t
 pcibios_align_resource (void *data, struct resource *res,
 		        resource_size_t size, resource_size_t align)
 {
+	return res->start;
 }
 
 /*
diff --git a/arch/mips/pci/pci.c b/arch/mips/pci/pci.c
index 9a11c2226891..9085988f7cf3 100644
--- a/arch/mips/pci/pci.c
+++ b/arch/mips/pci/pci.c
@@ -49,7 +49,7 @@ static int pci_initialized;
  * but we want to try to avoid allocating at 0x2900-0x2bff
  * which might have be mirrored at 0x0100-0x03ff..
  */
-void
+resource_size_t
 pcibios_align_resource(void *data, struct resource *res,
 		       resource_size_t size, resource_size_t align)
 {
@@ -73,7 +73,7 @@ pcibios_align_resource(void *data, struct resource *res,
 			start = PCIBIOS_MIN_MEM + hose->mem_resource->start;
 	}
 
-	res->start = start;
+	return start;
 }
 
 static void __devinit pcibios_scanbus(struct pci_controller *hose)
diff --git a/arch/mips/pmc-sierra/yosemite/ht.c b/arch/mips/pmc-sierra/yosemite/ht.c
index 678388fd34b1..5e410087b628 100644
--- a/arch/mips/pmc-sierra/yosemite/ht.c
+++ b/arch/mips/pmc-sierra/yosemite/ht.c
@@ -345,14 +345,13 @@ int pcibios_enable_device(struct pci_dev *dev, int mask)
         return pcibios_enable_resources(dev);
 }
 
-void pcibios_align_resource(void *data, struct resource *res,
-                            resource_size_t size, resource_size_t align)
+resource_size_t pcibios_align_resource(void *data, struct resource *res,
+				resource_size_t size, resource_size_t align)
 {
         struct pci_dev *dev = data;
+	resource_size_t start = res->start;
 
         if (res->flags & IORESOURCE_IO) {
-                resource_size_t start = res->start;
-
                 /* We need to avoid collisions with `mirrored' VGA ports
                    and other strange ISA hardware, so we always want the
                    addresses kilobyte aligned.  */
@@ -363,8 +362,9 @@ void pcibios_align_resource(void *data, struct resource *res,
                 }
 
                 start = (start + 1024 - 1) & ~(1024 - 1);
-                res->start = start;
         }
+
+	return start;
 }
 
 struct pci_ops titan_pci_ops = {
diff --git a/arch/mn10300/unit-asb2305/pci-asb2305.c b/arch/mn10300/unit-asb2305/pci-asb2305.c
index 78cd134ddf7d..b0b3758fba08 100644
--- a/arch/mn10300/unit-asb2305/pci-asb2305.c
+++ b/arch/mn10300/unit-asb2305/pci-asb2305.c
@@ -31,9 +31,11 @@
  * but we want to try to avoid allocating at 0x2900-0x2bff
  * which might have be mirrored at 0x0100-0x03ff..
  */
-void pcibios_align_resource(void *data, struct resource *res,
-			    resource_size_t size, resource_size_t align)
+resource_size_t pcibios_align_resource(void *data, struct resource *res,
+				resource_size_t size, resource_size_t align)
 {
+	resource_size_t start = res->start;
+
 #if 0
 	struct pci_dev *dev = data;
 
@@ -47,14 +49,10 @@ void pcibios_align_resource(void *data, struct resource *res,
 	       );
 #endif
 
-	if (res->flags & IORESOURCE_IO) {
-		unsigned long start = res->start;
+	if ((res->flags & IORESOURCE_IO) && (start & 0x300))
+		start = (start + 0x3ff) & ~0x3ff;
 
-		if (start & 0x300) {
-			start = (start + 0x3ff) & ~0x3ff;
-			res->start = start;
-		}
-	}
+	return start;
 }
 
 
diff --git a/arch/parisc/kernel/pci.c b/arch/parisc/kernel/pci.c
index f7064abc3bb6..4463a31973ba 100644
--- a/arch/parisc/kernel/pci.c
+++ b/arch/parisc/kernel/pci.c
@@ -254,10 +254,10 @@ EXPORT_SYMBOL(pcibios_bus_to_resource);
  * Since we are just checking candidates, don't use any fields other
  * than res->start.
  */
-void pcibios_align_resource(void *data, struct resource *res,
+resource_size_t pcibios_align_resource(void *data, struct resource *res,
 				resource_size_t size, resource_size_t alignment)
 {
-	resource_size_t mask, align;
+	resource_size_t mask, align, start = res->start;
 
 	DBG_RES("pcibios_align_resource(%s, (%p) [%lx,%lx]/%x, 0x%lx, 0x%lx)\n",
 		pci_name(((struct pci_dev *) data)),
@@ -269,10 +269,10 @@ void pcibios_align_resource(void *data, struct resource *res,
 
 	/* Align to largest of MIN or input size */
 	mask = max(alignment, align) - 1;
-	res->start += mask;
-	res->start &= ~mask;
+	start += mask;
+	start &= ~mask;
 
-	/* The caller updates the end field, we don't.  */
+	return start;
 }
 
 
diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index cadbed679fbb..d7eebbaf01f1 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -1181,21 +1181,20 @@ static int skip_isa_ioresource_align(struct pci_dev *dev)
  * but we want to try to avoid allocating at 0x2900-0x2bff
  * which might have be mirrored at 0x0100-0x03ff..
  */
-void pcibios_align_resource(void *data, struct resource *res,
+resource_size_t pcibios_align_resource(void *data, struct resource *res,
 				resource_size_t size, resource_size_t align)
 {
 	struct pci_dev *dev = data;
+	resource_size_t start = res->start;
 
 	if (res->flags & IORESOURCE_IO) {
-		resource_size_t start = res->start;
-
 		if (skip_isa_ioresource_align(dev))
-			return;
-		if (start & 0x300) {
+			return start;
+		if (start & 0x300)
 			start = (start + 0x3ff) & ~0x3ff;
-			res->start = start;
-		}
 	}
+
+	return start;
 }
 EXPORT_SYMBOL(pcibios_align_resource);
 
diff --git a/arch/sh/drivers/pci/pci.c b/arch/sh/drivers/pci/pci.c
index c481df639022..b36ca825b3d3 100644
--- a/arch/sh/drivers/pci/pci.c
+++ b/arch/sh/drivers/pci/pci.c
@@ -148,8 +148,8 @@ void __devinit pcibios_fixup_bus(struct pci_bus *bus)
  * addresses to be allocated in the 0x000-0x0ff region
  * modulo 0x400.
  */
-void pcibios_align_resource(void *data, struct resource *res,
-			    resource_size_t size, resource_size_t align)
+resource_size_t pcibios_align_resource(void *data, struct resource *res,
+				resource_size_t size, resource_size_t align)
 {
 	struct pci_dev *dev = data;
 	struct pci_channel *chan = dev->sysdata;
@@ -171,7 +171,7 @@ void pcibios_align_resource(void *data, struct resource *res,
 			start = PCIBIOS_MIN_MEM + chan->mem_resource->start;
 	}
 
-	res->start = start;
+	return start;
 }
 
 void pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region,
diff --git a/arch/sparc/kernel/pci.c b/arch/sparc/kernel/pci.c
index 592b03d85167..d56c1f34689e 100644
--- a/arch/sparc/kernel/pci.c
+++ b/arch/sparc/kernel/pci.c
@@ -722,9 +722,10 @@ void pcibios_update_irq(struct pci_dev *pdev, int irq)
 {
 }
 
-void pcibios_align_resource(void *data, struct resource *res,
-			    resource_size_t size, resource_size_t align)
+resource_size_t pcibios_align_resource(void *data, struct resource *res,
+				resource_size_t size, resource_size_t align)
 {
+	return res->start;
 }
 
 int pcibios_enable_device(struct pci_dev *dev, int mask)
diff --git a/arch/sparc/kernel/pcic.c b/arch/sparc/kernel/pcic.c
index 4e2724ec2bb6..46fda64ff163 100644
--- a/arch/sparc/kernel/pcic.c
+++ b/arch/sparc/kernel/pcic.c
@@ -768,9 +768,10 @@ char * __devinit pcibios_setup(char *str)
 	return str;
 }
 
-void pcibios_align_resource(void *data, struct resource *res,
-			    resource_size_t size, resource_size_t align)
+resource_size_t pcibios_align_resource(void *data, struct resource *res,
+				resource_size_t size, resource_size_t align)
 {
+	return res->start;
 }
 
 int pcibios_enable_device(struct pci_dev *pdev, int mask)
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c
index 5dc9e8c63fcd..924e40c916d3 100644
--- a/arch/x86/pci/i386.c
+++ b/arch/x86/pci/i386.c
@@ -60,22 +60,20 @@ skip_isa_ioresource_align(struct pci_dev *dev) {
  * but we want to try to avoid allocating at 0x2900-0x2bff
  * which might have be mirrored at 0x0100-0x03ff..
  */
-void
+resource_size_t
 pcibios_align_resource(void *data, struct resource *res,
 			resource_size_t size, resource_size_t align)
 {
 	struct pci_dev *dev = data;
+	resource_size_t start = res->start;
 
 	if (res->flags & IORESOURCE_IO) {
-		resource_size_t start = res->start;
-
 		if (skip_isa_ioresource_align(dev))
-			return;
-		if (start & 0x300) {
+			return start;
+		if (start & 0x300)
 			start = (start + 0x3ff) & ~0x3ff;
-			res->start = start;
-		}
 	}
+	return start;
 }
 EXPORT_SYMBOL(pcibios_align_resource);
 
diff --git a/arch/xtensa/kernel/pci.c b/arch/xtensa/kernel/pci.c
index b7c073484e01..d7efab03164e 100644
--- a/arch/xtensa/kernel/pci.c
+++ b/arch/xtensa/kernel/pci.c
@@ -69,26 +69,25 @@ static int pci_bus_count;
  * but we want to try to avoid allocating at 0x2900-0x2bff
  * which might have be mirrored at 0x0100-0x03ff..
  */
-void
+resource_size_t
 pcibios_align_resource(void *data, struct resource *res, resource_size_t size,
     		       resource_size_t align)
 {
 	struct pci_dev *dev = data;
+	resource_size_t start = res->start;
 
 	if (res->flags & IORESOURCE_IO) {
-		resource_size_t start = res->start;
-
 		if (size > 0x100) {
 			printk(KERN_ERR "PCI: I/O Region %s/%d too large"
 			       " (%ld bytes)\n", pci_name(dev),
 			       dev->resource - res, size);
 		}
 
-		if (start & 0x300) {
+		if (start & 0x300)
 			start = (start + 0x3ff) & ~0x3ff;
-			res->start = start;
-		}
 	}
+
+	return start;
 }
 
 int
diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c
index cef28a79103f..d29d69a37940 100644
--- a/drivers/pci/bus.c
+++ b/drivers/pci/bus.c
@@ -36,8 +36,10 @@ int
 pci_bus_alloc_resource(struct pci_bus *bus, struct resource *res,
 		resource_size_t size, resource_size_t align,
 		resource_size_t min, unsigned int type_mask,
-		void (*alignf)(void *, struct resource *, resource_size_t,
-				resource_size_t),
+		resource_size_t (*alignf)(void *,
+					  struct resource *,
+					  resource_size_t,
+					  resource_size_t),
 		void *alignf_data)
 {
 	int i, ret = -ENOMEM;
diff --git a/drivers/pcmcia/rsrc_mgr.c b/drivers/pcmcia/rsrc_mgr.c
index 52db17263d8b..f92a2dac13a2 100644
--- a/drivers/pcmcia/rsrc_mgr.c
+++ b/drivers/pcmcia/rsrc_mgr.c
@@ -114,22 +114,20 @@ struct pcmcia_align_data {
 	unsigned long	offset;
 };
 
-static void pcmcia_align(void *align_data, struct resource *res,
-			unsigned long size, unsigned long align)
+static resource_size_t pcmcia_align(void *align_data, struct resource *res,
+				resource_size_t size, resource_size_t align)
 {
 	struct pcmcia_align_data *data = align_data;
-	unsigned long start;
+	resource_size_t start;
 
 	start = (res->start & ~data->mask) + data->offset;
 	if (start < res->start)
 		start += data->mask + 1;
-	res->start = start;
 
 #ifdef CONFIG_X86
 	if (res->flags & IORESOURCE_IO) {
 		if (start & 0x300) {
 			start = (start + 0x3ff) & ~0x3ff;
-			res->start = start;
 		}
 	}
 #endif
@@ -137,9 +135,11 @@ static void pcmcia_align(void *align_data, struct resource *res,
 #ifdef CONFIG_M68K
 	if (res->flags & IORESOURCE_IO) {
 		if ((res->start + size - 1) >= 1024)
-			res->start = res->end;
+			start = res->end;
 	}
 #endif
+
+	return start;
 }
 
 
diff --git a/drivers/pcmcia/rsrc_nonstatic.c b/drivers/pcmcia/rsrc_nonstatic.c
index 9b0dc433a8c3..b65902870e44 100644
--- a/drivers/pcmcia/rsrc_nonstatic.c
+++ b/drivers/pcmcia/rsrc_nonstatic.c
@@ -533,7 +533,7 @@ struct pcmcia_align_data {
 	struct resource_map	*map;
 };
 
-static void
+static resource_size_t
 pcmcia_common_align(void *align_data, struct resource *res,
 			resource_size_t size, resource_size_t align)
 {
@@ -545,17 +545,18 @@ pcmcia_common_align(void *align_data, struct resource *res,
 	start = (res->start & ~data->mask) + data->offset;
 	if (start < res->start)
 		start += data->mask + 1;
-	res->start = start;
+	return start;
 }
 
-static void
+static resource_size_t
 pcmcia_align(void *align_data, struct resource *res, resource_size_t size,
 		resource_size_t align)
 {
 	struct pcmcia_align_data *data = align_data;
 	struct resource_map *m;
+	resource_size_t start;
 
-	pcmcia_common_align(data, res, size, align);
+	start = pcmcia_common_align(data, res, size, align);
 
 	for (m = data->map->next; m != data->map; m = m->next) {
 		unsigned long start = m->base;
@@ -567,8 +568,7 @@ pcmcia_align(void *align_data, struct resource *res, resource_size_t size,
 		 * fit here.
 		 */
 		if (res->start < start) {
-			res->start = start;
-			pcmcia_common_align(data, res, size, align);
+			start = pcmcia_common_align(data, res, size, align);
 		}
 
 		/*
@@ -586,7 +586,9 @@ pcmcia_align(void *align_data, struct resource *res, resource_size_t size,
 	 * If we failed to find something suitable, ensure we fail.
 	 */
 	if (m == data->map)
-		res->start = res->end;
+		start = res->end;
+
+	return start;
 }
 
 /*
diff --git a/include/linux/ioport.h b/include/linux/ioport.h
index 7129504e053d..f4195de4eb9d 100644
--- a/include/linux/ioport.h
+++ b/include/linux/ioport.h
@@ -120,8 +120,10 @@ extern void insert_resource_expand_to_fit(struct resource *root, struct resource
 extern int allocate_resource(struct resource *root, struct resource *new,
 			     resource_size_t size, resource_size_t min,
 			     resource_size_t max, resource_size_t align,
-			     void (*alignf)(void *, struct resource *,
-					    resource_size_t, resource_size_t),
+			     resource_size_t (*alignf)(void *,
+						       struct resource *,
+						       resource_size_t,
+						       resource_size_t),
 			     void *alignf_data);
 int adjust_resource(struct resource *res, resource_size_t start,
 		    resource_size_t size);
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 16f48e7506ed..1bd9f52b24b8 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -592,7 +592,8 @@ int __must_check pcibios_enable_device(struct pci_dev *, int mask);
 char *pcibios_setup(char *str);
 
 /* Used only when drivers/pci/setup.c is used */
-void pcibios_align_resource(void *, struct resource *, resource_size_t,
+resource_size_t pcibios_align_resource(void *, struct resource *,
+				resource_size_t,
 				resource_size_t);
 void pcibios_update_irq(struct pci_dev *, int irq);
 
@@ -827,8 +828,9 @@ int __must_check pci_bus_alloc_resource(struct pci_bus *bus,
 			struct resource *res, resource_size_t size,
 			resource_size_t align, resource_size_t min,
 			unsigned int type_mask,
-			void (*alignf)(void *, struct resource *,
-				resource_size_t, resource_size_t),
+			resource_size_t (*alignf)(void *, struct resource *,
+						  resource_size_t,
+						  resource_size_t),
 			void *alignf_data);
 void pci_enable_bridges(struct pci_bus *bus);
 
diff --git a/kernel/resource.c b/kernel/resource.c
index af96c1e4b54b..e697f20e2288 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -303,8 +303,10 @@ int walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages,
 static int find_resource(struct resource *root, struct resource *new,
 			 resource_size_t size, resource_size_t min,
 			 resource_size_t max, resource_size_t align,
-			 void (*alignf)(void *, struct resource *,
-					resource_size_t, resource_size_t),
+			 resource_size_t (*alignf)(void *,
+						   struct resource *,
+						   resource_size_t,
+						   resource_size_t),
 			 void *alignf_data)
 {
 	struct resource *this = root->child;
@@ -330,7 +332,7 @@ static int find_resource(struct resource *root, struct resource *new,
 			tmp.end = max;
 		tmp.start = ALIGN(tmp.start, align);
 		if (alignf)
-			alignf(alignf_data, &tmp, size, align);
+			tmp.start = alignf(alignf_data, &tmp, size, align);
 		if (tmp.start < tmp.end && tmp.end - tmp.start >= size - 1) {
 			new->start = tmp.start;
 			new->end = tmp.start + size - 1;
@@ -358,8 +360,10 @@ static int find_resource(struct resource *root, struct resource *new,
 int allocate_resource(struct resource *root, struct resource *new,
 		      resource_size_t size, resource_size_t min,
 		      resource_size_t max, resource_size_t align,
-		      void (*alignf)(void *, struct resource *,
-				     resource_size_t, resource_size_t),
+		      resource_size_t (*alignf)(void *,
+						struct resource *,
+						resource_size_t,
+						resource_size_t),
 		      void *alignf_data)
 {
 	int err;
-- 
cgit v1.2.3


From 3b7a17fcdae532d29dffab9d564a28be08960988 Mon Sep 17 00:00:00 2001
From: Dominik Brodowski <linux@dominikbrodowski.net>
Date: Fri, 1 Jan 2010 17:40:50 +0100
Subject: resource/PCI: mark struct resource as const

Now that we return the new resource start position, there is no
need to update "struct resource" inside the align function.
Therefore, mark the struct resource as const.

Cc: Bjorn Helgaas <bjorn.helgaas@hp.com>
Cc: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Dominik Brodowski <linux@dominikbrodowski.net>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 arch/alpha/kernel/pci.c                 | 2 +-
 arch/arm/kernel/bios32.c                | 2 +-
 arch/cris/arch-v32/drivers/pci/bios.c   | 2 +-
 arch/frv/mb93090-mb00/pci-frv.c         | 2 +-
 arch/ia64/pci/pci.c                     | 2 +-
 arch/mips/pci/pci.c                     | 2 +-
 arch/mips/pmc-sierra/yosemite/ht.c      | 2 +-
 arch/mn10300/unit-asb2305/pci-asb2305.c | 2 +-
 arch/parisc/kernel/pci.c                | 2 +-
 arch/powerpc/kernel/pci-common.c        | 2 +-
 arch/sh/drivers/pci/pci.c               | 2 +-
 arch/sparc/kernel/pci.c                 | 2 +-
 arch/sparc/kernel/pcic.c                | 2 +-
 arch/x86/pci/i386.c                     | 2 +-
 arch/xtensa/kernel/pci.c                | 4 ++--
 drivers/pci/bus.c                       | 2 +-
 drivers/pcmcia/rsrc_mgr.c               | 3 ++-
 drivers/pcmcia/rsrc_nonstatic.c         | 6 +++---
 include/linux/ioport.h                  | 2 +-
 include/linux/pci.h                     | 5 +++--
 kernel/resource.c                       | 4 ++--
 21 files changed, 28 insertions(+), 26 deletions(-)

(limited to 'include/linux')

diff --git a/arch/alpha/kernel/pci.c b/arch/alpha/kernel/pci.c
index 5cf111ed851c..c9ab94ee1ca8 100644
--- a/arch/alpha/kernel/pci.c
+++ b/arch/alpha/kernel/pci.c
@@ -127,7 +127,7 @@ DECLARE_PCI_FIXUP_FINAL(PCI_ANY_ID, PCI_ANY_ID, pcibios_fixup_final);
 #define GB			(1024*MB)
 
 resource_size_t
-pcibios_align_resource(void *data, struct resource *res,
+pcibios_align_resource(void *data, const struct resource *res,
 		       resource_size_t size, resource_size_t align)
 {
 	struct pci_dev *dev = data;
diff --git a/arch/arm/kernel/bios32.c b/arch/arm/kernel/bios32.c
index a7c85f84f06c..bd397e0b663e 100644
--- a/arch/arm/kernel/bios32.c
+++ b/arch/arm/kernel/bios32.c
@@ -616,7 +616,7 @@ char * __init pcibios_setup(char *str)
  * but we want to try to avoid allocating at 0x2900-0x2bff
  * which might be mirrored at 0x0100-0x03ff..
  */
-resource_size_t pcibios_align_resource(void *data, struct resource *res,
+resource_size_t pcibios_align_resource(void *data, const struct resource *res,
 				resource_size_t size, resource_size_t align)
 {
 	resource_size_t start = res->start;
diff --git a/arch/cris/arch-v32/drivers/pci/bios.c b/arch/cris/arch-v32/drivers/pci/bios.c
index 5811e2f8945c..d4b9c36ddc0f 100644
--- a/arch/cris/arch-v32/drivers/pci/bios.c
+++ b/arch/cris/arch-v32/drivers/pci/bios.c
@@ -42,7 +42,7 @@ int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
 }
 
 resource_size_t
-pcibios_align_resource(void *data, struct resource *res,
+pcibios_align_resource(void *data, const struct resource *res,
 		       resource_size_t size, resource_size_t align)
 {
 	resource_size_t start = res->start;
diff --git a/arch/frv/mb93090-mb00/pci-frv.c b/arch/frv/mb93090-mb00/pci-frv.c
index c947aa43f2a9..1ed15d7fea20 100644
--- a/arch/frv/mb93090-mb00/pci-frv.c
+++ b/arch/frv/mb93090-mb00/pci-frv.c
@@ -33,7 +33,7 @@
  * which might have be mirrored at 0x0100-0x03ff..
  */
 resource_size_t
-pcibios_align_resource(void *data, struct resource *res,
+pcibios_align_resource(void *data, const struct resource *res,
 		       resource_size_t size, resource_size_t align)
 {
 	resource_size_t start = res->start;
diff --git a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c
index ef574cd311d1..783c83bb2b49 100644
--- a/arch/ia64/pci/pci.c
+++ b/arch/ia64/pci/pci.c
@@ -548,7 +548,7 @@ pcibios_disable_device (struct pci_dev *dev)
 }
 
 resource_size_t
-pcibios_align_resource (void *data, struct resource *res,
+pcibios_align_resource (void *data, const struct resource *res,
 		        resource_size_t size, resource_size_t align)
 {
 	return res->start;
diff --git a/arch/mips/pci/pci.c b/arch/mips/pci/pci.c
index 9085988f7cf3..f87f5e188fa6 100644
--- a/arch/mips/pci/pci.c
+++ b/arch/mips/pci/pci.c
@@ -50,7 +50,7 @@ static int pci_initialized;
  * which might have be mirrored at 0x0100-0x03ff..
  */
 resource_size_t
-pcibios_align_resource(void *data, struct resource *res,
+pcibios_align_resource(void *data, const struct resource *res,
 		       resource_size_t size, resource_size_t align)
 {
 	struct pci_dev *dev = data;
diff --git a/arch/mips/pmc-sierra/yosemite/ht.c b/arch/mips/pmc-sierra/yosemite/ht.c
index 5e410087b628..fd22597edb64 100644
--- a/arch/mips/pmc-sierra/yosemite/ht.c
+++ b/arch/mips/pmc-sierra/yosemite/ht.c
@@ -345,7 +345,7 @@ int pcibios_enable_device(struct pci_dev *dev, int mask)
         return pcibios_enable_resources(dev);
 }
 
-resource_size_t pcibios_align_resource(void *data, struct resource *res,
+resource_size_t pcibios_align_resource(void *data, const struct resource *res,
 				resource_size_t size, resource_size_t align)
 {
         struct pci_dev *dev = data;
diff --git a/arch/mn10300/unit-asb2305/pci-asb2305.c b/arch/mn10300/unit-asb2305/pci-asb2305.c
index b0b3758fba08..d6119b879a98 100644
--- a/arch/mn10300/unit-asb2305/pci-asb2305.c
+++ b/arch/mn10300/unit-asb2305/pci-asb2305.c
@@ -31,7 +31,7 @@
  * but we want to try to avoid allocating at 0x2900-0x2bff
  * which might have be mirrored at 0x0100-0x03ff..
  */
-resource_size_t pcibios_align_resource(void *data, struct resource *res,
+resource_size_t pcibios_align_resource(void *data, const struct resource *res,
 				resource_size_t size, resource_size_t align)
 {
 	resource_size_t start = res->start;
diff --git a/arch/parisc/kernel/pci.c b/arch/parisc/kernel/pci.c
index 4463a31973ba..5179e5e6f8fb 100644
--- a/arch/parisc/kernel/pci.c
+++ b/arch/parisc/kernel/pci.c
@@ -254,7 +254,7 @@ EXPORT_SYMBOL(pcibios_bus_to_resource);
  * Since we are just checking candidates, don't use any fields other
  * than res->start.
  */
-resource_size_t pcibios_align_resource(void *data, struct resource *res,
+resource_size_t pcibios_align_resource(void *data, const struct resource *res,
 				resource_size_t size, resource_size_t alignment)
 {
 	resource_size_t mask, align, start = res->start;
diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index d7eebbaf01f1..e640810e813f 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -1181,7 +1181,7 @@ static int skip_isa_ioresource_align(struct pci_dev *dev)
  * but we want to try to avoid allocating at 0x2900-0x2bff
  * which might have be mirrored at 0x0100-0x03ff..
  */
-resource_size_t pcibios_align_resource(void *data, struct resource *res,
+resource_size_t pcibios_align_resource(void *data, const struct resource *res,
 				resource_size_t size, resource_size_t align)
 {
 	struct pci_dev *dev = data;
diff --git a/arch/sh/drivers/pci/pci.c b/arch/sh/drivers/pci/pci.c
index b36ca825b3d3..96213fd172ce 100644
--- a/arch/sh/drivers/pci/pci.c
+++ b/arch/sh/drivers/pci/pci.c
@@ -148,7 +148,7 @@ void __devinit pcibios_fixup_bus(struct pci_bus *bus)
  * addresses to be allocated in the 0x000-0x0ff region
  * modulo 0x400.
  */
-resource_size_t pcibios_align_resource(void *data, struct resource *res,
+resource_size_t pcibios_align_resource(void *data, const struct resource *res,
 				resource_size_t size, resource_size_t align)
 {
 	struct pci_dev *dev = data;
diff --git a/arch/sparc/kernel/pci.c b/arch/sparc/kernel/pci.c
index d56c1f34689e..37b66c60abe3 100644
--- a/arch/sparc/kernel/pci.c
+++ b/arch/sparc/kernel/pci.c
@@ -722,7 +722,7 @@ void pcibios_update_irq(struct pci_dev *pdev, int irq)
 {
 }
 
-resource_size_t pcibios_align_resource(void *data, struct resource *res,
+resource_size_t pcibios_align_resource(void *data, const struct resource *res,
 				resource_size_t size, resource_size_t align)
 {
 	return res->start;
diff --git a/arch/sparc/kernel/pcic.c b/arch/sparc/kernel/pcic.c
index 46fda64ff163..75e88c00bca3 100644
--- a/arch/sparc/kernel/pcic.c
+++ b/arch/sparc/kernel/pcic.c
@@ -768,7 +768,7 @@ char * __devinit pcibios_setup(char *str)
 	return str;
 }
 
-resource_size_t pcibios_align_resource(void *data, struct resource *res,
+resource_size_t pcibios_align_resource(void *data, const struct resource *res,
 				resource_size_t size, resource_size_t align)
 {
 	return res->start;
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c
index 924e40c916d3..5a8fbf8d4cac 100644
--- a/arch/x86/pci/i386.c
+++ b/arch/x86/pci/i386.c
@@ -61,7 +61,7 @@ skip_isa_ioresource_align(struct pci_dev *dev) {
  * which might have be mirrored at 0x0100-0x03ff..
  */
 resource_size_t
-pcibios_align_resource(void *data, struct resource *res,
+pcibios_align_resource(void *data, const struct resource *res,
 			resource_size_t size, resource_size_t align)
 {
 	struct pci_dev *dev = data;
diff --git a/arch/xtensa/kernel/pci.c b/arch/xtensa/kernel/pci.c
index d7efab03164e..cd1026931203 100644
--- a/arch/xtensa/kernel/pci.c
+++ b/arch/xtensa/kernel/pci.c
@@ -70,8 +70,8 @@ static int pci_bus_count;
  * which might have be mirrored at 0x0100-0x03ff..
  */
 resource_size_t
-pcibios_align_resource(void *data, struct resource *res, resource_size_t size,
-    		       resource_size_t align)
+pcibios_align_resource(void *data, const struct resource *res,
+		       resource_size_t size, resource_size_t align)
 {
 	struct pci_dev *dev = data;
 	resource_size_t start = res->start;
diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c
index d29d69a37940..a26135bb0ffd 100644
--- a/drivers/pci/bus.c
+++ b/drivers/pci/bus.c
@@ -37,7 +37,7 @@ pci_bus_alloc_resource(struct pci_bus *bus, struct resource *res,
 		resource_size_t size, resource_size_t align,
 		resource_size_t min, unsigned int type_mask,
 		resource_size_t (*alignf)(void *,
-					  struct resource *,
+					  const struct resource *,
 					  resource_size_t,
 					  resource_size_t),
 		void *alignf_data)
diff --git a/drivers/pcmcia/rsrc_mgr.c b/drivers/pcmcia/rsrc_mgr.c
index f92a2dac13a2..f8401a0ef89b 100644
--- a/drivers/pcmcia/rsrc_mgr.c
+++ b/drivers/pcmcia/rsrc_mgr.c
@@ -114,7 +114,8 @@ struct pcmcia_align_data {
 	unsigned long	offset;
 };
 
-static resource_size_t pcmcia_align(void *align_data, struct resource *res,
+static resource_size_t pcmcia_align(void *align_data,
+				const struct resource *res,
 				resource_size_t size, resource_size_t align)
 {
 	struct pcmcia_align_data *data = align_data;
diff --git a/drivers/pcmcia/rsrc_nonstatic.c b/drivers/pcmcia/rsrc_nonstatic.c
index b65902870e44..45d75dc452f0 100644
--- a/drivers/pcmcia/rsrc_nonstatic.c
+++ b/drivers/pcmcia/rsrc_nonstatic.c
@@ -534,7 +534,7 @@ struct pcmcia_align_data {
 };
 
 static resource_size_t
-pcmcia_common_align(void *align_data, struct resource *res,
+pcmcia_common_align(void *align_data, const struct resource *res,
 			resource_size_t size, resource_size_t align)
 {
 	struct pcmcia_align_data *data = align_data;
@@ -549,8 +549,8 @@ pcmcia_common_align(void *align_data, struct resource *res,
 }
 
 static resource_size_t
-pcmcia_align(void *align_data, struct resource *res, resource_size_t size,
-		resource_size_t align)
+pcmcia_align(void *align_data, const struct resource *res,
+	resource_size_t size, resource_size_t align)
 {
 	struct pcmcia_align_data *data = align_data;
 	struct resource_map *m;
diff --git a/include/linux/ioport.h b/include/linux/ioport.h
index f4195de4eb9d..4a811891dbe3 100644
--- a/include/linux/ioport.h
+++ b/include/linux/ioport.h
@@ -121,7 +121,7 @@ extern int allocate_resource(struct resource *root, struct resource *new,
 			     resource_size_t size, resource_size_t min,
 			     resource_size_t max, resource_size_t align,
 			     resource_size_t (*alignf)(void *,
-						       struct resource *,
+						       const struct resource *,
 						       resource_size_t,
 						       resource_size_t),
 			     void *alignf_data);
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 1bd9f52b24b8..449d51560fc2 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -592,7 +592,7 @@ int __must_check pcibios_enable_device(struct pci_dev *, int mask);
 char *pcibios_setup(char *str);
 
 /* Used only when drivers/pci/setup.c is used */
-resource_size_t pcibios_align_resource(void *, struct resource *,
+resource_size_t pcibios_align_resource(void *, const struct resource *,
 				resource_size_t,
 				resource_size_t);
 void pcibios_update_irq(struct pci_dev *, int irq);
@@ -828,7 +828,8 @@ int __must_check pci_bus_alloc_resource(struct pci_bus *bus,
 			struct resource *res, resource_size_t size,
 			resource_size_t align, resource_size_t min,
 			unsigned int type_mask,
-			resource_size_t (*alignf)(void *, struct resource *,
+			resource_size_t (*alignf)(void *,
+						  const struct resource *,
 						  resource_size_t,
 						  resource_size_t),
 			void *alignf_data);
diff --git a/kernel/resource.c b/kernel/resource.c
index e697f20e2288..7fd123ad00aa 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -304,7 +304,7 @@ static int find_resource(struct resource *root, struct resource *new,
 			 resource_size_t size, resource_size_t min,
 			 resource_size_t max, resource_size_t align,
 			 resource_size_t (*alignf)(void *,
-						   struct resource *,
+						   const struct resource *,
 						   resource_size_t,
 						   resource_size_t),
 			 void *alignf_data)
@@ -361,7 +361,7 @@ int allocate_resource(struct resource *root, struct resource *new,
 		      resource_size_t size, resource_size_t min,
 		      resource_size_t max, resource_size_t align,
 		      resource_size_t (*alignf)(void *,
-						struct resource *,
+						const struct resource *,
 						resource_size_t,
 						resource_size_t),
 		      void *alignf_data)
-- 
cgit v1.2.3


From 5eeec0ec931a01e85b3701ce121b7d8a1800ec60 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Tue, 22 Dec 2009 15:02:22 -0800
Subject: resource: add release_child_resources

Useful for freeing a portion of the resource tree, e.g. when trying to
reallocate resources more efficiently.

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 include/linux/ioport.h |  1 +
 kernel/resource.c      | 30 ++++++++++++++++++++++++++++++
 2 files changed, 31 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ioport.h b/include/linux/ioport.h
index 4a811891dbe3..dda98410d588 100644
--- a/include/linux/ioport.h
+++ b/include/linux/ioport.h
@@ -112,6 +112,7 @@ extern struct resource iomem_resource;
 
 extern int request_resource(struct resource *root, struct resource *new);
 extern int release_resource(struct resource *new);
+void release_child_resources(struct resource *new);
 extern void reserve_region_with_split(struct resource *root,
 			     resource_size_t start, resource_size_t end,
 			     const char *name);
diff --git a/kernel/resource.c b/kernel/resource.c
index 7fd123ad00aa..24e9e60c1459 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -188,6 +188,36 @@ static int __release_resource(struct resource *old)
 	return -EINVAL;
 }
 
+static void __release_child_resources(struct resource *r)
+{
+	struct resource *tmp, *p;
+	resource_size_t size;
+
+	p = r->child;
+	r->child = NULL;
+	while (p) {
+		tmp = p;
+		p = p->sibling;
+
+		tmp->parent = NULL;
+		tmp->sibling = NULL;
+		__release_child_resources(tmp);
+
+		printk(KERN_DEBUG "release child resource %pR\n", tmp);
+		/* need to restore size, and keep flags */
+		size = resource_size(tmp);
+		tmp->start = 0;
+		tmp->end = size - 1;
+	}
+}
+
+void release_child_resources(struct resource *r)
+{
+	write_lock(&resource_lock);
+	__release_child_resources(r);
+	write_unlock(&resource_lock);
+}
+
 /**
  * request_resource - request and reserve an I/O or memory resource
  * @root: root resource descriptor
-- 
cgit v1.2.3


From 41a68a748bbc61f5bcea999e33ba72926dfbe6f7 Mon Sep 17 00:00:00 2001
From: Tilman Schmidt <tilman@imap.cc>
Date: Mon, 18 Jan 2010 17:24:10 +0100
Subject: PCI: push deprecated pci_find_device() function to last user

The ISDN4Linux HiSax driver family contains the last remaining users
of the deprecated pci_find_device() function. This patch creates a
private copy of that function in HiSax, and removes the now unused
global function together with its controlling configuration option,
CONFIG_PCI_LEGACY.

Signed-off-by: Tilman Schmidt <tilman@imap.cc>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/isdn/hisax/Kconfig        | 18 +++++++++---------
 drivers/isdn/hisax/avm_pci.c      |  6 +++---
 drivers/isdn/hisax/bkm_a4t.c      |  2 +-
 drivers/isdn/hisax/bkm_a8.c       |  2 +-
 drivers/isdn/hisax/diva.c         | 14 +++++++-------
 drivers/isdn/hisax/elsa.c         |  8 ++++----
 drivers/isdn/hisax/enternow_pci.c |  2 +-
 drivers/isdn/hisax/gazel.c        |  8 ++++----
 drivers/isdn/hisax/hfc_pci.c      |  2 +-
 drivers/isdn/hisax/hisax.h        | 23 +++++++++++++++++++++++
 drivers/isdn/hisax/niccy.c        |  6 +++---
 drivers/isdn/hisax/nj_s.c         |  2 +-
 drivers/isdn/hisax/nj_u.c         |  2 +-
 drivers/isdn/hisax/sedlbauer.c    |  6 +++---
 drivers/isdn/hisax/telespci.c     |  2 +-
 drivers/isdn/hisax/w6692.c        |  2 +-
 drivers/pci/Kconfig               | 11 -----------
 drivers/pci/Makefile              |  3 ---
 drivers/pci/legacy.c              | 34 ----------------------------------
 include/linux/pci.h               | 13 -------------
 20 files changed, 64 insertions(+), 102 deletions(-)
 delete mode 100644 drivers/pci/legacy.c

(limited to 'include/linux')

diff --git a/drivers/isdn/hisax/Kconfig b/drivers/isdn/hisax/Kconfig
index 3464ebc4cdbc..452fde9edf86 100644
--- a/drivers/isdn/hisax/Kconfig
+++ b/drivers/isdn/hisax/Kconfig
@@ -109,7 +109,7 @@ config HISAX_16_3
 
 config HISAX_TELESPCI
 	bool "Teles PCI"
-	depends on PCI && PCI_LEGACY && (BROKEN || !(SPARC || PPC || PARISC || M68K || (MIPS && !CPU_LITTLE_ENDIAN) || FRV))
+	depends on PCI && (BROKEN || !(SPARC || PPC || PARISC || M68K || (MIPS && !CPU_LITTLE_ENDIAN) || FRV))
 	help
 	  This enables HiSax support for the Teles PCI.
 	  See <file:Documentation/isdn/README.HiSax> on how to configure it.
@@ -237,7 +237,7 @@ config HISAX_MIC
 
 config HISAX_NETJET
 	bool "NETjet card"
-	depends on PCI && PCI_LEGACY && (BROKEN || !(SPARC || PPC || PARISC || M68K || (MIPS && !CPU_LITTLE_ENDIAN) || FRV))
+	depends on PCI && (BROKEN || !(SPARC || PPC || PARISC || M68K || (MIPS && !CPU_LITTLE_ENDIAN) || FRV))
 	help
 	  This enables HiSax support for the NetJet from Traverse
 	  Technologies.
@@ -248,7 +248,7 @@ config HISAX_NETJET
 
 config HISAX_NETJET_U
 	bool "NETspider U card"
-	depends on PCI && PCI_LEGACY && (BROKEN || !(SPARC || PPC || PARISC || M68K || (MIPS && !CPU_LITTLE_ENDIAN) || FRV))
+	depends on PCI && (BROKEN || !(SPARC || PPC || PARISC || M68K || (MIPS && !CPU_LITTLE_ENDIAN) || FRV))
 	help
 	  This enables HiSax support for the Netspider U interface ISDN card
 	  from Traverse Technologies.
@@ -287,7 +287,7 @@ config HISAX_HSTSAPHIR
 
 config HISAX_BKM_A4T
 	bool "Telekom A4T card"
-	depends on PCI && PCI_LEGACY
+	depends on PCI
 	help
 	  This enables HiSax support for the Telekom A4T card.
 
@@ -297,7 +297,7 @@ config HISAX_BKM_A4T
 
 config HISAX_SCT_QUADRO
 	bool "Scitel Quadro card"
-	depends on PCI && PCI_LEGACY
+	depends on PCI
 	help
 	  This enables HiSax support for the Scitel Quadro card.
 
@@ -316,7 +316,7 @@ config HISAX_GAZEL
 
 config HISAX_HFC_PCI
 	bool "HFC PCI-Bus cards"
-	depends on PCI && PCI_LEGACY && (BROKEN || !(SPARC || PPC || PARISC || M68K || (MIPS && !CPU_LITTLE_ENDIAN) || FRV))
+	depends on PCI && (BROKEN || !(SPARC || PPC || PARISC || M68K || (MIPS && !CPU_LITTLE_ENDIAN) || FRV))
 	help
 	  This enables HiSax support for the HFC-S PCI 2BDS0 based cards.
 
@@ -325,7 +325,7 @@ config HISAX_HFC_PCI
 
 config HISAX_W6692
 	bool "Winbond W6692 based cards"
-	depends on PCI && PCI_LEGACY
+	depends on PCI
 	help
 	  This enables HiSax support for Winbond W6692 based PCI ISDN cards.
 
@@ -341,7 +341,7 @@ config HISAX_HFC_SX
 
 config HISAX_ENTERNOW_PCI
 	bool "Formula-n enter:now PCI card"
-	depends on HISAX_NETJET && PCI && PCI_LEGACY && (BROKEN || !(SPARC || PPC || PARISC || M68K || (MIPS && !CPU_LITTLE_ENDIAN) || FRV))
+	depends on HISAX_NETJET && PCI && (BROKEN || !(SPARC || PPC || PARISC || M68K || (MIPS && !CPU_LITTLE_ENDIAN) || FRV))
 	help
 	  This enables HiSax support for the Formula-n enter:now PCI
 	  ISDN card.
@@ -412,7 +412,7 @@ config HISAX_HFC4S8S
 
 config HISAX_FRITZ_PCIPNP
 	tristate "AVM Fritz!Card PCI/PCIv2/PnP support (EXPERIMENTAL)"
-	depends on PCI && PCI_LEGACY && EXPERIMENTAL
+	depends on PCI && EXPERIMENTAL
 	help
 	  This enables the driver for the AVM Fritz!Card PCI,
 	  Fritz!Card PCI v2 and Fritz!Card PnP.
diff --git a/drivers/isdn/hisax/avm_pci.c b/drivers/isdn/hisax/avm_pci.c
index 7cabc5a19492..14295a155e71 100644
--- a/drivers/isdn/hisax/avm_pci.c
+++ b/drivers/isdn/hisax/avm_pci.c
@@ -822,7 +822,7 @@ static int __devinit avm_pnp_setup(struct IsdnCardState *cs)
 
 #endif /* __ISAPNP__ */
 
-#ifndef CONFIG_PCI_LEGACY
+#ifndef CONFIG_PCI
 
 static int __devinit avm_pci_setup(struct IsdnCardState *cs)
 {
@@ -835,7 +835,7 @@ static struct pci_dev *dev_avm __devinitdata = NULL;
 
 static int __devinit avm_pci_setup(struct IsdnCardState *cs)
 {
-	if ((dev_avm = pci_find_device(PCI_VENDOR_ID_AVM,
+	if ((dev_avm = hisax_find_pci_device(PCI_VENDOR_ID_AVM,
 		PCI_DEVICE_ID_AVM_A1, dev_avm))) {
 
 		if (pci_enable_device(dev_avm))
@@ -864,7 +864,7 @@ static int __devinit avm_pci_setup(struct IsdnCardState *cs)
 	return (1);
 }
 
-#endif /* CONFIG_PCI_LEGACY */
+#endif /* CONFIG_PCI */
 
 int __devinit
 setup_avm_pcipnp(struct IsdnCard *card)
diff --git a/drivers/isdn/hisax/bkm_a4t.c b/drivers/isdn/hisax/bkm_a4t.c
index 9ca2ee54cc94..9f2009c0b69c 100644
--- a/drivers/isdn/hisax/bkm_a4t.c
+++ b/drivers/isdn/hisax/bkm_a4t.c
@@ -340,7 +340,7 @@ setup_bkm_a4t(struct IsdnCard *card)
 	} else
 		return (0);
 
-	while ((dev_a4t = pci_find_device(PCI_VENDOR_ID_ZORAN,
+	while ((dev_a4t = hisax_find_pci_device(PCI_VENDOR_ID_ZORAN,
 		PCI_DEVICE_ID_ZORAN_36120, dev_a4t))) {
 		ret = a4t_pci_probe(dev_a4t, cs, &found, &pci_memaddr);
 		if (!ret)
diff --git a/drivers/isdn/hisax/bkm_a8.c b/drivers/isdn/hisax/bkm_a8.c
index e1ff4717a8a6..e775706c60e3 100644
--- a/drivers/isdn/hisax/bkm_a8.c
+++ b/drivers/isdn/hisax/bkm_a8.c
@@ -301,7 +301,7 @@ setup_sct_quadro(struct IsdnCard *card)
 		(sub_vendor_id != PCI_VENDOR_ID_BERKOM)))
 		return (0);
 	if (cs->subtyp == SCT_1) {
-		while ((dev_a8 = pci_find_device(PCI_VENDOR_ID_PLX,
+		while ((dev_a8 = hisax_find_pci_device(PCI_VENDOR_ID_PLX,
 			PCI_DEVICE_ID_PLX_9050, dev_a8))) {
 			
 			sub_vendor_id = dev_a8->subsystem_vendor;
diff --git a/drivers/isdn/hisax/diva.c b/drivers/isdn/hisax/diva.c
index 0b0c2e5d806b..780da9bda915 100644
--- a/drivers/isdn/hisax/diva.c
+++ b/drivers/isdn/hisax/diva.c
@@ -1148,7 +1148,7 @@ static int __devinit setup_diva_isapnp(struct IsdnCard *card)
 
 #endif	/* ISAPNP */
 
-#ifdef CONFIG_PCI_LEGACY
+#ifdef CONFIG_PCI
 static struct pci_dev *dev_diva __devinitdata = NULL;
 static struct pci_dev *dev_diva_u __devinitdata = NULL;
 static struct pci_dev *dev_diva201 __devinitdata = NULL;
@@ -1159,21 +1159,21 @@ static int __devinit setup_diva_pci(struct IsdnCard *card)
 	struct IsdnCardState *cs = card->cs;
 
 	cs->subtyp = 0;
-	if ((dev_diva = pci_find_device(PCI_VENDOR_ID_EICON,
+	if ((dev_diva = hisax_find_pci_device(PCI_VENDOR_ID_EICON,
 		PCI_DEVICE_ID_EICON_DIVA20, dev_diva))) {
 		if (pci_enable_device(dev_diva))
 			return(0);
 		cs->subtyp = DIVA_PCI;
 		cs->irq = dev_diva->irq;
 		cs->hw.diva.cfg_reg = pci_resource_start(dev_diva, 2);
-	} else if ((dev_diva_u = pci_find_device(PCI_VENDOR_ID_EICON,
+	} else if ((dev_diva_u = hisax_find_pci_device(PCI_VENDOR_ID_EICON,
 		PCI_DEVICE_ID_EICON_DIVA20_U, dev_diva_u))) {
 		if (pci_enable_device(dev_diva_u))
 			return(0);
 		cs->subtyp = DIVA_PCI;
 		cs->irq = dev_diva_u->irq;
 		cs->hw.diva.cfg_reg = pci_resource_start(dev_diva_u, 2);
-	} else if ((dev_diva201 = pci_find_device(PCI_VENDOR_ID_EICON,
+	} else if ((dev_diva201 = hisax_find_pci_device(PCI_VENDOR_ID_EICON,
 		PCI_DEVICE_ID_EICON_DIVA201, dev_diva201))) {
 		if (pci_enable_device(dev_diva201))
 			return(0);
@@ -1183,7 +1183,7 @@ static int __devinit setup_diva_pci(struct IsdnCard *card)
 			(ulong) ioremap(pci_resource_start(dev_diva201, 0), 4096);
 		cs->hw.diva.cfg_reg =
 			(ulong) ioremap(pci_resource_start(dev_diva201, 1), 4096);
-	} else if ((dev_diva202 = pci_find_device(PCI_VENDOR_ID_EICON,
+	} else if ((dev_diva202 = hisax_find_pci_device(PCI_VENDOR_ID_EICON,
 		PCI_DEVICE_ID_EICON_DIVA202, dev_diva202))) {
 		if (pci_enable_device(dev_diva202))
 			return(0);
@@ -1229,14 +1229,14 @@ static int __devinit setup_diva_pci(struct IsdnCard *card)
 	return (1);		/* card found */
 }
 
-#else	/* if !CONFIG_PCI_LEGACY */
+#else	/* if !CONFIG_PCI */
 
 static int __devinit setup_diva_pci(struct IsdnCard *card)
 {
 	return (-1);	/* card not found; continue search */
 }
 
-#endif	/* CONFIG_PCI_LEGACY */
+#endif	/* CONFIG_PCI */
 
 int __devinit
 setup_diva(struct IsdnCard *card)
diff --git a/drivers/isdn/hisax/elsa.c b/drivers/isdn/hisax/elsa.c
index aa29d1cf16af..23c41fcd864e 100644
--- a/drivers/isdn/hisax/elsa.c
+++ b/drivers/isdn/hisax/elsa.c
@@ -1025,7 +1025,7 @@ setup_elsa_pcmcia(struct IsdnCard *card)
 	       cs->irq);
 }
 
-#ifdef CONFIG_PCI_LEGACY
+#ifdef CONFIG_PCI
 static 	struct pci_dev *dev_qs1000 __devinitdata = NULL;
 static 	struct pci_dev *dev_qs3000 __devinitdata = NULL;
 
@@ -1035,7 +1035,7 @@ setup_elsa_pci(struct IsdnCard *card)
 	struct IsdnCardState *cs = card->cs;
 
 	cs->subtyp = 0;
-	if ((dev_qs1000 = pci_find_device(PCI_VENDOR_ID_ELSA,
+	if ((dev_qs1000 = hisax_find_pci_device(PCI_VENDOR_ID_ELSA,
 		PCI_DEVICE_ID_ELSA_MICROLINK, dev_qs1000))) {
 		if (pci_enable_device(dev_qs1000))
 			return(0);
@@ -1043,7 +1043,7 @@ setup_elsa_pci(struct IsdnCard *card)
 		cs->irq = dev_qs1000->irq;
 		cs->hw.elsa.cfg = pci_resource_start(dev_qs1000, 1);
 		cs->hw.elsa.base = pci_resource_start(dev_qs1000, 3);
-	} else if ((dev_qs3000 = pci_find_device(PCI_VENDOR_ID_ELSA,
+	} else if ((dev_qs3000 = hisax_find_pci_device(PCI_VENDOR_ID_ELSA,
 		PCI_DEVICE_ID_ELSA_QS3000, dev_qs3000))) {
 		if (pci_enable_device(dev_qs3000))
 			return(0);
@@ -1093,7 +1093,7 @@ setup_elsa_pci(struct IsdnCard *card)
 {
 	return (1);
 }
-#endif /* CONFIG_PCI_LEGACY */
+#endif /* CONFIG_PCI */
 
 static int __devinit
 setup_elsa_common(struct IsdnCard *card)
diff --git a/drivers/isdn/hisax/enternow_pci.c b/drivers/isdn/hisax/enternow_pci.c
index 39f421ed8de8..26264abf1f58 100644
--- a/drivers/isdn/hisax/enternow_pci.c
+++ b/drivers/isdn/hisax/enternow_pci.c
@@ -406,7 +406,7 @@ setup_enternow_pci(struct IsdnCard *card)
 
 	for ( ;; )
 	{
-		if ((dev_netjet = pci_find_device(PCI_VENDOR_ID_TIGERJET,
+		if ((dev_netjet = hisax_find_pci_device(PCI_VENDOR_ID_TIGERJET,
 			PCI_DEVICE_ID_TIGERJET_300,  dev_netjet))) {
 			ret = en_pci_probe(dev_netjet, cs);
 			if (!ret)
diff --git a/drivers/isdn/hisax/gazel.c b/drivers/isdn/hisax/gazel.c
index 0ea3b4607680..353982fc1436 100644
--- a/drivers/isdn/hisax/gazel.c
+++ b/drivers/isdn/hisax/gazel.c
@@ -531,7 +531,7 @@ setup_gazelisa(struct IsdnCard *card, struct IsdnCardState *cs)
 	return (0);
 }
 
-#ifdef CONFIG_PCI_LEGACY
+#ifdef CONFIG_PCI
 static struct pci_dev *dev_tel __devinitdata = NULL;
 
 static int __devinit
@@ -546,7 +546,7 @@ setup_gazelpci(struct IsdnCardState *cs)
 	found = 0;
 	seekcard = PCI_DEVICE_ID_PLX_R685;
 	for (nbseek = 0; nbseek < 4; nbseek++) {
-		if ((dev_tel = pci_find_device(PCI_VENDOR_ID_PLX,
+		if ((dev_tel = hisax_find_pci_device(PCI_VENDOR_ID_PLX,
 					seekcard, dev_tel))) {
 			if (pci_enable_device(dev_tel))
 				return 1;
@@ -620,7 +620,7 @@ setup_gazelpci(struct IsdnCardState *cs)
 
 	return (0);
 }
-#endif /* CONFIG_PCI_LEGACY */
+#endif /* CONFIG_PCI */
 
 int __devinit
 setup_gazel(struct IsdnCard *card)
@@ -640,7 +640,7 @@ setup_gazel(struct IsdnCard *card)
 			return (0);
 	} else {
 
-#ifdef CONFIG_PCI_LEGACY
+#ifdef CONFIG_PCI
 		if (setup_gazelpci(cs))
 			return (0);
 #else
diff --git a/drivers/isdn/hisax/hfc_pci.c b/drivers/isdn/hisax/hfc_pci.c
index 10914731b304..917cc84065bd 100644
--- a/drivers/isdn/hisax/hfc_pci.c
+++ b/drivers/isdn/hisax/hfc_pci.c
@@ -1658,7 +1658,7 @@ setup_hfcpci(struct IsdnCard *card)
 
 	i = 0;
 	while (id_list[i].vendor_id) {
-		tmp_hfcpci = pci_find_device(id_list[i].vendor_id,
+		tmp_hfcpci = hisax_find_pci_device(id_list[i].vendor_id,
 					     id_list[i].device_id,
 					     dev_hfcpci);
 		i++;
diff --git a/drivers/isdn/hisax/hisax.h b/drivers/isdn/hisax/hisax.h
index 0685c1946969..832a87855ffb 100644
--- a/drivers/isdn/hisax/hisax.h
+++ b/drivers/isdn/hisax/hisax.h
@@ -1323,3 +1323,26 @@ void release_tei(struct IsdnCardState *cs);
 char *HiSax_getrev(const char *revision);
 int TeiNew(void);
 void TeiFree(void);
+
+#ifdef CONFIG_PCI
+
+#include <linux/pci.h>
+
+/* adaptation wrapper for old usage
+ * WARNING! This is unfit for use in a PCI hotplug environment,
+ * as the returned PCI device can disappear at any moment in time.
+ * Callers should be converted to use pci_get_device() instead.
+ */
+static inline struct pci_dev *hisax_find_pci_device(unsigned int vendor,
+						    unsigned int device,
+						    struct pci_dev *from)
+{
+	struct pci_dev *pdev;
+
+	pci_dev_get(from);
+	pdev = pci_get_subsys(vendor, device, PCI_ANY_ID, PCI_ANY_ID, from);
+	pci_dev_put(pdev);
+	return pdev;
+}
+
+#endif
diff --git a/drivers/isdn/hisax/niccy.c b/drivers/isdn/hisax/niccy.c
index ef00633e1d2a..ccaa6e13310f 100644
--- a/drivers/isdn/hisax/niccy.c
+++ b/drivers/isdn/hisax/niccy.c
@@ -297,12 +297,12 @@ int __devinit setup_niccy(struct IsdnCard *card)
 			return 0;
 		}
 	} else {
-#ifdef CONFIG_PCI_LEGACY
+#ifdef CONFIG_PCI
 		static struct pci_dev *niccy_dev __devinitdata;
 
 		u_int pci_ioaddr;
 		cs->subtyp = 0;
-		if ((niccy_dev = pci_find_device(PCI_VENDOR_ID_SATSAGEM,
+		if ((niccy_dev = hisax_find_pci_device(PCI_VENDOR_ID_SATSAGEM,
 						 PCI_DEVICE_ID_SATSAGEM_NICCY,
 						 niccy_dev))) {
 			if (pci_enable_device(niccy_dev))
@@ -354,7 +354,7 @@ int __devinit setup_niccy(struct IsdnCard *card)
 		printk(KERN_WARNING "Niccy: io0 0 and NO_PCI_BIOS\n");
 		printk(KERN_WARNING "Niccy: unable to config NICCY PCI\n");
 		return 0;
-#endif				/* CONFIG_PCI_LEGACY */
+#endif				/* CONFIG_PCI */
 	}
 	printk(KERN_INFO "HiSax: NICCY %s config irq:%d data:0x%X ale:0x%X\n",
 		(cs->subtyp == 1) ? "PnP" : "PCI",
diff --git a/drivers/isdn/hisax/nj_s.c b/drivers/isdn/hisax/nj_s.c
index 8d36ccc87d81..2344e7b33448 100644
--- a/drivers/isdn/hisax/nj_s.c
+++ b/drivers/isdn/hisax/nj_s.c
@@ -276,7 +276,7 @@ setup_netjet_s(struct IsdnCard *card)
 
 	for ( ;; )
 	{
-		if ((dev_netjet = pci_find_device(PCI_VENDOR_ID_TIGERJET,
+		if ((dev_netjet = hisax_find_pci_device(PCI_VENDOR_ID_TIGERJET,
 			PCI_DEVICE_ID_TIGERJET_300,  dev_netjet))) {
 			ret = njs_pci_probe(dev_netjet, cs);
 			if (!ret)
diff --git a/drivers/isdn/hisax/nj_u.c b/drivers/isdn/hisax/nj_u.c
index d306c946ffba..095e974aed80 100644
--- a/drivers/isdn/hisax/nj_u.c
+++ b/drivers/isdn/hisax/nj_u.c
@@ -240,7 +240,7 @@ setup_netjet_u(struct IsdnCard *card)
 
 	for ( ;; )
 	{
-		if ((dev_netjet = pci_find_device(PCI_VENDOR_ID_TIGERJET,
+		if ((dev_netjet = hisax_find_pci_device(PCI_VENDOR_ID_TIGERJET,
 			PCI_DEVICE_ID_TIGERJET_300,  dev_netjet))) {
 			ret = nju_pci_probe(dev_netjet, cs);
 			if (!ret)
diff --git a/drivers/isdn/hisax/sedlbauer.c b/drivers/isdn/hisax/sedlbauer.c
index 5569a522e2a1..69dfc8d29017 100644
--- a/drivers/isdn/hisax/sedlbauer.c
+++ b/drivers/isdn/hisax/sedlbauer.c
@@ -598,7 +598,7 @@ setup_sedlbauer_isapnp(struct IsdnCard *card, int *bytecnt)
 }
 #endif /* __ISAPNP__ */
 
-#ifdef CONFIG_PCI_LEGACY
+#ifdef CONFIG_PCI
 static struct pci_dev *dev_sedl __devinitdata = NULL;
 
 static int __devinit
@@ -607,7 +607,7 @@ setup_sedlbauer_pci(struct IsdnCard *card)
 	struct IsdnCardState *cs = card->cs;
 	u16 sub_vendor_id, sub_id;
 
-	if ((dev_sedl = pci_find_device(PCI_VENDOR_ID_TIGERJET,
+	if ((dev_sedl = hisax_find_pci_device(PCI_VENDOR_ID_TIGERJET,
 			PCI_DEVICE_ID_TIGERJET_100, dev_sedl))) {
 		if (pci_enable_device(dev_sedl))
 			return(0);
@@ -673,7 +673,7 @@ setup_sedlbauer_pci(struct IsdnCard *card)
 	return (1);
 }
 
-#endif /* CONFIG_PCI_LEGACY */
+#endif /* CONFIG_PCI */
 
 int __devinit
 setup_sedlbauer(struct IsdnCard *card)
diff --git a/drivers/isdn/hisax/telespci.c b/drivers/isdn/hisax/telespci.c
index 28b08de4673d..b85ceb3746ce 100644
--- a/drivers/isdn/hisax/telespci.c
+++ b/drivers/isdn/hisax/telespci.c
@@ -300,7 +300,7 @@ setup_telespci(struct IsdnCard *card)
 	if (cs->typ != ISDN_CTYPE_TELESPCI)
 		return (0);
 
-	if ((dev_tel = pci_find_device (PCI_VENDOR_ID_ZORAN, PCI_DEVICE_ID_ZORAN_36120, dev_tel))) {
+	if ((dev_tel = hisax_find_pci_device (PCI_VENDOR_ID_ZORAN, PCI_DEVICE_ID_ZORAN_36120, dev_tel))) {
 		if (pci_enable_device(dev_tel))
 			return(0);
 		cs->irq = dev_tel->irq;
diff --git a/drivers/isdn/hisax/w6692.c b/drivers/isdn/hisax/w6692.c
index c4d862c11a60..9d6e864023fe 100644
--- a/drivers/isdn/hisax/w6692.c
+++ b/drivers/isdn/hisax/w6692.c
@@ -1007,7 +1007,7 @@ setup_w6692(struct IsdnCard *card)
 		return (0);
 
 	while (id_list[id_idx].vendor_id) {
-		dev_w6692 = pci_find_device(id_list[id_idx].vendor_id,
+		dev_w6692 = hisax_find_pci_device(id_list[id_idx].vendor_id,
 					    id_list[id_idx].device_id,
 					    dev_w6692);
 		if (dev_w6692) {
diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig
index b1ecefa2a23d..7858a117e80b 100644
--- a/drivers/pci/Kconfig
+++ b/drivers/pci/Kconfig
@@ -21,17 +21,6 @@ config PCI_MSI
 
 	   If you don't know what to do here, say N.
 
-config PCI_LEGACY
-	bool "Enable deprecated pci_find_* API"
-	depends on PCI
-	default y
-	help
-	  Say Y here if you want to include support for the deprecated
-	  pci_find_device() API.  Most drivers have been converted over
-	  to using the proper hotplug APIs, so this option serves to
-	  include/exclude only a few drivers that are still using this
-	  API.
-
 config PCI_DEBUG
 	bool "PCI Debugging"
 	depends on PCI && DEBUG_KERNEL
diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile
index adb74253a996..8674c1ebe979 100644
--- a/drivers/pci/Makefile
+++ b/drivers/pci/Makefile
@@ -10,9 +10,6 @@ obj-$(CONFIG_SYSFS) += slot.o
 
 obj-$(CONFIG_PCI_QUIRKS) += quirks.o
 
-obj-$(CONFIG_PCI_LEGACY) += legacy.o
-CFLAGS_legacy.o += -Wno-deprecated-declarations
-
 # Build PCI Express stuff if needed
 obj-$(CONFIG_PCIEPORTBUS) += pcie/
 
diff --git a/drivers/pci/legacy.c b/drivers/pci/legacy.c
deleted file mode 100644
index 871f65c15936..000000000000
--- a/drivers/pci/legacy.c
+++ /dev/null
@@ -1,34 +0,0 @@
-#include <linux/init.h>
-#include <linux/pci.h>
-#include <linux/module.h>
-#include <linux/interrupt.h>
-#include "pci.h"
-
-/**
- * pci_find_device - begin or continue searching for a PCI device by vendor/device id
- * @vendor: PCI vendor id to match, or %PCI_ANY_ID to match all vendor ids
- * @device: PCI device id to match, or %PCI_ANY_ID to match all device ids
- * @from: Previous PCI device found in search, or %NULL for new search.
- *
- * Iterates through the list of known PCI devices.  If a PCI device is found
- * with a matching @vendor and @device, a pointer to its device structure is
- * returned.  Otherwise, %NULL is returned.
- * A new search is initiated by passing %NULL as the @from argument.
- * Otherwise if @from is not %NULL, searches continue from next device
- * on the global list.
- *
- * NOTE: Do not use this function any more; use pci_get_device() instead, as
- * the PCI device returned by this function can disappear at any moment in
- * time.
- */
-struct pci_dev *pci_find_device(unsigned int vendor, unsigned int device,
-				struct pci_dev *from)
-{
-	struct pci_dev *pdev;
-
-	pci_dev_get(from);
-	pdev = pci_get_subsys(vendor, device, PCI_ANY_ID, PCI_ANY_ID, from);
-	pci_dev_put(pdev);
-	return pdev;
-}
-EXPORT_SYMBOL(pci_find_device);
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 449d51560fc2..8b79403cc072 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -646,12 +646,6 @@ extern void pci_sort_breadthfirst(void);
 
 /* Generic PCI functions exported to card drivers */
 
-#ifdef CONFIG_PCI_LEGACY
-struct pci_dev __deprecated *pci_find_device(unsigned int vendor,
-					     unsigned int device,
-					     struct pci_dev *from);
-#endif /* CONFIG_PCI_LEGACY */
-
 enum pci_lost_interrupt_reason {
 	PCI_LOST_IRQ_NO_INFORMATION = 0,
 	PCI_LOST_IRQ_DISABLE_MSI,
@@ -1010,13 +1004,6 @@ static inline int pci_proc_domain(struct pci_bus *bus)
 _PCI_NOP_ALL(read, *)
 _PCI_NOP_ALL(write,)
 
-static inline struct pci_dev *pci_find_device(unsigned int vendor,
-					      unsigned int device,
-					      struct pci_dev *from)
-{
-	return NULL;
-}
-
 static inline struct pci_dev *pci_get_device(unsigned int vendor,
 					     unsigned int device,
 					     struct pci_dev *from)
-- 
cgit v1.2.3


From 6841ec681a88b66651e4563040b9c7a7ad25d7b5 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Fri, 22 Jan 2010 01:02:25 -0800
Subject: PCI: introduce pci_assign_unassigned_bridge_resources

For use by pciehp.

pci_setup_bridge() will not check enabled for the slot bridge, otherwise
update res is not updated to bridge BAR.  That is, bridge is already
enabled for port service.

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Reviewed-by: Alex Chiang <achiang@hp.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/setup-bus.c | 116 +++++++++++++++++++++++++++++++++++++-----------
 include/linux/pci.h     |   1 +
 2 files changed, 90 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c
index f76f6e90f3b9..b19a56b8b17a 100644
--- a/drivers/pci/setup-bus.c
+++ b/drivers/pci/setup-bus.c
@@ -71,35 +71,34 @@ static void free_failed_list(struct resource_list_x *head)
 	head->next = NULL;
 }
 
-static void pbus_assign_resources_sorted(const struct pci_bus *bus,
-					 struct resource_list_x *fail_head)
+static void __dev_sort_resources(struct pci_dev *dev,
+				 struct resource_list *head)
 {
-	struct pci_dev *dev;
-	struct resource *res;
-	struct resource_list head, *list, *tmp;
-	int idx;
+	u16 class = dev->class >> 8;
 
-	head.next = NULL;
-	list_for_each_entry(dev, &bus->devices, bus_list) {
-		u16 class = dev->class >> 8;
+	/* Don't touch classless devices or host bridges or ioapics.  */
+	if (class == PCI_CLASS_NOT_DEFINED || class == PCI_CLASS_BRIDGE_HOST)
+		return;
 
-		/* Don't touch classless devices or host bridges or ioapics.  */
-		if (class == PCI_CLASS_NOT_DEFINED ||
-		    class == PCI_CLASS_BRIDGE_HOST)
-			continue;
+	/* Don't touch ioapic devices already enabled by firmware */
+	if (class == PCI_CLASS_SYSTEM_PIC) {
+		u16 command;
+		pci_read_config_word(dev, PCI_COMMAND, &command);
+		if (command & (PCI_COMMAND_IO | PCI_COMMAND_MEMORY))
+			return;
+	}
 
-		/* Don't touch ioapic devices already enabled by firmware */
-		if (class == PCI_CLASS_SYSTEM_PIC) {
-			u16 command;
-			pci_read_config_word(dev, PCI_COMMAND, &command);
-			if (command & (PCI_COMMAND_IO | PCI_COMMAND_MEMORY))
-				continue;
-		}
+	pdev_sort_resources(dev, head);
+}
 
-		pdev_sort_resources(dev, &head);
-	}
+static void __assign_resources_sorted(struct resource_list *head,
+				 struct resource_list_x *fail_head)
+{
+	struct resource *res;
+	struct resource_list *list, *tmp;
+	int idx;
 
-	for (list = head.next; list;) {
+	for (list = head->next; list;) {
 		res = list->res;
 		idx = res - &list->dev->resource[0];
 		if (pci_assign_resource(list->dev, idx)) {
@@ -115,6 +114,30 @@ static void pbus_assign_resources_sorted(const struct pci_bus *bus,
 	}
 }
 
+static void pdev_assign_resources_sorted(struct pci_dev *dev,
+				 struct resource_list_x *fail_head)
+{
+	struct resource_list head;
+
+	head.next = NULL;
+	__dev_sort_resources(dev, &head);
+	__assign_resources_sorted(&head, fail_head);
+
+}
+
+static void pbus_assign_resources_sorted(const struct pci_bus *bus,
+					 struct resource_list_x *fail_head)
+{
+	struct pci_dev *dev;
+	struct resource_list head;
+
+	head.next = NULL;
+	list_for_each_entry(dev, &bus->devices, bus_list)
+		__dev_sort_resources(dev, &head);
+
+	__assign_resources_sorted(&head, fail_head);
+}
+
 void pci_setup_cardbus(struct pci_bus *bus)
 {
 	struct pci_dev *bridge = bus->self;
@@ -273,9 +296,6 @@ static void __pci_setup_bridge(struct pci_bus *bus, unsigned long type)
 {
 	struct pci_dev *bridge = bus->self;
 
-	if (pci_is_enabled(bridge))
-		return;
-
 	dev_info(&bridge->dev, "PCI bridge to [bus %02x-%02x]\n",
 		 bus->secondary, bus->subordinate);
 
@@ -646,7 +666,8 @@ static void __ref __pci_bus_assign_resources(const struct pci_bus *bus,
 
 		switch (dev->class >> 8) {
 		case PCI_CLASS_BRIDGE_PCI:
-			pci_setup_bridge(b);
+			if (!pci_is_enabled(dev))
+				pci_setup_bridge(b);
 			break;
 
 		case PCI_CLASS_BRIDGE_CARDBUS:
@@ -667,6 +688,34 @@ void __ref pci_bus_assign_resources(const struct pci_bus *bus)
 }
 EXPORT_SYMBOL(pci_bus_assign_resources);
 
+static void __ref __pci_bridge_assign_resources(const struct pci_dev *bridge,
+					 struct resource_list_x *fail_head)
+{
+	struct pci_bus *b;
+
+	pdev_assign_resources_sorted((struct pci_dev *)bridge, fail_head);
+
+	b = bridge->subordinate;
+	if (!b)
+		return;
+
+	__pci_bus_assign_resources(b, fail_head);
+
+	switch (bridge->class >> 8) {
+	case PCI_CLASS_BRIDGE_PCI:
+		pci_setup_bridge(b);
+		break;
+
+	case PCI_CLASS_BRIDGE_CARDBUS:
+		pci_setup_cardbus(b);
+		break;
+
+	default:
+		dev_info(&bridge->dev, "not setting up bridge for bus "
+			 "%04x:%02x\n", pci_domain_nr(b), b->number);
+		break;
+	}
+}
 static void pci_bridge_release_resources(struct pci_bus *bus,
 					  unsigned long type)
 {
@@ -911,3 +960,16 @@ enable_and_dump:
 		pci_bus_dump_resources(bus);
 	}
 }
+
+void pci_assign_unassigned_bridge_resources(struct pci_dev *bridge)
+{
+	struct pci_bus *parent = bridge->subordinate;
+	int retval;
+
+	pci_bus_size_bridges(parent);
+	__pci_bridge_assign_resources(bridge, NULL);
+	retval = pci_reenable_device(bridge);
+	pci_set_master(bridge);
+	pci_enable_bridges(parent);
+}
+EXPORT_SYMBOL_GPL(pci_assign_unassigned_bridge_resources);
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 8b79403cc072..f0c056780a1f 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -801,6 +801,7 @@ void pci_bus_assign_resources(const struct pci_bus *bus);
 void pci_bus_size_bridges(struct pci_bus *bus);
 int pci_claim_resource(struct pci_dev *, int);
 void pci_assign_unassigned_resources(void);
+void pci_assign_unassigned_bridge_resources(struct pci_dev *bridge);
 void pdev_enable_device(struct pci_dev *);
 void pdev_sort_resources(struct pci_dev *, struct resource_list *);
 int pci_enable_resources(struct pci_dev *, int mask);
-- 
cgit v1.2.3


From 6d3be84aab461815978d970aa45f5bc9e52dd772 Mon Sep 17 00:00:00 2001
From: Kenji Kaneshige <kaneshige.kenji@jp.fujitsu.com>
Date: Tue, 9 Feb 2010 12:21:27 +0900
Subject: PCI: mark is_pcie obsolete

The "is_pcie" field in struct pci_dev is no longer needed because
struct pci_dev has PCIe capability offset in "pcie_cap" field and
(pcie_cap != 0) means the device is PCIe capable. This patch marks
"is_pcie" fields obsolete.

Current users of "is_pcie" field are:

- drivers/ssb/scan.c
- drivers/net/wireless/ath/ath9k/pci.c
- drivers/net/wireless/ath/ath5k/attach.c
- drivers/net/wireless/ath/ath5k/reset.c
- drivers/acpi/hest.c
- drivers/pci/pcie/pme/pcie_pme.c

Will post patches for each to use pci_is_pcie() as a follow-up.

Signed-off-by: Kenji Kaneshige <kaneshige.kenji@jp.fujitsu.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 include/linux/pci.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/pci.h b/include/linux/pci.h
index f0c056780a1f..578a0770961a 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -302,7 +302,8 @@ struct pci_dev {
 	unsigned int	msix_enabled:1;
 	unsigned int	ari_enabled:1;	/* ARI forwarding */
 	unsigned int	is_managed:1;
-	unsigned int	is_pcie:1;
+	unsigned int	is_pcie:1;	/* Obsolete. Will be removed.
+					   Use pci_is_pcie() instead */
 	unsigned int    needs_freset:1; /* Dev requires fundamental reset */
 	unsigned int	state_saved:1;
 	unsigned int	is_physfn:1;
-- 
cgit v1.2.3


From bf825f81b454fae2ffe1b675f3a549656726440e Mon Sep 17 00:00:00 2001
From: Jamal Hadi Salim <hadi@cyberus.ca>
Date: Mon, 22 Feb 2010 11:32:54 +0000
Subject: xfrm: introduce basic mark infrastructure

Add basic structuring and accessors for xfrm mark

Signed-off-by: Jamal Hadi Salim <hadi@cyberus.ca>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/xfrm.h | 12 +++++++++---
 include/net/xfrm.h   | 22 ++++++++++++++++++++++
 2 files changed, 31 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h
index 29e04beb1fc9..b971e3848493 100644
--- a/include/linux/xfrm.h
+++ b/include/linux/xfrm.h
@@ -267,8 +267,8 @@ enum xfrm_attr_type_t {
 	XFRMA_ALG_COMP,		/* struct xfrm_algo */
 	XFRMA_ENCAP,		/* struct xfrm_algo + struct xfrm_encap_tmpl */
 	XFRMA_TMPL,		/* 1 or more struct xfrm_user_tmpl */
-	XFRMA_SA,
-	XFRMA_POLICY,
+	XFRMA_SA,		/* struct xfrm_usersa_info  */
+	XFRMA_POLICY,		/*struct xfrm_userpolicy_info */
 	XFRMA_SEC_CTX,		/* struct xfrm_sec_ctx */
 	XFRMA_LTIME_VAL,
 	XFRMA_REPLAY_VAL,
@@ -276,17 +276,23 @@ enum xfrm_attr_type_t {
 	XFRMA_ETIMER_THRESH,
 	XFRMA_SRCADDR,		/* xfrm_address_t */
 	XFRMA_COADDR,		/* xfrm_address_t */
-	XFRMA_LASTUSED,
+	XFRMA_LASTUSED,		/* unsigned long  */
 	XFRMA_POLICY_TYPE,	/* struct xfrm_userpolicy_type */
 	XFRMA_MIGRATE,
 	XFRMA_ALG_AEAD,		/* struct xfrm_algo_aead */
 	XFRMA_KMADDRESS,        /* struct xfrm_user_kmaddress */
 	XFRMA_ALG_AUTH_TRUNC,	/* struct xfrm_algo_auth */
+	XFRMA_MARK,		/* struct xfrm_mark */
 	__XFRMA_MAX
 
 #define XFRMA_MAX (__XFRMA_MAX - 1)
 };
 
+struct xfrm_mark {
+	__u32           v; /* value */
+	__u32           m; /* mask */
+};
+
 enum xfrm_sadattr_type_t {
 	XFRMA_SAD_UNSPEC,
 	XFRMA_SAD_CNT,
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 0beb413c01c4..39f151c7f251 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -140,6 +140,7 @@ struct xfrm_state {
 
 	struct xfrm_id		id;
 	struct xfrm_selector	sel;
+	struct xfrm_mark	mark;
 
 	u32			genid;
 
@@ -481,6 +482,7 @@ struct xfrm_policy {
 
 	u32			priority;
 	u32			index;
+	struct xfrm_mark	mark;
 	struct xfrm_selector	selector;
 	struct xfrm_lifetime_cfg lft;
 	struct xfrm_lifetime_cur curlft;
@@ -1570,4 +1572,24 @@ static inline struct xfrm_state *xfrm_input_state(struct sk_buff *skb)
 }
 #endif
 
+static inline int xfrm_mark_get(struct nlattr **attrs, struct xfrm_mark *m)
+{
+	if (attrs[XFRMA_MARK])
+		memcpy(m, nla_data(attrs[XFRMA_MARK]), sizeof(m));
+	else
+		m->v = m->m = 0;
+
+	return m->v & m->m;
+}
+
+static inline int xfrm_mark_put(struct sk_buff *skb, struct xfrm_mark *m)
+{
+	if (m->m | m->v)
+		NLA_PUT(skb, XFRMA_MARK, sizeof(struct xfrm_mark), m);
+	return 0;
+
+nla_put_failure:
+	return -1;
+}
+
 #endif	/* _NET_XFRM_H */
-- 
cgit v1.2.3


From c7f486567c1d0acd2e4166c47069835b9f75e77b Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Wed, 17 Feb 2010 23:39:08 +0100
Subject: PCI PM: PCIe PME root port service driver

PCIe native PME detection mechanism is based on interrupts generated
by root ports or event collectors every time a PCIe device sends a
PME message upstream.

Once a PME message has been sent by an endpoint device and received
by its root port (or event collector in the case of root complex
integrated endpoints), the Requester ID from the message header is
registered in the root port's Root Status register.  At the same
time, the PME Status bit of the Root Status register is set to
indicate that there's a PME to handle.  If PCIe PME interrupt is
enabled for the root port, it generates an interrupt once the PME
Status has been set.  After receiving the interrupt, the kernel can
identify the PCIe device that generated the PME using the Requester
ID from the root port's Root Status register. [For details, see PCI
Express Base Specification, Rev. 2.0.]

Implement a driver for the PCIe PME root port service working in
accordance with the above description.

Based on a patch from Shaohua Li <shaohua.li@intel.com>.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 Documentation/kernel-parameters.txt  |   6 +
 drivers/pci/pcie/Kconfig             |   4 +
 drivers/pci/pcie/Makefile            |   2 +
 drivers/pci/pcie/pme/Makefile        |   8 +
 drivers/pci/pcie/pme/pcie_pme.c      | 493 +++++++++++++++++++++++++++++++++++
 drivers/pci/pcie/pme/pcie_pme.h      |  28 ++
 drivers/pci/pcie/pme/pcie_pme_acpi.c |  54 ++++
 include/linux/pci.h                  |   1 +
 8 files changed, 596 insertions(+)
 create mode 100644 drivers/pci/pcie/pme/Makefile
 create mode 100644 drivers/pci/pcie/pme/pcie_pme.c
 create mode 100644 drivers/pci/pcie/pme/pcie_pme.h
 create mode 100644 drivers/pci/pcie/pme/pcie_pme_acpi.c

(limited to 'include/linux')

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index e7848a0d99eb..747a2c8f62f3 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1998,6 +1998,12 @@ and is between 256 and 4096 characters. It is defined in the file
 		force	Enable ASPM even on devices that claim not to support it.
 			WARNING: Forcing ASPM on may cause system lockups.
 
+	pcie_pme=	[PCIE,PM] Native PCIe PME signaling options:
+		off	Do not use native PCIe PME signaling.
+		force	Use native PCIe PME signaling even if the BIOS refuses
+			to allow the kernel to control the relevant PCIe config
+			registers.
+
 	pcmv=		[HW,PCMCIA] BadgePAD 4
 
 	pd.		[PARIDE]
diff --git a/drivers/pci/pcie/Kconfig b/drivers/pci/pcie/Kconfig
index 5a0c6ad53f8e..b8b494b3e0d0 100644
--- a/drivers/pci/pcie/Kconfig
+++ b/drivers/pci/pcie/Kconfig
@@ -46,3 +46,7 @@ config PCIEASPM_DEBUG
 	help
 	  This enables PCI Express ASPM debug support. It will add per-device
 	  interface to control ASPM.
+
+config PCIE_PME
+	def_bool y
+	depends on PCIEPORTBUS && PM_RUNTIME && EXPERIMENTAL && ACPI
diff --git a/drivers/pci/pcie/Makefile b/drivers/pci/pcie/Makefile
index 11f6bb1eae24..ea654545e7c4 100644
--- a/drivers/pci/pcie/Makefile
+++ b/drivers/pci/pcie/Makefile
@@ -11,3 +11,5 @@ obj-$(CONFIG_PCIEPORTBUS)	+= pcieportdrv.o
 
 # Build PCI Express AER if needed
 obj-$(CONFIG_PCIEAER)		+= aer/
+
+obj-$(CONFIG_PCIE_PME) += pme/
diff --git a/drivers/pci/pcie/pme/Makefile b/drivers/pci/pcie/pme/Makefile
new file mode 100644
index 000000000000..8b9238053080
--- /dev/null
+++ b/drivers/pci/pcie/pme/Makefile
@@ -0,0 +1,8 @@
+#
+# Makefile for PCI-Express Root Port PME signaling driver
+#
+
+obj-$(CONFIG_PCIE_PME) += pmedriver.o
+
+pmedriver-objs := pcie_pme.o
+pmedriver-$(CONFIG_ACPI) += pcie_pme_acpi.o
diff --git a/drivers/pci/pcie/pme/pcie_pme.c b/drivers/pci/pcie/pme/pcie_pme.c
new file mode 100644
index 000000000000..b5f96fb3cd83
--- /dev/null
+++ b/drivers/pci/pcie/pme/pcie_pme.c
@@ -0,0 +1,493 @@
+/*
+ * PCIe Native PME support
+ *
+ * Copyright (C) 2007 - 2009 Intel Corp
+ * Copyright (C) 2007 - 2009 Shaohua Li <shaohua.li@intel.com>
+ * Copyright (C) 2009 Rafael J. Wysocki <rjw@sisk.pl>, Novell Inc.
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License V2.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/device.h>
+#include <linux/pcieport_if.h>
+#include <linux/acpi.h>
+#include <linux/pci-acpi.h>
+#include <linux/pm_runtime.h>
+
+#include "../../pci.h"
+#include "pcie_pme.h"
+
+#define PCI_EXP_RTSTA_PME	0x10000 /* PME status */
+#define PCI_EXP_RTSTA_PENDING	0x20000 /* PME pending */
+
+/*
+ * If set, this switch will prevent the PCIe root port PME service driver from
+ * being registered.  Consequently, the interrupt-based PCIe PME signaling will
+ * not be used by any PCIe root ports in that case.
+ */
+static bool pcie_pme_disabled;
+
+/*
+ * The PCI Express Base Specification 2.0, Section 6.1.8, states the following:
+ * "In order to maintain compatibility with non-PCI Express-aware system
+ * software, system power management logic must be configured by firmware to use
+ * the legacy mechanism of signaling PME by default.  PCI Express-aware system
+ * software must notify the firmware prior to enabling native, interrupt-based
+ * PME signaling."  However, if the platform doesn't provide us with a suitable
+ * notification mechanism or the notification fails, it is not clear whether or
+ * not we are supposed to use the interrupt-based PCIe PME signaling.  The
+ * switch below can be used to indicate the desired behaviour.  When set, it
+ * will make the kernel use the interrupt-based PCIe PME signaling regardless of
+ * the platform notification status, although the kernel will attempt to notify
+ * the platform anyway.  When unset, it will prevent the kernel from using the
+ * the interrupt-based PCIe PME signaling if the platform notification fails,
+ * which is the default.
+ */
+static bool pcie_pme_force_enable;
+
+static int __init pcie_pme_setup(char *str)
+{
+	if (!strcmp(str, "off"))
+		pcie_pme_disabled = true;
+	else if (!strcmp(str, "force"))
+		pcie_pme_force_enable = true;
+	return 1;
+}
+__setup("pcie_pme=", pcie_pme_setup);
+
+/**
+ * pcie_pme_platform_setup - Ensure that the kernel controls the PCIe PME.
+ * @srv: PCIe PME root port service to use for carrying out the check.
+ *
+ * Notify the platform that the native PCIe PME is going to be used and return
+ * 'true' if the control of the PCIe PME registers has been acquired from the
+ * platform.
+ */
+static bool pcie_pme_platform_setup(struct pcie_device *srv)
+{
+	return !pcie_pme_platform_notify(srv) || pcie_pme_force_enable;
+}
+
+struct pcie_pme_service_data {
+	spinlock_t lock;
+	struct pcie_device *srv;
+	struct work_struct work;
+	bool noirq; /* Don't enable the PME interrupt used by this service. */
+};
+
+/**
+ * pcie_pme_interrupt_enable - Enable/disable PCIe PME interrupt generation.
+ * @dev: PCIe root port or event collector.
+ * @enable: Enable or disable the interrupt.
+ */
+static void pcie_pme_interrupt_enable(struct pci_dev *dev, bool enable)
+{
+	int rtctl_pos;
+	u16 rtctl;
+
+	rtctl_pos = pci_find_capability(dev, PCI_CAP_ID_EXP) + PCI_EXP_RTCTL;
+
+	pci_read_config_word(dev, rtctl_pos, &rtctl);
+	if (enable)
+		rtctl |= PCI_EXP_RTCTL_PMEIE;
+	else
+		rtctl &= ~PCI_EXP_RTCTL_PMEIE;
+	pci_write_config_word(dev, rtctl_pos, rtctl);
+}
+
+/**
+ * pcie_pme_clear_status - Clear root port PME interrupt status.
+ * @dev: PCIe root port or event collector.
+ */
+static void pcie_pme_clear_status(struct pci_dev *dev)
+{
+	int rtsta_pos;
+	u32 rtsta;
+
+	rtsta_pos = pci_find_capability(dev, PCI_CAP_ID_EXP) + PCI_EXP_RTSTA;
+
+	pci_read_config_dword(dev, rtsta_pos, &rtsta);
+	rtsta |= PCI_EXP_RTSTA_PME;
+	pci_write_config_dword(dev, rtsta_pos, rtsta);
+}
+
+/**
+ * pcie_pme_walk_bus - Scan a PCI bus for devices asserting PME#.
+ * @bus: PCI bus to scan.
+ *
+ * Scan given PCI bus and all buses under it for devices asserting PME#.
+ */
+static bool pcie_pme_walk_bus(struct pci_bus *bus)
+{
+	struct pci_dev *dev;
+	bool ret = false;
+
+	list_for_each_entry(dev, &bus->devices, bus_list) {
+		/* Skip PCIe devices in case we started from a root port. */
+		if (!dev->is_pcie && pci_check_pme_status(dev)) {
+			pm_request_resume(&dev->dev);
+			ret = true;
+		}
+
+		if (dev->subordinate && pcie_pme_walk_bus(dev->subordinate))
+			ret = true;
+	}
+
+	return ret;
+}
+
+/**
+ * pcie_pme_from_pci_bridge - Check if PCIe-PCI bridge generated a PME.
+ * @bus: Secondary bus of the bridge.
+ * @devfn: Device/function number to check.
+ *
+ * PME from PCI devices under a PCIe-PCI bridge may be converted to an in-band
+ * PCIe PME message.  In such that case the bridge should use the Requester ID
+ * of device/function number 0 on its secondary bus.
+ */
+static bool pcie_pme_from_pci_bridge(struct pci_bus *bus, u8 devfn)
+{
+	struct pci_dev *dev;
+	bool found = false;
+
+	if (devfn)
+		return false;
+
+	dev = pci_dev_get(bus->self);
+	if (!dev)
+		return false;
+
+	if (dev->is_pcie && dev->pcie_type == PCI_EXP_TYPE_PCI_BRIDGE) {
+		down_read(&pci_bus_sem);
+		if (pcie_pme_walk_bus(bus))
+			found = true;
+		up_read(&pci_bus_sem);
+	}
+
+	pci_dev_put(dev);
+	return found;
+}
+
+/**
+ * pcie_pme_handle_request - Find device that generated PME and handle it.
+ * @port: Root port or event collector that generated the PME interrupt.
+ * @req_id: PCIe Requester ID of the device that generated the PME.
+ */
+static void pcie_pme_handle_request(struct pci_dev *port, u16 req_id)
+{
+	u8 busnr = req_id >> 8, devfn = req_id & 0xff;
+	struct pci_bus *bus;
+	struct pci_dev *dev;
+	bool found = false;
+
+	/* First, check if the PME is from the root port itself. */
+	if (port->devfn == devfn && port->bus->number == busnr) {
+		if (pci_check_pme_status(port)) {
+			pm_request_resume(&port->dev);
+			found = true;
+		} else {
+			/*
+			 * Apparently, the root port generated the PME on behalf
+			 * of a non-PCIe device downstream.  If this is done by
+			 * a root port, the Requester ID field in its status
+			 * register may contain either the root port's, or the
+			 * source device's information (PCI Express Base
+			 * Specification, Rev. 2.0, Section 6.1.9).
+			 */
+			down_read(&pci_bus_sem);
+			found = pcie_pme_walk_bus(port->subordinate);
+			up_read(&pci_bus_sem);
+		}
+		goto out;
+	}
+
+	/* Second, find the bus the source device is on. */
+	bus = pci_find_bus(pci_domain_nr(port->bus), busnr);
+	if (!bus)
+		goto out;
+
+	/* Next, check if the PME is from a PCIe-PCI bridge. */
+	found = pcie_pme_from_pci_bridge(bus, devfn);
+	if (found)
+		goto out;
+
+	/* Finally, try to find the PME source on the bus. */
+	down_read(&pci_bus_sem);
+	list_for_each_entry(dev, &bus->devices, bus_list) {
+		pci_dev_get(dev);
+		if (dev->devfn == devfn) {
+			found = true;
+			break;
+		}
+		pci_dev_put(dev);
+	}
+	up_read(&pci_bus_sem);
+
+	if (found) {
+		/* The device is there, but we have to check its PME status. */
+		found = pci_check_pme_status(dev);
+		if (found)
+			pm_request_resume(&dev->dev);
+		pci_dev_put(dev);
+	} else if (devfn) {
+		/*
+		 * The device is not there, but we can still try to recover by
+		 * assuming that the PME was reported by a PCIe-PCI bridge that
+		 * used devfn different from zero.
+		 */
+		dev_dbg(&port->dev, "PME interrupt generated for "
+			"non-existent device %02x:%02x.%d\n",
+			busnr, PCI_SLOT(devfn), PCI_FUNC(devfn));
+		found = pcie_pme_from_pci_bridge(bus, 0);
+	}
+
+ out:
+	if (!found)
+		dev_dbg(&port->dev, "Spurious native PME interrupt!\n");
+}
+
+/**
+ * pcie_pme_work_fn - Work handler for PCIe PME interrupt.
+ * @work: Work structure giving access to service data.
+ */
+static void pcie_pme_work_fn(struct work_struct *work)
+{
+	struct pcie_pme_service_data *data =
+			container_of(work, struct pcie_pme_service_data, work);
+	struct pci_dev *port = data->srv->port;
+	int rtsta_pos;
+	u32 rtsta;
+
+	rtsta_pos = pci_find_capability(port, PCI_CAP_ID_EXP) + PCI_EXP_RTSTA;
+
+	spin_lock_irq(&data->lock);
+
+	for (;;) {
+		if (data->noirq)
+			break;
+
+		pci_read_config_dword(port, rtsta_pos, &rtsta);
+		if (rtsta & PCI_EXP_RTSTA_PME) {
+			/*
+			 * Clear PME status of the port.  If there are other
+			 * pending PMEs, the status will be set again.
+			 */
+			pcie_pme_clear_status(port);
+
+			spin_unlock_irq(&data->lock);
+			pcie_pme_handle_request(port, rtsta & 0xffff);
+			spin_lock_irq(&data->lock);
+
+			continue;
+		}
+
+		/* No need to loop if there are no more PMEs pending. */
+		if (!(rtsta & PCI_EXP_RTSTA_PENDING))
+			break;
+
+		spin_unlock_irq(&data->lock);
+		cpu_relax();
+		spin_lock_irq(&data->lock);
+	}
+
+	if (!data->noirq)
+		pcie_pme_interrupt_enable(port, true);
+
+	spin_unlock_irq(&data->lock);
+}
+
+/**
+ * pcie_pme_irq - Interrupt handler for PCIe root port PME interrupt.
+ * @irq: Interrupt vector.
+ * @context: Interrupt context pointer.
+ */
+static irqreturn_t pcie_pme_irq(int irq, void *context)
+{
+	struct pci_dev *port;
+	struct pcie_pme_service_data *data;
+	int rtsta_pos;
+	u32 rtsta;
+	unsigned long flags;
+
+	port = ((struct pcie_device *)context)->port;
+	data = get_service_data((struct pcie_device *)context);
+
+	rtsta_pos = pci_find_capability(port, PCI_CAP_ID_EXP) + PCI_EXP_RTSTA;
+
+	spin_lock_irqsave(&data->lock, flags);
+	pci_read_config_dword(port, rtsta_pos, &rtsta);
+
+	if (!(rtsta & PCI_EXP_RTSTA_PME)) {
+		spin_unlock_irqrestore(&data->lock, flags);
+		return IRQ_NONE;
+	}
+
+	pcie_pme_interrupt_enable(port, false);
+	spin_unlock_irqrestore(&data->lock, flags);
+
+	/* We don't use pm_wq, because it's freezable. */
+	schedule_work(&data->work);
+
+	return IRQ_HANDLED;
+}
+
+/**
+ * pcie_pme_set_native - Set the PME interrupt flag for given device.
+ * @dev: PCI device to handle.
+ * @ign: Ignored.
+ */
+static int pcie_pme_set_native(struct pci_dev *dev, void *ign)
+{
+	dev_info(&dev->dev, "Signaling PME through PCIe PME interrupt\n");
+
+	device_set_run_wake(&dev->dev, true);
+	dev->pme_interrupt = true;
+	return 0;
+}
+
+/**
+ * pcie_pme_mark_devices - Set the PME interrupt flag for devices below a port.
+ * @port: PCIe root port or event collector to handle.
+ *
+ * For each device below given root port, including the port itself (or for each
+ * root complex integrated endpoint if @port is a root complex event collector)
+ * set the flag indicating that it can signal run-time wake-up events via PCIe
+ * PME interrupts.
+ */
+static void pcie_pme_mark_devices(struct pci_dev *port)
+{
+	pcie_pme_set_native(port, NULL);
+	if (port->subordinate) {
+		pci_walk_bus(port->subordinate, pcie_pme_set_native, NULL);
+	} else {
+		struct pci_bus *bus = port->bus;
+		struct pci_dev *dev;
+
+		/* Check if this is a root port event collector. */
+		if (port->pcie_type != PCI_EXP_TYPE_RC_EC || !bus)
+			return;
+
+		down_read(&pci_bus_sem);
+		list_for_each_entry(dev, &bus->devices, bus_list)
+			if (dev->is_pcie
+			    && dev->pcie_type == PCI_EXP_TYPE_RC_END)
+				pcie_pme_set_native(dev, NULL);
+		up_read(&pci_bus_sem);
+	}
+}
+
+/**
+ * pcie_pme_probe - Initialize PCIe PME service for given root port.
+ * @srv: PCIe service to initialize.
+ */
+static int pcie_pme_probe(struct pcie_device *srv)
+{
+	struct pci_dev *port;
+	struct pcie_pme_service_data *data;
+	int ret;
+
+	if (!pcie_pme_platform_setup(srv))
+		return -EACCES;
+
+	data = kzalloc(sizeof(*data), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	spin_lock_init(&data->lock);
+	INIT_WORK(&data->work, pcie_pme_work_fn);
+	data->srv = srv;
+	set_service_data(srv, data);
+
+	port = srv->port;
+	pcie_pme_interrupt_enable(port, false);
+	pcie_pme_clear_status(port);
+
+	ret = request_irq(srv->irq, pcie_pme_irq, IRQF_SHARED, "PCIe PME", srv);
+	if (ret) {
+		kfree(data);
+	} else {
+		pcie_pme_mark_devices(port);
+		pcie_pme_interrupt_enable(port, true);
+	}
+
+	return ret;
+}
+
+/**
+ * pcie_pme_suspend - Suspend PCIe PME service device.
+ * @srv: PCIe service device to suspend.
+ */
+static int pcie_pme_suspend(struct pcie_device *srv)
+{
+	struct pcie_pme_service_data *data = get_service_data(srv);
+	struct pci_dev *port = srv->port;
+
+	spin_lock_irq(&data->lock);
+	pcie_pme_interrupt_enable(port, false);
+	pcie_pme_clear_status(port);
+	data->noirq = true;
+	spin_unlock_irq(&data->lock);
+
+	synchronize_irq(srv->irq);
+
+	return 0;
+}
+
+/**
+ * pcie_pme_resume - Resume PCIe PME service device.
+ * @srv - PCIe service device to resume.
+ */
+static int pcie_pme_resume(struct pcie_device *srv)
+{
+	struct pcie_pme_service_data *data = get_service_data(srv);
+	struct pci_dev *port = srv->port;
+
+	spin_lock_irq(&data->lock);
+	data->noirq = false;
+	pcie_pme_clear_status(port);
+	pcie_pme_interrupt_enable(port, true);
+	spin_unlock_irq(&data->lock);
+
+	return 0;
+}
+
+/**
+ * pcie_pme_remove - Prepare PCIe PME service device for removal.
+ * @srv - PCIe service device to resume.
+ */
+static void pcie_pme_remove(struct pcie_device *srv)
+{
+	pcie_pme_suspend(srv);
+	free_irq(srv->irq, srv);
+	kfree(get_service_data(srv));
+}
+
+static struct pcie_port_service_driver pcie_pme_driver = {
+	.name		= "pcie_pme",
+	.port_type 	= PCI_EXP_TYPE_ROOT_PORT,
+	.service 	= PCIE_PORT_SERVICE_PME,
+
+	.probe		= pcie_pme_probe,
+	.suspend	= pcie_pme_suspend,
+	.resume		= pcie_pme_resume,
+	.remove		= pcie_pme_remove,
+};
+
+/**
+ * pcie_pme_service_init - Register the PCIe PME service driver.
+ */
+static int __init pcie_pme_service_init(void)
+{
+	return pcie_pme_disabled ?
+		-ENODEV : pcie_port_service_register(&pcie_pme_driver);
+}
+
+module_init(pcie_pme_service_init);
diff --git a/drivers/pci/pcie/pme/pcie_pme.h b/drivers/pci/pcie/pme/pcie_pme.h
new file mode 100644
index 000000000000..b30d2b7c9775
--- /dev/null
+++ b/drivers/pci/pcie/pme/pcie_pme.h
@@ -0,0 +1,28 @@
+/*
+ * drivers/pci/pcie/pme/pcie_pme.h
+ *
+ * PCI Express Root Port PME signaling support
+ *
+ * Copyright (C) 2009 Rafael J. Wysocki <rjw@sisk.pl>, Novell Inc.
+ */
+
+#ifndef _PCIE_PME_H_
+#define _PCIE_PME_H_
+
+struct pcie_device;
+
+#ifdef CONFIG_ACPI
+extern int pcie_pme_acpi_setup(struct pcie_device *srv);
+
+static inline int pcie_pme_platform_notify(struct pcie_device *srv)
+{
+	return pcie_pme_acpi_setup(srv);
+}
+#else /* !CONFIG_ACPI */
+static inline int pcie_pme_platform_notify(struct pcie_device *srv)
+{
+	return 0;
+}
+#endif /* !CONFIG_ACPI */
+
+#endif
diff --git a/drivers/pci/pcie/pme/pcie_pme_acpi.c b/drivers/pci/pcie/pme/pcie_pme_acpi.c
new file mode 100644
index 000000000000..83ab2287ae3f
--- /dev/null
+++ b/drivers/pci/pcie/pme/pcie_pme_acpi.c
@@ -0,0 +1,54 @@
+/*
+ * PCIe Native PME support, ACPI-related part
+ *
+ * Copyright (C) 2009 Rafael J. Wysocki <rjw@sisk.pl>, Novell Inc.
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License V2.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+
+#include <linux/pci.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/acpi.h>
+#include <linux/pci-acpi.h>
+#include <linux/pcieport_if.h>
+
+/**
+ * pcie_pme_acpi_setup - Request the ACPI BIOS to release control over PCIe PME.
+ * @srv - PCIe PME service for a root port or event collector.
+ *
+ * Invoked when the PCIe bus type loads PCIe PME service driver.  To avoid
+ * conflict with the BIOS PCIe support requires the BIOS to yield PCIe PME
+ * control to the kernel.
+ */
+int pcie_pme_acpi_setup(struct pcie_device *srv)
+{
+	acpi_status status = AE_NOT_FOUND;
+	struct pci_dev *port = srv->port;
+	acpi_handle handle;
+	int error = 0;
+
+	if (acpi_pci_disabled)
+		return -ENOSYS;
+
+	dev_info(&port->dev, "Requesting control of PCIe PME from ACPI BIOS\n");
+
+	handle = acpi_find_root_bridge_handle(port);
+	if (!handle)
+		return -EINVAL;
+
+	status = acpi_pci_osc_control_set(handle,
+			OSC_PCI_EXPRESS_PME_CONTROL |
+			OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL);
+	if (ACPI_FAILURE(status)) {
+		dev_info(&port->dev,
+			"Failed to receive control of PCIe PME service: %s\n",
+			(status == AE_SUPPORT || status == AE_NOT_FOUND) ?
+			"no _OSC support" : "ACPI _OSC failed");
+		error = -ENODEV;
+	}
+
+	return error;
+}
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 578a0770961a..9fe4b2089b78 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -266,6 +266,7 @@ struct pci_dev {
 					   configuration space */
 	unsigned int	pme_support:5;	/* Bitmask of states from which PME#
 					   can be generated */
+	unsigned int	pme_interrupt:1;
 	unsigned int	d1_support:1;	/* Low power state D1 is supported */
 	unsigned int	d2_support:1;	/* Low power state D2 is supported */
 	unsigned int	no_d1d2:1;	/* Only allow D0 and D3 */
-- 
cgit v1.2.3


From b67ea76172d4b1922c4b3c46c8ea8e9fec1ff38c Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Wed, 17 Feb 2010 23:44:09 +0100
Subject: PCI / ACPI / PM: Platform support for PCI PME wake-up

Although the majority of PCI devices can generate PMEs that in
principle may be used to wake up devices suspended at run time,
platform support is generally necessary to convert PMEs into wake-up
events that can be delivered to the kernel.  If ACPI is used for this
purpose, PME signals generated by a PCI device will trigger the ACPI
GPE associated with the device to generate an ACPI wake-up event that
we can set up a handler for, provided that everything is configured
correctly.

Unfortunately, the subset of PCI devices that have GPEs associated
with them is quite limited.  The devices without dedicated GPEs have
to rely on the GPEs associated with other devices (in the majority of
cases their upstream bridges and, possibly, the root bridge) to
generate ACPI wake-up events in response to PME signals from them.

Add ACPI platform support for PCI PME wake-up:
o Add a framework making is possible to use ACPI system notify
  handlers for run-time PM.
o Add new PCI platform callback ->run_wake() to struct
  pci_platform_pm_ops allowing us to enable/disable the platform to
  generate wake-up events for given device.  Implemet this callback
  for the ACPI platform.
o Define ACPI wake-up handlers for PCI devices and PCI root buses and
  make the PCI-ACPI binding code register wake-up notifiers for all
  PCI devices present in the ACPI tables.
o Add function pci_dev_run_wake() which can be used by PCI drivers to
  check if given device is capable of generating wake-up events at
  run time.

Developed in cooperation with Matthew Garrett <mjg@redhat.com>.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/acpi/internal.h  |   2 -
 drivers/acpi/pci_bind.c  |  14 +++-
 drivers/acpi/pci_root.c  |   8 ++
 drivers/acpi/scan.c      |   1 +
 drivers/pci/pci-acpi.c   | 211 +++++++++++++++++++++++++++++++++++++++++++++++
 drivers/pci/pci.c        |  67 +++++++++++++++
 drivers/pci/pci.h        |   7 ++
 include/acpi/acpi_bus.h  |   4 +
 include/linux/pci-acpi.h |   7 ++
 include/linux/pci.h      |   1 +
 10 files changed, 319 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h
index cb28e0502acc..9c4c962e46e3 100644
--- a/drivers/acpi/internal.h
+++ b/drivers/acpi/internal.h
@@ -36,8 +36,6 @@ static inline int acpi_debug_init(void) { return 0; }
 int acpi_power_init(void);
 int acpi_device_sleep_wake(struct acpi_device *dev,
                            int enable, int sleep_state, int dev_state);
-int acpi_enable_wakeup_device_power(struct acpi_device *dev, int sleep_state);
-int acpi_disable_wakeup_device_power(struct acpi_device *dev);
 int acpi_power_get_inferred_state(struct acpi_device *device);
 int acpi_power_transition(struct acpi_device *device, int state);
 extern int acpi_power_nocheck;
diff --git a/drivers/acpi/pci_bind.c b/drivers/acpi/pci_bind.c
index a5a77b78a723..2ef04098cc1d 100644
--- a/drivers/acpi/pci_bind.c
+++ b/drivers/acpi/pci_bind.c
@@ -26,7 +26,9 @@
 #include <linux/kernel.h>
 #include <linux/types.h>
 #include <linux/pci.h>
+#include <linux/pci-acpi.h>
 #include <linux/acpi.h>
+#include <linux/pm_runtime.h>
 #include <acpi/acpi_bus.h>
 #include <acpi/acpi_drivers.h>
 
@@ -38,7 +40,13 @@ static int acpi_pci_unbind(struct acpi_device *device)
 	struct pci_dev *dev;
 
 	dev = acpi_get_pci_dev(device->handle);
-	if (!dev || !dev->subordinate)
+	if (!dev)
+		goto out;
+
+	device_set_run_wake(&dev->dev, false);
+	pci_acpi_remove_pm_notifier(device);
+
+	if (!dev->subordinate)
 		goto out;
 
 	acpi_pci_irq_del_prt(dev->subordinate);
@@ -62,6 +70,10 @@ static int acpi_pci_bind(struct acpi_device *device)
 	if (!dev)
 		return 0;
 
+	pci_acpi_add_pm_notifier(device, dev);
+	if (device->wakeup.flags.run_wake)
+		device_set_run_wake(&dev->dev, true);
+
 	/*
 	 * Install the 'bind' function to facilitate callbacks for
 	 * children of the P2P bridge.
diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c
index 64f55b6db73c..9cd8bedb1e5a 100644
--- a/drivers/acpi/pci_root.c
+++ b/drivers/acpi/pci_root.c
@@ -30,6 +30,7 @@
 #include <linux/proc_fs.h>
 #include <linux/spinlock.h>
 #include <linux/pm.h>
+#include <linux/pm_runtime.h>
 #include <linux/pci.h>
 #include <linux/pci-acpi.h>
 #include <linux/acpi.h>
@@ -528,6 +529,10 @@ static int __devinit acpi_pci_root_add(struct acpi_device *device)
 	if (flags != base_flags)
 		acpi_pci_osc_support(root, flags);
 
+	pci_acpi_add_bus_pm_notifier(device, root->bus);
+	if (device->wakeup.flags.run_wake)
+		device_set_run_wake(root->bus->bridge, true);
+
 	return 0;
 
 end:
@@ -549,6 +554,9 @@ static int acpi_pci_root_remove(struct acpi_device *device, int type)
 {
 	struct acpi_pci_root *root = acpi_driver_data(device);
 
+	device_set_run_wake(root->bus->bridge, false);
+	pci_acpi_remove_bus_pm_notifier(device);
+
 	kfree(root);
 	return 0;
 }
diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index 7491a52ad97a..fb7fc24fe727 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -753,6 +753,7 @@ static void acpi_bus_set_run_wake_flags(struct acpi_device *device)
 	acpi_event_status event_status;
 
 	device->wakeup.run_wake_count = 0;
+	device->wakeup.flags.notifier_present = 0;
 
 	/* Power button, Lid switch always enable wakeup */
 	if (!acpi_match_device_ids(device, button_device_ids)) {
diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c
index 7e2829538a4c..c0c73913833d 100644
--- a/drivers/pci/pci-acpi.c
+++ b/drivers/pci/pci-acpi.c
@@ -16,8 +16,144 @@
 #include <acpi/acpi_bus.h>
 
 #include <linux/pci-acpi.h>
+#include <linux/pm_runtime.h>
 #include "pci.h"
 
+static DEFINE_MUTEX(pci_acpi_pm_notify_mtx);
+
+/**
+ * pci_acpi_wake_bus - Wake-up notification handler for root buses.
+ * @handle: ACPI handle of a device the notification is for.
+ * @event: Type of the signaled event.
+ * @context: PCI root bus to wake up devices on.
+ */
+static void pci_acpi_wake_bus(acpi_handle handle, u32 event, void *context)
+{
+	struct pci_bus *pci_bus = context;
+
+	if (event == ACPI_NOTIFY_DEVICE_WAKE && pci_bus)
+		pci_pme_wakeup_bus(pci_bus);
+}
+
+/**
+ * pci_acpi_wake_dev - Wake-up notification handler for PCI devices.
+ * @handle: ACPI handle of a device the notification is for.
+ * @event: Type of the signaled event.
+ * @context: PCI device object to wake up.
+ */
+static void pci_acpi_wake_dev(acpi_handle handle, u32 event, void *context)
+{
+	struct pci_dev *pci_dev = context;
+
+	if (event == ACPI_NOTIFY_DEVICE_WAKE && pci_dev) {
+		pci_check_pme_status(pci_dev);
+		pm_runtime_resume(&pci_dev->dev);
+		if (pci_dev->subordinate)
+			pci_pme_wakeup_bus(pci_dev->subordinate);
+	}
+}
+
+/**
+ * add_pm_notifier - Register PM notifier for given ACPI device.
+ * @dev: ACPI device to add the notifier for.
+ * @context: PCI device or bus to check for PME status if an event is signaled.
+ *
+ * NOTE: @dev need not be a run-wake or wake-up device to be a valid source of
+ * PM wake-up events.  For example, wake-up events may be generated for bridges
+ * if one of the devices below the bridge is signaling PME, even if the bridge
+ * itself doesn't have a wake-up GPE associated with it.
+ */
+static acpi_status add_pm_notifier(struct acpi_device *dev,
+				   acpi_notify_handler handler,
+				   void *context)
+{
+	acpi_status status = AE_ALREADY_EXISTS;
+
+	mutex_lock(&pci_acpi_pm_notify_mtx);
+
+	if (dev->wakeup.flags.notifier_present)
+		goto out;
+
+	status = acpi_install_notify_handler(dev->handle,
+					     ACPI_SYSTEM_NOTIFY,
+					     handler, context);
+	if (ACPI_FAILURE(status))
+		goto out;
+
+	dev->wakeup.flags.notifier_present = true;
+
+ out:
+	mutex_unlock(&pci_acpi_pm_notify_mtx);
+	return status;
+}
+
+/**
+ * remove_pm_notifier - Unregister PM notifier from given ACPI device.
+ * @dev: ACPI device to remove the notifier from.
+ */
+static acpi_status remove_pm_notifier(struct acpi_device *dev,
+				      acpi_notify_handler handler)
+{
+	acpi_status status = AE_BAD_PARAMETER;
+
+	mutex_lock(&pci_acpi_pm_notify_mtx);
+
+	if (!dev->wakeup.flags.notifier_present)
+		goto out;
+
+	status = acpi_remove_notify_handler(dev->handle,
+					    ACPI_SYSTEM_NOTIFY,
+					    handler);
+	if (ACPI_FAILURE(status))
+		goto out;
+
+	dev->wakeup.flags.notifier_present = false;
+
+ out:
+	mutex_unlock(&pci_acpi_pm_notify_mtx);
+	return status;
+}
+
+/**
+ * pci_acpi_add_bus_pm_notifier - Register PM notifier for given PCI bus.
+ * @dev: ACPI device to add the notifier for.
+ * @pci_bus: PCI bus to walk checking for PME status if an event is signaled.
+ */
+acpi_status pci_acpi_add_bus_pm_notifier(struct acpi_device *dev,
+					 struct pci_bus *pci_bus)
+{
+	return add_pm_notifier(dev, pci_acpi_wake_bus, pci_bus);
+}
+
+/**
+ * pci_acpi_remove_bus_pm_notifier - Unregister PCI bus PM notifier.
+ * @dev: ACPI device to remove the notifier from.
+ */
+acpi_status pci_acpi_remove_bus_pm_notifier(struct acpi_device *dev)
+{
+	return remove_pm_notifier(dev, pci_acpi_wake_bus);
+}
+
+/**
+ * pci_acpi_add_pm_notifier - Register PM notifier for given PCI device.
+ * @dev: ACPI device to add the notifier for.
+ * @pci_dev: PCI device to check for the PME status if an event is signaled.
+ */
+acpi_status pci_acpi_add_pm_notifier(struct acpi_device *dev,
+				     struct pci_dev *pci_dev)
+{
+	return add_pm_notifier(dev, pci_acpi_wake_dev, pci_dev);
+}
+
+/**
+ * pci_acpi_remove_pm_notifier - Unregister PCI device PM notifier.
+ * @dev: ACPI device to remove the notifier from.
+ */
+acpi_status pci_acpi_remove_pm_notifier(struct acpi_device *dev)
+{
+	return remove_pm_notifier(dev, pci_acpi_wake_dev);
+}
+
 /*
  * _SxD returns the D-state with the highest power
  * (lowest D-state number) supported in the S-state "x".
@@ -131,12 +267,87 @@ static int acpi_pci_sleep_wake(struct pci_dev *dev, bool enable)
 	return 0;
 }
 
+/**
+ * acpi_dev_run_wake - Enable/disable wake-up for given device.
+ * @phys_dev: Device to enable/disable the platform to wake-up the system for.
+ * @enable: Whether enable or disable the wake-up functionality.
+ *
+ * Find the ACPI device object corresponding to @pci_dev and try to
+ * enable/disable the GPE associated with it.
+ */
+static int acpi_dev_run_wake(struct device *phys_dev, bool enable)
+{
+	struct acpi_device *dev;
+	acpi_handle handle;
+	int error = -ENODEV;
+
+	if (!device_run_wake(phys_dev))
+		return -EINVAL;
+
+	handle = DEVICE_ACPI_HANDLE(phys_dev);
+	if (!handle || ACPI_FAILURE(acpi_bus_get_device(handle, &dev))) {
+		dev_dbg(phys_dev, "ACPI handle has no context in %s!\n",
+			__func__);
+		return -ENODEV;
+	}
+
+	if (enable) {
+		if (!dev->wakeup.run_wake_count++) {
+			acpi_enable_wakeup_device_power(dev, ACPI_STATE_S0);
+			acpi_enable_gpe(dev->wakeup.gpe_device,
+					dev->wakeup.gpe_number,
+					ACPI_GPE_TYPE_RUNTIME);
+		}
+	} else if (dev->wakeup.run_wake_count > 0) {
+		if (!--dev->wakeup.run_wake_count) {
+			acpi_disable_gpe(dev->wakeup.gpe_device,
+					 dev->wakeup.gpe_number,
+					 ACPI_GPE_TYPE_RUNTIME);
+			acpi_disable_wakeup_device_power(dev);
+		}
+	} else {
+		error = -EALREADY;
+	}
+
+	return error;
+}
+
+static void acpi_pci_propagate_run_wake(struct pci_bus *bus, bool enable)
+{
+	while (bus->parent) {
+		struct pci_dev *bridge = bus->self;
+
+		if (bridge->pme_interrupt)
+			return;
+		if (!acpi_dev_run_wake(&bridge->dev, enable))
+			return;
+		bus = bus->parent;
+	}
+
+	/* We have reached the root bus. */
+	if (bus->bridge)
+		acpi_dev_run_wake(bus->bridge, enable);
+}
+
+static int acpi_pci_run_wake(struct pci_dev *dev, bool enable)
+{
+	if (dev->pme_interrupt)
+		return 0;
+
+	if (!acpi_dev_run_wake(&dev->dev, enable))
+		return 0;
+
+	acpi_pci_propagate_run_wake(dev->bus, enable);
+	return 0;
+}
+
 static struct pci_platform_pm_ops acpi_pci_platform_pm = {
 	.is_manageable = acpi_pci_power_manageable,
 	.set_state = acpi_pci_set_power_state,
 	.choose_state = acpi_pci_choose_state,
 	.can_wakeup = acpi_pci_can_wakeup,
 	.sleep_wake = acpi_pci_sleep_wake,
+	.run_wake = acpi_pci_run_wake,
 };
 
 /* ACPI bus type */
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 5723446544fd..df55a2f351b3 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -20,6 +20,7 @@
 #include <linux/pm_wakeup.h>
 #include <linux/interrupt.h>
 #include <linux/device.h>
+#include <linux/pm_runtime.h>
 #include <asm/setup.h>
 #include "pci.h"
 
@@ -462,6 +463,12 @@ static inline int platform_pci_sleep_wake(struct pci_dev *dev, bool enable)
 			pci_platform_pm->sleep_wake(dev, enable) : -ENODEV;
 }
 
+static inline int platform_pci_run_wake(struct pci_dev *dev, bool enable)
+{
+	return pci_platform_pm ?
+			pci_platform_pm->run_wake(dev, enable) : -ENODEV;
+}
+
 /**
  * pci_raw_set_power_state - Use PCI PM registers to set the power state of
  *                           given PCI device
@@ -1229,6 +1236,31 @@ bool pci_check_pme_status(struct pci_dev *dev)
 	return ret;
 }
 
+/**
+ * pci_pme_wakeup - Wake up a PCI device if its PME Status bit is set.
+ * @dev: Device to handle.
+ * @ign: Ignored.
+ *
+ * Check if @dev has generated PME and queue a resume request for it in that
+ * case.
+ */
+static int pci_pme_wakeup(struct pci_dev *dev, void *ign)
+{
+	if (pci_check_pme_status(dev))
+		pm_request_resume(&dev->dev);
+	return 0;
+}
+
+/**
+ * pci_pme_wakeup_bus - Walk given bus and wake up devices on it, if necessary.
+ * @bus: Top bus of the subtree to walk.
+ */
+void pci_pme_wakeup_bus(struct pci_bus *bus)
+{
+	if (bus)
+		pci_walk_bus(bus, pci_pme_wakeup, NULL);
+}
+
 /**
  * pci_pme_capable - check the capability of PCI device to generate PME#
  * @dev: PCI device to handle.
@@ -1433,6 +1465,41 @@ int pci_back_from_sleep(struct pci_dev *dev)
 	return pci_set_power_state(dev, PCI_D0);
 }
 
+/**
+ * pci_dev_run_wake - Check if device can generate run-time wake-up events.
+ * @dev: Device to check.
+ *
+ * Return true if the device itself is cabable of generating wake-up events
+ * (through the platform or using the native PCIe PME) or if the device supports
+ * PME and one of its upstream bridges can generate wake-up events.
+ */
+bool pci_dev_run_wake(struct pci_dev *dev)
+{
+	struct pci_bus *bus = dev->bus;
+
+	if (device_run_wake(&dev->dev))
+		return true;
+
+	if (!dev->pme_support)
+		return false;
+
+	while (bus->parent) {
+		struct pci_dev *bridge = bus->self;
+
+		if (device_run_wake(&bridge->dev))
+			return true;
+
+		bus = bus->parent;
+	}
+
+	/* We have reached the root bus. */
+	if (bus->bridge)
+		return device_run_wake(bus->bridge);
+
+	return false;
+}
+EXPORT_SYMBOL_GPL(pci_dev_run_wake);
+
 /**
  * pci_pm_init - Initialize PM functions of given PCI device
  * @dev: PCI device to handle.
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index b95b0a077d31..286c50821949 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -35,6 +35,10 @@ int pci_probe_reset_function(struct pci_dev *dev);
  *
  * @sleep_wake: enables/disables the system wake up capability of given device
  *
+ * @run_wake: enables/disables the platform to generate run-time wake-up events
+ *		for given device (the device's wake-up capability has to be
+ *		enabled by @sleep_wake for this feature to work)
+ *
  * If given platform is generally capable of power managing PCI devices, all of
  * these callbacks are mandatory.
  */
@@ -44,12 +48,15 @@ struct pci_platform_pm_ops {
 	pci_power_t (*choose_state)(struct pci_dev *dev);
 	bool (*can_wakeup)(struct pci_dev *dev);
 	int (*sleep_wake)(struct pci_dev *dev, bool enable);
+	int (*run_wake)(struct pci_dev *dev, bool enable);
 };
 
 extern int pci_set_platform_pm(struct pci_platform_pm_ops *ops);
 extern void pci_update_current_state(struct pci_dev *dev, pci_power_t state);
 extern void pci_disable_enabled_device(struct pci_dev *dev);
 extern bool pci_check_pme_status(struct pci_dev *dev);
+extern int __pci_pme_wakeup(struct pci_dev *dev, void *ign);
+extern void pci_pme_wakeup_bus(struct pci_bus *bus);
 extern void pci_pm_init(struct pci_dev *dev);
 extern void platform_pci_wakeup_init(struct pci_dev *dev);
 extern void pci_allocate_cap_save_buffers(struct pci_dev *dev);
diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h
index 60fcff419352..54508ccea023 100644
--- a/include/acpi/acpi_bus.h
+++ b/include/acpi/acpi_bus.h
@@ -243,6 +243,7 @@ struct acpi_device_wakeup_flags {
 	u8 valid:1;		/* Can successfully enable wakeup? */
 	u8 run_wake:1;		/* Run-Wake GPE devices */
 	u8 always_enabled:1;	/* Run-wake devices that are always enabled */
+	u8 notifier_present:1;  /* Wake-up notify handler has been installed */
 };
 
 struct acpi_device_wakeup_state {
@@ -388,6 +389,9 @@ acpi_handle acpi_get_pci_rootbridge_handle(unsigned int, unsigned int);
 struct acpi_pci_root *acpi_pci_find_root(acpi_handle handle);
 #define DEVICE_ACPI_HANDLE(dev) ((acpi_handle)((dev)->archdata.acpi_handle))
 
+int acpi_enable_wakeup_device_power(struct acpi_device *dev, int state);
+int acpi_disable_wakeup_device_power(struct acpi_device *dev);
+
 #ifdef CONFIG_PM_SLEEP
 int acpi_pm_device_sleep_state(struct device *, int *);
 int acpi_pm_device_sleep_wake(struct device *, bool);
diff --git a/include/linux/pci-acpi.h b/include/linux/pci-acpi.h
index 93a7c08f869d..c8b6473c5f42 100644
--- a/include/linux/pci-acpi.h
+++ b/include/linux/pci-acpi.h
@@ -11,6 +11,13 @@
 #include <linux/acpi.h>
 
 #ifdef CONFIG_ACPI
+extern acpi_status pci_acpi_add_bus_pm_notifier(struct acpi_device *dev,
+						 struct pci_bus *pci_bus);
+extern acpi_status pci_acpi_remove_bus_pm_notifier(struct acpi_device *dev);
+extern acpi_status pci_acpi_add_pm_notifier(struct acpi_device *dev,
+					     struct pci_dev *pci_dev);
+extern acpi_status pci_acpi_remove_pm_notifier(struct acpi_device *dev);
+
 static inline acpi_handle acpi_find_root_bridge_handle(struct pci_dev *pdev)
 {
 	struct pci_bus *pbus = pdev->bus;
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 9fe4b2089b78..3f787ce78bd1 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -782,6 +782,7 @@ int pci_wake_from_d3(struct pci_dev *dev, bool enable);
 pci_power_t pci_target_state(struct pci_dev *dev);
 int pci_prepare_to_sleep(struct pci_dev *dev);
 int pci_back_from_sleep(struct pci_dev *dev);
+bool pci_dev_run_wake(struct pci_dev *dev);
 
 /* For use by arch with custom probe code */
 void set_pcie_port_type(struct pci_dev *pdev);
-- 
cgit v1.2.3


From 6cbf82148ff286ec22a55be6836c3a5bffc489c1 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Wed, 17 Feb 2010 23:44:58 +0100
Subject: PCI PM: Run-time callbacks for PCI bus type

Introduce run-time PM callbacks for the PCI bus type.  Make the new
callbacks work in analogy with the existing system sleep PM
callbacks, so that the drivers already converted to struct dev_pm_ops
can use their suspend and resume routines for run-time PM without
modifications.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/pci-driver.c | 160 +++++++++++++++++++++++++++++++++++++++--------
 drivers/pci/pci.c        |  43 +++++++++++--
 drivers/pci/pci.h        |   1 +
 include/linux/pci.h      |   9 ++-
 kernel/power/Kconfig     |   5 ++
 5 files changed, 184 insertions(+), 34 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
index e5d47be3c6d7..f9a0aec3abcf 100644
--- a/drivers/pci/pci-driver.c
+++ b/drivers/pci/pci-driver.c
@@ -17,6 +17,7 @@
 #include <linux/slab.h>
 #include <linux/sched.h>
 #include <linux/cpu.h>
+#include <linux/pm_runtime.h>
 #include "pci.h"
 
 struct pci_dynid {
@@ -404,6 +405,35 @@ static void pci_device_shutdown(struct device *dev)
 	pci_msix_shutdown(pci_dev);
 }
 
+#ifdef CONFIG_PM_OPS
+
+/* Auxiliary functions used for system resume and run-time resume. */
+
+/**
+ * pci_restore_standard_config - restore standard config registers of PCI device
+ * @pci_dev: PCI device to handle
+ */
+static int pci_restore_standard_config(struct pci_dev *pci_dev)
+{
+	pci_update_current_state(pci_dev, PCI_UNKNOWN);
+
+	if (pci_dev->current_state != PCI_D0) {
+		int error = pci_set_power_state(pci_dev, PCI_D0);
+		if (error)
+			return error;
+	}
+
+	return pci_restore_state(pci_dev);
+}
+
+static void pci_pm_default_resume_early(struct pci_dev *pci_dev)
+{
+	pci_restore_standard_config(pci_dev);
+	pci_fixup_device(pci_fixup_resume_early, pci_dev);
+}
+
+#endif
+
 #ifdef CONFIG_PM_SLEEP
 
 /*
@@ -520,29 +550,6 @@ static int pci_legacy_resume(struct device *dev)
 
 /* Auxiliary functions used by the new power management framework */
 
-/**
- * pci_restore_standard_config - restore standard config registers of PCI device
- * @pci_dev: PCI device to handle
- */
-static int pci_restore_standard_config(struct pci_dev *pci_dev)
-{
-	pci_update_current_state(pci_dev, PCI_UNKNOWN);
-
-	if (pci_dev->current_state != PCI_D0) {
-		int error = pci_set_power_state(pci_dev, PCI_D0);
-		if (error)
-			return error;
-	}
-
-	return pci_restore_state(pci_dev);
-}
-
-static void pci_pm_default_resume_noirq(struct pci_dev *pci_dev)
-{
-	pci_restore_standard_config(pci_dev);
-	pci_fixup_device(pci_fixup_resume_early, pci_dev);
-}
-
 static void pci_pm_default_resume(struct pci_dev *pci_dev)
 {
 	pci_fixup_device(pci_fixup_resume, pci_dev);
@@ -581,6 +588,17 @@ static int pci_pm_prepare(struct device *dev)
 	struct device_driver *drv = dev->driver;
 	int error = 0;
 
+	/*
+	 * PCI devices suspended at run time need to be resumed at this
+	 * point, because in general it is necessary to reconfigure them for
+	 * system suspend.  Namely, if the device is supposed to wake up the
+	 * system from the sleep state, we may need to reconfigure it for this
+	 * purpose.  In turn, if the device is not supposed to wake up the
+	 * system from the sleep state, we'll have to prevent it from signaling
+	 * wake-up.
+	 */
+	pm_runtime_resume(dev);
+
 	if (drv && drv->pm && drv->pm->prepare)
 		error = drv->pm->prepare(dev);
 
@@ -595,6 +613,13 @@ static void pci_pm_complete(struct device *dev)
 		drv->pm->complete(dev);
 }
 
+#else /* !CONFIG_PM_SLEEP */
+
+#define pci_pm_prepare	NULL
+#define pci_pm_complete	NULL
+
+#endif /* !CONFIG_PM_SLEEP */
+
 #ifdef CONFIG_SUSPEND
 
 static int pci_pm_suspend(struct device *dev)
@@ -681,7 +706,7 @@ static int pci_pm_resume_noirq(struct device *dev)
 	struct device_driver *drv = dev->driver;
 	int error = 0;
 
-	pci_pm_default_resume_noirq(pci_dev);
+	pci_pm_default_resume_early(pci_dev);
 
 	if (pci_has_legacy_pm_support(pci_dev))
 		return pci_legacy_resume_early(dev);
@@ -879,7 +904,7 @@ static int pci_pm_restore_noirq(struct device *dev)
 	struct device_driver *drv = dev->driver;
 	int error = 0;
 
-	pci_pm_default_resume_noirq(pci_dev);
+	pci_pm_default_resume_early(pci_dev);
 
 	if (pci_has_legacy_pm_support(pci_dev))
 		return pci_legacy_resume_early(dev);
@@ -931,6 +956,84 @@ static int pci_pm_restore(struct device *dev)
 
 #endif /* !CONFIG_HIBERNATION */
 
+#ifdef CONFIG_PM_RUNTIME
+
+static int pci_pm_runtime_suspend(struct device *dev)
+{
+	struct pci_dev *pci_dev = to_pci_dev(dev);
+	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
+	pci_power_t prev = pci_dev->current_state;
+	int error;
+
+	if (!pm || !pm->runtime_suspend)
+		return -ENOSYS;
+
+	error = pm->runtime_suspend(dev);
+	suspend_report_result(pm->runtime_suspend, error);
+	if (error)
+		return error;
+
+	pci_fixup_device(pci_fixup_suspend, pci_dev);
+
+	if (!pci_dev->state_saved && pci_dev->current_state != PCI_D0
+	    && pci_dev->current_state != PCI_UNKNOWN) {
+		WARN_ONCE(pci_dev->current_state != prev,
+			"PCI PM: State of device not saved by %pF\n",
+			pm->runtime_suspend);
+		return 0;
+	}
+
+	if (!pci_dev->state_saved)
+		pci_save_state(pci_dev);
+
+	pci_finish_runtime_suspend(pci_dev);
+
+	return 0;
+}
+
+static int pci_pm_runtime_resume(struct device *dev)
+{
+	struct pci_dev *pci_dev = to_pci_dev(dev);
+	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
+
+	if (!pm || !pm->runtime_resume)
+		return -ENOSYS;
+
+	pci_pm_default_resume_early(pci_dev);
+	__pci_enable_wake(pci_dev, PCI_D0, true, false);
+	pci_fixup_device(pci_fixup_resume, pci_dev);
+
+	return pm->runtime_resume(dev);
+}
+
+static int pci_pm_runtime_idle(struct device *dev)
+{
+	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
+
+	if (!pm)
+		return -ENOSYS;
+
+	if (pm->runtime_idle) {
+		int ret = pm->runtime_idle(dev);
+		if (ret)
+			return ret;
+	}
+
+	pm_runtime_suspend(dev);
+
+	return 0;
+}
+
+#else /* !CONFIG_PM_RUNTIME */
+
+#define pci_pm_runtime_suspend	NULL
+#define pci_pm_runtime_resume	NULL
+#define pci_pm_runtime_idle	NULL
+
+#endif /* !CONFIG_PM_RUNTIME */
+
+#ifdef CONFIG_PM_OPS
+
 const struct dev_pm_ops pci_dev_pm_ops = {
 	.prepare = pci_pm_prepare,
 	.complete = pci_pm_complete,
@@ -946,15 +1049,18 @@ const struct dev_pm_ops pci_dev_pm_ops = {
 	.thaw_noirq = pci_pm_thaw_noirq,
 	.poweroff_noirq = pci_pm_poweroff_noirq,
 	.restore_noirq = pci_pm_restore_noirq,
+	.runtime_suspend = pci_pm_runtime_suspend,
+	.runtime_resume = pci_pm_runtime_resume,
+	.runtime_idle = pci_pm_runtime_idle,
 };
 
 #define PCI_PM_OPS_PTR	(&pci_dev_pm_ops)
 
-#else /* !CONFIG_PM_SLEEP */
+#else /* !COMFIG_PM_OPS */
 
 #define PCI_PM_OPS_PTR	NULL
 
-#endif /* !CONFIG_PM_SLEEP */
+#endif /* !COMFIG_PM_OPS */
 
 /**
  * __pci_register_driver - register a new pci driver
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index df55a2f351b3..d62a5de81672 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -1302,9 +1302,10 @@ void pci_pme_active(struct pci_dev *dev, bool enable)
 }
 
 /**
- * pci_enable_wake - enable PCI device as wakeup event source
+ * __pci_enable_wake - enable PCI device as wakeup event source
  * @dev: PCI device affected
  * @state: PCI state from which device will issue wakeup events
+ * @runtime: True if the events are to be generated at run time
  * @enable: True to enable event generation; false to disable
  *
  * This enables the device as a wakeup event source, or disables it.
@@ -1320,11 +1321,12 @@ void pci_pme_active(struct pci_dev *dev, bool enable)
  * Error code depending on the platform is returned if both the platform and
  * the native mechanism fail to enable the generation of wake-up events
  */
-int pci_enable_wake(struct pci_dev *dev, pci_power_t state, bool enable)
+int __pci_enable_wake(struct pci_dev *dev, pci_power_t state,
+		      bool runtime, bool enable)
 {
 	int ret = 0;
 
-	if (enable && !device_may_wakeup(&dev->dev))
+	if (enable && !runtime && !device_may_wakeup(&dev->dev))
 		return -EINVAL;
 
 	/* Don't do the same thing twice in a row for one device. */
@@ -1344,19 +1346,24 @@ int pci_enable_wake(struct pci_dev *dev, pci_power_t state, bool enable)
 			pci_pme_active(dev, true);
 		else
 			ret = 1;
-		error = platform_pci_sleep_wake(dev, true);
+		error = runtime ? platform_pci_run_wake(dev, true) :
+					platform_pci_sleep_wake(dev, true);
 		if (ret)
 			ret = error;
 		if (!ret)
 			dev->wakeup_prepared = true;
 	} else {
-		platform_pci_sleep_wake(dev, false);
+		if (runtime)
+			platform_pci_run_wake(dev, false);
+		else
+			platform_pci_sleep_wake(dev, false);
 		pci_pme_active(dev, false);
 		dev->wakeup_prepared = false;
 	}
 
 	return ret;
 }
+EXPORT_SYMBOL(__pci_enable_wake);
 
 /**
  * pci_wake_from_d3 - enable/disable device to wake up from D3_hot or D3_cold
@@ -1465,6 +1472,31 @@ int pci_back_from_sleep(struct pci_dev *dev)
 	return pci_set_power_state(dev, PCI_D0);
 }
 
+/**
+ * pci_finish_runtime_suspend - Carry out PCI-specific part of runtime suspend.
+ * @dev: PCI device being suspended.
+ *
+ * Prepare @dev to generate wake-up events at run time and put it into a low
+ * power state.
+ */
+int pci_finish_runtime_suspend(struct pci_dev *dev)
+{
+	pci_power_t target_state = pci_target_state(dev);
+	int error;
+
+	if (target_state == PCI_POWER_ERROR)
+		return -EIO;
+
+	__pci_enable_wake(dev, target_state, true, pci_dev_run_wake(dev));
+
+	error = pci_set_power_state(dev, target_state);
+
+	if (error)
+		__pci_enable_wake(dev, target_state, true, false);
+
+	return error;
+}
+
 /**
  * pci_dev_run_wake - Check if device can generate run-time wake-up events.
  * @dev: Device to check.
@@ -2978,7 +3010,6 @@ EXPORT_SYMBOL(pci_save_state);
 EXPORT_SYMBOL(pci_restore_state);
 EXPORT_SYMBOL(pci_pme_capable);
 EXPORT_SYMBOL(pci_pme_active);
-EXPORT_SYMBOL(pci_enable_wake);
 EXPORT_SYMBOL(pci_wake_from_d3);
 EXPORT_SYMBOL(pci_target_state);
 EXPORT_SYMBOL(pci_prepare_to_sleep);
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index 286c50821949..4eb10f48d270 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -55,6 +55,7 @@ extern int pci_set_platform_pm(struct pci_platform_pm_ops *ops);
 extern void pci_update_current_state(struct pci_dev *dev, pci_power_t state);
 extern void pci_disable_enabled_device(struct pci_dev *dev);
 extern bool pci_check_pme_status(struct pci_dev *dev);
+extern int pci_finish_runtime_suspend(struct pci_dev *dev);
 extern int __pci_pme_wakeup(struct pci_dev *dev, void *ign);
 extern void pci_pme_wakeup_bus(struct pci_bus *bus);
 extern void pci_pm_init(struct pci_dev *dev);
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 3f787ce78bd1..1a29f9f6b2dc 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -777,13 +777,20 @@ int pci_set_power_state(struct pci_dev *dev, pci_power_t state);
 pci_power_t pci_choose_state(struct pci_dev *dev, pm_message_t state);
 bool pci_pme_capable(struct pci_dev *dev, pci_power_t state);
 void pci_pme_active(struct pci_dev *dev, bool enable);
-int pci_enable_wake(struct pci_dev *dev, pci_power_t state, bool enable);
+int __pci_enable_wake(struct pci_dev *dev, pci_power_t state,
+		      bool runtime, bool enable);
 int pci_wake_from_d3(struct pci_dev *dev, bool enable);
 pci_power_t pci_target_state(struct pci_dev *dev);
 int pci_prepare_to_sleep(struct pci_dev *dev);
 int pci_back_from_sleep(struct pci_dev *dev);
 bool pci_dev_run_wake(struct pci_dev *dev);
 
+static inline int pci_enable_wake(struct pci_dev *dev, pci_power_t state,
+				  bool enable)
+{
+	return __pci_enable_wake(dev, state, false, enable);
+}
+
 /* For use by arch with custom probe code */
 void set_pcie_port_type(struct pci_dev *pdev);
 void set_pcie_hotplug_bridge(struct pci_dev *pdev);
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index 91e09d3b2eb2..4c9cffcf69c7 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -222,3 +222,8 @@ config PM_RUNTIME
 	  and the bus type drivers of the buses the devices are on are
 	  responsible for the actual handling of the autosuspend requests and
 	  wake-up events.
+
+config PM_OPS
+	bool
+	depends on PM_SLEEP || PM_RUNTIME
+	default y
-- 
cgit v1.2.3


From d02f0cff1da4a1bd609e8d3eae3a2e85459cd8a1 Mon Sep 17 00:00:00 2001
From: dann frazier <dannf@hp.com>
Date: Tue, 23 Feb 2010 08:38:42 +0100
Subject: cciss: export linux/cciss_defs.h header

'make headers_check' began to fail after cciss_defs.h was introduced in:
   429c42c9d246f5bda868495c09974312a0177328

usr/include/linux/cciss_ioctl.h:6: included file 'linux/cciss_defs.h' is not exported

Fix this by exporting cciss_defs.h

Signed-off-by: dann frazier <dannf@hp.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 include/linux/Kbuild | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index 756f831cbdd5..91be0d896322 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -43,6 +43,7 @@ header-y += blkpg.h
 header-y += bpqether.h
 header-y += bsg.h
 header-y += can.h
+header-y += cciss_defs.h
 header-y += cdk.h
 header-y += chio.h
 header-y += coda_psdev.h
-- 
cgit v1.2.3


From 28e1b773083d349d5223f586a39fa30f5d0f1c36 Mon Sep 17 00:00:00 2001
From: Daniel Mack <daniel@caiaq.de>
Date: Mon, 22 Feb 2010 23:49:09 +0100
Subject: ALSA: usbaudio: parse USB descriptors with structs

In preparation of support for v2.0 audio class, use the structs from
linux/usb/audio.h and add some new ones to describe the fields that are
actually parsed by the descriptor decoders.

Also, factor out code from usb_create_streams(). This makes it easier to
adopt the new iteration logic needed for v2.0.

Signed-off-by: Daniel Mack <daniel@caiaq.de>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/linux/usb/audio.h |  32 +++++++-
 sound/usb/usbaudio.c      | 198 ++++++++++++++++++++++++++++------------------
 sound/usb/usbmixer.c      |  37 +++++----
 3 files changed, 168 insertions(+), 99 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/usb/audio.h b/include/linux/usb/audio.h
index eaf9dffe0a01..44f82d8e09c5 100644
--- a/include/linux/usb/audio.h
+++ b/include/linux/usb/audio.h
@@ -81,7 +81,7 @@
 
 /* Terminal Control Selectors */
 /* 4.3.2  Class-Specific AC Interface Descriptor */
-struct uac_ac_header_descriptor {
+struct uac_ac_header_descriptor_v1 {
 	__u8  bLength;			/* 8 + n */
 	__u8  bDescriptorType;		/* USB_DT_CS_INTERFACE */
 	__u8  bDescriptorSubtype;	/* UAC_MS_HEADER */
@@ -95,7 +95,7 @@ struct uac_ac_header_descriptor {
 
 /* As above, but more useful for defining your own descriptors: */
 #define DECLARE_UAC_AC_HEADER_DESCRIPTOR(n) 			\
-struct uac_ac_header_descriptor_##n {				\
+struct uac_ac_header_descriptor_v1_##n {			\
 	__u8  bLength;						\
 	__u8  bDescriptorType;					\
 	__u8  bDescriptorSubtype;				\
@@ -131,7 +131,7 @@ struct uac_input_terminal_descriptor {
 #define UAC_INPUT_TERMINAL_PROC_MICROPHONE_ARRAY	0x206
 
 /* 4.3.2.2 Output Terminal Descriptor */
-struct uac_output_terminal_descriptor {
+struct uac_output_terminal_descriptor_v1 {
 	__u8  bLength;			/* in bytes: 9 */
 	__u8  bDescriptorType;		/* CS_INTERFACE descriptor type */
 	__u8  bDescriptorSubtype;	/* OUTPUT_TERMINAL descriptor subtype */
@@ -171,7 +171,7 @@ struct uac_feature_unit_descriptor_##ch {			\
 } __attribute__ ((packed))
 
 /* 4.5.2 Class-Specific AS Interface Descriptor */
-struct uac_as_header_descriptor {
+struct uac_as_header_descriptor_v1 {
 	__u8  bLength;			/* in bytes: 7 */
 	__u8  bDescriptorType;		/* USB_DT_CS_INTERFACE */
 	__u8  bDescriptorSubtype;	/* AS_GENERAL */
@@ -232,6 +232,19 @@ struct uac_format_type_i_discrete_descriptor_##n {		\
 
 #define UAC_FORMAT_TYPE_I_DISCRETE_DESC_SIZE(n)	(8 + (n * 3))
 
+/* Formats - Audio Data Format Type I Codes */
+
+struct uac_format_type_ii_discrete_descriptor {
+	__u8 bLength;
+	__u8 bDescriptorType;
+	__u8 bDescriptorSubtype;
+	__u8 bFormatType;
+	__le16 wMaxBitRate;
+	__le16 wSamplesPerFrame;
+	__u8 bSamFreqType;
+	__u8 tSamFreq[][3];
+} __attribute__((packed));
+
 /* Formats - A.2 Format Type Codes */
 #define UAC_FORMAT_TYPE_UNDEFINED	0x0
 #define UAC_FORMAT_TYPE_I		0x1
@@ -253,6 +266,17 @@ struct uac_iso_endpoint_descriptor {
 #define UAC_EP_CS_ATTR_FILL_MAX		0x80
 
 /* A.10.2 Feature Unit Control Selectors */
+
+struct uac_feature_unit_descriptor {
+	__u8 bLength;
+	__u8 bDescriptorType;
+	__u8 bDescriptorSubtype;
+	__u8 bUnitID;
+	__u8 bSourceID;
+	__u8 bControlSize;
+	__u8 controls[0]; /* variable length */
+} __attribute__((packed));
+
 #define UAC_FU_CONTROL_UNDEFINED	0x00
 #define UAC_MUTE_CONTROL		0x01
 #define UAC_VOLUME_CONTROL		0x02
diff --git a/sound/usb/usbaudio.c b/sound/usb/usbaudio.c
index c6b9c8cac59e..f833dea60180 100644
--- a/sound/usb/usbaudio.c
+++ b/sound/usb/usbaudio.c
@@ -46,6 +46,8 @@
 #include <linux/usb.h>
 #include <linux/moduleparam.h>
 #include <linux/mutex.h>
+#include <linux/usb/audio.h>
+
 #include <sound/core.h>
 #include <sound/info.h>
 #include <sound/pcm.h>
@@ -2421,15 +2423,17 @@ static int is_big_endian_format(struct snd_usb_audio *chip, struct audioformat *
  * @fmt: the format type descriptor
  */
 static int parse_audio_format_i_type(struct snd_usb_audio *chip, struct audioformat *fp,
-				     int format, unsigned char *fmt)
+				     int format, void *fmt_raw)
 {
 	int pcm_format;
 	int sample_width, sample_bytes;
+	struct uac_format_type_i_discrete_descriptor *fmt = fmt_raw;
 
 	/* FIXME: correct endianess and sign? */
 	pcm_format = -1;
-	sample_width = fmt[6];
-	sample_bytes = fmt[5];
+	sample_width = fmt->bBitResolution;
+	sample_bytes = fmt->bSubframeSize;
+
 	switch (format) {
 	case 0: /* some devices don't define this correctly... */
 		snd_printdd(KERN_INFO "%d:%u:%d : format type 0 is detected, processed as PCM\n",
@@ -2442,7 +2446,7 @@ static int parse_audio_format_i_type(struct snd_usb_audio *chip, struct audiofor
 				   sample_width, sample_bytes);
 		}
 		/* check the format byte size */
-		switch (fmt[5]) {
+		switch (sample_bytes) {
 		case 1:
 			pcm_format = SNDRV_PCM_FORMAT_S8;
 			break;
@@ -2463,8 +2467,8 @@ static int parse_audio_format_i_type(struct snd_usb_audio *chip, struct audiofor
 			break;
 		default:
 			snd_printk(KERN_INFO "%d:%u:%d : unsupported sample bitwidth %d in %d bytes\n",
-				   chip->dev->devnum, fp->iface,
-				   fp->altsetting, sample_width, sample_bytes);
+				   chip->dev->devnum, fp->iface, fp->altsetting,
+				   sample_width, sample_bytes);
 			break;
 		}
 		break;
@@ -2564,11 +2568,12 @@ static int parse_audio_format_rates(struct snd_usb_audio *chip, struct audioform
  * parse the format type I and III descriptors
  */
 static int parse_audio_format_i(struct snd_usb_audio *chip, struct audioformat *fp,
-				int format, unsigned char *fmt)
+				int format, void *fmt_raw)
 {
 	int pcm_format;
+	struct uac_format_type_i_discrete_descriptor *fmt = fmt_raw;
 
-	if (fmt[3] == USB_FORMAT_TYPE_III) {
+	if (fmt->bFormatType == USB_FORMAT_TYPE_III) {
 		/* FIXME: the format type is really IECxxx
 		 *        but we give normal PCM format to get the existing
 		 *        apps working...
@@ -2590,23 +2595,27 @@ static int parse_audio_format_i(struct snd_usb_audio *chip, struct audioformat *
 		if (pcm_format < 0)
 			return -1;
 	}
+
 	fp->format = pcm_format;
-	fp->channels = fmt[4];
+	fp->channels = fmt->bNrChannels;
+
 	if (fp->channels < 1) {
 		snd_printk(KERN_ERR "%d:%u:%d : invalid channels %d\n",
 			   chip->dev->devnum, fp->iface, fp->altsetting, fp->channels);
 		return -1;
 	}
-	return parse_audio_format_rates(chip, fp, fmt, 7);
+	return parse_audio_format_rates(chip, fp, fmt_raw, 7);
 }
 
 /*
- * prase the format type II descriptor
+ * parse the format type II descriptor
  */
 static int parse_audio_format_ii(struct snd_usb_audio *chip, struct audioformat *fp,
-				 int format, unsigned char *fmt)
+				 int format, void *fmt_raw)
 {
 	int brate, framesize;
+	struct uac_format_type_ii_discrete_descriptor *fmt = fmt_raw;
+
 	switch (format) {
 	case USB_AUDIO_FORMAT_AC3:
 		/* FIXME: there is no AC3 format defined yet */
@@ -2622,20 +2631,25 @@ static int parse_audio_format_ii(struct snd_usb_audio *chip, struct audioformat
 		fp->format = SNDRV_PCM_FORMAT_MPEG;
 		break;
 	}
+
 	fp->channels = 1;
-	brate = combine_word(&fmt[4]); 	/* fmt[4,5] : wMaxBitRate (in kbps) */
-	framesize = combine_word(&fmt[6]); /* fmt[6,7]: wSamplesPerFrame */
+
+	brate = le16_to_cpu(fmt->wMaxBitRate);
+	framesize = le16_to_cpu(fmt->wSamplesPerFrame);
 	snd_printd(KERN_INFO "found format II with max.bitrate = %d, frame size=%d\n", brate, framesize);
 	fp->frame_size = framesize;
-	return parse_audio_format_rates(chip, fp, fmt, 8); /* fmt[8..] sample rates */
+	return parse_audio_format_rates(chip, fp, fmt_raw, 8); /* fmt[8..] sample rates */
 }
 
 static int parse_audio_format(struct snd_usb_audio *chip, struct audioformat *fp,
-			      int format, unsigned char *fmt, int stream)
+			      int format, void *fmt_raw, int stream)
 {
 	int err;
+	/* we only parse the common header of all format types here,
+	 * so it is safe to take a type_i struct */
+	struct uac_format_type_i_discrete_descriptor *fmt = fmt_raw;
 
-	switch (fmt[3]) {
+	switch (fmt->bFormatType) {
 	case USB_FORMAT_TYPE_I:
 	case USB_FORMAT_TYPE_III:
 		err = parse_audio_format_i(chip, fp, format, fmt);
@@ -2645,10 +2659,10 @@ static int parse_audio_format(struct snd_usb_audio *chip, struct audioformat *fp
 		break;
 	default:
 		snd_printd(KERN_INFO "%d:%u:%d : format type %d is not supported yet\n",
-			   chip->dev->devnum, fp->iface, fp->altsetting, fmt[3]);
+			   chip->dev->devnum, fp->iface, fp->altsetting, fmt->bFormatType);
 		return -1;
 	}
-	fp->fmt_type = fmt[3];
+	fp->fmt_type = fmt->bFormatType;
 	if (err < 0)
 		return err;
 #if 1
@@ -2659,7 +2673,7 @@ static int parse_audio_format(struct snd_usb_audio *chip, struct audioformat *fp
 	if (chip->usb_id == USB_ID(0x041e, 0x3000) ||
 	    chip->usb_id == USB_ID(0x041e, 0x3020) ||
 	    chip->usb_id == USB_ID(0x041e, 0x3061)) {
-		if (fmt[3] == USB_FORMAT_TYPE_I &&
+		if (fmt->bFormatType == USB_FORMAT_TYPE_I &&
 		    fp->rates != SNDRV_PCM_RATE_48000 &&
 		    fp->rates != SNDRV_PCM_RATE_96000)
 			return -1;
@@ -2708,6 +2722,8 @@ static int parse_audio_endpoints(struct snd_usb_audio *chip, int iface_no)
 		num = 4;
 
 	for (i = 0; i < num; i++) {
+		struct uac_as_header_descriptor_v1 *as;
+
 		alts = &iface->altsetting[i];
 		altsd = get_iface_desc(alts);
 		/* skip invalid one */
@@ -2726,7 +2742,7 @@ static int parse_audio_endpoints(struct snd_usb_audio *chip, int iface_no)
 		stream = (get_endpoint(alts, 0)->bEndpointAddress & USB_DIR_IN) ?
 			SNDRV_PCM_STREAM_CAPTURE : SNDRV_PCM_STREAM_PLAYBACK;
 		altno = altsd->bAlternateSetting;
-	
+
 		/* audiophile usb: skip altsets incompatible with device_setup
 		 */
 		if (chip->usb_id == USB_ID(0x0763, 0x2003) && 
@@ -2734,20 +2750,21 @@ static int parse_audio_endpoints(struct snd_usb_audio *chip, int iface_no)
 			continue;
 
 		/* get audio formats */
-		fmt = snd_usb_find_csint_desc(alts->extra, alts->extralen, NULL, AS_GENERAL);
-		if (!fmt) {
+		as = snd_usb_find_csint_desc(alts->extra, alts->extralen, NULL, AS_GENERAL);
+
+		if (!as) {
 			snd_printk(KERN_ERR "%d:%u:%d : AS_GENERAL descriptor not found\n",
 				   dev->devnum, iface_no, altno);
 			continue;
 		}
 
-		if (fmt[0] < 7) {
+		if (as->bLength < sizeof(*as)) {
 			snd_printk(KERN_ERR "%d:%u:%d : invalid AS_GENERAL desc\n",
 				   dev->devnum, iface_no, altno);
 			continue;
 		}
 
-		format = (fmt[6] << 8) | fmt[5]; /* remember the format value */
+		format = le16_to_cpu(as->wFormatTag); /* remember the format value */
 
 		/* get format type */
 		fmt = snd_usb_find_csint_desc(alts->extra, alts->extralen, NULL, FORMAT_TYPE);
@@ -2875,6 +2892,65 @@ static void snd_usb_stream_disconnect(struct list_head *head)
 	}
 }
 
+static int snd_usb_create_stream(struct snd_usb_audio *chip, int ctrlif, int interface)
+{
+	struct usb_device *dev = chip->dev;
+	struct usb_host_interface *alts;
+	struct usb_interface_descriptor *altsd;
+	struct usb_interface *iface = usb_ifnum_to_if(dev, interface);
+
+	if (!iface) {
+		snd_printk(KERN_ERR "%d:%u:%d : does not exist\n",
+			   dev->devnum, ctrlif, interface);
+		return -EINVAL;
+	}
+
+	if (usb_interface_claimed(iface)) {
+		snd_printdd(KERN_INFO "%d:%d:%d: skipping, already claimed\n",
+						dev->devnum, ctrlif, interface);
+		return -EINVAL;
+	}
+
+	alts = &iface->altsetting[0];
+	altsd = get_iface_desc(alts);
+	if ((altsd->bInterfaceClass == USB_CLASS_AUDIO ||
+	     altsd->bInterfaceClass == USB_CLASS_VENDOR_SPEC) &&
+	    altsd->bInterfaceSubClass == USB_SUBCLASS_MIDI_STREAMING) {
+		int err = snd_usbmidi_create(chip->card, iface,
+					     &chip->midi_list, NULL);
+		if (err < 0) {
+			snd_printk(KERN_ERR "%d:%u:%d: cannot create sequencer device\n",
+						dev->devnum, ctrlif, interface);
+			return -EINVAL;
+		}
+		usb_driver_claim_interface(&usb_audio_driver, iface, (void *)-1L);
+
+		return 0;
+	}
+
+	if ((altsd->bInterfaceClass != USB_CLASS_AUDIO &&
+	     altsd->bInterfaceClass != USB_CLASS_VENDOR_SPEC) ||
+	    altsd->bInterfaceSubClass != USB_SUBCLASS_AUDIO_STREAMING) {
+		snd_printdd(KERN_ERR "%d:%u:%d: skipping non-supported interface %d\n",
+					dev->devnum, ctrlif, interface, altsd->bInterfaceClass);
+		/* skip non-supported classes */
+		return -EINVAL;
+	}
+
+	if (snd_usb_get_speed(dev) == USB_SPEED_LOW) {
+		snd_printk(KERN_ERR "low speed audio streaming not supported\n");
+		return -EINVAL;
+	}
+
+	if (! parse_audio_endpoints(chip, interface)) {
+		usb_set_interface(dev, interface, 0); /* reset the current interface */
+		usb_driver_claim_interface(&usb_audio_driver, iface, (void *)-1L);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 /*
  * parse audio control descriptor and create pcm/midi streams
  */
@@ -2882,69 +2958,36 @@ static int snd_usb_create_streams(struct snd_usb_audio *chip, int ctrlif)
 {
 	struct usb_device *dev = chip->dev;
 	struct usb_host_interface *host_iface;
-	struct usb_interface *iface;
-	unsigned char *p1;
-	int i, j;
+	struct uac_ac_header_descriptor_v1 *h1;
+	void *control_header;
+	int i;
 
 	/* find audiocontrol interface */
 	host_iface = &usb_ifnum_to_if(dev, ctrlif)->altsetting[0];
-	if (!(p1 = snd_usb_find_csint_desc(host_iface->extra, host_iface->extralen, NULL, HEADER))) {
+	control_header = snd_usb_find_csint_desc(host_iface->extra,
+						 host_iface->extralen,
+						 NULL, HEADER);
+
+	if (!control_header) {
 		snd_printk(KERN_ERR "cannot find HEADER\n");
 		return -EINVAL;
 	}
-	if (! p1[7] || p1[0] < 8 + p1[7]) {
-		snd_printk(KERN_ERR "invalid HEADER\n");
+
+	h1 = control_header;
+
+	if (!h1->bInCollection) {
+		snd_printk(KERN_INFO "skipping empty audio interface (v1)\n");
 		return -EINVAL;
 	}
 
-	/*
-	 * parse all USB audio streaming interfaces
-	 */
-	for (i = 0; i < p1[7]; i++) {
-		struct usb_host_interface *alts;
-		struct usb_interface_descriptor *altsd;
-		j = p1[8 + i];
-		iface = usb_ifnum_to_if(dev, j);
-		if (!iface) {
-			snd_printk(KERN_ERR "%d:%u:%d : does not exist\n",
-				   dev->devnum, ctrlif, j);
-			continue;
-		}
-		if (usb_interface_claimed(iface)) {
-			snd_printdd(KERN_INFO "%d:%d:%d: skipping, already claimed\n", dev->devnum, ctrlif, j);
-			continue;
-		}
-		alts = &iface->altsetting[0];
-		altsd = get_iface_desc(alts);
-		if ((altsd->bInterfaceClass == USB_CLASS_AUDIO ||
-		     altsd->bInterfaceClass == USB_CLASS_VENDOR_SPEC) &&
-		    altsd->bInterfaceSubClass == USB_SUBCLASS_MIDI_STREAMING) {
-			int err = snd_usbmidi_create(chip->card, iface,
-						     &chip->midi_list, NULL);
-			if (err < 0) {
-				snd_printk(KERN_ERR "%d:%u:%d: cannot create sequencer device\n", dev->devnum, ctrlif, j);
-				continue;
-			}
-			usb_driver_claim_interface(&usb_audio_driver, iface, (void *)-1L);
-			continue;
-		}
-		if ((altsd->bInterfaceClass != USB_CLASS_AUDIO &&
-		     altsd->bInterfaceClass != USB_CLASS_VENDOR_SPEC) ||
-		    altsd->bInterfaceSubClass != USB_SUBCLASS_AUDIO_STREAMING) {
-			snd_printdd(KERN_ERR "%d:%u:%d: skipping non-supported interface %d\n", dev->devnum, ctrlif, j, altsd->bInterfaceClass);
-			/* skip non-supported classes */
-			continue;
-		}
-		if (snd_usb_get_speed(dev) == USB_SPEED_LOW) {
-			snd_printk(KERN_ERR "low speed audio streaming not supported\n");
-			continue;
-		}
-		if (! parse_audio_endpoints(chip, j)) {
-			usb_set_interface(dev, j, 0); /* reset the current interface */
-			usb_driver_claim_interface(&usb_audio_driver, iface, (void *)-1L);
-		}
+	if (h1->bLength < sizeof(*h1) + h1->bInCollection) {
+		snd_printk(KERN_ERR "invalid HEADER (v1)\n");
+		return -EINVAL;
 	}
 
+	for (i = 0; i < h1->bInCollection; i++)
+		snd_usb_create_stream(chip, ctrlif, h1->baInterfaceNr[i]);
+
 	return 0;
 }
 
@@ -3607,7 +3650,6 @@ static void *snd_usb_audio_probe(struct usb_device *dev,
 	ifnum = get_iface_desc(alts)->bInterfaceNumber;
 	id = USB_ID(le16_to_cpu(dev->descriptor.idVendor),
 		    le16_to_cpu(dev->descriptor.idProduct));
-
 	if (quirk && quirk->ifnum >= 0 && ifnum != quirk->ifnum)
 		goto __err_val;
 
diff --git a/sound/usb/usbmixer.c b/sound/usb/usbmixer.c
index 35b4830fb0c4..11636a6112d5 100644
--- a/sound/usb/usbmixer.c
+++ b/sound/usb/usbmixer.c
@@ -32,6 +32,8 @@
 #include <linux/slab.h>
 #include <linux/string.h>
 #include <linux/usb.h>
+#include <linux/usb/audio.h>
+
 #include <sound/core.h>
 #include <sound/control.h>
 #include <sound/hwdep.h>
@@ -1086,29 +1088,30 @@ static void build_feature_ctl(struct mixer_build *state, unsigned char *desc,
  *
  * most of controlls are defined here.
  */
-static int parse_audio_feature_unit(struct mixer_build *state, int unitid, unsigned char *ftr)
+static int parse_audio_feature_unit(struct mixer_build *state, int unitid, void *_ftr)
 {
 	int channels, i, j;
 	struct usb_audio_term iterm;
 	unsigned int master_bits, first_ch_bits;
 	int err, csize;
+	struct uac_feature_unit_descriptor *ftr = _ftr;
 
-	if (ftr[0] < 7 || ! (csize = ftr[5]) || ftr[0] < 7 + csize) {
+	if (ftr->bLength < 7 || ! (csize = ftr->bControlSize) || ftr->bLength < 7 + csize) {
 		snd_printk(KERN_ERR "usbaudio: unit %u: invalid FEATURE_UNIT descriptor\n", unitid);
 		return -EINVAL;
 	}
 
 	/* parse the source unit */
-	if ((err = parse_audio_unit(state, ftr[4])) < 0)
+	if ((err = parse_audio_unit(state, ftr->bSourceID)) < 0)
 		return err;
 
 	/* determine the input source type and name */
-	if (check_input_term(state, ftr[4], &iterm) < 0)
+	if (check_input_term(state, ftr->bSourceID, &iterm) < 0)
 		return -EINVAL;
 
-	channels = (ftr[0] - 7) / csize - 1;
+	channels = (ftr->bLength - 7) / csize - 1;
 
-	master_bits = snd_usb_combine_bytes(ftr + 6, csize);
+	master_bits = snd_usb_combine_bytes(ftr->controls, csize);
 	/* master configuration quirks */
 	switch (state->chip->usb_id) {
 	case USB_ID(0x08bb, 0x2702):
@@ -1119,21 +1122,21 @@ static int parse_audio_feature_unit(struct mixer_build *state, int unitid, unsig
 		break;
 	}
 	if (channels > 0)
-		first_ch_bits = snd_usb_combine_bytes(ftr + 6 + csize, csize);
+		first_ch_bits = snd_usb_combine_bytes(ftr->controls + csize, csize);
 	else
 		first_ch_bits = 0;
 	/* check all control types */
 	for (i = 0; i < 10; i++) {
 		unsigned int ch_bits = 0;
 		for (j = 0; j < channels; j++) {
-			unsigned int mask = snd_usb_combine_bytes(ftr + 6 + csize * (j+1), csize);
+			unsigned int mask = snd_usb_combine_bytes(ftr->controls + csize * (j+1), csize);
 			if (mask & (1 << i))
 				ch_bits |= (1 << j);
 		}
 		if (ch_bits & 1) /* the first channel must be set (for ease of programming) */
-			build_feature_ctl(state, ftr, ch_bits, i, &iterm, unitid);
+			build_feature_ctl(state, _ftr, ch_bits, i, &iterm, unitid);
 		if (master_bits & (1 << i))
-			build_feature_ctl(state, ftr, 0, i, &iterm, unitid);
+			build_feature_ctl(state, _ftr, 0, i, &iterm, unitid);
 	}
 
 	return 0;
@@ -1780,7 +1783,7 @@ static int snd_usb_mixer_dev_free(struct snd_device *device)
  */
 static int snd_usb_mixer_controls(struct usb_mixer_interface *mixer)
 {
-	unsigned char *desc;
+	struct uac_output_terminal_descriptor_v1 *desc;
 	struct mixer_build state;
 	int err;
 	const struct usbmix_ctl_map *map;
@@ -1805,13 +1808,13 @@ static int snd_usb_mixer_controls(struct usb_mixer_interface *mixer)
 
 	desc = NULL;
 	while ((desc = snd_usb_find_csint_desc(hostif->extra, hostif->extralen, desc, OUTPUT_TERMINAL)) != NULL) {
-		if (desc[0] < 9)
+		if (desc->bLength < 9)
 			continue; /* invalid descriptor? */
-		set_bit(desc[3], state.unitbitmap);  /* mark terminal ID as visited */
-		state.oterm.id = desc[3];
-		state.oterm.type = combine_word(&desc[4]);
-		state.oterm.name = desc[8];
-		err = parse_audio_unit(&state, desc[7]);
+		set_bit(desc->bTerminalID, state.unitbitmap);  /* mark terminal ID as visited */
+		state.oterm.id = desc->bTerminalID;
+		state.oterm.type = le16_to_cpu(desc->wTerminalType);
+		state.oterm.name = desc->iTerminal;
+		err = parse_audio_unit(&state, desc->bSourceID);
 		if (err < 0)
 			return err;
 	}
-- 
cgit v1.2.3


From 8fee4aff8c89c229593b76a6ab172a9cad24b412 Mon Sep 17 00:00:00 2001
From: Daniel Mack <daniel@caiaq.de>
Date: Mon, 22 Feb 2010 23:49:10 +0100
Subject: ALSA: usbaudio: introduce new types for audio class v2

This patch adds some definitions for audio class v2.

Unfortunately, the UNIT types PROCESSING_UNIT and EXTENSION_UNIT have
different numerical representations in both standards, so there is need
for a _V1 add-on now. usbmixer.c is changed accordingly.

Signed-off-by: Daniel Mack <daniel@caiaq.de>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/linux/usb/audio.h | 57 +++++++++++++++++++++++++++++++++++++++++++++++
 sound/usb/usbaudio.h      | 19 +++++++++++++---
 sound/usb/usbmixer.c      | 14 ++++++------
 3 files changed, 80 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/usb/audio.h b/include/linux/usb/audio.h
index 44f82d8e09c5..fb1a97bf943d 100644
--- a/include/linux/usb/audio.h
+++ b/include/linux/usb/audio.h
@@ -25,6 +25,9 @@
 #define USB_SUBCLASS_AUDIOSTREAMING	0x02
 #define USB_SUBCLASS_MIDISTREAMING	0x03
 
+#define UAC_VERSION_1			0x00
+#define UAC_VERSION_2			0x20
+
 /* A.5 Audio Class-Specific AC Interface Descriptor Subtypes */
 #define UAC_HEADER			0x01
 #define UAC_INPUT_TERMINAL		0x02
@@ -180,6 +183,19 @@ struct uac_as_header_descriptor_v1 {
 	__le16 wFormatTag;		/* The Audio Data Format */
 } __attribute__ ((packed));
 
+struct uac_as_header_descriptor_v2 {
+	__u8 bLength;
+	__u8 bDescriptorType;
+	__u8 bDescriptorSubtype;
+	__u8 bTerminalLink;
+	__u8 bmControls;
+	__u8 bFormatType;
+	__u32 bmFormats;
+	__u8 bNrChannels;
+	__u32 bmChannelConfig;
+	__u8 iChannelNames;
+} __attribute__((packed));
+
 #define UAC_DT_AS_HEADER_SIZE		7
 
 /* Formats - A.1.1 Audio Data Format Type I Codes */
@@ -232,6 +248,19 @@ struct uac_format_type_i_discrete_descriptor_##n {		\
 
 #define UAC_FORMAT_TYPE_I_DISCRETE_DESC_SIZE(n)	(8 + (n * 3))
 
+struct uac_format_type_i_ext_descriptor {
+	__u8 bLength;
+	__u8 bDescriptorType;
+	__u8 bDescriptorSubtype;
+	__u8 bSubslotSize;
+	__u8 bFormatType;
+	__u8 bBitResolution;
+	__u8 bHeaderLength;
+	__u8 bControlSize;
+	__u8 bSideBandProtocol;
+} __attribute__((packed));
+
+
 /* Formats - Audio Data Format Type I Codes */
 
 struct uac_format_type_ii_discrete_descriptor {
@@ -245,11 +274,26 @@ struct uac_format_type_ii_discrete_descriptor {
 	__u8 tSamFreq[][3];
 } __attribute__((packed));
 
+struct uac_format_type_ii_ext_descriptor {
+	__u8 bLength;
+	__u8 bDescriptorType;
+	__u8 bDescriptorSubtype;
+	__u8 bFormatType;
+	__u16 wMaxBitRate;
+	__u16 wSamplesPerFrame;
+	__u8 bHeaderLength;
+	__u8 bSideBandProtocol;
+} __attribute__((packed));
+
+
 /* Formats - A.2 Format Type Codes */
 #define UAC_FORMAT_TYPE_UNDEFINED	0x0
 #define UAC_FORMAT_TYPE_I		0x1
 #define UAC_FORMAT_TYPE_II		0x2
 #define UAC_FORMAT_TYPE_III		0x3
+#define UAC_EXT_FORMAT_TYPE_I		0x81
+#define UAC_EXT_FORMAT_TYPE_II		0x82
+#define UAC_EXT_FORMAT_TYPE_III		0x83
 
 struct uac_iso_endpoint_descriptor {
 	__u8  bLength;			/* in bytes: 7 */
@@ -265,6 +309,19 @@ struct uac_iso_endpoint_descriptor {
 #define UAC_EP_CS_ATTR_PITCH_CONTROL	0x02
 #define UAC_EP_CS_ATTR_FILL_MAX		0x80
 
+/* Audio class v2.0: CLOCK_SOURCE descriptor */
+
+struct uac_clock_source_descriptor {
+	__u8 bLength;
+	__u8 bDescriptorType;
+	__u8 bDescriptorSubtype;
+	__u8 bClockID;
+	__u8 bmAttributes;
+	__u8 bmControls;
+	__u8 bAssocTerminal;
+	__u8 iClockSource;
+} __attribute__((packed));
+
 /* A.10.2 Feature Unit Control Selectors */
 
 struct uac_feature_unit_descriptor {
diff --git a/sound/usb/usbaudio.h b/sound/usb/usbaudio.h
index 9d8cea48fc5f..4f482939e8e8 100644
--- a/sound/usb/usbaudio.h
+++ b/sound/usb/usbaudio.h
@@ -36,8 +36,17 @@
 #define MIXER_UNIT			0x04
 #define SELECTOR_UNIT			0x05
 #define FEATURE_UNIT			0x06
-#define PROCESSING_UNIT			0x07
-#define EXTENSION_UNIT			0x08
+#define PROCESSING_UNIT_V1		0x07
+#define EXTENSION_UNIT_V1		0x08
+
+/* audio class v2 */
+#define EFFECT_UNIT			0x07
+#define PROCESSING_UNIT_V2		0x08
+#define EXTENSION_UNIT_V2		0x09
+#define CLOCK_SOURCE			0x0a
+#define CLOCK_SELECTOR			0x0b
+#define CLOCK_MULTIPLIER		0x0c
+#define SAMPLE_RATE_CONVERTER		0x0d
 
 #define AS_GENERAL			0x01
 #define FORMAT_TYPE			0x02
@@ -60,7 +69,7 @@
 #define EP_CS_ATTR_PITCH_CONTROL	0x02
 #define EP_CS_ATTR_FILL_MAX		0x80
 
-/* Audio Class specific Request Codes */
+/* Audio Class specific Request Codes (v1) */
 
 #define SET_CUR    0x01
 #define GET_CUR    0x81
@@ -74,6 +83,10 @@
 #define GET_MEM    0x85
 #define GET_STAT   0xff
 
+/* Audio Class specific Request Codes (v2) */
+#define CS_CUR     0x01
+#define CS_RANGE   0x02
+
 /* Terminal Control Selectors */
 
 #define COPY_PROTECT_CONTROL       0x01
diff --git a/sound/usb/usbmixer.c b/sound/usb/usbmixer.c
index 11636a6112d5..ca7949598191 100644
--- a/sound/usb/usbmixer.c
+++ b/sound/usb/usbmixer.c
@@ -286,7 +286,7 @@ static void *find_audio_control_unit(struct mixer_build *state, unsigned char un
 	p = NULL;
 	while ((p = snd_usb_find_desc(state->buffer, state->buflen, p,
 				      USB_DT_CS_INTERFACE)) != NULL) {
-		if (p[0] >= 4 && p[2] >= INPUT_TERMINAL && p[2] <= EXTENSION_UNIT && p[3] == unit)
+		if (p[0] >= 4 && p[2] >= INPUT_TERMINAL && p[2] <= EXTENSION_UNIT_V1 && p[3] == unit)
 			return p;
 	}
 	return NULL;
@@ -607,9 +607,9 @@ static int get_term_name(struct mixer_build *state, struct usb_audio_term *iterm
 		switch (iterm->type >> 16) {
 		case SELECTOR_UNIT:
 			strcpy(name, "Selector"); return 8;
-		case PROCESSING_UNIT:
+		case PROCESSING_UNIT_V1:
 			strcpy(name, "Process Unit"); return 12;
-		case EXTENSION_UNIT:
+		case EXTENSION_UNIT_V1:
 			strcpy(name, "Ext Unit"); return 8;
 		case MIXER_UNIT:
 			strcpy(name, "Mixer"); return 5;
@@ -673,8 +673,8 @@ static int check_input_term(struct mixer_build *state, int id, struct usb_audio_
 			term->id = id;
 			term->name = p1[9 + p1[0] - 1];
 			return 0;
-		case PROCESSING_UNIT:
-		case EXTENSION_UNIT:
+		case PROCESSING_UNIT_V1:
+		case EXTENSION_UNIT_V1:
 			if (p1[6] == 1) {
 				id = p1[7];
 				break; /* continue to parse */
@@ -1747,9 +1747,9 @@ static int parse_audio_unit(struct mixer_build *state, int unitid)
 		return parse_audio_selector_unit(state, unitid, p1);
 	case FEATURE_UNIT:
 		return parse_audio_feature_unit(state, unitid, p1);
-	case PROCESSING_UNIT:
+	case PROCESSING_UNIT_V1:
 		return parse_audio_processing_unit(state, unitid, p1);
-	case EXTENSION_UNIT:
+	case EXTENSION_UNIT_V1:
 		return parse_audio_extension_unit(state, unitid, p1);
 	default:
 		snd_printk(KERN_ERR "usbaudio: unit %u: unexpected type 0x%02x\n", unitid, p1[2]);
-- 
cgit v1.2.3


From 79da0644a8e0838522828f106e4049639eea6baf Mon Sep 17 00:00:00 2001
From: Jens Axboe <jens.axboe@oracle.com>
Date: Tue, 23 Feb 2010 08:40:43 +0100
Subject: Revert "block: improve queue_should_plug() by looking at IO depths"

This reverts commit fb1e75389bd06fd5987e9cda1b4e0305c782f854.

"Benjamin S." <sbenni@gmx.de> reports that the patch in question
causes a big drop in sequential throughput for him, dropping from
200MB/sec down to only 70MB/sec.

Needs to be investigated more fully, for now lets just revert the
offending commit.

Conflicts:

	include/linux/blkdev.h

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 block/blk-core.c       | 11 ++---------
 include/linux/blkdev.h |  4 +---
 2 files changed, 3 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-core.c b/block/blk-core.c
index 718897e6d37f..d1a9a0a64f95 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1147,7 +1147,7 @@ void init_request_from_bio(struct request *req, struct bio *bio)
  */
 static inline bool queue_should_plug(struct request_queue *q)
 {
-	return !(blk_queue_nonrot(q) && blk_queue_queuing(q));
+	return !(blk_queue_nonrot(q) && blk_queue_tagged(q));
 }
 
 static int __make_request(struct request_queue *q, struct bio *bio)
@@ -1859,15 +1859,8 @@ void blk_dequeue_request(struct request *rq)
 	 * and to it is freed is accounted as io that is in progress at
 	 * the driver side.
 	 */
-	if (blk_account_rq(rq)) {
+	if (blk_account_rq(rq))
 		q->in_flight[rq_is_sync(rq)]++;
-		/*
-		 * Mark this device as supporting hardware queuing, if
-		 * we have more IOs in flight than 4.
-		 */
-		if (!blk_queue_queuing(q) && queue_in_flight(q) > 4)
-			set_bit(QUEUE_FLAG_CQ, &q->queue_flags);
-	}
 }
 
 /**
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 5c8018977efa..1896e868854f 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -461,8 +461,7 @@ struct request_queue
 #define QUEUE_FLAG_NONROT      14	/* non-rotational device (SSD) */
 #define QUEUE_FLAG_VIRT        QUEUE_FLAG_NONROT /* paravirt device */
 #define QUEUE_FLAG_IO_STAT     15	/* do IO stats */
-#define QUEUE_FLAG_CQ	       16	/* hardware does queuing */
-#define QUEUE_FLAG_DISCARD     17	/* supports DISCARD */
+#define QUEUE_FLAG_DISCARD     16	/* supports DISCARD */
 
 #define QUEUE_FLAG_DEFAULT	((1 << QUEUE_FLAG_IO_STAT) |		\
 				 (1 << QUEUE_FLAG_CLUSTER) |		\
@@ -586,7 +585,6 @@ enum {
 
 #define blk_queue_plugged(q)	test_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags)
 #define blk_queue_tagged(q)	test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags)
-#define blk_queue_queuing(q)	test_bit(QUEUE_FLAG_CQ, &(q)->queue_flags)
 #define blk_queue_stopped(q)	test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags)
 #define blk_queue_nomerges(q)	test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags)
 #define blk_queue_nonrot(q)	test_bit(QUEUE_FLAG_NONROT, &(q)->queue_flags)
-- 
cgit v1.2.3


From de48c7bc6f93c6c8e0be8612c9d72a2dc92eaa01 Mon Sep 17 00:00:00 2001
From: Daniel Mack <daniel@caiaq.de>
Date: Mon, 22 Feb 2010 23:49:13 +0100
Subject: ALSA: usbaudio: consolidate header files

Use the definitions from linux/usb/audio.h all over the ALSA USB audio
driver and add some missing definitions there as well.

Use the endpoint attribute macros from linux/usb/ch9 and remove the own
things from sound/usb/usbaudio.h.

Now things are also nicely prefixed which makes understanding the code
easier.

Signed-off-by: Daniel Mack <daniel@caiaq.de>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/linux/usb/audio.h |  31 +++++++++++-
 sound/usb/usbaudio.c      | 125 +++++++++++++++++++++++-----------------------
 sound/usb/usbaudio.h      | 100 -------------------------------------
 sound/usb/usbmidi.c       |  10 ++--
 sound/usb/usbmixer.c      |  62 +++++++++++------------
 sound/usb/usbquirks.h     |  34 ++++++-------
 sound/usb/usx2y/us122l.c  |   6 ++-
 7 files changed, 150 insertions(+), 218 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/usb/audio.h b/include/linux/usb/audio.h
index fb1a97bf943d..6bb293684eb8 100644
--- a/include/linux/usb/audio.h
+++ b/include/linux/usb/audio.h
@@ -35,8 +35,17 @@
 #define UAC_MIXER_UNIT			0x04
 #define UAC_SELECTOR_UNIT		0x05
 #define UAC_FEATURE_UNIT		0x06
-#define UAC_PROCESSING_UNIT		0x07
-#define UAC_EXTENSION_UNIT		0x08
+#define UAC_PROCESSING_UNIT_V1		0x07
+#define UAC_EXTENSION_UNIT_V1		0x08
+
+/* UAC v2.0 types */
+#define UAC_EFFECT_UNIT			0x07
+#define UAC_PROCESSING_UNIT_V2		0x08
+#define UAC_EXTENSION_UNIT_V2		0x09
+#define UAC_CLOCK_SOURCE		0x0a
+#define UAC_CLOCK_SELECTOR		0x0b
+#define UAC_CLOCK_MULTIPLIER		0x0c
+#define UAC_SAMPLE_RATE_CONVERTER	0x0d
 
 /* A.6 Audio Class-Specific AS Interface Descriptor Subtypes */
 #define UAC_AS_GENERAL			0x01
@@ -69,6 +78,10 @@
 
 #define UAC_GET_STAT			0xff
 
+/* Audio class v2.0 handles all the parameter calls differently */
+#define UAC2_CS_CUR			0x01
+#define UAC2_CS_RANGE			0x02
+
 /* MIDI - A.1 MS Class-Specific Interface Descriptor Subtypes */
 #define UAC_MS_HEADER			0x01
 #define UAC_MIDI_IN_JACK		0x02
@@ -133,6 +146,10 @@ struct uac_input_terminal_descriptor {
 #define UAC_INPUT_TERMINAL_MICROPHONE_ARRAY		0x205
 #define UAC_INPUT_TERMINAL_PROC_MICROPHONE_ARRAY	0x206
 
+/* Terminals - control selectors */
+
+#define UAC_TERMINAL_CS_COPY_PROTECT_CONTROL		0x01
+
 /* 4.3.2.2 Output Terminal Descriptor */
 struct uac_output_terminal_descriptor_v1 {
 	__u8  bLength;			/* in bytes: 9 */
@@ -263,6 +280,9 @@ struct uac_format_type_i_ext_descriptor {
 
 /* Formats - Audio Data Format Type I Codes */
 
+#define UAC_FORMAT_TYPE_II_MPEG	0x1001
+#define UAC_FORMAT_TYPE_II_AC3	0x1002
+
 struct uac_format_type_ii_discrete_descriptor {
 	__u8 bLength;
 	__u8 bDescriptorType;
@@ -285,6 +305,13 @@ struct uac_format_type_ii_ext_descriptor {
 	__u8 bSideBandProtocol;
 } __attribute__((packed));
 
+/* type III */
+#define UAC_FORMAT_TYPE_III_IEC1937_AC3	0x2001
+#define UAC_FORMAT_TYPE_III_IEC1937_MPEG1_LAYER1	0x2002
+#define UAC_FORMAT_TYPE_III_IEC1937_MPEG2_NOEXT	0x2003
+#define UAC_FORMAT_TYPE_III_IEC1937_MPEG2_EXT	0x2004
+#define UAC_FORMAT_TYPE_III_IEC1937_MPEG2_LAYER1_LS	0x2005
+#define UAC_FORMAT_TYPE_III_IEC1937_MPEG2_LAYER23_LS	0x2006
 
 /* Formats - A.2 Format Type Codes */
 #define UAC_FORMAT_TYPE_UNDEFINED	0x0
diff --git a/sound/usb/usbaudio.c b/sound/usb/usbaudio.c
index 411a6cf43c21..c539f7fe292f 100644
--- a/sound/usb/usbaudio.c
+++ b/sound/usb/usbaudio.c
@@ -47,6 +47,7 @@
 #include <linux/moduleparam.h>
 #include <linux/mutex.h>
 #include <linux/usb/audio.h>
+#include <linux/usb/ch9.h>
 
 #include <sound/core.h>
 #include <sound/info.h>
@@ -598,7 +599,7 @@ static int prepare_playback_urb(struct snd_usb_substream *subs,
 		if (subs->transfer_done >= runtime->period_size) {
 			subs->transfer_done -= runtime->period_size;
 			period_elapsed = 1;
-			if (subs->fmt_type == USB_FORMAT_TYPE_II) {
+			if (subs->fmt_type == UAC_FORMAT_TYPE_II) {
 				if (subs->transfer_done > 0) {
 					/* FIXME: fill-max mode is not
 					 * supported yet */
@@ -1106,7 +1107,7 @@ static int init_substream_urbs(struct snd_usb_substream *subs, unsigned int peri
 		u->packets = (i + 1) * total_packs / subs->nurbs
 			- i * total_packs / subs->nurbs;
 		u->buffer_size = maxsize * u->packets;
-		if (subs->fmt_type == USB_FORMAT_TYPE_II)
+		if (subs->fmt_type == UAC_FORMAT_TYPE_II)
 			u->packets++; /* for transfer delimiter */
 		u->urb = usb_alloc_urb(u->packets, GFP_KERNEL);
 		if (!u->urb)
@@ -1182,7 +1183,7 @@ static struct audioformat *find_format(struct snd_usb_substream *subs, unsigned
 			if (i >= fp->nr_rates)
 				continue;
 		}
-		attr = fp->ep_attr & EP_ATTR_MASK;
+		attr = fp->ep_attr & USB_ENDPOINT_SYNCTYPE;
 		if (! found) {
 			found = fp;
 			cur_attr = attr;
@@ -1194,14 +1195,14 @@ static struct audioformat *find_format(struct snd_usb_substream *subs, unsigned
 		 * M-audio audiophile USB.
 		 */
 		if (attr != cur_attr) {
-			if ((attr == EP_ATTR_ASYNC &&
+			if ((attr == USB_ENDPOINT_SYNC_ASYNC &&
 			     subs->direction == SNDRV_PCM_STREAM_PLAYBACK) ||
-			    (attr == EP_ATTR_ADAPTIVE &&
+			    (attr == USB_ENDPOINT_SYNC_ADAPTIVE &&
 			     subs->direction == SNDRV_PCM_STREAM_CAPTURE))
 				continue;
-			if ((cur_attr == EP_ATTR_ASYNC &&
+			if ((cur_attr == USB_ENDPOINT_SYNC_ASYNC &&
 			     subs->direction == SNDRV_PCM_STREAM_PLAYBACK) ||
-			    (cur_attr == EP_ATTR_ADAPTIVE &&
+			    (cur_attr == USB_ENDPOINT_SYNC_ADAPTIVE &&
 			     subs->direction == SNDRV_PCM_STREAM_CAPTURE)) {
 				found = fp;
 				cur_attr = attr;
@@ -1231,11 +1232,11 @@ static int init_usb_pitch(struct usb_device *dev, int iface,
 
 	ep = get_endpoint(alts, 0)->bEndpointAddress;
 	/* if endpoint has pitch control, enable it */
-	if (fmt->attributes & EP_CS_ATTR_PITCH_CONTROL) {
+	if (fmt->attributes & UAC_EP_CS_ATTR_PITCH_CONTROL) {
 		data[0] = 1;
-		if ((err = snd_usb_ctl_msg(dev, usb_sndctrlpipe(dev, 0), SET_CUR,
+		if ((err = snd_usb_ctl_msg(dev, usb_sndctrlpipe(dev, 0), UAC_SET_CUR,
 					   USB_TYPE_CLASS|USB_RECIP_ENDPOINT|USB_DIR_OUT,
-					   PITCH_CONTROL << 8, ep, data, 1, 1000)) < 0) {
+					   UAC_EP_CS_ATTR_PITCH_CONTROL << 8, ep, data, 1, 1000)) < 0) {
 			snd_printk(KERN_ERR "%d:%d:%d: cannot set enable PITCH\n",
 				   dev->devnum, iface, ep);
 			return err;
@@ -1254,21 +1255,21 @@ static int init_usb_sample_rate(struct usb_device *dev, int iface,
 
 	ep = get_endpoint(alts, 0)->bEndpointAddress;
 	/* if endpoint has sampling rate control, set it */
-	if (fmt->attributes & EP_CS_ATTR_SAMPLE_RATE) {
+	if (fmt->attributes & UAC_EP_CS_ATTR_SAMPLE_RATE) {
 		int crate;
 		data[0] = rate;
 		data[1] = rate >> 8;
 		data[2] = rate >> 16;
-		if ((err = snd_usb_ctl_msg(dev, usb_sndctrlpipe(dev, 0), SET_CUR,
+		if ((err = snd_usb_ctl_msg(dev, usb_sndctrlpipe(dev, 0), UAC_SET_CUR,
 					   USB_TYPE_CLASS|USB_RECIP_ENDPOINT|USB_DIR_OUT,
-					   SAMPLING_FREQ_CONTROL << 8, ep, data, 3, 1000)) < 0) {
+					   UAC_EP_CS_ATTR_SAMPLE_RATE << 8, ep, data, 3, 1000)) < 0) {
 			snd_printk(KERN_ERR "%d:%d:%d: cannot set freq %d to ep %#x\n",
 				   dev->devnum, iface, fmt->altsetting, rate, ep);
 			return err;
 		}
-		if ((err = snd_usb_ctl_msg(dev, usb_rcvctrlpipe(dev, 0), GET_CUR,
+		if ((err = snd_usb_ctl_msg(dev, usb_rcvctrlpipe(dev, 0), UAC_GET_CUR,
 					   USB_TYPE_CLASS|USB_RECIP_ENDPOINT|USB_DIR_IN,
-					   SAMPLING_FREQ_CONTROL << 8, ep, data, 3, 1000)) < 0) {
+					   UAC_EP_CS_ATTR_SAMPLE_RATE << 8, ep, data, 3, 1000)) < 0) {
 			snd_printk(KERN_WARNING "%d:%d:%d: cannot get freq at ep %#x\n",
 				   dev->devnum, iface, fmt->altsetting, ep);
 			return 0; /* some devices don't support reading */
@@ -1386,9 +1387,9 @@ static int set_format(struct snd_usb_substream *subs, struct audioformat *fmt)
 	 * descriptors which fool us.  if it has only one EP,
 	 * assume it as adaptive-out or sync-in.
 	 */
-	attr = fmt->ep_attr & EP_ATTR_MASK;
-	if (((is_playback && attr == EP_ATTR_ASYNC) ||
-	     (! is_playback && attr == EP_ATTR_ADAPTIVE)) &&
+	attr = fmt->ep_attr & USB_ENDPOINT_SYNCTYPE;
+	if (((is_playback && attr == USB_ENDPOINT_SYNC_ASYNC) ||
+	     (! is_playback && attr == USB_ENDPOINT_SYNC_ADAPTIVE)) &&
 	    altsd->bNumEndpoints >= 2) {
 		/* check sync-pipe endpoint */
 		/* ... and check descriptor size before accessing bSynchAddress
@@ -1428,7 +1429,7 @@ static int set_format(struct snd_usb_substream *subs, struct audioformat *fmt)
 	}
 
 	/* always fill max packet size */
-	if (fmt->attributes & EP_CS_ATTR_FILL_MAX)
+	if (fmt->attributes & UAC_EP_CS_ATTR_FILL_MAX)
 		subs->fill_max = 1;
 
 	if ((err = init_usb_pitch(dev, subs->interface, alts, fmt)) < 0)
@@ -1886,7 +1887,7 @@ static int setup_hw_info(struct snd_pcm_runtime *runtime, struct snd_usb_substre
 			runtime->hw.channels_min = fp->channels;
 		if (runtime->hw.channels_max < fp->channels)
 			runtime->hw.channels_max = fp->channels;
-		if (fp->fmt_type == USB_FORMAT_TYPE_II && fp->frame_size > 0) {
+		if (fp->fmt_type == UAC_FORMAT_TYPE_II && fp->frame_size > 0) {
 			/* FIXME: there might be more than one audio formats... */
 			runtime->hw.period_bytes_min = runtime->hw.period_bytes_max =
 				fp->frame_size;
@@ -2120,7 +2121,7 @@ static struct usb_device_id usb_audio_ids [] = {
 #include "usbquirks.h"
     { .match_flags = (USB_DEVICE_ID_MATCH_INT_CLASS | USB_DEVICE_ID_MATCH_INT_SUBCLASS),
       .bInterfaceClass = USB_CLASS_AUDIO,
-      .bInterfaceSubClass = USB_SUBCLASS_AUDIO_CONTROL },
+      .bInterfaceSubClass = USB_SUBCLASS_AUDIOCONTROL },
     { }						/* Terminating entry */
 };
 
@@ -2159,7 +2160,7 @@ static void proc_dump_substream_formats(struct snd_usb_substream *subs, struct s
 		snd_iprintf(buffer, "    Endpoint: %d %s (%s)\n",
 			    fp->endpoint & USB_ENDPOINT_NUMBER_MASK,
 			    fp->endpoint & USB_DIR_IN ? "IN" : "OUT",
-			    sync_types[(fp->ep_attr & EP_ATTR_MASK) >> 2]);
+			    sync_types[(fp->ep_attr & USB_ENDPOINT_SYNCTYPE) >> 2]);
 		if (fp->rates & SNDRV_PCM_RATE_CONTINUOUS) {
 			snd_iprintf(buffer, "    Rates: %d - %d (continuous)\n",
 				    fp->rate_min, fp->rate_max);
@@ -2471,11 +2472,11 @@ static int parse_audio_format_i_type(struct snd_usb_audio *chip,
 	pcm_format = -1;
 
 	switch (format) {
-	case 0: /* some devices don't define this correctly... */
+	case UAC_FORMAT_TYPE_I_UNDEFINED: /* some devices don't define this correctly... */
 		snd_printdd(KERN_INFO "%d:%u:%d : format type 0 is detected, processed as PCM\n",
 			    chip->dev->devnum, fp->iface, fp->altsetting);
 		/* fall-through */
-	case USB_AUDIO_FORMAT_PCM:
+	case UAC_FORMAT_TYPE_I_PCM:
 		if (sample_width > sample_bytes * 8) {
 			snd_printk(KERN_INFO "%d:%u:%d : sample bitwidth %d in over sample bytes %d\n",
 				   chip->dev->devnum, fp->iface, fp->altsetting,
@@ -2509,7 +2510,7 @@ static int parse_audio_format_i_type(struct snd_usb_audio *chip,
 			break;
 		}
 		break;
-	case USB_AUDIO_FORMAT_PCM8:
+	case UAC_FORMAT_TYPE_I_PCM8:
 		pcm_format = SNDRV_PCM_FORMAT_U8;
 
 		/* Dallas DS4201 workaround: it advertises U8 format, but really
@@ -2517,13 +2518,13 @@ static int parse_audio_format_i_type(struct snd_usb_audio *chip,
 		if (chip->usb_id == USB_ID(0x04fa, 0x4201))
 			pcm_format = SNDRV_PCM_FORMAT_S8;
 		break;
-	case USB_AUDIO_FORMAT_IEEE_FLOAT:
+	case UAC_FORMAT_TYPE_I_IEEE_FLOAT:
 		pcm_format = SNDRV_PCM_FORMAT_FLOAT_LE;
 		break;
-	case USB_AUDIO_FORMAT_ALAW:
+	case UAC_FORMAT_TYPE_I_ALAW:
 		pcm_format = SNDRV_PCM_FORMAT_A_LAW;
 		break;
-	case USB_AUDIO_FORMAT_MU_LAW:
+	case UAC_FORMAT_TYPE_I_MULAW:
 		pcm_format = SNDRV_PCM_FORMAT_MU_LAW;
 		break;
 	default:
@@ -2551,7 +2552,7 @@ static int parse_audio_format_rates_v1(struct snd_usb_audio *chip, struct audiof
 	int nr_rates = fmt[offset];
 
 	if (fmt[0] < offset + 1 + 3 * (nr_rates ? nr_rates : 2)) {
-		snd_printk(KERN_ERR "%d:%u:%d : invalid FORMAT_TYPE desc\n",
+		snd_printk(KERN_ERR "%d:%u:%d : invalid UAC_FORMAT_TYPE desc\n",
 				   chip->dev->devnum, fp->iface, fp->altsetting);
 		return -1;
 	}
@@ -2614,7 +2615,7 @@ static int parse_audio_format_rates_v2(struct snd_usb_audio *chip,
 	int i, nr_rates, data_size, ret = 0;
 
 	/* get the number of sample rates first by only fetching 2 bytes */
-	ret = snd_usb_ctl_msg(dev, usb_rcvctrlpipe(dev, 0), CS_RANGE,
+	ret = snd_usb_ctl_msg(dev, usb_rcvctrlpipe(dev, 0), UAC2_CS_RANGE,
 			       USB_TYPE_CLASS | USB_RECIP_INTERFACE | USB_DIR_IN,
 			       0x0100, chip->clock_id << 8, tmp, sizeof(tmp), 1000);
 
@@ -2632,7 +2633,7 @@ static int parse_audio_format_rates_v2(struct snd_usb_audio *chip,
 	}
 
 	/* now get the full information */
-	ret = snd_usb_ctl_msg(dev, usb_rcvctrlpipe(dev, 0), CS_RANGE,
+	ret = snd_usb_ctl_msg(dev, usb_rcvctrlpipe(dev, 0), UAC2_CS_RANGE,
 			       USB_TYPE_CLASS | USB_RECIP_INTERFACE | USB_DIR_IN,
 			       0x0100, chip->clock_id << 8, data, data_size, 1000);
 
@@ -2682,7 +2683,7 @@ static int parse_audio_format_i(struct snd_usb_audio *chip,
 	int protocol = altsd->bInterfaceProtocol;
 	int pcm_format, ret;
 
-	if (fmt->bFormatType == USB_FORMAT_TYPE_III) {
+	if (fmt->bFormatType == UAC_FORMAT_TYPE_III) {
 		/* FIXME: the format type is really IECxxx
 		 *        but we give normal PCM format to get the existing
 		 *        apps working...
@@ -2745,12 +2746,12 @@ static int parse_audio_format_ii(struct snd_usb_audio *chip,
 	int protocol = altsd->bInterfaceProtocol;
 
 	switch (format) {
-	case USB_AUDIO_FORMAT_AC3:
+	case UAC_FORMAT_TYPE_II_AC3:
 		/* FIXME: there is no AC3 format defined yet */
 		// fp->format = SNDRV_PCM_FORMAT_AC3;
 		fp->format = SNDRV_PCM_FORMAT_U8; /* temporarily hack to receive byte streams */
 		break;
-	case USB_AUDIO_FORMAT_MPEG:
+	case UAC_FORMAT_TYPE_II_MPEG:
 		fp->format = SNDRV_PCM_FORMAT_MPEG;
 		break;
 	default:
@@ -2793,11 +2794,11 @@ static int parse_audio_format(struct snd_usb_audio *chip, struct audioformat *fp
 	int err;
 
 	switch (fmt[3]) {
-	case USB_FORMAT_TYPE_I:
-	case USB_FORMAT_TYPE_III:
+	case UAC_FORMAT_TYPE_I:
+	case UAC_FORMAT_TYPE_III:
 		err = parse_audio_format_i(chip, fp, format, fmt, iface);
 		break;
-	case USB_FORMAT_TYPE_II:
+	case UAC_FORMAT_TYPE_II:
 		err = parse_audio_format_ii(chip, fp, format, fmt, iface);
 		break;
 	default:
@@ -2816,7 +2817,7 @@ static int parse_audio_format(struct snd_usb_audio *chip, struct audioformat *fp
 	if (chip->usb_id == USB_ID(0x041e, 0x3000) ||
 	    chip->usb_id == USB_ID(0x041e, 0x3020) ||
 	    chip->usb_id == USB_ID(0x041e, 0x3061)) {
-		if (fmt[3] == USB_FORMAT_TYPE_I &&
+		if (fmt[3] == UAC_FORMAT_TYPE_I &&
 		    fp->rates != SNDRV_PCM_RATE_48000 &&
 		    fp->rates != SNDRV_PCM_RATE_96000)
 			return -1;
@@ -2871,7 +2872,7 @@ static int parse_audio_endpoints(struct snd_usb_audio *chip, int iface_no)
 		/* skip invalid one */
 		if ((altsd->bInterfaceClass != USB_CLASS_AUDIO &&
 		     altsd->bInterfaceClass != USB_CLASS_VENDOR_SPEC) ||
-		    (altsd->bInterfaceSubClass != USB_SUBCLASS_AUDIO_STREAMING &&
+		    (altsd->bInterfaceSubClass != USB_SUBCLASS_AUDIOSTREAMING &&
 		     altsd->bInterfaceSubClass != USB_SUBCLASS_VENDOR_SPEC) ||
 		    altsd->bNumEndpoints < 1 ||
 		    le16_to_cpu(get_endpoint(alts, 0)->wMaxPacketSize) == 0)
@@ -2895,16 +2896,16 @@ static int parse_audio_endpoints(struct snd_usb_audio *chip, int iface_no)
 		switch (protocol) {
 		case UAC_VERSION_1: {
 			struct uac_as_header_descriptor_v1 *as =
-				snd_usb_find_csint_desc(alts->extra, alts->extralen, NULL, AS_GENERAL);
+				snd_usb_find_csint_desc(alts->extra, alts->extralen, NULL, UAC_AS_GENERAL);
 
 			if (!as) {
-				snd_printk(KERN_ERR "%d:%u:%d : AS_GENERAL descriptor not found\n",
+				snd_printk(KERN_ERR "%d:%u:%d : UAC_AS_GENERAL descriptor not found\n",
 					   dev->devnum, iface_no, altno);
 				continue;
 			}
 
 			if (as->bLength < sizeof(*as)) {
-				snd_printk(KERN_ERR "%d:%u:%d : invalid AS_GENERAL desc\n",
+				snd_printk(KERN_ERR "%d:%u:%d : invalid UAC_AS_GENERAL desc\n",
 					   dev->devnum, iface_no, altno);
 				continue;
 			}
@@ -2915,16 +2916,16 @@ static int parse_audio_endpoints(struct snd_usb_audio *chip, int iface_no)
 
 		case UAC_VERSION_2: {
 			struct uac_as_header_descriptor_v2 *as =
-				snd_usb_find_csint_desc(alts->extra, alts->extralen, NULL, AS_GENERAL);
+				snd_usb_find_csint_desc(alts->extra, alts->extralen, NULL, UAC_AS_GENERAL);
 
 			if (!as) {
-				snd_printk(KERN_ERR "%d:%u:%d : AS_GENERAL descriptor not found\n",
+				snd_printk(KERN_ERR "%d:%u:%d : UAC_AS_GENERAL descriptor not found\n",
 					   dev->devnum, iface_no, altno);
 				continue;
 			}
 
 			if (as->bLength < sizeof(*as)) {
-				snd_printk(KERN_ERR "%d:%u:%d : invalid AS_GENERAL desc\n",
+				snd_printk(KERN_ERR "%d:%u:%d : invalid UAC_AS_GENERAL desc\n",
 					   dev->devnum, iface_no, altno);
 				continue;
 			}
@@ -2942,15 +2943,15 @@ static int parse_audio_endpoints(struct snd_usb_audio *chip, int iface_no)
 		}
 
 		/* get format type */
-		fmt = snd_usb_find_csint_desc(alts->extra, alts->extralen, NULL, FORMAT_TYPE);
+		fmt = snd_usb_find_csint_desc(alts->extra, alts->extralen, NULL, UAC_FORMAT_TYPE);
 		if (!fmt) {
-			snd_printk(KERN_ERR "%d:%u:%d : no FORMAT_TYPE desc\n",
+			snd_printk(KERN_ERR "%d:%u:%d : no UAC_FORMAT_TYPE desc\n",
 				   dev->devnum, iface_no, altno);
 			continue;
 		}
 		if (((protocol == UAC_VERSION_1) && (fmt[0] < 8)) ||
 		    ((protocol == UAC_VERSION_2) && (fmt[0] != 6))) {
-			snd_printk(KERN_ERR "%d:%u:%d : invalid FORMAT_TYPE desc\n",
+			snd_printk(KERN_ERR "%d:%u:%d : invalid UAC_FORMAT_TYPE desc\n",
 				   dev->devnum, iface_no, altno);
 			continue;
 		}
@@ -2972,7 +2973,7 @@ static int parse_audio_endpoints(struct snd_usb_audio *chip, int iface_no)
 		/* Creamware Noah has this descriptor after the 2nd endpoint */
 		if (!csep && altsd->bNumEndpoints >= 2)
 			csep = snd_usb_find_desc(alts->endpoint[1].extra, alts->endpoint[1].extralen, NULL, USB_DT_CS_ENDPOINT);
-		if (!csep || csep[0] < 7 || csep[2] != EP_GENERAL) {
+		if (!csep || csep[0] < 7 || csep[2] != UAC_EP_GENERAL) {
 			snd_printk(KERN_WARNING "%d:%u:%d : no or invalid"
 				   " class specific endpoint descriptor\n",
 				   dev->devnum, iface_no, altno);
@@ -3006,12 +3007,12 @@ static int parse_audio_endpoints(struct snd_usb_audio *chip, int iface_no)
 			/* Optoplay sets the sample rate attribute although
 			 * it seems not supporting it in fact.
 			 */
-			fp->attributes &= ~EP_CS_ATTR_SAMPLE_RATE;
+			fp->attributes &= ~UAC_EP_CS_ATTR_SAMPLE_RATE;
 			break;
 		case USB_ID(0x041e, 0x3020): /* Creative SB Audigy 2 NX */
 		case USB_ID(0x0763, 0x2003): /* M-Audio Audiophile USB */
 			/* doesn't set the sample rate attribute, but supports it */
-			fp->attributes |= EP_CS_ATTR_SAMPLE_RATE;
+			fp->attributes |= UAC_EP_CS_ATTR_SAMPLE_RATE;
 			break;
 		case USB_ID(0x047f, 0x0ca1): /* plantronics headset */
 		case USB_ID(0x077d, 0x07af): /* Griffin iMic (note that there is
@@ -3020,11 +3021,11 @@ static int parse_audio_endpoints(struct snd_usb_audio *chip, int iface_no)
 		 * plantronics headset and Griffin iMic have set adaptive-in
 		 * although it's really not...
 		 */
-			fp->ep_attr &= ~EP_ATTR_MASK;
+			fp->ep_attr &= ~USB_ENDPOINT_SYNCTYPE;
 			if (stream == SNDRV_PCM_STREAM_PLAYBACK)
-				fp->ep_attr |= EP_ATTR_ADAPTIVE;
+				fp->ep_attr |= USB_ENDPOINT_SYNC_ADAPTIVE;
 			else
-				fp->ep_attr |= EP_ATTR_SYNC;
+				fp->ep_attr |= USB_ENDPOINT_SYNC_SYNC;
 			break;
 		}
 
@@ -3094,7 +3095,7 @@ static int snd_usb_create_stream(struct snd_usb_audio *chip, int ctrlif, int int
 	altsd = get_iface_desc(alts);
 	if ((altsd->bInterfaceClass == USB_CLASS_AUDIO ||
 	     altsd->bInterfaceClass == USB_CLASS_VENDOR_SPEC) &&
-	    altsd->bInterfaceSubClass == USB_SUBCLASS_MIDI_STREAMING) {
+	    altsd->bInterfaceSubClass == USB_SUBCLASS_MIDISTREAMING) {
 		int err = snd_usbmidi_create(chip->card, iface,
 					     &chip->midi_list, NULL);
 		if (err < 0) {
@@ -3109,7 +3110,7 @@ static int snd_usb_create_stream(struct snd_usb_audio *chip, int ctrlif, int int
 
 	if ((altsd->bInterfaceClass != USB_CLASS_AUDIO &&
 	     altsd->bInterfaceClass != USB_CLASS_VENDOR_SPEC) ||
-	    altsd->bInterfaceSubClass != USB_SUBCLASS_AUDIO_STREAMING) {
+	    altsd->bInterfaceSubClass != USB_SUBCLASS_AUDIOSTREAMING) {
 		snd_printdd(KERN_ERR "%d:%u:%d: skipping non-supported interface %d\n",
 					dev->devnum, ctrlif, interface, altsd->bInterfaceClass);
 		/* skip non-supported classes */
@@ -3145,12 +3146,12 @@ static int snd_usb_create_streams(struct snd_usb_audio *chip, int ctrlif)
 	host_iface = &usb_ifnum_to_if(dev, ctrlif)->altsetting[0];
 	control_header = snd_usb_find_csint_desc(host_iface->extra,
 						 host_iface->extralen,
-						 NULL, HEADER);
+						 NULL, UAC_HEADER);
 	altsd = get_iface_desc(host_iface);
 	protocol = altsd->bInterfaceProtocol;
 
 	if (!control_header) {
-		snd_printk(KERN_ERR "cannot find HEADER\n");
+		snd_printk(KERN_ERR "cannot find UAC_HEADER\n");
 		return -EINVAL;
 	}
 
@@ -3164,7 +3165,7 @@ static int snd_usb_create_streams(struct snd_usb_audio *chip, int ctrlif)
 		}
 
 		if (h1->bLength < sizeof(*h1) + h1->bInCollection) {
-			snd_printk(KERN_ERR "invalid HEADER (v1)\n");
+			snd_printk(KERN_ERR "invalid UAC_HEADER (v1)\n");
 			return -EINVAL;
 		}
 
@@ -3190,7 +3191,7 @@ static int snd_usb_create_streams(struct snd_usb_audio *chip, int ctrlif)
 		 * clock selectors and sample rate conversion units. */
 
 		cs = snd_usb_find_csint_desc(host_iface->extra, host_iface->extralen,
-						NULL, CLOCK_SOURCE);
+						NULL, UAC_CLOCK_SOURCE);
 
 		if (!cs) {
 			snd_printk(KERN_ERR "CLOCK_SOURCE descriptor not found\n");
@@ -3302,7 +3303,7 @@ static int create_uaxx_quirk(struct snd_usb_audio *chip,
 	static const struct audioformat ua_format = {
 		.format = SNDRV_PCM_FORMAT_S24_3LE,
 		.channels = 2,
-		.fmt_type = USB_FORMAT_TYPE_I,
+		.fmt_type = UAC_FORMAT_TYPE_I,
 		.altsetting = 1,
 		.altset_idx = 1,
 		.rates = SNDRV_PCM_RATE_CONTINUOUS,
@@ -3394,7 +3395,7 @@ static int create_ua1000_quirk(struct snd_usb_audio *chip,
 {
 	static const struct audioformat ua1000_format = {
 		.format = SNDRV_PCM_FORMAT_S32_LE,
-		.fmt_type = USB_FORMAT_TYPE_I,
+		.fmt_type = UAC_FORMAT_TYPE_I,
 		.altsetting = 1,
 		.altset_idx = 1,
 		.attributes = 0,
diff --git a/sound/usb/usbaudio.h b/sound/usb/usbaudio.h
index 26daf68631eb..6b016d4aac6b 100644
--- a/sound/usb/usbaudio.h
+++ b/sound/usb/usbaudio.h
@@ -21,106 +21,6 @@
  *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
  */
 
-
-/*
- */
-
-#define USB_SUBCLASS_AUDIO_CONTROL	0x01
-#define USB_SUBCLASS_AUDIO_STREAMING	0x02
-#define USB_SUBCLASS_MIDI_STREAMING	0x03
-#define USB_SUBCLASS_VENDOR_SPEC	0xff
-
-#define HEADER				0x01
-#define INPUT_TERMINAL			0x02
-#define OUTPUT_TERMINAL			0x03
-#define MIXER_UNIT			0x04
-#define SELECTOR_UNIT			0x05
-#define FEATURE_UNIT			0x06
-#define PROCESSING_UNIT_V1		0x07
-#define EXTENSION_UNIT_V1		0x08
-
-/* audio class v2 */
-#define EFFECT_UNIT			0x07
-#define PROCESSING_UNIT_V2		0x08
-#define EXTENSION_UNIT_V2		0x09
-#define CLOCK_SOURCE			0x0a
-#define CLOCK_SELECTOR			0x0b
-#define CLOCK_MULTIPLIER		0x0c
-#define SAMPLE_RATE_CONVERTER		0x0d
-
-#define AS_GENERAL			0x01
-#define FORMAT_TYPE			0x02
-#define FORMAT_SPECIFIC			0x03
-
-#define EP_GENERAL			0x01
-
-#define MS_GENERAL			0x01
-#define MIDI_IN_JACK			0x02
-#define MIDI_OUT_JACK			0x03
-
-/* endpoint attributes */
-#define EP_ATTR_MASK			0x0c
-#define EP_ATTR_ASYNC			0x04
-#define EP_ATTR_ADAPTIVE		0x08
-#define EP_ATTR_SYNC			0x0c
-
-/* cs endpoint attributes */
-#define EP_CS_ATTR_SAMPLE_RATE		0x01
-#define EP_CS_ATTR_PITCH_CONTROL	0x02
-#define EP_CS_ATTR_FILL_MAX		0x80
-
-/* Audio Class specific Request Codes (v1) */
-
-#define SET_CUR    0x01
-#define GET_CUR    0x81
-#define SET_MIN    0x02
-#define GET_MIN    0x82
-#define SET_MAX    0x03
-#define GET_MAX    0x83
-#define SET_RES    0x04
-#define GET_RES    0x84
-#define SET_MEM    0x05
-#define GET_MEM    0x85
-#define GET_STAT   0xff
-
-/* Audio Class specific Request Codes (v2) */
-#define CS_CUR     0x01
-#define CS_RANGE   0x02
-
-/* Terminal Control Selectors */
-
-#define COPY_PROTECT_CONTROL       0x01
-
-/* Endpoint Control Selectors */
-
-#define SAMPLING_FREQ_CONTROL      0x01
-#define PITCH_CONTROL              0x02
-
-/* Format Types */
-#define USB_FORMAT_TYPE_I	0x01
-#define USB_FORMAT_TYPE_II	0x02
-#define USB_FORMAT_TYPE_III	0x03
-
-/* type I */
-#define USB_AUDIO_FORMAT_PCM	0x01
-#define USB_AUDIO_FORMAT_PCM8	0x02
-#define USB_AUDIO_FORMAT_IEEE_FLOAT	0x03
-#define USB_AUDIO_FORMAT_ALAW	0x04
-#define USB_AUDIO_FORMAT_MU_LAW	0x05
-
-/* type II */
-#define USB_AUDIO_FORMAT_MPEG	0x1001
-#define USB_AUDIO_FORMAT_AC3	0x1002
-
-/* type III */
-#define USB_AUDIO_FORMAT_IEC1937_AC3	0x2001
-#define USB_AUDIO_FORMAT_IEC1937_MPEG1_LAYER1	0x2002
-#define USB_AUDIO_FORMAT_IEC1937_MPEG2_NOEXT	0x2003
-#define USB_AUDIO_FORMAT_IEC1937_MPEG2_EXT	0x2004
-#define USB_AUDIO_FORMAT_IEC1937_MPEG2_LAYER1_LS	0x2005
-#define USB_AUDIO_FORMAT_IEC1937_MPEG2_LAYER23_LS	0x2006
-
-
 /* maximum number of endpoints per interface */
 #define MIDI_MAX_ENDPOINTS 2
 
diff --git a/sound/usb/usbmidi.c b/sound/usb/usbmidi.c
index b2da478a0fae..2c59afd99611 100644
--- a/sound/usb/usbmidi.c
+++ b/sound/usb/usbmidi.c
@@ -46,6 +46,8 @@
 #include <linux/timer.h>
 #include <linux/usb.h>
 #include <linux/wait.h>
+#include <linux/usb/audio.h>
+
 #include <sound/core.h>
 #include <sound/control.h>
 #include <sound/rawmidi.h>
@@ -1540,7 +1542,7 @@ static int snd_usbmidi_get_ms_info(struct snd_usb_midi* umidi,
 	if (hostif->extralen >= 7 &&
 	    ms_header->bLength >= 7 &&
 	    ms_header->bDescriptorType == USB_DT_CS_INTERFACE &&
-	    ms_header->bDescriptorSubtype == HEADER)
+	    ms_header->bDescriptorSubtype == UAC_HEADER)
 		snd_printdd(KERN_INFO "MIDIStreaming version %02x.%02x\n",
 			    ms_header->bcdMSC[1], ms_header->bcdMSC[0]);
 	else
@@ -1556,7 +1558,7 @@ static int snd_usbmidi_get_ms_info(struct snd_usb_midi* umidi,
 		if (hostep->extralen < 4 ||
 		    ms_ep->bLength < 4 ||
 		    ms_ep->bDescriptorType != USB_DT_CS_ENDPOINT ||
-		    ms_ep->bDescriptorSubtype != MS_GENERAL)
+		    ms_ep->bDescriptorSubtype != UAC_MS_GENERAL)
 			continue;
 		if (usb_endpoint_dir_out(ep)) {
 			if (endpoints[epidx].out_ep) {
@@ -1768,9 +1770,9 @@ static int snd_usbmidi_detect_yamaha(struct snd_usb_midi* umidi,
 	     cs_desc < hostif->extra + hostif->extralen && cs_desc[0] >= 2;
 	     cs_desc += cs_desc[0]) {
 		if (cs_desc[1] == USB_DT_CS_INTERFACE) {
-			if (cs_desc[2] == MIDI_IN_JACK)
+			if (cs_desc[2] == UAC_MIDI_IN_JACK)
 				endpoint->in_cables = (endpoint->in_cables << 1) | 1;
-			else if (cs_desc[2] == MIDI_OUT_JACK)
+			else if (cs_desc[2] == UAC_MIDI_OUT_JACK)
 				endpoint->out_cables = (endpoint->out_cables << 1) | 1;
 		}
 	}
diff --git a/sound/usb/usbmixer.c b/sound/usb/usbmixer.c
index 42bb95c739a8..8e8f871b74ca 100644
--- a/sound/usb/usbmixer.c
+++ b/sound/usb/usbmixer.c
@@ -286,7 +286,7 @@ static void *find_audio_control_unit(struct mixer_build *state, unsigned char un
 	p = NULL;
 	while ((p = snd_usb_find_desc(state->buffer, state->buflen, p,
 				      USB_DT_CS_INTERFACE)) != NULL) {
-		if (p[0] >= 4 && p[2] >= INPUT_TERMINAL && p[2] <= EXTENSION_UNIT_V1 && p[3] == unit)
+		if (p[0] >= 4 && p[2] >= UAC_INPUT_TERMINAL && p[2] <= UAC_EXTENSION_UNIT_V1 && p[3] == unit)
 			return p;
 	}
 	return NULL;
@@ -407,14 +407,14 @@ static int get_ctl_value(struct usb_mixer_elem_info *cval, int request, int vali
 
 static int get_cur_ctl_value(struct usb_mixer_elem_info *cval, int validx, int *value)
 {
-	return get_ctl_value(cval, GET_CUR, validx, value);
+	return get_ctl_value(cval, UAC_GET_CUR, validx, value);
 }
 
 /* channel = 0: master, 1 = first channel */
 static inline int get_cur_mix_raw(struct usb_mixer_elem_info *cval,
 				  int channel, int *value)
 {
-	return get_ctl_value(cval, GET_CUR, (cval->control << 8) | channel, value);
+	return get_ctl_value(cval, UAC_GET_CUR, (cval->control << 8) | channel, value);
 }
 
 static int get_cur_mix_value(struct usb_mixer_elem_info *cval,
@@ -468,14 +468,14 @@ static int set_ctl_value(struct usb_mixer_elem_info *cval, int request, int vali
 
 static int set_cur_ctl_value(struct usb_mixer_elem_info *cval, int validx, int value)
 {
-	return set_ctl_value(cval, SET_CUR, validx, value);
+	return set_ctl_value(cval, UAC_SET_CUR, validx, value);
 }
 
 static int set_cur_mix_value(struct usb_mixer_elem_info *cval, int channel,
 			     int index, int value)
 {
 	int err;
-	err = set_ctl_value(cval, SET_CUR, (cval->control << 8) | channel,
+	err = set_ctl_value(cval, UAC_SET_CUR, (cval->control << 8) | channel,
 			    value);
 	if (err < 0)
 		return err;
@@ -605,13 +605,13 @@ static int get_term_name(struct mixer_build *state, struct usb_audio_term *iterm
 		if (term_only)
 			return 0;
 		switch (iterm->type >> 16) {
-		case SELECTOR_UNIT:
+		case UAC_SELECTOR_UNIT:
 			strcpy(name, "Selector"); return 8;
-		case PROCESSING_UNIT_V1:
+		case UAC_PROCESSING_UNIT_V1:
 			strcpy(name, "Process Unit"); return 12;
-		case EXTENSION_UNIT_V1:
+		case UAC_EXTENSION_UNIT_V1:
 			strcpy(name, "Ext Unit"); return 8;
-		case MIXER_UNIT:
+		case UAC_MIXER_UNIT:
 			strcpy(name, "Mixer"); return 5;
 		default:
 			return sprintf(name, "Unit %d", iterm->id);
@@ -650,22 +650,22 @@ static int check_input_term(struct mixer_build *state, int id, struct usb_audio_
 	while ((p1 = find_audio_control_unit(state, id)) != NULL) {
 		term->id = id;
 		switch (p1[2]) {
-		case INPUT_TERMINAL:
+		case UAC_INPUT_TERMINAL:
 			term->type = combine_word(p1 + 4);
 			term->channels = p1[7];
 			term->chconfig = combine_word(p1 + 8);
 			term->name = p1[11];
 			return 0;
-		case FEATURE_UNIT:
+		case UAC_FEATURE_UNIT:
 			id = p1[4];
 			break; /* continue to parse */
-		case MIXER_UNIT:
+		case UAC_MIXER_UNIT:
 			term->type = p1[2] << 16; /* virtual type */
 			term->channels = p1[5 + p1[4]];
 			term->chconfig = combine_word(p1 + 6 + p1[4]);
 			term->name = p1[p1[0] - 1];
 			return 0;
-		case SELECTOR_UNIT:
+		case UAC_SELECTOR_UNIT:
 			/* call recursively to retrieve the channel info */
 			if (check_input_term(state, p1[5], term) < 0)
 				return -ENODEV;
@@ -673,8 +673,8 @@ static int check_input_term(struct mixer_build *state, int id, struct usb_audio_
 			term->id = id;
 			term->name = p1[9 + p1[0] - 1];
 			return 0;
-		case PROCESSING_UNIT_V1:
-		case EXTENSION_UNIT_V1:
+		case UAC_PROCESSING_UNIT_V1:
+		case UAC_EXTENSION_UNIT_V1:
 			if (p1[6] == 1) {
 				id = p1[7];
 				break; /* continue to parse */
@@ -752,23 +752,23 @@ static int get_min_max(struct usb_mixer_elem_info *cval, int default_min)
 					break;
 				}
 		}
-		if (get_ctl_value(cval, GET_MAX, (cval->control << 8) | minchn, &cval->max) < 0 ||
-		    get_ctl_value(cval, GET_MIN, (cval->control << 8) | minchn, &cval->min) < 0) {
+		if (get_ctl_value(cval, UAC_GET_MAX, (cval->control << 8) | minchn, &cval->max) < 0 ||
+		    get_ctl_value(cval, UAC_GET_MIN, (cval->control << 8) | minchn, &cval->min) < 0) {
 			snd_printd(KERN_ERR "%d:%d: cannot get min/max values for control %d (id %d)\n",
 				   cval->id, cval->mixer->ctrlif, cval->control, cval->id);
 			return -EINVAL;
 		}
-		if (get_ctl_value(cval, GET_RES, (cval->control << 8) | minchn, &cval->res) < 0) {
+		if (get_ctl_value(cval, UAC_GET_RES, (cval->control << 8) | minchn, &cval->res) < 0) {
 			cval->res = 1;
 		} else {
 			int last_valid_res = cval->res;
 
 			while (cval->res > 1) {
-				if (set_ctl_value(cval, SET_RES, (cval->control << 8) | minchn, cval->res / 2) < 0)
+				if (set_ctl_value(cval, UAC_SET_RES, (cval->control << 8) | minchn, cval->res / 2) < 0)
 					break;
 				cval->res /= 2;
 			}
-			if (get_ctl_value(cval, GET_RES, (cval->control << 8) | minchn, &cval->res) < 0)
+			if (get_ctl_value(cval, UAC_GET_RES, (cval->control << 8) | minchn, &cval->res) < 0)
 				cval->res = last_valid_res;
 		}
 		if (cval->res == 0)
@@ -1097,7 +1097,7 @@ static int parse_audio_feature_unit(struct mixer_build *state, int unitid, void
 	struct uac_feature_unit_descriptor *ftr = _ftr;
 
 	if (ftr->bLength < 7 || ! (csize = ftr->bControlSize) || ftr->bLength < 7 + csize) {
-		snd_printk(KERN_ERR "usbaudio: unit %u: invalid FEATURE_UNIT descriptor\n", unitid);
+		snd_printk(KERN_ERR "usbaudio: unit %u: invalid UAC_FEATURE_UNIT descriptor\n", unitid);
 		return -EINVAL;
 	}
 
@@ -1739,17 +1739,17 @@ static int parse_audio_unit(struct mixer_build *state, int unitid)
 	}
 
 	switch (p1[2]) {
-	case INPUT_TERMINAL:
+	case UAC_INPUT_TERMINAL:
 		return 0; /* NOP */
-	case MIXER_UNIT:
+	case UAC_MIXER_UNIT:
 		return parse_audio_mixer_unit(state, unitid, p1);
-	case SELECTOR_UNIT:
+	case UAC_SELECTOR_UNIT:
 		return parse_audio_selector_unit(state, unitid, p1);
-	case FEATURE_UNIT:
+	case UAC_FEATURE_UNIT:
 		return parse_audio_feature_unit(state, unitid, p1);
-	case PROCESSING_UNIT_V1:
+	case UAC_PROCESSING_UNIT_V1:
 		return parse_audio_processing_unit(state, unitid, p1);
-	case EXTENSION_UNIT_V1:
+	case UAC_EXTENSION_UNIT_V1:
 		return parse_audio_extension_unit(state, unitid, p1);
 	default:
 		snd_printk(KERN_ERR "usbaudio: unit %u: unexpected type 0x%02x\n", unitid, p1[2]);
@@ -1779,7 +1779,7 @@ static int snd_usb_mixer_dev_free(struct snd_device *device)
 /*
  * create mixer controls
  *
- * walk through all OUTPUT_TERMINAL descriptors to search for mixers
+ * walk through all UAC_OUTPUT_TERMINAL descriptors to search for mixers
  */
 static int snd_usb_mixer_controls(struct usb_mixer_interface *mixer)
 {
@@ -1807,7 +1807,7 @@ static int snd_usb_mixer_controls(struct usb_mixer_interface *mixer)
 	}
 
 	desc = NULL;
-	while ((desc = snd_usb_find_csint_desc(hostif->extra, hostif->extralen, desc, OUTPUT_TERMINAL)) != NULL) {
+	while ((desc = snd_usb_find_csint_desc(hostif->extra, hostif->extralen, desc, UAC_OUTPUT_TERMINAL)) != NULL) {
 		if (desc->bLength < 9)
 			continue; /* invalid descriptor? */
 		set_bit(desc->bTerminalID, state.unitbitmap);  /* mark terminal ID as visited */
@@ -2047,7 +2047,7 @@ static int snd_usb_soundblaster_remote_init(struct usb_mixer_interface *mixer)
 	}
 	mixer->rc_setup_packet->bRequestType =
 		USB_DIR_IN | USB_TYPE_CLASS | USB_RECIP_INTERFACE;
-	mixer->rc_setup_packet->bRequest = GET_MEM;
+	mixer->rc_setup_packet->bRequest = UAC_GET_MEM;
 	mixer->rc_setup_packet->wValue = cpu_to_le16(0);
 	mixer->rc_setup_packet->wIndex = cpu_to_le16(0);
 	mixer->rc_setup_packet->wLength = cpu_to_le16(len);
@@ -2170,7 +2170,7 @@ static void snd_audigy2nx_proc_read(struct snd_info_entry *entry,
 		snd_iprintf(buffer, "%s: ", jacks[i].name);
 		err = snd_usb_ctl_msg(mixer->chip->dev,
 				      usb_rcvctrlpipe(mixer->chip->dev, 0),
-				      GET_MEM, USB_DIR_IN | USB_TYPE_CLASS |
+				      UAC_GET_MEM, USB_DIR_IN | USB_TYPE_CLASS |
 				      USB_RECIP_INTERFACE, 0,
 				      jacks[i].unitid << 8, buf, 3, 100);
 		if (err == 3 && (buf[0] == 3 || buf[0] == 6))
diff --git a/sound/usb/usbquirks.h b/sound/usb/usbquirks.h
index fc1d2cd6ccc3..f06faf7917b9 100644
--- a/sound/usb/usbquirks.h
+++ b/sound/usb/usbquirks.h
@@ -91,7 +91,7 @@
 	.idVendor = 0x046d,
 	.idProduct = 0x0850,
 	.bInterfaceClass = USB_CLASS_AUDIO,
-	.bInterfaceSubClass = USB_SUBCLASS_AUDIO_CONTROL
+	.bInterfaceSubClass = USB_SUBCLASS_AUDIOCONTROL
 },
 {
 	.match_flags = USB_DEVICE_ID_MATCH_DEVICE |
@@ -100,7 +100,7 @@
 	.idVendor = 0x046d,
 	.idProduct = 0x08ae,
 	.bInterfaceClass = USB_CLASS_AUDIO,
-	.bInterfaceSubClass = USB_SUBCLASS_AUDIO_CONTROL
+	.bInterfaceSubClass = USB_SUBCLASS_AUDIOCONTROL
 },
 {
 	.match_flags = USB_DEVICE_ID_MATCH_DEVICE |
@@ -109,7 +109,7 @@
 	.idVendor = 0x046d,
 	.idProduct = 0x08c6,
 	.bInterfaceClass = USB_CLASS_AUDIO,
-	.bInterfaceSubClass = USB_SUBCLASS_AUDIO_CONTROL
+	.bInterfaceSubClass = USB_SUBCLASS_AUDIOCONTROL
 },
 {
 	.match_flags = USB_DEVICE_ID_MATCH_DEVICE |
@@ -118,7 +118,7 @@
 	.idVendor = 0x046d,
 	.idProduct = 0x08f0,
 	.bInterfaceClass = USB_CLASS_AUDIO,
-	.bInterfaceSubClass = USB_SUBCLASS_AUDIO_CONTROL
+	.bInterfaceSubClass = USB_SUBCLASS_AUDIOCONTROL
 },
 {
 	.match_flags = USB_DEVICE_ID_MATCH_DEVICE |
@@ -127,7 +127,7 @@
 	.idVendor = 0x046d,
 	.idProduct = 0x08f5,
 	.bInterfaceClass = USB_CLASS_AUDIO,
-	.bInterfaceSubClass = USB_SUBCLASS_AUDIO_CONTROL
+	.bInterfaceSubClass = USB_SUBCLASS_AUDIOCONTROL
 },
 {
 	.match_flags = USB_DEVICE_ID_MATCH_DEVICE |
@@ -136,7 +136,7 @@
 	.idVendor = 0x046d,
 	.idProduct = 0x08f6,
 	.bInterfaceClass = USB_CLASS_AUDIO,
-	.bInterfaceSubClass = USB_SUBCLASS_AUDIO_CONTROL
+	.bInterfaceSubClass = USB_SUBCLASS_AUDIOCONTROL
 },
 {
 	USB_DEVICE(0x046d, 0x0990),
@@ -301,7 +301,7 @@ YAMAHA_DEVICE(0x7010, "UB99"),
 					.iface = 1,
 					.altsetting = 1,
 					.altset_idx = 1,
-					.attributes = EP_CS_ATTR_FILL_MAX,
+					.attributes = UAC_EP_CS_ATTR_FILL_MAX,
 					.endpoint = 0x81,
 					.ep_attr = 0x05,
 					.rates = SNDRV_PCM_RATE_CONTINUOUS,
@@ -2108,7 +2108,7 @@ YAMAHA_DEVICE(0x7010, "UB99"),
 		       USB_DEVICE_ID_MATCH_INT_CLASS |
 		       USB_DEVICE_ID_MATCH_INT_SUBCLASS,
 	.bInterfaceClass = USB_CLASS_AUDIO,
-	.bInterfaceSubClass = USB_SUBCLASS_AUDIO_CONTROL,
+	.bInterfaceSubClass = USB_SUBCLASS_AUDIOCONTROL,
 	.driver_info = (unsigned long) &(const struct snd_usb_audio_quirk) {
 		.vendor_name = "Hauppauge",
 		.product_name = "HVR-950Q",
@@ -2122,7 +2122,7 @@ YAMAHA_DEVICE(0x7010, "UB99"),
 		       USB_DEVICE_ID_MATCH_INT_CLASS |
 		       USB_DEVICE_ID_MATCH_INT_SUBCLASS,
 	.bInterfaceClass = USB_CLASS_AUDIO,
-	.bInterfaceSubClass = USB_SUBCLASS_AUDIO_CONTROL,
+	.bInterfaceSubClass = USB_SUBCLASS_AUDIOCONTROL,
 	.driver_info = (unsigned long) &(const struct snd_usb_audio_quirk) {
 		.vendor_name = "Hauppauge",
 		.product_name = "HVR-950Q",
@@ -2136,7 +2136,7 @@ YAMAHA_DEVICE(0x7010, "UB99"),
 		       USB_DEVICE_ID_MATCH_INT_CLASS |
 		       USB_DEVICE_ID_MATCH_INT_SUBCLASS,
 	.bInterfaceClass = USB_CLASS_AUDIO,
-	.bInterfaceSubClass = USB_SUBCLASS_AUDIO_CONTROL,
+	.bInterfaceSubClass = USB_SUBCLASS_AUDIOCONTROL,
 	.driver_info = (unsigned long) &(const struct snd_usb_audio_quirk) {
 		.vendor_name = "Hauppauge",
 		.product_name = "HVR-950Q",
@@ -2150,7 +2150,7 @@ YAMAHA_DEVICE(0x7010, "UB99"),
 		       USB_DEVICE_ID_MATCH_INT_CLASS |
 		       USB_DEVICE_ID_MATCH_INT_SUBCLASS,
 	.bInterfaceClass = USB_CLASS_AUDIO,
-	.bInterfaceSubClass = USB_SUBCLASS_AUDIO_CONTROL,
+	.bInterfaceSubClass = USB_SUBCLASS_AUDIOCONTROL,
 	.driver_info = (unsigned long) &(const struct snd_usb_audio_quirk) {
 		.vendor_name = "Hauppauge",
 		.product_name = "HVR-950Q",
@@ -2164,7 +2164,7 @@ YAMAHA_DEVICE(0x7010, "UB99"),
 		       USB_DEVICE_ID_MATCH_INT_CLASS |
 		       USB_DEVICE_ID_MATCH_INT_SUBCLASS,
 	.bInterfaceClass = USB_CLASS_AUDIO,
-	.bInterfaceSubClass = USB_SUBCLASS_AUDIO_CONTROL,
+	.bInterfaceSubClass = USB_SUBCLASS_AUDIOCONTROL,
 	.driver_info = (unsigned long) &(const struct snd_usb_audio_quirk) {
 		.vendor_name = "Hauppauge",
 		.product_name = "HVR-950Q",
@@ -2178,7 +2178,7 @@ YAMAHA_DEVICE(0x7010, "UB99"),
 		       USB_DEVICE_ID_MATCH_INT_CLASS |
 		       USB_DEVICE_ID_MATCH_INT_SUBCLASS,
 	.bInterfaceClass = USB_CLASS_AUDIO,
-	.bInterfaceSubClass = USB_SUBCLASS_AUDIO_CONTROL,
+	.bInterfaceSubClass = USB_SUBCLASS_AUDIOCONTROL,
 	.driver_info = (unsigned long) &(const struct snd_usb_audio_quirk) {
 		.vendor_name = "Hauppauge",
 		.product_name = "HVR-950Q",
@@ -2192,7 +2192,7 @@ YAMAHA_DEVICE(0x7010, "UB99"),
 		       USB_DEVICE_ID_MATCH_INT_CLASS |
 		       USB_DEVICE_ID_MATCH_INT_SUBCLASS,
 	.bInterfaceClass = USB_CLASS_AUDIO,
-	.bInterfaceSubClass = USB_SUBCLASS_AUDIO_CONTROL,
+	.bInterfaceSubClass = USB_SUBCLASS_AUDIOCONTROL,
 	.driver_info = (unsigned long) &(const struct snd_usb_audio_quirk) {
 		.vendor_name = "Hauppauge",
 		.product_name = "HVR-950Q",
@@ -2206,7 +2206,7 @@ YAMAHA_DEVICE(0x7010, "UB99"),
 		       USB_DEVICE_ID_MATCH_INT_CLASS |
 		       USB_DEVICE_ID_MATCH_INT_SUBCLASS,
 	.bInterfaceClass = USB_CLASS_AUDIO,
-	.bInterfaceSubClass = USB_SUBCLASS_AUDIO_CONTROL,
+	.bInterfaceSubClass = USB_SUBCLASS_AUDIOCONTROL,
 	.driver_info = (unsigned long) &(const struct snd_usb_audio_quirk) {
 		.vendor_name = "Hauppauge",
 		.product_name = "HVR-850",
@@ -2238,7 +2238,7 @@ YAMAHA_DEVICE(0x7010, "UB99"),
 					.iface = 1,
 					.altsetting = 1,
 					.altset_idx = 1,
-					.attributes = EP_CS_ATTR_SAMPLE_RATE,
+					.attributes = UAC_EP_CS_ATTR_SAMPLE_RATE,
 					.endpoint = 0x02,
 					.ep_attr = 0x01,
 					.maxpacksize = 0x130,
@@ -2268,7 +2268,7 @@ YAMAHA_DEVICE(0x7010, "UB99"),
 	.match_flags = USB_DEVICE_ID_MATCH_INT_CLASS |
 		       USB_DEVICE_ID_MATCH_INT_SUBCLASS,
 	.bInterfaceClass = USB_CLASS_AUDIO,
-	.bInterfaceSubClass = USB_SUBCLASS_MIDI_STREAMING,
+	.bInterfaceSubClass = USB_SUBCLASS_MIDISTREAMING,
 	.driver_info = (unsigned long) & (const struct snd_usb_audio_quirk) {
 		.ifnum = QUIRK_ANY_INTERFACE,
 		.type = QUIRK_MIDI_STANDARD_INTERFACE
diff --git a/sound/usb/usx2y/us122l.c b/sound/usb/usx2y/us122l.c
index 91bb29666d26..44deb21b1777 100644
--- a/sound/usb/usx2y/us122l.c
+++ b/sound/usb/usx2y/us122l.c
@@ -16,6 +16,8 @@
  * Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
+#include <linux/usb.h>
+#include <linux/usb/audio.h>
 #include <sound/core.h>
 #include <sound/hwdep.h>
 #include <sound/pcm.h>
@@ -315,9 +317,9 @@ static int us122l_set_sample_rate(struct usb_device *dev, int rate)
 	data[0] = rate;
 	data[1] = rate >> 8;
 	data[2] = rate >> 16;
-	err = us122l_ctl_msg(dev, usb_sndctrlpipe(dev, 0), SET_CUR,
+	err = us122l_ctl_msg(dev, usb_sndctrlpipe(dev, 0), UAC_SET_CUR,
 			     USB_TYPE_CLASS|USB_RECIP_ENDPOINT|USB_DIR_OUT,
-			     SAMPLING_FREQ_CONTROL << 8, ep, data, 3, 1000);
+			     UAC_EP_CS_ATTR_SAMPLE_RATE << 8, ep, data, 3, 1000);
 	if (err < 0)
 		snd_printk(KERN_ERR "%d: cannot set freq %d to ep 0x%x\n",
 			   dev->devnum, rate, ep);
-- 
cgit v1.2.3


From bddd87c7e622ea681c665049027ed84cdcafcb09 Mon Sep 17 00:00:00 2001
From: Akinobu Mita <akinobu.mita@gmail.com>
Date: Tue, 23 Feb 2010 08:55:42 +0100
Subject: blk-core: use BIO list management functions

Now that the bio list management stuff is generic, convert
generic_make_request to use bio lists instead of its own private bio
list implementation.

Signed-off-by: Akinobu Mita <akinobu.mita@gmail.com>
Cc: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 block/blk-core.c      | 31 ++++++++++++++-----------------
 include/linux/sched.h |  4 ++--
 2 files changed, 16 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-core.c b/block/blk-core.c
index 718897e6d37f..44b6d691728d 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1490,9 +1490,9 @@ end_io:
 /*
  * We only want one ->make_request_fn to be active at a time,
  * else stack usage with stacked devices could be a problem.
- * So use current->bio_{list,tail} to keep a list of requests
+ * So use current->bio_list to keep a list of requests
  * submited by a make_request_fn function.
- * current->bio_tail is also used as a flag to say if
+ * current->bio_list is also used as a flag to say if
  * generic_make_request is currently active in this task or not.
  * If it is NULL, then no make_request is active.  If it is non-NULL,
  * then a make_request is active, and new requests should be added
@@ -1500,11 +1500,11 @@ end_io:
  */
 void generic_make_request(struct bio *bio)
 {
-	if (current->bio_tail) {
+	struct bio_list bio_list_on_stack;
+
+	if (current->bio_list) {
 		/* make_request is active */
-		*(current->bio_tail) = bio;
-		bio->bi_next = NULL;
-		current->bio_tail = &bio->bi_next;
+		bio_list_add(current->bio_list, bio);
 		return;
 	}
 	/* following loop may be a bit non-obvious, and so deserves some
@@ -1512,30 +1512,27 @@ void generic_make_request(struct bio *bio)
 	 * Before entering the loop, bio->bi_next is NULL (as all callers
 	 * ensure that) so we have a list with a single bio.
 	 * We pretend that we have just taken it off a longer list, so
-	 * we assign bio_list to the next (which is NULL) and bio_tail
-	 * to &bio_list, thus initialising the bio_list of new bios to be
+	 * we assign bio_list to a pointer to the bio_list_on_stack,
+	 * thus initialising the bio_list of new bios to be
 	 * added.  __generic_make_request may indeed add some more bios
 	 * through a recursive call to generic_make_request.  If it
 	 * did, we find a non-NULL value in bio_list and re-enter the loop
 	 * from the top.  In this case we really did just take the bio
-	 * of the top of the list (no pretending) and so fixup bio_list and
-	 * bio_tail or bi_next, and call into __generic_make_request again.
+	 * of the top of the list (no pretending) and so remove it from
+	 * bio_list, and call into __generic_make_request again.
 	 *
 	 * The loop was structured like this to make only one call to
 	 * __generic_make_request (which is important as it is large and
 	 * inlined) and to keep the structure simple.
 	 */
 	BUG_ON(bio->bi_next);
+	bio_list_init(&bio_list_on_stack);
+	current->bio_list = &bio_list_on_stack;
 	do {
-		current->bio_list = bio->bi_next;
-		if (bio->bi_next == NULL)
-			current->bio_tail = &current->bio_list;
-		else
-			bio->bi_next = NULL;
 		__generic_make_request(bio);
-		bio = current->bio_list;
+		bio = bio_list_pop(current->bio_list);
 	} while (bio);
-	current->bio_tail = NULL; /* deactivate */
+	current->bio_list = NULL; /* deactivate */
 }
 EXPORT_SYMBOL(generic_make_request);
 
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 78efe7c485ac..7eb82975e1ee 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -97,7 +97,7 @@ struct sched_param {
 struct exec_domain;
 struct futex_pi_state;
 struct robust_list_head;
-struct bio;
+struct bio_list;
 struct fs_struct;
 struct bts_context;
 struct perf_event_context;
@@ -1466,7 +1466,7 @@ struct task_struct {
 	void *journal_info;
 
 /* stacked block device info */
-	struct bio *bio_list, **bio_tail;
+	struct bio_list *bio_list;
 
 /* VM state */
 	struct reclaim_state *reclaim_state;
-- 
cgit v1.2.3


From 89a74ecccd1f78e51faf6287e5c0e93a92ac096e Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bjorn.helgaas@hp.com>
Date: Tue, 23 Feb 2010 10:24:31 -0700
Subject: PCI: add pci_bus_for_each_resource(), remove direct bus->resource[]
 refs

No functional change; this converts loops that iterate from 0 to
PCI_BUS_NUM_RESOURCES through pci_bus resource[] table to use the
pci_bus_for_each_resource() iterator instead.

This doesn't change the way resources are stored; it merely removes
dependencies on the fact that they're in a table.

Signed-off-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 arch/ia64/pci/pci.c                  |  5 ++---
 arch/mn10300/unit-asb2305/pci.c      |  6 ++----
 arch/powerpc/kernel/pci-common.c     | 11 ++++-------
 arch/powerpc/platforms/fsl_uli1575.c | 12 ++++++------
 drivers/pci/bus.c                    |  4 ++--
 drivers/pci/hotplug/shpchp_sysfs.c   |  9 +++------
 drivers/pci/pci.c                    |  5 ++---
 drivers/pci/setup-bus.c              | 10 ++++------
 drivers/pcmcia/rsrc_nonstatic.c      |  3 +--
 drivers/pcmcia/yenta_socket.c        |  5 +++--
 include/linux/pci.h                  |  3 +++
 11 files changed, 32 insertions(+), 41 deletions(-)

(limited to 'include/linux')

diff --git a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c
index 783c83bb2b49..89f957ca3eb2 100644
--- a/arch/ia64/pci/pci.c
+++ b/arch/ia64/pci/pci.c
@@ -452,13 +452,12 @@ EXPORT_SYMBOL(pcibios_bus_to_resource);
 static int __devinit is_valid_resource(struct pci_dev *dev, int idx)
 {
 	unsigned int i, type_mask = IORESOURCE_IO | IORESOURCE_MEM;
-	struct resource *devr = &dev->resource[idx];
+	struct resource *devr = &dev->resource[idx], *busr;
 
 	if (!dev->bus)
 		return 0;
-	for (i=0; i<PCI_BUS_NUM_RESOURCES; i++) {
-		struct resource *busr = dev->bus->resource[i];
 
+	pci_bus_for_each_resource(dev->bus, busr, i) {
 		if (!busr || ((busr->flags ^ devr->flags) & type_mask))
 			continue;
 		if ((devr->start) && (devr->start >= busr->start) &&
diff --git a/arch/mn10300/unit-asb2305/pci.c b/arch/mn10300/unit-asb2305/pci.c
index 2cb7e75ba1c0..6d8720a0a599 100644
--- a/arch/mn10300/unit-asb2305/pci.c
+++ b/arch/mn10300/unit-asb2305/pci.c
@@ -331,12 +331,10 @@ static int __init pci_check_direct(void)
 static int __devinit is_valid_resource(struct pci_dev *dev, int idx)
 {
 	unsigned int i, type_mask = IORESOURCE_IO | IORESOURCE_MEM;
-	struct resource *devr = &dev->resource[idx];
+	struct resource *devr = &dev->resource[idx], *busr;
 
 	if (dev->bus) {
-		for (i = 0; i < PCI_BUS_NUM_RESOURCES; i++) {
-			struct resource *busr = dev->bus->resource[i];
-
+		pci_bus_for_each_resource(dev->bus, busr, i) {
 			if (!busr || (busr->flags ^ devr->flags) & type_mask)
 				continue;
 
diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index e640810e813f..2597f9545d8a 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -1047,10 +1047,8 @@ static void __devinit pcibios_fixup_bridge(struct pci_bus *bus)
 
 	struct pci_dev *dev = bus->self;
 
-	for (i = 0; i < PCI_BUS_NUM_RESOURCES; ++i) {
-		if ((res = bus->resource[i]) == NULL)
-			continue;
-		if (!res->flags)
+	pci_bus_for_each_resource(bus, res, i) {
+		if (!res || !res->flags)
 			continue;
 		if (i >= 3 && bus->self->transparent)
 			continue;
@@ -1277,9 +1275,8 @@ void pcibios_allocate_bus_resources(struct pci_bus *bus)
 	pr_debug("PCI: Allocating bus resources for %04x:%02x...\n",
 		 pci_domain_nr(bus), bus->number);
 
-	for (i = 0; i < PCI_BUS_NUM_RESOURCES; ++i) {
-		if ((res = bus->resource[i]) == NULL || !res->flags
-		    || res->start > res->end || res->parent)
+	pci_bus_for_each_resource(bus, res, i) {
+		if (!res || !res->flags || res->start > res->end || res->parent)
 			continue;
 		if (bus->parent == NULL)
 			pr = (res->flags & IORESOURCE_IO) ?
diff --git a/arch/powerpc/platforms/fsl_uli1575.c b/arch/powerpc/platforms/fsl_uli1575.c
index fd23a1d4b39d..8b0c2082a783 100644
--- a/arch/powerpc/platforms/fsl_uli1575.c
+++ b/arch/powerpc/platforms/fsl_uli1575.c
@@ -222,6 +222,7 @@ static void __devinit quirk_final_uli5249(struct pci_dev *dev)
 	int i;
 	u8 *dummy;
 	struct pci_bus *bus = dev->bus;
+	struct resource *res;
 	resource_size_t end = 0;
 
 	for (i = PCI_BRIDGE_RESOURCES; i < PCI_BRIDGE_RESOURCES+3; i++) {
@@ -230,13 +231,12 @@ static void __devinit quirk_final_uli5249(struct pci_dev *dev)
 			end = pci_resource_end(dev, i);
 	}
 
-	for (i = 0; i < PCI_BUS_NUM_RESOURCES; i++) {
-		if ((bus->resource[i]) &&
-			(bus->resource[i]->flags & IORESOURCE_MEM)) {
-			if (bus->resource[i]->end == end)
-				dummy = ioremap(bus->resource[i]->start, 0x4);
+	pci_bus_for_each_resource(bus, res, i) {
+		if (res && res->flags & IORESOURCE_MEM) {
+			if (res->end == end)
+				dummy = ioremap(res->start, 0x4);
 			else
-				dummy = ioremap(bus->resource[i]->end - 3, 0x4);
+				dummy = ioremap(res->end - 3, 0x4);
 			if (dummy) {
 				in_8(dummy);
 				iounmap(dummy);
diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c
index a26135bb0ffd..e75d219fd107 100644
--- a/drivers/pci/bus.c
+++ b/drivers/pci/bus.c
@@ -43,6 +43,7 @@ pci_bus_alloc_resource(struct pci_bus *bus, struct resource *res,
 		void *alignf_data)
 {
 	int i, ret = -ENOMEM;
+	struct resource *r;
 	resource_size_t max = -1;
 
 	type_mask |= IORESOURCE_IO | IORESOURCE_MEM;
@@ -51,8 +52,7 @@ pci_bus_alloc_resource(struct pci_bus *bus, struct resource *res,
 	if (!(res->flags & IORESOURCE_MEM_64))
 		max = PCIBIOS_MAX_MEM_32;
 
-	for (i = 0; i < PCI_BUS_NUM_RESOURCES; i++) {
-		struct resource *r = bus->resource[i];
+	pci_bus_for_each_resource(bus, r, i) {
 		if (!r)
 			continue;
 
diff --git a/drivers/pci/hotplug/shpchp_sysfs.c b/drivers/pci/hotplug/shpchp_sysfs.c
index 29fa9d26adae..071b7dc0094b 100644
--- a/drivers/pci/hotplug/shpchp_sysfs.c
+++ b/drivers/pci/hotplug/shpchp_sysfs.c
@@ -47,8 +47,7 @@ static ssize_t show_ctrl (struct device *dev, struct device_attribute *attr, cha
 	bus = pdev->subordinate;
 
 	out += sprintf(buf, "Free resources: memory\n");
-	for (index = 0; index < PCI_BUS_NUM_RESOURCES; index++) {
-		res = bus->resource[index];
+	pci_bus_for_each_resource(bus, res, index) {
 		if (res && (res->flags & IORESOURCE_MEM) &&
 				!(res->flags & IORESOURCE_PREFETCH)) {
 			out += sprintf(out, "start = %8.8llx, "
@@ -58,8 +57,7 @@ static ssize_t show_ctrl (struct device *dev, struct device_attribute *attr, cha
 		}
 	}
 	out += sprintf(out, "Free resources: prefetchable memory\n");
-	for (index = 0; index < PCI_BUS_NUM_RESOURCES; index++) {
-		res = bus->resource[index];
+	pci_bus_for_each_resource(bus, res, index) {
 		if (res && (res->flags & IORESOURCE_MEM) &&
 			       (res->flags & IORESOURCE_PREFETCH)) {
 			out += sprintf(out, "start = %8.8llx, "
@@ -69,8 +67,7 @@ static ssize_t show_ctrl (struct device *dev, struct device_attribute *attr, cha
 		}
 	}
 	out += sprintf(out, "Free resources: IO\n");
-	for (index = 0; index < PCI_BUS_NUM_RESOURCES; index++) {
-		res = bus->resource[index];
+	pci_bus_for_each_resource(bus, res, index) {
 		if (res && (res->flags & IORESOURCE_IO)) {
 			out += sprintf(out, "start = %8.8llx, "
 					"length = %8.8llx\n",
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index d62a5de81672..f4a2738bf0bf 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -386,10 +386,9 @@ pci_find_parent_resource(const struct pci_dev *dev, struct resource *res)
 {
 	const struct pci_bus *bus = dev->bus;
 	int i;
-	struct resource *best = NULL;
+	struct resource *best = NULL, *r;
 
-	for(i = 0; i < PCI_BUS_NUM_RESOURCES; i++) {
-		struct resource *r = bus->resource[i];
+	pci_bus_for_each_resource(bus, r, i) {
 		if (!r)
 			continue;
 		if (res->start && !(res->start >= r->start && res->end <= r->end))
diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c
index 743ed8c48b9c..bf32f07c4efb 100644
--- a/drivers/pci/setup-bus.c
+++ b/drivers/pci/setup-bus.c
@@ -387,8 +387,7 @@ static struct resource *find_free_bus_resource(struct pci_bus *bus, unsigned lon
 	unsigned long type_mask = IORESOURCE_IO | IORESOURCE_MEM |
 				  IORESOURCE_PREFETCH;
 
-	for (i = 0; i < PCI_BUS_NUM_RESOURCES; i++) {
-		r = bus->resource[i];
+	pci_bus_for_each_resource(bus, r, i) {
 		if (r == &ioport_resource || r == &iomem_resource)
 			continue;
 		if (r && (r->flags & type_mask) == type && !r->parent)
@@ -803,11 +802,10 @@ static void __ref pci_bus_release_bridge_resources(struct pci_bus *bus,
 
 static void pci_bus_dump_res(struct pci_bus *bus)
 {
-        int i;
-
-        for (i = 0; i < PCI_BUS_NUM_RESOURCES; i++) {
-                struct resource *res = bus->resource[i];
+	struct resource *res;
+	int i;
 
+	pci_bus_for_each_resource(bus, res, i) {
 		if (!res || !res->end || !res->flags)
                         continue;
 
diff --git a/drivers/pcmcia/rsrc_nonstatic.c b/drivers/pcmcia/rsrc_nonstatic.c
index 45d75dc452f0..c67638fe6914 100644
--- a/drivers/pcmcia/rsrc_nonstatic.c
+++ b/drivers/pcmcia/rsrc_nonstatic.c
@@ -803,8 +803,7 @@ static int nonstatic_autoadd_resources(struct pcmcia_socket *s)
 		return -EINVAL;
 #endif
 
-	for (i = 0; i < PCI_BUS_NUM_RESOURCES; i++) {
-		res = s->cb_dev->bus->resource[i];
+	pci_bus_for_each_resource(s->cb_dev->bus, res, i) {
 		if (!res)
 			continue;
 
diff --git a/drivers/pcmcia/yenta_socket.c b/drivers/pcmcia/yenta_socket.c
index e4d12acdd525..1f2039d5e966 100644
--- a/drivers/pcmcia/yenta_socket.c
+++ b/drivers/pcmcia/yenta_socket.c
@@ -649,9 +649,10 @@ static int yenta_search_one_res(struct resource *root, struct resource *res,
 static int yenta_search_res(struct yenta_socket *socket, struct resource *res,
 			    u32 min)
 {
+	struct resource *root;
 	int i;
-	for (i = 0; i < PCI_BUS_NUM_RESOURCES; i++) {
-		struct resource *root = socket->dev->bus->resource[i];
+
+	pci_bus_for_each_resource(socket->dev->bus, root, i) {
 		if (!root)
 			continue;
 
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 1a29f9f6b2dc..2ff9d26a078f 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -829,6 +829,9 @@ int pci_request_selected_regions_exclusive(struct pci_dev *, int, const char *);
 void pci_release_selected_regions(struct pci_dev *, int);
 
 /* drivers/pci/bus.c */
+#define pci_bus_for_each_resource(bus, res, i)				\
+	for (i = 0; res = bus->resource[i], i < PCI_BUS_NUM_RESOURCES; i++)
+
 int __must_check pci_bus_alloc_resource(struct pci_bus *bus,
 			struct resource *res, resource_size_t size,
 			resource_size_t align, resource_size_t min,
-- 
cgit v1.2.3


From 2fe2abf896c1e7a0ee65faaf3ef0ce654848abbd Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bjorn.helgaas@hp.com>
Date: Tue, 23 Feb 2010 10:24:36 -0700
Subject: PCI: augment bus resource table with a list

Previously we used a table of size PCI_BUS_NUM_RESOURCES (16) for resources
forwarded to a bus by its upstream bridge.  We've increased this size
several times when the table overflowed.

But there's no good limit on the number of resources because host bridges
and subtractive decode bridges can forward any number of ranges to their
secondary buses.

This patch reduces the table to only PCI_BRIDGE_RESOURCE_NUM (4) entries,
which corresponds to the number of windows a PCI-to-PCI (3) or CardBus (4)
bridge can positively decode.  Any additional resources, e.g., PCI host
bridge windows or subtractively-decoded regions, are kept in a list.

I'd prefer a single list rather than this split table/list approach, but
that requires simultaneous changes to every architecture.  This approach
only requires immediate changes where we set up (a) host bridges with more
than four windows and (b) subtractive-decode P2P bridges, and we can
incrementally change other architectures to use the list.

Signed-off-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 arch/ia64/pci/pci.c     | 12 +++---------
 arch/x86/pci/acpi.c     | 33 ++++-----------------------------
 arch/x86/pci/bus_numa.c |  3 ++-
 arch/x86/pci/bus_numa.h |  3 +--
 drivers/pci/bus.c       | 46 ++++++++++++++++++++++++++++++++++++++++++++++
 drivers/pci/probe.c     | 17 +++++++++++++----
 include/linux/pci.h     | 35 +++++++++++++++++++++++++++++------
 7 files changed, 98 insertions(+), 51 deletions(-)

(limited to 'include/linux')

diff --git a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c
index 89f957ca3eb2..64aff520b899 100644
--- a/arch/ia64/pci/pci.c
+++ b/arch/ia64/pci/pci.c
@@ -320,9 +320,9 @@ static __devinit acpi_status add_window(struct acpi_resource *res, void *data)
 static void __devinit
 pcibios_setup_root_windows(struct pci_bus *bus, struct pci_controller *ctrl)
 {
-	int i, j;
+	int i;
 
-	j = 0;
+	pci_bus_remove_resources(bus);
 	for (i = 0; i < ctrl->windows; i++) {
 		struct resource *res = &ctrl->window[i].resource;
 		/* HP's firmware has a hack to work around a Windows bug.
@@ -330,13 +330,7 @@ pcibios_setup_root_windows(struct pci_bus *bus, struct pci_controller *ctrl)
 		if ((res->flags & IORESOURCE_MEM) &&
 		    (res->end - res->start < 16))
 			continue;
-		if (j >= PCI_BUS_NUM_RESOURCES) {
-			dev_warn(&bus->dev,
-				 "ignoring host bridge window %pR (no space)\n",
-				 res);
-			continue;
-		}
-		bus->resource[j++] = res;
+		pci_bus_add_resource(bus, res, 0);
 	}
 }
 
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c
index 959e548a7039..a2f8cdb8c1d5 100644
--- a/arch/x86/pci/acpi.c
+++ b/arch/x86/pci/acpi.c
@@ -45,20 +45,6 @@ count_resource(struct acpi_resource *acpi_res, void *data)
 	return AE_OK;
 }
 
-static int
-bus_has_transparent_bridge(struct pci_bus *bus)
-{
-	struct pci_dev *dev;
-
-	list_for_each_entry(dev, &bus->devices, bus_list) {
-		u16 class = dev->class >> 8;
-
-		if (class == PCI_CLASS_BRIDGE_PCI && dev->transparent)
-			return true;
-	}
-	return false;
-}
-
 static void
 align_resource(struct acpi_device *bridge, struct resource *res)
 {
@@ -92,12 +78,8 @@ setup_resource(struct acpi_resource *acpi_res, void *data)
 	acpi_status status;
 	unsigned long flags;
 	struct resource *root;
-	int max_root_bus_resources = PCI_BUS_NUM_RESOURCES;
 	u64 start, end;
 
-	if (bus_has_transparent_bridge(info->bus))
-		max_root_bus_resources -= 3;
-
 	status = resource_to_addr(acpi_res, &addr);
 	if (!ACPI_SUCCESS(status))
 		return AE_OK;
@@ -115,15 +97,6 @@ setup_resource(struct acpi_resource *acpi_res, void *data)
 
 	start = addr.minimum + addr.translation_offset;
 	end = start + addr.address_length - 1;
-	if (info->res_num >= max_root_bus_resources) {
-		if (pci_probe & PCI_USE__CRS)
-			printk(KERN_WARNING "PCI: Failed to allocate "
-			       "0x%lx-0x%lx from %s for %s due to _CRS "
-			       "returning more than %d resource descriptors\n",
-			       (unsigned long) start, (unsigned long) end,
-			       root->name, info->name, max_root_bus_resources);
-		return AE_OK;
-	}
 
 	res = &info->res[info->res_num];
 	res->name = info->name;
@@ -143,7 +116,7 @@ setup_resource(struct acpi_resource *acpi_res, void *data)
 		dev_err(&info->bridge->dev,
 			"can't allocate host bridge window %pR\n", res);
 	} else {
-		info->bus->resource[info->res_num] = res;
+		pci_bus_add_resource(info->bus, res, 0);
 		info->res_num++;
 		if (addr.translation_offset)
 			dev_info(&info->bridge->dev, "host bridge window %pR "
@@ -164,7 +137,9 @@ get_current_resources(struct acpi_device *device, int busnum,
 	struct pci_root_info info;
 	size_t size;
 
-	if (!(pci_probe & PCI_USE__CRS))
+	if (pci_probe & PCI_USE__CRS)
+		pci_bus_remove_resources(bus);
+	else
 		dev_info(&device->dev,
 			 "ignoring host bridge windows from ACPI; "
 			 "boot with \"pci=use_crs\" to use them\n");
diff --git a/arch/x86/pci/bus_numa.c b/arch/x86/pci/bus_numa.c
index f939d603adfa..12d54ff3654d 100644
--- a/arch/x86/pci/bus_numa.c
+++ b/arch/x86/pci/bus_numa.c
@@ -36,13 +36,14 @@ void x86_pci_root_bus_res_quirks(struct pci_bus *b)
 	printk(KERN_DEBUG "PCI: peer root bus %02x res updated from pci conf\n",
 			b->number);
 
+	pci_bus_remove_resources(b);
 	info = &pci_root_info[i];
 	for (j = 0; j < info->res_num; j++) {
 		struct resource *res;
 		struct resource *root;
 
 		res = &info->res[j];
-		b->resource[j] = res;
+		pci_bus_add_resource(b, res, 0);
 		if (res->flags & IORESOURCE_IO)
 			root = &ioport_resource;
 		else
diff --git a/arch/x86/pci/bus_numa.h b/arch/x86/pci/bus_numa.h
index adbc23fe82ac..731b64ee8d84 100644
--- a/arch/x86/pci/bus_numa.h
+++ b/arch/x86/pci/bus_numa.h
@@ -2,8 +2,7 @@
 
 /*
  * sub bus (transparent) will use entres from 3 to store extra from
- * root, so need to make sure we have enough slot there, Should we
- * increase PCI_BUS_NUM_RESOURCES?
+ * root, so need to make sure we have enough slot there.
  */
 #define RES_NUM 16
 struct pci_root_info {
diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c
index e75d219fd107..712250f5874a 100644
--- a/drivers/pci/bus.c
+++ b/drivers/pci/bus.c
@@ -17,6 +17,52 @@
 
 #include "pci.h"
 
+void pci_bus_add_resource(struct pci_bus *bus, struct resource *res,
+			  unsigned int flags)
+{
+	struct pci_bus_resource *bus_res;
+
+	bus_res = kzalloc(sizeof(struct pci_bus_resource), GFP_KERNEL);
+	if (!bus_res) {
+		dev_err(&bus->dev, "can't add %pR resource\n", res);
+		return;
+	}
+
+	bus_res->res = res;
+	bus_res->flags = flags;
+	list_add_tail(&bus_res->list, &bus->resources);
+}
+
+struct resource *pci_bus_resource_n(const struct pci_bus *bus, int n)
+{
+	struct pci_bus_resource *bus_res;
+
+	if (n < PCI_BRIDGE_RESOURCE_NUM)
+		return bus->resource[n];
+
+	n -= PCI_BRIDGE_RESOURCE_NUM;
+	list_for_each_entry(bus_res, &bus->resources, list) {
+		if (n-- == 0)
+			return bus_res->res;
+	}
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(pci_bus_resource_n);
+
+void pci_bus_remove_resources(struct pci_bus *bus)
+{
+	struct pci_bus_resource *bus_res, *tmp;
+	int i;
+
+	for (i = 0; i < PCI_BRIDGE_RESOURCE_NUM; i++)
+		bus->resource[i] = 0;
+
+	list_for_each_entry_safe(bus_res, tmp, &bus->resources, list) {
+		list_del(&bus_res->list);
+		kfree(bus_res);
+	}
+}
+
 /**
  * pci_bus_alloc_resource - allocate a resource from a parent bus
  * @bus: PCI bus
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 70c4ed2e67cc..270d069819f7 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -89,6 +89,7 @@ static void release_pcibus_dev(struct device *dev)
 
 	if (pci_bus->bridge)
 		put_device(pci_bus->bridge);
+	pci_bus_remove_resources(pci_bus);
 	kfree(pci_bus);
 }
 
@@ -394,6 +395,7 @@ static void __devinit pci_read_bridge_mmio_pref(struct pci_bus *child)
 void __devinit pci_read_bridge_bases(struct pci_bus *child)
 {
 	struct pci_dev *dev = child->self;
+	struct resource *res;
 	int i;
 
 	if (pci_is_root_bus(child))	/* It's a host bus, nothing to read */
@@ -403,17 +405,23 @@ void __devinit pci_read_bridge_bases(struct pci_bus *child)
 		 child->secondary, child->subordinate,
 		 dev->transparent ? " (subtractive decode)" : "");
 
+	pci_bus_remove_resources(child);
+	for (i = 0; i < PCI_BRIDGE_RESOURCE_NUM; i++)
+		child->resource[i] = &dev->resource[PCI_BRIDGE_RESOURCES+i];
+
 	pci_read_bridge_io(child);
 	pci_read_bridge_mmio(child);
 	pci_read_bridge_mmio_pref(child);
 
 	if (dev->transparent) {
-		for (i = 3; i < PCI_BUS_NUM_RESOURCES; i++) {
-			child->resource[i] = child->parent->resource[i - 3];
-			if (child->resource[i])
+		pci_bus_for_each_resource(child->parent, res, i) {
+			if (res) {
+				pci_bus_add_resource(child, res,
+						     PCI_SUBTRACTIVE_DECODE);
 				dev_printk(KERN_DEBUG, &dev->dev,
 					   "  bridge window %pR (subtractive decode)\n",
-					   child->resource[i]);
+					   res);
+			}
 		}
 	}
 }
@@ -428,6 +436,7 @@ static struct pci_bus * pci_alloc_bus(void)
 		INIT_LIST_HEAD(&b->children);
 		INIT_LIST_HEAD(&b->devices);
 		INIT_LIST_HEAD(&b->slots);
+		INIT_LIST_HEAD(&b->resources);
 		b->max_bus_speed = PCI_SPEED_UNKNOWN;
 		b->cur_bus_speed = PCI_SPEED_UNKNOWN;
 	}
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 2ff9d26a078f..e19a69613d8f 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -364,9 +364,26 @@ static inline void pci_add_saved_cap(struct pci_dev *pci_dev,
 	hlist_add_head(&new_cap->next, &pci_dev->saved_cap_space);
 }
 
-#ifndef PCI_BUS_NUM_RESOURCES
-#define PCI_BUS_NUM_RESOURCES	16
-#endif
+/*
+ * The first PCI_BRIDGE_RESOURCE_NUM PCI bus resources (those that correspond
+ * to P2P or CardBus bridge windows) go in a table.  Additional ones (for
+ * buses below host bridges or subtractive decode bridges) go in the list.
+ * Use pci_bus_for_each_resource() to iterate through all the resources.
+ */
+
+/*
+ * PCI_SUBTRACTIVE_DECODE means the bridge forwards the window implicitly
+ * and there's no way to program the bridge with the details of the window.
+ * This does not apply to ACPI _CRS windows, even with the _DEC subtractive-
+ * decode bit set, because they are explicit and can be programmed with _SRS.
+ */
+#define PCI_SUBTRACTIVE_DECODE	0x1
+
+struct pci_bus_resource {
+	struct list_head list;
+	struct resource *res;
+	unsigned int flags;
+};
 
 #define PCI_REGION_FLAG_MASK	0x0fU	/* These bits of resource flags tell us the PCI region flags */
 
@@ -377,8 +394,8 @@ struct pci_bus {
 	struct list_head devices;	/* list of devices on this bus */
 	struct pci_dev	*self;		/* bridge device as seen by parent */
 	struct list_head slots;		/* list of slots on this bus */
-	struct resource	*resource[PCI_BUS_NUM_RESOURCES];
-					/* address space routed to this bus */
+	struct resource *resource[PCI_BRIDGE_RESOURCE_NUM];
+	struct list_head resources;	/* address space routed to this bus */
 
 	struct pci_ops	*ops;		/* configuration access functions */
 	void		*sysdata;	/* hook for sys-specific extension */
@@ -829,8 +846,14 @@ int pci_request_selected_regions_exclusive(struct pci_dev *, int, const char *);
 void pci_release_selected_regions(struct pci_dev *, int);
 
 /* drivers/pci/bus.c */
+void pci_bus_add_resource(struct pci_bus *bus, struct resource *res, unsigned int flags);
+struct resource *pci_bus_resource_n(const struct pci_bus *bus, int n);
+void pci_bus_remove_resources(struct pci_bus *bus);
+
 #define pci_bus_for_each_resource(bus, res, i)				\
-	for (i = 0; res = bus->resource[i], i < PCI_BUS_NUM_RESOURCES; i++)
+	for (i = 0;							\
+	    (res = pci_bus_resource_n(bus, i)) || i < PCI_BRIDGE_RESOURCE_NUM; \
+	     i++)
 
 int __must_check pci_bus_alloc_resource(struct pci_bus *bus,
 			struct resource *res, resource_size_t size,
-- 
cgit v1.2.3


From 189b3b1c89761054fee3438f063d7f257306e2d8 Mon Sep 17 00:00:00 2001
From: "wzt.wzt@gmail.com" <wzt.wzt@gmail.com>
Date: Tue, 23 Feb 2010 23:15:28 +0800
Subject: Security: add static to security_ops and default_security_ops
 variable

Enhance the security framework to support resetting the active security
module. This eliminates the need for direct use of the security_ops and
default_security_ops variables outside of security.c, so make security_ops
and default_security_ops static. Also remove the secondary_ops variable as
a cleanup since there is no use for that. secondary_ops was originally used by
SELinux to call the "secondary" security module (capability or dummy),
but that was replaced by direct calls to capability and the only
remaining use is to save and restore the original security ops pointer
value if SELinux is disabled by early userspace based on /etc/selinux/config.
Further, if we support this directly in the security framework, then we can
just use &default_security_ops for this purpose since that is now available.

Signed-off-by: Zhitong Wang <zhitong.wangzt@alibaba-inc.com>
Acked-by:  Stephen Smalley <sds@tycho.nsa.gov>
Signed-off-by: James Morris <jmorris@namei.org>
---
 include/linux/security.h |  2 ++
 security/capability.c    |  4 ----
 security/security.c      | 11 +++++++++--
 security/selinux/hooks.c | 13 +------------
 4 files changed, 12 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/security.h b/include/linux/security.h
index a4dc74d86ac6..233d20b52c1b 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -95,6 +95,8 @@ struct seq_file;
 extern int cap_netlink_send(struct sock *sk, struct sk_buff *skb);
 extern int cap_netlink_recv(struct sk_buff *skb, int cap);
 
+void reset_security_ops(void);
+
 #ifdef CONFIG_MMU
 extern unsigned long mmap_min_addr;
 extern unsigned long dac_mmap_min_addr;
diff --git a/security/capability.c b/security/capability.c
index 5c700e1a4fd3..4875142b858d 100644
--- a/security/capability.c
+++ b/security/capability.c
@@ -906,10 +906,6 @@ static void cap_audit_rule_free(void *lsmrule)
 }
 #endif /* CONFIG_AUDIT */
 
-struct security_operations default_security_ops = {
-	.name	= "default",
-};
-
 #define set_to_cap_if_null(ops, function)				\
 	do {								\
 		if (!ops->function) {					\
diff --git a/security/security.c b/security/security.c
index 971092c06f31..edae56b78771 100644
--- a/security/security.c
+++ b/security/security.c
@@ -23,10 +23,12 @@ static __initdata char chosen_lsm[SECURITY_NAME_MAX + 1] =
 	CONFIG_DEFAULT_SECURITY;
 
 /* things that live in capability.c */
-extern struct security_operations default_security_ops;
 extern void security_fixup_ops(struct security_operations *ops);
 
-struct security_operations *security_ops;	/* Initialized to NULL */
+static struct security_operations *security_ops;
+static struct security_operations default_security_ops = {
+	.name	= "default",
+};
 
 static inline int verify(struct security_operations *ops)
 {
@@ -63,6 +65,11 @@ int __init security_init(void)
 	return 0;
 }
 
+void reset_security_ops(void)
+{
+	security_ops = &default_security_ops;
+}
+
 /* Save user chosen LSM */
 static int __init choose_lsm(char *str)
 {
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 6b36ce2eef2e..dc7660074b99 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -126,13 +126,6 @@ __setup("selinux=", selinux_enabled_setup);
 int selinux_enabled = 1;
 #endif
 
-
-/*
- * Minimal support for a secondary security module,
- * just to allow the use of the capability module.
- */
-static struct security_operations *secondary_ops;
-
 /* Lists of inode and superblock security structures initialized
    before the policy was loaded. */
 static LIST_HEAD(superblock_security_head);
@@ -5674,9 +5667,6 @@ static __init int selinux_init(void)
 					    0, SLAB_PANIC, NULL);
 	avc_init();
 
-	secondary_ops = security_ops;
-	if (!secondary_ops)
-		panic("SELinux: No initial security operations\n");
 	if (register_security(&selinux_ops))
 		panic("SELinux: Unable to register with kernel.\n");
 
@@ -5837,8 +5827,7 @@ int selinux_disable(void)
 	selinux_disabled = 1;
 	selinux_enabled = 0;
 
-	/* Reset security_ops to the secondary module, dummy or capability. */
-	security_ops = secondary_ops;
+	reset_security_ops();
 
 	/* Try to destroy the avc node cache */
 	avc_disable();
-- 
cgit v1.2.3


From c6a0dd7ec6fb2d4927979ed4dc562fc5c122d826 Mon Sep 17 00:00:00 2001
From: Suresh Siddha <suresh.b.siddha@intel.com>
Date: Mon, 22 Feb 2010 14:51:32 -0800
Subject: ptrace: Fix ptrace_regset() comments and diagnose errors specifically

Return -EINVAL for the bad size and for unrecognized NT_* type in
ptrace_regset() instead of -EIO.

Also update the comments for this ptrace interface with more clarifications.

Requested-by: Roland McGrath <roland@redhat.com>
Requested-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
LKML-Reference: <20100222225240.397523600@sbs-t61.sc.intel.com>
Acked-by: Roland McGrath <roland@redhat.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 include/linux/ptrace.h | 5 +++++
 kernel/ptrace.c        | 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index dbfa821d5a6e..c5eab89da51e 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -30,6 +30,11 @@
 /*
  * Generic ptrace interface that exports the architecture specific regsets
  * using the corresponding NT_* types (which are also used in the core dump).
+ * Please note that the NT_PRSTATUS note type in a core dump contains a full
+ * 'struct elf_prstatus'. But the user_regset for NT_PRSTATUS contains just the
+ * elf_gregset_t that is the pr_reg field of 'struct elf_prstatus'. For all the
+ * other user_regset flavors, the user_regset layout and the ELF core dump note
+ * payload are exactly the same layout.
  *
  * This interface usage is as follows:
  *	struct iovec iov = { buf, len};
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 13b4554d8fbb..42ad8ae729a0 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -537,7 +537,7 @@ static int ptrace_regset(struct task_struct *task, int req, unsigned int type,
 	int regset_no;
 
 	if (!regset || (kiov->iov_len % regset->size) != 0)
-		return -EIO;
+		return -EINVAL;
 
 	regset_no = regset - view->regsets;
 	kiov->iov_len = min(kiov->iov_len,
-- 
cgit v1.2.3


From 9564e138b1f6eb137f7149772438d3f3fb3277dd Mon Sep 17 00:00:00 2001
From: Adam Litke <agl@us.ibm.com>
Date: Mon, 30 Nov 2009 10:14:15 -0600
Subject: virtio: Add memory statistics reporting to the balloon driver (V4)

Changes since V3:
 - Do not do endian conversions as they will be done in the host
 - Report stats that reference a quantity of memory in bytes
 - Minor coding style updates

Changes since V2:
 - Increase stat field size to 64 bits
 - Report all sizes in kb (not pages)
 - Drop anon_pages stat and fix endianness conversion

Changes since V1:
 - Use a virtqueue instead of the device config space

When using ballooning to manage overcommitted memory on a host, a system for
guests to communicate their memory usage to the host can provide information
that will minimize the impact of ballooning on the guests.  The current method
employs a daemon running in each guest that communicates memory statistics to a
host daemon at a specified time interval.  The host daemon aggregates this
information and inflates and/or deflates balloons according to the level of
host memory pressure.  This approach is effective but overly complex since a
daemon must be installed inside each guest and coordinated to communicate with
the host.  A simpler approach is to collect memory statistics in the virtio
balloon driver and communicate them directly to the hypervisor.

This patch enables the guest-side support by adding stats collection and
reporting to the virtio balloon driver.

Signed-off-by: Adam Litke <agl@us.ibm.com>
Cc: Anthony Liguori <anthony@codemonkey.ws>
Cc: virtualization@lists.linux-foundation.org
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> (minor fixes)
---
 drivers/virtio/virtio_balloon.c | 94 +++++++++++++++++++++++++++++++++++++----
 include/linux/virtio_balloon.h  | 15 +++++++
 2 files changed, 101 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 505be88c82ae..cd778b1752b5 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -28,7 +28,7 @@
 struct virtio_balloon
 {
 	struct virtio_device *vdev;
-	struct virtqueue *inflate_vq, *deflate_vq;
+	struct virtqueue *inflate_vq, *deflate_vq, *stats_vq;
 
 	/* Where the ballooning thread waits for config to change. */
 	wait_queue_head_t config_change;
@@ -49,6 +49,9 @@ struct virtio_balloon
 	/* The array of pfns we tell the Host about. */
 	unsigned int num_pfns;
 	u32 pfns[256];
+
+	/* Memory statistics */
+	struct virtio_balloon_stat stats[VIRTIO_BALLOON_S_NR];
 };
 
 static struct virtio_device_id id_table[] = {
@@ -154,6 +157,62 @@ static void leak_balloon(struct virtio_balloon *vb, size_t num)
 	}
 }
 
+static inline void update_stat(struct virtio_balloon *vb, int idx,
+			       u16 tag, u64 val)
+{
+	BUG_ON(idx >= VIRTIO_BALLOON_S_NR);
+	vb->stats[idx].tag = tag;
+	vb->stats[idx].val = val;
+}
+
+#define pages_to_bytes(x) ((u64)(x) << PAGE_SHIFT)
+
+static void update_balloon_stats(struct virtio_balloon *vb)
+{
+	unsigned long events[NR_VM_EVENT_ITEMS];
+	struct sysinfo i;
+	int idx = 0;
+
+	all_vm_events(events);
+	si_meminfo(&i);
+
+	update_stat(vb, idx++, VIRTIO_BALLOON_S_SWAP_IN,
+				pages_to_bytes(events[PSWPIN]));
+	update_stat(vb, idx++, VIRTIO_BALLOON_S_SWAP_OUT,
+				pages_to_bytes(events[PSWPOUT]));
+	update_stat(vb, idx++, VIRTIO_BALLOON_S_MAJFLT, events[PGMAJFAULT]);
+	update_stat(vb, idx++, VIRTIO_BALLOON_S_MINFLT, events[PGFAULT]);
+	update_stat(vb, idx++, VIRTIO_BALLOON_S_MEMFREE,
+				pages_to_bytes(i.freeram));
+	update_stat(vb, idx++, VIRTIO_BALLOON_S_MEMTOT,
+				pages_to_bytes(i.totalram));
+}
+
+/*
+ * While most virtqueues communicate guest-initiated requests to the hypervisor,
+ * the stats queue operates in reverse.  The driver initializes the virtqueue
+ * with a single buffer.  From that point forward, all conversations consist of
+ * a hypervisor request (a call to this function) which directs us to refill
+ * the virtqueue with a fresh stats buffer.
+ */
+static void stats_ack(struct virtqueue *vq)
+{
+	struct virtio_balloon *vb;
+	unsigned int len;
+	struct scatterlist sg;
+
+	vb = vq->vq_ops->get_buf(vq, &len);
+	if (!vb)
+		return;
+
+	update_balloon_stats(vb);
+
+	sg_init_one(&sg, vb->stats, sizeof(vb->stats));
+	if (vq->vq_ops->add_buf(vq, &sg, 1, 0, vb) < 0)
+		BUG();
+	vq->vq_ops->kick(vq);
+}
+
 static void virtballoon_changed(struct virtio_device *vdev)
 {
 	struct virtio_balloon *vb = vdev->priv;
@@ -204,10 +263,10 @@ static int balloon(void *_vballoon)
 static int virtballoon_probe(struct virtio_device *vdev)
 {
 	struct virtio_balloon *vb;
-	struct virtqueue *vqs[2];
-	vq_callback_t *callbacks[] = { balloon_ack, balloon_ack };
-	const char *names[] = { "inflate", "deflate" };
-	int err;
+	struct virtqueue *vqs[3];
+	vq_callback_t *callbacks[] = { balloon_ack, balloon_ack, stats_ack };
+	const char *names[] = { "inflate", "deflate", "stats" };
+	int err, nvqs;
 
 	vdev->priv = vb = kmalloc(sizeof(*vb), GFP_KERNEL);
 	if (!vb) {
@@ -220,13 +279,29 @@ static int virtballoon_probe(struct virtio_device *vdev)
 	init_waitqueue_head(&vb->config_change);
 	vb->vdev = vdev;
 
-	/* We expect two virtqueues. */
-	err = vdev->config->find_vqs(vdev, 2, vqs, callbacks, names);
+	/* We expect two virtqueues: inflate and deflate,
+	 * and optionally stat. */
+	nvqs = virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ) ? 3 : 2;
+	err = vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names);
 	if (err)
 		goto out_free_vb;
 
 	vb->inflate_vq = vqs[0];
 	vb->deflate_vq = vqs[1];
+	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) {
+		struct scatterlist sg;
+		vb->stats_vq = vqs[2];
+
+		/*
+		 * Prime this virtqueue with one buffer so the hypervisor can
+		 * use it to signal us later.
+		 */
+		sg_init_one(&sg, vb->stats, sizeof vb->stats);
+		if (vb->stats_vq->vq_ops->add_buf(vb->stats_vq,
+						  &sg, 1, 0, vb) < 0)
+			BUG();
+		vb->stats_vq->vq_ops->kick(vb->stats_vq);
+	}
 
 	vb->thread = kthread_run(balloon, vb, "vballoon");
 	if (IS_ERR(vb->thread)) {
@@ -264,7 +339,10 @@ static void __devexit virtballoon_remove(struct virtio_device *vdev)
 	kfree(vb);
 }
 
-static unsigned int features[] = { VIRTIO_BALLOON_F_MUST_TELL_HOST };
+static unsigned int features[] = {
+	VIRTIO_BALLOON_F_MUST_TELL_HOST,
+	VIRTIO_BALLOON_F_STATS_VQ,
+};
 
 static struct virtio_driver virtio_balloon_driver = {
 	.feature_table = features,
diff --git a/include/linux/virtio_balloon.h b/include/linux/virtio_balloon.h
index 1418f048cb34..a50ecd1b81a2 100644
--- a/include/linux/virtio_balloon.h
+++ b/include/linux/virtio_balloon.h
@@ -7,6 +7,7 @@
 
 /* The feature bitmap for virtio balloon */
 #define VIRTIO_BALLOON_F_MUST_TELL_HOST	0 /* Tell before reclaiming pages */
+#define VIRTIO_BALLOON_F_STATS_VQ	1 /* Memory Stats virtqueue */
 
 /* Size of a PFN in the balloon interface. */
 #define VIRTIO_BALLOON_PFN_SHIFT 12
@@ -18,4 +19,18 @@ struct virtio_balloon_config
 	/* Number of pages we've actually got in balloon. */
 	__le32 actual;
 };
+
+#define VIRTIO_BALLOON_S_SWAP_IN  0   /* Amount of memory swapped in */
+#define VIRTIO_BALLOON_S_SWAP_OUT 1   /* Amount of memory swapped out */
+#define VIRTIO_BALLOON_S_MAJFLT   2   /* Number of major faults */
+#define VIRTIO_BALLOON_S_MINFLT   3   /* Number of minor faults */
+#define VIRTIO_BALLOON_S_MEMFREE  4   /* Total amount of free memory */
+#define VIRTIO_BALLOON_S_MEMTOT   5   /* Total amount of memory */
+#define VIRTIO_BALLOON_S_NR       6
+
+struct virtio_balloon_stat {
+	u16 tag;
+	u64 val;
+} __attribute__((packed));
+
 #endif /* _LINUX_VIRTIO_BALLOON_H */
-- 
cgit v1.2.3


From 69740c8ba878f58bc3c71f74618fc2cd1da990da Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 24 Feb 2010 14:22:25 -0600
Subject: virtio_blk: add block topology support

Allow reading various alignment values from the config page.  This
allows the guest to much better align I/O requests depending on the
storage topology.

Note that the formats for the config values appear a bit messed up,
but we follow the formats used by ATA and SCSI so they are expected in
the storage world.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 drivers/block/virtio_blk.c | 61 ++++++++++++++++++++++++++++++++++------------
 include/linux/virtio_blk.h | 13 ++++++++++
 2 files changed, 59 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 51042f0ba7e1..7eff828b2117 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -243,10 +243,12 @@ static int index_to_minor(int index)
 static int __devinit virtblk_probe(struct virtio_device *vdev)
 {
 	struct virtio_blk *vblk;
+	struct request_queue *q;
 	int err;
 	u64 cap;
-	u32 v;
-	u32 blk_size, sg_elems;
+	u32 v, blk_size, sg_elems, opt_io_size;
+	u16 min_io_size;
+	u8 physical_block_exp, alignment_offset;
 
 	if (index_to_minor(index) >= 1 << MINORBITS)
 		return -ENOSPC;
@@ -293,13 +295,13 @@ static int __devinit virtblk_probe(struct virtio_device *vdev)
 		goto out_mempool;
 	}
 
-	vblk->disk->queue = blk_init_queue(do_virtblk_request, &vblk->lock);
-	if (!vblk->disk->queue) {
+	q = vblk->disk->queue = blk_init_queue(do_virtblk_request, &vblk->lock);
+	if (!q) {
 		err = -ENOMEM;
 		goto out_put_disk;
 	}
 
-	vblk->disk->queue->queuedata = vblk;
+	q->queuedata = vblk;
 
 	if (index < 26) {
 		sprintf(vblk->disk->disk_name, "vd%c", 'a' + index % 26);
@@ -323,10 +325,10 @@ static int __devinit virtblk_probe(struct virtio_device *vdev)
 
 	/* If barriers are supported, tell block layer that queue is ordered */
 	if (virtio_has_feature(vdev, VIRTIO_BLK_F_FLUSH))
-		blk_queue_ordered(vblk->disk->queue, QUEUE_ORDERED_DRAIN_FLUSH,
+		blk_queue_ordered(q, QUEUE_ORDERED_DRAIN_FLUSH,
 				  virtblk_prepare_flush);
 	else if (virtio_has_feature(vdev, VIRTIO_BLK_F_BARRIER))
-		blk_queue_ordered(vblk->disk->queue, QUEUE_ORDERED_TAG, NULL);
+		blk_queue_ordered(q, QUEUE_ORDERED_TAG, NULL);
 
 	/* If disk is read-only in the host, the guest should obey */
 	if (virtio_has_feature(vdev, VIRTIO_BLK_F_RO))
@@ -345,14 +347,14 @@ static int __devinit virtblk_probe(struct virtio_device *vdev)
 	set_capacity(vblk->disk, cap);
 
 	/* We can handle whatever the host told us to handle. */
-	blk_queue_max_phys_segments(vblk->disk->queue, vblk->sg_elems-2);
-	blk_queue_max_hw_segments(vblk->disk->queue, vblk->sg_elems-2);
+	blk_queue_max_phys_segments(q, vblk->sg_elems-2);
+	blk_queue_max_hw_segments(q, vblk->sg_elems-2);
 
 	/* No need to bounce any requests */
-	blk_queue_bounce_limit(vblk->disk->queue, BLK_BOUNCE_ANY);
+	blk_queue_bounce_limit(q, BLK_BOUNCE_ANY);
 
 	/* No real sector limit. */
-	blk_queue_max_sectors(vblk->disk->queue, -1U);
+	blk_queue_max_sectors(q, -1U);
 
 	/* Host can optionally specify maximum segment size and number of
 	 * segments. */
@@ -360,16 +362,45 @@ static int __devinit virtblk_probe(struct virtio_device *vdev)
 				offsetof(struct virtio_blk_config, size_max),
 				&v);
 	if (!err)
-		blk_queue_max_segment_size(vblk->disk->queue, v);
+		blk_queue_max_segment_size(q, v);
 	else
-		blk_queue_max_segment_size(vblk->disk->queue, -1U);
+		blk_queue_max_segment_size(q, -1U);
 
 	/* Host can optionally specify the block size of the device */
 	err = virtio_config_val(vdev, VIRTIO_BLK_F_BLK_SIZE,
 				offsetof(struct virtio_blk_config, blk_size),
 				&blk_size);
 	if (!err)
-		blk_queue_logical_block_size(vblk->disk->queue, blk_size);
+		blk_queue_logical_block_size(q, blk_size);
+	else
+		blk_size = queue_logical_block_size(q);
+
+	/* Use topology information if available */
+	err = virtio_config_val(vdev, VIRTIO_BLK_F_TOPOLOGY,
+			offsetof(struct virtio_blk_config, physical_block_exp),
+			&physical_block_exp);
+	if (!err && physical_block_exp)
+		blk_queue_physical_block_size(q,
+				blk_size * (1 << physical_block_exp));
+
+	err = virtio_config_val(vdev, VIRTIO_BLK_F_TOPOLOGY,
+			offsetof(struct virtio_blk_config, alignment_offset),
+			&alignment_offset);
+	if (!err && alignment_offset)
+		blk_queue_alignment_offset(q, blk_size * alignment_offset);
+
+	err = virtio_config_val(vdev, VIRTIO_BLK_F_TOPOLOGY,
+			offsetof(struct virtio_blk_config, min_io_size),
+			&min_io_size);
+	if (!err && min_io_size)
+		blk_queue_io_min(q, blk_size * min_io_size);
+
+	err = virtio_config_val(vdev, VIRTIO_BLK_F_TOPOLOGY,
+			offsetof(struct virtio_blk_config, opt_io_size),
+			&opt_io_size);
+	if (!err && opt_io_size)
+		blk_queue_io_opt(q, blk_size * opt_io_size);
+
 
 	add_disk(vblk->disk);
 	return 0;
@@ -412,7 +443,7 @@ static struct virtio_device_id id_table[] = {
 static unsigned int features[] = {
 	VIRTIO_BLK_F_BARRIER, VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX,
 	VIRTIO_BLK_F_GEOMETRY, VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE,
-	VIRTIO_BLK_F_SCSI, VIRTIO_BLK_F_FLUSH
+	VIRTIO_BLK_F_SCSI, VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY
 };
 
 /*
diff --git a/include/linux/virtio_blk.h b/include/linux/virtio_blk.h
index fd294c56d571..e52029e98919 100644
--- a/include/linux/virtio_blk.h
+++ b/include/linux/virtio_blk.h
@@ -15,6 +15,7 @@
 #define VIRTIO_BLK_F_BLK_SIZE	6	/* Block size of disk is available*/
 #define VIRTIO_BLK_F_SCSI	7	/* Supports scsi command passthru */
 #define VIRTIO_BLK_F_FLUSH	9	/* Cache flush command support */
+#define VIRTIO_BLK_F_TOPOLOGY	10	/* Topology information is available */
 
 struct virtio_blk_config {
 	/* The capacity (in 512-byte sectors). */
@@ -29,8 +30,20 @@ struct virtio_blk_config {
 		__u8 heads;
 		__u8 sectors;
 	} geometry;
+
 	/* block size of device (if VIRTIO_BLK_F_BLK_SIZE) */
 	__u32 blk_size;
+
+	/* the next 4 entries are guarded by VIRTIO_BLK_F_TOPOLOGY  */
+	/* exponent for physical block per logical block. */
+	__u8 physical_block_exp;
+	/* alignment offset in logical blocks. */
+	__u8 alignment_offset;
+	/* minimum I/O size without performance penalty in logical blocks. */
+	__u16 min_io_size;
+	/* optimal sustained I/O size in logical blocks. */
+	__u32 opt_io_size;
+
 } __attribute__((packed));
 
 /*
-- 
cgit v1.2.3


From c021eac4148c16bf53baa0dd14e8ebee6f39dab5 Mon Sep 17 00:00:00 2001
From: Shirley Ma <mashirle@us.ibm.com>
Date: Mon, 18 Jan 2010 19:15:23 +0530
Subject: virtio: Add ability to detach unused buffers from vrings

There's currently no way for a virtio driver to ask for unused
buffers, so it has to keep a list itself to reclaim them at shutdown.
This is redundant, since virtio_ring stores that information.  So
add a new hook to do this.

Signed-off-by: Shirley Ma <xma@us.ibm.com>
Signed-off-by: Amit Shah <amit.shah@redhat.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 drivers/virtio/virtio_ring.c | 25 +++++++++++++++++++++++++
 include/linux/virtio.h       |  4 ++++
 2 files changed, 29 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 827f7e042610..782b7292a3d8 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -351,6 +351,30 @@ static bool vring_enable_cb(struct virtqueue *_vq)
 	return true;
 }
 
+static void *vring_detach_unused_buf(struct virtqueue *_vq)
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+	unsigned int i;
+	void *buf;
+
+	START_USE(vq);
+
+	for (i = 0; i < vq->vring.num; i++) {
+		if (!vq->data[i])
+			continue;
+		/* detach_buf clears data, so grab it now. */
+		buf = vq->data[i];
+		detach_buf(vq, i);
+		END_USE(vq);
+		return buf;
+	}
+	/* That should have freed everything. */
+	BUG_ON(vq->num_free != vq->vring.num);
+
+	END_USE(vq);
+	return NULL;
+}
+
 irqreturn_t vring_interrupt(int irq, void *_vq)
 {
 	struct vring_virtqueue *vq = to_vvq(_vq);
@@ -377,6 +401,7 @@ static struct virtqueue_ops vring_vq_ops = {
 	.kick = vring_kick,
 	.disable_cb = vring_disable_cb,
 	.enable_cb = vring_enable_cb,
+	.detach_unused_buf = vring_detach_unused_buf,
 };
 
 struct virtqueue *vring_new_virtqueue(unsigned int num,
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index 057a2e010758..f508c651e53d 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -51,6 +51,9 @@ struct virtqueue {
  *	This re-enables callbacks; it returns "false" if there are pending
  *	buffers in the queue, to detect a possible race between the driver
  *	checking for more work, and enabling callbacks.
+ * @detach_unused_buf: detach first unused buffer
+ * 	vq: the struct virtqueue we're talking about.
+ * 	Returns NULL or the "data" token handed to add_buf
  *
  * Locking rules are straightforward: the driver is responsible for
  * locking.  No two operations may be invoked simultaneously, with the exception
@@ -71,6 +74,7 @@ struct virtqueue_ops {
 
 	void (*disable_cb)(struct virtqueue *vq);
 	bool (*enable_cb)(struct virtqueue *vq);
+	void *(*detach_unused_buf)(struct virtqueue *vq);
 };
 
 /**
-- 
cgit v1.2.3


From a23ea92474e558b071d3e43d961ec767c31faebd Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Mon, 18 Jan 2010 19:14:55 +0530
Subject: virtio: console: comment cleanup

Remove old lguest-style comments.

[Amit: - wingify comments acc. to kernel style
       - indent comments ]

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Amit Shah <amit.shah@redhat.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 drivers/char/virtio_console.c  | 108 ++++++++++++++++++++---------------------
 include/linux/virtio_console.h |   6 ++-
 2 files changed, 58 insertions(+), 56 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c
index a035ae39a359..26e238cd7d2f 100644
--- a/drivers/char/virtio_console.c
+++ b/drivers/char/virtio_console.c
@@ -1,18 +1,5 @@
-/*D:300
- * The Guest console driver
- *
- * Writing console drivers is one of the few remaining Dark Arts in Linux.
- * Fortunately for us, the path of virtual consoles has been well-trodden by
- * the PowerPC folks, who wrote "hvc_console.c" to generically support any
- * virtual console.  We use that infrastructure which only requires us to write
- * the basic put_chars and get_chars functions and call the right register
- * functions.
- :*/
-
-/*M:002 The console can be flooded: while the Guest is processing input the
- * Host can send more.  Buffering in the Host could alleviate this, but it is a
- * difficult problem in general. :*/
-/* Copyright (C) 2006, 2007 Rusty Russell, IBM Corporation
+/*
+ * Copyright (C) 2006, 2007, 2009 Rusty Russell, IBM Corporation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -34,8 +21,6 @@
 #include <linux/virtio_console.h>
 #include "hvc_console.h"
 
-/*D:340 These represent our input and output console queues, and the virtio
- * operations for them. */
 static struct virtqueue *in_vq, *out_vq;
 static struct virtio_device *vdev;
 
@@ -49,12 +34,14 @@ static struct hv_ops virtio_cons;
 /* The hvc device */
 static struct hvc_struct *hvc;
 
-/*D:310 The put_chars() callback is pretty straightforward.
+/*
+ * The put_chars() callback is pretty straightforward.
  *
- * We turn the characters into a scatter-gather list, add it to the output
- * queue and then kick the Host.  Then we sit here waiting for it to finish:
- * inefficient in theory, but in practice implementations will do it
- * immediately (lguest's Launcher does). */
+ * We turn the characters into a scatter-gather list, add it to the
+ * output queue and then kick the Host.  Then we sit here waiting for
+ * it to finish: inefficient in theory, but in practice
+ * implementations will do it immediately (lguest's Launcher does).
+ */
 static int put_chars(u32 vtermno, const char *buf, int count)
 {
 	struct scatterlist sg[1];
@@ -63,8 +50,10 @@ static int put_chars(u32 vtermno, const char *buf, int count)
 	/* This is a convenient routine to initialize a single-elem sg list */
 	sg_init_one(sg, buf, count);
 
-	/* add_buf wants a token to identify this buffer: we hand it any
-	 * non-NULL pointer, since there's only ever one buffer. */
+	/*
+	 * add_buf wants a token to identify this buffer: we hand it
+	 * any non-NULL pointer, since there's only ever one buffer.
+	 */
 	if (out_vq->vq_ops->add_buf(out_vq, sg, 1, 0, (void *)1) >= 0) {
 		/* Tell Host to go! */
 		out_vq->vq_ops->kick(out_vq);
@@ -77,8 +66,10 @@ static int put_chars(u32 vtermno, const char *buf, int count)
 	return count;
 }
 
-/* Create a scatter-gather list representing our input buffer and put it in the
- * queue. */
+/*
+ * Create a scatter-gather list representing our input buffer and put
+ * it in the queue.
+ */
 static void add_inbuf(void)
 {
 	struct scatterlist sg[1];
@@ -90,12 +81,14 @@ static void add_inbuf(void)
 	in_vq->vq_ops->kick(in_vq);
 }
 
-/*D:350 get_chars() is the callback from the hvc_console infrastructure when
- * an interrupt is received.
+/*
+ * get_chars() is the callback from the hvc_console infrastructure
+ * when an interrupt is received.
  *
- * Most of the code deals with the fact that the hvc_console() infrastructure
- * only asks us for 16 bytes at a time.  We keep in_offset and in_used fields
- * for partially-filled buffers. */
+ * Most of the code deals with the fact that the hvc_console()
+ * infrastructure only asks us for 16 bytes at a time.  We keep
+ * in_offset and in_used fields for partially-filled buffers.
+ */
 static int get_chars(u32 vtermno, char *buf, int count)
 {
 	/* If we don't have an input queue yet, we can't get input. */
@@ -123,14 +116,16 @@ static int get_chars(u32 vtermno, char *buf, int count)
 
 	return count;
 }
-/*:*/
 
-/*D:320 Console drivers are initialized very early so boot messages can go out,
- * so we do things slightly differently from the generic virtio initialization
- * of the net and block drivers.
+/*
+ * Console drivers are initialized very early so boot messages can go
+ * out, so we do things slightly differently from the generic virtio
+ * initialization of the net and block drivers.
  *
- * At this stage, the console is output-only.  It's too early to set up a
- * virtqueue, so we let the drivers do some boutique early-output thing. */
+ * At this stage, the console is output-only.  It's too early to set
+ * up a virtqueue, so we let the drivers do some boutique early-output
+ * thing.
+ */
 int __init virtio_cons_early_init(int (*put_chars)(u32, const char *, int))
 {
 	virtio_cons.put_chars = put_chars;
@@ -157,8 +152,8 @@ static void virtcons_apply_config(struct virtio_device *dev)
 }
 
 /*
- * we support only one console, the hvc struct is a global var
- * We set the configuration at this point, since we now have a tty
+ * we support only one console, the hvc struct is a global var We set
+ * the configuration at this point, since we now have a tty
  */
 static int notifier_add_vio(struct hvc_struct *hp, int data)
 {
@@ -179,13 +174,17 @@ static void hvc_handle_input(struct virtqueue *vq)
 		hvc_kick();
 }
 
-/*D:370 Once we're further in boot, we get probed like any other virtio device.
- * At this stage we set up the output virtqueue.
+/*
+ * Once we're further in boot, we get probed like any other virtio
+ * device.  At this stage we set up the output virtqueue.
  *
- * To set up and manage our virtual console, we call hvc_alloc().  Since we
- * never remove the console device we never need this pointer again.
+ * To set up and manage our virtual console, we call hvc_alloc().
+ * Since we never remove the console device we never need this pointer
+ * again.
  *
- * Finally we put our input buffer in the input queue, ready to receive. */
+ * Finally we put our input buffer in the input queue, ready to
+ * receive.
+ */
 static int __devinit virtcons_probe(struct virtio_device *dev)
 {
 	vq_callback_t *callbacks[] = { hvc_handle_input, NULL};
@@ -203,8 +202,6 @@ static int __devinit virtcons_probe(struct virtio_device *dev)
 	}
 
 	/* Find the queues. */
-	/* FIXME: This is why we want to wean off hvc: we do nothing
-	 * when input comes in. */
 	err = vdev->config->find_vqs(vdev, 2, vqs, callbacks, names);
 	if (err)
 		goto free;
@@ -219,15 +216,18 @@ static int __devinit virtcons_probe(struct virtio_device *dev)
 	virtio_cons.notifier_del = notifier_del_vio;
 	virtio_cons.notifier_hangup = notifier_del_vio;
 
-	/* The first argument of hvc_alloc() is the virtual console number, so
-	 * we use zero.  The second argument is the parameter for the
-	 * notification mechanism (like irq number). We currently leave this
-	 * as zero, virtqueues have implicit notifications.
+	/*
+	 * The first argument of hvc_alloc() is the virtual console
+	 * number, so we use zero.  The second argument is the
+	 * parameter for the notification mechanism (like irq
+	 * number). We currently leave this as zero, virtqueues have
+	 * implicit notifications.
 	 *
-	 * The third argument is a "struct hv_ops" containing the put_chars()
-	 * get_chars(), notifier_add() and notifier_del() pointers.
-	 * The final argument is the output buffer size: we can do any size,
-	 * so we put PAGE_SIZE here. */
+	 * The third argument is a "struct hv_ops" containing the
+	 * put_chars(), get_chars(), notifier_add() and notifier_del()
+	 * pointers.  The final argument is the output buffer size: we
+	 * can do any size, so we put PAGE_SIZE here.
+	 */
 	hvc = hvc_alloc(0, 0, &virtio_cons, PAGE_SIZE);
 	if (IS_ERR(hvc)) {
 		err = PTR_ERR(hvc);
diff --git a/include/linux/virtio_console.h b/include/linux/virtio_console.h
index fe885174cc1f..9e0da40beae0 100644
--- a/include/linux/virtio_console.h
+++ b/include/linux/virtio_console.h
@@ -3,8 +3,10 @@
 #include <linux/types.h>
 #include <linux/virtio_ids.h>
 #include <linux/virtio_config.h>
-/* This header, excluding the #ifdef __KERNEL__ part, is BSD licensed so
- * anyone can use the definitions to implement compatible drivers/servers. */
+/*
+ * This header, excluding the #ifdef __KERNEL__ part, is BSD licensed so
+ * anyone can use the definitions to implement compatible drivers/servers.
+ */
 
 /* Feature bits */
 #define VIRTIO_CONSOLE_F_SIZE	0	/* Does host provide console size? */
-- 
cgit v1.2.3


From 17634ba25544d60af1968982929150efad755032 Mon Sep 17 00:00:00 2001
From: Amit Shah <amit.shah@redhat.com>
Date: Mon, 21 Dec 2009 21:03:25 +0530
Subject: virtio: console: Add a new MULTIPORT feature, support for generic
 ports

This commit adds a new feature, MULTIPORT. If the host supports this
feature as well, the config space has the number of ports defined for
that device. New ports are spawned according to this information.

The config space also has the maximum number of ports that can be
spawned for a particular device. This is useful in initializing the
appropriate number of virtqueues in advance, as ports might be
hot-plugged in later.

Using this feature, generic ports can be created which are not tied to
hvc consoles.

We also open up a private channel between the host and the guest via
which some "control" messages are exchanged for the ports, like whether
the port being spawned is a console port, resizing the console window,
etc.

Next commits will add support for hotplugging and presenting char
devices in /dev/ for bi-directional guest-host communication.

Signed-off-by: Amit Shah <amit.shah@redhat.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 drivers/char/virtio_console.c  | 392 +++++++++++++++++++++++++++++++++++------
 include/linux/virtio_console.h |  21 +++
 2 files changed, 357 insertions(+), 56 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c
index d01051060be3..a70f2b3a9e64 100644
--- a/drivers/char/virtio_console.c
+++ b/drivers/char/virtio_console.c
@@ -1,5 +1,6 @@
 /*
  * Copyright (C) 2006, 2007, 2009 Rusty Russell, IBM Corporation
+ * Copyright (C) 2009, 2010 Red Hat, Inc.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -21,6 +22,7 @@
 #include <linux/spinlock.h>
 #include <linux/virtio.h>
 #include <linux/virtio_console.h>
+#include <linux/workqueue.h>
 #include "hvc_console.h"
 
 /*
@@ -69,17 +71,6 @@ struct console {
 	u32 vtermno;
 };
 
-/*
- * This is a per-device struct that stores data common to all the
- * ports for that device (vdev->priv).
- */
-struct ports_device {
-	/* Array of per-port IO virtqueues */
-	struct virtqueue **in_vqs, **out_vqs;
-
-	struct virtio_device *vdev;
-};
-
 struct port_buffer {
 	char *buf;
 
@@ -92,8 +83,46 @@ struct port_buffer {
 	size_t offset;
 };
 
+/*
+ * This is a per-device struct that stores data common to all the
+ * ports for that device (vdev->priv).
+ */
+struct ports_device {
+	/*
+	 * Workqueue handlers where we process deferred work after
+	 * notification
+	 */
+	struct work_struct control_work;
+
+	struct list_head ports;
+
+	/* To protect the list of ports */
+	spinlock_t ports_lock;
+
+	/* To protect the vq operations for the control channel */
+	spinlock_t cvq_lock;
+
+	/* The current config space is stored here */
+	struct virtio_console_config config;
+
+	/* The virtio device we're associated with */
+	struct virtio_device *vdev;
+
+	/*
+	 * A couple of virtqueues for the control channel: one for
+	 * guest->host transfers, one for host->guest transfers
+	 */
+	struct virtqueue *c_ivq, *c_ovq;
+
+	/* Array of per-port IO virtqueues */
+	struct virtqueue **in_vqs, **out_vqs;
+};
+
 /* This struct holds the per-port data */
 struct port {
+	/* Next port in the list, head is in the ports_device */
+	struct list_head list;
+
 	/* Pointer to the parent virtio_console device */
 	struct ports_device *portdev;
 
@@ -115,6 +144,9 @@ struct port {
 	 * hooked up to an hvc console
 	 */
 	struct console cons;
+
+	/* The 'id' to identify the port with the Host */
+	u32 id;
 };
 
 /* This is the very early arch-specified put chars function. */
@@ -139,25 +171,56 @@ out:
 	return port;
 }
 
+static struct port *find_port_by_id(struct ports_device *portdev, u32 id)
+{
+	struct port *port;
+	unsigned long flags;
+
+	spin_lock_irqsave(&portdev->ports_lock, flags);
+	list_for_each_entry(port, &portdev->ports, list)
+		if (port->id == id)
+			goto out;
+	port = NULL;
+out:
+	spin_unlock_irqrestore(&portdev->ports_lock, flags);
+
+	return port;
+}
+
 static struct port *find_port_by_vq(struct ports_device *portdev,
 				    struct virtqueue *vq)
 {
 	struct port *port;
-	struct console *cons;
 	unsigned long flags;
 
-	spin_lock_irqsave(&pdrvdata_lock, flags);
-	list_for_each_entry(cons, &pdrvdata.consoles, list) {
-		port = container_of(cons, struct port, cons);
+	spin_lock_irqsave(&portdev->ports_lock, flags);
+	list_for_each_entry(port, &portdev->ports, list)
 		if (port->in_vq == vq || port->out_vq == vq)
 			goto out;
-	}
 	port = NULL;
 out:
-	spin_unlock_irqrestore(&pdrvdata_lock, flags);
+	spin_unlock_irqrestore(&portdev->ports_lock, flags);
 	return port;
 }
 
+static bool is_console_port(struct port *port)
+{
+	if (port->cons.hvc)
+		return true;
+	return false;
+}
+
+static inline bool use_multiport(struct ports_device *portdev)
+{
+	/*
+	 * This condition can be true when put_chars is called from
+	 * early_init
+	 */
+	if (!portdev->vdev)
+		return 0;
+	return portdev->vdev->features[0] & (1 << VIRTIO_CONSOLE_F_MULTIPORT);
+}
+
 static void free_buf(struct port_buffer *buf)
 {
 	kfree(buf->buf);
@@ -233,6 +296,32 @@ static bool port_has_data(struct port *port)
 	return ret;
 }
 
+static ssize_t send_control_msg(struct port *port, unsigned int event,
+				unsigned int value)
+{
+	struct scatterlist sg[1];
+	struct virtio_console_control cpkt;
+	struct virtqueue *vq;
+	int len;
+
+	if (!use_multiport(port->portdev))
+		return 0;
+
+	cpkt.id = port->id;
+	cpkt.event = event;
+	cpkt.value = value;
+
+	vq = port->portdev->c_ovq;
+
+	sg_init_one(sg, &cpkt, sizeof(cpkt));
+	if (vq->vq_ops->add_buf(vq, sg, 1, 0, &cpkt) >= 0) {
+		vq->vq_ops->kick(vq);
+		while (!vq->vq_ops->get_buf(vq, &len))
+			cpu_relax();
+	}
+	return 0;
+}
+
 static ssize_t send_buf(struct port *port, void *in_buf, size_t in_count)
 {
 	struct scatterlist sg[1];
@@ -387,24 +476,7 @@ static void notifier_del_vio(struct hvc_struct *hp, int data)
 	hp->irq_requested = 0;
 }
 
-static void hvc_handle_input(struct virtqueue *vq)
-{
-	struct port *port;
-	unsigned long flags;
-
-	port = find_port_by_vq(vq->vdev->priv, vq);
-	if (!port)
-		return;
-
-	spin_lock_irqsave(&port->inbuf_lock, flags);
-	port->inbuf = get_inbuf(port);
-	spin_unlock_irqrestore(&port->inbuf_lock, flags);
-
-	if (hvc_poll(port->cons.hvc))
-		hvc_kick();
-}
-
-/* The operations for the console. */
+/* The operations for console ports. */
 static const struct hv_ops hv_ops = {
 	.get_chars = get_chars,
 	.put_chars = put_chars,
@@ -428,7 +500,7 @@ int __init virtio_cons_early_init(int (*put_chars)(u32, const char *, int))
 	return hvc_instantiate(0, 0, &hv_ops);
 }
 
-int __devinit init_port_console(struct port *port)
+int init_port_console(struct port *port)
 {
 	int ret;
 
@@ -465,7 +537,122 @@ int __devinit init_port_console(struct port *port)
 	return 0;
 }
 
-static int __devinit add_port(struct ports_device *portdev)
+/* Any private messages that the Host and Guest want to share */
+static void handle_control_message(struct ports_device *portdev,
+				   struct port_buffer *buf)
+{
+	struct virtio_console_control *cpkt;
+	struct port *port;
+
+	cpkt = (struct virtio_console_control *)(buf->buf + buf->offset);
+
+	port = find_port_by_id(portdev, cpkt->id);
+	if (!port) {
+		/* No valid header at start of buffer.  Drop it. */
+		dev_dbg(&portdev->vdev->dev,
+			"Invalid index %u in control packet\n", cpkt->id);
+		return;
+	}
+
+	switch (cpkt->event) {
+	case VIRTIO_CONSOLE_CONSOLE_PORT:
+		if (!cpkt->value)
+			break;
+		if (is_console_port(port))
+			break;
+
+		init_port_console(port);
+		/*
+		 * Could remove the port here in case init fails - but
+		 * have to notify the host first.
+		 */
+		break;
+	case VIRTIO_CONSOLE_RESIZE:
+		if (!is_console_port(port))
+			break;
+		port->cons.hvc->irq_requested = 1;
+		resize_console(port);
+		break;
+	}
+}
+
+static void control_work_handler(struct work_struct *work)
+{
+	struct ports_device *portdev;
+	struct virtqueue *vq;
+	struct port_buffer *buf;
+	unsigned int len;
+
+	portdev = container_of(work, struct ports_device, control_work);
+	vq = portdev->c_ivq;
+
+	spin_lock(&portdev->cvq_lock);
+	while ((buf = vq->vq_ops->get_buf(vq, &len))) {
+		spin_unlock(&portdev->cvq_lock);
+
+		buf->len = len;
+		buf->offset = 0;
+
+		handle_control_message(portdev, buf);
+
+		spin_lock(&portdev->cvq_lock);
+		if (add_inbuf(portdev->c_ivq, buf) < 0) {
+			dev_warn(&portdev->vdev->dev,
+				 "Error adding buffer to queue\n");
+			free_buf(buf);
+		}
+	}
+	spin_unlock(&portdev->cvq_lock);
+}
+
+static void in_intr(struct virtqueue *vq)
+{
+	struct port *port;
+	unsigned long flags;
+
+	port = find_port_by_vq(vq->vdev->priv, vq);
+	if (!port)
+		return;
+
+	spin_lock_irqsave(&port->inbuf_lock, flags);
+	port->inbuf = get_inbuf(port);
+
+	spin_unlock_irqrestore(&port->inbuf_lock, flags);
+
+	if (is_console_port(port) && hvc_poll(port->cons.hvc))
+		hvc_kick();
+}
+
+static void control_intr(struct virtqueue *vq)
+{
+	struct ports_device *portdev;
+
+	portdev = vq->vdev->priv;
+	schedule_work(&portdev->control_work);
+}
+
+static void fill_queue(struct virtqueue *vq, spinlock_t *lock)
+{
+	struct port_buffer *buf;
+	int ret;
+
+	do {
+		buf = alloc_buf(PAGE_SIZE);
+		if (!buf)
+			break;
+
+		spin_lock_irq(lock);
+		ret = add_inbuf(vq, buf);
+		if (ret < 0) {
+			spin_unlock_irq(lock);
+			free_buf(buf);
+			break;
+		}
+		spin_unlock_irq(lock);
+	} while (ret > 0);
+}
+
+static int add_port(struct ports_device *portdev, u32 id)
 {
 	struct port *port;
 	struct port_buffer *inbuf;
@@ -478,11 +665,13 @@ static int __devinit add_port(struct ports_device *portdev)
 	}
 
 	port->portdev = portdev;
+	port->id = id;
 
 	port->inbuf = NULL;
+	port->cons.hvc = NULL;
 
-	port->in_vq = portdev->in_vqs[0];
-	port->out_vq = portdev->out_vqs[0];
+	port->in_vq = portdev->in_vqs[port->id];
+	port->out_vq = portdev->out_vqs[port->id];
 
 	spin_lock_init(&port->inbuf_lock);
 
@@ -495,9 +684,25 @@ static int __devinit add_port(struct ports_device *portdev)
 	/* Register the input buffer the first time. */
 	add_inbuf(port->in_vq, inbuf);
 
-	err = init_port_console(port);
-	if (err)
-		goto free_inbuf;
+	/*
+	 * If we're not using multiport support, this has to be a console port
+	 */
+	if (!use_multiport(port->portdev)) {
+		err = init_port_console(port);
+		if (err)
+			goto free_inbuf;
+	}
+
+	spin_lock_irq(&portdev->ports_lock);
+	list_add_tail(&port->list, &port->portdev->ports);
+	spin_unlock_irq(&portdev->ports_lock);
+
+	/*
+	 * Tell the Host we're set so that it can send us various
+	 * configuration parameters for this port (eg, port name,
+	 * caching, whether this is a console port, etc.)
+	 */
+	send_control_msg(port, VIRTIO_CONSOLE_PORT_READY, 1);
 
 	return 0;
 
@@ -514,12 +719,11 @@ static int init_vqs(struct ports_device *portdev)
 	vq_callback_t **io_callbacks;
 	char **io_names;
 	struct virtqueue **vqs;
-	u32 nr_ports, nr_queues;
+	u32 i, j, nr_ports, nr_queues;
 	int err;
 
-	/* We currently only have one port and two queues for that port */
-	nr_ports = 1;
-	nr_queues = 2;
+	nr_ports = portdev->config.max_nr_ports;
+	nr_queues = use_multiport(portdev) ? (nr_ports + 1) * 2 : 2;
 
 	vqs = kmalloc(nr_queues * sizeof(struct virtqueue *), GFP_KERNEL);
 	if (!vqs) {
@@ -549,11 +753,32 @@ static int init_vqs(struct ports_device *portdev)
 		goto free_invqs;
 	}
 
-	io_callbacks[0] = hvc_handle_input;
-	io_callbacks[1] = NULL;
-	io_names[0] = "input";
-	io_names[1] = "output";
-
+	/*
+	 * For backward compat (newer host but older guest), the host
+	 * spawns a console port first and also inits the vqs for port
+	 * 0 before others.
+	 */
+	j = 0;
+	io_callbacks[j] = in_intr;
+	io_callbacks[j + 1] = NULL;
+	io_names[j] = "input";
+	io_names[j + 1] = "output";
+	j += 2;
+
+	if (use_multiport(portdev)) {
+		io_callbacks[j] = control_intr;
+		io_callbacks[j + 1] = NULL;
+		io_names[j] = "control-i";
+		io_names[j + 1] = "control-o";
+
+		for (i = 1; i < nr_ports; i++) {
+			j += 2;
+			io_callbacks[j] = in_intr;
+			io_callbacks[j + 1] = NULL;
+			io_names[j] = "input";
+			io_names[j + 1] = "output";
+		}
+	}
 	/* Find the queues. */
 	err = portdev->vdev->config->find_vqs(portdev->vdev, nr_queues, vqs,
 					      io_callbacks,
@@ -561,9 +786,20 @@ static int init_vqs(struct ports_device *portdev)
 	if (err)
 		goto free_outvqs;
 
+	j = 0;
 	portdev->in_vqs[0] = vqs[0];
 	portdev->out_vqs[0] = vqs[1];
-
+	j += 2;
+	if (use_multiport(portdev)) {
+		portdev->c_ivq = vqs[j];
+		portdev->c_ovq = vqs[j + 1];
+
+		for (i = 1; i < nr_ports; i++) {
+			j += 2;
+			portdev->in_vqs[i] = vqs[j];
+			portdev->out_vqs[i] = vqs[j + 1];
+		}
+	}
 	kfree(io_callbacks);
 	kfree(io_names);
 	kfree(vqs);
@@ -587,11 +823,17 @@ fail:
 /*
  * Once we're further in boot, we get probed like any other virtio
  * device.
+ *
+ * If the host also supports multiple console ports, we check the
+ * config space to see how many ports the host has spawned.  We
+ * initialize each port found.
  */
 static int __devinit virtcons_probe(struct virtio_device *vdev)
 {
 	struct ports_device *portdev;
+	u32 i;
 	int err;
+	bool multiport;
 
 	portdev = kmalloc(sizeof(*portdev), GFP_KERNEL);
 	if (!portdev) {
@@ -603,16 +845,53 @@ static int __devinit virtcons_probe(struct virtio_device *vdev)
 	portdev->vdev = vdev;
 	vdev->priv = portdev;
 
+	multiport = false;
+	portdev->config.nr_ports = 1;
+	portdev->config.max_nr_ports = 1;
+	if (virtio_has_feature(vdev, VIRTIO_CONSOLE_F_MULTIPORT)) {
+		multiport = true;
+		vdev->features[0] |= 1 << VIRTIO_CONSOLE_F_MULTIPORT;
+
+		vdev->config->get(vdev, offsetof(struct virtio_console_config,
+						 nr_ports),
+				  &portdev->config.nr_ports,
+				  sizeof(portdev->config.nr_ports));
+		vdev->config->get(vdev, offsetof(struct virtio_console_config,
+						 max_nr_ports),
+				  &portdev->config.max_nr_ports,
+				  sizeof(portdev->config.max_nr_ports));
+		if (portdev->config.nr_ports > portdev->config.max_nr_ports) {
+			dev_warn(&vdev->dev,
+				 "More ports (%u) specified than allowed (%u). Will init %u ports.",
+				 portdev->config.nr_ports,
+				 portdev->config.max_nr_ports,
+				 portdev->config.max_nr_ports);
+
+			portdev->config.nr_ports = portdev->config.max_nr_ports;
+		}
+	}
+
+	/* Let the Host know we support multiple ports.*/
+	vdev->config->finalize_features(vdev);
+
 	err = init_vqs(portdev);
 	if (err < 0) {
 		dev_err(&vdev->dev, "Error %d initializing vqs\n", err);
 		goto free;
 	}
 
-	/* We only have one port. */
-	err = add_port(portdev);
-	if (err)
-		goto free_vqs;
+	spin_lock_init(&portdev->ports_lock);
+	INIT_LIST_HEAD(&portdev->ports);
+
+	if (multiport) {
+		spin_lock_init(&portdev->cvq_lock);
+		INIT_WORK(&portdev->control_work, &control_work_handler);
+
+		fill_queue(portdev->c_ivq, &portdev->cvq_lock);
+	}
+
+	for (i = 0; i < portdev->config.nr_ports; i++)
+		add_port(portdev, i);
 
 	/* Start using the new console output. */
 	early_put_chars = NULL;
@@ -635,6 +914,7 @@ static struct virtio_device_id id_table[] = {
 
 static unsigned int features[] = {
 	VIRTIO_CONSOLE_F_SIZE,
+	VIRTIO_CONSOLE_F_MULTIPORT,
 };
 
 static struct virtio_driver virtio_console = {
diff --git a/include/linux/virtio_console.h b/include/linux/virtio_console.h
index 9e0da40beae0..f4d183b5b493 100644
--- a/include/linux/virtio_console.h
+++ b/include/linux/virtio_console.h
@@ -6,18 +6,39 @@
 /*
  * This header, excluding the #ifdef __KERNEL__ part, is BSD licensed so
  * anyone can use the definitions to implement compatible drivers/servers.
+ *
+ * Copyright (C) Red Hat, Inc., 2009, 2010
  */
 
 /* Feature bits */
 #define VIRTIO_CONSOLE_F_SIZE	0	/* Does host provide console size? */
+#define VIRTIO_CONSOLE_F_MULTIPORT 1	/* Does host provide multiple ports? */
 
 struct virtio_console_config {
 	/* colums of the screens */
 	__u16 cols;
 	/* rows of the screens */
 	__u16 rows;
+	/* max. number of ports this device can hold */
+	__u32 max_nr_ports;
+	/* number of ports added so far */
+	__u32 nr_ports;
 } __attribute__((packed));
 
+/*
+ * A message that's passed between the Host and the Guest for a
+ * particular port.
+ */
+struct virtio_console_control {
+	__u32 id;		/* Port number */
+	__u16 event;		/* The kind of control event (see below) */
+	__u16 value;		/* Extra information for the key */
+};
+
+/* Some events for control messages */
+#define VIRTIO_CONSOLE_PORT_READY	0
+#define VIRTIO_CONSOLE_CONSOLE_PORT	1
+#define VIRTIO_CONSOLE_RESIZE		2
 
 #ifdef __KERNEL__
 int __init virtio_cons_early_init(int (*put_chars)(u32, const char *, int));
-- 
cgit v1.2.3


From 2030fa496d74b49220308eaccf656e2338019cfd Mon Sep 17 00:00:00 2001
From: Amit Shah <amit.shah@redhat.com>
Date: Mon, 21 Dec 2009 21:49:30 +0530
Subject: virtio: console: Add file operations to ports for
 open/read/write/poll

Allow guest userspace applications to open, read from, write to, poll
the ports via the char dev interface.

When a port gets opened, a notification is sent to the host via a
control message indicating a connection has been established. Similarly,
on closing of the port, a notification is sent indicating disconnection.

Signed-off-by: Amit Shah <amit.shah@redhat.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 drivers/char/virtio_console.c  | 164 ++++++++++++++++++++++++++++++++++++++++-
 include/linux/virtio_console.h |   1 +
 2 files changed, 164 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c
index 64ef476d6557..ece1546fbb20 100644
--- a/drivers/char/virtio_console.c
+++ b/drivers/char/virtio_console.c
@@ -19,11 +19,15 @@
 #include <linux/cdev.h>
 #include <linux/device.h>
 #include <linux/err.h>
+#include <linux/fs.h>
 #include <linux/init.h>
 #include <linux/list.h>
+#include <linux/poll.h>
+#include <linux/sched.h>
 #include <linux/spinlock.h>
 #include <linux/virtio.h>
 #include <linux/virtio_console.h>
+#include <linux/wait.h>
 #include <linux/workqueue.h>
 #include "hvc_console.h"
 
@@ -163,8 +167,14 @@ struct port {
 	struct cdev cdev;
 	struct device *dev;
 
+	/* A waitqueue for poll() or blocking read operations */
+	wait_queue_head_t waitqueue;
+
 	/* The 'id' to identify the port with the Host */
 	u32 id;
+
+	/* Is the host device open */
+	bool host_connected;
 };
 
 /* This is the very early arch-specified put chars function. */
@@ -417,6 +427,146 @@ static ssize_t fill_readbuf(struct port *port, char *out_buf, size_t out_count,
 	return out_count;
 }
 
+/* The condition that must be true for polling to end */
+static bool wait_is_over(struct port *port)
+{
+	return port_has_data(port) || !port->host_connected;
+}
+
+static ssize_t port_fops_read(struct file *filp, char __user *ubuf,
+			      size_t count, loff_t *offp)
+{
+	struct port *port;
+	ssize_t ret;
+
+	port = filp->private_data;
+
+	if (!port_has_data(port)) {
+		/*
+		 * If nothing's connected on the host just return 0 in
+		 * case of list_empty; this tells the userspace app
+		 * that there's no connection
+		 */
+		if (!port->host_connected)
+			return 0;
+		if (filp->f_flags & O_NONBLOCK)
+			return -EAGAIN;
+
+		ret = wait_event_interruptible(port->waitqueue,
+					       wait_is_over(port));
+		if (ret < 0)
+			return ret;
+	}
+	/*
+	 * We could've received a disconnection message while we were
+	 * waiting for more data.
+	 *
+	 * This check is not clubbed in the if() statement above as we
+	 * might receive some data as well as the host could get
+	 * disconnected after we got woken up from our wait.  So we
+	 * really want to give off whatever data we have and only then
+	 * check for host_connected.
+	 */
+	if (!port_has_data(port) && !port->host_connected)
+		return 0;
+
+	return fill_readbuf(port, ubuf, count, true);
+}
+
+static ssize_t port_fops_write(struct file *filp, const char __user *ubuf,
+			       size_t count, loff_t *offp)
+{
+	struct port *port;
+	char *buf;
+	ssize_t ret;
+
+	port = filp->private_data;
+
+	count = min((size_t)(32 * 1024), count);
+
+	buf = kmalloc(count, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	ret = copy_from_user(buf, ubuf, count);
+	if (ret) {
+		ret = -EFAULT;
+		goto free_buf;
+	}
+
+	ret = send_buf(port, buf, count);
+free_buf:
+	kfree(buf);
+	return ret;
+}
+
+static unsigned int port_fops_poll(struct file *filp, poll_table *wait)
+{
+	struct port *port;
+	unsigned int ret;
+
+	port = filp->private_data;
+	poll_wait(filp, &port->waitqueue, wait);
+
+	ret = 0;
+	if (port->inbuf)
+		ret |= POLLIN | POLLRDNORM;
+	if (port->host_connected)
+		ret |= POLLOUT;
+	if (!port->host_connected)
+		ret |= POLLHUP;
+
+	return ret;
+}
+
+static int port_fops_release(struct inode *inode, struct file *filp)
+{
+	struct port *port;
+
+	port = filp->private_data;
+
+	/* Notify host of port being closed */
+	send_control_msg(port, VIRTIO_CONSOLE_PORT_OPEN, 0);
+
+	return 0;
+}
+
+static int port_fops_open(struct inode *inode, struct file *filp)
+{
+	struct cdev *cdev = inode->i_cdev;
+	struct port *port;
+
+	port = container_of(cdev, struct port, cdev);
+	filp->private_data = port;
+
+	/*
+	 * Don't allow opening of console port devices -- that's done
+	 * via /dev/hvc
+	 */
+	if (is_console_port(port))
+		return -ENXIO;
+
+	/* Notify host of port being opened */
+	send_control_msg(filp->private_data, VIRTIO_CONSOLE_PORT_OPEN, 1);
+
+	return 0;
+}
+
+/*
+ * The file operations that we support: programs in the guest can open
+ * a console device, read from it, write to it, poll for data and
+ * close it.  The devices are at
+ *   /dev/vport<device number>p<port number>
+ */
+static const struct file_operations port_fops = {
+	.owner = THIS_MODULE,
+	.open  = port_fops_open,
+	.read  = port_fops_read,
+	.write = port_fops_write,
+	.poll  = port_fops_poll,
+	.release = port_fops_release,
+};
+
 /*
  * The put_chars() callback is pretty straightforward.
  *
@@ -560,6 +710,9 @@ int init_port_console(struct port *port)
 	list_add_tail(&port->cons.list, &pdrvdata.consoles);
 	spin_unlock_irq(&pdrvdata_lock);
 
+	/* Notify host of port being opened */
+	send_control_msg(port, VIRTIO_CONSOLE_PORT_OPEN, 1);
+
 	return 0;
 }
 
@@ -599,6 +752,10 @@ static void handle_control_message(struct ports_device *portdev,
 		port->cons.hvc->irq_requested = 1;
 		resize_console(port);
 		break;
+	case VIRTIO_CONSOLE_PORT_OPEN:
+		port->host_connected = cpkt->value;
+		wake_up_interruptible(&port->waitqueue);
+		break;
 	}
 }
 
@@ -645,6 +802,8 @@ static void in_intr(struct virtqueue *vq)
 
 	spin_unlock_irqrestore(&port->inbuf_lock, flags);
 
+	wake_up_interruptible(&port->waitqueue);
+
 	if (is_console_port(port) && hvc_poll(port->cons.hvc))
 		hvc_kick();
 }
@@ -697,10 +856,12 @@ static int add_port(struct ports_device *portdev, u32 id)
 	port->inbuf = NULL;
 	port->cons.hvc = NULL;
 
+	port->host_connected = false;
+
 	port->in_vq = portdev->in_vqs[port->id];
 	port->out_vq = portdev->out_vqs[port->id];
 
-	cdev_init(&port->cdev, NULL);
+	cdev_init(&port->cdev, &port_fops);
 
 	devt = MKDEV(portdev->chr_major, id);
 	err = cdev_add(&port->cdev, devt, 1);
@@ -721,6 +882,7 @@ static int add_port(struct ports_device *portdev, u32 id)
 	}
 
 	spin_lock_init(&port->inbuf_lock);
+	init_waitqueue_head(&port->waitqueue);
 
 	inbuf = alloc_buf(PAGE_SIZE);
 	if (!inbuf) {
diff --git a/include/linux/virtio_console.h b/include/linux/virtio_console.h
index f4d183b5b493..bd0e2a596f93 100644
--- a/include/linux/virtio_console.h
+++ b/include/linux/virtio_console.h
@@ -39,6 +39,7 @@ struct virtio_console_control {
 #define VIRTIO_CONSOLE_PORT_READY	0
 #define VIRTIO_CONSOLE_CONSOLE_PORT	1
 #define VIRTIO_CONSOLE_RESIZE		2
+#define VIRTIO_CONSOLE_PORT_OPEN	3
 
 #ifdef __KERNEL__
 int __init virtio_cons_early_init(int (*put_chars)(u32, const char *, int));
-- 
cgit v1.2.3


From 431edb8a8bca71008fefceadf53b9315ef7196ec Mon Sep 17 00:00:00 2001
From: Amit Shah <amit.shah@redhat.com>
Date: Mon, 21 Dec 2009 21:57:40 +0530
Subject: virtio: console: Register with sysfs and create a 'name' attribute
 for ports

The host can set a name for ports so that they're easily discoverable
instead of going by the /dev/vportNpn naming. This attribute will be
placed in /sys/class/virtio-ports/vportNpn/name. udev scripts can then
create symlinks to the port using the name.

Signed-off-by: Amit Shah <amit.shah@redhat.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 drivers/char/virtio_console.c  | 57 ++++++++++++++++++++++++++++++++++++++++++
 include/linux/virtio_console.h |  1 +
 2 files changed, 58 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c
index 5e3503a31312..99d182b132c4 100644
--- a/drivers/char/virtio_console.c
+++ b/drivers/char/virtio_console.c
@@ -170,6 +170,9 @@ struct port {
 	/* A waitqueue for poll() or blocking read operations */
 	wait_queue_head_t waitqueue;
 
+	/* The 'name' of the port that we expose via sysfs properties */
+	char *name;
+
 	/* The 'id' to identify the port with the Host */
 	u32 id;
 
@@ -732,12 +735,36 @@ int init_port_console(struct port *port)
 	return 0;
 }
 
+static ssize_t show_port_name(struct device *dev,
+			      struct device_attribute *attr, char *buffer)
+{
+	struct port *port;
+
+	port = dev_get_drvdata(dev);
+
+	return sprintf(buffer, "%s\n", port->name);
+}
+
+static DEVICE_ATTR(name, S_IRUGO, show_port_name, NULL);
+
+static struct attribute *port_sysfs_entries[] = {
+	&dev_attr_name.attr,
+	NULL
+};
+
+static struct attribute_group port_attribute_group = {
+	.name = NULL,		/* put in device directory */
+	.attrs = port_sysfs_entries,
+};
+
 /* Any private messages that the Host and Guest want to share */
 static void handle_control_message(struct ports_device *portdev,
 				   struct port_buffer *buf)
 {
 	struct virtio_console_control *cpkt;
 	struct port *port;
+	size_t name_size;
+	int err;
 
 	cpkt = (struct virtio_console_control *)(buf->buf + buf->offset);
 
@@ -772,6 +799,35 @@ static void handle_control_message(struct ports_device *portdev,
 		port->host_connected = cpkt->value;
 		wake_up_interruptible(&port->waitqueue);
 		break;
+	case VIRTIO_CONSOLE_PORT_NAME:
+		/*
+		 * Skip the size of the header and the cpkt to get the size
+		 * of the name that was sent
+		 */
+		name_size = buf->len - buf->offset - sizeof(*cpkt) + 1;
+
+		port->name = kmalloc(name_size, GFP_KERNEL);
+		if (!port->name) {
+			dev_err(port->dev,
+				"Not enough space to store port name\n");
+			break;
+		}
+		strncpy(port->name, buf->buf + buf->offset + sizeof(*cpkt),
+			name_size - 1);
+		port->name[name_size - 1] = 0;
+
+		/*
+		 * Since we only have one sysfs attribute, 'name',
+		 * create it only if we have a name for the port.
+		 */
+		err = sysfs_create_group(&port->dev->kobj,
+					 &port_attribute_group);
+		if (err)
+			dev_err(port->dev,
+				"Error %d creating sysfs device attributes\n",
+				err);
+
+		break;
 	}
 }
 
@@ -869,6 +925,7 @@ static int add_port(struct ports_device *portdev, u32 id)
 	port->portdev = portdev;
 	port->id = id;
 
+	port->name = NULL;
 	port->inbuf = NULL;
 	port->cons.hvc = NULL;
 
diff --git a/include/linux/virtio_console.h b/include/linux/virtio_console.h
index bd0e2a596f93..1ebf007812a8 100644
--- a/include/linux/virtio_console.h
+++ b/include/linux/virtio_console.h
@@ -40,6 +40,7 @@ struct virtio_console_control {
 #define VIRTIO_CONSOLE_CONSOLE_PORT	1
 #define VIRTIO_CONSOLE_RESIZE		2
 #define VIRTIO_CONSOLE_PORT_OPEN	3
+#define VIRTIO_CONSOLE_PORT_NAME	4
 
 #ifdef __KERNEL__
 int __init virtio_cons_early_init(int (*put_chars)(u32, const char *, int));
-- 
cgit v1.2.3


From 1f7aa42d166cd104b0700d61efe2064178a3f6da Mon Sep 17 00:00:00 2001
From: Amit Shah <amit.shah@redhat.com>
Date: Mon, 21 Dec 2009 22:27:31 +0530
Subject: virtio: console: Add ability to hot-unplug ports

Remove port data; deregister from the hvc core if it's a console port.

Signed-off-by: Amit Shah <amit.shah@redhat.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 drivers/char/virtio_console.c  | 65 ++++++++++++++++++++++++++++++++++++++++--
 include/linux/virtio_console.h |  1 +
 2 files changed, 64 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c
index 7c53f58c87ba..9f20fda9c56f 100644
--- a/drivers/char/virtio_console.c
+++ b/drivers/char/virtio_console.c
@@ -783,6 +783,36 @@ static struct attribute_group port_attribute_group = {
 	.attrs = port_sysfs_entries,
 };
 
+/* Remove all port-specific data. */
+static int remove_port(struct port *port)
+{
+	spin_lock_irq(&port->portdev->ports_lock);
+	list_del(&port->list);
+	spin_unlock_irq(&port->portdev->ports_lock);
+
+	if (is_console_port(port)) {
+		spin_lock_irq(&pdrvdata_lock);
+		list_del(&port->cons.list);
+		spin_unlock_irq(&pdrvdata_lock);
+		hvc_remove(port->cons.hvc);
+	}
+	if (port->guest_connected)
+		send_control_msg(port, VIRTIO_CONSOLE_PORT_OPEN, 0);
+
+	while (port->in_vq->vq_ops->detach_unused_buf(port->in_vq))
+		;
+
+	sysfs_remove_group(&port->dev->kobj, &port_attribute_group);
+	device_destroy(pdrvdata.class, port->dev->devt);
+	cdev_del(&port->cdev);
+
+	discard_port_data(port);
+	kfree(port->name);
+
+	kfree(port);
+	return 0;
+}
+
 /* Any private messages that the Host and Guest want to share */
 static void handle_control_message(struct ports_device *portdev,
 				   struct port_buffer *buf)
@@ -854,6 +884,32 @@ static void handle_control_message(struct ports_device *portdev,
 				err);
 
 		break;
+	case VIRTIO_CONSOLE_PORT_REMOVE:
+		/*
+		 * Hot unplug the port.  We don't decrement nr_ports
+		 * since we don't want to deal with extra complexities
+		 * of using the lowest-available port id: We can just
+		 * pick up the nr_ports number as the id and not have
+		 * userspace send it to us.  This helps us in two
+		 * ways:
+		 *
+		 * - We don't need to have a 'port_id' field in the
+		 *   config space when a port is hot-added.  This is a
+		 *   good thing as we might queue up multiple hotplug
+		 *   requests issued in our workqueue.
+		 *
+		 * - Another way to deal with this would have been to
+		 *   use a bitmap of the active ports and select the
+		 *   lowest non-active port from that map.  That
+		 *   bloats the already tight config space and we
+		 *   would end up artificially limiting the
+		 *   max. number of ports to sizeof(bitmap).  Right
+		 *   now we can support 2^32 ports (as the port id is
+		 *   stored in a u32 type).
+		 *
+		 */
+		remove_port(port);
+		break;
 	}
 }
 
@@ -1078,12 +1134,17 @@ static void config_work_handler(struct work_struct *work)
 		/*
 		 * Port 0 got hot-added.  Since we already did all the
 		 * other initialisation for it, just tell the Host
-		 * that the port is ready.
+		 * that the port is ready if we find the port.  In
+		 * case the port was hot-removed earlier, we call
+		 * add_port to add the port.
 		 */
 		struct port *port;
 
 		port = find_port_by_id(portdev, 0);
-		send_control_msg(port, VIRTIO_CONSOLE_PORT_READY, 1);
+		if (!port)
+			add_port(portdev, 0);
+		else
+			send_control_msg(port, VIRTIO_CONSOLE_PORT_READY, 1);
 		return;
 	}
 	if (virtconconf.nr_ports > portdev->config.max_nr_ports) {
diff --git a/include/linux/virtio_console.h b/include/linux/virtio_console.h
index 1ebf007812a8..ae4f039515b4 100644
--- a/include/linux/virtio_console.h
+++ b/include/linux/virtio_console.h
@@ -41,6 +41,7 @@ struct virtio_console_control {
 #define VIRTIO_CONSOLE_RESIZE		2
 #define VIRTIO_CONSOLE_PORT_OPEN	3
 #define VIRTIO_CONSOLE_PORT_NAME	4
+#define VIRTIO_CONSOLE_PORT_REMOVE	5
 
 #ifdef __KERNEL__
 int __init virtio_cons_early_init(int (*put_chars)(u32, const char *, int));
-- 
cgit v1.2.3


From a712ffbc199849364c46e9112b93b66de08e2c26 Mon Sep 17 00:00:00 2001
From: Jesse Barnes <jbarnes@virtuousgeek.org>
Date: Thu, 4 Feb 2010 10:59:27 -0800
Subject: x86/PCI: Moorestown PCI support

The Moorestown platform only has a few devices that actually support
PCI config cycles.  The rest of the devices use an in-RAM MCFG space
for the purposes of device enumeration and initialization.

There are a few uglies in the fake support, like BAR sizes that aren't
a power of two, sizing detection, and writes to the real devices, but
other than that it's pretty straightforward.

Another way to think of this is not really as PCI at all, but just a
table in RAM describing which devices are present, their capabilities
and their offsets in MMIO space.  This could have been done with a
special new firmware table on this platform, but given that we do have
some real PCI devices too, simply describing things in an MCFG type
space was pretty simple.

Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
LKML-Reference: <43F901BD926A4E43B106BF17856F07559FB80D08@orsmsx508.amr.corp.intel.com>
Signed-off-by: Jacob Pan <jacob.jun.pan@intel.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/pci/Makefile    |   2 +-
 arch/x86/pci/mrst.c      | 258 +++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/pci_regs.h |   1 +
 3 files changed, 260 insertions(+), 1 deletion(-)
 create mode 100644 arch/x86/pci/mrst.c

(limited to 'include/linux')

diff --git a/arch/x86/pci/Makefile b/arch/x86/pci/Makefile
index 39fba37f702f..4753ebc19cae 100644
--- a/arch/x86/pci/Makefile
+++ b/arch/x86/pci/Makefile
@@ -13,7 +13,7 @@ obj-$(CONFIG_X86_VISWS)		+= visws.o
 
 obj-$(CONFIG_X86_NUMAQ)		+= numaq_32.o
 
-obj-y				+= common.o early.o
+obj-y				+= common.o early.o mrst.o
 obj-y				+= amd_bus.o
 obj-$(CONFIG_X86_64)		+= bus_numa.o
 
diff --git a/arch/x86/pci/mrst.c b/arch/x86/pci/mrst.c
new file mode 100644
index 000000000000..6e9e1a35a5d7
--- /dev/null
+++ b/arch/x86/pci/mrst.c
@@ -0,0 +1,258 @@
+/*
+ * Moorestown PCI support
+ *   Copyright (c) 2008 Intel Corporation
+ *     Jesse Barnes <jesse.barnes@intel.com>
+ *
+ * Moorestown has an interesting PCI implementation:
+ *   - configuration space is memory mapped (as defined by MCFG)
+ *   - Lincroft devices also have a real, type 1 configuration space
+ *   - Early Lincroft silicon has a type 1 access bug that will cause
+ *     a hang if non-existent devices are accessed
+ *   - some devices have the "fixed BAR" capability, which means
+ *     they can't be relocated or modified; check for that during
+ *     BAR sizing
+ *
+ * So, we use the MCFG space for all reads and writes, but also send
+ * Lincroft writes to type 1 space.  But only read/write if the device
+ * actually exists, otherwise return all 1s for reads and bit bucket
+ * the writes.
+ */
+
+#include <linux/sched.h>
+#include <linux/pci.h>
+#include <linux/ioport.h>
+#include <linux/init.h>
+#include <linux/dmi.h>
+
+#include <asm/acpi.h>
+#include <asm/segment.h>
+#include <asm/io.h>
+#include <asm/smp.h>
+#include <asm/pci_x86.h>
+#include <asm/hw_irq.h>
+#include <asm/io_apic.h>
+
+#define PCIE_CAP_OFFSET	0x100
+
+/* Fixed BAR fields */
+#define PCIE_VNDR_CAP_ID_FIXED_BAR 0x00	/* Fixed BAR (TBD) */
+#define PCI_FIXED_BAR_0_SIZE	0x04
+#define PCI_FIXED_BAR_1_SIZE	0x08
+#define PCI_FIXED_BAR_2_SIZE	0x0c
+#define PCI_FIXED_BAR_3_SIZE	0x10
+#define PCI_FIXED_BAR_4_SIZE	0x14
+#define PCI_FIXED_BAR_5_SIZE	0x1c
+
+/**
+ * fixed_bar_cap - return the offset of the fixed BAR cap if found
+ * @bus: PCI bus
+ * @devfn: device in question
+ *
+ * Look for the fixed BAR cap on @bus and @devfn, returning its offset
+ * if found or 0 otherwise.
+ */
+static int fixed_bar_cap(struct pci_bus *bus, unsigned int devfn)
+{
+	int pos;
+	u32 pcie_cap = 0, cap_data;
+
+	pos = PCIE_CAP_OFFSET;
+	while (pos) {
+		if (raw_pci_ext_ops->read(pci_domain_nr(bus), bus->number,
+					  devfn, pos, 4, &pcie_cap))
+			return 0;
+
+		if (pcie_cap == 0xffffffff)
+			return 0;
+
+		if (PCI_EXT_CAP_ID(pcie_cap) == PCI_EXT_CAP_ID_VNDR) {
+			raw_pci_ext_ops->read(pci_domain_nr(bus), bus->number,
+					      devfn, pos + 4, 4, &cap_data);
+			if ((cap_data & 0xffff) == PCIE_VNDR_CAP_ID_FIXED_BAR)
+				return pos;
+		}
+
+		pos = pcie_cap >> 20;
+	}
+
+	return 0;
+}
+
+static int pci_device_update_fixed(struct pci_bus *bus, unsigned int devfn,
+				   int reg, int len, u32 val, int offset)
+{
+	u32 size;
+	unsigned int domain, busnum;
+	int bar = (reg - PCI_BASE_ADDRESS_0) >> 2;
+
+	domain = pci_domain_nr(bus);
+	busnum = bus->number;
+
+	if (val == ~0 && len == 4) {
+		unsigned long decode;
+
+		raw_pci_ext_ops->read(domain, busnum, devfn,
+			       offset + 8 + (bar * 4), 4, &size);
+
+		/* Turn the size into a decode pattern for the sizing code */
+		if (size) {
+			decode = size - 1;
+			decode |= decode >> 1;
+			decode |= decode >> 2;
+			decode |= decode >> 4;
+			decode |= decode >> 8;
+			decode |= decode >> 16;
+			decode++;
+			decode = ~(decode - 1);
+		} else {
+			decode = ~0;
+		}
+
+		/*
+		 * If val is all ones, the core code is trying to size the reg,
+		 * so update the mmconfig space with the real size.
+		 *
+		 * Note: this assumes the fixed size we got is a power of two.
+		 */
+		return raw_pci_ext_ops->write(domain, busnum, devfn, reg, 4,
+				       decode);
+	}
+
+	/* This is some other kind of BAR write, so just do it. */
+	return raw_pci_ext_ops->write(domain, busnum, devfn, reg, len, val);
+}
+
+/**
+ * type1_access_ok - check whether to use type 1
+ * @bus: bus number
+ * @devfn: device & function in question
+ *
+ * If the bus is on a Lincroft chip and it exists, or is not on a Lincroft at
+ * all, the we can go ahead with any reads & writes.  If it's on a Lincroft,
+ * but doesn't exist, avoid the access altogether to keep the chip from
+ * hanging.
+ */
+static bool type1_access_ok(unsigned int bus, unsigned int devfn, int reg)
+{
+	/* This is a workaround for A0 LNC bug where PCI status register does
+	 * not have new CAP bit set. can not be written by SW either.
+	 *
+	 * PCI header type in real LNC indicates a single function device, this
+	 * will prevent probing other devices under the same function in PCI
+	 * shim. Therefore, use the header type in shim instead.
+	 */
+	if (reg >= 0x100 || reg == PCI_STATUS || reg == PCI_HEADER_TYPE)
+		return 0;
+	if (bus == 0 && (devfn == PCI_DEVFN(2, 0) || devfn == PCI_DEVFN(0, 0)))
+		return 1;
+	return 0; /* langwell on others */
+}
+
+static int pci_read(struct pci_bus *bus, unsigned int devfn, int where,
+		    int size, u32 *value)
+{
+	if (type1_access_ok(bus->number, devfn, where))
+		return pci_direct_conf1.read(pci_domain_nr(bus), bus->number,
+					devfn, where, size, value);
+	return raw_pci_ext_ops->read(pci_domain_nr(bus), bus->number,
+			      devfn, where, size, value);
+}
+
+static int pci_write(struct pci_bus *bus, unsigned int devfn, int where,
+		     int size, u32 value)
+{
+	int offset;
+
+	/* On MRST, there is no PCI ROM BAR, this will cause a subsequent read
+	 * to ROM BAR return 0 then being ignored.
+	 */
+	if (where == PCI_ROM_ADDRESS)
+		return 0;
+
+	/*
+	 * Devices with fixed BARs need special handling:
+	 *   - BAR sizing code will save, write ~0, read size, restore
+	 *   - so writes to fixed BARs need special handling
+	 *   - other writes to fixed BAR devices should go through mmconfig
+	 */
+	offset = fixed_bar_cap(bus, devfn);
+	if (offset &&
+	    (where >= PCI_BASE_ADDRESS_0 && where <= PCI_BASE_ADDRESS_5)) {
+		return pci_device_update_fixed(bus, devfn, where, size, value,
+					       offset);
+	}
+
+	/*
+	 * On Moorestown update both real & mmconfig space
+	 * Note: early Lincroft silicon can't handle type 1 accesses to
+	 *       non-existent devices, so just eat the write in that case.
+	 */
+	if (type1_access_ok(bus->number, devfn, where))
+		return pci_direct_conf1.write(pci_domain_nr(bus), bus->number,
+					      devfn, where, size, value);
+	return raw_pci_ext_ops->write(pci_domain_nr(bus), bus->number, devfn,
+			       where, size, value);
+}
+
+static int mrst_pci_irq_enable(struct pci_dev *dev)
+{
+	u8 pin;
+	struct io_apic_irq_attr irq_attr;
+
+	pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin);
+
+	/* MRST only have IOAPIC, the PCI irq lines are 1:1 mapped to
+	 * IOAPIC RTE entries, so we just enable RTE for the device.
+	 */
+	irq_attr.ioapic = mp_find_ioapic(dev->irq);
+	irq_attr.ioapic_pin = dev->irq;
+	irq_attr.trigger = 1; /* level */
+	irq_attr.polarity = 1; /* active low */
+	io_apic_set_pci_routing(&dev->dev, dev->irq, &irq_attr);
+
+	return 0;
+}
+
+struct pci_ops pci_mrst_ops = {
+	.read = pci_read,
+	.write = pci_write,
+};
+
+/**
+ * pci_mrst_init - installs pci_mrst_ops
+ *
+ * Moorestown has an interesting PCI implementation (see above).
+ * Called when the early platform detection installs it.
+ */
+int __init pci_mrst_init(void)
+{
+	printk(KERN_INFO "Moorestown platform detected, using MRST PCI ops\n");
+	pci_mmcfg_late_init();
+	pcibios_enable_irq = mrst_pci_irq_enable;
+	pci_root_ops = pci_mrst_ops;
+	/* Continue with standard init */
+	return 1;
+}
+
+/*
+ * Langwell devices reside at fixed offsets, don't try to move them.
+ */
+static void __devinit pci_fixed_bar_fixup(struct pci_dev *dev)
+{
+	unsigned long offset;
+	u32 size;
+	int i;
+
+	/* Fixup the BAR sizes for fixed BAR devices and make them unmoveable */
+	offset = fixed_bar_cap(dev->bus, dev->devfn);
+	if (!offset || PCI_DEVFN(2, 0) == dev->devfn ||
+	    PCI_DEVFN(2, 2) == dev->devfn)
+		return;
+
+	for (i = 0; i < PCI_ROM_RESOURCE; i++) {
+		pci_read_config_dword(dev, offset + 8 + (i * 4), &size);
+		dev->resource[i].end = dev->resource[i].start + size - 1;
+		dev->resource[i].flags |= IORESOURCE_PCI_FIXED;
+	}
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, pci_fixed_bar_fixup);
diff --git a/include/linux/pci_regs.h b/include/linux/pci_regs.h
index 9f2ad0aa3c39..c8f302991b66 100644
--- a/include/linux/pci_regs.h
+++ b/include/linux/pci_regs.h
@@ -507,6 +507,7 @@
 #define PCI_EXT_CAP_ID_VC	2
 #define PCI_EXT_CAP_ID_DSN	3
 #define PCI_EXT_CAP_ID_PWR	4
+#define PCI_EXT_CAP_ID_VNDR	11
 #define PCI_EXT_CAP_ID_ACS	13
 #define PCI_EXT_CAP_ID_ARI	14
 #define PCI_EXT_CAP_ID_ATS	15
-- 
cgit v1.2.3


From c5ecc484c528ff50bdbb16fbfbac758ee368b329 Mon Sep 17 00:00:00 2001
From: Akinobu Mita <akinobu.mita@gmail.com>
Date: Wed, 24 Feb 2010 08:30:08 +0100
Subject: pktcdvd: use BIO list management functions

Now that the bio list management stuff is generic, convert pktcdvd to
use bio lists instead of its own private bio list implementation.

Signed-off-by: Akinobu Mita <akinobu.mita@gmail.com>
Acked-by: Peter Osterlund <petero2@telia.com>
Cc: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 drivers/block/pktcdvd.c | 89 ++++++++++++-------------------------------------
 include/linux/pktcdvd.h | 10 +++---
 2 files changed, 25 insertions(+), 74 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index 68b5957f107c..7cd2973ebb7b 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -569,6 +569,7 @@ static struct packet_data *pkt_alloc_packet_data(int frames)
 	}
 
 	spin_lock_init(&pkt->lock);
+	bio_list_init(&pkt->orig_bios);
 
 	for (i = 0; i < frames; i++) {
 		struct bio *bio = pkt_bio_alloc(1);
@@ -720,43 +721,6 @@ static void pkt_rbtree_insert(struct pktcdvd_device *pd, struct pkt_rb_node *nod
 	pd->bio_queue_size++;
 }
 
-/*
- * Add a bio to a single linked list defined by its head and tail pointers.
- */
-static void pkt_add_list_last(struct bio *bio, struct bio **list_head, struct bio **list_tail)
-{
-	bio->bi_next = NULL;
-	if (*list_tail) {
-		BUG_ON((*list_head) == NULL);
-		(*list_tail)->bi_next = bio;
-		(*list_tail) = bio;
-	} else {
-		BUG_ON((*list_head) != NULL);
-		(*list_head) = bio;
-		(*list_tail) = bio;
-	}
-}
-
-/*
- * Remove and return the first bio from a single linked list defined by its
- * head and tail pointers.
- */
-static inline struct bio *pkt_get_list_first(struct bio **list_head, struct bio **list_tail)
-{
-	struct bio *bio;
-
-	if (*list_head == NULL)
-		return NULL;
-
-	bio = *list_head;
-	*list_head = bio->bi_next;
-	if (*list_head == NULL)
-		*list_tail = NULL;
-
-	bio->bi_next = NULL;
-	return bio;
-}
-
 /*
  * Send a packet_command to the underlying block device and
  * wait for completion.
@@ -876,13 +840,10 @@ static noinline_for_stack int pkt_set_speed(struct pktcdvd_device *pd,
 static void pkt_queue_bio(struct pktcdvd_device *pd, struct bio *bio)
 {
 	spin_lock(&pd->iosched.lock);
-	if (bio_data_dir(bio) == READ) {
-		pkt_add_list_last(bio, &pd->iosched.read_queue,
-				  &pd->iosched.read_queue_tail);
-	} else {
-		pkt_add_list_last(bio, &pd->iosched.write_queue,
-				  &pd->iosched.write_queue_tail);
-	}
+	if (bio_data_dir(bio) == READ)
+		bio_list_add(&pd->iosched.read_queue, bio);
+	else
+		bio_list_add(&pd->iosched.write_queue, bio);
 	spin_unlock(&pd->iosched.lock);
 
 	atomic_set(&pd->iosched.attention, 1);
@@ -917,8 +878,8 @@ static void pkt_iosched_process_queue(struct pktcdvd_device *pd)
 		int reads_queued, writes_queued;
 
 		spin_lock(&pd->iosched.lock);
-		reads_queued = (pd->iosched.read_queue != NULL);
-		writes_queued = (pd->iosched.write_queue != NULL);
+		reads_queued = !bio_list_empty(&pd->iosched.read_queue);
+		writes_queued = !bio_list_empty(&pd->iosched.write_queue);
 		spin_unlock(&pd->iosched.lock);
 
 		if (!reads_queued && !writes_queued)
@@ -927,7 +888,7 @@ static void pkt_iosched_process_queue(struct pktcdvd_device *pd)
 		if (pd->iosched.writing) {
 			int need_write_seek = 1;
 			spin_lock(&pd->iosched.lock);
-			bio = pd->iosched.write_queue;
+			bio = bio_list_peek(&pd->iosched.write_queue);
 			spin_unlock(&pd->iosched.lock);
 			if (bio && (bio->bi_sector == pd->iosched.last_write))
 				need_write_seek = 0;
@@ -950,13 +911,10 @@ static void pkt_iosched_process_queue(struct pktcdvd_device *pd)
 		}
 
 		spin_lock(&pd->iosched.lock);
-		if (pd->iosched.writing) {
-			bio = pkt_get_list_first(&pd->iosched.write_queue,
-						 &pd->iosched.write_queue_tail);
-		} else {
-			bio = pkt_get_list_first(&pd->iosched.read_queue,
-						 &pd->iosched.read_queue_tail);
-		}
+		if (pd->iosched.writing)
+			bio = bio_list_pop(&pd->iosched.write_queue);
+		else
+			bio = bio_list_pop(&pd->iosched.read_queue);
 		spin_unlock(&pd->iosched.lock);
 
 		if (!bio)
@@ -1114,7 +1072,7 @@ static void pkt_gather_data(struct pktcdvd_device *pd, struct packet_data *pkt)
 	int f;
 	char written[PACKET_MAX_SIZE];
 
-	BUG_ON(!pkt->orig_bios);
+	BUG_ON(bio_list_empty(&pkt->orig_bios));
 
 	atomic_set(&pkt->io_wait, 0);
 	atomic_set(&pkt->io_errors, 0);
@@ -1124,7 +1082,7 @@ static void pkt_gather_data(struct pktcdvd_device *pd, struct packet_data *pkt)
 	 */
 	memset(written, 0, sizeof(written));
 	spin_lock(&pkt->lock);
-	for (bio = pkt->orig_bios; bio; bio = bio->bi_next) {
+	bio_list_for_each(bio, &pkt->orig_bios) {
 		int first_frame = (bio->bi_sector - pkt->sector) / (CD_FRAMESIZE >> 9);
 		int num_frames = bio->bi_size / CD_FRAMESIZE;
 		pd->stats.secs_w += num_frames * (CD_FRAMESIZE >> 9);
@@ -1363,7 +1321,7 @@ try_next_bio:
 			break;
 		pkt_rbtree_erase(pd, node);
 		spin_lock(&pkt->lock);
-		pkt_add_list_last(bio, &pkt->orig_bios, &pkt->orig_bios_tail);
+		bio_list_add(&pkt->orig_bios, bio);
 		pkt->write_size += bio->bi_size / CD_FRAMESIZE;
 		spin_unlock(&pkt->lock);
 	}
@@ -1409,7 +1367,7 @@ static void pkt_start_write(struct pktcdvd_device *pd, struct packet_data *pkt)
 	 */
 	frames_write = 0;
 	spin_lock(&pkt->lock);
-	for (bio = pkt->orig_bios; bio; bio = bio->bi_next) {
+	bio_list_for_each(bio, &pkt->orig_bios) {
 		int segment = bio->bi_idx;
 		int src_offs = 0;
 		int first_frame = (bio->bi_sector - pkt->sector) / (CD_FRAMESIZE >> 9);
@@ -1472,20 +1430,14 @@ static void pkt_start_write(struct pktcdvd_device *pd, struct packet_data *pkt)
 
 static void pkt_finish_packet(struct packet_data *pkt, int uptodate)
 {
-	struct bio *bio, *next;
+	struct bio *bio;
 
 	if (!uptodate)
 		pkt->cache_valid = 0;
 
 	/* Finish all bios corresponding to this packet */
-	bio = pkt->orig_bios;
-	while (bio) {
-		next = bio->bi_next;
-		bio->bi_next = NULL;
+	while ((bio = bio_list_pop(&pkt->orig_bios)))
 		bio_endio(bio, uptodate ? 0 : -EIO);
-		bio = next;
-	}
-	pkt->orig_bios = pkt->orig_bios_tail = NULL;
 }
 
 static void pkt_run_state_machine(struct pktcdvd_device *pd, struct packet_data *pkt)
@@ -2567,8 +2519,7 @@ static int pkt_make_request(struct request_queue *q, struct bio *bio)
 			spin_lock(&pkt->lock);
 			if ((pkt->state == PACKET_WAITING_STATE) ||
 			    (pkt->state == PACKET_READ_WAIT_STATE)) {
-				pkt_add_list_last(bio, &pkt->orig_bios,
-						  &pkt->orig_bios_tail);
+				bio_list_add(&pkt->orig_bios, bio);
 				pkt->write_size += bio->bi_size / CD_FRAMESIZE;
 				if ((pkt->write_size >= pkt->frames) &&
 				    (pkt->state == PACKET_WAITING_STATE)) {
@@ -2898,6 +2849,8 @@ static int pkt_setup_dev(dev_t dev, dev_t* pkt_dev)
 
 	spin_lock_init(&pd->lock);
 	spin_lock_init(&pd->iosched.lock);
+	bio_list_init(&pd->iosched.read_queue);
+	bio_list_init(&pd->iosched.write_queue);
 	sprintf(pd->name, DRIVER_NAME"%d", idx);
 	init_waitqueue_head(&pd->wqueue);
 	pd->bio_queue = RB_ROOT;
diff --git a/include/linux/pktcdvd.h b/include/linux/pktcdvd.h
index 76e5053e1fac..721301b0a908 100644
--- a/include/linux/pktcdvd.h
+++ b/include/linux/pktcdvd.h
@@ -163,10 +163,8 @@ struct packet_iosched
 	atomic_t		attention;	/* Set to non-zero when queue processing is needed */
 	int			writing;	/* Non-zero when writing, zero when reading */
 	spinlock_t		lock;		/* Protecting read/write queue manipulations */
-	struct bio		*read_queue;
-	struct bio		*read_queue_tail;
-	struct bio		*write_queue;
-	struct bio		*write_queue_tail;
+	struct bio_list		read_queue;
+	struct bio_list		write_queue;
 	sector_t		last_write;	/* The sector where the last write ended */
 	int			successive_reads;
 };
@@ -206,8 +204,8 @@ struct packet_data
 	spinlock_t		lock;		/* Lock protecting state transitions and */
 						/* orig_bios list */
 
-	struct bio		*orig_bios;	/* Original bios passed to pkt_make_request */
-	struct bio		*orig_bios_tail;/* that will be handled by this packet */
+	struct bio_list		orig_bios;	/* Original bios passed to pkt_make_request */
+						/* that will be handled by this packet */
 	int			write_size;	/* Total size of all bios in the orig_bios */
 						/* list, measured in number of frames */
 
-- 
cgit v1.2.3


From d62abe563fa4718e7f85f3e871655434db92366d Mon Sep 17 00:00:00 2001
From: Misael Lopez Cruz <x0052729@ti.com>
Date: Tue, 23 Feb 2010 18:10:19 -0600
Subject: OMAP4: PMIC: Add support for twl6030 codec

In order to have TWL6030 CODEC driver as a platform driver, codec data
should be passed through twl_platform_data structure.

For twl6030 audio codec, the following data may be passed:
    - audpwron_gpio: gpio line used to power-up/down the codec. A low-to-high
      transition powers codec up. Setting audpwron_gpio to a negative value
      means that codec will use manual power sequence instead of automatic
      sequence
    - naudint_irq: irq line for audio interrupt. twl6030 drives NAUDINT line
      to low when an interrupt (codec ready, plug insertion/removal, etc) is
      detected

However, codec driver can operate if any or none of them are passed.

Signed-off-by: Misael Lopez Cruz <x0052729@ti.com>
Signed-off-by: Margarita Olaya Cabrera <magi.olaya@ti.com>
Signed-off-by: Jorge Eduardo Candelaria <jorge.candelaria@ti.com>
Acked-by: Samuel Ortiz <sameo@linux.intel.com>
Acked-by: Liam Girdwood <lrg@slimlogic.co.uk>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 drivers/mfd/twl-core.c  | 18 +++++++++++++++---
 include/linux/i2c/twl.h |  4 ++++
 2 files changed, 19 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/twl-core.c b/drivers/mfd/twl-core.c
index 2a7606534196..19a930d06241 100644
--- a/drivers/mfd/twl-core.c
+++ b/drivers/mfd/twl-core.c
@@ -115,7 +115,8 @@
 #define twl_has_watchdog()        false
 #endif
 
-#if defined(CONFIG_TWL4030_CODEC) || defined(CONFIG_TWL4030_CODEC_MODULE)
+#if defined(CONFIG_TWL4030_CODEC) || defined(CONFIG_TWL4030_CODEC_MODULE) ||\
+	defined(CONFIG_SND_SOC_TWL6030) || defined(CONFIG_SND_SOC_TWL6030_MODULE)
 #define twl_has_codec()	true
 #else
 #define twl_has_codec()	false
@@ -711,8 +712,19 @@ add_children(struct twl4030_platform_data *pdata, unsigned long features)
 			return PTR_ERR(child);
 	}
 
-	if (twl_has_codec() && pdata->codec) {
-		child = add_child(1, "twl4030_codec",
+	if (twl_has_codec() && pdata->codec && twl_class_is_4030()) {
+		sub_chip_id = twl_map[TWL_MODULE_AUDIO_VOICE].sid;
+		child = add_child(sub_chip_id, "twl4030_codec",
+				pdata->codec, sizeof(*pdata->codec),
+				false, 0, 0);
+		if (IS_ERR(child))
+			return PTR_ERR(child);
+	}
+
+	/* Phoenix*/
+	if (twl_has_codec() && pdata->codec && twl_class_is_6030()) {
+		sub_chip_id = twl_map[TWL_MODULE_AUDIO_VOICE].sid;
+		child = add_child(sub_chip_id, "twl6030_codec",
 				pdata->codec, sizeof(*pdata->codec),
 				false, 0, 0);
 		if (IS_ERR(child))
diff --git a/include/linux/i2c/twl.h b/include/linux/i2c/twl.h
index bf1c5be1f5b6..7897f3096560 100644
--- a/include/linux/i2c/twl.h
+++ b/include/linux/i2c/twl.h
@@ -547,6 +547,10 @@ struct twl4030_codec_data {
 	unsigned int	audio_mclk;
 	struct twl4030_codec_audio_data		*audio;
 	struct twl4030_codec_vibra_data		*vibra;
+
+	/* twl6030 */
+	int audpwron_gpio;      /* audio power-on gpio */
+	int naudint_irq;        /* audio interrupt */
 };
 
 struct twl4030_platform_data {
-- 
cgit v1.2.3


From 72b2b1dd77e8feb0b7c0b26dee58f2a1e2c9828c Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Wed, 24 Feb 2010 18:32:59 +0100
Subject: netfilter: xtables: replace XT_ENTRY_ITERATE macro

The macro is replaced by a list.h-like foreach loop. This makes
the code much more inspectable.

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/netfilter/x_tables.h        |   9 ++
 include/linux/netfilter_arp/arp_tables.h  |  10 +-
 include/linux/netfilter_ipv4/ip_tables.h  |  11 +-
 include/linux/netfilter_ipv6/ip6_tables.h |  10 +-
 net/ipv4/netfilter/arp_tables.c           | 151 ++++++++++++++++++----------
 net/ipv4/netfilter/ip_tables.c            | 160 +++++++++++++++++++-----------
 net/ipv6/netfilter/ip6_tables.c           | 160 +++++++++++++++++++-----------
 7 files changed, 321 insertions(+), 190 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index a18119fb88f0..9df3f5a8f9f7 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -139,6 +139,7 @@ struct xt_counters_info {
 	__ret;							\
 })
 
+#ifndef __KERNEL__
 /* fn returns 0 to continue iteration */
 #define XT_ENTRY_ITERATE_CONTINUE(type, entries, size, n, fn, args...) \
 ({								\
@@ -163,6 +164,14 @@ struct xt_counters_info {
 #define XT_ENTRY_ITERATE(type, entries, size, fn, args...) \
 	XT_ENTRY_ITERATE_CONTINUE(type, entries, size, 0, fn, args)
 
+#endif /* !__KERNEL__ */
+
+/* pos is normally a struct ipt_entry/ip6t_entry/etc. */
+#define xt_entry_foreach(pos, ehead, esize) \
+	for ((pos) = (typeof(pos))(ehead); \
+	     (pos) < (typeof(pos))((char *)(ehead) + (esize)); \
+	     (pos) = (typeof(pos))((char *)(pos) + (pos)->next_offset))
+
 #ifdef __KERNEL__
 
 #include <linux/netdevice.h>
diff --git a/include/linux/netfilter_arp/arp_tables.h b/include/linux/netfilter_arp/arp_tables.h
index 0b33980611b2..e9948c0560f6 100644
--- a/include/linux/netfilter_arp/arp_tables.h
+++ b/include/linux/netfilter_arp/arp_tables.h
@@ -211,9 +211,11 @@ static __inline__ struct arpt_entry_target *arpt_get_target(struct arpt_entry *e
 	return (void *)e + e->target_offset;
 }
 
+#ifndef __KERNEL__
 /* fn returns 0 to continue iteration */
 #define ARPT_ENTRY_ITERATE(entries, size, fn, args...) \
 	XT_ENTRY_ITERATE(struct arpt_entry, entries, size, fn, ## args)
+#endif
 
 /*
  *	Main firewall chains definitions and global var's definitions.
@@ -291,14 +293,6 @@ compat_arpt_get_target(struct compat_arpt_entry *e)
 
 #define COMPAT_ARPT_ALIGN(s)	COMPAT_XT_ALIGN(s)
 
-/* fn returns 0 to continue iteration */
-#define COMPAT_ARPT_ENTRY_ITERATE(entries, size, fn, args...) \
-	XT_ENTRY_ITERATE(struct compat_arpt_entry, entries, size, fn, ## args)
-
-#define COMPAT_ARPT_ENTRY_ITERATE_CONTINUE(entries, size, n, fn, args...) \
-	XT_ENTRY_ITERATE_CONTINUE(struct compat_arpt_entry, entries, size, n, \
-				  fn, ## args)
-
 #endif /* CONFIG_COMPAT */
 #endif /*__KERNEL__*/
 #endif /* _ARPTABLES_H */
diff --git a/include/linux/netfilter_ipv4/ip_tables.h b/include/linux/netfilter_ipv4/ip_tables.h
index 364973b42133..5b20ae724b41 100644
--- a/include/linux/netfilter_ipv4/ip_tables.h
+++ b/include/linux/netfilter_ipv4/ip_tables.h
@@ -227,9 +227,11 @@ ipt_get_target(struct ipt_entry *e)
 #define IPT_MATCH_ITERATE(e, fn, args...) \
 	XT_MATCH_ITERATE(struct ipt_entry, e, fn, ## args)
 
+#ifndef __KERNEL__
 /* fn returns 0 to continue iteration */
 #define IPT_ENTRY_ITERATE(entries, size, fn, args...) \
 	XT_ENTRY_ITERATE(struct ipt_entry, entries, size, fn, ## args)
+#endif
 
 /*
  *	Main firewall chains definitions and global var's definitions.
@@ -317,15 +319,6 @@ compat_ipt_get_target(struct compat_ipt_entry *e)
 #define COMPAT_IPT_MATCH_ITERATE(e, fn, args...) \
 	XT_MATCH_ITERATE(struct compat_ipt_entry, e, fn, ## args)
 
-/* fn returns 0 to continue iteration */
-#define COMPAT_IPT_ENTRY_ITERATE(entries, size, fn, args...) \
-	XT_ENTRY_ITERATE(struct compat_ipt_entry, entries, size, fn, ## args)
-
-/* fn returns 0 to continue iteration */
-#define COMPAT_IPT_ENTRY_ITERATE_CONTINUE(entries, size, n, fn, args...) \
-	XT_ENTRY_ITERATE_CONTINUE(struct compat_ipt_entry, entries, size, n, \
-				  fn, ## args)
-
 #endif /* CONFIG_COMPAT */
 #endif /*__KERNEL__*/
 #endif /* _IPTABLES_H */
diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/linux/netfilter_ipv6/ip6_tables.h
index 8031eb486a10..8bb3f5ba5ff2 100644
--- a/include/linux/netfilter_ipv6/ip6_tables.h
+++ b/include/linux/netfilter_ipv6/ip6_tables.h
@@ -284,9 +284,11 @@ ip6t_get_target(struct ip6t_entry *e)
 #define IP6T_MATCH_ITERATE(e, fn, args...) \
 	XT_MATCH_ITERATE(struct ip6t_entry, e, fn, ## args)
 
+#ifndef __KERNEL__
 /* fn returns 0 to continue iteration */
 #define IP6T_ENTRY_ITERATE(entries, size, fn, args...) \
 	XT_ENTRY_ITERATE(struct ip6t_entry, entries, size, fn, ## args)
+#endif
 
 /*
  *	Main firewall chains definitions and global var's definitions.
@@ -345,14 +347,6 @@ compat_ip6t_get_target(struct compat_ip6t_entry *e)
 #define COMPAT_IP6T_MATCH_ITERATE(e, fn, args...) \
 	XT_MATCH_ITERATE(struct compat_ip6t_entry, e, fn, ## args)
 
-/* fn returns 0 to continue iteration */
-#define COMPAT_IP6T_ENTRY_ITERATE(entries, size, fn, args...) \
-	XT_ENTRY_ITERATE(struct compat_ip6t_entry, entries, size, fn, ## args)
-
-#define COMPAT_IP6T_ENTRY_ITERATE_CONTINUE(entries, size, n, fn, args...) \
-	XT_ENTRY_ITERATE_CONTINUE(struct compat_ip6t_entry, entries, size, n, \
-				  fn, ## args)
-
 #endif /* CONFIG_COMPAT */
 #endif /*__KERNEL__*/
 #endif /* _IP6_TABLES_H */
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 4db5c1ece0f9..f7338869fc4c 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -641,8 +641,9 @@ static int translate_table(const char *name,
 			   const unsigned int *hook_entries,
 			   const unsigned int *underflows)
 {
+	struct arpt_entry *iter;
 	unsigned int i;
-	int ret;
+	int ret = 0;
 
 	newinfo->size = size;
 	newinfo->number = number;
@@ -657,12 +658,13 @@ static int translate_table(const char *name,
 	i = 0;
 
 	/* Walk through entries, checking offsets. */
-	ret = ARPT_ENTRY_ITERATE(entry0, newinfo->size,
-				 check_entry_size_and_hooks,
-				 newinfo,
-				 entry0,
-				 entry0 + size,
-				 hook_entries, underflows, valid_hooks, &i);
+	xt_entry_foreach(iter, entry0, newinfo->size) {
+		ret = check_entry_size_and_hooks(iter, newinfo, entry0,
+		      entry0 + size, hook_entries, underflows,
+		      valid_hooks, &i);
+		if (ret != 0)
+			break;
+	}
 	duprintf("translate_table: ARPT_ENTRY_ITERATE gives %d\n", ret);
 	if (ret != 0)
 		return ret;
@@ -697,12 +699,16 @@ static int translate_table(const char *name,
 
 	/* Finally, each sanity check must pass */
 	i = 0;
-	ret = ARPT_ENTRY_ITERATE(entry0, newinfo->size,
-				 find_check_entry, name, size, &i);
+	xt_entry_foreach(iter, entry0, newinfo->size) {
+		ret = find_check_entry(iter, name, size, &i);
+		if (ret != 0)
+			break;
+	}
 
 	if (ret != 0) {
-		ARPT_ENTRY_ITERATE(entry0, newinfo->size,
-				cleanup_entry, &i);
+		xt_entry_foreach(iter, entry0, newinfo->size)
+			if (cleanup_entry(iter, &i) != 0)
+				break;
 		return ret;
 	}
 
@@ -739,6 +745,7 @@ static inline int set_entry_to_counter(const struct arpt_entry *e,
 static void get_counters(const struct xt_table_info *t,
 			 struct xt_counters counters[])
 {
+	struct arpt_entry *iter;
 	unsigned int cpu;
 	unsigned int i;
 	unsigned int curcpu;
@@ -754,22 +761,18 @@ static void get_counters(const struct xt_table_info *t,
 	curcpu = smp_processor_id();
 
 	i = 0;
-	ARPT_ENTRY_ITERATE(t->entries[curcpu],
-			   t->size,
-			   set_entry_to_counter,
-			   counters,
-			   &i);
+	xt_entry_foreach(iter, t->entries[curcpu], t->size)
+		if (set_entry_to_counter(iter, counters, &i) != 0)
+			break;
 
 	for_each_possible_cpu(cpu) {
 		if (cpu == curcpu)
 			continue;
 		i = 0;
 		xt_info_wrlock(cpu);
-		ARPT_ENTRY_ITERATE(t->entries[cpu],
-				   t->size,
-				   add_entry_to_counter,
-				   counters,
-				   &i);
+		xt_entry_foreach(iter, t->entries[cpu], t->size)
+			if (add_entry_to_counter(iter, counters, &i) != 0)
+				break;
 		xt_info_wrunlock(cpu);
 	}
 	local_bh_enable();
@@ -899,7 +902,9 @@ static int compat_calc_entry(const struct arpt_entry *e,
 static int compat_table_info(const struct xt_table_info *info,
 			     struct xt_table_info *newinfo)
 {
+	struct arpt_entry *iter;
 	void *loc_cpu_entry;
+	int ret = 0;
 
 	if (!newinfo || !info)
 		return -EINVAL;
@@ -908,9 +913,12 @@ static int compat_table_info(const struct xt_table_info *info,
 	memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
 	newinfo->initial_entries = 0;
 	loc_cpu_entry = info->entries[raw_smp_processor_id()];
-	return ARPT_ENTRY_ITERATE(loc_cpu_entry, info->size,
-				  compat_calc_entry, info, loc_cpu_entry,
-				  newinfo);
+	xt_entry_foreach(iter, loc_cpu_entry, info->size) {
+		ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo);
+		if (ret != 0)
+			break;
+	}
+	return ret;
 }
 #endif
 
@@ -1025,6 +1033,7 @@ static int __do_replace(struct net *net, const char *name,
 	struct xt_table_info *oldinfo;
 	struct xt_counters *counters;
 	void *loc_cpu_old_entry;
+	struct arpt_entry *iter;
 
 	ret = 0;
 	counters = vmalloc_node(num_counters * sizeof(struct xt_counters),
@@ -1068,8 +1077,9 @@ static int __do_replace(struct net *net, const char *name,
 
 	/* Decrease module usage counts and free resource */
 	loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
-	ARPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,
-			   NULL);
+	xt_entry_foreach(iter, loc_cpu_old_entry, oldinfo->size)
+		if (cleanup_entry(iter, NULL) != 0)
+			break;
 
 	xt_free_table_info(oldinfo);
 	if (copy_to_user(counters_ptr, counters,
@@ -1095,6 +1105,7 @@ static int do_replace(struct net *net, const void __user *user,
 	struct arpt_replace tmp;
 	struct xt_table_info *newinfo;
 	void *loc_cpu_entry;
+	struct arpt_entry *iter;
 
 	if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
 		return -EFAULT;
@@ -1130,7 +1141,9 @@ static int do_replace(struct net *net, const void __user *user,
 	return 0;
 
  free_newinfo_untrans:
-	ARPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, NULL);
+	xt_entry_foreach(iter, loc_cpu_entry, newinfo->size)
+		if (cleanup_entry(iter, NULL) != 0)
+			break;
  free_newinfo:
 	xt_free_table_info(newinfo);
 	return ret;
@@ -1163,6 +1176,7 @@ static int do_add_counters(struct net *net, const void __user *user,
 	const struct xt_table_info *private;
 	int ret = 0;
 	void *loc_cpu_entry;
+	struct arpt_entry *iter;
 #ifdef CONFIG_COMPAT
 	struct compat_xt_counters_info compat_tmp;
 
@@ -1220,11 +1234,9 @@ static int do_add_counters(struct net *net, const void __user *user,
 	curcpu = smp_processor_id();
 	loc_cpu_entry = private->entries[curcpu];
 	xt_info_wrlock(curcpu);
-	ARPT_ENTRY_ITERATE(loc_cpu_entry,
-			   private->size,
-			   add_counter_to_entry,
-			   paddc,
-			   &i);
+	xt_entry_foreach(iter, loc_cpu_entry, private->size)
+		if (add_counter_to_entry(iter, paddc, &i) != 0)
+			break;
 	xt_info_wrunlock(curcpu);
  unlock_up_free:
 	local_bh_enable();
@@ -1388,8 +1400,10 @@ static int translate_compat_table(const char *name,
 	unsigned int i, j;
 	struct xt_table_info *newinfo, *info;
 	void *pos, *entry0, *entry1;
+	struct compat_arpt_entry *iter0;
+	struct arpt_entry *iter1;
 	unsigned int size;
-	int ret;
+	int ret = 0;
 
 	info = *pinfo;
 	entry0 = *pentry0;
@@ -1406,11 +1420,13 @@ static int translate_compat_table(const char *name,
 	j = 0;
 	xt_compat_lock(NFPROTO_ARP);
 	/* Walk through entries, checking offsets. */
-	ret = COMPAT_ARPT_ENTRY_ITERATE(entry0, total_size,
-					check_compat_entry_size_and_hooks,
-					info, &size, entry0,
-					entry0 + total_size,
-					hook_entries, underflows, &j, name);
+	xt_entry_foreach(iter0, entry0, total_size) {
+		ret = check_compat_entry_size_and_hooks(iter0, info, &size,
+		      entry0, entry0 + total_size, hook_entries, underflows,
+		      &j, name);
+		if (ret != 0)
+			break;
+	}
 	if (ret != 0)
 		goto out_unlock;
 
@@ -1451,9 +1467,12 @@ static int translate_compat_table(const char *name,
 	entry1 = newinfo->entries[raw_smp_processor_id()];
 	pos = entry1;
 	size = total_size;
-	ret = COMPAT_ARPT_ENTRY_ITERATE(entry0, total_size,
-					compat_copy_entry_from_user,
-					&pos, &size, name, newinfo, entry1);
+	xt_entry_foreach(iter0, entry0, total_size) {
+		ret = compat_copy_entry_from_user(iter0, &pos,
+		      &size, name, newinfo, entry1);
+		if (ret != 0)
+			break;
+	}
 	xt_compat_flush_offsets(NFPROTO_ARP);
 	xt_compat_unlock(NFPROTO_ARP);
 	if (ret)
@@ -1464,13 +1483,28 @@ static int translate_compat_table(const char *name,
 		goto free_newinfo;
 
 	i = 0;
-	ret = ARPT_ENTRY_ITERATE(entry1, newinfo->size, compat_check_entry,
-				 name, &i);
+	xt_entry_foreach(iter1, entry1, newinfo->size) {
+		ret = compat_check_entry(iter1, name, &i);
+		if (ret != 0)
+			break;
+	}
 	if (ret) {
+		/*
+		 * The first i matches need cleanup_entry (calls ->destroy)
+		 * because they had called ->check already. The other j-i
+		 * entries need only release.
+		 */
+		int skip = i;
 		j -= i;
-		COMPAT_ARPT_ENTRY_ITERATE_CONTINUE(entry0, newinfo->size, i,
-						   compat_release_entry, &j);
-		ARPT_ENTRY_ITERATE(entry1, newinfo->size, cleanup_entry, &i);
+		xt_entry_foreach(iter0, entry0, newinfo->size) {
+			if (skip-- > 0)
+				continue;
+			if (compat_release_entry(iter0, &j) != 0)
+				break;
+		}
+		xt_entry_foreach(iter1, entry1, newinfo->size)
+			if (cleanup_entry(iter1, &i) != 0)
+				break;
 		xt_free_table_info(newinfo);
 		return ret;
 	}
@@ -1488,7 +1522,9 @@ static int translate_compat_table(const char *name,
 free_newinfo:
 	xt_free_table_info(newinfo);
 out:
-	COMPAT_ARPT_ENTRY_ITERATE(entry0, total_size, compat_release_entry, &j);
+	xt_entry_foreach(iter0, entry0, total_size)
+		if (compat_release_entry(iter0, &j) != 0)
+			break;
 	return ret;
 out_unlock:
 	xt_compat_flush_offsets(NFPROTO_ARP);
@@ -1515,6 +1551,7 @@ static int compat_do_replace(struct net *net, void __user *user,
 	struct compat_arpt_replace tmp;
 	struct xt_table_info *newinfo;
 	void *loc_cpu_entry;
+	struct arpt_entry *iter;
 
 	if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
 		return -EFAULT;
@@ -1552,7 +1589,9 @@ static int compat_do_replace(struct net *net, void __user *user,
 	return 0;
 
  free_newinfo_untrans:
-	ARPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, NULL);
+	xt_entry_foreach(iter, loc_cpu_entry, newinfo->size)
+		if (cleanup_entry(iter, NULL) != 0)
+			break;
  free_newinfo:
 	xt_free_table_info(newinfo);
 	return ret;
@@ -1636,6 +1675,7 @@ static int compat_copy_entries_to_user(unsigned int total_size,
 	int ret = 0;
 	void *loc_cpu_entry;
 	unsigned int i = 0;
+	struct arpt_entry *iter;
 
 	counters = alloc_counters(table);
 	if (IS_ERR(counters))
@@ -1645,9 +1685,12 @@ static int compat_copy_entries_to_user(unsigned int total_size,
 	loc_cpu_entry = private->entries[raw_smp_processor_id()];
 	pos = userptr;
 	size = total_size;
-	ret = ARPT_ENTRY_ITERATE(loc_cpu_entry, total_size,
-				 compat_copy_entry_to_user,
-				 &pos, &size, counters, &i);
+	xt_entry_foreach(iter, loc_cpu_entry, total_size) {
+		ret = compat_copy_entry_to_user(iter, &pos,
+		      &size, counters, &i);
+		if (ret != 0)
+			break;
+	}
 	vfree(counters);
 	return ret;
 }
@@ -1843,13 +1886,15 @@ void arpt_unregister_table(struct xt_table *table)
 	struct xt_table_info *private;
 	void *loc_cpu_entry;
 	struct module *table_owner = table->me;
+	struct arpt_entry *iter;
 
 	private = xt_unregister_table(table);
 
 	/* Decrease module usage counts and free resources */
 	loc_cpu_entry = private->entries[raw_smp_processor_id()];
-	ARPT_ENTRY_ITERATE(loc_cpu_entry, private->size,
-			   cleanup_entry, NULL);
+	xt_entry_foreach(iter, loc_cpu_entry, private->size)
+		if (cleanup_entry(iter, NULL) != 0)
+			break;
 	if (private->number > private->initial_entries)
 		module_put(table_owner);
 	xt_free_table_info(private);
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index e94c18bdfc68..b43280aad8a2 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -288,6 +288,7 @@ static void trace_packet(const struct sk_buff *skb,
 	const void *table_base;
 	const struct ipt_entry *root;
 	const char *hookname, *chainname, *comment;
+	const struct ipt_entry *iter;
 	unsigned int rulenum = 0;
 
 	table_base = private->entries[smp_processor_id()];
@@ -296,10 +297,10 @@ static void trace_packet(const struct sk_buff *skb,
 	hookname = chainname = hooknames[hook];
 	comment = comments[NF_IP_TRACE_COMMENT_RULE];
 
-	IPT_ENTRY_ITERATE(root,
-			  private->size - private->hook_entry[hook],
-			  get_chainname_rulenum,
-			  e, hookname, &chainname, &comment, &rulenum);
+	xt_entry_foreach(iter, root, private->size - private->hook_entry[hook])
+		if (get_chainname_rulenum(iter, e, hookname,
+		    &chainname, &comment, &rulenum) != 0)
+			break;
 
 	nf_log_packet(AF_INET, hook, skb, in, out, &trace_loginfo,
 		      "TRACE: %s:%s:%s:%u ",
@@ -826,8 +827,9 @@ translate_table(struct net *net,
 		const unsigned int *hook_entries,
 		const unsigned int *underflows)
 {
+	struct ipt_entry *iter;
 	unsigned int i;
-	int ret;
+	int ret = 0;
 
 	newinfo->size = size;
 	newinfo->number = number;
@@ -841,12 +843,13 @@ translate_table(struct net *net,
 	duprintf("translate_table: size %u\n", newinfo->size);
 	i = 0;
 	/* Walk through entries, checking offsets. */
-	ret = IPT_ENTRY_ITERATE(entry0, newinfo->size,
-				check_entry_size_and_hooks,
-				newinfo,
-				entry0,
-				entry0 + size,
-				hook_entries, underflows, valid_hooks, &i);
+	xt_entry_foreach(iter, entry0, newinfo->size) {
+		ret = check_entry_size_and_hooks(iter, newinfo, entry0,
+		      entry0 + size, hook_entries, underflows,
+		      valid_hooks, &i);
+		if (ret != 0)
+			break;
+	}
 	if (ret != 0)
 		return ret;
 
@@ -878,12 +881,16 @@ translate_table(struct net *net,
 
 	/* Finally, each sanity check must pass */
 	i = 0;
-	ret = IPT_ENTRY_ITERATE(entry0, newinfo->size,
-				find_check_entry, net, name, size, &i);
+	xt_entry_foreach(iter, entry0, newinfo->size) {
+		ret = find_check_entry(iter, net, name, size, &i);
+		if (ret != 0)
+			break;
+	}
 
 	if (ret != 0) {
-		IPT_ENTRY_ITERATE(entry0, newinfo->size,
-				cleanup_entry, net, &i);
+		xt_entry_foreach(iter, entry0, newinfo->size)
+			if (cleanup_entry(iter, net, &i) != 0)
+				break;
 		return ret;
 	}
 
@@ -923,6 +930,7 @@ static void
 get_counters(const struct xt_table_info *t,
 	     struct xt_counters counters[])
 {
+	struct ipt_entry *iter;
 	unsigned int cpu;
 	unsigned int i;
 	unsigned int curcpu;
@@ -938,22 +946,18 @@ get_counters(const struct xt_table_info *t,
 	curcpu = smp_processor_id();
 
 	i = 0;
-	IPT_ENTRY_ITERATE(t->entries[curcpu],
-			  t->size,
-			  set_entry_to_counter,
-			  counters,
-			  &i);
+	xt_entry_foreach(iter, t->entries[curcpu], t->size)
+		if (set_entry_to_counter(iter, counters, &i) != 0)
+			break;
 
 	for_each_possible_cpu(cpu) {
 		if (cpu == curcpu)
 			continue;
 		i = 0;
 		xt_info_wrlock(cpu);
-		IPT_ENTRY_ITERATE(t->entries[cpu],
-				  t->size,
-				  add_entry_to_counter,
-				  counters,
-				  &i);
+		xt_entry_foreach(iter, t->entries[cpu], t->size)
+			if (add_entry_to_counter(iter, counters, &i) != 0)
+				break;
 		xt_info_wrunlock(cpu);
 	}
 	local_bh_enable();
@@ -1111,7 +1115,9 @@ static int compat_calc_entry(const struct ipt_entry *e,
 static int compat_table_info(const struct xt_table_info *info,
 			     struct xt_table_info *newinfo)
 {
+	struct ipt_entry *iter;
 	void *loc_cpu_entry;
+	int ret = 0;
 
 	if (!newinfo || !info)
 		return -EINVAL;
@@ -1120,9 +1126,12 @@ static int compat_table_info(const struct xt_table_info *info,
 	memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
 	newinfo->initial_entries = 0;
 	loc_cpu_entry = info->entries[raw_smp_processor_id()];
-	return IPT_ENTRY_ITERATE(loc_cpu_entry, info->size,
-				 compat_calc_entry, info, loc_cpu_entry,
-				 newinfo);
+	xt_entry_foreach(iter, loc_cpu_entry, info->size) {
+		ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo);
+		if (ret != 0)
+			break;
+	}
+	return ret;
 }
 #endif
 
@@ -1236,6 +1245,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
 	struct xt_table_info *oldinfo;
 	struct xt_counters *counters;
 	void *loc_cpu_old_entry;
+	struct ipt_entry *iter;
 
 	ret = 0;
 	counters = vmalloc(num_counters * sizeof(struct xt_counters));
@@ -1278,8 +1288,10 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
 
 	/* Decrease module usage counts and free resource */
 	loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
-	IPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,
-			  net, NULL);
+	xt_entry_foreach(iter, loc_cpu_old_entry, oldinfo->size)
+		if (cleanup_entry(iter, net, NULL) != 0)
+			break;
+
 	xt_free_table_info(oldinfo);
 	if (copy_to_user(counters_ptr, counters,
 			 sizeof(struct xt_counters) * num_counters) != 0)
@@ -1304,6 +1316,7 @@ do_replace(struct net *net, const void __user *user, unsigned int len)
 	struct ipt_replace tmp;
 	struct xt_table_info *newinfo;
 	void *loc_cpu_entry;
+	struct ipt_entry *iter;
 
 	if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
 		return -EFAULT;
@@ -1339,7 +1352,9 @@ do_replace(struct net *net, const void __user *user, unsigned int len)
 	return 0;
 
  free_newinfo_untrans:
-	IPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, net, NULL);
+	xt_entry_foreach(iter, loc_cpu_entry, newinfo->size)
+		if (cleanup_entry(iter, net, NULL) != 0)
+			break;
  free_newinfo:
 	xt_free_table_info(newinfo);
 	return ret;
@@ -1373,6 +1388,7 @@ do_add_counters(struct net *net, const void __user *user,
 	const struct xt_table_info *private;
 	int ret = 0;
 	void *loc_cpu_entry;
+	struct ipt_entry *iter;
 #ifdef CONFIG_COMPAT
 	struct compat_xt_counters_info compat_tmp;
 
@@ -1430,11 +1446,9 @@ do_add_counters(struct net *net, const void __user *user,
 	curcpu = smp_processor_id();
 	loc_cpu_entry = private->entries[curcpu];
 	xt_info_wrlock(curcpu);
-	IPT_ENTRY_ITERATE(loc_cpu_entry,
-			  private->size,
-			  add_counter_to_entry,
-			  paddc,
-			  &i);
+	xt_entry_foreach(iter, loc_cpu_entry, private->size)
+		if (add_counter_to_entry(iter, paddc, &i) != 0)
+			break;
 	xt_info_wrunlock(curcpu);
  unlock_up_free:
 	local_bh_enable();
@@ -1720,8 +1734,10 @@ translate_compat_table(struct net *net,
 	unsigned int i, j;
 	struct xt_table_info *newinfo, *info;
 	void *pos, *entry0, *entry1;
+	struct compat_ipt_entry *iter0;
+	struct ipt_entry *iter1;
 	unsigned int size;
-	int ret;
+	int ret = 0;
 
 	info = *pinfo;
 	entry0 = *pentry0;
@@ -1738,11 +1754,13 @@ translate_compat_table(struct net *net,
 	j = 0;
 	xt_compat_lock(AF_INET);
 	/* Walk through entries, checking offsets. */
-	ret = COMPAT_IPT_ENTRY_ITERATE(entry0, total_size,
-				       check_compat_entry_size_and_hooks,
-				       info, &size, entry0,
-				       entry0 + total_size,
-				       hook_entries, underflows, &j, name);
+	xt_entry_foreach(iter0, entry0, total_size) {
+		ret = check_compat_entry_size_and_hooks(iter0, info, &size,
+		      entry0, entry0 + total_size, hook_entries, underflows,
+		      &j, name);
+		if (ret != 0)
+			break;
+	}
 	if (ret != 0)
 		goto out_unlock;
 
@@ -1783,9 +1801,12 @@ translate_compat_table(struct net *net,
 	entry1 = newinfo->entries[raw_smp_processor_id()];
 	pos = entry1;
 	size = total_size;
-	ret = COMPAT_IPT_ENTRY_ITERATE(entry0, total_size,
-				       compat_copy_entry_from_user,
-				       &pos, &size, name, newinfo, entry1);
+	xt_entry_foreach(iter0, entry0, total_size) {
+		ret = compat_copy_entry_from_user(iter0, &pos,
+		      &size, name, newinfo, entry1);
+		if (ret != 0)
+			break;
+	}
 	xt_compat_flush_offsets(AF_INET);
 	xt_compat_unlock(AF_INET);
 	if (ret)
@@ -1796,13 +1817,28 @@ translate_compat_table(struct net *net,
 		goto free_newinfo;
 
 	i = 0;
-	ret = IPT_ENTRY_ITERATE(entry1, newinfo->size, compat_check_entry,
-				net, name, &i);
+	xt_entry_foreach(iter1, entry1, newinfo->size) {
+		ret = compat_check_entry(iter1, net, name, &i);
+		if (ret != 0)
+			break;
+	}
 	if (ret) {
+		/*
+		 * The first i matches need cleanup_entry (calls ->destroy)
+		 * because they had called ->check already. The other j-i
+		 * entries need only release.
+		 */
+		int skip = i;
 		j -= i;
-		COMPAT_IPT_ENTRY_ITERATE_CONTINUE(entry0, newinfo->size, i,
-						  compat_release_entry, &j);
-		IPT_ENTRY_ITERATE(entry1, newinfo->size, cleanup_entry, net, &i);
+		xt_entry_foreach(iter0, entry0, newinfo->size) {
+			if (skip-- > 0)
+				continue;
+			if (compat_release_entry(iter0, &i) != 0)
+				break;
+		}
+		xt_entry_foreach(iter1, entry1, newinfo->size)
+			if (cleanup_entry(iter1, net, &i) != 0)
+				break;
 		xt_free_table_info(newinfo);
 		return ret;
 	}
@@ -1820,7 +1856,9 @@ translate_compat_table(struct net *net,
 free_newinfo:
 	xt_free_table_info(newinfo);
 out:
-	COMPAT_IPT_ENTRY_ITERATE(entry0, total_size, compat_release_entry, &j);
+	xt_entry_foreach(iter0, entry0, total_size)
+		if (compat_release_entry(iter0, &j) != 0)
+			break;
 	return ret;
 out_unlock:
 	xt_compat_flush_offsets(AF_INET);
@@ -1835,6 +1873,7 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len)
 	struct compat_ipt_replace tmp;
 	struct xt_table_info *newinfo;
 	void *loc_cpu_entry;
+	struct ipt_entry *iter;
 
 	if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
 		return -EFAULT;
@@ -1873,7 +1912,9 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len)
 	return 0;
 
  free_newinfo_untrans:
-	IPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, net, NULL);
+	xt_entry_foreach(iter, loc_cpu_entry, newinfo->size)
+		if (cleanup_entry(iter, net, NULL) != 0)
+			break;
  free_newinfo:
 	xt_free_table_info(newinfo);
 	return ret;
@@ -1922,6 +1963,7 @@ compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table,
 	int ret = 0;
 	const void *loc_cpu_entry;
 	unsigned int i = 0;
+	struct ipt_entry *iter;
 
 	counters = alloc_counters(table);
 	if (IS_ERR(counters))
@@ -1934,9 +1976,12 @@ compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table,
 	loc_cpu_entry = private->entries[raw_smp_processor_id()];
 	pos = userptr;
 	size = total_size;
-	ret = IPT_ENTRY_ITERATE(loc_cpu_entry, total_size,
-				compat_copy_entry_to_user,
-				&pos, &size, counters, &i);
+	xt_entry_foreach(iter, loc_cpu_entry, total_size) {
+		ret = compat_copy_entry_to_user(iter, &pos,
+		      &size, counters, &i);
+		if (ret != 0)
+			break;
+	}
 
 	vfree(counters);
 	return ret;
@@ -2137,12 +2182,15 @@ void ipt_unregister_table(struct net *net, struct xt_table *table)
 	struct xt_table_info *private;
 	void *loc_cpu_entry;
 	struct module *table_owner = table->me;
+	struct ipt_entry *iter;
 
 	private = xt_unregister_table(table);
 
 	/* Decrease module usage counts and free resources */
 	loc_cpu_entry = private->entries[raw_smp_processor_id()];
-	IPT_ENTRY_ITERATE(loc_cpu_entry, private->size, cleanup_entry, net, NULL);
+	xt_entry_foreach(iter, loc_cpu_entry, private->size)
+		if (cleanup_entry(iter, net, NULL) != 0)
+			break;
 	if (private->number > private->initial_entries)
 		module_put(table_owner);
 	xt_free_table_info(private);
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 4185099c2943..23926e38d36b 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -318,6 +318,7 @@ static void trace_packet(const struct sk_buff *skb,
 	const void *table_base;
 	const struct ip6t_entry *root;
 	const char *hookname, *chainname, *comment;
+	const struct ip6t_entry *iter;
 	unsigned int rulenum = 0;
 
 	table_base = private->entries[smp_processor_id()];
@@ -326,10 +327,10 @@ static void trace_packet(const struct sk_buff *skb,
 	hookname = chainname = hooknames[hook];
 	comment = comments[NF_IP6_TRACE_COMMENT_RULE];
 
-	IP6T_ENTRY_ITERATE(root,
-			   private->size - private->hook_entry[hook],
-			   get_chainname_rulenum,
-			   e, hookname, &chainname, &comment, &rulenum);
+	xt_entry_foreach(iter, root, private->size - private->hook_entry[hook])
+		if (get_chainname_rulenum(iter, e, hookname,
+		    &chainname, &comment, &rulenum) != 0)
+			break;
 
 	nf_log_packet(AF_INET6, hook, skb, in, out, &trace_loginfo,
 		      "TRACE: %s:%s:%s:%u ",
@@ -857,8 +858,9 @@ translate_table(struct net *net,
 		const unsigned int *hook_entries,
 		const unsigned int *underflows)
 {
+	struct ip6t_entry *iter;
 	unsigned int i;
-	int ret;
+	int ret = 0;
 
 	newinfo->size = size;
 	newinfo->number = number;
@@ -872,12 +874,13 @@ translate_table(struct net *net,
 	duprintf("translate_table: size %u\n", newinfo->size);
 	i = 0;
 	/* Walk through entries, checking offsets. */
-	ret = IP6T_ENTRY_ITERATE(entry0, newinfo->size,
-				check_entry_size_and_hooks,
-				newinfo,
-				entry0,
-				entry0 + size,
-				hook_entries, underflows, valid_hooks, &i);
+	xt_entry_foreach(iter, entry0, newinfo->size) {
+		ret = check_entry_size_and_hooks(iter, newinfo, entry0,
+		      entry0 + size, hook_entries, underflows,
+		      valid_hooks, &i);
+		if (ret != 0)
+			break;
+	}
 	if (ret != 0)
 		return ret;
 
@@ -909,12 +912,16 @@ translate_table(struct net *net,
 
 	/* Finally, each sanity check must pass */
 	i = 0;
-	ret = IP6T_ENTRY_ITERATE(entry0, newinfo->size,
-				find_check_entry, net, name, size, &i);
+	xt_entry_foreach(iter, entry0, newinfo->size) {
+		ret = find_check_entry(iter, net, name, size, &i);
+		if (ret != 0)
+			break;
+	}
 
 	if (ret != 0) {
-		IP6T_ENTRY_ITERATE(entry0, newinfo->size,
-				   cleanup_entry, net, &i);
+		xt_entry_foreach(iter, entry0, newinfo->size)
+			if (cleanup_entry(iter, net, &i) != 0)
+				break;
 		return ret;
 	}
 
@@ -954,6 +961,7 @@ static void
 get_counters(const struct xt_table_info *t,
 	     struct xt_counters counters[])
 {
+	struct ip6t_entry *iter;
 	unsigned int cpu;
 	unsigned int i;
 	unsigned int curcpu;
@@ -969,22 +977,18 @@ get_counters(const struct xt_table_info *t,
 	curcpu = smp_processor_id();
 
 	i = 0;
-	IP6T_ENTRY_ITERATE(t->entries[curcpu],
-			   t->size,
-			   set_entry_to_counter,
-			   counters,
-			   &i);
+	xt_entry_foreach(iter, t->entries[curcpu], t->size)
+		if (set_entry_to_counter(iter, counters, &i) != 0)
+			break;
 
 	for_each_possible_cpu(cpu) {
 		if (cpu == curcpu)
 			continue;
 		i = 0;
 		xt_info_wrlock(cpu);
-		IP6T_ENTRY_ITERATE(t->entries[cpu],
-				  t->size,
-				  add_entry_to_counter,
-				  counters,
-				  &i);
+		xt_entry_foreach(iter, t->entries[cpu], t->size)
+			if (add_entry_to_counter(iter, counters, &i) != 0)
+				break;
 		xt_info_wrunlock(cpu);
 	}
 	local_bh_enable();
@@ -1142,7 +1146,9 @@ static int compat_calc_entry(const struct ip6t_entry *e,
 static int compat_table_info(const struct xt_table_info *info,
 			     struct xt_table_info *newinfo)
 {
+	struct ip6t_entry *iter;
 	void *loc_cpu_entry;
+	int ret = 0;
 
 	if (!newinfo || !info)
 		return -EINVAL;
@@ -1151,9 +1157,12 @@ static int compat_table_info(const struct xt_table_info *info,
 	memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
 	newinfo->initial_entries = 0;
 	loc_cpu_entry = info->entries[raw_smp_processor_id()];
-	return IP6T_ENTRY_ITERATE(loc_cpu_entry, info->size,
-				  compat_calc_entry, info, loc_cpu_entry,
-				  newinfo);
+	xt_entry_foreach(iter, loc_cpu_entry, info->size) {
+		ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo);
+		if (ret != 0)
+			break;
+	}
+	return ret;
 }
 #endif
 
@@ -1267,6 +1276,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
 	struct xt_table_info *oldinfo;
 	struct xt_counters *counters;
 	const void *loc_cpu_old_entry;
+	struct ip6t_entry *iter;
 
 	ret = 0;
 	counters = vmalloc_node(num_counters * sizeof(struct xt_counters),
@@ -1310,8 +1320,10 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
 
 	/* Decrease module usage counts and free resource */
 	loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
-	IP6T_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,
-			   net, NULL);
+	xt_entry_foreach(iter, loc_cpu_old_entry, oldinfo->size)
+		if (cleanup_entry(iter, net, NULL) != 0)
+			break;
+
 	xt_free_table_info(oldinfo);
 	if (copy_to_user(counters_ptr, counters,
 			 sizeof(struct xt_counters) * num_counters) != 0)
@@ -1336,6 +1348,7 @@ do_replace(struct net *net, const void __user *user, unsigned int len)
 	struct ip6t_replace tmp;
 	struct xt_table_info *newinfo;
 	void *loc_cpu_entry;
+	struct ip6t_entry *iter;
 
 	if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
 		return -EFAULT;
@@ -1371,7 +1384,9 @@ do_replace(struct net *net, const void __user *user, unsigned int len)
 	return 0;
 
  free_newinfo_untrans:
-	IP6T_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, net, NULL);
+	xt_entry_foreach(iter, loc_cpu_entry, newinfo->size)
+		if (cleanup_entry(iter, net, NULL) != 0)
+			break;
  free_newinfo:
 	xt_free_table_info(newinfo);
 	return ret;
@@ -1405,6 +1420,7 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len,
 	const struct xt_table_info *private;
 	int ret = 0;
 	const void *loc_cpu_entry;
+	struct ip6t_entry *iter;
 #ifdef CONFIG_COMPAT
 	struct compat_xt_counters_info compat_tmp;
 
@@ -1463,11 +1479,9 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len,
 	curcpu = smp_processor_id();
 	xt_info_wrlock(curcpu);
 	loc_cpu_entry = private->entries[curcpu];
-	IP6T_ENTRY_ITERATE(loc_cpu_entry,
-			  private->size,
-			  add_counter_to_entry,
-			  paddc,
-			  &i);
+	xt_entry_foreach(iter, loc_cpu_entry, private->size)
+		if (add_counter_to_entry(iter, paddc, &i) != 0)
+			break;
 	xt_info_wrunlock(curcpu);
 
  unlock_up_free:
@@ -1753,8 +1767,10 @@ translate_compat_table(struct net *net,
 	unsigned int i, j;
 	struct xt_table_info *newinfo, *info;
 	void *pos, *entry0, *entry1;
+	struct compat_ip6t_entry *iter0;
+	struct ip6t_entry *iter1;
 	unsigned int size;
-	int ret;
+	int ret = 0;
 
 	info = *pinfo;
 	entry0 = *pentry0;
@@ -1771,11 +1787,13 @@ translate_compat_table(struct net *net,
 	j = 0;
 	xt_compat_lock(AF_INET6);
 	/* Walk through entries, checking offsets. */
-	ret = COMPAT_IP6T_ENTRY_ITERATE(entry0, total_size,
-					check_compat_entry_size_and_hooks,
-					info, &size, entry0,
-					entry0 + total_size,
-					hook_entries, underflows, &j, name);
+	xt_entry_foreach(iter0, entry0, total_size) {
+		ret = check_compat_entry_size_and_hooks(iter0, info, &size,
+		      entry0, entry0 + total_size, hook_entries, underflows,
+		      &j, name);
+		if (ret != 0)
+			break;
+	}
 	if (ret != 0)
 		goto out_unlock;
 
@@ -1816,9 +1834,12 @@ translate_compat_table(struct net *net,
 	entry1 = newinfo->entries[raw_smp_processor_id()];
 	pos = entry1;
 	size = total_size;
-	ret = COMPAT_IP6T_ENTRY_ITERATE(entry0, total_size,
-					compat_copy_entry_from_user,
-					&pos, &size, name, newinfo, entry1);
+	xt_entry_foreach(iter0, entry0, total_size) {
+		ret = compat_copy_entry_from_user(iter0, &pos,
+		      &size, name, newinfo, entry1);
+		if (ret != 0)
+			break;
+	}
 	xt_compat_flush_offsets(AF_INET6);
 	xt_compat_unlock(AF_INET6);
 	if (ret)
@@ -1829,13 +1850,28 @@ translate_compat_table(struct net *net,
 		goto free_newinfo;
 
 	i = 0;
-	ret = IP6T_ENTRY_ITERATE(entry1, newinfo->size, compat_check_entry,
-				 net, name, &i);
+	xt_entry_foreach(iter1, entry1, newinfo->size) {
+		ret = compat_check_entry(iter1, net, name, &i);
+		if (ret != 0)
+			break;
+	}
 	if (ret) {
+		/*
+		 * The first i matches need cleanup_entry (calls ->destroy)
+		 * because they had called ->check already. The other j-i
+		 * entries need only release.
+		 */
+		int skip = i;
 		j -= i;
-		COMPAT_IP6T_ENTRY_ITERATE_CONTINUE(entry0, newinfo->size, i,
-						   compat_release_entry, &j);
-		IP6T_ENTRY_ITERATE(entry1, newinfo->size, cleanup_entry, net, &i);
+		xt_entry_foreach(iter0, entry0, newinfo->size) {
+			if (skip-- > 0)
+				continue;
+			if (compat_release_entry(iter0, &j) != 0)
+				break;
+		}
+		xt_entry_foreach(iter1, entry1, newinfo->size)
+			if (cleanup_entry(iter1, net, &i) != 0)
+				break;
 		xt_free_table_info(newinfo);
 		return ret;
 	}
@@ -1853,7 +1889,9 @@ translate_compat_table(struct net *net,
 free_newinfo:
 	xt_free_table_info(newinfo);
 out:
-	COMPAT_IP6T_ENTRY_ITERATE(entry0, total_size, compat_release_entry, &j);
+	xt_entry_foreach(iter0, entry0, total_size)
+		if (compat_release_entry(iter0, &j) != 0)
+			break;
 	return ret;
 out_unlock:
 	xt_compat_flush_offsets(AF_INET6);
@@ -1868,6 +1906,7 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len)
 	struct compat_ip6t_replace tmp;
 	struct xt_table_info *newinfo;
 	void *loc_cpu_entry;
+	struct ip6t_entry *iter;
 
 	if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
 		return -EFAULT;
@@ -1906,7 +1945,9 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len)
 	return 0;
 
  free_newinfo_untrans:
-	IP6T_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, net, NULL);
+	xt_entry_foreach(iter, loc_cpu_entry, newinfo->size)
+		if (cleanup_entry(iter, net, NULL) != 0)
+			break;
  free_newinfo:
 	xt_free_table_info(newinfo);
 	return ret;
@@ -1955,6 +1996,7 @@ compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table,
 	int ret = 0;
 	const void *loc_cpu_entry;
 	unsigned int i = 0;
+	struct ip6t_entry *iter;
 
 	counters = alloc_counters(table);
 	if (IS_ERR(counters))
@@ -1967,9 +2009,12 @@ compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table,
 	loc_cpu_entry = private->entries[raw_smp_processor_id()];
 	pos = userptr;
 	size = total_size;
-	ret = IP6T_ENTRY_ITERATE(loc_cpu_entry, total_size,
-				 compat_copy_entry_to_user,
-				 &pos, &size, counters, &i);
+	xt_entry_foreach(iter, loc_cpu_entry, total_size) {
+		ret = compat_copy_entry_to_user(iter, &pos,
+		      &size, counters, &i);
+		if (ret != 0)
+			break;
+	}
 
 	vfree(counters);
 	return ret;
@@ -2169,12 +2214,15 @@ void ip6t_unregister_table(struct net *net, struct xt_table *table)
 	struct xt_table_info *private;
 	void *loc_cpu_entry;
 	struct module *table_owner = table->me;
+	struct ip6t_entry *iter;
 
 	private = xt_unregister_table(table);
 
 	/* Decrease module usage counts and free resources */
 	loc_cpu_entry = private->entries[raw_smp_processor_id()];
-	IP6T_ENTRY_ITERATE(loc_cpu_entry, private->size, cleanup_entry, net, NULL);
+	xt_entry_foreach(iter, loc_cpu_entry, private->size)
+		if (cleanup_entry(iter, net, NULL) != 0)
+			break;
 	if (private->number > private->initial_entries)
 		module_put(table_owner);
 	xt_free_table_info(private);
-- 
cgit v1.2.3


From dcea992aca82cb08b4674c4c783e325835408d1e Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Wed, 24 Feb 2010 18:34:48 +0100
Subject: netfilter: xtables: replace XT_MATCH_ITERATE macro

The macro is replaced by a list.h-like foreach loop. This makes
the code more inspectable.

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/netfilter/x_tables.h        | 10 +++-
 include/linux/netfilter_ipv4/ip_tables.h  |  6 +--
 include/linux/netfilter_ipv6/ip6_tables.h |  6 +--
 net/ipv4/netfilter/ip_tables.c            | 78 ++++++++++++++++++++++++-------
 net/ipv6/netfilter/ip6_tables.c           | 78 ++++++++++++++++++++++++-------
 net/netfilter/xt_TCPMSS.c                 | 12 +++--
 6 files changed, 141 insertions(+), 49 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index 9df3f5a8f9f7..84c7c928e9eb 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -120,6 +120,7 @@ struct xt_counters_info {
 
 #define XT_INV_PROTO		0x40	/* Invert the sense of PROTO. */
 
+#ifndef __KERNEL__
 /* fn returns 0 to continue iteration */
 #define XT_MATCH_ITERATE(type, e, fn, args...)			\
 ({								\
@@ -139,7 +140,6 @@ struct xt_counters_info {
 	__ret;							\
 })
 
-#ifndef __KERNEL__
 /* fn returns 0 to continue iteration */
 #define XT_ENTRY_ITERATE_CONTINUE(type, entries, size, n, fn, args...) \
 ({								\
@@ -172,6 +172,14 @@ struct xt_counters_info {
 	     (pos) < (typeof(pos))((char *)(ehead) + (esize)); \
 	     (pos) = (typeof(pos))((char *)(pos) + (pos)->next_offset))
 
+/* can only be xt_entry_match, so no use of typeof here */
+#define xt_ematch_foreach(pos, entry) \
+	for ((pos) = (struct xt_entry_match *)entry->elems; \
+	     (pos) < (struct xt_entry_match *)((char *)(entry) + \
+	             (entry)->target_offset); \
+	     (pos) = (struct xt_entry_match *)((char *)(pos) + \
+	             (pos)->u.match_size))
+
 #ifdef __KERNEL__
 
 #include <linux/netdevice.h>
diff --git a/include/linux/netfilter_ipv4/ip_tables.h b/include/linux/netfilter_ipv4/ip_tables.h
index 5b20ae724b41..704a7b6e8169 100644
--- a/include/linux/netfilter_ipv4/ip_tables.h
+++ b/include/linux/netfilter_ipv4/ip_tables.h
@@ -223,11 +223,11 @@ ipt_get_target(struct ipt_entry *e)
 	return (void *)e + e->target_offset;
 }
 
+#ifndef __KERNEL__
 /* fn returns 0 to continue iteration */
 #define IPT_MATCH_ITERATE(e, fn, args...) \
 	XT_MATCH_ITERATE(struct ipt_entry, e, fn, ## args)
 
-#ifndef __KERNEL__
 /* fn returns 0 to continue iteration */
 #define IPT_ENTRY_ITERATE(entries, size, fn, args...) \
 	XT_ENTRY_ITERATE(struct ipt_entry, entries, size, fn, ## args)
@@ -315,10 +315,6 @@ compat_ipt_get_target(struct compat_ipt_entry *e)
 
 #define COMPAT_IPT_ALIGN(s) 	COMPAT_XT_ALIGN(s)
 
-/* fn returns 0 to continue iteration */
-#define COMPAT_IPT_MATCH_ITERATE(e, fn, args...) \
-	XT_MATCH_ITERATE(struct compat_ipt_entry, e, fn, ## args)
-
 #endif /* CONFIG_COMPAT */
 #endif /*__KERNEL__*/
 #endif /* _IPTABLES_H */
diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/linux/netfilter_ipv6/ip6_tables.h
index 8bb3f5ba5ff2..e5ba03d783c6 100644
--- a/include/linux/netfilter_ipv6/ip6_tables.h
+++ b/include/linux/netfilter_ipv6/ip6_tables.h
@@ -280,11 +280,11 @@ ip6t_get_target(struct ip6t_entry *e)
 	return (void *)e + e->target_offset;
 }
 
+#ifndef __KERNEL__
 /* fn returns 0 to continue iteration */
 #define IP6T_MATCH_ITERATE(e, fn, args...) \
 	XT_MATCH_ITERATE(struct ip6t_entry, e, fn, ## args)
 
-#ifndef __KERNEL__
 /* fn returns 0 to continue iteration */
 #define IP6T_ENTRY_ITERATE(entries, size, fn, args...) \
 	XT_ENTRY_ITERATE(struct ip6t_entry, entries, size, fn, ## args)
@@ -343,10 +343,6 @@ compat_ip6t_get_target(struct compat_ip6t_entry *e)
 
 #define COMPAT_IP6T_ALIGN(s)	COMPAT_XT_ALIGN(s)
 
-/* fn returns 0 to continue iteration */
-#define COMPAT_IP6T_MATCH_ITERATE(e, fn, args...) \
-	XT_MATCH_ITERATE(struct compat_ip6t_entry, e, fn, ## args)
-
 #endif /* CONFIG_COMPAT */
 #endif /*__KERNEL__*/
 #endif /* _IP6_TABLES_H */
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 9c8aa394c51c..3a7fc732b918 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -366,16 +366,21 @@ ipt_do_table(struct sk_buff *skb,
 
 	do {
 		const struct ipt_entry_target *t;
+		const struct xt_entry_match *ematch;
 
 		IP_NF_ASSERT(e);
 		IP_NF_ASSERT(back);
 		if (!ip_packet_match(ip, indev, outdev,
-		    &e->ip, mtpar.fragoff) ||
-		    IPT_MATCH_ITERATE(e, do_match, skb, &mtpar) != 0) {
+		    &e->ip, mtpar.fragoff)) {
+ no_match:
 			e = ipt_next_entry(e);
 			continue;
 		}
 
+		xt_ematch_foreach(ematch, e)
+			if (do_match(ematch, skb, &mtpar) != 0)
+				goto no_match;
+
 		ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1);
 
 		t = ipt_get_target(e);
@@ -686,6 +691,7 @@ find_check_entry(struct ipt_entry *e, struct net *net, const char *name,
 	int ret;
 	unsigned int j;
 	struct xt_mtchk_param mtpar;
+	struct xt_entry_match *ematch;
 
 	ret = check_entry(e, name);
 	if (ret)
@@ -697,7 +703,11 @@ find_check_entry(struct ipt_entry *e, struct net *net, const char *name,
 	mtpar.entryinfo = &e->ip;
 	mtpar.hook_mask = e->comefrom;
 	mtpar.family    = NFPROTO_IPV4;
-	ret = IPT_MATCH_ITERATE(e, find_check_match, &mtpar, &j);
+	xt_ematch_foreach(ematch, e) {
+		ret = find_check_match(ematch, &mtpar, &j);
+		if (ret != 0)
+			break;
+	}
 	if (ret != 0)
 		goto cleanup_matches;
 
@@ -720,7 +730,9 @@ find_check_entry(struct ipt_entry *e, struct net *net, const char *name,
  err:
 	module_put(t->u.kernel.target->me);
  cleanup_matches:
-	IPT_MATCH_ITERATE(e, cleanup_match, net, &j);
+	xt_ematch_foreach(ematch, e)
+		if (cleanup_match(ematch, net, &j) != 0)
+			break;
 	return ret;
 }
 
@@ -791,9 +803,12 @@ cleanup_entry(struct ipt_entry *e, struct net *net)
 {
 	struct xt_tgdtor_param par;
 	struct ipt_entry_target *t;
+	struct xt_entry_match *ematch;
 
 	/* Cleanup all matches */
-	IPT_MATCH_ITERATE(e, cleanup_match, net, NULL);
+	xt_ematch_foreach(ematch, e)
+		if (cleanup_match(ematch, net, NULL) != 0)
+			break;
 	t = ipt_get_target(e);
 
 	par.net      = net;
@@ -1060,13 +1075,16 @@ static int compat_calc_entry(const struct ipt_entry *e,
 			     const struct xt_table_info *info,
 			     const void *base, struct xt_table_info *newinfo)
 {
+	const struct xt_entry_match *ematch;
 	const struct ipt_entry_target *t;
 	unsigned int entry_offset;
 	int off, i, ret;
 
 	off = sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry);
 	entry_offset = (void *)e - base;
-	IPT_MATCH_ITERATE(e, compat_calc_match, &off);
+	xt_ematch_foreach(ematch, e)
+		if (compat_calc_match(ematch, &off) != 0)
+			break;
 	t = ipt_get_target_c(e);
 	off += xt_compat_target_offset(t->u.kernel.target);
 	newinfo->size -= off;
@@ -1441,7 +1459,8 @@ compat_copy_entry_to_user(struct ipt_entry *e, void __user **dstptr,
 	struct compat_ipt_entry __user *ce;
 	u_int16_t target_offset, next_offset;
 	compat_uint_t origsize;
-	int ret;
+	const struct xt_entry_match *ematch;
+	int ret = 0;
 
 	origsize = *size;
 	ce = (struct compat_ipt_entry __user *)*dstptr;
@@ -1453,7 +1472,11 @@ compat_copy_entry_to_user(struct ipt_entry *e, void __user **dstptr,
 	*dstptr += sizeof(struct compat_ipt_entry);
 	*size -= sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry);
 
-	ret = IPT_MATCH_ITERATE(e, xt_compat_match_to_user, dstptr, size);
+	xt_ematch_foreach(ematch, e) {
+		ret = xt_compat_match_to_user(ematch, dstptr, size);
+		if (ret != 0)
+			break;
+	}
 	target_offset = e->target_offset - (origsize - *size);
 	if (ret)
 		return ret;
@@ -1505,9 +1528,12 @@ compat_release_match(struct ipt_entry_match *m, unsigned int *i)
 static void compat_release_entry(struct compat_ipt_entry *e)
 {
 	struct ipt_entry_target *t;
+	struct xt_entry_match *ematch;
 
 	/* Cleanup all matches */
-	COMPAT_IPT_MATCH_ITERATE(e, compat_release_match, NULL);
+	xt_ematch_foreach(ematch, e)
+		if (compat_release_match(ematch, NULL) != 0)
+			break;
 	t = compat_ipt_get_target(e);
 	module_put(t->u.kernel.target->me);
 }
@@ -1522,6 +1548,7 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e,
 				  const unsigned int *underflows,
 				  const char *name)
 {
+	struct xt_entry_match *ematch;
 	struct ipt_entry_target *t;
 	struct xt_target *target;
 	unsigned int entry_offset;
@@ -1550,8 +1577,12 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e,
 	off = sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry);
 	entry_offset = (void *)e - (void *)base;
 	j = 0;
-	ret = COMPAT_IPT_MATCH_ITERATE(e, compat_find_calc_match, name,
-				       &e->ip, e->comefrom, &off, &j);
+	xt_ematch_foreach(ematch, e) {
+		ret = compat_find_calc_match(ematch, name,
+		      &e->ip, e->comefrom, &off, &j);
+		if (ret != 0)
+			break;
+	}
 	if (ret != 0)
 		goto release_matches;
 
@@ -1590,7 +1621,9 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e,
 out:
 	module_put(t->u.kernel.target->me);
 release_matches:
-	IPT_MATCH_ITERATE(e, compat_release_match, &j);
+	xt_ematch_foreach(ematch, e)
+		if (compat_release_match(ematch, &j) != 0)
+			break;
 	return ret;
 }
 
@@ -1604,6 +1637,7 @@ compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr,
 	struct ipt_entry *de;
 	unsigned int origsize;
 	int ret, h;
+	struct xt_entry_match *ematch;
 
 	ret = 0;
 	origsize = *size;
@@ -1614,8 +1648,11 @@ compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr,
 	*dstptr += sizeof(struct ipt_entry);
 	*size += sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry);
 
-	ret = COMPAT_IPT_MATCH_ITERATE(e, xt_compat_match_from_user,
-				       dstptr, size);
+	xt_ematch_foreach(ematch, e) {
+		ret = xt_compat_match_from_user(ematch, dstptr, size);
+		if (ret != 0)
+			break;
+	}
 	if (ret)
 		return ret;
 	de->target_offset = e->target_offset - (origsize - *size);
@@ -1636,9 +1673,10 @@ compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr,
 static int
 compat_check_entry(struct ipt_entry *e, struct net *net, const char *name)
 {
+	struct xt_entry_match *ematch;
 	struct xt_mtchk_param mtpar;
 	unsigned int j;
-	int ret;
+	int ret = 0;
 
 	j = 0;
 	mtpar.net	= net;
@@ -1646,7 +1684,11 @@ compat_check_entry(struct ipt_entry *e, struct net *net, const char *name)
 	mtpar.entryinfo = &e->ip;
 	mtpar.hook_mask = e->comefrom;
 	mtpar.family    = NFPROTO_IPV4;
-	ret = IPT_MATCH_ITERATE(e, check_match, &mtpar, &j);
+	xt_ematch_foreach(ematch, e) {
+		ret = check_match(ematch, &mtpar, &j);
+		if (ret != 0)
+			break;
+	}
 	if (ret)
 		goto cleanup_matches;
 
@@ -1656,7 +1698,9 @@ compat_check_entry(struct ipt_entry *e, struct net *net, const char *name)
 	return 0;
 
  cleanup_matches:
-	IPT_MATCH_ITERATE(e, cleanup_match, net, &j);
+	xt_ematch_foreach(ematch, e)
+		if (cleanup_match(ematch, net, &j) != 0)
+			break;
 	return ret;
 }
 
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index b7e27c19c7ab..1537e6bad5d9 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -393,16 +393,21 @@ ip6t_do_table(struct sk_buff *skb,
 
 	do {
 		const struct ip6t_entry_target *t;
+		const struct xt_entry_match *ematch;
 
 		IP_NF_ASSERT(e);
 		IP_NF_ASSERT(back);
 		if (!ip6_packet_match(skb, indev, outdev, &e->ipv6,
-		    &mtpar.thoff, &mtpar.fragoff, &hotdrop) ||
-		    IP6T_MATCH_ITERATE(e, do_match, skb, &mtpar) != 0) {
+		    &mtpar.thoff, &mtpar.fragoff, &hotdrop)) {
+ no_match:
 			e = ip6t_next_entry(e);
 			continue;
 		}
 
+		xt_ematch_foreach(ematch, e)
+			if (do_match(ematch, skb, &mtpar) != 0)
+				goto no_match;
+
 		ADD_COUNTER(e->counters,
 			    ntohs(ipv6_hdr(skb)->payload_len) +
 			    sizeof(struct ipv6hdr), 1);
@@ -717,6 +722,7 @@ find_check_entry(struct ip6t_entry *e, struct net *net, const char *name,
 	int ret;
 	unsigned int j;
 	struct xt_mtchk_param mtpar;
+	struct xt_entry_match *ematch;
 
 	ret = check_entry(e, name);
 	if (ret)
@@ -728,7 +734,11 @@ find_check_entry(struct ip6t_entry *e, struct net *net, const char *name,
 	mtpar.entryinfo = &e->ipv6;
 	mtpar.hook_mask = e->comefrom;
 	mtpar.family    = NFPROTO_IPV6;
-	ret = IP6T_MATCH_ITERATE(e, find_check_match, &mtpar, &j);
+	xt_ematch_foreach(ematch, e) {
+		ret = find_check_match(ematch, &mtpar, &j);
+		if (ret != 0)
+			break;
+	}
 	if (ret != 0)
 		goto cleanup_matches;
 
@@ -751,7 +761,9 @@ find_check_entry(struct ip6t_entry *e, struct net *net, const char *name,
  err:
 	module_put(t->u.kernel.target->me);
  cleanup_matches:
-	IP6T_MATCH_ITERATE(e, cleanup_match, net, &j);
+	xt_ematch_foreach(ematch, e)
+		if (cleanup_match(ematch, net, &j) != 0)
+			break;
 	return ret;
 }
 
@@ -821,9 +833,12 @@ static void cleanup_entry(struct ip6t_entry *e, struct net *net)
 {
 	struct xt_tgdtor_param par;
 	struct ip6t_entry_target *t;
+	struct xt_entry_match *ematch;
 
 	/* Cleanup all matches */
-	IP6T_MATCH_ITERATE(e, cleanup_match, net, NULL);
+	xt_ematch_foreach(ematch, e)
+		if (cleanup_match(ematch, net, NULL) != 0)
+			break;
 	t = ip6t_get_target(e);
 
 	par.net      = net;
@@ -1090,13 +1105,16 @@ static int compat_calc_entry(const struct ip6t_entry *e,
 			     const struct xt_table_info *info,
 			     const void *base, struct xt_table_info *newinfo)
 {
+	const struct xt_entry_match *ematch;
 	const struct ip6t_entry_target *t;
 	unsigned int entry_offset;
 	int off, i, ret;
 
 	off = sizeof(struct ip6t_entry) - sizeof(struct compat_ip6t_entry);
 	entry_offset = (void *)e - base;
-	IP6T_MATCH_ITERATE(e, compat_calc_match, &off);
+	xt_ematch_foreach(ematch, e)
+		if (compat_calc_match(ematch, &off) != 0)
+			break;
 	t = ip6t_get_target_c(e);
 	off += xt_compat_target_offset(t->u.kernel.target);
 	newinfo->size -= off;
@@ -1474,7 +1492,8 @@ compat_copy_entry_to_user(struct ip6t_entry *e, void __user **dstptr,
 	struct compat_ip6t_entry __user *ce;
 	u_int16_t target_offset, next_offset;
 	compat_uint_t origsize;
-	int ret;
+	const struct xt_entry_match *ematch;
+	int ret = 0;
 
 	origsize = *size;
 	ce = (struct compat_ip6t_entry __user *)*dstptr;
@@ -1486,7 +1505,11 @@ compat_copy_entry_to_user(struct ip6t_entry *e, void __user **dstptr,
 	*dstptr += sizeof(struct compat_ip6t_entry);
 	*size -= sizeof(struct ip6t_entry) - sizeof(struct compat_ip6t_entry);
 
-	ret = IP6T_MATCH_ITERATE(e, xt_compat_match_to_user, dstptr, size);
+	xt_ematch_foreach(ematch, e) {
+		ret = xt_compat_match_to_user(ematch, dstptr, size);
+		if (ret != 0)
+			break;
+	}
 	target_offset = e->target_offset - (origsize - *size);
 	if (ret)
 		return ret;
@@ -1538,9 +1561,12 @@ compat_release_match(struct ip6t_entry_match *m, unsigned int *i)
 static void compat_release_entry(struct compat_ip6t_entry *e)
 {
 	struct ip6t_entry_target *t;
+	struct xt_entry_match *ematch;
 
 	/* Cleanup all matches */
-	COMPAT_IP6T_MATCH_ITERATE(e, compat_release_match, NULL);
+	xt_ematch_foreach(ematch, e)
+		if (compat_release_match(ematch, NULL) != 0)
+			break;
 	t = compat_ip6t_get_target(e);
 	module_put(t->u.kernel.target->me);
 }
@@ -1555,6 +1581,7 @@ check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e,
 				  const unsigned int *underflows,
 				  const char *name)
 {
+	struct xt_entry_match *ematch;
 	struct ip6t_entry_target *t;
 	struct xt_target *target;
 	unsigned int entry_offset;
@@ -1583,8 +1610,12 @@ check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e,
 	off = sizeof(struct ip6t_entry) - sizeof(struct compat_ip6t_entry);
 	entry_offset = (void *)e - (void *)base;
 	j = 0;
-	ret = COMPAT_IP6T_MATCH_ITERATE(e, compat_find_calc_match, name,
-					&e->ipv6, e->comefrom, &off, &j);
+	xt_ematch_foreach(ematch, e) {
+		ret = compat_find_calc_match(ematch, name,
+		      &e->ipv6, e->comefrom, &off, &j);
+		if (ret != 0)
+			break;
+	}
 	if (ret != 0)
 		goto release_matches;
 
@@ -1623,7 +1654,9 @@ check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e,
 out:
 	module_put(t->u.kernel.target->me);
 release_matches:
-	IP6T_MATCH_ITERATE(e, compat_release_match, &j);
+	xt_ematch_foreach(ematch, e)
+		if (compat_release_match(ematch, &j) != 0)
+			break;
 	return ret;
 }
 
@@ -1637,6 +1670,7 @@ compat_copy_entry_from_user(struct compat_ip6t_entry *e, void **dstptr,
 	struct ip6t_entry *de;
 	unsigned int origsize;
 	int ret, h;
+	struct xt_entry_match *ematch;
 
 	ret = 0;
 	origsize = *size;
@@ -1647,8 +1681,11 @@ compat_copy_entry_from_user(struct compat_ip6t_entry *e, void **dstptr,
 	*dstptr += sizeof(struct ip6t_entry);
 	*size += sizeof(struct ip6t_entry) - sizeof(struct compat_ip6t_entry);
 
-	ret = COMPAT_IP6T_MATCH_ITERATE(e, xt_compat_match_from_user,
-					dstptr, size);
+	xt_ematch_foreach(ematch, e) {
+		ret = xt_compat_match_from_user(ematch, dstptr, size);
+		if (ret != 0)
+			break;
+	}
 	if (ret)
 		return ret;
 	de->target_offset = e->target_offset - (origsize - *size);
@@ -1670,8 +1707,9 @@ static int compat_check_entry(struct ip6t_entry *e, struct net *net,
 			      const char *name)
 {
 	unsigned int j;
-	int ret;
+	int ret = 0;
 	struct xt_mtchk_param mtpar;
+	struct xt_entry_match *ematch;
 
 	j = 0;
 	mtpar.net	= net;
@@ -1679,7 +1717,11 @@ static int compat_check_entry(struct ip6t_entry *e, struct net *net,
 	mtpar.entryinfo = &e->ipv6;
 	mtpar.hook_mask = e->comefrom;
 	mtpar.family    = NFPROTO_IPV6;
-	ret = IP6T_MATCH_ITERATE(e, check_match, &mtpar, &j);
+	xt_ematch_foreach(ematch, e) {
+		ret = check_match(ematch, &mtpar, &j);
+		if (ret != 0)
+			break;
+	}
 	if (ret)
 		goto cleanup_matches;
 
@@ -1689,7 +1731,9 @@ static int compat_check_entry(struct ip6t_entry *e, struct net *net,
 	return 0;
 
  cleanup_matches:
-	IP6T_MATCH_ITERATE(e, cleanup_match, net, &j);
+	xt_ematch_foreach(ematch, e)
+		if (cleanup_match(ematch, net, &j) != 0)
+			break;
 	return ret;
 }
 
diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c
index 6f21b4377dbb..0e357ac9a2a8 100644
--- a/net/netfilter/xt_TCPMSS.c
+++ b/net/netfilter/xt_TCPMSS.c
@@ -239,6 +239,7 @@ static bool tcpmss_tg4_check(const struct xt_tgchk_param *par)
 {
 	const struct xt_tcpmss_info *info = par->targinfo;
 	const struct ipt_entry *e = par->entryinfo;
+	const struct xt_entry_match *ematch;
 
 	if (info->mss == XT_TCPMSS_CLAMP_PMTU &&
 	    (par->hook_mask & ~((1 << NF_INET_FORWARD) |
@@ -248,8 +249,9 @@ static bool tcpmss_tg4_check(const struct xt_tgchk_param *par)
 		       "FORWARD, OUTPUT and POSTROUTING hooks\n");
 		return false;
 	}
-	if (IPT_MATCH_ITERATE(e, find_syn_match))
-		return true;
+	xt_ematch_foreach(ematch, e)
+		if (find_syn_match(ematch))
+			return true;
 	printk("xt_TCPMSS: Only works on TCP SYN packets\n");
 	return false;
 }
@@ -259,6 +261,7 @@ static bool tcpmss_tg6_check(const struct xt_tgchk_param *par)
 {
 	const struct xt_tcpmss_info *info = par->targinfo;
 	const struct ip6t_entry *e = par->entryinfo;
+	const struct xt_entry_match *ematch;
 
 	if (info->mss == XT_TCPMSS_CLAMP_PMTU &&
 	    (par->hook_mask & ~((1 << NF_INET_FORWARD) |
@@ -268,8 +271,9 @@ static bool tcpmss_tg6_check(const struct xt_tgchk_param *par)
 		       "FORWARD, OUTPUT and POSTROUTING hooks\n");
 		return false;
 	}
-	if (IP6T_MATCH_ITERATE(e, find_syn_match))
-		return true;
+	xt_ematch_foreach(ematch, e)
+		if (find_syn_match(ematch))
+			return true;
 	printk("xt_TCPMSS: Only works on TCP SYN packets\n");
 	return false;
 }
-- 
cgit v1.2.3


From abfe5a01ef1e463cbafdae461b693db34e308c02 Mon Sep 17 00:00:00 2001
From: Stefan Richter <stefanr@s5r6.in-berlin.de>
Date: Sat, 20 Feb 2010 12:13:49 +0100
Subject: firewire: cdev: add more flexible cycle timer ioctl

The system time from CLOCK_REALTIME is not monotonic, hence problematic
for the main user of the FW_CDEV_IOC_GET_CYCLE_TIMER ioctl.  This issue
exists in its successor ABI, i.e. raw1394, too.
http://subversion.ffado.org/ticket/242

We now offer an alternative ioctl which lets the caller choose between
CLOCK_REALTIME, CLOCK_MONOTONIC, and CLOCK_MONOTONIC_RAW as source of
the local time, very similar to the clock_gettime libc function.  The
format of the local time return value matches that of clock_gettime
(seconds and nanoseconds, instead of a single microseconds value from
the existing ioctl).

Signed-off-by: Stefan Richter <stefanr@s5r6.in-berlin.de>
---
 drivers/firewire/core-cdev.c  | 38 ++++++++++++++++++++++++++++++++------
 include/linux/firewire-cdev.h | 31 +++++++++++++++++++++++++++----
 2 files changed, 59 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/firewire/core-cdev.c b/drivers/firewire/core-cdev.c
index 3c1ac0933d24..a4aa477b9b2c 100644
--- a/drivers/firewire/core-cdev.c
+++ b/drivers/firewire/core-cdev.c
@@ -1031,22 +1031,46 @@ static int ioctl_stop_iso(struct client *client, void *buffer)
 	return fw_iso_context_stop(client->iso_context);
 }
 
-static int ioctl_get_cycle_timer(struct client *client, void *buffer)
+static int ioctl_get_cycle_timer2(struct client *client, void *buffer)
 {
-	struct fw_cdev_get_cycle_timer *request = buffer;
+	struct fw_cdev_get_cycle_timer2 *request = buffer;
 	struct fw_card *card = client->device->card;
-	struct timeval tv;
+	struct timespec ts = {0, 0};
 	u32 cycle_time;
+	int ret = 0;
 
 	local_irq_disable();
 
 	cycle_time = card->driver->get_cycle_time(card);
-	do_gettimeofday(&tv);
+
+	switch (request->clk_id) {
+	case CLOCK_REALTIME:      getnstimeofday(&ts);                   break;
+	case CLOCK_MONOTONIC:     do_posix_clock_monotonic_gettime(&ts); break;
+	case CLOCK_MONOTONIC_RAW: getrawmonotonic(&ts);                  break;
+	default:
+		ret = -EINVAL;
+	}
 
 	local_irq_enable();
 
-	request->local_time = tv.tv_sec * 1000000ULL + tv.tv_usec;
-	request->cycle_timer = cycle_time;
+	request->tv_sec		= ts.tv_sec;
+	request->tv_nsec	= ts.tv_nsec;
+	request->cycle_timer	= cycle_time;
+
+	return ret;
+}
+
+static int ioctl_get_cycle_timer(struct client *client, void *buffer)
+{
+	struct fw_cdev_get_cycle_timer *request = buffer;
+	struct fw_cdev_get_cycle_timer2 ct2;
+
+	ct2.clk_id = CLOCK_REALTIME;
+	ioctl_get_cycle_timer2(client, &ct2);
+
+	request->local_time	= ct2.tv_sec * USEC_PER_SEC +
+				  ct2.tv_nsec / NSEC_PER_USEC;
+	request->cycle_timer	= ct2.cycle_timer;
 
 	return 0;
 }
@@ -1320,6 +1344,7 @@ static int (* const ioctl_handlers[])(struct client *client, void *buffer) = {
 	ioctl_get_speed,
 	ioctl_send_broadcast_request,
 	ioctl_send_stream_packet,
+	ioctl_get_cycle_timer2,
 };
 
 static int dispatch_ioctl(struct client *client,
@@ -1341,6 +1366,7 @@ static int dispatch_ioctl(struct client *client,
 		struct fw_cdev_get_cycle_timer		_0c;
 		struct fw_cdev_allocate_iso_resource	_0d;
 		struct fw_cdev_send_stream_packet	_13;
+		struct fw_cdev_get_cycle_timer2		_14;
 	})];
 	int ret;
 
diff --git a/include/linux/firewire-cdev.h b/include/linux/firewire-cdev.h
index 520ecf86cbb3..baa8290c8416 100644
--- a/include/linux/firewire-cdev.h
+++ b/include/linux/firewire-cdev.h
@@ -248,6 +248,9 @@ union fw_cdev_event {
 #define FW_CDEV_IOC_SEND_BROADCAST_REQUEST       _IOW('#', 0x12, struct fw_cdev_send_request)
 #define FW_CDEV_IOC_SEND_STREAM_PACKET           _IOW('#', 0x13, struct fw_cdev_send_stream_packet)
 
+/* available since kernel version 2.6.34 */
+#define FW_CDEV_IOC_GET_CYCLE_TIMER2   _IOWR('#', 0x14, struct fw_cdev_get_cycle_timer2)
+
 /*
  * FW_CDEV_VERSION History
  *  1  (2.6.22)  - initial version
@@ -544,20 +547,40 @@ struct fw_cdev_stop_iso {
 /**
  * struct fw_cdev_get_cycle_timer - read cycle timer register
  * @local_time:   system time, in microseconds since the Epoch
- * @cycle_timer:  isochronous cycle timer, as per OHCI 1.1 clause 5.13
+ * @cycle_timer:  Cycle Time register contents
  *
  * The %FW_CDEV_IOC_GET_CYCLE_TIMER ioctl reads the isochronous cycle timer
- * and also the system clock.  This allows to express the receive time of an
- * isochronous packet as a system time with microsecond accuracy.
+ * and also the system clock (%CLOCK_REALTIME).  This allows to express the
+ * receive time of an isochronous packet as a system time.
  *
  * @cycle_timer consists of 7 bits cycleSeconds, 13 bits cycleCount, and
- * 12 bits cycleOffset, in host byte order.
+ * 12 bits cycleOffset, in host byte order.  Cf. the Cycle Time register
+ * per IEEE 1394 or Isochronous Cycle Timer register per OHCI-1394.
  */
 struct fw_cdev_get_cycle_timer {
 	__u64 local_time;
 	__u32 cycle_timer;
 };
 
+/**
+ * struct fw_cdev_get_cycle_timer2 - read cycle timer register
+ * @tv_sec:       system time, seconds
+ * @tv_nsec:      system time, sub-seconds part in nanoseconds
+ * @clk_id:       input parameter, clock from which to get the system time
+ * @cycle_timer:  Cycle Time register contents
+ *
+ * The %FW_CDEV_IOC_GET_CYCLE_TIMER2 works like
+ * %FW_CDEV_IOC_GET_CYCLE_TIMER but lets you choose a clock like with POSIX'
+ * clock_gettime function.  Supported @clk_id values are POSIX' %CLOCK_REALTIME
+ * and %CLOCK_MONOTONIC and Linux' %CLOCK_MONOTONIC_RAW.
+ */
+struct fw_cdev_get_cycle_timer2 {
+	__s64 tv_sec;
+	__s32 tv_nsec;
+	__s32 clk_id;
+	__u32 cycle_timer;
+};
+
 /**
  * struct fw_cdev_allocate_iso_resource - (De)allocate a channel or bandwidth
  * @closure:	Passed back to userspace in correponding iso resource events
-- 
cgit v1.2.3


From e94b6d7736107c07b1b089797651d02994d268c7 Mon Sep 17 00:00:00 2001
From: Stefan Richter <stefanr@s5r6.in-berlin.de>
Date: Sun, 21 Feb 2010 12:48:57 +0100
Subject: firewire: cdev: increment ABI version number

so that clients can detect whether the FW_CDEV_IOC_GET_CYCLE_TIMER ioctl
is reliable (on all tested controllers, especially the widely used VIA
controllers, also NEC controllers, see commits b677532b and 1c1517ef).

Also add a comment on the 2.6.32 iso xmit enhancement and on dual-buffer
IR having been disabled in 2.6.33.

Signed-off-by: Stefan Richter <stefanr@s5r6.in-berlin.de>
---
 include/linux/firewire-cdev.h | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/firewire-cdev.h b/include/linux/firewire-cdev.h
index baa8290c8416..40b11013408e 100644
--- a/include/linux/firewire-cdev.h
+++ b/include/linux/firewire-cdev.h
@@ -256,8 +256,12 @@ union fw_cdev_event {
  *  1  (2.6.22)  - initial version
  *  2  (2.6.30)  - changed &fw_cdev_event_iso_interrupt.header if
  *                 &fw_cdev_create_iso_context.header_size is 8 or more
+ *     (2.6.32)  - added time stamp to xmit &fw_cdev_event_iso_interrupt
+ *     (2.6.33)  - IR has always packet-per-buffer semantics now, not one of
+ *                 dual-buffer or packet-per-buffer depending on hardware
+ *  3  (2.6.34)  - made &fw_cdev_get_cycle_timer reliable
  */
-#define FW_CDEV_VERSION 2
+#define FW_CDEV_VERSION 3
 
 /**
  * struct fw_cdev_get_info - General purpose information ioctl
@@ -556,6 +560,9 @@ struct fw_cdev_stop_iso {
  * @cycle_timer consists of 7 bits cycleSeconds, 13 bits cycleCount, and
  * 12 bits cycleOffset, in host byte order.  Cf. the Cycle Time register
  * per IEEE 1394 or Isochronous Cycle Timer register per OHCI-1394.
+ *
+ * In version 1 and 2 of the ABI, this ioctl returned unreliable (non-
+ * monotonic) @cycle_timer values on certain controllers.
  */
 struct fw_cdev_get_cycle_timer {
 	__u64 local_time;
-- 
cgit v1.2.3


From 6498ba04aee69540f8f586438f90d58e5b8e6936 Mon Sep 17 00:00:00 2001
From: Stefan Richter <stefanr@s5r6.in-berlin.de>
Date: Sun, 21 Feb 2010 17:57:05 +0100
Subject: firewire: ohci: remove unused dualbuffer IR code

This code was no longer used since 2.6.33, "firewire: ohci: always use
packet-per-buffer mode for isochronous reception" commit 090699c0.  If
anybody needs this code in the future for special purposes, it can be
brought back in.  But it must not be re-enabled by default; drivers
(kernelspace or userspace drivers) should only get this mode if they
explicitly request it.

Signed-off-by: Stefan Richter <stefanr@s5r6.in-berlin.de>
---
 drivers/firewire/ohci.c | 184 +-----------------------------------------------
 include/linux/pci_ids.h |   1 -
 2 files changed, 1 insertion(+), 184 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/firewire/ohci.c b/drivers/firewire/ohci.c
index 0f7c4bb978e7..047331e59b31 100644
--- a/drivers/firewire/ohci.c
+++ b/drivers/firewire/ohci.c
@@ -72,20 +72,6 @@ struct descriptor {
 	__le16 transfer_status;
 } __attribute__((aligned(16)));
 
-struct db_descriptor {
-	__le16 first_size;
-	__le16 control;
-	__le16 second_req_count;
-	__le16 first_req_count;
-	__le32 branch_address;
-	__le16 second_res_count;
-	__le16 first_res_count;
-	__le32 reserved0;
-	__le32 first_buffer;
-	__le32 second_buffer;
-	__le32 reserved1;
-} __attribute__((aligned(16)));
-
 #define CONTROL_SET(regs)	(regs)
 #define CONTROL_CLEAR(regs)	((regs) + 4)
 #define COMMAND_PTR(regs)	((regs) + 12)
@@ -187,7 +173,6 @@ struct fw_ohci {
 	int generation;
 	int request_generation;	/* for timestamping incoming requests */
 
-	bool use_dualbuffer;
 	bool old_uninorth;
 	bool bus_reset_packet_quirk;
 	bool iso_cycle_timer_quirk;
@@ -1863,52 +1848,6 @@ static void copy_iso_headers(struct iso_context *ctx, void *p)
 	ctx->header_length += ctx->base.header_size;
 }
 
-static int handle_ir_dualbuffer_packet(struct context *context,
-				       struct descriptor *d,
-				       struct descriptor *last)
-{
-	struct iso_context *ctx =
-		container_of(context, struct iso_context, context);
-	struct db_descriptor *db = (struct db_descriptor *) d;
-	__le32 *ir_header;
-	size_t header_length;
-	void *p, *end;
-
-	if (db->first_res_count != 0 && db->second_res_count != 0) {
-		if (ctx->excess_bytes <= le16_to_cpu(db->second_req_count)) {
-			/* This descriptor isn't done yet, stop iteration. */
-			return 0;
-		}
-		ctx->excess_bytes -= le16_to_cpu(db->second_req_count);
-	}
-
-	header_length = le16_to_cpu(db->first_req_count) -
-		le16_to_cpu(db->first_res_count);
-
-	p = db + 1;
-	end = p + header_length;
-	while (p < end) {
-		copy_iso_headers(ctx, p);
-		ctx->excess_bytes +=
-			(le32_to_cpu(*(__le32 *)(p + 4)) >> 16) & 0xffff;
-		p += max(ctx->base.header_size, (size_t)8);
-	}
-
-	ctx->excess_bytes -= le16_to_cpu(db->second_req_count) -
-		le16_to_cpu(db->second_res_count);
-
-	if (le16_to_cpu(db->control) & DESCRIPTOR_IRQ_ALWAYS) {
-		ir_header = (__le32 *) (db + 1);
-		ctx->base.callback(&ctx->base,
-				   le32_to_cpu(ir_header[0]) & 0xffff,
-				   ctx->header_length, ctx->header,
-				   ctx->base.callback_data);
-		ctx->header_length = 0;
-	}
-
-	return 1;
-}
-
 static int handle_ir_packet_per_buffer(struct context *context,
 				       struct descriptor *d,
 				       struct descriptor *last)
@@ -1995,10 +1934,7 @@ static struct fw_iso_context *ohci_allocate_iso_context(struct fw_card *card,
 		channels = &ohci->ir_context_channels;
 		mask = &ohci->ir_context_mask;
 		list = ohci->ir_context_list;
-		if (ohci->use_dualbuffer)
-			callback = handle_ir_dualbuffer_packet;
-		else
-			callback = handle_ir_packet_per_buffer;
+		callback = handle_ir_packet_per_buffer;
 	}
 
 	spin_lock_irqsave(&ohci->lock, flags);
@@ -2061,8 +1997,6 @@ static int ohci_start_iso(struct fw_iso_context *base,
 	} else {
 		index = ctx - ohci->ir_context_list;
 		control = IR_CONTEXT_ISOCH_HEADER;
-		if (ohci->use_dualbuffer)
-			control |= IR_CONTEXT_DUAL_BUFFER_MODE;
 		match = (tags << 28) | (sync << 8) | ctx->base.channel;
 		if (cycle >= 0) {
 			match |= (cycle & 0x07fff) << 12;
@@ -2223,92 +2157,6 @@ static int ohci_queue_iso_transmit(struct fw_iso_context *base,
 	return 0;
 }
 
-static int ohci_queue_iso_receive_dualbuffer(struct fw_iso_context *base,
-					     struct fw_iso_packet *packet,
-					     struct fw_iso_buffer *buffer,
-					     unsigned long payload)
-{
-	struct iso_context *ctx = container_of(base, struct iso_context, base);
-	struct db_descriptor *db = NULL;
-	struct descriptor *d;
-	struct fw_iso_packet *p;
-	dma_addr_t d_bus, page_bus;
-	u32 z, header_z, length, rest;
-	int page, offset, packet_count, header_size;
-
-	/*
-	 * FIXME: Cycle lost behavior should be configurable: lose
-	 * packet, retransmit or terminate..
-	 */
-
-	p = packet;
-	z = 2;
-
-	/*
-	 * The OHCI controller puts the isochronous header and trailer in the
-	 * buffer, so we need at least 8 bytes.
-	 */
-	packet_count = p->header_length / ctx->base.header_size;
-	header_size = packet_count * max(ctx->base.header_size, (size_t)8);
-
-	/* Get header size in number of descriptors. */
-	header_z = DIV_ROUND_UP(header_size, sizeof(*d));
-	page     = payload >> PAGE_SHIFT;
-	offset   = payload & ~PAGE_MASK;
-	rest     = p->payload_length;
-	/*
-	 * The controllers I've tested have not worked correctly when
-	 * second_req_count is zero.  Rather than do something we know won't
-	 * work, return an error
-	 */
-	if (rest == 0)
-		return -EINVAL;
-
-	while (rest > 0) {
-		d = context_get_descriptors(&ctx->context,
-					    z + header_z, &d_bus);
-		if (d == NULL)
-			return -ENOMEM;
-
-		db = (struct db_descriptor *) d;
-		db->control = cpu_to_le16(DESCRIPTOR_STATUS |
-					  DESCRIPTOR_BRANCH_ALWAYS);
-		db->first_size =
-		    cpu_to_le16(max(ctx->base.header_size, (size_t)8));
-		if (p->skip && rest == p->payload_length) {
-			db->control |= cpu_to_le16(DESCRIPTOR_WAIT);
-			db->first_req_count = db->first_size;
-		} else {
-			db->first_req_count = cpu_to_le16(header_size);
-		}
-		db->first_res_count = db->first_req_count;
-		db->first_buffer = cpu_to_le32(d_bus + sizeof(*db));
-
-		if (p->skip && rest == p->payload_length)
-			length = 4;
-		else if (offset + rest < PAGE_SIZE)
-			length = rest;
-		else
-			length = PAGE_SIZE - offset;
-
-		db->second_req_count = cpu_to_le16(length);
-		db->second_res_count = db->second_req_count;
-		page_bus = page_private(buffer->pages[page]);
-		db->second_buffer = cpu_to_le32(page_bus + offset);
-
-		if (p->interrupt && length == rest)
-			db->control |= cpu_to_le16(DESCRIPTOR_IRQ_ALWAYS);
-
-		context_append(&ctx->context, d, z, header_z);
-		offset = (offset + length) & ~PAGE_MASK;
-		rest -= length;
-		if (offset == 0)
-			page++;
-	}
-
-	return 0;
-}
-
 static int ohci_queue_iso_receive_packet_per_buffer(struct fw_iso_context *base,
 					struct fw_iso_packet *packet,
 					struct fw_iso_buffer *buffer,
@@ -2399,9 +2247,6 @@ static int ohci_queue_iso(struct fw_iso_context *base,
 	spin_lock_irqsave(&ctx->context.ohci->lock, flags);
 	if (base->type == FW_ISO_CONTEXT_TRANSMIT)
 		ret = ohci_queue_iso_transmit(base, packet, buffer, payload);
-	else if (ctx->context.ohci->use_dualbuffer)
-		ret = ohci_queue_iso_receive_dualbuffer(base, packet,
-							buffer, payload);
 	else
 		ret = ohci_queue_iso_receive_packet_per_buffer(base, packet,
 							buffer, payload);
@@ -2456,10 +2301,6 @@ static void ohci_pmac_off(struct pci_dev *dev)
 #define ohci_pmac_off(dev)
 #endif /* CONFIG_PPC_PMAC */
 
-#define PCI_VENDOR_ID_AGERE		PCI_VENDOR_ID_ATT
-#define PCI_DEVICE_ID_AGERE_FW643	0x5901
-#define PCI_DEVICE_ID_TI_TSB43AB23	0x8024
-
 static int __devinit pci_probe(struct pci_dev *dev,
 			       const struct pci_device_id *ent)
 {
@@ -2508,29 +2349,6 @@ static int __devinit pci_probe(struct pci_dev *dev,
 	}
 
 	version = reg_read(ohci, OHCI1394_Version) & 0x00ff00ff;
-#if 0
-	/* FIXME: make it a context option or remove dual-buffer mode */
-	ohci->use_dualbuffer = version >= OHCI_VERSION_1_1;
-#endif
-
-	/* dual-buffer mode is broken if more than one IR context is active */
-	if (dev->vendor == PCI_VENDOR_ID_AGERE &&
-	    dev->device == PCI_DEVICE_ID_AGERE_FW643)
-		ohci->use_dualbuffer = false;
-
-	/* dual-buffer mode is broken */
-	if (dev->vendor == PCI_VENDOR_ID_RICOH &&
-	    dev->device == PCI_DEVICE_ID_RICOH_R5C832)
-		ohci->use_dualbuffer = false;
-
-/* x86-32 currently doesn't use highmem for dma_alloc_coherent */
-#if !defined(CONFIG_X86_32)
-	/* dual-buffer mode is broken with descriptor addresses above 2G */
-	if (dev->vendor == PCI_VENDOR_ID_TI &&
-	    (dev->device == PCI_DEVICE_ID_TI_TSB43AB22 ||
-	     dev->device == PCI_DEVICE_ID_TI_TSB43AB23))
-		ohci->use_dualbuffer = false;
-#endif
 
 #if defined(CONFIG_PPC_PMAC) && defined(CONFIG_PPC32)
 	ohci->old_uninorth = dev->vendor == PCI_VENDOR_ID_APPLE &&
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index cca8a044e2b6..a6f80a129ff6 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -770,7 +770,6 @@
 #define PCI_VENDOR_ID_TI		0x104c
 #define PCI_DEVICE_ID_TI_TVP4020	0x3d07
 #define PCI_DEVICE_ID_TI_4450		0x8011
-#define PCI_DEVICE_ID_TI_TSB43AB22	0x8023
 #define PCI_DEVICE_ID_TI_XX21_XX11	0x8031
 #define PCI_DEVICE_ID_TI_XX21_XX11_FM	0x8033
 #define PCI_DEVICE_ID_TI_XX21_XX11_SD	0x8034
-- 
cgit v1.2.3


From 632ee200130899252508c478ad0e808222573fbc Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Mon, 22 Feb 2010 17:04:45 -0800
Subject: rcu: Introduce lockdep-based checking to RCU read-side primitives

Inspection is proving insufficient to catch all RCU misuses,
which is understandable given that rcu_dereference() might be
protected by any of four different flavors of RCU (RCU, RCU-bh,
RCU-sched, and SRCU), and might also/instead be protected by any
of a number of locking primitives. It is therefore time to
enlist the aid of lockdep.

This set of patches is inspired by earlier work by Peter
Zijlstra and Thomas Gleixner, and takes the following approach:

o	Set up separate lockdep classes for RCU, RCU-bh, and RCU-sched.

o	Set up separate lockdep classes for each instance of SRCU.

o	Create primitives that check for being in an RCU read-side
	critical section.  These return exact answers if lockdep is
	fully enabled, but if unsure, report being in an RCU read-side
	critical section.  (We want to avoid false positives!)
	The primitives are:

	For RCU: rcu_read_lock_held(void)

	For RCU-bh: rcu_read_lock_bh_held(void)

	For RCU-sched: rcu_read_lock_sched_held(void)

	For SRCU: srcu_read_lock_held(struct srcu_struct *sp)

o	Add rcu_dereference_check(), which takes a second argument
	in which one places a boolean expression based on the above
	primitives and/or lockdep_is_held().

o	A new kernel configuration parameter, CONFIG_PROVE_RCU, enables
	rcu_dereference_check().  This depends on CONFIG_PROVE_LOCKING,
	and should be quite helpful during the transition period while
	CONFIG_PROVE_RCU-unaware patches are in flight.

The existing rcu_dereference() primitive does no checking, but
upcoming patches will change that.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: laijs@cn.fujitsu.com
Cc: dipankar@in.ibm.com
Cc: mathieu.desnoyers@polymtl.ca
Cc: josh@joshtriplett.org
Cc: dvhltc@us.ibm.com
Cc: niv@us.ibm.com
Cc: peterz@infradead.org
Cc: rostedt@goodmis.org
Cc: Valdis.Kletnieks@vt.edu
Cc: dhowells@redhat.com
LKML-Reference: <1266887105-1528-1-git-send-email-paulmck@linux.vnet.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/rcupdate.h | 126 +++++++++++++++++++++++++++++++++++++++++++----
 include/linux/srcu.h     |  87 +++++++++++++++++++++++++++++++-
 kernel/rcupdate.c        |  10 ++++
 kernel/rcutorture.c      |  12 ++++-
 kernel/srcu.c            |  50 ++++++++++++-------
 lib/Kconfig.debug        |  12 +++++
 lib/debug_locks.c        |   1 +
 7 files changed, 267 insertions(+), 31 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 24440f4bf476..e3d37efe2703 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -78,14 +78,120 @@ extern void rcu_init(void);
 } while (0)
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
+
 extern struct lockdep_map rcu_lock_map;
-# define rcu_read_acquire()	\
-			lock_acquire(&rcu_lock_map, 0, 0, 2, 1, NULL, _THIS_IP_)
+# define rcu_read_acquire() \
+		lock_acquire(&rcu_lock_map, 0, 0, 2, 1, NULL, _THIS_IP_)
 # define rcu_read_release()	lock_release(&rcu_lock_map, 1, _THIS_IP_)
-#else
-# define rcu_read_acquire()	do { } while (0)
-# define rcu_read_release()	do { } while (0)
-#endif
+
+extern struct lockdep_map rcu_bh_lock_map;
+# define rcu_read_acquire_bh() \
+		lock_acquire(&rcu_bh_lock_map, 0, 0, 2, 1, NULL, _THIS_IP_)
+# define rcu_read_release_bh()	lock_release(&rcu_bh_lock_map, 1, _THIS_IP_)
+
+extern struct lockdep_map rcu_sched_lock_map;
+# define rcu_read_acquire_sched() \
+		lock_acquire(&rcu_sched_lock_map, 0, 0, 2, 1, NULL, _THIS_IP_)
+# define rcu_read_release_sched() \
+		lock_release(&rcu_sched_lock_map, 1, _THIS_IP_)
+
+/**
+ * rcu_read_lock_held - might we be in RCU read-side critical section?
+ *
+ * If CONFIG_PROVE_LOCKING is selected and enabled, returns nonzero iff in
+ * an RCU read-side critical section.  In absence of CONFIG_PROVE_LOCKING,
+ * this assumes we are in an RCU read-side critical section unless it can
+ * prove otherwise.
+ */
+static inline int rcu_read_lock_held(void)
+{
+	if (debug_locks)
+		return lock_is_held(&rcu_lock_map);
+	return 1;
+}
+
+/**
+ * rcu_read_lock_bh_held - might we be in RCU-bh read-side critical section?
+ *
+ * If CONFIG_PROVE_LOCKING is selected and enabled, returns nonzero iff in
+ * an RCU-bh read-side critical section.  In absence of CONFIG_PROVE_LOCKING,
+ * this assumes we are in an RCU-bh read-side critical section unless it can
+ * prove otherwise.
+ */
+static inline int rcu_read_lock_bh_held(void)
+{
+	if (debug_locks)
+		return lock_is_held(&rcu_bh_lock_map);
+	return 1;
+}
+
+/**
+ * rcu_read_lock_sched_held - might we be in RCU-sched read-side critical section?
+ *
+ * If CONFIG_PROVE_LOCKING is selected and enabled, returns nonzero iff in an
+ * RCU-sched read-side critical section.  In absence of CONFIG_PROVE_LOCKING,
+ * this assumes we are in an RCU-sched read-side critical section unless it
+ * can prove otherwise.  Note that disabling of preemption (including
+ * disabling irqs) counts as an RCU-sched read-side critical section.
+ */
+static inline int rcu_read_lock_sched_held(void)
+{
+	int lockdep_opinion = 0;
+
+	if (debug_locks)
+		lockdep_opinion = lock_is_held(&rcu_sched_lock_map);
+	return lockdep_opinion || preempt_count() != 0;
+}
+
+#else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
+
+# define rcu_read_acquire()		do { } while (0)
+# define rcu_read_release()		do { } while (0)
+# define rcu_read_acquire_bh()		do { } while (0)
+# define rcu_read_release_bh()		do { } while (0)
+# define rcu_read_acquire_sched()	do { } while (0)
+# define rcu_read_release_sched()	do { } while (0)
+
+static inline int rcu_read_lock_held(void)
+{
+	return 1;
+}
+
+static inline int rcu_read_lock_bh_held(void)
+{
+	return 1;
+}
+
+static inline int rcu_read_lock_sched_held(void)
+{
+	return preempt_count() != 0;
+}
+
+#endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
+
+#ifdef CONFIG_PROVE_RCU
+
+/**
+ * rcu_dereference_check - rcu_dereference with debug checking
+ *
+ * Do an rcu_dereference(), but check that the context is correct.
+ * For example, rcu_dereference_check(gp, rcu_read_lock_held()) to
+ * ensure that the rcu_dereference_check() executes within an RCU
+ * read-side critical section.  It is also possible to check for
+ * locks being held, for example, by using lockdep_is_held().
+ */
+#define rcu_dereference_check(p, c) \
+	({ \
+		if (debug_locks) \
+			WARN_ON_ONCE(!(c)); \
+		rcu_dereference(p); \
+	})
+
+#else /* #ifdef CONFIG_PROVE_RCU */
+
+#define rcu_dereference_check(p, c)	rcu_dereference(p)
+
+#endif /* #else #ifdef CONFIG_PROVE_RCU */
 
 /**
  * rcu_read_lock - mark the beginning of an RCU read-side critical section.
@@ -160,7 +266,7 @@ static inline void rcu_read_lock_bh(void)
 {
 	__rcu_read_lock_bh();
 	__acquire(RCU_BH);
-	rcu_read_acquire();
+	rcu_read_acquire_bh();
 }
 
 /*
@@ -170,7 +276,7 @@ static inline void rcu_read_lock_bh(void)
  */
 static inline void rcu_read_unlock_bh(void)
 {
-	rcu_read_release();
+	rcu_read_release_bh();
 	__release(RCU_BH);
 	__rcu_read_unlock_bh();
 }
@@ -188,7 +294,7 @@ static inline void rcu_read_lock_sched(void)
 {
 	preempt_disable();
 	__acquire(RCU_SCHED);
-	rcu_read_acquire();
+	rcu_read_acquire_sched();
 }
 
 /* Used by lockdep and tracing: cannot be traced, cannot call lockdep. */
@@ -205,7 +311,7 @@ static inline notrace void rcu_read_lock_sched_notrace(void)
  */
 static inline void rcu_read_unlock_sched(void)
 {
-	rcu_read_release();
+	rcu_read_release_sched();
 	__release(RCU_SCHED);
 	preempt_enable();
 }
diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index 4765d97dcafb..adbe1670b366 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -35,6 +35,9 @@ struct srcu_struct {
 	int completed;
 	struct srcu_struct_array *per_cpu_ref;
 	struct mutex mutex;
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+	struct lockdep_map dep_map;
+#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
 };
 
 #ifndef CONFIG_PREEMPT
@@ -43,12 +46,92 @@ struct srcu_struct {
 #define srcu_barrier()
 #endif /* #else #ifndef CONFIG_PREEMPT */
 
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+
+int __init_srcu_struct(struct srcu_struct *sp, const char *name,
+		       struct lock_class_key *key);
+
+#define init_srcu_struct(sp) \
+({ \
+	static struct lock_class_key __srcu_key; \
+	\
+	__init_srcu_struct((sp), #sp, &__srcu_key); \
+})
+
+# define srcu_read_acquire(sp) \
+		lock_acquire(&(sp)->dep_map, 0, 0, 2, 1, NULL, _THIS_IP_)
+# define srcu_read_release(sp) \
+		lock_release(&(sp)->dep_map, 1, _THIS_IP_)
+
+#else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
+
 int init_srcu_struct(struct srcu_struct *sp);
+
+# define srcu_read_acquire(sp)  do { } while (0)
+# define srcu_read_release(sp)  do { } while (0)
+
+#endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
+
 void cleanup_srcu_struct(struct srcu_struct *sp);
-int srcu_read_lock(struct srcu_struct *sp) __acquires(sp);
-void srcu_read_unlock(struct srcu_struct *sp, int idx) __releases(sp);
+int __srcu_read_lock(struct srcu_struct *sp) __acquires(sp);
+void __srcu_read_unlock(struct srcu_struct *sp, int idx) __releases(sp);
 void synchronize_srcu(struct srcu_struct *sp);
 void synchronize_srcu_expedited(struct srcu_struct *sp);
 long srcu_batches_completed(struct srcu_struct *sp);
 
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+
+/**
+ * srcu_read_lock_held - might we be in SRCU read-side critical section?
+ *
+ * If CONFIG_PROVE_LOCKING is selected and enabled, returns nonzero iff in
+ * an SRCU read-side critical section.  In absence of CONFIG_PROVE_LOCKING,
+ * this assumes we are in an SRCU read-side critical section unless it can
+ * prove otherwise.
+ */
+static inline int srcu_read_lock_held(struct srcu_struct *sp)
+{
+	if (debug_locks)
+		return lock_is_held(&sp->dep_map);
+	return 1;
+}
+
+#else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
+
+static inline int srcu_read_lock_held(struct srcu_struct *sp)
+{
+	return 1;
+}
+
+#endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
+
+/**
+ * srcu_read_lock - register a new reader for an SRCU-protected structure.
+ * @sp: srcu_struct in which to register the new reader.
+ *
+ * Enter an SRCU read-side critical section.  Note that SRCU read-side
+ * critical sections may be nested.
+ */
+static inline int srcu_read_lock(struct srcu_struct *sp) __acquires(sp)
+{
+	int retval = __srcu_read_lock(sp);
+
+	srcu_read_acquire(sp);
+	return retval;
+}
+
+/**
+ * srcu_read_unlock - unregister a old reader from an SRCU-protected structure.
+ * @sp: srcu_struct in which to unregister the old reader.
+ * @idx: return value from corresponding srcu_read_lock().
+ *
+ * Exit an SRCU read-side critical section.
+ */
+static inline void srcu_read_unlock(struct srcu_struct *sp, int idx)
+	__releases(sp)
+{
+	srcu_read_release(sp);
+	__srcu_read_unlock(sp, idx);
+}
+
 #endif
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index 9b7fd4723878..033cb55c26df 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -50,6 +50,16 @@ static struct lock_class_key rcu_lock_key;
 struct lockdep_map rcu_lock_map =
 	STATIC_LOCKDEP_MAP_INIT("rcu_read_lock", &rcu_lock_key);
 EXPORT_SYMBOL_GPL(rcu_lock_map);
+
+static struct lock_class_key rcu_bh_lock_key;
+struct lockdep_map rcu_bh_lock_map =
+	STATIC_LOCKDEP_MAP_INIT("rcu_read_lock_bh", &rcu_bh_lock_key);
+EXPORT_SYMBOL_GPL(rcu_bh_lock_map);
+
+static struct lock_class_key rcu_sched_lock_key;
+struct lockdep_map rcu_sched_lock_map =
+	STATIC_LOCKDEP_MAP_INIT("rcu_read_lock_sched", &rcu_sched_lock_key);
+EXPORT_SYMBOL_GPL(rcu_sched_lock_map);
 #endif
 
 /*
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index adda92bfafac..5f43f30fcd1d 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@ -796,7 +796,11 @@ static void rcu_torture_timer(unsigned long unused)
 
 	idx = cur_ops->readlock();
 	completed = cur_ops->completed();
-	p = rcu_dereference(rcu_torture_current);
+	p = rcu_dereference_check(rcu_torture_current,
+				  rcu_read_lock_held() ||
+				  rcu_read_lock_bh_held() ||
+				  rcu_read_lock_sched_held() ||
+				  srcu_read_lock_held(&srcu_ctl));
 	if (p == NULL) {
 		/* Leave because rcu_torture_writer is not yet underway */
 		cur_ops->readunlock(idx);
@@ -853,7 +857,11 @@ rcu_torture_reader(void *arg)
 		}
 		idx = cur_ops->readlock();
 		completed = cur_ops->completed();
-		p = rcu_dereference(rcu_torture_current);
+		p = rcu_dereference_check(rcu_torture_current,
+					  rcu_read_lock_held() ||
+					  rcu_read_lock_bh_held() ||
+					  rcu_read_lock_sched_held() ||
+					  srcu_read_lock_held(&srcu_ctl));
 		if (p == NULL) {
 			/* Wait for rcu_torture_writer to get underway */
 			cur_ops->readunlock(idx);
diff --git a/kernel/srcu.c b/kernel/srcu.c
index 31b275b9c112..bde4295774c8 100644
--- a/kernel/srcu.c
+++ b/kernel/srcu.c
@@ -34,6 +34,30 @@
 #include <linux/smp.h>
 #include <linux/srcu.h>
 
+static int init_srcu_struct_fields(struct srcu_struct *sp)
+{
+	sp->completed = 0;
+	mutex_init(&sp->mutex);
+	sp->per_cpu_ref = alloc_percpu(struct srcu_struct_array);
+	return sp->per_cpu_ref ? 0 : -ENOMEM;
+}
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+
+int __init_srcu_struct(struct srcu_struct *sp, const char *name,
+		       struct lock_class_key *key)
+{
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+	/* Don't re-initialize a lock while it is held. */
+	debug_check_no_locks_freed((void *)sp, sizeof(*sp));
+	lockdep_init_map(&sp->dep_map, name, key, 0);
+#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
+	return init_srcu_struct_fields(sp);
+}
+EXPORT_SYMBOL_GPL(__init_srcu_struct);
+
+#else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
+
 /**
  * init_srcu_struct - initialize a sleep-RCU structure
  * @sp: structure to initialize.
@@ -44,13 +68,12 @@
  */
 int init_srcu_struct(struct srcu_struct *sp)
 {
-	sp->completed = 0;
-	mutex_init(&sp->mutex);
-	sp->per_cpu_ref = alloc_percpu(struct srcu_struct_array);
-	return (sp->per_cpu_ref ? 0 : -ENOMEM);
+	return init_srcu_struct_fields(sp);
 }
 EXPORT_SYMBOL_GPL(init_srcu_struct);
 
+#endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
+
 /*
  * srcu_readers_active_idx -- returns approximate number of readers
  *	active on the specified rank of per-CPU counters.
@@ -100,15 +123,12 @@ void cleanup_srcu_struct(struct srcu_struct *sp)
 }
 EXPORT_SYMBOL_GPL(cleanup_srcu_struct);
 
-/**
- * srcu_read_lock - register a new reader for an SRCU-protected structure.
- * @sp: srcu_struct in which to register the new reader.
- *
+/*
  * Counts the new reader in the appropriate per-CPU element of the
  * srcu_struct.  Must be called from process context.
  * Returns an index that must be passed to the matching srcu_read_unlock().
  */
-int srcu_read_lock(struct srcu_struct *sp)
+int __srcu_read_lock(struct srcu_struct *sp)
 {
 	int idx;
 
@@ -120,26 +140,22 @@ int srcu_read_lock(struct srcu_struct *sp)
 	preempt_enable();
 	return idx;
 }
-EXPORT_SYMBOL_GPL(srcu_read_lock);
+EXPORT_SYMBOL_GPL(__srcu_read_lock);
 
-/**
- * srcu_read_unlock - unregister a old reader from an SRCU-protected structure.
- * @sp: srcu_struct in which to unregister the old reader.
- * @idx: return value from corresponding srcu_read_lock().
- *
+/*
  * Removes the count for the old reader from the appropriate per-CPU
  * element of the srcu_struct.  Note that this may well be a different
  * CPU than that which was incremented by the corresponding srcu_read_lock().
  * Must be called from process context.
  */
-void srcu_read_unlock(struct srcu_struct *sp, int idx)
+void __srcu_read_unlock(struct srcu_struct *sp, int idx)
 {
 	preempt_disable();
 	srcu_barrier();  /* ensure compiler won't misorder critical section. */
 	per_cpu_ptr(sp->per_cpu_ref, smp_processor_id())->c[idx]--;
 	preempt_enable();
 }
-EXPORT_SYMBOL_GPL(srcu_read_unlock);
+EXPORT_SYMBOL_GPL(__srcu_read_unlock);
 
 /*
  * Helper function for synchronize_srcu() and synchronize_srcu_expedited().
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 6bf97d176326..6af20a8a0a54 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -499,6 +499,18 @@ config PROVE_LOCKING
 
 	 For more details, see Documentation/lockdep-design.txt.
 
+config PROVE_RCU
+	bool "RCU debugging: prove RCU correctness"
+	depends on PROVE_LOCKING
+	default n
+	help
+	 This feature enables lockdep extensions that check for correct
+	 use of RCU APIs.  This is currently under development.  Say Y
+	 if you want to debug RCU usage or help work on the PROVE_RCU
+	 feature.
+
+	 Say N if you are unsure.
+
 config LOCKDEP
 	bool
 	depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT
diff --git a/lib/debug_locks.c b/lib/debug_locks.c
index bc3b11731b9c..5bf0020b9248 100644
--- a/lib/debug_locks.c
+++ b/lib/debug_locks.c
@@ -23,6 +23,7 @@
  * shut up after that.
  */
 int debug_locks = 1;
+EXPORT_SYMBOL_GPL(debug_locks);
 
 /*
  * The locking-testsuite uses <debug_locks_silent> to get a
-- 
cgit v1.2.3


From c26d34a5858f96a564c45048bf5f09319d2abad1 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Mon, 22 Feb 2010 17:04:46 -0800
Subject: rcu: Add lockdep-enabled variants of rcu_dereference()

Make rcu_dereference() check for being in an RCU read-side
critical section, and create rcu_dereference_bh(),
rcu_dereference_sched(), and srcu_dereference() to check for the
other flavors of RCU.  Also create rcu_dereference_raw() to
avoid checking, and make rcu_dereference_check() use
rcu_dereference_raw().

Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: laijs@cn.fujitsu.com
Cc: dipankar@in.ibm.com
Cc: mathieu.desnoyers@polymtl.ca
Cc: josh@joshtriplett.org
Cc: dvhltc@us.ibm.com
Cc: niv@us.ibm.com
Cc: peterz@infradead.org
Cc: rostedt@goodmis.org
Cc: Valdis.Kletnieks@vt.edu
Cc: dhowells@redhat.com
LKML-Reference: <1266887105-1528-2-git-send-email-paulmck@linux.vnet.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/rcupdate.h | 41 ++++++++++++++++++++++++++++++++++-------
 include/linux/srcu.h     |  8 ++++++++
 2 files changed, 42 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index e3d37efe2703..839d296a7ac0 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -184,12 +184,12 @@ static inline int rcu_read_lock_sched_held(void)
 	({ \
 		if (debug_locks) \
 			WARN_ON_ONCE(!(c)); \
-		rcu_dereference(p); \
+		rcu_dereference_raw(p); \
 	})
 
 #else /* #ifdef CONFIG_PROVE_RCU */
 
-#define rcu_dereference_check(p, c)	rcu_dereference(p)
+#define rcu_dereference_check(p, c)	rcu_dereference_raw(p)
 
 #endif /* #else #ifdef CONFIG_PROVE_RCU */
 
@@ -325,21 +325,48 @@ static inline notrace void rcu_read_unlock_sched_notrace(void)
 
 
 /**
- * rcu_dereference - fetch an RCU-protected pointer in an
- * RCU read-side critical section.  This pointer may later
- * be safely dereferenced.
+ * rcu_dereference_raw - fetch an RCU-protected pointer
+ *
+ * The caller must be within some flavor of RCU read-side critical
+ * section, or must be otherwise preventing the pointer from changing,
+ * for example, by holding an appropriate lock.  This pointer may later
+ * be safely dereferenced.  It is the caller's responsibility to have
+ * done the right thing, as this primitive does no checking of any kind.
  *
  * Inserts memory barriers on architectures that require them
  * (currently only the Alpha), and, more importantly, documents
  * exactly which pointers are protected by RCU.
  */
-
-#define rcu_dereference(p)     ({ \
+#define rcu_dereference_raw(p)	({ \
 				typeof(p) _________p1 = ACCESS_ONCE(p); \
 				smp_read_barrier_depends(); \
 				(_________p1); \
 				})
 
+/**
+ * rcu_dereference - fetch an RCU-protected pointer, checking for RCU
+ *
+ * Makes rcu_dereference_check() do the dirty work.
+ */
+#define rcu_dereference(p) \
+	rcu_dereference_check(p, rcu_read_lock_held())
+
+/**
+ * rcu_dereference_bh - fetch an RCU-protected pointer, checking for RCU-bh
+ *
+ * Makes rcu_dereference_check() do the dirty work.
+ */
+#define rcu_dereference_bh(p) \
+		rcu_dereference_check(p, rcu_read_lock_bh_held())
+
+/**
+ * rcu_dereference_sched - fetch RCU-protected pointer, checking for RCU-sched
+ *
+ * Makes rcu_dereference_check() do the dirty work.
+ */
+#define rcu_dereference_sched(p) \
+		rcu_dereference_check(p, rcu_read_lock_sched_held())
+
 /**
  * rcu_assign_pointer - assign (publicize) a pointer to a newly
  * initialized structure that will be dereferenced by RCU read-side
diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index adbe1670b366..3084f80909cd 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -105,6 +105,14 @@ static inline int srcu_read_lock_held(struct srcu_struct *sp)
 
 #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
 
+/**
+ * srcu_dereference - fetch SRCU-protected pointer with checking
+ *
+ * Makes rcu_dereference_check() do the dirty work.
+ */
+#define srcu_dereference(p, sp) \
+		rcu_dereference_check(p, srcu_read_lock_held(sp))
+
 /**
  * srcu_read_lock - register a new reader for an SRCU-protected structure.
  * @sp: srcu_struct in which to register the new reader.
-- 
cgit v1.2.3


From 0632eb3d7563d6a76d49a3860b6352d800c92854 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Mon, 22 Feb 2010 17:04:47 -0800
Subject: rcu: Integrate rcu_dereference_check() message into lockdep

Make rcu_dereference_check() print the list of held locks in
addition to the stack dump to ease debugging.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: laijs@cn.fujitsu.com
Cc: dipankar@in.ibm.com
Cc: mathieu.desnoyers@polymtl.ca
Cc: josh@joshtriplett.org
Cc: dvhltc@us.ibm.com
Cc: niv@us.ibm.com
Cc: peterz@infradead.org
Cc: rostedt@goodmis.org
Cc: Valdis.Kletnieks@vt.edu
Cc: dhowells@redhat.com
LKML-Reference: <1266887105-1528-3-git-send-email-paulmck@linux.vnet.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/lockdep.h  |  4 ++++
 include/linux/rcupdate.h |  4 ++--
 kernel/lockdep.c         | 18 ++++++++++++++++++
 3 files changed, 24 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 9ccf0e286b2a..10206a87da19 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -534,4 +534,8 @@ do {									\
 # define might_lock_read(lock) do { } while (0)
 #endif
 
+#ifdef CONFIG_PROVE_RCU
+extern void lockdep_rcu_dereference(const char *file, const int line);
+#endif
+
 #endif /* __LINUX_LOCKDEP_H */
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 839d296a7ac0..1a4de31bd7b4 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -182,8 +182,8 @@ static inline int rcu_read_lock_sched_held(void)
  */
 #define rcu_dereference_check(p, c) \
 	({ \
-		if (debug_locks) \
-			WARN_ON_ONCE(!(c)); \
+		if (debug_locks && !(c)) \
+			lockdep_rcu_dereference(__FILE__, __LINE__); \
 		rcu_dereference_raw(p); \
 	})
 
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index c62ec14609b9..672c436946ce 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -3809,3 +3809,21 @@ void lockdep_sys_exit(void)
 		lockdep_print_held_locks(curr);
 	}
 }
+
+void lockdep_rcu_dereference(const char *file, const int line)
+{
+	struct task_struct *curr = current;
+
+	if (!debug_locks_off())
+		return;
+	printk("\n==============================================\n");
+	printk(  "[ BUG: Unsafe rcu_dereference_check() usage! ]\n");
+	printk(  "----------------------------------------------\n");
+	printk("%s:%d invoked rcu_dereference_check() without protection!\n",
+			file, line);
+	printk("\nother info that might help us debug this:\n\n");
+	lockdep_print_held_locks(curr);
+	printk("\nstack backtrace:\n");
+	dump_stack();
+}
+EXPORT_SYMBOL_GPL(lockdep_rcu_dereference);
-- 
cgit v1.2.3


From 3120438ad68601f341e61e7cb1323b0e1a6ca367 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Mon, 22 Feb 2010 17:04:48 -0800
Subject: rcu: Disable lockdep checking in RCU list-traversal primitives

The theory is that use of bare rcu_dereference() is more prone
to error than use of the RCU list-traversal primitives.
Therefore, disable lockdep RCU read-side critical-section
checking in these primitives for the time being.  Once all of
the rcu_dereference() uses have been dealt with, it may be time
to re-enable lockdep checking for the RCU list-traversal
primitives.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: laijs@cn.fujitsu.com
Cc: dipankar@in.ibm.com
Cc: mathieu.desnoyers@polymtl.ca
Cc: josh@joshtriplett.org
Cc: dvhltc@us.ibm.com
Cc: niv@us.ibm.com
Cc: peterz@infradead.org
Cc: rostedt@goodmis.org
Cc: Valdis.Kletnieks@vt.edu
Cc: dhowells@redhat.com
LKML-Reference: <1266887105-1528-4-git-send-email-paulmck@linux.vnet.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/rculist.h       | 14 +++++++-------
 include/linux/rculist_nulls.h |  4 ++--
 2 files changed, 9 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rculist.h b/include/linux/rculist.h
index 1bf0f708c4fc..779d70749beb 100644
--- a/include/linux/rculist.h
+++ b/include/linux/rculist.h
@@ -208,7 +208,7 @@ static inline void list_splice_init_rcu(struct list_head *list,
  * primitives such as list_add_rcu() as long as it's guarded by rcu_read_lock().
  */
 #define list_entry_rcu(ptr, type, member) \
-	container_of(rcu_dereference(ptr), type, member)
+	container_of(rcu_dereference_raw(ptr), type, member)
 
 /**
  * list_first_entry_rcu - get the first element from a list
@@ -225,9 +225,9 @@ static inline void list_splice_init_rcu(struct list_head *list,
 	list_entry_rcu((ptr)->next, type, member)
 
 #define __list_for_each_rcu(pos, head) \
-	for (pos = rcu_dereference((head)->next); \
+	for (pos = rcu_dereference_raw((head)->next); \
 		pos != (head); \
-		pos = rcu_dereference(pos->next))
+		pos = rcu_dereference_raw(pos->next))
 
 /**
  * list_for_each_entry_rcu	-	iterate over rcu list of given type
@@ -257,9 +257,9 @@ static inline void list_splice_init_rcu(struct list_head *list,
  * as long as the traversal is guarded by rcu_read_lock().
  */
 #define list_for_each_continue_rcu(pos, head) \
-	for ((pos) = rcu_dereference((pos)->next); \
+	for ((pos) = rcu_dereference_raw((pos)->next); \
 		prefetch((pos)->next), (pos) != (head); \
-		(pos) = rcu_dereference((pos)->next))
+		(pos) = rcu_dereference_raw((pos)->next))
 
 /**
  * list_for_each_entry_continue_rcu - continue iteration over list of given type
@@ -418,10 +418,10 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev,
  * as long as the traversal is guarded by rcu_read_lock().
  */
 #define hlist_for_each_entry_rcu(tpos, pos, head, member)		 \
-	for (pos = rcu_dereference((head)->first);			 \
+	for (pos = rcu_dereference_raw((head)->first);			 \
 		pos && ({ prefetch(pos->next); 1; }) &&			 \
 		({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; }); \
-		pos = rcu_dereference(pos->next))
+		pos = rcu_dereference_raw(pos->next))
 
 #endif	/* __KERNEL__ */
 #endif
diff --git a/include/linux/rculist_nulls.h b/include/linux/rculist_nulls.h
index 589a40919f01..b70ffe53cb9f 100644
--- a/include/linux/rculist_nulls.h
+++ b/include/linux/rculist_nulls.h
@@ -101,10 +101,10 @@ static inline void hlist_nulls_add_head_rcu(struct hlist_nulls_node *n,
  *
  */
 #define hlist_nulls_for_each_entry_rcu(tpos, pos, head, member) \
-	for (pos = rcu_dereference((head)->first);			 \
+	for (pos = rcu_dereference_raw((head)->first);			 \
 		(!is_a_nulls(pos)) &&			\
 		({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member); 1; }); \
-		pos = rcu_dereference(pos->next))
+		pos = rcu_dereference_raw(pos->next))
 
 #endif
 #endif
-- 
cgit v1.2.3


From a898def29e4119bc01ebe7ca97423181f4c0ea2d Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Mon, 22 Feb 2010 17:04:49 -0800
Subject: net: Add checking to rcu_dereference() primitives

Update rcu_dereference() primitives to use new lockdep-based
checking. The rcu_dereference() in __in6_dev_get() may be
protected either by rcu_read_lock() or RTNL, per Eric Dumazet.
The rcu_dereference() in __sk_free() is protected by the fact
that it is never reached if an update could change it.  Check
for this by using rcu_dereference_check() to verify that the
struct sock's ->sk_wmem_alloc counter is zero.

Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: laijs@cn.fujitsu.com
Cc: dipankar@in.ibm.com
Cc: mathieu.desnoyers@polymtl.ca
Cc: josh@joshtriplett.org
Cc: dvhltc@us.ibm.com
Cc: niv@us.ibm.com
Cc: peterz@infradead.org
Cc: rostedt@goodmis.org
Cc: Valdis.Kletnieks@vt.edu
Cc: dhowells@redhat.com
LKML-Reference: <1266887105-1528-5-git-send-email-paulmck@linux.vnet.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/rtnetlink.h |  3 +++
 include/net/addrconf.h    |  4 +++-
 net/core/dev.c            |  2 +-
 net/core/filter.c         |  6 +++---
 net/core/rtnetlink.c      |  8 ++++++++
 net/core/sock.c           |  3 ++-
 net/decnet/dn_route.c     | 14 +++++++-------
 net/ipv4/route.c          | 14 +++++++-------
 net/packet/af_packet.c    |  2 +-
 9 files changed, 35 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index 05330fc5b436..5c52fa43785c 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -735,6 +735,9 @@ extern void rtnl_lock(void);
 extern void rtnl_unlock(void);
 extern int rtnl_trylock(void);
 extern int rtnl_is_locked(void);
+#ifdef CONFIG_PROVE_LOCKING
+extern int lockdep_rtnl_is_held(void);
+#endif /* #ifdef CONFIG_PROVE_LOCKING */
 
 extern void rtnetlink_init(void);
 extern void __rtnl_unlock(void);
diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index 0f7c37825fc1..45375b41a2a0 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -177,7 +177,9 @@ extern int unregister_inet6addr_notifier(struct notifier_block *nb);
 static inline struct inet6_dev *
 __in6_dev_get(struct net_device *dev)
 {
-	return rcu_dereference(dev->ip6_ptr);
+	return rcu_dereference_check(dev->ip6_ptr,
+				     rcu_read_lock_held() ||
+				     lockdep_rtnl_is_held());
 }
 
 static inline struct inet6_dev *
diff --git a/net/core/dev.c b/net/core/dev.c
index ec874218b206..bb1f1da2b8a7 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2041,7 +2041,7 @@ gso:
 	rcu_read_lock_bh();
 
 	txq = dev_pick_tx(dev, skb);
-	q = rcu_dereference(txq->qdisc);
+	q = rcu_dereference_bh(txq->qdisc);
 
 #ifdef CONFIG_NET_CLS_ACT
 	skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS);
diff --git a/net/core/filter.c b/net/core/filter.c
index 08db7b9143a3..3541aa48d21d 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -86,7 +86,7 @@ int sk_filter(struct sock *sk, struct sk_buff *skb)
 		return err;
 
 	rcu_read_lock_bh();
-	filter = rcu_dereference(sk->sk_filter);
+	filter = rcu_dereference_bh(sk->sk_filter);
 	if (filter) {
 		unsigned int pkt_len = sk_run_filter(skb, filter->insns,
 				filter->len);
@@ -521,7 +521,7 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
 	}
 
 	rcu_read_lock_bh();
-	old_fp = rcu_dereference(sk->sk_filter);
+	old_fp = rcu_dereference_bh(sk->sk_filter);
 	rcu_assign_pointer(sk->sk_filter, fp);
 	rcu_read_unlock_bh();
 
@@ -536,7 +536,7 @@ int sk_detach_filter(struct sock *sk)
 	struct sk_filter *filter;
 
 	rcu_read_lock_bh();
-	filter = rcu_dereference(sk->sk_filter);
+	filter = rcu_dereference_bh(sk->sk_filter);
 	if (filter) {
 		rcu_assign_pointer(sk->sk_filter, NULL);
 		sk_filter_delayed_uncharge(sk, filter);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 794bcb897ff0..4c7d3f635ba7 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -89,6 +89,14 @@ int rtnl_is_locked(void)
 }
 EXPORT_SYMBOL(rtnl_is_locked);
 
+#ifdef CONFIG_PROVE_LOCKING
+int lockdep_rtnl_is_held(void)
+{
+	return lockdep_is_held(&rtnl_mutex);
+}
+EXPORT_SYMBOL(lockdep_rtnl_is_held);
+#endif /* #ifdef CONFIG_PROVE_LOCKING */
+
 static struct rtnl_link *rtnl_msg_handlers[NPROTO];
 
 static inline int rtm_msgindex(int msgtype)
diff --git a/net/core/sock.c b/net/core/sock.c
index e1f6f225f012..305cba401ae6 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1073,7 +1073,8 @@ static void __sk_free(struct sock *sk)
 	if (sk->sk_destruct)
 		sk->sk_destruct(sk);
 
-	filter = rcu_dereference(sk->sk_filter);
+	filter = rcu_dereference_check(sk->sk_filter,
+				       atomic_read(&sk->sk_wmem_alloc) == 0);
 	if (filter) {
 		sk_filter_uncharge(sk, filter);
 		rcu_assign_pointer(sk->sk_filter, NULL);
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index a03284061a31..a7bf03ca0a36 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -1155,8 +1155,8 @@ static int __dn_route_output_key(struct dst_entry **pprt, const struct flowi *fl
 
 	if (!(flags & MSG_TRYHARD)) {
 		rcu_read_lock_bh();
-		for(rt = rcu_dereference(dn_rt_hash_table[hash].chain); rt;
-			rt = rcu_dereference(rt->u.dst.dn_next)) {
+		for (rt = rcu_dereference_bh(dn_rt_hash_table[hash].chain); rt;
+			rt = rcu_dereference_bh(rt->u.dst.dn_next)) {
 			if ((flp->fld_dst == rt->fl.fld_dst) &&
 			    (flp->fld_src == rt->fl.fld_src) &&
 			    (flp->mark == rt->fl.mark) &&
@@ -1618,9 +1618,9 @@ int dn_cache_dump(struct sk_buff *skb, struct netlink_callback *cb)
 		if (h > s_h)
 			s_idx = 0;
 		rcu_read_lock_bh();
-		for(rt = rcu_dereference(dn_rt_hash_table[h].chain), idx = 0;
+		for(rt = rcu_dereference_bh(dn_rt_hash_table[h].chain), idx = 0;
 			rt;
-			rt = rcu_dereference(rt->u.dst.dn_next), idx++) {
+			rt = rcu_dereference_bh(rt->u.dst.dn_next), idx++) {
 			if (idx < s_idx)
 				continue;
 			skb_dst_set(skb, dst_clone(&rt->u.dst));
@@ -1654,12 +1654,12 @@ static struct dn_route *dn_rt_cache_get_first(struct seq_file *seq)
 
 	for(s->bucket = dn_rt_hash_mask; s->bucket >= 0; --s->bucket) {
 		rcu_read_lock_bh();
-		rt = dn_rt_hash_table[s->bucket].chain;
+		rt = rcu_dereference_bh(dn_rt_hash_table[s->bucket].chain);
 		if (rt)
 			break;
 		rcu_read_unlock_bh();
 	}
-	return rcu_dereference(rt);
+	return rt;
 }
 
 static struct dn_route *dn_rt_cache_get_next(struct seq_file *seq, struct dn_route *rt)
@@ -1674,7 +1674,7 @@ static struct dn_route *dn_rt_cache_get_next(struct seq_file *seq, struct dn_rou
 		rcu_read_lock_bh();
 		rt = dn_rt_hash_table[s->bucket].chain;
 	}
-	return rcu_dereference(rt);
+	return rcu_dereference_bh(rt);
 }
 
 static void *dn_rt_cache_seq_start(struct seq_file *seq, loff_t *pos)
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index d62b05d33384..4f11faa5c824 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -287,12 +287,12 @@ static struct rtable *rt_cache_get_first(struct seq_file *seq)
 		if (!rt_hash_table[st->bucket].chain)
 			continue;
 		rcu_read_lock_bh();
-		r = rcu_dereference(rt_hash_table[st->bucket].chain);
+		r = rcu_dereference_bh(rt_hash_table[st->bucket].chain);
 		while (r) {
 			if (dev_net(r->u.dst.dev) == seq_file_net(seq) &&
 			    r->rt_genid == st->genid)
 				return r;
-			r = rcu_dereference(r->u.dst.rt_next);
+			r = rcu_dereference_bh(r->u.dst.rt_next);
 		}
 		rcu_read_unlock_bh();
 	}
@@ -314,7 +314,7 @@ static struct rtable *__rt_cache_get_next(struct seq_file *seq,
 		rcu_read_lock_bh();
 		r = rt_hash_table[st->bucket].chain;
 	}
-	return rcu_dereference(r);
+	return rcu_dereference_bh(r);
 }
 
 static struct rtable *rt_cache_get_next(struct seq_file *seq,
@@ -2689,8 +2689,8 @@ int __ip_route_output_key(struct net *net, struct rtable **rp,
 	hash = rt_hash(flp->fl4_dst, flp->fl4_src, flp->oif, rt_genid(net));
 
 	rcu_read_lock_bh();
-	for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
-		rth = rcu_dereference(rth->u.dst.rt_next)) {
+	for (rth = rcu_dereference_bh(rt_hash_table[hash].chain); rth;
+		rth = rcu_dereference_bh(rth->u.dst.rt_next)) {
 		if (rth->fl.fl4_dst == flp->fl4_dst &&
 		    rth->fl.fl4_src == flp->fl4_src &&
 		    rth->fl.iif == 0 &&
@@ -3008,8 +3008,8 @@ int ip_rt_dump(struct sk_buff *skb,  struct netlink_callback *cb)
 		if (!rt_hash_table[h].chain)
 			continue;
 		rcu_read_lock_bh();
-		for (rt = rcu_dereference(rt_hash_table[h].chain), idx = 0; rt;
-		     rt = rcu_dereference(rt->u.dst.rt_next), idx++) {
+		for (rt = rcu_dereference_bh(rt_hash_table[h].chain), idx = 0; rt;
+		     rt = rcu_dereference_bh(rt->u.dst.rt_next), idx++) {
 			if (!net_eq(dev_net(rt->u.dst.dev), net) || idx < s_idx)
 				continue;
 			if (rt_is_expired(rt))
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index f126d18dbdc4..939471ef8d50 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -508,7 +508,7 @@ static inline unsigned int run_filter(struct sk_buff *skb, struct sock *sk,
 	struct sk_filter *filter;
 
 	rcu_read_lock_bh();
-	filter = rcu_dereference(sk->sk_filter);
+	filter = rcu_dereference_bh(sk->sk_filter);
 	if (filter != NULL)
 		res = sk_run_filter(skb, filter->insns, filter->len);
 	rcu_read_unlock_bh();
-- 
cgit v1.2.3


From d11c563dd20ff35da5652c3e1c989d9e10e1d6d0 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Mon, 22 Feb 2010 17:04:50 -0800
Subject: sched: Use lockdep-based checking on rcu_dereference()

Update the rcu_dereference() usages to take advantage of the new
lockdep-based checking.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: laijs@cn.fujitsu.com
Cc: dipankar@in.ibm.com
Cc: mathieu.desnoyers@polymtl.ca
Cc: josh@joshtriplett.org
Cc: dvhltc@us.ibm.com
Cc: niv@us.ibm.com
Cc: peterz@infradead.org
Cc: rostedt@goodmis.org
Cc: Valdis.Kletnieks@vt.edu
Cc: dhowells@redhat.com
LKML-Reference: <1266887105-1528-6-git-send-email-paulmck@linux.vnet.ibm.com>
[ -v2: fix allmodconfig missing symbol export build failure on x86 ]
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/cgroup.h |  5 ++++-
 include/linux/cred.h   |  2 +-
 init/main.c            |  2 ++
 kernel/cgroup.c        | 14 ++++++++++++++
 kernel/exit.c          | 14 +++++++++++---
 kernel/fork.c          |  1 +
 kernel/notifier.c      |  6 +++---
 kernel/pid.c           |  2 +-
 kernel/sched.c         | 11 ++++++++---
 9 files changed, 45 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 0008dee66514..c9bbcb2a75ae 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -28,6 +28,7 @@ struct css_id;
 extern int cgroup_init_early(void);
 extern int cgroup_init(void);
 extern void cgroup_lock(void);
+extern int cgroup_lock_is_held(void);
 extern bool cgroup_lock_live_group(struct cgroup *cgrp);
 extern void cgroup_unlock(void);
 extern void cgroup_fork(struct task_struct *p);
@@ -486,7 +487,9 @@ static inline struct cgroup_subsys_state *cgroup_subsys_state(
 static inline struct cgroup_subsys_state *task_subsys_state(
 	struct task_struct *task, int subsys_id)
 {
-	return rcu_dereference(task->cgroups->subsys[subsys_id]);
+	return rcu_dereference_check(task->cgroups->subsys[subsys_id],
+				     rcu_read_lock_held() ||
+				     cgroup_lock_is_held());
 }
 
 static inline struct cgroup* task_cgroup(struct task_struct *task,
diff --git a/include/linux/cred.h b/include/linux/cred.h
index 4e3387a89cb9..4db09f89b637 100644
--- a/include/linux/cred.h
+++ b/include/linux/cred.h
@@ -280,7 +280,7 @@ static inline void put_cred(const struct cred *_cred)
  * task or by holding tasklist_lock to prevent it from being unlinked.
  */
 #define __task_cred(task) \
-	((const struct cred *)(rcu_dereference((task)->real_cred)))
+	((const struct cred *)(rcu_dereference_check((task)->real_cred, rcu_read_lock_held() || lockdep_is_held(&tasklist_lock))))
 
 /**
  * get_task_cred - Get another task's objective credentials
diff --git a/init/main.c b/init/main.c
index 4cb47a159f02..c75dcd6eef09 100644
--- a/init/main.c
+++ b/init/main.c
@@ -416,7 +416,9 @@ static noinline void __init_refok rest_init(void)
 	kernel_thread(kernel_init, NULL, CLONE_FS | CLONE_SIGHAND);
 	numa_default_policy();
 	pid = kernel_thread(kthreadd, NULL, CLONE_FS | CLONE_FILES);
+	rcu_read_lock();
 	kthreadd_task = find_task_by_pid_ns(pid, &init_pid_ns);
+	rcu_read_unlock();
 	unlock_kernel();
 
 	/*
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index aa3bee566446..b1a0f5a528fe 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -166,6 +166,20 @@ static DEFINE_SPINLOCK(hierarchy_id_lock);
  */
 static int need_forkexit_callback __read_mostly;
 
+#ifdef CONFIG_PROVE_LOCKING
+int cgroup_lock_is_held(void)
+{
+	return lockdep_is_held(&cgroup_mutex);
+}
+#else /* #ifdef CONFIG_PROVE_LOCKING */
+int cgroup_lock_is_held(void)
+{
+	return mutex_is_locked(&cgroup_mutex);
+}
+#endif /* #else #ifdef CONFIG_PROVE_LOCKING */
+
+EXPORT_SYMBOL_GPL(cgroup_lock_is_held);
+
 /* convenient tests for these bits */
 inline int cgroup_is_removed(const struct cgroup *cgrp)
 {
diff --git a/kernel/exit.c b/kernel/exit.c
index 546774a31a66..45ed043b8bf5 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -85,7 +85,9 @@ static void __exit_signal(struct task_struct *tsk)
 	BUG_ON(!sig);
 	BUG_ON(!atomic_read(&sig->count));
 
-	sighand = rcu_dereference(tsk->sighand);
+	sighand = rcu_dereference_check(tsk->sighand,
+					rcu_read_lock_held() ||
+					lockdep_is_held(&tasklist_lock));
 	spin_lock(&sighand->siglock);
 
 	posix_cpu_timers_exit(tsk);
@@ -170,8 +172,10 @@ void release_task(struct task_struct * p)
 repeat:
 	tracehook_prepare_release_task(p);
 	/* don't need to get the RCU readlock here - the process is dead and
-	 * can't be modifying its own credentials */
+	 * can't be modifying its own credentials. But shut RCU-lockdep up */
+	rcu_read_lock();
 	atomic_dec(&__task_cred(p)->user->processes);
+	rcu_read_unlock();
 
 	proc_flush_task(p);
 
@@ -473,9 +477,11 @@ static void close_files(struct files_struct * files)
 	/*
 	 * It is safe to dereference the fd table without RCU or
 	 * ->file_lock because this is the last reference to the
-	 * files structure.
+	 * files structure.  But use RCU to shut RCU-lockdep up.
 	 */
+	rcu_read_lock();
 	fdt = files_fdtable(files);
+	rcu_read_unlock();
 	for (;;) {
 		unsigned long set;
 		i = j * __NFDBITS;
@@ -521,10 +527,12 @@ void put_files_struct(struct files_struct *files)
 		 * at the end of the RCU grace period. Otherwise,
 		 * you can free files immediately.
 		 */
+		rcu_read_lock();
 		fdt = files_fdtable(files);
 		if (fdt != &files->fdtab)
 			kmem_cache_free(files_cachep, files);
 		free_fdtable(fdt);
+		rcu_read_unlock();
 	}
 }
 
diff --git a/kernel/fork.c b/kernel/fork.c
index f88bd984df35..17bbf093356d 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -86,6 +86,7 @@ int max_threads;		/* tunable limit on nr_threads */
 DEFINE_PER_CPU(unsigned long, process_counts) = 0;
 
 __cacheline_aligned DEFINE_RWLOCK(tasklist_lock);  /* outer */
+EXPORT_SYMBOL_GPL(tasklist_lock);
 
 int nr_processes(void)
 {
diff --git a/kernel/notifier.c b/kernel/notifier.c
index acd24e7643eb..2488ba7eb568 100644
--- a/kernel/notifier.c
+++ b/kernel/notifier.c
@@ -78,10 +78,10 @@ static int __kprobes notifier_call_chain(struct notifier_block **nl,
 	int ret = NOTIFY_DONE;
 	struct notifier_block *nb, *next_nb;
 
-	nb = rcu_dereference(*nl);
+	nb = rcu_dereference_raw(*nl);
 
 	while (nb && nr_to_call) {
-		next_nb = rcu_dereference(nb->next);
+		next_nb = rcu_dereference_raw(nb->next);
 
 #ifdef CONFIG_DEBUG_NOTIFIERS
 		if (unlikely(!func_ptr_is_kernel_text(nb->notifier_call))) {
@@ -309,7 +309,7 @@ int __blocking_notifier_call_chain(struct blocking_notifier_head *nh,
 	 * racy then it does not matter what the result of the test
 	 * is, we re-check the list after having taken the lock anyway:
 	 */
-	if (rcu_dereference(nh->head)) {
+	if (rcu_dereference_raw(nh->head)) {
 		down_read(&nh->rwsem);
 		ret = notifier_call_chain(&nh->head, val, v, nr_to_call,
 					nr_calls);
diff --git a/kernel/pid.c b/kernel/pid.c
index 2e17c9c92cbe..b08e697cd83f 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -367,7 +367,7 @@ struct task_struct *pid_task(struct pid *pid, enum pid_type type)
 	struct task_struct *result = NULL;
 	if (pid) {
 		struct hlist_node *first;
-		first = rcu_dereference(pid->tasks[type].first);
+		first = rcu_dereference_check(pid->tasks[type].first, rcu_read_lock_held() || lockdep_is_held(&tasklist_lock));
 		if (first)
 			result = hlist_entry(first, struct task_struct, pids[(type)].node);
 	}
diff --git a/kernel/sched.c b/kernel/sched.c
index 3a8fb30a91b1..70ae68680d4c 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -645,6 +645,11 @@ static inline int cpu_of(struct rq *rq)
 #endif
 }
 
+#define for_each_domain_rd(p) \
+	rcu_dereference_check((p), \
+			      rcu_read_lock_sched_held() || \
+			      lockdep_is_held(&sched_domains_mutex))
+
 /*
  * The domain tree (rq->sd) is protected by RCU's quiescent state transition.
  * See detach_destroy_domains: synchronize_sched for details.
@@ -653,7 +658,7 @@ static inline int cpu_of(struct rq *rq)
  * preempt-disabled sections.
  */
 #define for_each_domain(cpu, __sd) \
-	for (__sd = rcu_dereference(cpu_rq(cpu)->sd); __sd; __sd = __sd->parent)
+	for (__sd = for_each_domain_rd(cpu_rq(cpu)->sd); __sd; __sd = __sd->parent)
 
 #define cpu_rq(cpu)		(&per_cpu(runqueues, (cpu)))
 #define this_rq()		(&__get_cpu_var(runqueues))
@@ -1531,7 +1536,7 @@ static unsigned long target_load(int cpu, int type)
 
 static struct sched_group *group_of(int cpu)
 {
-	struct sched_domain *sd = rcu_dereference(cpu_rq(cpu)->sd);
+	struct sched_domain *sd = rcu_dereference_sched(cpu_rq(cpu)->sd);
 
 	if (!sd)
 		return NULL;
@@ -4888,7 +4893,7 @@ static void run_rebalance_domains(struct softirq_action *h)
 
 static inline int on_null_domain(int cpu)
 {
-	return !rcu_dereference(cpu_rq(cpu)->sd);
+	return !rcu_dereference_sched(cpu_rq(cpu)->sd);
 }
 
 /*
-- 
cgit v1.2.3


From 7dc52157982ab771f40e3c0b7dc55b954c3c2d19 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Mon, 22 Feb 2010 17:04:52 -0800
Subject: vfs: Apply lockdep-based checking to rcu_dereference() uses

Add lockdep-ified RCU primitives to alloc_fd(), files_fdtable()
and fcheck_files().

Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: laijs@cn.fujitsu.com
Cc: dipankar@in.ibm.com
Cc: mathieu.desnoyers@polymtl.ca
Cc: josh@joshtriplett.org
Cc: dvhltc@us.ibm.com
Cc: niv@us.ibm.com
Cc: peterz@infradead.org
Cc: rostedt@goodmis.org
Cc: Valdis.Kletnieks@vt.edu
Cc: dhowells@redhat.com
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
LKML-Reference: <1266887105-1528-8-git-send-email-paulmck@linux.vnet.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 fs/file.c               | 2 +-
 fs/proc/array.c         | 2 ++
 fs/proc/base.c          | 6 +++++-
 include/linux/fdtable.h | 8 ++++++--
 4 files changed, 14 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/fs/file.c b/fs/file.c
index 87e129030ab1..38039af67663 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -478,7 +478,7 @@ repeat:
 	error = fd;
 #if 1
 	/* Sanity check */
-	if (rcu_dereference(fdt->fd[fd]) != NULL) {
+	if (rcu_dereference_raw(fdt->fd[fd]) != NULL) {
 		printk(KERN_WARNING "alloc_fd: slot %d not NULL!\n", fd);
 		rcu_assign_pointer(fdt->fd[fd], NULL);
 	}
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 13b5d0708175..18e20feee251 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -270,7 +270,9 @@ static inline void task_sig(struct seq_file *m, struct task_struct *p)
 		blocked = p->blocked;
 		collect_sigign_sigcatch(p, &ignored, &caught);
 		num_threads = atomic_read(&p->signal->count);
+		rcu_read_lock();  /* FIXME: is this correct? */
 		qsize = atomic_read(&__task_cred(p)->user->sigpending);
+		rcu_read_unlock();
 		qlim = p->signal->rlim[RLIMIT_SIGPENDING].rlim_cur;
 		unlock_task_sighand(p, &flags);
 	}
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 58324c299165..623e2ffb5d2b 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1095,8 +1095,12 @@ static ssize_t proc_loginuid_write(struct file * file, const char __user * buf,
 	if (!capable(CAP_AUDIT_CONTROL))
 		return -EPERM;
 
-	if (current != pid_task(proc_pid(inode), PIDTYPE_PID))
+	rcu_read_lock();
+	if (current != pid_task(proc_pid(inode), PIDTYPE_PID)) {
+		rcu_read_unlock();
 		return -EPERM;
+	}
+	rcu_read_unlock();
 
 	if (count >= PAGE_SIZE)
 		count = PAGE_SIZE - 1;
diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h
index a2ec74bc4812..144412ffaced 100644
--- a/include/linux/fdtable.h
+++ b/include/linux/fdtable.h
@@ -57,7 +57,11 @@ struct files_struct {
 	struct file * fd_array[NR_OPEN_DEFAULT];
 };
 
-#define files_fdtable(files) (rcu_dereference((files)->fdt))
+#define files_fdtable(files) \
+	(rcu_dereference_check((files)->fdt, \
+			       rcu_read_lock_held() || \
+			       lockdep_is_held(&(files)->file_lock) || \
+			       atomic_read(&files->count) == 1))
 
 struct file_operations;
 struct vfsmount;
@@ -78,7 +82,7 @@ static inline struct file * fcheck_files(struct files_struct *files, unsigned in
 	struct fdtable *fdt = files_fdtable(files);
 
 	if (fd < fdt->max_fds)
-		file = rcu_dereference(fdt->fd[fd]);
+		file = rcu_dereference_check(fdt->fd[fd], rcu_read_lock_held() || lockdep_is_held(&files->file_lock) || atomic_read(&files->count) == 1);
 	return file;
 }
 
-- 
cgit v1.2.3


From af61b96b4f68f7ab25ebf34fed275fabf64f2edc Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Mon, 22 Feb 2010 17:04:53 -0800
Subject: vfs: Abstract rcu_dereference_check for files-fdtable use

Create an rcu_dereference_check_fdtable() that encapsulates the
rcu_dereference_check() condition for fcheck_files() use.  This
has the beneficial side-effect of getting rid of a very long
line.

Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: laijs@cn.fujitsu.com
Cc: dipankar@in.ibm.com
Cc: mathieu.desnoyers@polymtl.ca
Cc: josh@joshtriplett.org
Cc: dvhltc@us.ibm.com
Cc: niv@us.ibm.com
Cc: peterz@infradead.org
Cc: rostedt@goodmis.org
Cc: Valdis.Kletnieks@vt.edu
Cc: dhowells@redhat.com
LKML-Reference: <1266887105-1528-9-git-send-email-paulmck@linux.vnet.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/fdtable.h | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h
index 144412ffaced..013dc529e95f 100644
--- a/include/linux/fdtable.h
+++ b/include/linux/fdtable.h
@@ -57,11 +57,14 @@ struct files_struct {
 	struct file * fd_array[NR_OPEN_DEFAULT];
 };
 
-#define files_fdtable(files) \
-	(rcu_dereference_check((files)->fdt, \
+#define rcu_dereference_check_fdtable(files, fdtfd) \
+	(rcu_dereference_check((fdtfd), \
 			       rcu_read_lock_held() || \
 			       lockdep_is_held(&(files)->file_lock) || \
-			       atomic_read(&files->count) == 1))
+			       atomic_read(&(files)->count) == 1))
+
+#define files_fdtable(files) \
+		(rcu_dereference_check_fdtable((files), (files)->fdt))
 
 struct file_operations;
 struct vfsmount;
@@ -82,7 +85,7 @@ static inline struct file * fcheck_files(struct files_struct *files, unsigned in
 	struct fdtable *fdt = files_fdtable(files);
 
 	if (fd < fdt->max_fds)
-		file = rcu_dereference_check(fdt->fd[fd], rcu_read_lock_held() || lockdep_is_held(&files->file_lock) || atomic_read(&files->count) == 1);
+		file = rcu_dereference_check_fdtable(files, fdt->fd[fd]);
 	return file;
 }
 
-- 
cgit v1.2.3


From 8bd93a2c5d4cab2ae17d06350daa7dbf546a4634 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Mon, 22 Feb 2010 17:04:59 -0800
Subject: rcu: Accelerate grace period if last non-dynticked CPU

Currently, rcu_needs_cpu() simply checks whether the current CPU
has an outstanding RCU callback, which means that the last CPU
to go into dyntick-idle mode might wait a few ticks for the
relevant grace periods to complete.  However, if all the other
CPUs are in dyntick-idle mode, and if this CPU is in a quiescent
state (which it is for RCU-bh and RCU-sched any time that we are
considering going into dyntick-idle mode), then the grace period
is instantly complete.

This patch therefore repeatedly invokes the RCU grace-period
machinery in order to force any needed grace periods to complete
quickly.  It does so a limited number of times in order to
prevent starvation by an RCU callback function that might pass
itself to call_rcu().

However, if any CPU other than the current one is not in
dyntick-idle mode, fall back to simply checking (with fix to bug
noted by Lai Jiangshan).  Also, take advantage of last
grace-period forcing, the opportunity to do so noted by Steve
Rostedt.  And apply simplified #ifdef condition suggested by
Frederic Weisbecker.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: laijs@cn.fujitsu.com
Cc: dipankar@in.ibm.com
Cc: mathieu.desnoyers@polymtl.ca
Cc: josh@joshtriplett.org
Cc: dvhltc@us.ibm.com
Cc: niv@us.ibm.com
Cc: peterz@infradead.org
Cc: rostedt@goodmis.org
Cc: Valdis.Kletnieks@vt.edu
Cc: dhowells@redhat.com
LKML-Reference: <1266887105-1528-15-git-send-email-paulmck@linux.vnet.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/cpumask.h | 14 ++++++++++
 init/Kconfig            | 16 ++++++++++++
 kernel/rcutree.c        |  5 ++--
 kernel/rcutree_plugin.h | 69 +++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 101 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index d77b54733c5b..dbcee7647d9a 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -143,6 +143,8 @@ static inline unsigned int cpumask_any_but(const struct cpumask *mask,
 
 #define for_each_cpu(cpu, mask)			\
 	for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask)
+#define for_each_cpu_not(cpu, mask)		\
+	for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask)
 #define for_each_cpu_and(cpu, mask, and)	\
 	for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask, (void)and)
 #else
@@ -202,6 +204,18 @@ int cpumask_any_but(const struct cpumask *mask, unsigned int cpu);
 		(cpu) = cpumask_next((cpu), (mask)),	\
 		(cpu) < nr_cpu_ids;)
 
+/**
+ * for_each_cpu_not - iterate over every cpu in a complemented mask
+ * @cpu: the (optionally unsigned) integer iterator
+ * @mask: the cpumask pointer
+ *
+ * After the loop, cpu is >= nr_cpu_ids.
+ */
+#define for_each_cpu_not(cpu, mask)				\
+	for ((cpu) = -1;					\
+		(cpu) = cpumask_next_zero((cpu), (mask)),	\
+		(cpu) < nr_cpu_ids;)
+
 /**
  * for_each_cpu_and - iterate over every cpu in both masks
  * @cpu: the (optionally unsigned) integer iterator
diff --git a/init/Kconfig b/init/Kconfig
index d95ca7cd5d45..42bf914b325a 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -396,6 +396,22 @@ config RCU_FANOUT_EXACT
 
 	  Say N if unsure.
 
+config RCU_FAST_NO_HZ
+	bool "Accelerate last non-dyntick-idle CPU's grace periods"
+	depends on TREE_RCU && NO_HZ && SMP
+	default n
+	help
+	  This option causes RCU to attempt to accelerate grace periods
+	  in order to allow the final CPU to enter dynticks-idle state
+	  more quickly.  On the other hand, this option increases the
+	  overhead of the dynticks-idle checking, particularly on systems
+	  with large numbers of CPUs.
+
+	  Say Y if energy efficiency is critically important, particularly
+	  	if you have relatively few CPUs.
+
+	  Say N if you are unsure.
+
 config TREE_RCU_TRACE
 	def_bool RCU_TRACE && ( TREE_RCU || TREE_PREEMPT_RCU )
 	select DEBUG_FS
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 099a255ede4c..29d88c08d875 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -1550,10 +1550,9 @@ static int rcu_pending(int cpu)
 /*
  * Check to see if any future RCU-related work will need to be done
  * by the current CPU, even if none need be done immediately, returning
- * 1 if so.  This function is part of the RCU implementation; it is -not-
- * an exported member of the RCU API.
+ * 1 if so.
  */
-int rcu_needs_cpu(int cpu)
+static int rcu_needs_cpu_quick_check(int cpu)
 {
 	/* RCU callbacks either ready or pending? */
 	return per_cpu(rcu_sched_data, cpu).nxtlist ||
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index e77cdf321e13..a82566696b0b 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -906,3 +906,72 @@ static void __init __rcu_init_preempt(void)
 }
 
 #endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */
+
+#if !defined(CONFIG_RCU_FAST_NO_HZ)
+
+/*
+ * Check to see if any future RCU-related work will need to be done
+ * by the current CPU, even if none need be done immediately, returning
+ * 1 if so.  This function is part of the RCU implementation; it is -not-
+ * an exported member of the RCU API.
+ *
+ * Because we have preemptible RCU, just check whether this CPU needs
+ * any flavor of RCU.  Do not chew up lots of CPU cycles with preemption
+ * disabled in a most-likely vain attempt to cause RCU not to need this CPU.
+ */
+int rcu_needs_cpu(int cpu)
+{
+	return rcu_needs_cpu_quick_check(cpu);
+}
+
+#else /* #if !defined(CONFIG_RCU_FAST_NO_HZ) */
+
+#define RCU_NEEDS_CPU_FLUSHES 5
+
+/*
+ * Check to see if any future RCU-related work will need to be done
+ * by the current CPU, even if none need be done immediately, returning
+ * 1 if so.  This function is part of the RCU implementation; it is -not-
+ * an exported member of the RCU API.
+ *
+ * Because we are not supporting preemptible RCU, attempt to accelerate
+ * any current grace periods so that RCU no longer needs this CPU, but
+ * only if all other CPUs are already in dynticks-idle mode.  This will
+ * allow the CPU cores to be powered down immediately, as opposed to after
+ * waiting many milliseconds for grace periods to elapse.
+ */
+int rcu_needs_cpu(int cpu)
+{
+	int c = 1;
+	int i;
+	int thatcpu;
+
+	/* Don't bother unless we are the last non-dyntick-idle CPU. */
+	for_each_cpu_not(thatcpu, nohz_cpu_mask)
+		if (thatcpu != cpu)
+			return rcu_needs_cpu_quick_check(cpu);
+
+	/* Try to push remaining RCU-sched and RCU-bh callbacks through. */
+	for (i = 0; i < RCU_NEEDS_CPU_FLUSHES && c; i++) {
+		c = 0;
+		if (per_cpu(rcu_sched_data, cpu).nxtlist) {
+			rcu_sched_qs(cpu);
+			force_quiescent_state(&rcu_sched_state, 0);
+			__rcu_process_callbacks(&rcu_sched_state,
+						&per_cpu(rcu_sched_data, cpu));
+			c = !!per_cpu(rcu_sched_data, cpu).nxtlist;
+		}
+		if (per_cpu(rcu_bh_data, cpu).nxtlist) {
+			rcu_bh_qs(cpu);
+			force_quiescent_state(&rcu_bh_state, 0);
+			__rcu_process_callbacks(&rcu_bh_state,
+						&per_cpu(rcu_bh_data, cpu));
+			c = !!per_cpu(rcu_bh_data, cpu).nxtlist;
+		}
+	}
+
+	/* If RCU callbacks are still pending, RCU still needs this CPU. */
+	return c;
+}
+
+#endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */
-- 
cgit v1.2.3


From 3e58974027b04e84f68b964ef368a6cd758e2f84 Mon Sep 17 00:00:00 2001
From: Nikanth Karthikesan <knikanth@suse.de>
Date: Thu, 25 Feb 2010 14:44:56 +0530
Subject: doc: fix typo in comment explaining rb_tree usage

Fix typo in comment explaining rb_tree usage.
s/int/in

Signed-off-by: Nikanth Karthikesan <knikanth@suse.de>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 include/linux/rbtree.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rbtree.h b/include/linux/rbtree.h
index 9c295411d01f..5210a5c60877 100644
--- a/include/linux/rbtree.h
+++ b/include/linux/rbtree.h
@@ -25,10 +25,10 @@
 
   Some example of insert and search follows here. The search is a plain
   normal search over an ordered tree. The insert instead must be implemented
-  int two steps: as first thing the code must insert the element in
-  order as a red leaf in the tree, then the support library function
-  rb_insert_color() must be called. Such function will do the
-  not trivial work to rebalance the rbtree if necessary.
+  in two steps: First, the code must insert the element in order as a red leaf
+  in the tree, and then the support library function rb_insert_color() must
+  be called. Such function will do the not trivial work to rebalance the
+  rbtree, if necessary.
 
 -----------------------------------------------------------------------
 static inline struct page * rb_search_page_cache(struct inode * inode,
-- 
cgit v1.2.3


From 86c38a31aa7f2dd6e74a262710bf8ebf7455acc5 Mon Sep 17 00:00:00 2001
From: Jeff Mahoney <jeffm@suse.com>
Date: Wed, 24 Feb 2010 13:59:23 -0500
Subject: tracing: Fix ftrace_event_call alignment for use with gcc 4.5

GCC 4.5 introduces behavior that forces the alignment of structures to
 use the largest possible value. The default value is 32 bytes, so if
 some structures are defined with a 4-byte alignment and others aren't
 declared with an alignment constraint at all - it will align at 32-bytes.

 For things like the ftrace events, this results in a non-standard array.
 When initializing the ftrace subsystem, we traverse the _ftrace_events
 section and call the initialization callback for each event. When the
 structures are misaligned, we could be treating another part of the
 structure (or the zeroed out space between them) as a function pointer.

 This patch forces the alignment for all the ftrace_event_call structures
 to 4 bytes.

 Without this patch, the kernel fails to boot very early when built with
 gcc 4.5.

 It's trivial to check the alignment of the members of the array, so it
 might be worthwhile to add something to the build system to do that
 automatically. Unfortunately, that only covers this case. I've asked one
 of the gcc developers about adding a warning when this condition is seen.

Cc: stable@kernel.org
Signed-off-by: Jeff Mahoney <jeffm@suse.com>
LKML-Reference: <4B85770B.6010901@suse.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/syscalls.h | 6 ++++--
 include/trace/ftrace.h   | 3 ++-
 kernel/trace/trace.h     | 3 ++-
 3 files changed, 8 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 7b219696ad24..91bd7d78a07d 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -132,7 +132,8 @@ struct perf_event_attr;
 
 #define SYSCALL_TRACE_ENTER_EVENT(sname)				\
 	static const struct syscall_metadata __syscall_meta_##sname;	\
-	static struct ftrace_event_call event_enter_##sname;		\
+	static struct ftrace_event_call					\
+	__attribute__((__aligned__(4))) event_enter_##sname;		\
 	static struct trace_event enter_syscall_print_##sname = {	\
 		.trace                  = print_syscall_enter,		\
 	};								\
@@ -153,7 +154,8 @@ struct perf_event_attr;
 
 #define SYSCALL_TRACE_EXIT_EVENT(sname)					\
 	static const struct syscall_metadata __syscall_meta_##sname;	\
-	static struct ftrace_event_call event_exit_##sname;		\
+	static struct ftrace_event_call					\
+	__attribute__((__aligned__(4))) event_exit_##sname;		\
 	static struct trace_event exit_syscall_print_##sname = {	\
 		.trace                  = print_syscall_exit,		\
 	};								\
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index 09fd9afc0859..f23a0ca6910a 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -65,7 +65,8 @@
 	};
 #undef DEFINE_EVENT
 #define DEFINE_EVENT(template, name, proto, args)	\
-	static struct ftrace_event_call event_##name
+	static struct ftrace_event_call			\
+	__attribute__((__aligned__(4))) event_##name
 
 #undef DEFINE_EVENT_PRINT
 #define DEFINE_EVENT_PRINT(template, name, proto, args, print)	\
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index b477fce41edf..fd05bcaf91b0 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -792,7 +792,8 @@ extern const char *__stop___trace_bprintk_fmt[];
 
 #undef FTRACE_ENTRY
 #define FTRACE_ENTRY(call, struct_name, id, tstruct, print)		\
-	extern struct ftrace_event_call event_##call;
+	extern struct ftrace_event_call					\
+	__attribute__((__aligned__(4))) event_##call;
 #undef FTRACE_ENTRY_DUP
 #define FTRACE_ENTRY_DUP(call, struct_name, id, tstruct, print)		\
 	FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print))
-- 
cgit v1.2.3


From afd66255b9a48f5851326ddae50e2203fbf71dc9 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@redhat.com>
Date: Thu, 25 Feb 2010 08:34:07 -0500
Subject: kprobes: Introduce kprobes jump optimization

Introduce kprobes jump optimization arch-independent parts.
Kprobes uses breakpoint instruction for interrupting execution
flow, on some architectures, it can be replaced by a jump
instruction and interruption emulation code. This gains kprobs'
performance drastically.

To enable this feature, set CONFIG_OPTPROBES=y (default y if the
arch supports OPTPROBE).

Changes in v9:
 - Fix a bug to optimize probe when enabling.
 - Check nearby probes can be optimize/unoptimize when disarming/arming
   kprobes, instead of registering/unregistering. This will help
   kprobe-tracer because most of probes on it are usually disabled.

Changes in v6:
 - Cleanup coding style for readability.
 - Add comments around get/put_online_cpus().

Changes in v5:
 - Use get_online_cpus()/put_online_cpus() for avoiding text_mutex
   deadlock.

Signed-off-by: Masami Hiramatsu <mhiramat@redhat.com>
Cc: systemtap <systemtap@sources.redhat.com>
Cc: DLE <dle-develop@lists.sourceforge.net>
Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Cc: Jim Keniston <jkenisto@us.ibm.com>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Anders Kaseorg <andersk@ksplice.com>
Cc: Tim Abbott <tabbott@ksplice.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Jason Baron <jbaron@redhat.com>
Cc: Mathieu Desnoyers <compudj@krystal.dyndns.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
LKML-Reference: <20100225133407.6725.81992.stgit@localhost6.localdomain6>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/Kconfig            |  13 ++
 include/linux/kprobes.h |  36 ++++
 kernel/kprobes.c        | 461 ++++++++++++++++++++++++++++++++++++++++++------
 3 files changed, 459 insertions(+), 51 deletions(-)

(limited to 'include/linux')

diff --git a/arch/Kconfig b/arch/Kconfig
index 9d055b4f0585..e0ad3caf16d9 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -57,6 +57,17 @@ config KPROBES
 	  for kernel debugging, non-intrusive instrumentation and testing.
 	  If in doubt, say "N".
 
+config OPTPROBES
+	bool "Kprobes jump optimization support (EXPERIMENTAL)"
+	default y
+	depends on KPROBES
+	depends on !PREEMPT
+	depends on HAVE_OPTPROBES
+	select KALLSYMS_ALL
+	help
+	  This option will allow kprobes to optimize breakpoint to
+	  a jump for reducing its overhead.
+
 config HAVE_EFFICIENT_UNALIGNED_ACCESS
 	bool
 	help
@@ -99,6 +110,8 @@ config HAVE_KPROBES
 config HAVE_KRETPROBES
 	bool
 
+config HAVE_OPTPROBES
+	bool
 #
 # An arch should select this if it provides all these things:
 #
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index 1b672f74a32f..aed1f95c582f 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -122,6 +122,11 @@ struct kprobe {
 /* Kprobe status flags */
 #define KPROBE_FLAG_GONE	1 /* breakpoint has already gone */
 #define KPROBE_FLAG_DISABLED	2 /* probe is temporarily disabled */
+#define KPROBE_FLAG_OPTIMIZED	4 /*
+				   * probe is really optimized.
+				   * NOTE:
+				   * this flag is only for optimized_kprobe.
+				   */
 
 /* Has this kprobe gone ? */
 static inline int kprobe_gone(struct kprobe *p)
@@ -134,6 +139,12 @@ static inline int kprobe_disabled(struct kprobe *p)
 {
 	return p->flags & (KPROBE_FLAG_DISABLED | KPROBE_FLAG_GONE);
 }
+
+/* Is this kprobe really running optimized path ? */
+static inline int kprobe_optimized(struct kprobe *p)
+{
+	return p->flags & KPROBE_FLAG_OPTIMIZED;
+}
 /*
  * Special probe type that uses setjmp-longjmp type tricks to resume
  * execution at a specified entry with a matching prototype corresponding
@@ -249,6 +260,31 @@ extern kprobe_opcode_t *get_insn_slot(void);
 extern void free_insn_slot(kprobe_opcode_t *slot, int dirty);
 extern void kprobes_inc_nmissed_count(struct kprobe *p);
 
+#ifdef CONFIG_OPTPROBES
+/*
+ * Internal structure for direct jump optimized probe
+ */
+struct optimized_kprobe {
+	struct kprobe kp;
+	struct list_head list;	/* list for optimizing queue */
+	struct arch_optimized_insn optinsn;
+};
+
+/* Architecture dependent functions for direct jump optimization */
+extern int arch_prepared_optinsn(struct arch_optimized_insn *optinsn);
+extern int arch_check_optimized_kprobe(struct optimized_kprobe *op);
+extern int arch_prepare_optimized_kprobe(struct optimized_kprobe *op);
+extern void arch_remove_optimized_kprobe(struct optimized_kprobe *op);
+extern int  arch_optimize_kprobe(struct optimized_kprobe *op);
+extern void arch_unoptimize_kprobe(struct optimized_kprobe *op);
+extern kprobe_opcode_t *get_optinsn_slot(void);
+extern void free_optinsn_slot(kprobe_opcode_t *slot, int dirty);
+extern int arch_within_optimized_kprobe(struct optimized_kprobe *op,
+					unsigned long addr);
+
+extern void opt_pre_handler(struct kprobe *p, struct pt_regs *regs);
+#endif /* CONFIG_OPTPROBES */
+
 /* Get the kprobe at this addr (if any) - called with preemption disabled */
 struct kprobe *get_kprobe(void *addr);
 void kretprobe_hash_lock(struct task_struct *tsk,
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 78105623d739..612af2d61614 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -45,6 +45,7 @@
 #include <linux/kdebug.h>
 #include <linux/memory.h>
 #include <linux/ftrace.h>
+#include <linux/cpu.h>
 
 #include <asm-generic/sections.h>
 #include <asm/cacheflush.h>
@@ -280,6 +281,33 @@ void __kprobes free_insn_slot(kprobe_opcode_t * slot, int dirty)
 	__free_insn_slot(&kprobe_insn_slots, slot, dirty);
 	mutex_unlock(&kprobe_insn_mutex);
 }
+#ifdef CONFIG_OPTPROBES
+/* For optimized_kprobe buffer */
+static DEFINE_MUTEX(kprobe_optinsn_mutex); /* Protects kprobe_optinsn_slots */
+static struct kprobe_insn_cache kprobe_optinsn_slots = {
+	.pages = LIST_HEAD_INIT(kprobe_optinsn_slots.pages),
+	/* .insn_size is initialized later */
+	.nr_garbage = 0,
+};
+/* Get a slot for optimized_kprobe buffer */
+kprobe_opcode_t __kprobes *get_optinsn_slot(void)
+{
+	kprobe_opcode_t *ret = NULL;
+
+	mutex_lock(&kprobe_optinsn_mutex);
+	ret = __get_insn_slot(&kprobe_optinsn_slots);
+	mutex_unlock(&kprobe_optinsn_mutex);
+
+	return ret;
+}
+
+void __kprobes free_optinsn_slot(kprobe_opcode_t * slot, int dirty)
+{
+	mutex_lock(&kprobe_optinsn_mutex);
+	__free_insn_slot(&kprobe_optinsn_slots, slot, dirty);
+	mutex_unlock(&kprobe_optinsn_mutex);
+}
+#endif
 #endif
 
 /* We have preemption disabled.. so it is safe to use __ versions */
@@ -310,23 +338,324 @@ struct kprobe __kprobes *get_kprobe(void *addr)
 		if (p->addr == addr)
 			return p;
 	}
+
 	return NULL;
 }
 
+static int __kprobes aggr_pre_handler(struct kprobe *p, struct pt_regs *regs);
+
+/* Return true if the kprobe is an aggregator */
+static inline int kprobe_aggrprobe(struct kprobe *p)
+{
+	return p->pre_handler == aggr_pre_handler;
+}
+
+/*
+ * Keep all fields in the kprobe consistent
+ */
+static inline void copy_kprobe(struct kprobe *old_p, struct kprobe *p)
+{
+	memcpy(&p->opcode, &old_p->opcode, sizeof(kprobe_opcode_t));
+	memcpy(&p->ainsn, &old_p->ainsn, sizeof(struct arch_specific_insn));
+}
+
+#ifdef CONFIG_OPTPROBES
+/*
+ * Call all pre_handler on the list, but ignores its return value.
+ * This must be called from arch-dep optimized caller.
+ */
+void __kprobes opt_pre_handler(struct kprobe *p, struct pt_regs *regs)
+{
+	struct kprobe *kp;
+
+	list_for_each_entry_rcu(kp, &p->list, list) {
+		if (kp->pre_handler && likely(!kprobe_disabled(kp))) {
+			set_kprobe_instance(kp);
+			kp->pre_handler(kp, regs);
+		}
+		reset_kprobe_instance();
+	}
+}
+
+/* Return true(!0) if the kprobe is ready for optimization. */
+static inline int kprobe_optready(struct kprobe *p)
+{
+	struct optimized_kprobe *op;
+
+	if (kprobe_aggrprobe(p)) {
+		op = container_of(p, struct optimized_kprobe, kp);
+		return arch_prepared_optinsn(&op->optinsn);
+	}
+
+	return 0;
+}
+
+/*
+ * Return an optimized kprobe whose optimizing code replaces
+ * instructions including addr (exclude breakpoint).
+ */
+struct kprobe *__kprobes get_optimized_kprobe(unsigned long addr)
+{
+	int i;
+	struct kprobe *p = NULL;
+	struct optimized_kprobe *op;
+
+	/* Don't check i == 0, since that is a breakpoint case. */
+	for (i = 1; !p && i < MAX_OPTIMIZED_LENGTH; i++)
+		p = get_kprobe((void *)(addr - i));
+
+	if (p && kprobe_optready(p)) {
+		op = container_of(p, struct optimized_kprobe, kp);
+		if (arch_within_optimized_kprobe(op, addr))
+			return p;
+	}
+
+	return NULL;
+}
+
+/* Optimization staging list, protected by kprobe_mutex */
+static LIST_HEAD(optimizing_list);
+
+static void kprobe_optimizer(struct work_struct *work);
+static DECLARE_DELAYED_WORK(optimizing_work, kprobe_optimizer);
+#define OPTIMIZE_DELAY 5
+
+/* Kprobe jump optimizer */
+static __kprobes void kprobe_optimizer(struct work_struct *work)
+{
+	struct optimized_kprobe *op, *tmp;
+
+	/* Lock modules while optimizing kprobes */
+	mutex_lock(&module_mutex);
+	mutex_lock(&kprobe_mutex);
+	if (kprobes_all_disarmed)
+		goto end;
+
+	/*
+	 * Wait for quiesence period to ensure all running interrupts
+	 * are done. Because optprobe may modify multiple instructions
+	 * there is a chance that Nth instruction is interrupted. In that
+	 * case, running interrupt can return to 2nd-Nth byte of jump
+	 * instruction. This wait is for avoiding it.
+	 */
+	synchronize_sched();
+
+	/*
+	 * The optimization/unoptimization refers online_cpus via
+	 * stop_machine() and cpu-hotplug modifies online_cpus.
+	 * And same time, text_mutex will be held in cpu-hotplug and here.
+	 * This combination can cause a deadlock (cpu-hotplug try to lock
+	 * text_mutex but stop_machine can not be done because online_cpus
+	 * has been changed)
+	 * To avoid this deadlock, we need to call get_online_cpus()
+	 * for preventing cpu-hotplug outside of text_mutex locking.
+	 */
+	get_online_cpus();
+	mutex_lock(&text_mutex);
+	list_for_each_entry_safe(op, tmp, &optimizing_list, list) {
+		WARN_ON(kprobe_disabled(&op->kp));
+		if (arch_optimize_kprobe(op) < 0)
+			op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
+		list_del_init(&op->list);
+	}
+	mutex_unlock(&text_mutex);
+	put_online_cpus();
+end:
+	mutex_unlock(&kprobe_mutex);
+	mutex_unlock(&module_mutex);
+}
+
+/* Optimize kprobe if p is ready to be optimized */
+static __kprobes void optimize_kprobe(struct kprobe *p)
+{
+	struct optimized_kprobe *op;
+
+	/* Check if the kprobe is disabled or not ready for optimization. */
+	if (!kprobe_optready(p) ||
+	    (kprobe_disabled(p) || kprobes_all_disarmed))
+		return;
+
+	/* Both of break_handler and post_handler are not supported. */
+	if (p->break_handler || p->post_handler)
+		return;
+
+	op = container_of(p, struct optimized_kprobe, kp);
+
+	/* Check there is no other kprobes at the optimized instructions */
+	if (arch_check_optimized_kprobe(op) < 0)
+		return;
+
+	/* Check if it is already optimized. */
+	if (op->kp.flags & KPROBE_FLAG_OPTIMIZED)
+		return;
+
+	op->kp.flags |= KPROBE_FLAG_OPTIMIZED;
+	list_add(&op->list, &optimizing_list);
+	if (!delayed_work_pending(&optimizing_work))
+		schedule_delayed_work(&optimizing_work, OPTIMIZE_DELAY);
+}
+
+/* Unoptimize a kprobe if p is optimized */
+static __kprobes void unoptimize_kprobe(struct kprobe *p)
+{
+	struct optimized_kprobe *op;
+
+	if ((p->flags & KPROBE_FLAG_OPTIMIZED) && kprobe_aggrprobe(p)) {
+		op = container_of(p, struct optimized_kprobe, kp);
+		if (!list_empty(&op->list))
+			/* Dequeue from the optimization queue */
+			list_del_init(&op->list);
+		else
+			/* Replace jump with break */
+			arch_unoptimize_kprobe(op);
+		op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
+	}
+}
+
+/* Remove optimized instructions */
+static void __kprobes kill_optimized_kprobe(struct kprobe *p)
+{
+	struct optimized_kprobe *op;
+
+	op = container_of(p, struct optimized_kprobe, kp);
+	if (!list_empty(&op->list)) {
+		/* Dequeue from the optimization queue */
+		list_del_init(&op->list);
+		op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
+	}
+	/* Don't unoptimize, because the target code will be freed. */
+	arch_remove_optimized_kprobe(op);
+}
+
+/* Try to prepare optimized instructions */
+static __kprobes void prepare_optimized_kprobe(struct kprobe *p)
+{
+	struct optimized_kprobe *op;
+
+	op = container_of(p, struct optimized_kprobe, kp);
+	arch_prepare_optimized_kprobe(op);
+}
+
+/* Free optimized instructions and optimized_kprobe */
+static __kprobes void free_aggr_kprobe(struct kprobe *p)
+{
+	struct optimized_kprobe *op;
+
+	op = container_of(p, struct optimized_kprobe, kp);
+	arch_remove_optimized_kprobe(op);
+	kfree(op);
+}
+
+/* Allocate new optimized_kprobe and try to prepare optimized instructions */
+static __kprobes struct kprobe *alloc_aggr_kprobe(struct kprobe *p)
+{
+	struct optimized_kprobe *op;
+
+	op = kzalloc(sizeof(struct optimized_kprobe), GFP_KERNEL);
+	if (!op)
+		return NULL;
+
+	INIT_LIST_HEAD(&op->list);
+	op->kp.addr = p->addr;
+	arch_prepare_optimized_kprobe(op);
+
+	return &op->kp;
+}
+
+static void __kprobes init_aggr_kprobe(struct kprobe *ap, struct kprobe *p);
+
+/*
+ * Prepare an optimized_kprobe and optimize it
+ * NOTE: p must be a normal registered kprobe
+ */
+static __kprobes void try_to_optimize_kprobe(struct kprobe *p)
+{
+	struct kprobe *ap;
+	struct optimized_kprobe *op;
+
+	ap = alloc_aggr_kprobe(p);
+	if (!ap)
+		return;
+
+	op = container_of(ap, struct optimized_kprobe, kp);
+	if (!arch_prepared_optinsn(&op->optinsn)) {
+		/* If failed to setup optimizing, fallback to kprobe */
+		free_aggr_kprobe(ap);
+		return;
+	}
+
+	init_aggr_kprobe(ap, p);
+	optimize_kprobe(ap);
+}
+
+static void __kprobes __arm_kprobe(struct kprobe *p)
+{
+	struct kprobe *old_p;
+
+	/* Check collision with other optimized kprobes */
+	old_p = get_optimized_kprobe((unsigned long)p->addr);
+	if (unlikely(old_p))
+		unoptimize_kprobe(old_p); /* Fallback to unoptimized kprobe */
+
+	arch_arm_kprobe(p);
+	optimize_kprobe(p);	/* Try to optimize (add kprobe to a list) */
+}
+
+static void __kprobes __disarm_kprobe(struct kprobe *p)
+{
+	struct kprobe *old_p;
+
+	unoptimize_kprobe(p);	/* Try to unoptimize */
+	arch_disarm_kprobe(p);
+
+	/* If another kprobe was blocked, optimize it. */
+	old_p = get_optimized_kprobe((unsigned long)p->addr);
+	if (unlikely(old_p))
+		optimize_kprobe(old_p);
+}
+
+#else /* !CONFIG_OPTPROBES */
+
+#define optimize_kprobe(p)			do {} while (0)
+#define unoptimize_kprobe(p)			do {} while (0)
+#define kill_optimized_kprobe(p)		do {} while (0)
+#define prepare_optimized_kprobe(p)		do {} while (0)
+#define try_to_optimize_kprobe(p)		do {} while (0)
+#define __arm_kprobe(p)				arch_arm_kprobe(p)
+#define __disarm_kprobe(p)			arch_disarm_kprobe(p)
+
+static __kprobes void free_aggr_kprobe(struct kprobe *p)
+{
+	kfree(p);
+}
+
+static __kprobes struct kprobe *alloc_aggr_kprobe(struct kprobe *p)
+{
+	return kzalloc(sizeof(struct kprobe), GFP_KERNEL);
+}
+#endif /* CONFIG_OPTPROBES */
+
 /* Arm a kprobe with text_mutex */
 static void __kprobes arm_kprobe(struct kprobe *kp)
 {
+	/*
+	 * Here, since __arm_kprobe() doesn't use stop_machine(),
+	 * this doesn't cause deadlock on text_mutex. So, we don't
+	 * need get_online_cpus().
+	 */
 	mutex_lock(&text_mutex);
-	arch_arm_kprobe(kp);
+	__arm_kprobe(kp);
 	mutex_unlock(&text_mutex);
 }
 
 /* Disarm a kprobe with text_mutex */
 static void __kprobes disarm_kprobe(struct kprobe *kp)
 {
+	get_online_cpus();	/* For avoiding text_mutex deadlock */
 	mutex_lock(&text_mutex);
-	arch_disarm_kprobe(kp);
+	__disarm_kprobe(kp);
 	mutex_unlock(&text_mutex);
+	put_online_cpus();
 }
 
 /*
@@ -395,7 +724,7 @@ static int __kprobes aggr_break_handler(struct kprobe *p, struct pt_regs *regs)
 void __kprobes kprobes_inc_nmissed_count(struct kprobe *p)
 {
 	struct kprobe *kp;
-	if (p->pre_handler != aggr_pre_handler) {
+	if (!kprobe_aggrprobe(p)) {
 		p->nmissed++;
 	} else {
 		list_for_each_entry_rcu(kp, &p->list, list)
@@ -518,15 +847,6 @@ static void __kprobes cleanup_rp_inst(struct kretprobe *rp)
 	free_rp_inst(rp);
 }
 
-/*
- * Keep all fields in the kprobe consistent
- */
-static inline void copy_kprobe(struct kprobe *old_p, struct kprobe *p)
-{
-	memcpy(&p->opcode, &old_p->opcode, sizeof(kprobe_opcode_t));
-	memcpy(&p->ainsn, &old_p->ainsn, sizeof(struct arch_specific_insn));
-}
-
 /*
 * Add the new probe to ap->list. Fail if this is the
 * second jprobe at the address - two jprobes can't coexist
@@ -534,6 +854,10 @@ static inline void copy_kprobe(struct kprobe *old_p, struct kprobe *p)
 static int __kprobes add_new_kprobe(struct kprobe *ap, struct kprobe *p)
 {
 	BUG_ON(kprobe_gone(ap) || kprobe_gone(p));
+
+	if (p->break_handler || p->post_handler)
+		unoptimize_kprobe(ap);	/* Fall back to normal kprobe */
+
 	if (p->break_handler) {
 		if (ap->break_handler)
 			return -EEXIST;
@@ -548,7 +872,7 @@ static int __kprobes add_new_kprobe(struct kprobe *ap, struct kprobe *p)
 		ap->flags &= ~KPROBE_FLAG_DISABLED;
 		if (!kprobes_all_disarmed)
 			/* Arm the breakpoint again. */
-			arm_kprobe(ap);
+			__arm_kprobe(ap);
 	}
 	return 0;
 }
@@ -557,12 +881,13 @@ static int __kprobes add_new_kprobe(struct kprobe *ap, struct kprobe *p)
  * Fill in the required fields of the "manager kprobe". Replace the
  * earlier kprobe in the hlist with the manager kprobe
  */
-static inline void add_aggr_kprobe(struct kprobe *ap, struct kprobe *p)
+static void __kprobes init_aggr_kprobe(struct kprobe *ap, struct kprobe *p)
 {
+	/* Copy p's insn slot to ap */
 	copy_kprobe(p, ap);
 	flush_insn_slot(ap);
 	ap->addr = p->addr;
-	ap->flags = p->flags;
+	ap->flags = p->flags & ~KPROBE_FLAG_OPTIMIZED;
 	ap->pre_handler = aggr_pre_handler;
 	ap->fault_handler = aggr_fault_handler;
 	/* We don't care the kprobe which has gone. */
@@ -572,8 +897,9 @@ static inline void add_aggr_kprobe(struct kprobe *ap, struct kprobe *p)
 		ap->break_handler = aggr_break_handler;
 
 	INIT_LIST_HEAD(&ap->list);
-	list_add_rcu(&p->list, &ap->list);
+	INIT_HLIST_NODE(&ap->hlist);
 
+	list_add_rcu(&p->list, &ap->list);
 	hlist_replace_rcu(&p->hlist, &ap->hlist);
 }
 
@@ -587,12 +913,12 @@ static int __kprobes register_aggr_kprobe(struct kprobe *old_p,
 	int ret = 0;
 	struct kprobe *ap = old_p;
 
-	if (old_p->pre_handler != aggr_pre_handler) {
-		/* If old_p is not an aggr_probe, create new aggr_kprobe. */
-		ap = kzalloc(sizeof(struct kprobe), GFP_KERNEL);
+	if (!kprobe_aggrprobe(old_p)) {
+		/* If old_p is not an aggr_kprobe, create new aggr_kprobe. */
+		ap = alloc_aggr_kprobe(old_p);
 		if (!ap)
 			return -ENOMEM;
-		add_aggr_kprobe(ap, old_p);
+		init_aggr_kprobe(ap, old_p);
 	}
 
 	if (kprobe_gone(ap)) {
@@ -611,6 +937,9 @@ static int __kprobes register_aggr_kprobe(struct kprobe *old_p,
 			 */
 			return ret;
 
+		/* Prepare optimized instructions if possible. */
+		prepare_optimized_kprobe(ap);
+
 		/*
 		 * Clear gone flag to prevent allocating new slot again, and
 		 * set disabled flag because it is not armed yet.
@@ -619,6 +948,7 @@ static int __kprobes register_aggr_kprobe(struct kprobe *old_p,
 			    | KPROBE_FLAG_DISABLED;
 	}
 
+	/* Copy ap's insn slot to p */
 	copy_kprobe(ap, p);
 	return add_new_kprobe(ap, p);
 }
@@ -769,27 +1099,34 @@ int __kprobes register_kprobe(struct kprobe *p)
 	p->nmissed = 0;
 	INIT_LIST_HEAD(&p->list);
 	mutex_lock(&kprobe_mutex);
+
+	get_online_cpus();	/* For avoiding text_mutex deadlock. */
+	mutex_lock(&text_mutex);
+
 	old_p = get_kprobe(p->addr);
 	if (old_p) {
+		/* Since this may unoptimize old_p, locking text_mutex. */
 		ret = register_aggr_kprobe(old_p, p);
 		goto out;
 	}
 
-	mutex_lock(&text_mutex);
 	ret = arch_prepare_kprobe(p);
 	if (ret)
-		goto out_unlock_text;
+		goto out;
 
 	INIT_HLIST_NODE(&p->hlist);
 	hlist_add_head_rcu(&p->hlist,
 		       &kprobe_table[hash_ptr(p->addr, KPROBE_HASH_BITS)]);
 
 	if (!kprobes_all_disarmed && !kprobe_disabled(p))
-		arch_arm_kprobe(p);
+		__arm_kprobe(p);
+
+	/* Try to optimize kprobe */
+	try_to_optimize_kprobe(p);
 
-out_unlock_text:
-	mutex_unlock(&text_mutex);
 out:
+	mutex_unlock(&text_mutex);
+	put_online_cpus();
 	mutex_unlock(&kprobe_mutex);
 
 	if (probed_mod)
@@ -811,7 +1148,7 @@ static int __kprobes __unregister_kprobe_top(struct kprobe *p)
 		return -EINVAL;
 
 	if (old_p == p ||
-	    (old_p->pre_handler == aggr_pre_handler &&
+	    (kprobe_aggrprobe(old_p) &&
 	     list_is_singular(&old_p->list))) {
 		/*
 		 * Only probe on the hash list. Disarm only if kprobes are
@@ -819,7 +1156,7 @@ static int __kprobes __unregister_kprobe_top(struct kprobe *p)
 		 * already have been removed. We save on flushing icache.
 		 */
 		if (!kprobes_all_disarmed && !kprobe_disabled(old_p))
-			disarm_kprobe(p);
+			disarm_kprobe(old_p);
 		hlist_del_rcu(&old_p->hlist);
 	} else {
 		if (p->break_handler && !kprobe_gone(p))
@@ -835,8 +1172,13 @@ noclean:
 		list_del_rcu(&p->list);
 		if (!kprobe_disabled(old_p)) {
 			try_to_disable_aggr_kprobe(old_p);
-			if (!kprobes_all_disarmed && kprobe_disabled(old_p))
-				disarm_kprobe(old_p);
+			if (!kprobes_all_disarmed) {
+				if (kprobe_disabled(old_p))
+					disarm_kprobe(old_p);
+				else
+					/* Try to optimize this probe again */
+					optimize_kprobe(old_p);
+			}
 		}
 	}
 	return 0;
@@ -853,7 +1195,7 @@ static void __kprobes __unregister_kprobe_bottom(struct kprobe *p)
 		old_p = list_entry(p->list.next, struct kprobe, list);
 		list_del(&p->list);
 		arch_remove_kprobe(old_p);
-		kfree(old_p);
+		free_aggr_kprobe(old_p);
 	}
 }
 
@@ -1149,7 +1491,7 @@ static void __kprobes kill_kprobe(struct kprobe *p)
 	struct kprobe *kp;
 
 	p->flags |= KPROBE_FLAG_GONE;
-	if (p->pre_handler == aggr_pre_handler) {
+	if (kprobe_aggrprobe(p)) {
 		/*
 		 * If this is an aggr_kprobe, we have to list all the
 		 * chained probes and mark them GONE.
@@ -1158,6 +1500,7 @@ static void __kprobes kill_kprobe(struct kprobe *p)
 			kp->flags |= KPROBE_FLAG_GONE;
 		p->post_handler = NULL;
 		p->break_handler = NULL;
+		kill_optimized_kprobe(p);
 	}
 	/*
 	 * Here, we can remove insn_slot safely, because no thread calls
@@ -1267,6 +1610,11 @@ static int __init init_kprobes(void)
 		}
 	}
 
+#if defined(CONFIG_OPTPROBES) && defined(__ARCH_WANT_KPROBES_INSN_SLOT)
+	/* Init kprobe_optinsn_slots */
+	kprobe_optinsn_slots.insn_size = MAX_OPTINSN_SIZE;
+#endif
+
 	/* By default, kprobes are armed */
 	kprobes_all_disarmed = false;
 
@@ -1285,7 +1633,7 @@ static int __init init_kprobes(void)
 
 #ifdef CONFIG_DEBUG_FS
 static void __kprobes report_probe(struct seq_file *pi, struct kprobe *p,
-		const char *sym, int offset,char *modname)
+		const char *sym, int offset, char *modname, struct kprobe *pp)
 {
 	char *kprobe_type;
 
@@ -1295,19 +1643,21 @@ static void __kprobes report_probe(struct seq_file *pi, struct kprobe *p,
 		kprobe_type = "j";
 	else
 		kprobe_type = "k";
+
 	if (sym)
-		seq_printf(pi, "%p  %s  %s+0x%x  %s %s%s\n",
+		seq_printf(pi, "%p  %s  %s+0x%x  %s ",
 			p->addr, kprobe_type, sym, offset,
-			(modname ? modname : " "),
-			(kprobe_gone(p) ? "[GONE]" : ""),
-			((kprobe_disabled(p) && !kprobe_gone(p)) ?
-			 "[DISABLED]" : ""));
+			(modname ? modname : " "));
 	else
-		seq_printf(pi, "%p  %s  %p %s%s\n",
-			p->addr, kprobe_type, p->addr,
-			(kprobe_gone(p) ? "[GONE]" : ""),
-			((kprobe_disabled(p) && !kprobe_gone(p)) ?
-			 "[DISABLED]" : ""));
+		seq_printf(pi, "%p  %s  %p ",
+			p->addr, kprobe_type, p->addr);
+
+	if (!pp)
+		pp = p;
+	seq_printf(pi, "%s%s%s\n",
+		(kprobe_gone(p) ? "[GONE]" : ""),
+		((kprobe_disabled(p) && !kprobe_gone(p)) ?  "[DISABLED]" : ""),
+		(kprobe_optimized(pp) ? "[OPTIMIZED]" : ""));
 }
 
 static void __kprobes *kprobe_seq_start(struct seq_file *f, loff_t *pos)
@@ -1343,11 +1693,11 @@ static int __kprobes show_kprobe_addr(struct seq_file *pi, void *v)
 	hlist_for_each_entry_rcu(p, node, head, hlist) {
 		sym = kallsyms_lookup((unsigned long)p->addr, NULL,
 					&offset, &modname, namebuf);
-		if (p->pre_handler == aggr_pre_handler) {
+		if (kprobe_aggrprobe(p)) {
 			list_for_each_entry_rcu(kp, &p->list, list)
-				report_probe(pi, kp, sym, offset, modname);
+				report_probe(pi, kp, sym, offset, modname, p);
 		} else
-			report_probe(pi, p, sym, offset, modname);
+			report_probe(pi, p, sym, offset, modname, NULL);
 	}
 	preempt_enable();
 	return 0;
@@ -1425,12 +1775,13 @@ int __kprobes enable_kprobe(struct kprobe *kp)
 		goto out;
 	}
 
-	if (!kprobes_all_disarmed && kprobe_disabled(p))
-		arm_kprobe(p);
-
-	p->flags &= ~KPROBE_FLAG_DISABLED;
 	if (p != kp)
 		kp->flags &= ~KPROBE_FLAG_DISABLED;
+
+	if (!kprobes_all_disarmed && kprobe_disabled(p)) {
+		p->flags &= ~KPROBE_FLAG_DISABLED;
+		arm_kprobe(p);
+	}
 out:
 	mutex_unlock(&kprobe_mutex);
 	return ret;
@@ -1450,12 +1801,13 @@ static void __kprobes arm_all_kprobes(void)
 	if (!kprobes_all_disarmed)
 		goto already_enabled;
 
+	/* Arming kprobes doesn't optimize kprobe itself */
 	mutex_lock(&text_mutex);
 	for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
 		head = &kprobe_table[i];
 		hlist_for_each_entry_rcu(p, node, head, hlist)
 			if (!kprobe_disabled(p))
-				arch_arm_kprobe(p);
+				__arm_kprobe(p);
 	}
 	mutex_unlock(&text_mutex);
 
@@ -1482,16 +1834,23 @@ static void __kprobes disarm_all_kprobes(void)
 
 	kprobes_all_disarmed = true;
 	printk(KERN_INFO "Kprobes globally disabled\n");
+
+	/*
+	 * Here we call get_online_cpus() for avoiding text_mutex deadlock,
+	 * because disarming may also unoptimize kprobes.
+	 */
+	get_online_cpus();
 	mutex_lock(&text_mutex);
 	for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
 		head = &kprobe_table[i];
 		hlist_for_each_entry_rcu(p, node, head, hlist) {
 			if (!arch_trampoline_kprobe(p) && !kprobe_disabled(p))
-				arch_disarm_kprobe(p);
+				__disarm_kprobe(p);
 		}
 	}
 
 	mutex_unlock(&text_mutex);
+	put_online_cpus();
 	mutex_unlock(&kprobe_mutex);
 	/* Allow all currently running kprobes to complete */
 	synchronize_sched();
-- 
cgit v1.2.3


From b2be84df99ebc93599c69e931a3c4a5105abfabc Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@redhat.com>
Date: Thu, 25 Feb 2010 08:34:15 -0500
Subject: kprobes: Jump optimization sysctl interface

Add /proc/sys/debug/kprobes-optimization sysctl which enables
and disables kprobes jump optimization on the fly for debugging.

Changes in v7:
 - Remove ctl_name = CTL_UNNUMBERED for upstream compatibility.

Changes in v6:
- Update comments and coding style.

Signed-off-by: Masami Hiramatsu <mhiramat@redhat.com>
Cc: systemtap <systemtap@sources.redhat.com>
Cc: DLE <dle-develop@lists.sourceforge.net>
Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Cc: Jim Keniston <jkenisto@us.ibm.com>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Anders Kaseorg <andersk@ksplice.com>
Cc: Tim Abbott <tabbott@ksplice.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Jason Baron <jbaron@redhat.com>
Cc: Mathieu Desnoyers <compudj@krystal.dyndns.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
LKML-Reference: <20100225133415.6725.8274.stgit@localhost6.localdomain6>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/kprobes.h |  8 +++++
 kernel/kprobes.c        | 88 +++++++++++++++++++++++++++++++++++++++++++++++--
 kernel/sysctl.c         | 12 +++++++
 3 files changed, 105 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index aed1f95c582f..e7d1b2e0070d 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -283,6 +283,14 @@ extern int arch_within_optimized_kprobe(struct optimized_kprobe *op,
 					unsigned long addr);
 
 extern void opt_pre_handler(struct kprobe *p, struct pt_regs *regs);
+
+#ifdef CONFIG_SYSCTL
+extern int sysctl_kprobes_optimization;
+extern int proc_kprobes_optimization_handler(struct ctl_table *table,
+					     int write, void __user *buffer,
+					     size_t *length, loff_t *ppos);
+#endif
+
 #endif /* CONFIG_OPTPROBES */
 
 /* Get the kprobe at this addr (if any) - called with preemption disabled */
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 612af2d61614..fa034d29cf73 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -42,6 +42,7 @@
 #include <linux/freezer.h>
 #include <linux/seq_file.h>
 #include <linux/debugfs.h>
+#include <linux/sysctl.h>
 #include <linux/kdebug.h>
 #include <linux/memory.h>
 #include <linux/ftrace.h>
@@ -360,6 +361,9 @@ static inline void copy_kprobe(struct kprobe *old_p, struct kprobe *p)
 }
 
 #ifdef CONFIG_OPTPROBES
+/* NOTE: change this value only with kprobe_mutex held */
+static bool kprobes_allow_optimization;
+
 /*
  * Call all pre_handler on the list, but ignores its return value.
  * This must be called from arch-dep optimized caller.
@@ -428,7 +432,7 @@ static __kprobes void kprobe_optimizer(struct work_struct *work)
 	/* Lock modules while optimizing kprobes */
 	mutex_lock(&module_mutex);
 	mutex_lock(&kprobe_mutex);
-	if (kprobes_all_disarmed)
+	if (kprobes_all_disarmed || !kprobes_allow_optimization)
 		goto end;
 
 	/*
@@ -471,7 +475,7 @@ static __kprobes void optimize_kprobe(struct kprobe *p)
 	struct optimized_kprobe *op;
 
 	/* Check if the kprobe is disabled or not ready for optimization. */
-	if (!kprobe_optready(p) ||
+	if (!kprobe_optready(p) || !kprobes_allow_optimization ||
 	    (kprobe_disabled(p) || kprobes_all_disarmed))
 		return;
 
@@ -588,6 +592,80 @@ static __kprobes void try_to_optimize_kprobe(struct kprobe *p)
 	optimize_kprobe(ap);
 }
 
+#ifdef CONFIG_SYSCTL
+static void __kprobes optimize_all_kprobes(void)
+{
+	struct hlist_head *head;
+	struct hlist_node *node;
+	struct kprobe *p;
+	unsigned int i;
+
+	/* If optimization is already allowed, just return */
+	if (kprobes_allow_optimization)
+		return;
+
+	kprobes_allow_optimization = true;
+	mutex_lock(&text_mutex);
+	for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
+		head = &kprobe_table[i];
+		hlist_for_each_entry_rcu(p, node, head, hlist)
+			if (!kprobe_disabled(p))
+				optimize_kprobe(p);
+	}
+	mutex_unlock(&text_mutex);
+	printk(KERN_INFO "Kprobes globally optimized\n");
+}
+
+static void __kprobes unoptimize_all_kprobes(void)
+{
+	struct hlist_head *head;
+	struct hlist_node *node;
+	struct kprobe *p;
+	unsigned int i;
+
+	/* If optimization is already prohibited, just return */
+	if (!kprobes_allow_optimization)
+		return;
+
+	kprobes_allow_optimization = false;
+	printk(KERN_INFO "Kprobes globally unoptimized\n");
+	get_online_cpus();	/* For avoiding text_mutex deadlock */
+	mutex_lock(&text_mutex);
+	for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
+		head = &kprobe_table[i];
+		hlist_for_each_entry_rcu(p, node, head, hlist) {
+			if (!kprobe_disabled(p))
+				unoptimize_kprobe(p);
+		}
+	}
+
+	mutex_unlock(&text_mutex);
+	put_online_cpus();
+	/* Allow all currently running kprobes to complete */
+	synchronize_sched();
+}
+
+int sysctl_kprobes_optimization;
+int proc_kprobes_optimization_handler(struct ctl_table *table, int write,
+				      void __user *buffer, size_t *length,
+				      loff_t *ppos)
+{
+	int ret;
+
+	mutex_lock(&kprobe_mutex);
+	sysctl_kprobes_optimization = kprobes_allow_optimization ? 1 : 0;
+	ret = proc_dointvec_minmax(table, write, buffer, length, ppos);
+
+	if (sysctl_kprobes_optimization)
+		optimize_all_kprobes();
+	else
+		unoptimize_all_kprobes();
+	mutex_unlock(&kprobe_mutex);
+
+	return ret;
+}
+#endif /* CONFIG_SYSCTL */
+
 static void __kprobes __arm_kprobe(struct kprobe *p)
 {
 	struct kprobe *old_p;
@@ -1610,10 +1688,14 @@ static int __init init_kprobes(void)
 		}
 	}
 
-#if defined(CONFIG_OPTPROBES) && defined(__ARCH_WANT_KPROBES_INSN_SLOT)
+#if defined(CONFIG_OPTPROBES)
+#if defined(__ARCH_WANT_KPROBES_INSN_SLOT)
 	/* Init kprobe_optinsn_slots */
 	kprobe_optinsn_slots.insn_size = MAX_OPTINSN_SIZE;
 #endif
+	/* By default, kprobes can be optimized */
+	kprobes_allow_optimization = true;
+#endif
 
 	/* By default, kprobes are armed */
 	kprobes_all_disarmed = false;
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 8a68b2448468..40d791d616b5 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -50,6 +50,7 @@
 #include <linux/ftrace.h>
 #include <linux/slow-work.h>
 #include <linux/perf_event.h>
+#include <linux/kprobes.h>
 
 #include <asm/uaccess.h>
 #include <asm/processor.h>
@@ -1449,6 +1450,17 @@ static struct ctl_table debug_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec
 	},
+#endif
+#if defined(CONFIG_OPTPROBES)
+	{
+		.procname	= "kprobes-optimization",
+		.data		= &sysctl_kprobes_optimization,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_kprobes_optimization_handler,
+		.extra1		= &zero,
+		.extra2		= &one,
+	},
 #endif
 	{ }
 };
-- 
cgit v1.2.3


From d9f1bb6ad7fc53c406706f47858dd5ff030b14a3 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Thu, 25 Feb 2010 14:06:47 -0800
Subject: rcu: Make rcu_read_lock_sched_held() take boot time into account

Before the scheduler starts, all tasks are non-preemptible by
definition. So, during that time, rcu_read_lock_sched_held()
needs to always return "true".  This patch makes that be so.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: laijs@cn.fujitsu.com
Cc: dipankar@in.ibm.com
Cc: mathieu.desnoyers@polymtl.ca
Cc: josh@joshtriplett.org
Cc: dvhltc@us.ibm.com
Cc: niv@us.ibm.com
Cc: peterz@infradead.org
Cc: rostedt@goodmis.org
Cc: Valdis.Kletnieks@vt.edu
Cc: dhowells@redhat.com
LKML-Reference: <1267135607-7056-2-git-send-email-paulmck@linux.vnet.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/rcupdate.h |  4 +++-
 include/linux/rcutiny.h  |  4 ----
 include/linux/rcutree.h  |  1 -
 kernel/rcupdate.c        | 18 ++++++++++++++++++
 kernel/rcutree.c         | 19 -------------------
 5 files changed, 21 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 1a4de31bd7b4..fcea332a8424 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -62,6 +62,8 @@ extern int sched_expedited_torture_stats(char *page);
 
 /* Internal to kernel */
 extern void rcu_init(void);
+extern int rcu_scheduler_active;
+extern void rcu_scheduler_starting(void);
 
 #if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU)
 #include <linux/rcutree.h>
@@ -140,7 +142,7 @@ static inline int rcu_read_lock_sched_held(void)
 
 	if (debug_locks)
 		lockdep_opinion = lock_is_held(&rcu_sched_lock_map);
-	return lockdep_opinion || preempt_count() != 0;
+	return lockdep_opinion || preempt_count() != 0 || !rcu_scheduler_active;
 }
 
 #else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index 2b70d4e37383..a5195875480a 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -105,10 +105,6 @@ static inline void rcu_exit_nohz(void)
 
 #endif /* #else #ifdef CONFIG_NO_HZ */
 
-static inline void rcu_scheduler_starting(void)
-{
-}
-
 static inline void exit_rcu(void)
 {
 }
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index 704a010f686c..42cc3a04779e 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -35,7 +35,6 @@ struct notifier_block;
 extern void rcu_sched_qs(int cpu);
 extern void rcu_bh_qs(int cpu);
 extern int rcu_needs_cpu(int cpu);
-extern void rcu_scheduler_starting(void);
 extern int rcu_expedited_torture_stats(char *page);
 
 #ifdef CONFIG_TREE_PREEMPT_RCU
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index 033cb55c26df..7bfa004572b1 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -44,6 +44,7 @@
 #include <linux/cpu.h>
 #include <linux/mutex.h>
 #include <linux/module.h>
+#include <linux/kernel_stat.h>
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 static struct lock_class_key rcu_lock_key;
@@ -62,6 +63,23 @@ struct lockdep_map rcu_sched_lock_map =
 EXPORT_SYMBOL_GPL(rcu_sched_lock_map);
 #endif
 
+int rcu_scheduler_active __read_mostly;
+
+/*
+ * This function is invoked towards the end of the scheduler's initialization
+ * process.  Before this is called, the idle task might contain
+ * RCU read-side critical sections (during which time, this idle
+ * task is booting the system).  After this function is called, the
+ * idle tasks are prohibited from containing RCU read-side critical
+ * sections.
+ */
+void rcu_scheduler_starting(void)
+{
+	WARN_ON(num_online_cpus() != 1);
+	WARN_ON(nr_context_switches() > 0);
+	rcu_scheduler_active = 1;
+}
+
 /*
  * Awaken the corresponding synchronize_rcu() instance now that a
  * grace period has elapsed.
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 525d39810616..335bfe4f0076 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -46,7 +46,6 @@
 #include <linux/cpu.h>
 #include <linux/mutex.h>
 #include <linux/time.h>
-#include <linux/kernel_stat.h>
 
 #include "rcutree.h"
 
@@ -81,9 +80,6 @@ DEFINE_PER_CPU(struct rcu_data, rcu_sched_data);
 struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh_state);
 DEFINE_PER_CPU(struct rcu_data, rcu_bh_data);
 
-static int rcu_scheduler_active __read_mostly;
-
-
 /*
  * Return true if an RCU grace period is in progress.  The ACCESS_ONCE()s
  * permit this function to be invoked without holding the root rcu_node
@@ -1565,21 +1561,6 @@ static int rcu_needs_cpu_quick_check(int cpu)
 	       rcu_preempt_needs_cpu(cpu);
 }
 
-/*
- * This function is invoked towards the end of the scheduler's initialization
- * process.  Before this is called, the idle task might contain
- * RCU read-side critical sections (during which time, this idle
- * task is booting the system).  After this function is called, the
- * idle tasks are prohibited from containing RCU read-side critical
- * sections.
- */
-void rcu_scheduler_starting(void)
-{
-	WARN_ON(num_online_cpus() != 1);
-	WARN_ON(nr_context_switches() > 0);
-	rcu_scheduler_active = 1;
-}
-
 static DEFINE_PER_CPU(struct rcu_head, rcu_barrier_head) = {NULL};
 static atomic_t rcu_barrier_cpu_count;
 static DEFINE_MUTEX(rcu_barrier_mutex);
-- 
cgit v1.2.3


From fb90ef93df654f2678933efbbf864adac0ae490e Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Wed, 24 Feb 2010 18:36:53 -0800
Subject: early_res: Add free_early_partial()

To free partial areas in pcpu_setup...

Reported-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: Christoph Lameter <cl@linux-foundation.org>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Jesse Barnes <jbarnes@virtuousgeek.org>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
LKML-Reference: <4B85E245.5030001@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/setup_percpu.c |  6 +++++
 include/linux/early_res.h      |  1 +
 kernel/early_res.c             | 55 ++++++++++++++++++++++++++++++++++++++++++
 mm/percpu.c                    |  3 ---
 4 files changed, 62 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 35abcb8b00e9..ef6370b00e70 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -137,7 +137,13 @@ static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align)
 
 static void __init pcpu_fc_free(void *ptr, size_t size)
 {
+#ifdef CONFIG_NO_BOOTMEM
+	u64 start = __pa(ptr);
+	u64 end = start + size;
+	free_early_partial(start, end);
+#else
 	free_bootmem(__pa(ptr), size);
+#endif
 }
 
 static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
diff --git a/include/linux/early_res.h b/include/linux/early_res.h
index 50f7663bb8b1..29c09f57a13c 100644
--- a/include/linux/early_res.h
+++ b/include/linux/early_res.h
@@ -5,6 +5,7 @@
 extern void reserve_early(u64 start, u64 end, char *name);
 extern void reserve_early_overlap_ok(u64 start, u64 end, char *name);
 extern void free_early(u64 start, u64 end);
+void free_early_partial(u64 start, u64 end);
 extern void early_res_to_bootmem(u64 start, u64 end);
 
 void reserve_early_without_check(u64 start, u64 end, char *name);
diff --git a/kernel/early_res.c b/kernel/early_res.c
index aa5494ac4462..9ab11cd84853 100644
--- a/kernel/early_res.c
+++ b/kernel/early_res.c
@@ -61,6 +61,40 @@ static void __init drop_range(int i)
 	early_res_count--;
 }
 
+static void __init drop_range_partial(int i, u64 start, u64 end)
+{
+	u64 common_start, common_end;
+	u64 old_start, old_end;
+
+	old_start = early_res[i].start;
+	old_end = early_res[i].end;
+	common_start = max(old_start, start);
+	common_end = min(old_end, end);
+
+	/* no overlap ? */
+	if (common_start >= common_end)
+		return;
+
+	if (old_start < common_start) {
+		/* make head segment */
+		early_res[i].end = common_start;
+		if (old_end > common_end) {
+			/* add another for left over on tail */
+			reserve_early_without_check(common_end, old_end,
+					 early_res[i].name);
+		}
+		return;
+	} else {
+		if (old_end > common_end) {
+			/* reuse the entry for tail left */
+			early_res[i].start = common_end;
+			return;
+		}
+		/* all covered */
+		drop_range(i);
+	}
+}
+
 /*
  * Split any existing ranges that:
  *  1) are marked 'overlap_ok', and
@@ -284,6 +318,27 @@ void __init free_early(u64 start, u64 end)
 	drop_range(i);
 }
 
+void __init free_early_partial(u64 start, u64 end)
+{
+	struct early_res *r;
+	int i;
+
+try_next:
+	i = find_overlapped_early(start, end);
+	if (i >= max_early_res)
+		return;
+
+	r = &early_res[i];
+	/* hole ? */
+	if (r->end >= end && r->start <= start) {
+		drop_range_partial(i, start, end);
+		return;
+	}
+
+	drop_range_partial(i, start, end);
+	goto try_next;
+}
+
 #ifdef CONFIG_NO_BOOTMEM
 static void __init subtract_early_res(struct range *range, int az)
 {
diff --git a/mm/percpu.c b/mm/percpu.c
index 841defeeef86..083e7c91e5f6 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -1929,10 +1929,7 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, ssize_t dyn_size,
 			}
 			/* copy and return the unused part */
 			memcpy(ptr, __per_cpu_load, ai->static_size);
-#ifndef CONFIG_NO_BOOTMEM
-			/* fix partial free ! */
 			free_fn(ptr + size_sum, ai->unit_size - size_sum);
-#endif
 		}
 	}
 
-- 
cgit v1.2.3


From 492d4f25416528ffb900e6edf0fd70eafd098cfc Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Fri, 26 Feb 2010 00:16:05 -0800
Subject: Input: add KEY_WPS_BUTTON definition

The new key definition is supposed to be used for buttons that initiate
WiFi Protected setup sequence:

	http://en.wikipedia.org/wiki/Wi-Fi_Protected_Setup

Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 include/linux/input.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/input.h b/include/linux/input.h
index 8dc5d724c703..828c3feb7213 100644
--- a/include/linux/input.h
+++ b/include/linux/input.h
@@ -596,6 +596,7 @@ struct input_absinfo {
 #define KEY_NUMERIC_POUND	0x20b
 
 #define KEY_CAMERA_FOCUS	0x210
+#define KEY_WPS_BUTTON		0x211	/* WiFi Protected Setup key */
 
 /* We avoid low common keys in module aliases so they don't get huge. */
 #define KEY_MIN_INTERESTING	KEY_MUTE
-- 
cgit v1.2.3


From 4b70858ba8d4537daf782defebe5f2ff80ccef2b Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Fri, 26 Feb 2010 00:22:04 -0800
Subject: Input: atkbd - release previously reserved keycodes 248 - 254

Keycodes in 248 - 254 range were reserved for special needs (scrolling)
of atkbd driver. Now that the driver has been switched to use unsigned
short keycodes instead of unsigned char we can release this range back
into pull. We keep code 255 (ATKBD_KEY_NULL) reserved since users may
have been using it to silence keys they do not care about since atkbd
silently drops scancodes mapped to this keycode.

Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/input/keyboard/atkbd.c | 26 +++++++++++++++-----------
 include/linux/input.h          |  2 +-
 2 files changed, 16 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/input/keyboard/atkbd.c b/drivers/input/keyboard/atkbd.c
index 326875be192e..d358ef8623f4 100644
--- a/drivers/input/keyboard/atkbd.c
+++ b/drivers/input/keyboard/atkbd.c
@@ -153,16 +153,16 @@ static const unsigned short atkbd_unxlate_table[128] = {
 #define ATKBD_RET_HANGEUL	0xf2
 #define ATKBD_RET_ERR		0xff
 
-#define ATKBD_KEY_UNKNOWN	  0
+#define ATKBD_KEY_UNKNOWN	0
 #define ATKBD_KEY_NULL		255
 
-#define ATKBD_SCR_1		254
-#define ATKBD_SCR_2		253
-#define ATKBD_SCR_4		252
-#define ATKBD_SCR_8		251
-#define ATKBD_SCR_CLICK		250
-#define ATKBD_SCR_LEFT		249
-#define ATKBD_SCR_RIGHT		248
+#define ATKBD_SCR_1		0xfffe
+#define ATKBD_SCR_2		0xfffd
+#define ATKBD_SCR_4		0xfffc
+#define ATKBD_SCR_8		0xfffb
+#define ATKBD_SCR_CLICK		0xfffa
+#define ATKBD_SCR_LEFT		0xfff9
+#define ATKBD_SCR_RIGHT		0xfff8
 
 #define ATKBD_SPECIAL		ATKBD_SCR_RIGHT
 
@@ -177,7 +177,7 @@ static const unsigned short atkbd_unxlate_table[128] = {
 #define ATKBD_XL_HANJA		0x20
 
 static const struct {
-	unsigned char keycode;
+	unsigned short keycode;
 	unsigned char set2;
 } atkbd_scroll_keys[] = {
 	{ ATKBD_SCR_1,     0xc5 },
@@ -1074,9 +1074,13 @@ static void atkbd_set_device_attrs(struct atkbd *atkbd)
 	input_dev->keycodesize = sizeof(unsigned short);
 	input_dev->keycodemax = ARRAY_SIZE(atkbd_set2_keycode);
 
-	for (i = 0; i < ATKBD_KEYMAP_SIZE; i++)
-		if (atkbd->keycode[i] && atkbd->keycode[i] < ATKBD_SPECIAL)
+	for (i = 0; i < ATKBD_KEYMAP_SIZE; i++) {
+		if (atkbd->keycode[i] != KEY_RESERVED &&
+		    atkbd->keycode[i] != ATKBD_KEY_NULL &&
+		    atkbd->keycode[i] < ATKBD_SPECIAL) {
 			__set_bit(atkbd->keycode[i], input_dev->keybit);
+		}
+	}
 }
 
 /*
diff --git a/include/linux/input.h b/include/linux/input.h
index 828c3feb7213..b1a74fb2e436 100644
--- a/include/linux/input.h
+++ b/include/linux/input.h
@@ -377,7 +377,7 @@ struct input_absinfo {
 
 #define KEY_WIMAX		246
 
-/* Range 248 - 255 is reserved for special needs of AT keyboard driver */
+/* Code 255 is reserved for special needs of AT keyboard driver */
 
 #define BTN_MISC		0x100
 #define BTN_0			0x100
-- 
cgit v1.2.3


From 52c793f24054f5dc30d228e37e0e19cc8313f086 Mon Sep 17 00:00:00 2001
From: Wolfgang Grandegger <wg@grandegger.com>
Date: Mon, 22 Feb 2010 22:21:17 +0000
Subject: can: netlink support for bus-error reporting and counters

This patch makes the bus-error reporting configurable and allows to
retrieve the CAN TX and RX bus error counters via netlink interface.
I have added support for the SJA1000. The TX and RX bus error counters
are also copied to the data fields 6..7 of error messages when state
changes are reported.

Signed-off-by: Wolfgang Grandegger <wg@grandegger.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/can/dev.c             |  6 ++++++
 drivers/net/can/sja1000/sja1000.c | 25 ++++++++++++++++++++++---
 include/linux/can/dev.h           |  2 ++
 include/linux/can/netlink.h       | 18 ++++++++++++++----
 4 files changed, 44 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c
index f08f1202ff00..904aa369f80e 100644
--- a/drivers/net/can/dev.c
+++ b/drivers/net/can/dev.c
@@ -574,6 +574,7 @@ static const struct nla_policy can_policy[IFLA_CAN_MAX + 1] = {
 	[IFLA_CAN_BITTIMING_CONST]
 				= { .len = sizeof(struct can_bittiming_const) },
 	[IFLA_CAN_CLOCK]	= { .len = sizeof(struct can_clock) },
+	[IFLA_CAN_BERR_COUNTER]	= { .len = sizeof(struct can_berr_counter) },
 };
 
 static int can_changelink(struct net_device *dev,
@@ -649,6 +650,8 @@ static size_t can_get_size(const struct net_device *dev)
 	size += nla_total_size(sizeof(u32));  /* IFLA_CAN_RESTART_MS */
 	size += sizeof(struct can_bittiming); /* IFLA_CAN_BITTIMING */
 	size += sizeof(struct can_clock);     /* IFLA_CAN_CLOCK */
+	if (priv->do_get_berr_counter)        /* IFLA_CAN_BERR_COUNTER */
+		size += sizeof(struct can_berr_counter);
 	if (priv->bittiming_const)	      /* IFLA_CAN_BITTIMING_CONST */
 		size += sizeof(struct can_bittiming_const);
 
@@ -659,6 +662,7 @@ static int can_fill_info(struct sk_buff *skb, const struct net_device *dev)
 {
 	struct can_priv *priv = netdev_priv(dev);
 	struct can_ctrlmode cm = {.flags = priv->ctrlmode};
+	struct can_berr_counter bec;
 	enum can_state state = priv->state;
 
 	if (priv->do_get_state)
@@ -669,6 +673,8 @@ static int can_fill_info(struct sk_buff *skb, const struct net_device *dev)
 	NLA_PUT(skb, IFLA_CAN_BITTIMING,
 		sizeof(priv->bittiming), &priv->bittiming);
 	NLA_PUT(skb, IFLA_CAN_CLOCK, sizeof(cm), &priv->clock);
+	if (priv->do_get_berr_counter && !priv->do_get_berr_counter(dev, &bec))
+		NLA_PUT(skb, IFLA_CAN_BERR_COUNTER, sizeof(bec), &bec);
 	if (priv->bittiming_const)
 		NLA_PUT(skb, IFLA_CAN_BITTIMING_CONST,
 			sizeof(*priv->bittiming_const), priv->bittiming_const);
diff --git a/drivers/net/can/sja1000/sja1000.c b/drivers/net/can/sja1000/sja1000.c
index ace103a44833..145b1a731a53 100644
--- a/drivers/net/can/sja1000/sja1000.c
+++ b/drivers/net/can/sja1000/sja1000.c
@@ -130,8 +130,12 @@ static void set_normal_mode(struct net_device *dev)
 		/* check reset bit */
 		if ((status & MOD_RM) == 0) {
 			priv->can.state = CAN_STATE_ERROR_ACTIVE;
-			/* enable all interrupts */
-			priv->write_reg(priv, REG_IER, IRQ_ALL);
+			/* enable interrupts */
+			if (priv->can.ctrlmode & CAN_CTRLMODE_BERR_REPORTING)
+				priv->write_reg(priv, REG_IER, IRQ_ALL);
+			else
+				priv->write_reg(priv, REG_IER,
+						IRQ_ALL & ~IRQ_BEI);
 			return;
 		}
 
@@ -203,6 +207,17 @@ static int sja1000_set_bittiming(struct net_device *dev)
 	return 0;
 }
 
+static int sja1000_get_berr_counter(const struct net_device *dev,
+				    struct can_berr_counter *bec)
+{
+	struct sja1000_priv *priv = netdev_priv(dev);
+
+	bec->txerr = priv->read_reg(priv, REG_TXERR);
+	bec->rxerr = priv->read_reg(priv, REG_RXERR);
+
+	return 0;
+}
+
 /*
  * initialize SJA1000 chip:
  *   - reset chip
@@ -437,6 +452,8 @@ static int sja1000_err(struct net_device *dev, uint8_t isrc, uint8_t status)
 				CAN_ERR_CRTL_TX_PASSIVE :
 				CAN_ERR_CRTL_RX_PASSIVE;
 		}
+		cf->data[6] = txerr;
+		cf->data[7] = rxerr;
 	}
 
 	priv->can.state = state;
@@ -567,7 +584,9 @@ struct net_device *alloc_sja1000dev(int sizeof_priv)
 	priv->can.bittiming_const = &sja1000_bittiming_const;
 	priv->can.do_set_bittiming = sja1000_set_bittiming;
 	priv->can.do_set_mode = sja1000_set_mode;
-	priv->can.ctrlmode_supported = CAN_CTRLMODE_3_SAMPLES;
+	priv->can.do_get_berr_counter = sja1000_get_berr_counter;
+	priv->can.ctrlmode_supported = CAN_CTRLMODE_3_SAMPLES |
+		CAN_CTRLMODE_BERR_REPORTING;
 
 	if (sizeof_priv)
 		priv->priv = (void *)priv + sizeof(struct sja1000_priv);
diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h
index c8c660a79f90..6e5a7f00223d 100644
--- a/include/linux/can/dev.h
+++ b/include/linux/can/dev.h
@@ -47,6 +47,8 @@ struct can_priv {
 	int (*do_set_mode)(struct net_device *dev, enum can_mode mode);
 	int (*do_get_state)(const struct net_device *dev,
 			    enum can_state *state);
+	int (*do_get_berr_counter)(const struct net_device *dev,
+				   struct can_berr_counter *bec);
 
 	unsigned int echo_skb_max;
 	struct sk_buff **echo_skb;
diff --git a/include/linux/can/netlink.h b/include/linux/can/netlink.h
index c818335fbb13..3250de935e1a 100644
--- a/include/linux/can/netlink.h
+++ b/include/linux/can/netlink.h
@@ -69,6 +69,14 @@ enum can_state {
 	CAN_STATE_MAX
 };
 
+/*
+ * CAN bus error counters
+ */
+struct can_berr_counter {
+	__u16 txerr;
+	__u16 rxerr;
+};
+
 /*
  * CAN controller mode
  */
@@ -77,10 +85,11 @@ struct can_ctrlmode {
 	__u32 flags;
 };
 
-#define CAN_CTRLMODE_LOOPBACK	0x1	/* Loopback mode */
-#define CAN_CTRLMODE_LISTENONLY	0x2 	/* Listen-only mode */
-#define CAN_CTRLMODE_3_SAMPLES	0x4	/* Triple sampling mode */
-#define CAN_CTRLMODE_ONE_SHOT	0x8	/* One-Shot mode */
+#define CAN_CTRLMODE_LOOPBACK		0x01	/* Loopback mode */
+#define CAN_CTRLMODE_LISTENONLY		0x02 	/* Listen-only mode */
+#define CAN_CTRLMODE_3_SAMPLES		0x04	/* Triple sampling mode */
+#define CAN_CTRLMODE_ONE_SHOT		0x08	/* One-Shot mode */
+#define CAN_CTRLMODE_BERR_REPORTING	0x10	/* Bus-error reporting */
 
 /*
  * CAN device statistics
@@ -106,6 +115,7 @@ enum {
 	IFLA_CAN_CTRLMODE,
 	IFLA_CAN_RESTART_MS,
 	IFLA_CAN_RESTART,
+	IFLA_CAN_BERR_COUNTER,
 	__IFLA_CAN_MAX
 };
 
-- 
cgit v1.2.3


From d76a0812ac4139ceb54daab3cc70e1bd8bd9d43a Mon Sep 17 00:00:00 2001
From: Stephane Eranian <eranian@google.com>
Date: Mon, 8 Feb 2010 17:06:01 +0200
Subject: perf_events: Add new start/stop PMU callbacks

In certain situations, the kernel may need to stop and start the same
event rapidly. The current PMU callbacks do not distinguish between stop
and release (i.e., stop + free the resource). Thus, a counter may be
released, then it will be immediately re-acquired. Event scheduling will
again take place with no guarantee to assign the same counter. On some
processors, this may event yield to failure to assign the event back due
to competion between cores.

This patch is adding a new pair of callback to stop and restart a counter
without actually release the underlying counter resource. On stop, the
counter is stopped, its values saved and that's it. On start, the value
is reloaded and counter is restarted (on x86, actual restart is delayed
until perf_enable()).

Signed-off-by: Stephane Eranian <eranian@google.com>
[ added fallback to ->enable/->disable for all other PMUs
  fixed x86_pmu_start() to call x86_pmu.enable()
  merged __x86_pmu_disable into x86_pmu_stop() ]
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <4b703875.0a04d00a.7896.ffffb824@mx.google.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/perf_event.c | 24 ++++++++++++++++++++----
 include/linux/perf_event.h       |  2 ++
 kernel/perf_event.c              | 20 ++++++++++++++++++--
 3 files changed, 40 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index a920f173a220..9173ea95f918 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1495,7 +1495,7 @@ static inline int match_prev_assignment(struct hw_perf_event *hwc,
 		hwc->last_tag == cpuc->tags[i];
 }
 
-static void __x86_pmu_disable(struct perf_event *event, struct cpu_hw_events *cpuc);
+static void x86_pmu_stop(struct perf_event *event);
 
 void hw_perf_enable(void)
 {
@@ -1533,7 +1533,7 @@ void hw_perf_enable(void)
 			    match_prev_assignment(hwc, cpuc, i))
 				continue;
 
-			__x86_pmu_disable(event, cpuc);
+			x86_pmu_stop(event);
 
 			hwc->idx = -1;
 		}
@@ -1801,6 +1801,19 @@ static int x86_pmu_enable(struct perf_event *event)
 	return 0;
 }
 
+static int x86_pmu_start(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+
+	if (hwc->idx == -1)
+		return -EAGAIN;
+
+	x86_perf_event_set_period(event, hwc, hwc->idx);
+	x86_pmu.enable(hwc, hwc->idx);
+
+	return 0;
+}
+
 static void x86_pmu_unthrottle(struct perf_event *event)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
@@ -1924,8 +1937,9 @@ static void intel_pmu_drain_bts_buffer(struct cpu_hw_events *cpuc)
 	event->pending_kill = POLL_IN;
 }
 
-static void __x86_pmu_disable(struct perf_event *event, struct cpu_hw_events *cpuc)
+static void x86_pmu_stop(struct perf_event *event)
 {
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	struct hw_perf_event *hwc = &event->hw;
 	int idx = hwc->idx;
 
@@ -1954,7 +1968,7 @@ static void x86_pmu_disable(struct perf_event *event)
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	int i;
 
-	__x86_pmu_disable(event, cpuc);
+	x86_pmu_stop(event);
 
 	for (i = 0; i < cpuc->n_events; i++) {
 		if (event == cpuc->event_list[i]) {
@@ -2667,6 +2681,8 @@ static inline void x86_pmu_read(struct perf_event *event)
 static const struct pmu pmu = {
 	.enable		= x86_pmu_enable,
 	.disable	= x86_pmu_disable,
+	.start		= x86_pmu_start,
+	.stop		= x86_pmu_stop,
 	.read		= x86_pmu_read,
 	.unthrottle	= x86_pmu_unthrottle,
 };
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 071a7db52549..b08dfdad08cb 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -513,6 +513,8 @@ struct perf_event;
 struct pmu {
 	int (*enable)			(struct perf_event *event);
 	void (*disable)			(struct perf_event *event);
+	int (*start)			(struct perf_event *event);
+	void (*stop)			(struct perf_event *event);
 	void (*read)			(struct perf_event *event);
 	void (*unthrottle)		(struct perf_event *event);
 };
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 5a69abb05ac3..74c60021cdbc 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -1493,6 +1493,22 @@ do {					\
 	return div64_u64(dividend, divisor);
 }
 
+static void perf_event_stop(struct perf_event *event)
+{
+	if (!event->pmu->stop)
+		return event->pmu->disable(event);
+
+	return event->pmu->stop(event);
+}
+
+static int perf_event_start(struct perf_event *event)
+{
+	if (!event->pmu->start)
+		return event->pmu->enable(event);
+
+	return event->pmu->start(event);
+}
+
 static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count)
 {
 	struct hw_perf_event *hwc = &event->hw;
@@ -1513,9 +1529,9 @@ static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count)
 
 	if (atomic64_read(&hwc->period_left) > 8*sample_period) {
 		perf_disable();
-		event->pmu->disable(event);
+		perf_event_stop(event);
 		atomic64_set(&hwc->period_left, 0);
-		event->pmu->enable(event);
+		perf_event_start(event);
 		perf_enable();
 	}
 }
-- 
cgit v1.2.3


From 6e37738a2fac964583debe91099bc3248554f6e5 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 11 Feb 2010 13:21:58 +0100
Subject: perf_events: Simplify code by removing cpu argument to
 hw_perf_group_sched_in()

Since the cpu argument to hw_perf_group_sched_in() is always
smp_processor_id(), simplify the code a little by removing this argument
and using the current cpu where needed.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: David Miller <davem@davemloft.net>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <1265890918.5396.3.camel@laptop>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/powerpc/kernel/perf_event.c | 10 ++++-----
 arch/sparc/kernel/perf_event.c   | 10 ++++-----
 arch/x86/kernel/cpu/perf_event.c | 18 ++++++++--------
 include/linux/perf_event.h       |  2 +-
 kernel/perf_event.c              | 45 ++++++++++++++++------------------------
 5 files changed, 38 insertions(+), 47 deletions(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c
index 1eb85fbf53a5..b6cf8f1f4d35 100644
--- a/arch/powerpc/kernel/perf_event.c
+++ b/arch/powerpc/kernel/perf_event.c
@@ -718,10 +718,10 @@ static int collect_events(struct perf_event *group, int max_count,
 	return n;
 }
 
-static void event_sched_in(struct perf_event *event, int cpu)
+static void event_sched_in(struct perf_event *event)
 {
 	event->state = PERF_EVENT_STATE_ACTIVE;
-	event->oncpu = cpu;
+	event->oncpu = smp_processor_id();
 	event->tstamp_running += event->ctx->time - event->tstamp_stopped;
 	if (is_software_event(event))
 		event->pmu->enable(event);
@@ -735,7 +735,7 @@ static void event_sched_in(struct perf_event *event, int cpu)
  */
 int hw_perf_group_sched_in(struct perf_event *group_leader,
 	       struct perf_cpu_context *cpuctx,
-	       struct perf_event_context *ctx, int cpu)
+	       struct perf_event_context *ctx)
 {
 	struct cpu_hw_events *cpuhw;
 	long i, n, n0;
@@ -766,10 +766,10 @@ int hw_perf_group_sched_in(struct perf_event *group_leader,
 		cpuhw->event[i]->hw.config = cpuhw->events[i];
 	cpuctx->active_oncpu += n;
 	n = 1;
-	event_sched_in(group_leader, cpu);
+	event_sched_in(group_leader);
 	list_for_each_entry(sub, &group_leader->sibling_list, group_entry) {
 		if (sub->state != PERF_EVENT_STATE_OFF) {
-			event_sched_in(sub, cpu);
+			event_sched_in(sub);
 			++n;
 		}
 	}
diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c
index e856456ec02f..9f2b2bac8b2b 100644
--- a/arch/sparc/kernel/perf_event.c
+++ b/arch/sparc/kernel/perf_event.c
@@ -980,10 +980,10 @@ static int collect_events(struct perf_event *group, int max_count,
 	return n;
 }
 
-static void event_sched_in(struct perf_event *event, int cpu)
+static void event_sched_in(struct perf_event *event)
 {
 	event->state = PERF_EVENT_STATE_ACTIVE;
-	event->oncpu = cpu;
+	event->oncpu = smp_processor_id();
 	event->tstamp_running += event->ctx->time - event->tstamp_stopped;
 	if (is_software_event(event))
 		event->pmu->enable(event);
@@ -991,7 +991,7 @@ static void event_sched_in(struct perf_event *event, int cpu)
 
 int hw_perf_group_sched_in(struct perf_event *group_leader,
 			   struct perf_cpu_context *cpuctx,
-			   struct perf_event_context *ctx, int cpu)
+			   struct perf_event_context *ctx)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	struct perf_event *sub;
@@ -1015,10 +1015,10 @@ int hw_perf_group_sched_in(struct perf_event *group_leader,
 
 	cpuctx->active_oncpu += n;
 	n = 1;
-	event_sched_in(group_leader, cpu);
+	event_sched_in(group_leader);
 	list_for_each_entry(sub, &group_leader->sibling_list, group_entry) {
 		if (sub->state != PERF_EVENT_STATE_OFF) {
-			event_sched_in(sub, cpu);
+			event_sched_in(sub);
 			n++;
 		}
 	}
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index aa12f36e4711..ad096562d694 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -2403,12 +2403,12 @@ done:
 }
 
 static int x86_event_sched_in(struct perf_event *event,
-			  struct perf_cpu_context *cpuctx, int cpu)
+			  struct perf_cpu_context *cpuctx)
 {
 	int ret = 0;
 
 	event->state = PERF_EVENT_STATE_ACTIVE;
-	event->oncpu = cpu;
+	event->oncpu = smp_processor_id();
 	event->tstamp_running += event->ctx->time - event->tstamp_stopped;
 
 	if (!is_x86_event(event))
@@ -2424,7 +2424,7 @@ static int x86_event_sched_in(struct perf_event *event,
 }
 
 static void x86_event_sched_out(struct perf_event *event,
-			    struct perf_cpu_context *cpuctx, int cpu)
+			    struct perf_cpu_context *cpuctx)
 {
 	event->state = PERF_EVENT_STATE_INACTIVE;
 	event->oncpu = -1;
@@ -2452,9 +2452,9 @@ static void x86_event_sched_out(struct perf_event *event,
  */
 int hw_perf_group_sched_in(struct perf_event *leader,
 	       struct perf_cpu_context *cpuctx,
-	       struct perf_event_context *ctx, int cpu)
+	       struct perf_event_context *ctx)
 {
-	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	struct perf_event *sub;
 	int assign[X86_PMC_IDX_MAX];
 	int n0, n1, ret;
@@ -2468,14 +2468,14 @@ int hw_perf_group_sched_in(struct perf_event *leader,
 	if (ret)
 		return ret;
 
-	ret = x86_event_sched_in(leader, cpuctx, cpu);
+	ret = x86_event_sched_in(leader, cpuctx);
 	if (ret)
 		return ret;
 
 	n1 = 1;
 	list_for_each_entry(sub, &leader->sibling_list, group_entry) {
 		if (sub->state > PERF_EVENT_STATE_OFF) {
-			ret = x86_event_sched_in(sub, cpuctx, cpu);
+			ret = x86_event_sched_in(sub, cpuctx);
 			if (ret)
 				goto undo;
 			++n1;
@@ -2500,11 +2500,11 @@ int hw_perf_group_sched_in(struct perf_event *leader,
 	 */
 	return 1;
 undo:
-	x86_event_sched_out(leader, cpuctx, cpu);
+	x86_event_sched_out(leader, cpuctx);
 	n0  = 1;
 	list_for_each_entry(sub, &leader->sibling_list, group_entry) {
 		if (sub->state == PERF_EVENT_STATE_ACTIVE) {
-			x86_event_sched_out(sub, cpuctx, cpu);
+			x86_event_sched_out(sub, cpuctx);
 			if (++n0 == n1)
 				break;
 		}
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index b08dfdad08cb..d0e072c5b58a 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -772,7 +772,7 @@ extern int perf_event_task_disable(void);
 extern int perf_event_task_enable(void);
 extern int hw_perf_group_sched_in(struct perf_event *group_leader,
 	       struct perf_cpu_context *cpuctx,
-	       struct perf_event_context *ctx, int cpu);
+	       struct perf_event_context *ctx);
 extern void perf_event_update_userpage(struct perf_event *event);
 extern int perf_event_release_kernel(struct perf_event *event);
 extern struct perf_event *
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index fb4e56eb58f4..05b6c6b825e3 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -103,7 +103,7 @@ void __weak hw_perf_event_setup_offline(int cpu)	{ barrier(); }
 int __weak
 hw_perf_group_sched_in(struct perf_event *group_leader,
 	       struct perf_cpu_context *cpuctx,
-	       struct perf_event_context *ctx, int cpu)
+	       struct perf_event_context *ctx)
 {
 	return 0;
 }
@@ -633,14 +633,13 @@ void perf_event_disable(struct perf_event *event)
 static int
 event_sched_in(struct perf_event *event,
 		 struct perf_cpu_context *cpuctx,
-		 struct perf_event_context *ctx,
-		 int cpu)
+		 struct perf_event_context *ctx)
 {
 	if (event->state <= PERF_EVENT_STATE_OFF)
 		return 0;
 
 	event->state = PERF_EVENT_STATE_ACTIVE;
-	event->oncpu = cpu;	/* TODO: put 'cpu' into cpuctx->cpu */
+	event->oncpu = smp_processor_id();
 	/*
 	 * The new state must be visible before we turn it on in the hardware:
 	 */
@@ -667,8 +666,7 @@ event_sched_in(struct perf_event *event,
 static int
 group_sched_in(struct perf_event *group_event,
 	       struct perf_cpu_context *cpuctx,
-	       struct perf_event_context *ctx,
-	       int cpu)
+	       struct perf_event_context *ctx)
 {
 	struct perf_event *event, *partial_group;
 	int ret;
@@ -676,18 +674,18 @@ group_sched_in(struct perf_event *group_event,
 	if (group_event->state == PERF_EVENT_STATE_OFF)
 		return 0;
 
-	ret = hw_perf_group_sched_in(group_event, cpuctx, ctx, cpu);
+	ret = hw_perf_group_sched_in(group_event, cpuctx, ctx);
 	if (ret)
 		return ret < 0 ? ret : 0;
 
-	if (event_sched_in(group_event, cpuctx, ctx, cpu))
+	if (event_sched_in(group_event, cpuctx, ctx))
 		return -EAGAIN;
 
 	/*
 	 * Schedule in siblings as one group (if any):
 	 */
 	list_for_each_entry(event, &group_event->sibling_list, group_entry) {
-		if (event_sched_in(event, cpuctx, ctx, cpu)) {
+		if (event_sched_in(event, cpuctx, ctx)) {
 			partial_group = event;
 			goto group_error;
 		}
@@ -761,7 +759,6 @@ static void __perf_install_in_context(void *info)
 	struct perf_event *event = info;
 	struct perf_event_context *ctx = event->ctx;
 	struct perf_event *leader = event->group_leader;
-	int cpu = smp_processor_id();
 	int err;
 
 	/*
@@ -808,7 +805,7 @@ static void __perf_install_in_context(void *info)
 	if (!group_can_go_on(event, cpuctx, 1))
 		err = -EEXIST;
 	else
-		err = event_sched_in(event, cpuctx, ctx, cpu);
+		err = event_sched_in(event, cpuctx, ctx);
 
 	if (err) {
 		/*
@@ -950,11 +947,9 @@ static void __perf_event_enable(void *info)
 	} else {
 		perf_disable();
 		if (event == leader)
-			err = group_sched_in(event, cpuctx, ctx,
-					     smp_processor_id());
+			err = group_sched_in(event, cpuctx, ctx);
 		else
-			err = event_sched_in(event, cpuctx, ctx,
-					       smp_processor_id());
+			err = event_sched_in(event, cpuctx, ctx);
 		perf_enable();
 	}
 
@@ -1281,19 +1276,18 @@ static void cpu_ctx_sched_out(struct perf_cpu_context *cpuctx,
 
 static void
 ctx_pinned_sched_in(struct perf_event_context *ctx,
-		    struct perf_cpu_context *cpuctx,
-		    int cpu)
+		    struct perf_cpu_context *cpuctx)
 {
 	struct perf_event *event;
 
 	list_for_each_entry(event, &ctx->pinned_groups, group_entry) {
 		if (event->state <= PERF_EVENT_STATE_OFF)
 			continue;
-		if (event->cpu != -1 && event->cpu != cpu)
+		if (event->cpu != -1 && event->cpu != smp_processor_id())
 			continue;
 
 		if (group_can_go_on(event, cpuctx, 1))
-			group_sched_in(event, cpuctx, ctx, cpu);
+			group_sched_in(event, cpuctx, ctx);
 
 		/*
 		 * If this pinned group hasn't been scheduled,
@@ -1308,8 +1302,7 @@ ctx_pinned_sched_in(struct perf_event_context *ctx,
 
 static void
 ctx_flexible_sched_in(struct perf_event_context *ctx,
-		      struct perf_cpu_context *cpuctx,
-		      int cpu)
+		      struct perf_cpu_context *cpuctx)
 {
 	struct perf_event *event;
 	int can_add_hw = 1;
@@ -1322,11 +1315,11 @@ ctx_flexible_sched_in(struct perf_event_context *ctx,
 		 * Listen to the 'cpu' scheduling filter constraint
 		 * of events:
 		 */
-		if (event->cpu != -1 && event->cpu != cpu)
+		if (event->cpu != -1 && event->cpu != smp_processor_id())
 			continue;
 
 		if (group_can_go_on(event, cpuctx, can_add_hw))
-			if (group_sched_in(event, cpuctx, ctx, cpu))
+			if (group_sched_in(event, cpuctx, ctx))
 				can_add_hw = 0;
 	}
 }
@@ -1336,8 +1329,6 @@ ctx_sched_in(struct perf_event_context *ctx,
 	     struct perf_cpu_context *cpuctx,
 	     enum event_type_t event_type)
 {
-	int cpu = smp_processor_id();
-
 	raw_spin_lock(&ctx->lock);
 	ctx->is_active = 1;
 	if (likely(!ctx->nr_events))
@@ -1352,11 +1343,11 @@ ctx_sched_in(struct perf_event_context *ctx,
 	 * in order to give them the best chance of going on.
 	 */
 	if (event_type & EVENT_PINNED)
-		ctx_pinned_sched_in(ctx, cpuctx, cpu);
+		ctx_pinned_sched_in(ctx, cpuctx);
 
 	/* Then walk through the lower prio flexible groups */
 	if (event_type & EVENT_FLEXIBLE)
-		ctx_flexible_sched_in(ctx, cpuctx, cpu);
+		ctx_flexible_sched_in(ctx, cpuctx);
 
 	perf_enable();
  out:
-- 
cgit v1.2.3


From c79c5ffdce14abb4de3878c5aa8c3c6e5093c69b Mon Sep 17 00:00:00 2001
From: Peter Waskiewicz <peter.p.waskiewicz.jr@intel.com>
Date: Fri, 26 Feb 2010 01:54:20 +0000
Subject: ethtool: Add n-tuple string length to drvinfo and return it

The drvinfo struct should include the number of strings that
get_rx_ntuple will return.  It will be variable if an underlying
driver implements its own get_rx_ntuple routine, so userspace
needs to know how much data is coming.

Signed-off-by: Peter P Waskiewicz Jr <peter.p.waskiewicz.jr@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ethtool.h | 1 +
 net/core/ethtool.c      | 3 +++
 2 files changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index cca1c3de140d..f7992a256b71 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -63,6 +63,7 @@ struct ethtool_drvinfo {
 	char	reserved2[12];
 	__u32	n_priv_flags;	/* number of flags valid in ETHTOOL_GPFLAGS */
 	__u32	n_stats;	/* number of u64's from ETHTOOL_GSTATS */
+	__u32	n_ntuples;	/* number of n-tuple filters from GSTRINGS */
 	__u32	testinfo_len;
 	__u32	eedump_len;	/* Size of data from ETHTOOL_GEEPROM (bytes) */
 	__u32	regdump_len;	/* Size of data from ETHTOOL_GREGS (bytes) */
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 31b1eddc1b84..1c94f48e27b5 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -224,6 +224,9 @@ static noinline int ethtool_get_drvinfo(struct net_device *dev, void __user *use
 		rc = ops->get_sset_count(dev, ETH_SS_PRIV_FLAGS);
 		if (rc >= 0)
 			info.n_priv_flags = rc;
+		rc = ops->get_sset_count(dev, ETH_SS_NTUPLE_FILTERS);
+		if (rc >= 0)
+			info.n_ntuples = rc;
 	}
 	if (ops->get_regs_len)
 		info.regdump_len = ops->get_regs_len(dev);
-- 
cgit v1.2.3


From e751e76a5f7adeee7438e68b0965559ad2864d0d Mon Sep 17 00:00:00 2001
From: "Martin K. Petersen" <martin.petersen@oracle.com>
Date: Fri, 26 Feb 2010 00:20:36 -0500
Subject: block: Remove unused accessor function

blk_queue_max_hw_sectors is no longer called by any subsystem and can be
removed.

Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 block/blk-settings.c   | 9 ---------
 include/linux/blkdev.h | 1 -
 2 files changed, 10 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-settings.c b/block/blk-settings.c
index 605df9b3de8f..4db46f2fcbe5 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -239,15 +239,6 @@ void blk_queue_max_sectors(struct request_queue *q, unsigned int max_hw_sectors)
 }
 EXPORT_SYMBOL(blk_queue_max_sectors);
 
-void blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_sectors)
-{
-	if (BLK_DEF_MAX_SECTORS > max_sectors)
-		q->limits.max_hw_sectors = BLK_DEF_MAX_SECTORS;
-	else
-		q->limits.max_hw_sectors = max_sectors;
-}
-EXPORT_SYMBOL(blk_queue_max_hw_sectors);
-
 /**
  * blk_queue_max_discard_sectors - set max sectors for a single discard
  * @q:  the request queue for the device
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 2f17793048e7..c051cea2df07 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -922,7 +922,6 @@ extern void blk_cleanup_queue(struct request_queue *);
 extern void blk_queue_make_request(struct request_queue *, make_request_fn *);
 extern void blk_queue_bounce_limit(struct request_queue *, u64);
 extern void blk_queue_max_sectors(struct request_queue *, unsigned int);
-extern void blk_queue_max_hw_sectors(struct request_queue *, unsigned int);
 extern void blk_queue_max_phys_segments(struct request_queue *, unsigned short);
 extern void blk_queue_max_hw_segments(struct request_queue *, unsigned short);
 extern void blk_queue_max_segment_size(struct request_queue *, unsigned int);
-- 
cgit v1.2.3


From eb28d31bc97e6374d81f404da309401ffaed467b Mon Sep 17 00:00:00 2001
From: "Martin K. Petersen" <martin.petersen@oracle.com>
Date: Fri, 26 Feb 2010 00:20:37 -0500
Subject: block: Add BLK_ prefix to definitions

Add a BLK_ prefix to block layer constants.

Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 block/blk-settings.c    |  4 ++--
 drivers/block/ps3vram.c |  4 ++--
 include/linux/blkdev.h  | 10 +++++++---
 3 files changed, 11 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-settings.c b/block/blk-settings.c
index 4db46f2fcbe5..3c53b0beb8dd 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -94,7 +94,7 @@ void blk_set_default_limits(struct queue_limits *lim)
 	lim->max_phys_segments = MAX_PHYS_SEGMENTS;
 	lim->max_hw_segments = MAX_HW_SEGMENTS;
 	lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK;
-	lim->max_segment_size = MAX_SEGMENT_SIZE;
+	lim->max_segment_size = BLK_MAX_SEGMENT_SIZE;
 	lim->max_sectors = BLK_DEF_MAX_SECTORS;
 	lim->max_hw_sectors = INT_MAX;
 	lim->max_discard_sectors = 0;
@@ -154,7 +154,7 @@ void blk_queue_make_request(struct request_queue *q, make_request_fn *mfn)
 	q->unplug_timer.data = (unsigned long)q;
 
 	blk_set_default_limits(&q->limits);
-	blk_queue_max_sectors(q, SAFE_MAX_SECTORS);
+	blk_queue_max_sectors(q, BLK_SAFE_MAX_SECTORS);
 
 	/*
 	 * If the caller didn't supply a lock, fall back to our embedded
diff --git a/drivers/block/ps3vram.c b/drivers/block/ps3vram.c
index 1fb6c3135fc8..a7ecb43b16ab 100644
--- a/drivers/block/ps3vram.c
+++ b/drivers/block/ps3vram.c
@@ -753,8 +753,8 @@ static int __devinit ps3vram_probe(struct ps3_system_bus_device *dev)
 	blk_queue_make_request(queue, ps3vram_make_request);
 	blk_queue_max_phys_segments(queue, MAX_PHYS_SEGMENTS);
 	blk_queue_max_hw_segments(queue, MAX_HW_SEGMENTS);
-	blk_queue_max_segment_size(queue, MAX_SEGMENT_SIZE);
-	blk_queue_max_sectors(queue, SAFE_MAX_SECTORS);
+	blk_queue_max_segment_size(queue, BLK_MAX_SEGMENT_SIZE);
+	blk_queue_max_sectors(queue, BLK_SAFE_MAX_SECTORS);
 
 	gendisk = alloc_disk(1);
 	if (!gendisk) {
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index c051cea2df07..5d378627f446 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1016,11 +1016,15 @@ extern int blk_verify_command(unsigned char *cmd, fmode_t has_write_perm);
 #define MAX_PHYS_SEGMENTS 128
 #define MAX_HW_SEGMENTS 128
 #define SAFE_MAX_SECTORS 255
-#define BLK_DEF_MAX_SECTORS 1024
-
 #define MAX_SEGMENT_SIZE	65536
 
-#define BLK_SEG_BOUNDARY_MASK	0xFFFFFFFFUL
+enum blk_default_limits {
+	BLK_MAX_SEGMENTS	= 128,
+	BLK_SAFE_MAX_SECTORS	= 255,
+	BLK_DEF_MAX_SECTORS	= 1024,
+	BLK_MAX_SEGMENT_SIZE	= 65536,
+	BLK_SEG_BOUNDARY_MASK	= 0xFFFFFFFFUL,
+};
 
 #define blkdev_entry_to_request(entry) list_entry((entry), struct request, queuelist)
 
-- 
cgit v1.2.3


From 086fa5ff0854c676ec333760f4c0154b3b242616 Mon Sep 17 00:00:00 2001
From: "Martin K. Petersen" <martin.petersen@oracle.com>
Date: Fri, 26 Feb 2010 00:20:38 -0500
Subject: block: Rename blk_queue_max_sectors to blk_queue_max_hw_sectors

The block layer calling convention is blk_queue_<limit name>.
blk_queue_max_sectors predates this practice, leading to some confusion.
Rename the function to appropriately reflect that its intended use is to
set max_hw_sectors.

Also introduce a temporary wrapper for backwards compability.  This can
be removed after the merge window is closed.

Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 arch/um/drivers/ubd_kern.c          | 2 +-
 block/blk-settings.c                | 8 ++++----
 drivers/ata/libata-scsi.c           | 2 +-
 drivers/block/DAC960.c              | 2 +-
 drivers/block/brd.c                 | 2 +-
 drivers/block/cciss.c               | 2 +-
 drivers/block/drbd/drbd_nl.c        | 2 +-
 drivers/block/floppy.c              | 2 +-
 drivers/block/hd.c                  | 2 +-
 drivers/block/mg_disk.c             | 2 +-
 drivers/block/paride/pd.c           | 2 +-
 drivers/block/pktcdvd.c             | 4 ++--
 drivers/block/ps3disk.c             | 2 +-
 drivers/block/ps3vram.c             | 2 +-
 drivers/block/sunvdc.c              | 2 +-
 drivers/block/ub.c                  | 2 +-
 drivers/block/viodasd.c             | 2 +-
 drivers/block/xd.c                  | 2 +-
 drivers/block/xen-blkfront.c        | 2 +-
 drivers/cdrom/viocd.c               | 2 +-
 drivers/firewire/sbp2.c             | 2 +-
 drivers/ide/ide-disk.c              | 2 +-
 drivers/ide/ide-floppy.c            | 4 ++--
 drivers/ide/ide-probe.c             | 2 +-
 drivers/ieee1394/sbp2.c             | 2 +-
 drivers/md/linear.c                 | 2 +-
 drivers/md/multipath.c              | 4 ++--
 drivers/md/raid0.c                  | 4 ++--
 drivers/md/raid1.c                  | 4 ++--
 drivers/md/raid10.c                 | 4 ++--
 drivers/memstick/core/mspro_block.c | 2 +-
 drivers/message/i2o/i2o_block.c     | 2 +-
 drivers/mmc/card/queue.c            | 4 ++--
 drivers/s390/block/dasd.c           | 2 +-
 drivers/s390/char/tape_block.c      | 2 +-
 drivers/scsi/ipr.c                  | 2 +-
 drivers/scsi/pmcraid.c              | 2 +-
 drivers/scsi/scsi_lib.c             | 2 +-
 drivers/scsi/scsi_scan.c            | 2 +-
 drivers/usb/storage/scsiglue.c      | 6 +++---
 include/linux/blkdev.h              | 9 ++++++++-
 41 files changed, 60 insertions(+), 53 deletions(-)

(limited to 'include/linux')

diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index 5ff554677f40..c2051b0737cb 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -747,7 +747,7 @@ static int ubd_open_dev(struct ubd *ubd_dev)
 	ubd_dev->fd = fd;
 
 	if(ubd_dev->cow.file != NULL){
-		blk_queue_max_sectors(ubd_dev->queue, 8 * sizeof(long));
+		blk_queue_max_hw_sectors(ubd_dev->queue, 8 * sizeof(long));
 
 		err = -ENOMEM;
 		ubd_dev->cow.bitmap = vmalloc(ubd_dev->cow.bitmap_len);
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 3c53b0beb8dd..61afae9dbc6d 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -154,7 +154,7 @@ void blk_queue_make_request(struct request_queue *q, make_request_fn *mfn)
 	q->unplug_timer.data = (unsigned long)q;
 
 	blk_set_default_limits(&q->limits);
-	blk_queue_max_sectors(q, BLK_SAFE_MAX_SECTORS);
+	blk_queue_max_hw_sectors(q, BLK_SAFE_MAX_SECTORS);
 
 	/*
 	 * If the caller didn't supply a lock, fall back to our embedded
@@ -210,7 +210,7 @@ void blk_queue_bounce_limit(struct request_queue *q, u64 dma_mask)
 EXPORT_SYMBOL(blk_queue_bounce_limit);
 
 /**
- * blk_queue_max_sectors - set max sectors for a request for this queue
+ * blk_queue_max_hw_sectors - set max sectors for a request for this queue
  * @q:  the request queue for the device
  * @max_hw_sectors:  max hardware sectors in the usual 512b unit
  *
@@ -225,7 +225,7 @@ EXPORT_SYMBOL(blk_queue_bounce_limit);
  *    per-device basis in /sys/block/<device>/queue/max_sectors_kb.
  *    The soft limit can not exceed max_hw_sectors.
  **/
-void blk_queue_max_sectors(struct request_queue *q, unsigned int max_hw_sectors)
+void blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_hw_sectors)
 {
 	if ((max_hw_sectors << 9) < PAGE_CACHE_SIZE) {
 		max_hw_sectors = 1 << (PAGE_CACHE_SHIFT - 9);
@@ -237,7 +237,7 @@ void blk_queue_max_sectors(struct request_queue *q, unsigned int max_hw_sectors)
 	q->limits.max_sectors = min_t(unsigned int, max_hw_sectors,
 				      BLK_DEF_MAX_SECTORS);
 }
-EXPORT_SYMBOL(blk_queue_max_sectors);
+EXPORT_SYMBOL(blk_queue_max_hw_sectors);
 
 /**
  * blk_queue_max_discard_sectors - set max sectors for a single discard
diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index d096fbcbc771..bea003a24d27 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -1097,7 +1097,7 @@ static int ata_scsi_dev_config(struct scsi_device *sdev,
 		dev->flags |= ATA_DFLAG_NO_UNLOAD;
 
 	/* configure max sectors */
-	blk_queue_max_sectors(sdev->request_queue, dev->max_sectors);
+	blk_queue_max_hw_sectors(sdev->request_queue, dev->max_sectors);
 
 	if (dev->class == ATA_DEV_ATAPI) {
 		struct request_queue *q = sdev->request_queue;
diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c
index 7412b5d4f5f3..1c0cd35e1913 100644
--- a/drivers/block/DAC960.c
+++ b/drivers/block/DAC960.c
@@ -2535,7 +2535,7 @@ static bool DAC960_RegisterBlockDevice(DAC960_Controller_T *Controller)
   	RequestQueue->queuedata = Controller;
   	blk_queue_max_hw_segments(RequestQueue, Controller->DriverScatterGatherLimit);
 	blk_queue_max_phys_segments(RequestQueue, Controller->DriverScatterGatherLimit);
-	blk_queue_max_sectors(RequestQueue, Controller->MaxBlocksPerCommand);
+	blk_queue_max_hw_sectors(RequestQueue, Controller->MaxBlocksPerCommand);
 	disk->queue = RequestQueue;
 	sprintf(disk->disk_name, "rd/c%dd%d", Controller->ControllerNumber, n);
 	disk->major = MajorNumber;
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index 4f688434daf1..c6ddeacb77fd 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -434,7 +434,7 @@ static struct brd_device *brd_alloc(int i)
 		goto out_free_dev;
 	blk_queue_make_request(brd->brd_queue, brd_make_request);
 	blk_queue_ordered(brd->brd_queue, QUEUE_ORDERED_TAG, NULL);
-	blk_queue_max_sectors(brd->brd_queue, 1024);
+	blk_queue_max_hw_sectors(brd->brd_queue, 1024);
 	blk_queue_bounce_limit(brd->brd_queue, BLK_BOUNCE_ANY);
 
 	disk = brd->brd_disk = alloc_disk(1 << part_shift);
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index 86acdca5d0ce..030e52d72254 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -1802,7 +1802,7 @@ static int cciss_add_disk(ctlr_info_t *h, struct gendisk *disk,
 	/* This is a limit in the driver and could be eliminated. */
 	blk_queue_max_phys_segments(disk->queue, h->maxsgentries);
 
-	blk_queue_max_sectors(disk->queue, h->cciss_max_sectors);
+	blk_queue_max_hw_sectors(disk->queue, h->cciss_max_sectors);
 
 	blk_queue_softirq_done(disk->queue, cciss_softirq_done);
 
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 1292e0620663..9b55e64196fc 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -709,7 +709,7 @@ void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int max_seg_s) __mu
 
 	max_seg_s = min(queue_max_sectors(b) * queue_logical_block_size(b), max_seg_s);
 
-	blk_queue_max_sectors(q, max_seg_s >> 9);
+	blk_queue_max_hw_sectors(q, max_seg_s >> 9);
 	blk_queue_max_phys_segments(q, max_segments ? max_segments : MAX_PHYS_SEGMENTS);
 	blk_queue_max_hw_segments(q, max_segments ? max_segments : MAX_HW_SEGMENTS);
 	blk_queue_max_segment_size(q, max_seg_s);
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index 3266b4f65daa..b9b117059b62 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -4234,7 +4234,7 @@ static int __init floppy_init(void)
 		err = -ENOMEM;
 		goto out_unreg_driver;
 	}
-	blk_queue_max_sectors(floppy_queue, 64);
+	blk_queue_max_hw_sectors(floppy_queue, 64);
 
 	blk_register_region(MKDEV(FLOPPY_MAJOR, 0), 256, THIS_MODULE,
 			    floppy_find, NULL, NULL);
diff --git a/drivers/block/hd.c b/drivers/block/hd.c
index d5cdce08ffd2..5116c65c07cb 100644
--- a/drivers/block/hd.c
+++ b/drivers/block/hd.c
@@ -719,7 +719,7 @@ static int __init hd_init(void)
 		return -ENOMEM;
 	}
 
-	blk_queue_max_sectors(hd_queue, 255);
+	blk_queue_max_hw_sectors(hd_queue, 255);
 	init_timer(&device_timer);
 	device_timer.function = hd_times_out;
 	blk_queue_logical_block_size(hd_queue, 512);
diff --git a/drivers/block/mg_disk.c b/drivers/block/mg_disk.c
index 02b2583df7fc..5416c9a606e4 100644
--- a/drivers/block/mg_disk.c
+++ b/drivers/block/mg_disk.c
@@ -980,7 +980,7 @@ static int mg_probe(struct platform_device *plat_dev)
 				__func__, __LINE__);
 		goto probe_err_6;
 	}
-	blk_queue_max_sectors(host->breq, MG_MAX_SECTS);
+	blk_queue_max_hw_sectors(host->breq, MG_MAX_SECTS);
 	blk_queue_logical_block_size(host->breq, MG_SECTOR_SIZE);
 
 	init_timer(&host->timer);
diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c
index 569e39e8f114..e712cd51af15 100644
--- a/drivers/block/paride/pd.c
+++ b/drivers/block/paride/pd.c
@@ -906,7 +906,7 @@ static int __init pd_init(void)
 	if (!pd_queue)
 		goto out1;
 
-	blk_queue_max_sectors(pd_queue, cluster);
+	blk_queue_max_hw_sectors(pd_queue, cluster);
 
 	if (register_blkdev(major, name))
 		goto out2;
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index 7cd2973ebb7b..6e1daa24da2f 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -2312,7 +2312,7 @@ static int pkt_open_dev(struct pktcdvd_device *pd, fmode_t write)
 		 * even if the size is a multiple of the packet size.
 		 */
 		spin_lock_irq(q->queue_lock);
-		blk_queue_max_sectors(q, pd->settings.size);
+		blk_queue_max_hw_sectors(q, pd->settings.size);
 		spin_unlock_irq(q->queue_lock);
 		set_bit(PACKET_WRITABLE, &pd->flags);
 	} else {
@@ -2613,7 +2613,7 @@ static void pkt_init_queue(struct pktcdvd_device *pd)
 
 	blk_queue_make_request(q, pkt_make_request);
 	blk_queue_logical_block_size(q, CD_FRAMESIZE);
-	blk_queue_max_sectors(q, PACKET_MAX_SECTORS);
+	blk_queue_max_hw_sectors(q, PACKET_MAX_SECTORS);
 	blk_queue_merge_bvec(q, pkt_merge_bvec);
 	q->queuedata = pd;
 }
diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c
index 03a130dca8ab..9cd1a4a542b8 100644
--- a/drivers/block/ps3disk.c
+++ b/drivers/block/ps3disk.c
@@ -474,7 +474,7 @@ static int __devinit ps3disk_probe(struct ps3_system_bus_device *_dev)
 
 	blk_queue_bounce_limit(queue, BLK_BOUNCE_HIGH);
 
-	blk_queue_max_sectors(queue, dev->bounce_size >> 9);
+	blk_queue_max_hw_sectors(queue, dev->bounce_size >> 9);
 	blk_queue_segment_boundary(queue, -1UL);
 	blk_queue_dma_alignment(queue, dev->blk_size-1);
 	blk_queue_logical_block_size(queue, dev->blk_size);
diff --git a/drivers/block/ps3vram.c b/drivers/block/ps3vram.c
index a7ecb43b16ab..6416b262934b 100644
--- a/drivers/block/ps3vram.c
+++ b/drivers/block/ps3vram.c
@@ -754,7 +754,7 @@ static int __devinit ps3vram_probe(struct ps3_system_bus_device *dev)
 	blk_queue_max_phys_segments(queue, MAX_PHYS_SEGMENTS);
 	blk_queue_max_hw_segments(queue, MAX_HW_SEGMENTS);
 	blk_queue_max_segment_size(queue, BLK_MAX_SEGMENT_SIZE);
-	blk_queue_max_sectors(queue, BLK_SAFE_MAX_SECTORS);
+	blk_queue_max_hw_sectors(queue, BLK_SAFE_MAX_SECTORS);
 
 	gendisk = alloc_disk(1);
 	if (!gendisk) {
diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c
index 411f064760b4..dd30cddd0f7f 100644
--- a/drivers/block/sunvdc.c
+++ b/drivers/block/sunvdc.c
@@ -693,7 +693,7 @@ static int probe_disk(struct vdc_port *port)
 
 	blk_queue_max_hw_segments(q, port->ring_cookies);
 	blk_queue_max_phys_segments(q, port->ring_cookies);
-	blk_queue_max_sectors(q, port->max_xfer_size);
+	blk_queue_max_hw_sectors(q, port->max_xfer_size);
 	g->major = vdc_major;
 	g->first_minor = port->vio.vdev->dev_no << PARTITION_SHIFT;
 	strcpy(g->disk_name, port->disk_name);
diff --git a/drivers/block/ub.c b/drivers/block/ub.c
index d86d1357ccef..352ea24d66e8 100644
--- a/drivers/block/ub.c
+++ b/drivers/block/ub.c
@@ -2323,7 +2323,7 @@ static int ub_probe_lun(struct ub_dev *sc, int lnum)
 	blk_queue_max_hw_segments(q, UB_MAX_REQ_SG);
 	blk_queue_max_phys_segments(q, UB_MAX_REQ_SG);
 	blk_queue_segment_boundary(q, 0xffffffff);	/* Dubious. */
-	blk_queue_max_sectors(q, UB_MAX_SECTORS);
+	blk_queue_max_hw_sectors(q, UB_MAX_SECTORS);
 	blk_queue_logical_block_size(q, lun->capacity.bsize);
 
 	lun->disk = disk;
diff --git a/drivers/block/viodasd.c b/drivers/block/viodasd.c
index a8c8b56b275e..d44ece7d7b7c 100644
--- a/drivers/block/viodasd.c
+++ b/drivers/block/viodasd.c
@@ -473,7 +473,7 @@ retry:
 	d->disk = g;
 	blk_queue_max_hw_segments(q, VIOMAXBLOCKDMA);
 	blk_queue_max_phys_segments(q, VIOMAXBLOCKDMA);
-	blk_queue_max_sectors(q, VIODASD_MAXSECTORS);
+	blk_queue_max_hw_sectors(q, VIODASD_MAXSECTORS);
 	g->major = VIODASD_MAJOR;
 	g->first_minor = dev_no << PARTITION_SHIFT;
 	if (dev_no >= 26)
diff --git a/drivers/block/xd.c b/drivers/block/xd.c
index d1fd032e7514..1a325fb05c92 100644
--- a/drivers/block/xd.c
+++ b/drivers/block/xd.c
@@ -242,7 +242,7 @@ static int __init xd_init(void)
 	}
 
 	/* xd_maxsectors depends on controller - so set after detection */
-	blk_queue_max_sectors(xd_queue, xd_maxsectors);
+	blk_queue_max_hw_sectors(xd_queue, xd_maxsectors);
 
 	for (i = 0; i < xd_drives; i++)
 		add_disk(xd_gendisk[i]);
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index a84702d1a35e..f9861aaa1fef 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -346,7 +346,7 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
 
 	/* Hard sector size and max sectors impersonate the equiv. hardware. */
 	blk_queue_logical_block_size(rq, sector_size);
-	blk_queue_max_sectors(rq, 512);
+	blk_queue_max_hw_sectors(rq, 512);
 
 	/* Each segment in a request is up to an aligned page in size. */
 	blk_queue_segment_boundary(rq, PAGE_SIZE - 1);
diff --git a/drivers/cdrom/viocd.c b/drivers/cdrom/viocd.c
index 57ca69e0ac55..b1dfd23eb832 100644
--- a/drivers/cdrom/viocd.c
+++ b/drivers/cdrom/viocd.c
@@ -618,7 +618,7 @@ static int viocd_probe(struct vio_dev *vdev, const struct vio_device_id *id)
 			sizeof(gendisk->disk_name));
 	blk_queue_max_hw_segments(q, 1);
 	blk_queue_max_phys_segments(q, 1);
-	blk_queue_max_sectors(q, 4096 / 512);
+	blk_queue_max_hw_sectors(q, 4096 / 512);
 	gendisk->queue = q;
 	gendisk->fops = &viocd_fops;
 	gendisk->flags = GENHD_FL_CD|GENHD_FL_REMOVABLE;
diff --git a/drivers/firewire/sbp2.c b/drivers/firewire/sbp2.c
index d485cdd8cbac..70fef40cd22f 100644
--- a/drivers/firewire/sbp2.c
+++ b/drivers/firewire/sbp2.c
@@ -1571,7 +1571,7 @@ static int sbp2_scsi_slave_configure(struct scsi_device *sdev)
 		sdev->start_stop_pwr_cond = 1;
 
 	if (lu->tgt->workarounds & SBP2_WORKAROUND_128K_MAX_TRANS)
-		blk_queue_max_sectors(sdev->request_queue, 128 * 1024 / 512);
+		blk_queue_max_hw_sectors(sdev->request_queue, 128 * 1024 / 512);
 
 	blk_queue_max_segment_size(sdev->request_queue, SBP2_MAX_SEG_SIZE);
 
diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c
index 7f878017b736..3b128dce9c3a 100644
--- a/drivers/ide/ide-disk.c
+++ b/drivers/ide/ide-disk.c
@@ -679,7 +679,7 @@ static void ide_disk_setup(ide_drive_t *drive)
 		if (max_s > hwif->rqsize)
 			max_s = hwif->rqsize;
 
-		blk_queue_max_sectors(q, max_s);
+		blk_queue_max_hw_sectors(q, max_s);
 	}
 
 	printk(KERN_INFO "%s: max request size: %dKiB\n", drive->name,
diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c
index fefbdfc8db06..efd907623469 100644
--- a/drivers/ide/ide-floppy.c
+++ b/drivers/ide/ide-floppy.c
@@ -486,7 +486,7 @@ static void ide_floppy_setup(ide_drive_t *drive)
 		drive->atapi_flags |= IDE_AFLAG_ZIP_DRIVE;
 		/* This value will be visible in the /proc/ide/hdx/settings */
 		drive->pc_delay = IDEFLOPPY_PC_DELAY;
-		blk_queue_max_sectors(drive->queue, 64);
+		blk_queue_max_hw_sectors(drive->queue, 64);
 	}
 
 	/*
@@ -494,7 +494,7 @@ static void ide_floppy_setup(ide_drive_t *drive)
 	 * nasty clicking noises without it, so please don't remove this.
 	 */
 	if (strncmp((char *)&id[ATA_ID_PROD], "IOMEGA Clik!", 11) == 0) {
-		blk_queue_max_sectors(drive->queue, 64);
+		blk_queue_max_hw_sectors(drive->queue, 64);
 		drive->atapi_flags |= IDE_AFLAG_CLIK_DRIVE;
 		/* IOMEGA Clik! drives do not support lock/unlock commands */
 		drive->dev_flags &= ~IDE_DFLAG_DOORLOCKING;
diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index 4d76ba473097..1ec8b31277bd 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -774,7 +774,7 @@ static int ide_init_queue(ide_drive_t *drive)
 
 	if (hwif->rqsize < max_sectors)
 		max_sectors = hwif->rqsize;
-	blk_queue_max_sectors(q, max_sectors);
+	blk_queue_max_hw_sectors(q, max_sectors);
 
 #ifdef CONFIG_PCI
 	/* When we have an IOMMU, we may have a problem where pci_map_sg()
diff --git a/drivers/ieee1394/sbp2.c b/drivers/ieee1394/sbp2.c
index f199896c4113..c88696a6cf8a 100644
--- a/drivers/ieee1394/sbp2.c
+++ b/drivers/ieee1394/sbp2.c
@@ -2020,7 +2020,7 @@ static int sbp2scsi_slave_configure(struct scsi_device *sdev)
 	if (lu->workarounds & SBP2_WORKAROUND_POWER_CONDITION)
 		sdev->start_stop_pwr_cond = 1;
 	if (lu->workarounds & SBP2_WORKAROUND_128K_MAX_TRANS)
-		blk_queue_max_sectors(sdev->request_queue, 128 * 1024 / 512);
+		blk_queue_max_hw_sectors(sdev->request_queue, 128 * 1024 / 512);
 
 	blk_queue_max_segment_size(sdev->request_queue, SBP2_MAX_SEG_SIZE);
 	return 0;
diff --git a/drivers/md/linear.c b/drivers/md/linear.c
index 00435bd20699..af2d39d603c7 100644
--- a/drivers/md/linear.c
+++ b/drivers/md/linear.c
@@ -177,7 +177,7 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks)
 		 */
 		if (rdev->bdev->bd_disk->queue->merge_bvec_fn &&
 		    queue_max_sectors(mddev->queue) > (PAGE_SIZE>>9))
-			blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9);
+			blk_queue_max_hw_sectors(mddev->queue, PAGE_SIZE>>9);
 
 		conf->array_sectors += rdev->sectors;
 		cnt++;
diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c
index 32a662fc55c9..4b323f45ad74 100644
--- a/drivers/md/multipath.c
+++ b/drivers/md/multipath.c
@@ -308,7 +308,7 @@ static int multipath_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
 		 */
 			if (q->merge_bvec_fn &&
 			    queue_max_sectors(q) > (PAGE_SIZE>>9))
-				blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9);
+				blk_queue_max_hw_sectors(mddev->queue, PAGE_SIZE>>9);
 
 			conf->working_disks++;
 			mddev->degraded--;
@@ -478,7 +478,7 @@ static int multipath_run (mddev_t *mddev)
 		 * a merge_bvec_fn to be involved in multipath */
 		if (rdev->bdev->bd_disk->queue->merge_bvec_fn &&
 		    queue_max_sectors(mddev->queue) > (PAGE_SIZE>>9))
-			blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9);
+			blk_queue_max_hw_sectors(mddev->queue, PAGE_SIZE>>9);
 
 		if (!test_bit(Faulty, &rdev->flags))
 			conf->working_disks++;
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index 77605cdceaf1..a1f7147b757f 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -182,7 +182,7 @@ static int create_strip_zones(mddev_t *mddev)
 
 		if (rdev1->bdev->bd_disk->queue->merge_bvec_fn &&
 		    queue_max_sectors(mddev->queue) > (PAGE_SIZE>>9))
-			blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9);
+			blk_queue_max_hw_sectors(mddev->queue, PAGE_SIZE>>9);
 
 		if (!smallest || (rdev1->sectors < smallest->sectors))
 			smallest = rdev1;
@@ -325,7 +325,7 @@ static int raid0_run(mddev_t *mddev)
 	}
 	if (md_check_no_bitmap(mddev))
 		return -EINVAL;
-	blk_queue_max_sectors(mddev->queue, mddev->chunk_sectors);
+	blk_queue_max_hw_sectors(mddev->queue, mddev->chunk_sectors);
 	mddev->queue->queue_lock = &mddev->queue->__queue_lock;
 
 	ret = create_strip_zones(mddev);
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 859bd3ffe435..5a06122abd3b 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1158,7 +1158,7 @@ static int raid1_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
 			 */
 			if (rdev->bdev->bd_disk->queue->merge_bvec_fn &&
 			    queue_max_sectors(mddev->queue) > (PAGE_SIZE>>9))
-				blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9);
+				blk_queue_max_hw_sectors(mddev->queue, PAGE_SIZE>>9);
 
 			p->head_position = 0;
 			rdev->raid_disk = mirror;
@@ -2103,7 +2103,7 @@ static int run(mddev_t *mddev)
 		 */
 		if (rdev->bdev->bd_disk->queue->merge_bvec_fn &&
 		    queue_max_sectors(mddev->queue) > (PAGE_SIZE>>9))
-			blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9);
+			blk_queue_max_hw_sectors(mddev->queue, PAGE_SIZE>>9);
 	}
 
 	mddev->degraded = 0;
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index d119b7b75e71..7584f9ab9bcf 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1161,7 +1161,7 @@ static int raid10_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
 			 */
 			if (rdev->bdev->bd_disk->queue->merge_bvec_fn &&
 			    queue_max_sectors(mddev->queue) > (PAGE_SIZE>>9))
-				blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9);
+				blk_queue_max_hw_sectors(mddev->queue, PAGE_SIZE>>9);
 
 			p->head_position = 0;
 			rdev->raid_disk = mirror;
@@ -2260,7 +2260,7 @@ static int run(mddev_t *mddev)
 		 */
 		if (rdev->bdev->bd_disk->queue->merge_bvec_fn &&
 		    queue_max_sectors(mddev->queue) > (PAGE_SIZE>>9))
-			blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9);
+			blk_queue_max_hw_sectors(mddev->queue, PAGE_SIZE>>9);
 
 		disk->head_position = 0;
 	}
diff --git a/drivers/memstick/core/mspro_block.c b/drivers/memstick/core/mspro_block.c
index bd83fa0a4970..44d4178c4c15 100644
--- a/drivers/memstick/core/mspro_block.c
+++ b/drivers/memstick/core/mspro_block.c
@@ -1226,7 +1226,7 @@ static int mspro_block_init_disk(struct memstick_dev *card)
 	blk_queue_prep_rq(msb->queue, mspro_block_prepare_req);
 
 	blk_queue_bounce_limit(msb->queue, limit);
-	blk_queue_max_sectors(msb->queue, MSPRO_BLOCK_MAX_PAGES);
+	blk_queue_max_hw_sectors(msb->queue, MSPRO_BLOCK_MAX_PAGES);
 	blk_queue_max_phys_segments(msb->queue, MSPRO_BLOCK_MAX_SEGS);
 	blk_queue_max_hw_segments(msb->queue, MSPRO_BLOCK_MAX_SEGS);
 	blk_queue_max_segment_size(msb->queue,
diff --git a/drivers/message/i2o/i2o_block.c b/drivers/message/i2o/i2o_block.c
index e39986a78273..d033cfdb516f 100644
--- a/drivers/message/i2o/i2o_block.c
+++ b/drivers/message/i2o/i2o_block.c
@@ -1066,7 +1066,7 @@ static int i2o_block_probe(struct device *dev)
 	queue->queuedata = i2o_blk_dev;
 
 	blk_queue_max_phys_segments(queue, I2O_MAX_PHYS_SEGMENTS);
-	blk_queue_max_sectors(queue, max_sectors);
+	blk_queue_max_hw_sectors(queue, max_sectors);
 	blk_queue_max_hw_segments(queue, i2o_sg_tablesize(c, body_size));
 
 	osm_debug("max sectors = %d\n", queue->max_sectors);
diff --git a/drivers/mmc/card/queue.c b/drivers/mmc/card/queue.c
index c5a7a855f4b1..09b633d5657b 100644
--- a/drivers/mmc/card/queue.c
+++ b/drivers/mmc/card/queue.c
@@ -154,7 +154,7 @@ int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card, spinlock_t *lock
 
 		if (mq->bounce_buf) {
 			blk_queue_bounce_limit(mq->queue, BLK_BOUNCE_ANY);
-			blk_queue_max_sectors(mq->queue, bouncesz / 512);
+			blk_queue_max_hw_sectors(mq->queue, bouncesz / 512);
 			blk_queue_max_phys_segments(mq->queue, bouncesz / 512);
 			blk_queue_max_hw_segments(mq->queue, bouncesz / 512);
 			blk_queue_max_segment_size(mq->queue, bouncesz);
@@ -180,7 +180,7 @@ int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card, spinlock_t *lock
 
 	if (!mq->bounce_buf) {
 		blk_queue_bounce_limit(mq->queue, limit);
-		blk_queue_max_sectors(mq->queue,
+		blk_queue_max_hw_sectors(mq->queue,
 			min(host->max_blk_count, host->max_req_size / 512));
 		blk_queue_max_phys_segments(mq->queue, host->max_phys_segs);
 		blk_queue_max_hw_segments(mq->queue, host->max_hw_segs);
diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c
index 5905936c7c60..14b1e25b9dcf 100644
--- a/drivers/s390/block/dasd.c
+++ b/drivers/s390/block/dasd.c
@@ -2129,7 +2129,7 @@ static void dasd_setup_queue(struct dasd_block *block)
 
 	blk_queue_logical_block_size(block->request_queue, block->bp_block);
 	max = block->base->discipline->max_blocks << block->s2b_shift;
-	blk_queue_max_sectors(block->request_queue, max);
+	blk_queue_max_hw_sectors(block->request_queue, max);
 	blk_queue_max_phys_segments(block->request_queue, -1L);
 	blk_queue_max_hw_segments(block->request_queue, -1L);
 	/* with page sized segments we can translate each segement into
diff --git a/drivers/s390/char/tape_block.c b/drivers/s390/char/tape_block.c
index 8d3d720737da..509ed056fddd 100644
--- a/drivers/s390/char/tape_block.c
+++ b/drivers/s390/char/tape_block.c
@@ -222,7 +222,7 @@ tapeblock_setup_device(struct tape_device * device)
 		goto cleanup_queue;
 
 	blk_queue_logical_block_size(blkdat->request_queue, TAPEBLOCK_HSEC_SIZE);
-	blk_queue_max_sectors(blkdat->request_queue, TAPEBLOCK_MAX_SEC);
+	blk_queue_max_hw_sectors(blkdat->request_queue, TAPEBLOCK_MAX_SEC);
 	blk_queue_max_phys_segments(blkdat->request_queue, -1L);
 	blk_queue_max_hw_segments(blkdat->request_queue, -1L);
 	blk_queue_max_segment_size(blkdat->request_queue, -1L);
diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c
index 9e52d16c7c39..032f0d0e6cb4 100644
--- a/drivers/scsi/ipr.c
+++ b/drivers/scsi/ipr.c
@@ -3674,7 +3674,7 @@ static int ipr_slave_configure(struct scsi_device *sdev)
 		if (ipr_is_vset_device(res)) {
 			blk_queue_rq_timeout(sdev->request_queue,
 					     IPR_VSET_RW_TIMEOUT);
-			blk_queue_max_sectors(sdev->request_queue, IPR_VSET_MAX_SECTORS);
+			blk_queue_max_hw_sectors(sdev->request_queue, IPR_VSET_MAX_SECTORS);
 		}
 		if (ipr_is_vset_device(res) || ipr_is_scsi_disk(res))
 			sdev->allow_restart = 1;
diff --git a/drivers/scsi/pmcraid.c b/drivers/scsi/pmcraid.c
index b6f1ef954af1..9b1c1433c26b 100644
--- a/drivers/scsi/pmcraid.c
+++ b/drivers/scsi/pmcraid.c
@@ -235,7 +235,7 @@ static int pmcraid_slave_configure(struct scsi_device *scsi_dev)
 		scsi_dev->allow_restart = 1;
 		blk_queue_rq_timeout(scsi_dev->request_queue,
 				     PMCRAID_VSET_IO_TIMEOUT);
-		blk_queue_max_sectors(scsi_dev->request_queue,
+		blk_queue_max_hw_sectors(scsi_dev->request_queue,
 				      PMCRAID_VSET_MAX_SECTORS);
 	}
 
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index c6642423cc67..ac3cca74bdfb 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1627,7 +1627,7 @@ struct request_queue *__scsi_alloc_queue(struct Scsi_Host *shost,
 	blk_queue_max_hw_segments(q, shost->sg_tablesize);
 	blk_queue_max_phys_segments(q, SCSI_MAX_SG_CHAIN_SEGMENTS);
 
-	blk_queue_max_sectors(q, shost->max_sectors);
+	blk_queue_max_hw_sectors(q, shost->max_sectors);
 	blk_queue_bounce_limit(q, scsi_calculate_bounce_limit(shost));
 	blk_queue_segment_boundary(q, shost->dma_boundary);
 	dma_set_seg_boundary(dev, shost->dma_boundary);
diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
index 012f73a96880..5d9b5130d8c8 100644
--- a/drivers/scsi/scsi_scan.c
+++ b/drivers/scsi/scsi_scan.c
@@ -879,7 +879,7 @@ static int scsi_add_lun(struct scsi_device *sdev, unsigned char *inq_result,
 	 * broken RA4x00 Compaq Disk Array
 	 */
 	if (*bflags & BLIST_MAX_512)
-		blk_queue_max_sectors(sdev->request_queue, 512);
+		blk_queue_max_hw_sectors(sdev->request_queue, 512);
 
 	/*
 	 * Some devices may not want to have a start command automatically
diff --git a/drivers/usb/storage/scsiglue.c b/drivers/usb/storage/scsiglue.c
index e5e6df39e737..aadc16b5eed7 100644
--- a/drivers/usb/storage/scsiglue.c
+++ b/drivers/usb/storage/scsiglue.c
@@ -134,14 +134,14 @@ static int slave_configure(struct scsi_device *sdev)
 		if (us->fflags & US_FL_MAX_SECTORS_MIN)
 			max_sectors = PAGE_CACHE_SIZE >> 9;
 		if (queue_max_sectors(sdev->request_queue) > max_sectors)
-			blk_queue_max_sectors(sdev->request_queue,
+			blk_queue_max_hw_sectors(sdev->request_queue,
 					      max_sectors);
 	} else if (sdev->type == TYPE_TAPE) {
 		/* Tapes need much higher max_sector limits, so just
 		 * raise it to the maximum possible (4 GB / 512) and
 		 * let the queue segment size sort out the real limit.
 		 */
-		blk_queue_max_sectors(sdev->request_queue, 0x7FFFFF);
+		blk_queue_max_hw_sectors(sdev->request_queue, 0x7FFFFF);
 	}
 
 	/* Some USB host controllers can't do DMA; they have to use PIO.
@@ -495,7 +495,7 @@ static ssize_t store_max_sectors(struct device *dev, struct device_attribute *at
 	unsigned short ms;
 
 	if (sscanf(buf, "%hu", &ms) > 0 && ms <= SCSI_DEFAULT_MAX_SECTORS) {
-		blk_queue_max_sectors(sdev->request_queue, ms);
+		blk_queue_max_hw_sectors(sdev->request_queue, ms);
 		return strlen(buf);
 	}
 	return -EINVAL;	
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 5d378627f446..57bc48446e92 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -921,7 +921,14 @@ extern struct request_queue *blk_init_queue(request_fn_proc *, spinlock_t *);
 extern void blk_cleanup_queue(struct request_queue *);
 extern void blk_queue_make_request(struct request_queue *, make_request_fn *);
 extern void blk_queue_bounce_limit(struct request_queue *, u64);
-extern void blk_queue_max_sectors(struct request_queue *, unsigned int);
+extern void blk_queue_max_hw_sectors(struct request_queue *, unsigned int);
+
+/* Temporary compatibility wrapper */
+static inline void blk_queue_max_sectors(struct request_queue *q, unsigned int max)
+{
+	blk_queue_max_hw_sectors(q, max);
+}
+
 extern void blk_queue_max_phys_segments(struct request_queue *, unsigned short);
 extern void blk_queue_max_hw_segments(struct request_queue *, unsigned short);
 extern void blk_queue_max_segment_size(struct request_queue *, unsigned int);
-- 
cgit v1.2.3


From 8a78362c4eefc1deddbefe2c7f38aabbc2429d6b Mon Sep 17 00:00:00 2001
From: "Martin K. Petersen" <martin.petersen@oracle.com>
Date: Fri, 26 Feb 2010 00:20:39 -0500
Subject: block: Consolidate phys_segment and hw_segment limits

Except for SCSI no device drivers distinguish between physical and
hardware segment limits.  Consolidate the two into a single segment
limit.

Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 arch/um/drivers/ubd_kern.c          |  2 +-
 block/blk-core.c                    |  3 +-
 block/blk-merge.c                   |  8 ++---
 block/blk-settings.c                | 60 ++++++++-----------------------------
 drivers/ata/sata_nv.c               |  2 +-
 drivers/block/DAC960.c              |  2 +-
 drivers/block/cciss.c               |  5 +---
 drivers/block/cpqarray.c            |  5 +---
 drivers/block/drbd/drbd_nl.c        |  3 +-
 drivers/block/paride/pf.c           |  3 +-
 drivers/block/pktcdvd.c             |  4 +--
 drivers/block/ps3disk.c             |  3 +-
 drivers/block/ps3vram.c             |  3 +-
 drivers/block/sunvdc.c              |  3 +-
 drivers/block/sx8.c                 |  3 +-
 drivers/block/ub.c                  |  3 +-
 drivers/block/viodasd.c             |  3 +-
 drivers/block/xen-blkfront.c        |  3 +-
 drivers/cdrom/gdrom.c               |  2 +-
 drivers/cdrom/viocd.c               |  3 +-
 drivers/ide/ide-probe.c             |  3 +-
 drivers/md/raid5.c                  |  2 +-
 drivers/memstick/core/mspro_block.c |  3 +-
 drivers/message/i2o/i2o_block.c     |  3 +-
 drivers/mmc/card/queue.c            |  6 ++--
 drivers/s390/block/dasd.c           |  3 +-
 drivers/s390/char/tape_block.c      |  3 +-
 drivers/scsi/ibmvscsi/ibmvfc.c      |  4 +--
 drivers/scsi/scsi_lib.c             |  4 +--
 drivers/scsi/sg.c                   |  6 ++--
 drivers/scsi/st.c                   |  3 +-
 drivers/staging/hv/blkvsc_drv.c     |  5 +---
 fs/bio.c                            |  9 ++----
 include/linux/blkdev.h              | 27 ++++++++++-------
 include/linux/i2o.h                 |  2 +-
 35 files changed, 70 insertions(+), 136 deletions(-)

(limited to 'include/linux')

diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index c2051b0737cb..c1ff6903b622 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -849,7 +849,7 @@ static int ubd_add(int n, char **error_out)
 	}
 	ubd_dev->queue->queuedata = ubd_dev;
 
-	blk_queue_max_hw_segments(ubd_dev->queue, MAX_SG);
+	blk_queue_max_segments(ubd_dev->queue, MAX_SG);
 	err = ubd_disk_register(UBD_MAJOR, ubd_dev->size, n, &ubd_gendisk[n]);
 	if(err){
 		*error_out = "Failed to register device";
diff --git a/block/blk-core.c b/block/blk-core.c
index 36c0deebc2dc..9fe174dc74d1 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1614,8 +1614,7 @@ int blk_rq_check_limits(struct request_queue *q, struct request *rq)
 	 * limitation.
 	 */
 	blk_recalc_rq_segments(rq);
-	if (rq->nr_phys_segments > queue_max_phys_segments(q) ||
-	    rq->nr_phys_segments > queue_max_hw_segments(q)) {
+	if (rq->nr_phys_segments > queue_max_segments(q)) {
 		printk(KERN_ERR "%s: over max segments limit.\n", __func__);
 		return -EIO;
 	}
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 99cb5cf1f447..5e7dc9973458 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -206,8 +206,7 @@ static inline int ll_new_hw_segment(struct request_queue *q,
 {
 	int nr_phys_segs = bio_phys_segments(q, bio);
 
-	if (req->nr_phys_segments + nr_phys_segs > queue_max_hw_segments(q) ||
-	    req->nr_phys_segments + nr_phys_segs > queue_max_phys_segments(q)) {
+	if (req->nr_phys_segments + nr_phys_segs > queue_max_segments(q)) {
 		req->cmd_flags |= REQ_NOMERGE;
 		if (req == q->last_merge)
 			q->last_merge = NULL;
@@ -300,10 +299,7 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
 		total_phys_segments--;
 	}
 
-	if (total_phys_segments > queue_max_phys_segments(q))
-		return 0;
-
-	if (total_phys_segments > queue_max_hw_segments(q))
+	if (total_phys_segments > queue_max_segments(q))
 		return 0;
 
 	/* Merge is OK... */
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 61afae9dbc6d..31e7a9375c13 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -91,8 +91,7 @@ EXPORT_SYMBOL_GPL(blk_queue_lld_busy);
  */
 void blk_set_default_limits(struct queue_limits *lim)
 {
-	lim->max_phys_segments = MAX_PHYS_SEGMENTS;
-	lim->max_hw_segments = MAX_HW_SEGMENTS;
+	lim->max_segments = BLK_MAX_SEGMENTS;
 	lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK;
 	lim->max_segment_size = BLK_MAX_SEGMENT_SIZE;
 	lim->max_sectors = BLK_DEF_MAX_SECTORS;
@@ -252,17 +251,15 @@ void blk_queue_max_discard_sectors(struct request_queue *q,
 EXPORT_SYMBOL(blk_queue_max_discard_sectors);
 
 /**
- * blk_queue_max_phys_segments - set max phys segments for a request for this queue
+ * blk_queue_max_segments - set max hw segments for a request for this queue
  * @q:  the request queue for the device
  * @max_segments:  max number of segments
  *
  * Description:
  *    Enables a low level driver to set an upper limit on the number of
- *    physical data segments in a request.  This would be the largest sized
- *    scatter list the driver could handle.
+ *    hw data segments in a request.
  **/
-void blk_queue_max_phys_segments(struct request_queue *q,
-				 unsigned short max_segments)
+void blk_queue_max_segments(struct request_queue *q, unsigned short max_segments)
 {
 	if (!max_segments) {
 		max_segments = 1;
@@ -270,33 +267,9 @@ void blk_queue_max_phys_segments(struct request_queue *q,
 		       __func__, max_segments);
 	}
 
-	q->limits.max_phys_segments = max_segments;
+	q->limits.max_segments = max_segments;
 }
-EXPORT_SYMBOL(blk_queue_max_phys_segments);
-
-/**
- * blk_queue_max_hw_segments - set max hw segments for a request for this queue
- * @q:  the request queue for the device
- * @max_segments:  max number of segments
- *
- * Description:
- *    Enables a low level driver to set an upper limit on the number of
- *    hw data segments in a request.  This would be the largest number of
- *    address/length pairs the host adapter can actually give at once
- *    to the device.
- **/
-void blk_queue_max_hw_segments(struct request_queue *q,
-			       unsigned short max_segments)
-{
-	if (!max_segments) {
-		max_segments = 1;
-		printk(KERN_INFO "%s: set to minimum %d\n",
-		       __func__, max_segments);
-	}
-
-	q->limits.max_hw_segments = max_segments;
-}
-EXPORT_SYMBOL(blk_queue_max_hw_segments);
+EXPORT_SYMBOL(blk_queue_max_segments);
 
 /**
  * blk_queue_max_segment_size - set max segment size for blk_rq_map_sg
@@ -531,11 +504,7 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
 	t->seg_boundary_mask = min_not_zero(t->seg_boundary_mask,
 					    b->seg_boundary_mask);
 
-	t->max_phys_segments = min_not_zero(t->max_phys_segments,
-					    b->max_phys_segments);
-
-	t->max_hw_segments = min_not_zero(t->max_hw_segments,
-					  b->max_hw_segments);
+	t->max_segments = min_not_zero(t->max_segments, b->max_segments);
 
 	t->max_segment_size = min_not_zero(t->max_segment_size,
 					   b->max_segment_size);
@@ -739,22 +708,19 @@ EXPORT_SYMBOL(blk_queue_update_dma_pad);
  * does is adjust the queue so that the buf is always appended
  * silently to the scatterlist.
  *
- * Note: This routine adjusts max_hw_segments to make room for
- * appending the drain buffer.  If you call
- * blk_queue_max_hw_segments() or blk_queue_max_phys_segments() after
- * calling this routine, you must set the limit to one fewer than your
- * device can support otherwise there won't be room for the drain
- * buffer.
+ * Note: This routine adjusts max_hw_segments to make room for appending
+ * the drain buffer.  If you call blk_queue_max_segments() after calling
+ * this routine, you must set the limit to one fewer than your device
+ * can support otherwise there won't be room for the drain buffer.
  */
 int blk_queue_dma_drain(struct request_queue *q,
 			       dma_drain_needed_fn *dma_drain_needed,
 			       void *buf, unsigned int size)
 {
-	if (queue_max_hw_segments(q) < 2 || queue_max_phys_segments(q) < 2)
+	if (queue_max_segments(q) < 2)
 		return -EINVAL;
 	/* make room for appending the drain */
-	blk_queue_max_hw_segments(q, queue_max_hw_segments(q) - 1);
-	blk_queue_max_phys_segments(q, queue_max_phys_segments(q) - 1);
+	blk_queue_max_segments(q, queue_max_segments(q) - 1);
 	q->dma_drain_needed = dma_drain_needed;
 	q->dma_drain_buffer = buf;
 	q->dma_drain_size = size;
diff --git a/drivers/ata/sata_nv.c b/drivers/ata/sata_nv.c
index 0c82d335c55d..684fe04dbbb7 100644
--- a/drivers/ata/sata_nv.c
+++ b/drivers/ata/sata_nv.c
@@ -772,7 +772,7 @@ static int nv_adma_slave_config(struct scsi_device *sdev)
 	}
 
 	blk_queue_segment_boundary(sdev->request_queue, segment_boundary);
-	blk_queue_max_hw_segments(sdev->request_queue, sg_tablesize);
+	blk_queue_max_segments(sdev->request_queue, sg_tablesize);
 	ata_port_printk(ap, KERN_INFO,
 		"DMA mask 0x%llX, segment boundary 0x%lX, hw segs %hu\n",
 		(unsigned long long)*ap->host->dev->dma_mask,
diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c
index 1c0cd35e1913..459f1bc25a7b 100644
--- a/drivers/block/DAC960.c
+++ b/drivers/block/DAC960.c
@@ -2534,7 +2534,7 @@ static bool DAC960_RegisterBlockDevice(DAC960_Controller_T *Controller)
   	blk_queue_bounce_limit(RequestQueue, Controller->BounceBufferLimit);
   	RequestQueue->queuedata = Controller;
   	blk_queue_max_hw_segments(RequestQueue, Controller->DriverScatterGatherLimit);
-	blk_queue_max_phys_segments(RequestQueue, Controller->DriverScatterGatherLimit);
+	blk_queue_max_segments(RequestQueue, Controller->DriverScatterGatherLimit);
 	blk_queue_max_hw_sectors(RequestQueue, Controller->MaxBlocksPerCommand);
 	disk->queue = RequestQueue;
 	sprintf(disk->disk_name, "rd/c%dd%d", Controller->ControllerNumber, n);
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index 030e52d72254..a29e69418a03 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -1797,10 +1797,7 @@ static int cciss_add_disk(ctlr_info_t *h, struct gendisk *disk,
 	blk_queue_bounce_limit(disk->queue, h->pdev->dma_mask);
 
 	/* This is a hardware imposed limit. */
-	blk_queue_max_hw_segments(disk->queue, h->maxsgentries);
-
-	/* This is a limit in the driver and could be eliminated. */
-	blk_queue_max_phys_segments(disk->queue, h->maxsgentries);
+	blk_queue_max_segments(disk->queue, h->maxsgentries);
 
 	blk_queue_max_hw_sectors(disk->queue, h->cciss_max_sectors);
 
diff --git a/drivers/block/cpqarray.c b/drivers/block/cpqarray.c
index 6422651ec364..91d11631cec9 100644
--- a/drivers/block/cpqarray.c
+++ b/drivers/block/cpqarray.c
@@ -448,11 +448,8 @@ static int __init cpqarray_register_ctlr( int i, struct pci_dev *pdev)
 		blk_queue_bounce_limit(q, hba[i]->pci_dev->dma_mask);
 
 	/* This is a hardware imposed limit. */
-	blk_queue_max_hw_segments(q, SG_MAX);
+	blk_queue_max_segments(q, SG_MAX);
 
-	/* This is a driver limit and could be eliminated. */
-	blk_queue_max_phys_segments(q, SG_MAX);
-	
 	init_timer(&hba[i]->timer);
 	hba[i]->timer.expires = jiffies + IDA_TIMER;
 	hba[i]->timer.data = (unsigned long)hba[i];
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 9b55e64196fc..4df3b40b1057 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -710,8 +710,7 @@ void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int max_seg_s) __mu
 	max_seg_s = min(queue_max_sectors(b) * queue_logical_block_size(b), max_seg_s);
 
 	blk_queue_max_hw_sectors(q, max_seg_s >> 9);
-	blk_queue_max_phys_segments(q, max_segments ? max_segments : MAX_PHYS_SEGMENTS);
-	blk_queue_max_hw_segments(q, max_segments ? max_segments : MAX_HW_SEGMENTS);
+	blk_queue_max_segments(q, max_segments ? max_segments : BLK_MAX_SEGMENTS);
 	blk_queue_max_segment_size(q, max_seg_s);
 	blk_queue_logical_block_size(q, 512);
 	blk_queue_segment_boundary(q, PAGE_SIZE-1);
diff --git a/drivers/block/paride/pf.c b/drivers/block/paride/pf.c
index ea54ea393553..ddb4f9abd480 100644
--- a/drivers/block/paride/pf.c
+++ b/drivers/block/paride/pf.c
@@ -956,8 +956,7 @@ static int __init pf_init(void)
 		return -ENOMEM;
 	}
 
-	blk_queue_max_phys_segments(pf_queue, cluster);
-	blk_queue_max_hw_segments(pf_queue, cluster);
+	blk_queue_max_segments(pf_queue, cluster);
 
 	for (pf = units, unit = 0; unit < PF_UNITS; pf++, unit++) {
 		struct gendisk *disk = pf->disk;
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index 6e1daa24da2f..b72935b8f203 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -950,14 +950,14 @@ static void pkt_iosched_process_queue(struct pktcdvd_device *pd)
 static int pkt_set_segment_merging(struct pktcdvd_device *pd, struct request_queue *q)
 {
 	if ((pd->settings.size << 9) / CD_FRAMESIZE
-	    <= queue_max_phys_segments(q)) {
+	    <= queue_max_segments(q)) {
 		/*
 		 * The cdrom device can handle one segment/frame
 		 */
 		clear_bit(PACKET_MERGE_SEGS, &pd->flags);
 		return 0;
 	} else if ((pd->settings.size << 9) / PAGE_SIZE
-		   <= queue_max_phys_segments(q)) {
+		   <= queue_max_segments(q)) {
 		/*
 		 * We can handle this case at the expense of some extra memory
 		 * copies during write operations
diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c
index 9cd1a4a542b8..bc95469d33c1 100644
--- a/drivers/block/ps3disk.c
+++ b/drivers/block/ps3disk.c
@@ -482,8 +482,7 @@ static int __devinit ps3disk_probe(struct ps3_system_bus_device *_dev)
 	blk_queue_ordered(queue, QUEUE_ORDERED_DRAIN_FLUSH,
 			  ps3disk_prepare_flush);
 
-	blk_queue_max_phys_segments(queue, -1);
-	blk_queue_max_hw_segments(queue, -1);
+	blk_queue_max_segments(queue, -1);
 	blk_queue_max_segment_size(queue, dev->bounce_size);
 
 	gendisk = alloc_disk(PS3DISK_MINORS);
diff --git a/drivers/block/ps3vram.c b/drivers/block/ps3vram.c
index 6416b262934b..83ebb390b164 100644
--- a/drivers/block/ps3vram.c
+++ b/drivers/block/ps3vram.c
@@ -751,8 +751,7 @@ static int __devinit ps3vram_probe(struct ps3_system_bus_device *dev)
 	priv->queue = queue;
 	queue->queuedata = dev;
 	blk_queue_make_request(queue, ps3vram_make_request);
-	blk_queue_max_phys_segments(queue, MAX_PHYS_SEGMENTS);
-	blk_queue_max_hw_segments(queue, MAX_HW_SEGMENTS);
+	blk_queue_max_segments(queue, BLK_MAX_HW_SEGMENTS);
 	blk_queue_max_segment_size(queue, BLK_MAX_SEGMENT_SIZE);
 	blk_queue_max_hw_sectors(queue, BLK_SAFE_MAX_SECTORS);
 
diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c
index dd30cddd0f7f..48e8fee9f2d4 100644
--- a/drivers/block/sunvdc.c
+++ b/drivers/block/sunvdc.c
@@ -691,8 +691,7 @@ static int probe_disk(struct vdc_port *port)
 
 	port->disk = g;
 
-	blk_queue_max_hw_segments(q, port->ring_cookies);
-	blk_queue_max_phys_segments(q, port->ring_cookies);
+	blk_queue_max_segments(q, port->ring_cookies);
 	blk_queue_max_hw_sectors(q, port->max_xfer_size);
 	g->major = vdc_major;
 	g->first_minor = port->vio.vdev->dev_no << PARTITION_SHIFT;
diff --git a/drivers/block/sx8.c b/drivers/block/sx8.c
index 7bd7b2f83116..b70f0fca9a42 100644
--- a/drivers/block/sx8.c
+++ b/drivers/block/sx8.c
@@ -1518,8 +1518,7 @@ static int carm_init_disks(struct carm_host *host)
 			break;
 		}
 		disk->queue = q;
-		blk_queue_max_hw_segments(q, CARM_MAX_REQ_SG);
-		blk_queue_max_phys_segments(q, CARM_MAX_REQ_SG);
+		blk_queue_max_segments(q, CARM_MAX_REQ_SG);
 		blk_queue_segment_boundary(q, CARM_SG_BOUNDARY);
 
 		q->queuedata = port;
diff --git a/drivers/block/ub.c b/drivers/block/ub.c
index 352ea24d66e8..2e889838e819 100644
--- a/drivers/block/ub.c
+++ b/drivers/block/ub.c
@@ -2320,8 +2320,7 @@ static int ub_probe_lun(struct ub_dev *sc, int lnum)
 	disk->queue = q;
 
 	blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);
-	blk_queue_max_hw_segments(q, UB_MAX_REQ_SG);
-	blk_queue_max_phys_segments(q, UB_MAX_REQ_SG);
+	blk_queue_max_segments(q, UB_MAX_REQ_SG);
 	blk_queue_segment_boundary(q, 0xffffffff);	/* Dubious. */
 	blk_queue_max_hw_sectors(q, UB_MAX_SECTORS);
 	blk_queue_logical_block_size(q, lun->capacity.bsize);
diff --git a/drivers/block/viodasd.c b/drivers/block/viodasd.c
index d44ece7d7b7c..c12b31362ac6 100644
--- a/drivers/block/viodasd.c
+++ b/drivers/block/viodasd.c
@@ -471,8 +471,7 @@ retry:
 	}
 
 	d->disk = g;
-	blk_queue_max_hw_segments(q, VIOMAXBLOCKDMA);
-	blk_queue_max_phys_segments(q, VIOMAXBLOCKDMA);
+	blk_queue_max_segments(q, VIOMAXBLOCKDMA);
 	blk_queue_max_hw_sectors(q, VIODASD_MAXSECTORS);
 	g->major = VIODASD_MAJOR;
 	g->first_minor = dev_no << PARTITION_SHIFT;
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index f9861aaa1fef..9c09694b2520 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -353,8 +353,7 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
 	blk_queue_max_segment_size(rq, PAGE_SIZE);
 
 	/* Ensure a merged request will fit in a single I/O ring slot. */
-	blk_queue_max_phys_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST);
-	blk_queue_max_hw_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST);
+	blk_queue_max_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST);
 
 	/* Make sure buffer addresses are sector-aligned. */
 	blk_queue_dma_alignment(rq, 511);
diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c
index e789e6c9a422..03c71f7698cb 100644
--- a/drivers/cdrom/gdrom.c
+++ b/drivers/cdrom/gdrom.c
@@ -741,7 +741,7 @@ static int __devinit probe_gdrom_setupqueue(void)
 {
 	blk_queue_logical_block_size(gd.gdrom_rq, GDROM_HARD_SECTOR);
 	/* using DMA so memory will need to be contiguous */
-	blk_queue_max_hw_segments(gd.gdrom_rq, 1);
+	blk_queue_max_segments(gd.gdrom_rq, 1);
 	/* set a large max size to get most from DMA */
 	blk_queue_max_segment_size(gd.gdrom_rq, 0x40000);
 	gd.disk->queue = gd.gdrom_rq;
diff --git a/drivers/cdrom/viocd.c b/drivers/cdrom/viocd.c
index b1dfd23eb832..cc435be0bc13 100644
--- a/drivers/cdrom/viocd.c
+++ b/drivers/cdrom/viocd.c
@@ -616,8 +616,7 @@ static int viocd_probe(struct vio_dev *vdev, const struct vio_device_id *id)
 	gendisk->first_minor = deviceno;
 	strncpy(gendisk->disk_name, c->name,
 			sizeof(gendisk->disk_name));
-	blk_queue_max_hw_segments(q, 1);
-	blk_queue_max_phys_segments(q, 1);
+	blk_queue_max_segments(q, 1);
 	blk_queue_max_hw_sectors(q, 4096 / 512);
 	gendisk->queue = q;
 	gendisk->fops = &viocd_fops;
diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index 1ec8b31277bd..f8c1ae6ad74c 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -790,8 +790,7 @@ static int ide_init_queue(ide_drive_t *drive)
 		max_sg_entries >>= 1;
 #endif /* CONFIG_PCI */
 
-	blk_queue_max_hw_segments(q, max_sg_entries);
-	blk_queue_max_phys_segments(q, max_sg_entries);
+	blk_queue_max_segments(q, max_sg_entries);
 
 	/* assign drive queue */
 	drive->queue = q;
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index ceb24afdc147..509c8f3dd9a5 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -3739,7 +3739,7 @@ static int bio_fits_rdev(struct bio *bi)
 	if ((bi->bi_size>>9) > queue_max_sectors(q))
 		return 0;
 	blk_recount_segments(q, bi);
-	if (bi->bi_phys_segments > queue_max_phys_segments(q))
+	if (bi->bi_phys_segments > queue_max_segments(q))
 		return 0;
 
 	if (q->merge_bvec_fn)
diff --git a/drivers/memstick/core/mspro_block.c b/drivers/memstick/core/mspro_block.c
index 44d4178c4c15..972b87069d55 100644
--- a/drivers/memstick/core/mspro_block.c
+++ b/drivers/memstick/core/mspro_block.c
@@ -1227,8 +1227,7 @@ static int mspro_block_init_disk(struct memstick_dev *card)
 
 	blk_queue_bounce_limit(msb->queue, limit);
 	blk_queue_max_hw_sectors(msb->queue, MSPRO_BLOCK_MAX_PAGES);
-	blk_queue_max_phys_segments(msb->queue, MSPRO_BLOCK_MAX_SEGS);
-	blk_queue_max_hw_segments(msb->queue, MSPRO_BLOCK_MAX_SEGS);
+	blk_queue_max_segments(msb->queue, MSPRO_BLOCK_MAX_SEGS);
 	blk_queue_max_segment_size(msb->queue,
 				   MSPRO_BLOCK_MAX_PAGES * msb->page_size);
 
diff --git a/drivers/message/i2o/i2o_block.c b/drivers/message/i2o/i2o_block.c
index d033cfdb516f..2658b1484a2c 100644
--- a/drivers/message/i2o/i2o_block.c
+++ b/drivers/message/i2o/i2o_block.c
@@ -1065,9 +1065,8 @@ static int i2o_block_probe(struct device *dev)
 	queue = gd->queue;
 	queue->queuedata = i2o_blk_dev;
 
-	blk_queue_max_phys_segments(queue, I2O_MAX_PHYS_SEGMENTS);
 	blk_queue_max_hw_sectors(queue, max_sectors);
-	blk_queue_max_hw_segments(queue, i2o_sg_tablesize(c, body_size));
+	blk_queue_max_segments(queue, i2o_sg_tablesize(c, body_size));
 
 	osm_debug("max sectors = %d\n", queue->max_sectors);
 	osm_debug("phys segments = %d\n", queue->max_phys_segments);
diff --git a/drivers/mmc/card/queue.c b/drivers/mmc/card/queue.c
index 09b633d5657b..381fe032caa1 100644
--- a/drivers/mmc/card/queue.c
+++ b/drivers/mmc/card/queue.c
@@ -155,8 +155,7 @@ int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card, spinlock_t *lock
 		if (mq->bounce_buf) {
 			blk_queue_bounce_limit(mq->queue, BLK_BOUNCE_ANY);
 			blk_queue_max_hw_sectors(mq->queue, bouncesz / 512);
-			blk_queue_max_phys_segments(mq->queue, bouncesz / 512);
-			blk_queue_max_hw_segments(mq->queue, bouncesz / 512);
+			blk_queue_max_segments(mq->queue, bouncesz / 512);
 			blk_queue_max_segment_size(mq->queue, bouncesz);
 
 			mq->sg = kmalloc(sizeof(struct scatterlist),
@@ -182,8 +181,7 @@ int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card, spinlock_t *lock
 		blk_queue_bounce_limit(mq->queue, limit);
 		blk_queue_max_hw_sectors(mq->queue,
 			min(host->max_blk_count, host->max_req_size / 512));
-		blk_queue_max_phys_segments(mq->queue, host->max_phys_segs);
-		blk_queue_max_hw_segments(mq->queue, host->max_hw_segs);
+		blk_queue_max_segments(mq->queue, host->max_hw_segs);
 		blk_queue_max_segment_size(mq->queue, host->max_seg_size);
 
 		mq->sg = kmalloc(sizeof(struct scatterlist) *
diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c
index 14b1e25b9dcf..8831e9308d05 100644
--- a/drivers/s390/block/dasd.c
+++ b/drivers/s390/block/dasd.c
@@ -2130,8 +2130,7 @@ static void dasd_setup_queue(struct dasd_block *block)
 	blk_queue_logical_block_size(block->request_queue, block->bp_block);
 	max = block->base->discipline->max_blocks << block->s2b_shift;
 	blk_queue_max_hw_sectors(block->request_queue, max);
-	blk_queue_max_phys_segments(block->request_queue, -1L);
-	blk_queue_max_hw_segments(block->request_queue, -1L);
+	blk_queue_max_segments(block->request_queue, -1L);
 	/* with page sized segments we can translate each segement into
 	 * one idaw/tidaw
 	 */
diff --git a/drivers/s390/char/tape_block.c b/drivers/s390/char/tape_block.c
index 509ed056fddd..097da8ce6be6 100644
--- a/drivers/s390/char/tape_block.c
+++ b/drivers/s390/char/tape_block.c
@@ -223,8 +223,7 @@ tapeblock_setup_device(struct tape_device * device)
 
 	blk_queue_logical_block_size(blkdat->request_queue, TAPEBLOCK_HSEC_SIZE);
 	blk_queue_max_hw_sectors(blkdat->request_queue, TAPEBLOCK_MAX_SEC);
-	blk_queue_max_phys_segments(blkdat->request_queue, -1L);
-	blk_queue_max_hw_segments(blkdat->request_queue, -1L);
+	blk_queue_max_segments(blkdat->request_queue, -1L);
 	blk_queue_max_segment_size(blkdat->request_queue, -1L);
 	blk_queue_segment_boundary(blkdat->request_queue, -1L);
 
diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c
index 87b536a97cb4..732f6d35b4a8 100644
--- a/drivers/scsi/ibmvscsi/ibmvfc.c
+++ b/drivers/scsi/ibmvscsi/ibmvfc.c
@@ -4195,7 +4195,7 @@ static void ibmvfc_tgt_add_rport(struct ibmvfc_target *tgt)
 		if (tgt->service_parms.class3_parms[0] & 0x80000000)
 			rport->supported_classes |= FC_COS_CLASS3;
 		if (rport->rqst_q)
-			blk_queue_max_hw_segments(rport->rqst_q, 1);
+			blk_queue_max_segments(rport->rqst_q, 1);
 	} else
 		tgt_dbg(tgt, "rport add failed\n");
 	spin_unlock_irqrestore(vhost->host->host_lock, flags);
@@ -4669,7 +4669,7 @@ static int ibmvfc_probe(struct vio_dev *vdev, const struct vio_device_id *id)
 	}
 
 	if (shost_to_fc_host(shost)->rqst_q)
-		blk_queue_max_hw_segments(shost_to_fc_host(shost)->rqst_q, 1);
+		blk_queue_max_segments(shost_to_fc_host(shost)->rqst_q, 1);
 	dev_set_drvdata(dev, vhost);
 	spin_lock(&ibmvfc_driver_lock);
 	list_add_tail(&vhost->queue, &ibmvfc_head);
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index ac3cca74bdfb..f8fbf47377ae 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1624,8 +1624,8 @@ struct request_queue *__scsi_alloc_queue(struct Scsi_Host *shost,
 	/*
 	 * this limit is imposed by hardware restrictions
 	 */
-	blk_queue_max_hw_segments(q, shost->sg_tablesize);
-	blk_queue_max_phys_segments(q, SCSI_MAX_SG_CHAIN_SEGMENTS);
+	blk_queue_max_segments(q, min_t(unsigned short, shost->sg_tablesize,
+					SCSI_MAX_SG_CHAIN_SEGMENTS));
 
 	blk_queue_max_hw_sectors(q, shost->max_sectors);
 	blk_queue_bounce_limit(q, scsi_calculate_bounce_limit(shost));
diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index 040f751809ea..c996d98636f3 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -287,8 +287,7 @@ sg_open(struct inode *inode, struct file *filp)
 	if (list_empty(&sdp->sfds)) {	/* no existing opens on this device */
 		sdp->sgdebug = 0;
 		q = sdp->device->request_queue;
-		sdp->sg_tablesize = min(queue_max_hw_segments(q),
-					queue_max_phys_segments(q));
+		sdp->sg_tablesize = queue_max_segments(q);
 	}
 	if ((sfp = sg_add_sfp(sdp, dev)))
 		filp->private_data = sfp;
@@ -1376,8 +1375,7 @@ static Sg_device *sg_alloc(struct gendisk *disk, struct scsi_device *scsidp)
 	sdp->device = scsidp;
 	INIT_LIST_HEAD(&sdp->sfds);
 	init_waitqueue_head(&sdp->o_excl_wait);
-	sdp->sg_tablesize = min(queue_max_hw_segments(q),
-				queue_max_phys_segments(q));
+	sdp->sg_tablesize = queue_max_segments(q);
 	sdp->index = k;
 	kref_init(&sdp->d_ref);
 
diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c
index d04ea9a6f673..f67d1a159aad 100644
--- a/drivers/scsi/st.c
+++ b/drivers/scsi/st.c
@@ -3983,8 +3983,7 @@ static int st_probe(struct device *dev)
 		return -ENODEV;
 	}
 
-	i = min(queue_max_hw_segments(SDp->request_queue),
-		queue_max_phys_segments(SDp->request_queue));
+	i = queue_max_segments(SDp->request_queue);
 	if (st_max_sg_segs < i)
 		i = st_max_sg_segs;
 	buffer = new_tape_buffer((SDp->host)->unchecked_isa_dma, i);
diff --git a/drivers/staging/hv/blkvsc_drv.c b/drivers/staging/hv/blkvsc_drv.c
index 62b282844a53..45d908114d11 100644
--- a/drivers/staging/hv/blkvsc_drv.c
+++ b/drivers/staging/hv/blkvsc_drv.c
@@ -363,10 +363,7 @@ static int blkvsc_probe(struct device *device)
 	blkdev->gd->queue = blk_init_queue(blkvsc_request, &blkdev->lock);
 
 	blk_queue_max_segment_size(blkdev->gd->queue, PAGE_SIZE);
-	blk_queue_max_phys_segments(blkdev->gd->queue,
-				    MAX_MULTIPAGE_BUFFER_COUNT);
-	blk_queue_max_hw_segments(blkdev->gd->queue,
-				  MAX_MULTIPAGE_BUFFER_COUNT);
+	blk_queue_max_segments(blkdev->gd->queue, MAX_MULTIPAGE_BUFFER_COUNT);
 	blk_queue_segment_boundary(blkdev->gd->queue, PAGE_SIZE-1);
 	blk_queue_bounce_limit(blkdev->gd->queue, BLK_BOUNCE_ANY);
 	blk_queue_dma_alignment(blkdev->gd->queue, 511);
diff --git a/fs/bio.c b/fs/bio.c
index 88094afc29ea..dc17afd672e3 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -507,10 +507,8 @@ int bio_get_nr_vecs(struct block_device *bdev)
 	int nr_pages;
 
 	nr_pages = ((queue_max_sectors(q) << 9) + PAGE_SIZE - 1) >> PAGE_SHIFT;
-	if (nr_pages > queue_max_phys_segments(q))
-		nr_pages = queue_max_phys_segments(q);
-	if (nr_pages > queue_max_hw_segments(q))
-		nr_pages = queue_max_hw_segments(q);
+	if (nr_pages > queue_max_segments(q))
+		nr_pages = queue_max_segments(q);
 
 	return nr_pages;
 }
@@ -575,8 +573,7 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
 	 * make this too complex.
 	 */
 
-	while (bio->bi_phys_segments >= queue_max_phys_segments(q)
-	       || bio->bi_phys_segments >= queue_max_hw_segments(q)) {
+	while (bio->bi_phys_segments >= queue_max_segments(q)) {
 
 		if (retried_segments)
 			return 0;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 57bc48446e92..ebd22dbed861 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -316,8 +316,7 @@ struct queue_limits {
 	unsigned int		discard_alignment;
 
 	unsigned short		logical_block_size;
-	unsigned short		max_hw_segments;
-	unsigned short		max_phys_segments;
+	unsigned short		max_segments;
 
 	unsigned char		misaligned;
 	unsigned char		discard_misaligned;
@@ -929,8 +928,19 @@ static inline void blk_queue_max_sectors(struct request_queue *q, unsigned int m
 	blk_queue_max_hw_sectors(q, max);
 }
 
-extern void blk_queue_max_phys_segments(struct request_queue *, unsigned short);
-extern void blk_queue_max_hw_segments(struct request_queue *, unsigned short);
+extern void blk_queue_max_segments(struct request_queue *, unsigned short);
+
+static inline void blk_queue_max_phys_segments(struct request_queue *q, unsigned short max)
+{
+	blk_queue_max_segments(q, max);
+}
+
+static inline void blk_queue_max_hw_segments(struct request_queue *q, unsigned short max)
+{
+	blk_queue_max_segments(q, max);
+}
+
+
 extern void blk_queue_max_segment_size(struct request_queue *, unsigned int);
 extern void blk_queue_max_discard_sectors(struct request_queue *q,
 		unsigned int max_discard_sectors);
@@ -1055,14 +1065,9 @@ static inline unsigned int queue_max_hw_sectors(struct request_queue *q)
 	return q->limits.max_hw_sectors;
 }
 
-static inline unsigned short queue_max_hw_segments(struct request_queue *q)
-{
-	return q->limits.max_hw_segments;
-}
-
-static inline unsigned short queue_max_phys_segments(struct request_queue *q)
+static inline unsigned short queue_max_segments(struct request_queue *q)
 {
-	return q->limits.max_phys_segments;
+	return q->limits.max_segments;
 }
 
 static inline unsigned int queue_max_segment_size(struct request_queue *q)
diff --git a/include/linux/i2o.h b/include/linux/i2o.h
index 4c4e57d1f19d..87018dc5527d 100644
--- a/include/linux/i2o.h
+++ b/include/linux/i2o.h
@@ -385,7 +385,7 @@
 /* defines for max_sectors and max_phys_segments */
 #define I2O_MAX_SECTORS			1024
 #define I2O_MAX_SECTORS_LIMITED		128
-#define I2O_MAX_PHYS_SEGMENTS		MAX_PHYS_SEGMENTS
+#define I2O_MAX_PHYS_SEGMENTS		BLK_MAX_SEGMENTS
 
 /*
  *	Message structures
-- 
cgit v1.2.3


From 58c24a61614f5da290068e47fc5ec65370eb61dd Mon Sep 17 00:00:00 2001
From: Richard Kennedy <richard@rsk.demon.co.uk>
Date: Fri, 26 Feb 2010 14:00:43 +0100
Subject: block: remove padding from io_context on 64bit builds

On 64 bit builds when CONFIG_BLK_CGROUP=n (the default) this removes 8
bytes of padding from structure io_context and drops its size from 72 to
64 bytes, so needing one fewer cachelines and allowing more objects per
slab in it's kmem_cache.

Signed-off-by: Richard Kennedy <richard@rsk.demon.co.uk>

----
patch against 2.6.33
compiled & test on x86_64 AMDX2
regards
Richard
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 include/linux/iocontext.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h
index 78ef023227d4..1195a806fe0c 100644
--- a/include/linux/iocontext.h
+++ b/include/linux/iocontext.h
@@ -49,8 +49,8 @@ struct io_context {
 	/*
 	 * For request batching
 	 */
-	unsigned long last_waited; /* Time last woken after wait for request */
 	int nr_batch_requests;     /* Number of requests left in the batch */
+	unsigned long last_waited; /* Time last woken after wait for request */
 
 	struct radix_tree_root radix_root;
 	struct hlist_head cic_list;
-- 
cgit v1.2.3


From 738b0343e73604750feb107e063c28b3ca36cb84 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Fri, 26 Feb 2010 05:12:02 -0800
Subject: Revert "ethtool: Add n-tuple string length to drvinfo and return it"

This reverts commit c79c5ffdce14abb4de3878c5aa8c3c6e5093c69b.

As Jeff points out we can't break the user visible interface
like this, we need to add this into the reserved[] thing.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ethtool.h | 1 -
 net/core/ethtool.c      | 3 ---
 2 files changed, 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index f7992a256b71..cca1c3de140d 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -63,7 +63,6 @@ struct ethtool_drvinfo {
 	char	reserved2[12];
 	__u32	n_priv_flags;	/* number of flags valid in ETHTOOL_GPFLAGS */
 	__u32	n_stats;	/* number of u64's from ETHTOOL_GSTATS */
-	__u32	n_ntuples;	/* number of n-tuple filters from GSTRINGS */
 	__u32	testinfo_len;
 	__u32	eedump_len;	/* Size of data from ETHTOOL_GEEPROM (bytes) */
 	__u32	regdump_len;	/* Size of data from ETHTOOL_GREGS (bytes) */
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 1c94f48e27b5..31b1eddc1b84 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -224,9 +224,6 @@ static noinline int ethtool_get_drvinfo(struct net_device *dev, void __user *use
 		rc = ops->get_sset_count(dev, ETH_SS_PRIV_FLAGS);
 		if (rc >= 0)
 			info.n_priv_flags = rc;
-		rc = ops->get_sset_count(dev, ETH_SS_NTUPLE_FILTERS);
-		if (rc >= 0)
-			info.n_ntuples = rc;
 	}
 	if (ops->get_regs_len)
 		info.regdump_len = ops->get_regs_len(dev);
-- 
cgit v1.2.3


From 773c3e75d1fc7ea5058bfeab5d82bac5b85f8cd8 Mon Sep 17 00:00:00 2001
From: Sriramakrishnan <srk@ti.com>
Date: Fri, 26 Feb 2010 05:22:03 -0800
Subject: can: ti hecc module : add platform specific initialization callback.

CAN module on AM3517 requires programming of IO expander as part
of init sequence - to enable CAN PHY. Added platform specific
callback to handle phy control(switch on /off).

Signed-off-by: Sriramakrishnan <srk@ti.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/can/ti_hecc.c            | 17 ++++++++++++++++-
 include/linux/can/platform/ti_hecc.h |  8 ++++++--
 2 files changed, 22 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/can/ti_hecc.c b/drivers/net/can/ti_hecc.c
index b861cd561074..0c3d2ba0d178 100644
--- a/drivers/net/can/ti_hecc.c
+++ b/drivers/net/can/ti_hecc.c
@@ -28,9 +28,11 @@
  *         .mbx_offset             = 0x2000,
  *         .int_line               = 0,
  *         .revision               = 1,
+ *         .transceiver_switch     = hecc_phy_control,
  * };
  *
- * Please see include/can/platform/ti_hecc.h for description of above fields
+ * Please see include/linux/can/platform/ti_hecc.h for description of
+ * above fields.
  *
  */
 
@@ -220,6 +222,7 @@ struct ti_hecc_priv {
 	u32 tx_head;
 	u32 tx_tail;
 	u32 rx_next;
+	void (*transceiver_switch)(int);
 };
 
 static inline int get_tx_head_mb(struct ti_hecc_priv *priv)
@@ -317,6 +320,13 @@ static int ti_hecc_set_btc(struct ti_hecc_priv *priv)
 	return 0;
 }
 
+static void ti_hecc_transceiver_switch(const struct ti_hecc_priv *priv,
+					int on)
+{
+	if (priv->transceiver_switch)
+		priv->transceiver_switch(on);
+}
+
 static void ti_hecc_reset(struct net_device *ndev)
 {
 	u32 cnt;
@@ -818,10 +828,13 @@ static int ti_hecc_open(struct net_device *ndev)
 		return err;
 	}
 
+	ti_hecc_transceiver_switch(priv, 1);
+
 	/* Open common can device */
 	err = open_candev(ndev);
 	if (err) {
 		dev_err(ndev->dev.parent, "open_candev() failed %d\n", err);
+		ti_hecc_transceiver_switch(priv, 0);
 		free_irq(ndev->irq, ndev);
 		return err;
 	}
@@ -842,6 +855,7 @@ static int ti_hecc_close(struct net_device *ndev)
 	ti_hecc_stop(ndev);
 	free_irq(ndev->irq, ndev);
 	close_candev(ndev);
+	ti_hecc_transceiver_switch(priv, 0);
 
 	return 0;
 }
@@ -903,6 +917,7 @@ static int ti_hecc_probe(struct platform_device *pdev)
 	priv->hecc_ram_offset = pdata->hecc_ram_offset;
 	priv->mbx_offset = pdata->mbx_offset;
 	priv->int_line = pdata->int_line;
+	priv->transceiver_switch = pdata->transceiver_switch;
 
 	priv->can.bittiming_const = &ti_hecc_bittiming_const;
 	priv->can.do_set_mode = ti_hecc_do_set_mode;
diff --git a/include/linux/can/platform/ti_hecc.h b/include/linux/can/platform/ti_hecc.h
index 4688c7bb1bd1..af17cb3f7a84 100644
--- a/include/linux/can/platform/ti_hecc.h
+++ b/include/linux/can/platform/ti_hecc.h
@@ -1,3 +1,6 @@
+#ifndef __CAN_PLATFORM_TI_HECC_H__
+#define __CAN_PLATFORM_TI_HECC_H__
+
 /*
  * TI HECC (High End CAN Controller) driver platform header
  *
@@ -23,6 +26,7 @@
  * @mbx_offset:		Mailbox RAM offset
  * @int_line:		Interrupt line to use - 0 or 1
  * @version:		version for future use
+ * @transceiver_switch:	platform specific callback fn for transceiver control
  *
  * Platform data structure to get all platform specific settings.
  * this structure also accounts the fact that the IP may have different
@@ -35,6 +39,6 @@ struct ti_hecc_platform_data {
 	u32 mbx_offset;
 	u32 int_line;
 	u32 version;
+	void (*transceiver_switch) (int);
 };
-
-
+#endif
-- 
cgit v1.2.3


From 4c13dd3b48fcb6fbe44f241eb11a057ecd1cba75 Mon Sep 17 00:00:00 2001
From: Dmitry Monakhov <dmonakhov@openvz.org>
Date: Fri, 26 Feb 2010 09:36:12 +0300
Subject: failslab: add ability to filter slab caches

This patch allow to inject faults only for specific slabs.
In order to preserve default behavior cache filter is off by
default (all caches are faulty).

One may define specific set of slabs like this:
# mark skbuff_head_cache as faulty
echo 1 > /sys/kernel/slab/skbuff_head_cache/failslab
# Turn on cache filter (off by default)
echo 1 > /sys/kernel/debug/failslab/cache-filter
# Turn on fault injection
echo 1 > /sys/kernel/debug/failslab/times
echo 1 > /sys/kernel/debug/failslab/probability

Acked-by: David Rientjes <rientjes@google.com>
Acked-by: Akinobu Mita <akinobu.mita@gmail.com>
Acked-by: Christoph Lameter <cl@linux-foundation.org>
Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>
---
 Documentation/vm/slub.txt    |  1 +
 include/linux/fault-inject.h |  5 +++--
 include/linux/slab.h         |  5 +++++
 mm/failslab.c                | 18 +++++++++++++++---
 mm/slab.c                    |  2 +-
 mm/slub.c                    | 29 +++++++++++++++++++++++++++--
 6 files changed, 52 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/vm/slub.txt b/Documentation/vm/slub.txt
index b37300edf27c..07375e73981a 100644
--- a/Documentation/vm/slub.txt
+++ b/Documentation/vm/slub.txt
@@ -41,6 +41,7 @@ Possible debug options are
 	P		Poisoning (object and padding)
 	U		User tracking (free and alloc)
 	T		Trace (please only use on single slabs)
+	A		Toggle failslab filter mark for the cache
 	O		Switch debugging off for caches that would have
 			caused higher minimum slab orders
 	-		Switch all debugging off (useful if the kernel is
diff --git a/include/linux/fault-inject.h b/include/linux/fault-inject.h
index 06ca9b21dad2..7b64ad40e4ce 100644
--- a/include/linux/fault-inject.h
+++ b/include/linux/fault-inject.h
@@ -82,9 +82,10 @@ static inline void cleanup_fault_attr_dentries(struct fault_attr *attr)
 #endif /* CONFIG_FAULT_INJECTION */
 
 #ifdef CONFIG_FAILSLAB
-extern bool should_failslab(size_t size, gfp_t gfpflags);
+extern bool should_failslab(size_t size, gfp_t gfpflags, unsigned long flags);
 #else
-static inline bool should_failslab(size_t size, gfp_t gfpflags)
+static inline bool should_failslab(size_t size, gfp_t gfpflags,
+				unsigned long flags)
 {
 	return false;
 }
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 2da8372519f5..488446289cab 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -70,6 +70,11 @@
 #else
 # define SLAB_NOTRACK		0x00000000UL
 #endif
+#ifdef CONFIG_FAILSLAB
+# define SLAB_FAILSLAB		0x02000000UL	/* Fault injection mark */
+#else
+# define SLAB_FAILSLAB		0x00000000UL
+#endif
 
 /* The following flags affect the page allocator grouping pages by mobility */
 #define SLAB_RECLAIM_ACCOUNT	0x00020000UL		/* Objects are reclaimable */
diff --git a/mm/failslab.c b/mm/failslab.c
index 9339de5f0a91..bb41f98dd8b7 100644
--- a/mm/failslab.c
+++ b/mm/failslab.c
@@ -1,18 +1,22 @@
 #include <linux/fault-inject.h>
 #include <linux/gfp.h>
+#include <linux/slab.h>
 
 static struct {
 	struct fault_attr attr;
 	u32 ignore_gfp_wait;
+	int cache_filter;
 #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
 	struct dentry *ignore_gfp_wait_file;
+	struct dentry *cache_filter_file;
 #endif
 } failslab = {
 	.attr = FAULT_ATTR_INITIALIZER,
 	.ignore_gfp_wait = 1,
+	.cache_filter = 0,
 };
 
-bool should_failslab(size_t size, gfp_t gfpflags)
+bool should_failslab(size_t size, gfp_t gfpflags, unsigned long cache_flags)
 {
 	if (gfpflags & __GFP_NOFAIL)
 		return false;
@@ -20,6 +24,9 @@ bool should_failslab(size_t size, gfp_t gfpflags)
         if (failslab.ignore_gfp_wait && (gfpflags & __GFP_WAIT))
 		return false;
 
+	if (failslab.cache_filter && !(cache_flags & SLAB_FAILSLAB))
+		return false;
+
 	return should_fail(&failslab.attr, size);
 }
 
@@ -30,7 +37,6 @@ static int __init setup_failslab(char *str)
 __setup("failslab=", setup_failslab);
 
 #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
-
 static int __init failslab_debugfs_init(void)
 {
 	mode_t mode = S_IFREG | S_IRUSR | S_IWUSR;
@@ -46,8 +52,14 @@ static int __init failslab_debugfs_init(void)
 		debugfs_create_bool("ignore-gfp-wait", mode, dir,
 				      &failslab.ignore_gfp_wait);
 
-	if (!failslab.ignore_gfp_wait_file) {
+	failslab.cache_filter_file =
+		debugfs_create_bool("cache-filter", mode, dir,
+				      &failslab.cache_filter);
+
+	if (!failslab.ignore_gfp_wait_file ||
+	    !failslab.cache_filter_file) {
 		err = -ENOMEM;
+		debugfs_remove(failslab.cache_filter_file);
 		debugfs_remove(failslab.ignore_gfp_wait_file);
 		cleanup_fault_attr_dentries(&failslab.attr);
 	}
diff --git a/mm/slab.c b/mm/slab.c
index 7451bdacaf18..33496b704859 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3101,7 +3101,7 @@ static bool slab_should_failslab(struct kmem_cache *cachep, gfp_t flags)
 	if (cachep == &cache_cache)
 		return false;
 
-	return should_failslab(obj_size(cachep), flags);
+	return should_failslab(obj_size(cachep), flags, cachep->flags);
 }
 
 static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags)
diff --git a/mm/slub.c b/mm/slub.c
index 8d71aaf888d7..cab5288736c8 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -151,7 +151,8 @@
  * Set of flags that will prevent slab merging
  */
 #define SLUB_NEVER_MERGE (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \
-		SLAB_TRACE | SLAB_DESTROY_BY_RCU | SLAB_NOLEAKTRACE)
+		SLAB_TRACE | SLAB_DESTROY_BY_RCU | SLAB_NOLEAKTRACE | \
+		SLAB_FAILSLAB)
 
 #define SLUB_MERGE_SAME (SLAB_DEBUG_FREE | SLAB_RECLAIM_ACCOUNT | \
 		SLAB_CACHE_DMA | SLAB_NOTRACK)
@@ -1020,6 +1021,9 @@ static int __init setup_slub_debug(char *str)
 		case 't':
 			slub_debug |= SLAB_TRACE;
 			break;
+		case 'a':
+			slub_debug |= SLAB_FAILSLAB;
+			break;
 		default:
 			printk(KERN_ERR "slub_debug option '%c' "
 				"unknown. skipped\n", *str);
@@ -1718,7 +1722,7 @@ static __always_inline void *slab_alloc(struct kmem_cache *s,
 	lockdep_trace_alloc(gfpflags);
 	might_sleep_if(gfpflags & __GFP_WAIT);
 
-	if (should_failslab(s->objsize, gfpflags))
+	if (should_failslab(s->objsize, gfpflags, s->flags))
 		return NULL;
 
 	local_irq_save(flags);
@@ -4171,6 +4175,23 @@ static ssize_t trace_store(struct kmem_cache *s, const char *buf,
 }
 SLAB_ATTR(trace);
 
+#ifdef CONFIG_FAILSLAB
+static ssize_t failslab_show(struct kmem_cache *s, char *buf)
+{
+	return sprintf(buf, "%d\n", !!(s->flags & SLAB_FAILSLAB));
+}
+
+static ssize_t failslab_store(struct kmem_cache *s, const char *buf,
+							size_t length)
+{
+	s->flags &= ~SLAB_FAILSLAB;
+	if (buf[0] == '1')
+		s->flags |= SLAB_FAILSLAB;
+	return length;
+}
+SLAB_ATTR(failslab);
+#endif
+
 static ssize_t reclaim_account_show(struct kmem_cache *s, char *buf)
 {
 	return sprintf(buf, "%d\n", !!(s->flags & SLAB_RECLAIM_ACCOUNT));
@@ -4467,6 +4488,10 @@ static struct attribute *slab_attrs[] = {
 	&deactivate_remote_frees_attr.attr,
 	&order_fallback_attr.attr,
 #endif
+#ifdef CONFIG_FAILSLAB
+	&failslab_attr.attr,
+#endif
+
 	NULL
 };
 
-- 
cgit v1.2.3


From 5bdd00b93e368acc793748340c15cd2811fdd02b Mon Sep 17 00:00:00 2001
From: Theodore Kilgore <kilgota@banach.math.auburn.edu>
Date: Fri, 25 Dec 2009 05:16:32 -0300
Subject: V4L/DVB (13992): gspca_sn9c2028: New gspca subdriver

New gspca subdriver adding support for SN9C2028 dual-mode cameras.

Signed-off-by: Theodore Kilgore <kilgota@banach.math.auburn.edu>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/media/video/gspca/Kconfig    |  10 +
 drivers/media/video/gspca/Makefile   |   2 +
 drivers/media/video/gspca/sn9c2028.c | 757 +++++++++++++++++++++++++++++++++++
 drivers/media/video/gspca/sn9c2028.h |  51 +++
 include/linux/videodev2.h            |   1 +
 5 files changed, 821 insertions(+)
 create mode 100644 drivers/media/video/gspca/sn9c2028.c
 create mode 100644 drivers/media/video/gspca/sn9c2028.h

(limited to 'include/linux')

diff --git a/drivers/media/video/gspca/Kconfig b/drivers/media/video/gspca/Kconfig
index 561bab0874ce..20b0f62fe77f 100644
--- a/drivers/media/video/gspca/Kconfig
+++ b/drivers/media/video/gspca/Kconfig
@@ -141,6 +141,16 @@ config USB_GSPCA_PAC7311
 	  To compile this driver as a module, choose M here: the
 	  module will be called gspca_pac7311.
 
+config USB_GSPCA_SN9C2028
+	tristate "SONIX Dual-Mode USB Camera Driver"
+	depends on VIDEO_V4L2 && USB_GSPCA
+	help
+	  Say Y here if you want streaming support for Sonix SN9C2028 cameras.
+	  These are supported as stillcams in libgphoto2/camlibs/sonix.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called gspca_sn9c2028.
+
 config USB_GSPCA_SN9C20X
 	tristate "SN9C20X USB Camera Driver"
 	depends on VIDEO_V4L2 && USB_GSPCA
diff --git a/drivers/media/video/gspca/Makefile b/drivers/media/video/gspca/Makefile
index 553753d5c5ea..643722046749 100644
--- a/drivers/media/video/gspca/Makefile
+++ b/drivers/media/video/gspca/Makefile
@@ -12,6 +12,7 @@ obj-$(CONFIG_USB_GSPCA_OV534_9)  += gspca_ov534_9.o
 obj-$(CONFIG_USB_GSPCA_PAC207)   += gspca_pac207.o
 obj-$(CONFIG_USB_GSPCA_PAC7302)  += gspca_pac7302.o
 obj-$(CONFIG_USB_GSPCA_PAC7311)  += gspca_pac7311.o
+obj-$(CONFIG_USB_GSPCA_SN9C2028) += gspca_sn9c2028.o
 obj-$(CONFIG_USB_GSPCA_SN9C20X)  += gspca_sn9c20x.o
 obj-$(CONFIG_USB_GSPCA_SONIXB)   += gspca_sonixb.o
 obj-$(CONFIG_USB_GSPCA_SONIXJ)   += gspca_sonixj.o
@@ -45,6 +46,7 @@ gspca_ov534_9-objs  := ov534_9.o
 gspca_pac207-objs   := pac207.o
 gspca_pac7302-objs  := pac7302.o
 gspca_pac7311-objs  := pac7311.o
+gspca_sn9c2028-objs := sn9c2028.o
 gspca_sn9c20x-objs  := sn9c20x.o
 gspca_sonixb-objs   := sonixb.o
 gspca_sonixj-objs   := sonixj.o
diff --git a/drivers/media/video/gspca/sn9c2028.c b/drivers/media/video/gspca/sn9c2028.c
new file mode 100644
index 000000000000..dda5fd4aa69e
--- /dev/null
+++ b/drivers/media/video/gspca/sn9c2028.c
@@ -0,0 +1,757 @@
+/*
+ * SN9C2028 library
+ *
+ * Copyright (C) 2009 Theodore Kilgore <kilgota@auburn.edu>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#define MODULE_NAME "sn9c2028"
+
+#include "gspca.h"
+
+MODULE_AUTHOR("Theodore Kilgore");
+MODULE_DESCRIPTION("Sonix SN9C2028 USB Camera Driver");
+MODULE_LICENSE("GPL");
+
+/* specific webcam descriptor */
+struct sd {
+	struct gspca_dev gspca_dev;  /* !! must be the first item */
+	u8 sof_read;
+	u16 model;
+};
+
+struct init_command {
+	unsigned char instruction[6];
+	unsigned char to_read; /* length to read. 0 means no reply requested */
+};
+
+/* V4L2 controls supported by the driver */
+static struct ctrl sd_ctrls[] = {
+};
+
+/* How to change the resolution of any of the VGA cams is unknown */
+static const struct v4l2_pix_format vga_mode[] = {
+	{640, 480, V4L2_PIX_FMT_SN9C2028, V4L2_FIELD_NONE,
+		.bytesperline = 640,
+		.sizeimage = 640 * 480 * 3 / 4,
+		.colorspace = V4L2_COLORSPACE_SRGB,
+		.priv = 0},
+};
+
+/* No way to change the resolution of the CIF cams is known */
+static const struct v4l2_pix_format cif_mode[] = {
+	{352, 288, V4L2_PIX_FMT_SN9C2028, V4L2_FIELD_NONE,
+		.bytesperline = 352,
+		.sizeimage = 352 * 288 * 3 / 4,
+		.colorspace = V4L2_COLORSPACE_SRGB,
+		.priv = 0},
+};
+
+/* the bytes to write are in gspca_dev->usb_buf */
+static int sn9c2028_command(struct gspca_dev *gspca_dev, u8 *command)
+{
+	int rc;
+
+	PDEBUG(D_USBO, "sending command %02x%02x%02x%02x%02x%02x", command[0],
+	       command[1], command[2], command[3], command[4], command[5]);
+
+	memcpy(gspca_dev->usb_buf, command, 6);
+	rc = usb_control_msg(gspca_dev->dev,
+			usb_sndctrlpipe(gspca_dev->dev, 0),
+			USB_REQ_GET_CONFIGURATION,
+			USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_INTERFACE,
+			2, 0, gspca_dev->usb_buf, 6, 500);
+	if (rc < 0) {
+		PDEBUG(D_ERR, "command write [%02x] error %d",
+				gspca_dev->usb_buf[0], rc);
+		return rc;
+	}
+
+	return 0;
+}
+
+static int sn9c2028_read1(struct gspca_dev *gspca_dev)
+{
+	int rc;
+
+	rc = usb_control_msg(gspca_dev->dev,
+			usb_rcvctrlpipe(gspca_dev->dev, 0),
+			USB_REQ_GET_STATUS,
+			USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_INTERFACE,
+			1, 0, gspca_dev->usb_buf, 1, 500);
+	if (rc != 1) {
+		PDEBUG(D_ERR, "read1 error %d", rc);
+		return (rc < 0) ? rc : -EIO;
+	}
+	PDEBUG(D_USBI, "read1 response %02x", gspca_dev->usb_buf[0]);
+	return gspca_dev->usb_buf[0];
+}
+
+static int sn9c2028_read4(struct gspca_dev *gspca_dev, u8 *reading)
+{
+	int rc;
+	rc = usb_control_msg(gspca_dev->dev,
+			usb_rcvctrlpipe(gspca_dev->dev, 0),
+			USB_REQ_GET_STATUS,
+			USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_INTERFACE,
+			4, 0, gspca_dev->usb_buf, 4, 500);
+	if (rc != 4) {
+		PDEBUG(D_ERR, "read4 error %d", rc);
+		return (rc < 0) ? rc : -EIO;
+	}
+	memcpy(reading, gspca_dev->usb_buf, 4);
+	PDEBUG(D_USBI, "read4 response %02x%02x%02x%02x", reading[0],
+	       reading[1], reading[2], reading[3]);
+	return rc;
+}
+
+static int sn9c2028_long_command(struct gspca_dev *gspca_dev, u8 *command)
+{
+	int i, status;
+	__u8 reading[4];
+
+	status = sn9c2028_command(gspca_dev, command);
+	if (status < 0)
+		return status;
+
+	status = -1;
+	for (i = 0; i < 256 && status < 2; i++)
+		status = sn9c2028_read1(gspca_dev);
+	if (status != 2) {
+		PDEBUG(D_ERR, "long command status read error %d", status);
+		return (status < 0) ? status : -EIO;
+	}
+
+	memset(reading, 0, 4);
+	status = sn9c2028_read4(gspca_dev, reading);
+	if (status < 0)
+		return status;
+
+	/* in general, the first byte of the response is the first byte of
+	 * the command, or'ed with 8 */
+	status = sn9c2028_read1(gspca_dev);
+	if (status < 0)
+		return status;
+
+	return 0;
+}
+
+static int sn9c2028_short_command(struct gspca_dev *gspca_dev, u8 *command)
+{
+	int err_code;
+
+	err_code = sn9c2028_command(gspca_dev, command);
+	if (err_code < 0)
+		return err_code;
+
+	err_code = sn9c2028_read1(gspca_dev);
+	if (err_code < 0)
+		return err_code;
+
+	return 0;
+}
+
+/* this function is called at probe time */
+static int sd_config(struct gspca_dev *gspca_dev,
+		     const struct usb_device_id *id)
+{
+	struct sd *sd = (struct sd *) gspca_dev;
+	struct cam *cam = &gspca_dev->cam;
+
+	PDEBUG(D_PROBE, "SN9C2028 camera detected (vid/pid 0x%04X:0x%04X)",
+	       id->idVendor, id->idProduct);
+
+	sd->model = id->idProduct;
+
+	switch (sd->model) {
+	case 0x7005:
+		PDEBUG(D_PROBE, "Genius Smart 300 camera");
+		break;
+	case 0x8000:
+		PDEBUG(D_PROBE, "DC31VC");
+		break;
+	case 0x8001:
+		PDEBUG(D_PROBE, "Spy camera");
+		break;
+	case 0x8003:
+		PDEBUG(D_PROBE, "CIF camera");
+		break;
+	case 0x8008:
+		PDEBUG(D_PROBE, "Mini-Shotz ms-350 camera");
+		break;
+	case 0x800a:
+		PDEBUG(D_PROBE, "Vivitar 3350b type camera");
+		cam->input_flags = V4L2_IN_ST_VFLIP | V4L2_IN_ST_HFLIP;
+		break;
+	}
+
+	switch (sd->model) {
+	case 0x8000:
+	case 0x8001:
+	case 0x8003:
+		cam->cam_mode = cif_mode;
+		cam->nmodes = ARRAY_SIZE(cif_mode);
+		break;
+	default:
+		cam->cam_mode = vga_mode;
+		cam->nmodes = ARRAY_SIZE(vga_mode);
+	}
+	return 0;
+}
+
+/* this function is called at probe and resume time */
+static int sd_init(struct gspca_dev *gspca_dev)
+{
+	int status = -1;
+
+	sn9c2028_read1(gspca_dev);
+	sn9c2028_read1(gspca_dev);
+	status = sn9c2028_read1(gspca_dev);
+
+	return (status < 0) ? status : 0;
+}
+
+static int run_start_commands(struct gspca_dev *gspca_dev,
+			      struct init_command *cam_commands, int n)
+{
+	int i, err_code = -1;
+
+	for (i = 0; i < n; i++) {
+		switch (cam_commands[i].to_read) {
+		case 4:
+			err_code = sn9c2028_long_command(gspca_dev,
+					cam_commands[i].instruction);
+			break;
+		case 1:
+			err_code = sn9c2028_short_command(gspca_dev,
+					cam_commands[i].instruction);
+			break;
+		case 0:
+			err_code = sn9c2028_command(gspca_dev,
+					cam_commands[i].instruction);
+			break;
+		}
+		if (err_code < 0)
+			return err_code;
+	}
+	return 0;
+}
+
+static int start_spy_cam(struct gspca_dev *gspca_dev)
+{
+	struct init_command spy_start_commands[] = {
+		{{0x0c, 0x01, 0x00, 0x00, 0x00, 0x00}, 4},
+		{{0x13, 0x20, 0x01, 0x00, 0x00, 0x00}, 4},
+		{{0x13, 0x21, 0x01, 0x00, 0x00, 0x00}, 4},
+		{{0x13, 0x22, 0x01, 0x04, 0x00, 0x00}, 4},
+		{{0x13, 0x23, 0x01, 0x03, 0x00, 0x00}, 4},
+		{{0x13, 0x24, 0x01, 0x00, 0x00, 0x00}, 4},
+		{{0x13, 0x25, 0x01, 0x16, 0x00, 0x00}, 4}, /* width  352 */
+		{{0x13, 0x26, 0x01, 0x12, 0x00, 0x00}, 4}, /* height 288 */
+		/* {{0x13, 0x27, 0x01, 0x28, 0x00, 0x00}, 4}, */
+		{{0x13, 0x27, 0x01, 0x68, 0x00, 0x00}, 4},
+		{{0x13, 0x28, 0x01, 0x09, 0x00, 0x00}, 4}, /* red gain ?*/
+		/* {{0x13, 0x28, 0x01, 0x00, 0x00, 0x00}, 4}, */
+		{{0x13, 0x29, 0x01, 0x00, 0x00, 0x00}, 4},
+		/* {{0x13, 0x29, 0x01, 0x0c, 0x00, 0x00}, 4}, */
+		{{0x13, 0x2a, 0x01, 0x00, 0x00, 0x00}, 4},
+		{{0x13, 0x2b, 0x01, 0x00, 0x00, 0x00}, 4},
+		/* {{0x13, 0x2c, 0x01, 0x02, 0x00, 0x00}, 4}, */
+		{{0x13, 0x2c, 0x01, 0x02, 0x00, 0x00}, 4},
+		{{0x13, 0x2d, 0x01, 0x02, 0x00, 0x00}, 4},
+		/* {{0x13, 0x2e, 0x01, 0x09, 0x00, 0x00}, 4}, */
+		{{0x13, 0x2e, 0x01, 0x09, 0x00, 0x00}, 4},
+		{{0x13, 0x2f, 0x01, 0x07, 0x00, 0x00}, 4},
+		{{0x12, 0x34, 0x01, 0x00, 0x00, 0x00}, 4},
+		{{0x13, 0x34, 0x01, 0xa1, 0x00, 0x00}, 4},
+		{{0x13, 0x35, 0x01, 0x00, 0x00, 0x00}, 4},
+		{{0x11, 0x02, 0x06, 0x00, 0x00, 0x00}, 4},
+		{{0x11, 0x03, 0x13, 0x00, 0x00, 0x00}, 4}, /*don't mess with*/
+		/*{{0x11, 0x04, 0x06, 0x00, 0x00, 0x00}, 4}, observed */
+		{{0x11, 0x04, 0x00, 0x00, 0x00, 0x00}, 4}, /* brighter */
+		/*{{0x11, 0x05, 0x65, 0x00, 0x00, 0x00}, 4}, observed */
+		{{0x11, 0x05, 0x00, 0x00, 0x00, 0x00}, 4}, /* brighter */
+		{{0x11, 0x06, 0xb1, 0x00, 0x00, 0x00}, 4}, /* observed */
+		{{0x11, 0x07, 0x00, 0x00, 0x00, 0x00}, 4},
+		/*{{0x11, 0x08, 0x06, 0x00, 0x00, 0x00}, 4}, observed */
+		{{0x11, 0x08, 0x0b, 0x00, 0x00, 0x00}, 4},
+		{{0x11, 0x09, 0x01, 0x00, 0x00, 0x00}, 4},
+		{{0x11, 0x0a, 0x01, 0x00, 0x00, 0x00}, 4},
+		{{0x11, 0x0b, 0x01, 0x00, 0x00, 0x00}, 4},
+		{{0x11, 0x0c, 0x01, 0x00, 0x00, 0x00}, 4},
+		{{0x11, 0x0d, 0x00, 0x00, 0x00, 0x00}, 4},
+		{{0x11, 0x0e, 0x04, 0x00, 0x00, 0x00}, 4},
+		/* {{0x11, 0x0f, 0x00, 0x00, 0x00, 0x00}, 4}, */
+		/* brightness or gain. 0 is default. 4 is good
+		 * indoors at night with incandescent lighting */
+		{{0x11, 0x0f, 0x04, 0x00, 0x00, 0x00}, 4},
+		{{0x11, 0x10, 0x06, 0x00, 0x00, 0x00}, 4}, /*hstart or hoffs*/
+		{{0x11, 0x11, 0x06, 0x00, 0x00, 0x00}, 4},
+		{{0x11, 0x12, 0x00, 0x00, 0x00, 0x00}, 4},
+		{{0x11, 0x14, 0x02, 0x00, 0x00, 0x00}, 4},
+		{{0x11, 0x13, 0x01, 0x00, 0x00, 0x00}, 4},
+		/* {{0x1b, 0x02, 0x06, 0x00, 0x00, 0x00}, 1}, observed */
+		{{0x1b, 0x02, 0x11, 0x00, 0x00, 0x00}, 1}, /* brighter */
+		/* {{0x1b, 0x13, 0x01, 0x00, 0x00, 0x00}, 1}, observed */
+		{{0x1b, 0x13, 0x11, 0x00, 0x00, 0x00}, 1},
+		{{0x20, 0x34, 0xa1, 0x00, 0x00, 0x00}, 1}, /* compresses */
+		/* Camera should start to capture now. */
+	};
+
+	return run_start_commands(gspca_dev, spy_start_commands,
+				  ARRAY_SIZE(spy_start_commands));
+}
+
+static int start_cif_cam(struct gspca_dev *gspca_dev)
+{
+	struct init_command cif_start_commands[] = {
+		{{0x0c, 0x01, 0x00, 0x00, 0x00, 0x00}, 4},
+		/* The entire sequence below seems redundant */
+		/* {{0x13, 0x20, 0x01, 0x00, 0x00, 0x00}, 4},
+		{{0x13, 0x21, 0x01, 0x00, 0x00, 0x00}, 4},
+		{{0x13, 0x22, 0x01, 0x06, 0x00, 0x00}, 4},
+		{{0x13, 0x23, 0x01, 0x02, 0x00, 0x00}, 4},
+		{{0x13, 0x24, 0x01, 0x00, 0x00, 0x00}, 4},
+		{{0x13, 0x25, 0x01, 0x16, 0x00, 0x00}, 4}, width?
+		{{0x13, 0x26, 0x01, 0x12, 0x00, 0x00}, 4}, height?
+		{{0x13, 0x27, 0x01, 0x68, 0x00, 0x00}, 4}, subsample?
+		{{0x13, 0x28, 0x01, 0x00, 0x00, 0x00}, 4},
+		{{0x13, 0x29, 0x01, 0x20, 0x00, 0x00}, 4},
+		{{0x13, 0x2a, 0x01, 0x00, 0x00, 0x00}, 4},
+		{{0x13, 0x2b, 0x01, 0x00, 0x00, 0x00}, 4},
+		{{0x13, 0x2c, 0x01, 0x02, 0x00, 0x00}, 4},
+		{{0x13, 0x2d, 0x01, 0x03, 0x00, 0x00}, 4},
+		{{0x13, 0x2e, 0x01, 0x0f, 0x00, 0x00}, 4},
+		{{0x13, 0x2f, 0x01, 0x0c, 0x00, 0x00}, 4},
+		{{0x12, 0x34, 0x01, 0x00, 0x00, 0x00}, 4},
+		{{0x13, 0x34, 0x01, 0xa1, 0x00, 0x00}, 4},
+		{{0x13, 0x35, 0x01, 0x00, 0x00, 0x00}, 4},*/
+		{{0x1b, 0x21, 0x00, 0x00, 0x00, 0x00}, 1},
+		{{0x1b, 0x17, 0x00, 0x00, 0x00, 0x00}, 1},
+		{{0x1b, 0x19, 0x00, 0x00, 0x00, 0x00}, 1},
+		{{0x1b, 0x02, 0x06, 0x00, 0x00, 0x00}, 1},
+		{{0x1b, 0x03, 0x5a, 0x00, 0x00, 0x00}, 1},
+		{{0x1b, 0x04, 0x27, 0x00, 0x00, 0x00}, 1},
+		{{0x1b, 0x05, 0x01, 0x00, 0x00, 0x00}, 1},
+		{{0x1b, 0x12, 0x14, 0x00, 0x00, 0x00}, 1},
+		{{0x1b, 0x13, 0x00, 0x00, 0x00, 0x00}, 1},
+		{{0x1b, 0x14, 0x00, 0x00, 0x00, 0x00}, 1},
+		{{0x1b, 0x15, 0x00, 0x00, 0x00, 0x00}, 1},
+		{{0x1b, 0x16, 0x00, 0x00, 0x00, 0x00}, 1},
+		{{0x1b, 0x77, 0xa2, 0x00, 0x00, 0x00}, 1},
+		{{0x1b, 0x06, 0x0f, 0x00, 0x00, 0x00}, 1},
+		{{0x1b, 0x07, 0x14, 0x00, 0x00, 0x00}, 1},
+		{{0x1b, 0x08, 0x0f, 0x00, 0x00, 0x00}, 1},
+		{{0x1b, 0x09, 0x10, 0x00, 0x00, 0x00}, 1},
+		{{0x1b, 0x0e, 0x00, 0x00, 0x00, 0x00}, 1},
+		{{0x1b, 0x0f, 0x00, 0x00, 0x00, 0x00}, 1},
+		{{0x1b, 0x12, 0x07, 0x00, 0x00, 0x00}, 1},
+		{{0x1b, 0x10, 0x1f, 0x00, 0x00, 0x00}, 1},
+		{{0x1b, 0x11, 0x01, 0x00, 0x00, 0x00}, 1},
+		{{0x13, 0x25, 0x01, 0x16, 0x00, 0x00}, 1}, /* width/8 */
+		{{0x13, 0x26, 0x01, 0x12, 0x00, 0x00}, 1}, /* height/8 */
+		/* {{0x13, 0x27, 0x01, 0x68, 0x00, 0x00}, 4}, subsample?
+		 * {{0x13, 0x28, 0x01, 0x1e, 0x00, 0x00}, 4}, does nothing
+		 * {{0x13, 0x27, 0x01, 0x20, 0x00, 0x00}, 4}, */
+		/* {{0x13, 0x29, 0x01, 0x22, 0x00, 0x00}, 4},
+		 * causes subsampling
+		 * but not a change in the resolution setting! */
+		{{0x13, 0x2c, 0x01, 0x02, 0x00, 0x00}, 4},
+		{{0x13, 0x2d, 0x01, 0x01, 0x00, 0x00}, 4},
+		{{0x13, 0x2e, 0x01, 0x08, 0x00, 0x00}, 4},
+		{{0x13, 0x2f, 0x01, 0x06, 0x00, 0x00}, 4},
+		{{0x13, 0x28, 0x01, 0x00, 0x00, 0x00}, 4},
+		{{0x1b, 0x04, 0x6d, 0x00, 0x00, 0x00}, 1},
+		{{0x1b, 0x05, 0x03, 0x00, 0x00, 0x00}, 1},
+		{{0x20, 0x36, 0x06, 0x00, 0x00, 0x00}, 1},
+		{{0x1b, 0x0e, 0x01, 0x00, 0x00, 0x00}, 1},
+		{{0x12, 0x27, 0x01, 0x00, 0x00, 0x00}, 4},
+		{{0x1b, 0x0f, 0x00, 0x00, 0x00, 0x00}, 1},
+		{{0x20, 0x36, 0x05, 0x00, 0x00, 0x00}, 1},
+		{{0x1b, 0x10, 0x0f, 0x00, 0x00, 0x00}, 1},
+		{{0x1b, 0x02, 0x06, 0x00, 0x00, 0x00}, 1},
+		{{0x1b, 0x11, 0x01, 0x00, 0x00, 0x00}, 1},
+		{{0x20, 0x34, 0xa1, 0x00, 0x00, 0x00}, 1},/* use compression */
+		/* Camera should start to capture now. */
+	};
+
+	return run_start_commands(gspca_dev, cif_start_commands,
+				  ARRAY_SIZE(cif_start_commands));
+}
+
+static int start_ms350_cam(struct gspca_dev *gspca_dev)
+{
+	struct init_command ms350_start_commands[] = {
+		{{0x0c, 0x01, 0x00, 0x00, 0x00, 0x00}, 4},
+		{{0x16, 0x01, 0x00, 0x00, 0x00, 0x00}, 4},
+		{{0x13, 0x20, 0x01, 0x00, 0x00, 0x00}, 4},
+		{{0x13, 0x21, 0x01, 0x00, 0x00, 0x00}, 4},
+		{{0x13, 0x22, 0x01, 0x04, 0x00, 0x00}, 4},
+		{{0x13, 0x23, 0x01, 0x03, 0x00, 0x00}, 4},
+		{{0x13, 0x24, 0x01, 0x00, 0x00, 0x00}, 4},
+		{{0x13, 0x25, 0x01, 0x16, 0x00, 0x00}, 4},
+		{{0x13, 0x26, 0x01, 0x12, 0x00, 0x00}, 4},
+		{{0x13, 0x27, 0x01, 0x28, 0x00, 0x00}, 4},
+		{{0x13, 0x28, 0x01, 0x09, 0x00, 0x00}, 4},
+		{{0x13, 0x29, 0x01, 0x00, 0x00, 0x00}, 4},
+		{{0x13, 0x2a, 0x01, 0x00, 0x00, 0x00}, 4},
+		{{0x13, 0x2b, 0x01, 0x00, 0x00, 0x00}, 4},
+		{{0x13, 0x2c, 0x01, 0x02, 0x00, 0x00}, 4},
+		{{0x13, 0x2d, 0x01, 0x03, 0x00, 0x00}, 4},
+		{{0x13, 0x2e, 0x01, 0x0f, 0x00, 0x00}, 4},
+		{{0x13, 0x2f, 0x01, 0x0c, 0x00, 0x00}, 4},
+		{{0x12, 0x34, 0x01, 0x00, 0x00, 0x00}, 4},
+		{{0x13, 0x34, 0x01, 0xa1, 0x00, 0x00}, 4},
+		{{0x13, 0x35, 0x01, 0x00, 0x00, 0x00}, 4},
+		{{0x11, 0x00, 0x01, 0x00, 0x00, 0x00}, 4},
+		{{0x11, 0x01, 0x70, 0x00, 0x00, 0x00}, 4},
+		{{0x11, 0x02, 0x05, 0x00, 0x00, 0x00}, 4},
+		{{0x11, 0x03, 0x5d, 0x00, 0x00, 0x00}, 4},
+		{{0x11, 0x04, 0x07, 0x00, 0x00, 0x00}, 4},
+		{{0x11, 0x05, 0x25, 0x00, 0x00, 0x00}, 4},
+		{{0x11, 0x06, 0x00, 0x00, 0x00, 0x00}, 4},
+		{{0x11, 0x07, 0x09, 0x00, 0x00, 0x00}, 4},
+		{{0x11, 0x08, 0x01, 0x00, 0x00, 0x00}, 4},
+		{{0x11, 0x09, 0x00, 0x00, 0x00, 0x00}, 4},
+		{{0x11, 0x0a, 0x00, 0x00, 0x00, 0x00}, 4},
+		{{0x11, 0x0b, 0x01, 0x00, 0x00, 0x00}, 4},
+		{{0x11, 0x0c, 0x00, 0x00, 0x00, 0x00}, 4},
+		{{0x11, 0x0d, 0x0c, 0x00, 0x00, 0x00}, 4},
+		{{0x11, 0x0e, 0x01, 0x00, 0x00, 0x00}, 4},
+		{{0x11, 0x0f, 0x00, 0x00, 0x00, 0x00}, 4},
+		{{0x11, 0x10, 0x00, 0x00, 0x00, 0x00}, 4},
+		{{0x11, 0x11, 0x00, 0x00, 0x00, 0x00}, 4},
+		{{0x11, 0x12, 0x00, 0x00, 0x00, 0x00}, 4},
+		{{0x11, 0x13, 0x63, 0x00, 0x00, 0x00}, 4},
+		{{0x11, 0x15, 0x70, 0x00, 0x00, 0x00}, 4},
+		{{0x11, 0x18, 0x00, 0x00, 0x00, 0x00}, 4},
+		{{0x11, 0x11, 0x01, 0x00, 0x00, 0x00}, 4},
+		{{0x13, 0x25, 0x01, 0x28, 0x00, 0x00}, 4}, /* width  */
+		{{0x13, 0x26, 0x01, 0x1e, 0x00, 0x00}, 4}, /* height */
+		{{0x13, 0x28, 0x01, 0x09, 0x00, 0x00}, 4}, /* vstart? */
+		{{0x13, 0x27, 0x01, 0x28, 0x00, 0x00}, 4},
+		{{0x13, 0x29, 0x01, 0x40, 0x00, 0x00}, 4}, /* hstart? */
+		{{0x13, 0x2c, 0x01, 0x02, 0x00, 0x00}, 4},
+		{{0x13, 0x2d, 0x01, 0x03, 0x00, 0x00}, 4},
+		{{0x13, 0x2e, 0x01, 0x0f, 0x00, 0x00}, 4},
+		{{0x13, 0x2f, 0x01, 0x0c, 0x00, 0x00}, 4},
+		{{0x1b, 0x02, 0x05, 0x00, 0x00, 0x00}, 1},
+		{{0x1b, 0x11, 0x01, 0x00, 0x00, 0x00}, 1},
+		{{0x20, 0x18, 0x00, 0x00, 0x00, 0x00}, 1},
+		{{0x1b, 0x02, 0x0a, 0x00, 0x00, 0x00}, 1},
+		{{0x1b, 0x11, 0x01, 0x00, 0x00, 0x00}, 0},
+		/* Camera should start to capture now. */
+	};
+
+	return run_start_commands(gspca_dev, ms350_start_commands,
+				  ARRAY_SIZE(ms350_start_commands));
+}
+
+static int start_genius_cam(struct gspca_dev *gspca_dev)
+{
+	struct init_command genius_start_commands[] = {
+		{{0x0c, 0x01, 0x00, 0x00, 0x00, 0x00}, 4},
+		{{0x16, 0x01, 0x00, 0x00, 0x00, 0x00}, 4},
+		{{0x10, 0x00, 0x00, 0x00, 0x00, 0x00}, 4},
+		{{0x13, 0x25, 0x01, 0x16, 0x00, 0x00}, 4},
+		{{0x13, 0x26, 0x01, 0x12, 0x00, 0x00}, 4},
+		/* "preliminary" width and height settings */
+		{{0x13, 0x28, 0x01, 0x0e, 0x00, 0x00}, 4},
+		{{0x13, 0x27, 0x01, 0x20, 0x00, 0x00}, 4},
+		{{0x13, 0x29, 0x01, 0x22, 0x00, 0x00}, 4},
+		{{0x13, 0x2c, 0x01, 0x02, 0x00, 0x00}, 4},
+		{{0x13, 0x2d, 0x01, 0x02, 0x00, 0x00}, 4},
+		{{0x13, 0x2e, 0x01, 0x09, 0x00, 0x00}, 4},
+		{{0x13, 0x2f, 0x01, 0x07, 0x00, 0x00}, 4},
+		{{0x11, 0x20, 0x00, 0x00, 0x00, 0x00}, 4},
+		{{0x11, 0x21, 0x2d, 0x00, 0x00, 0x00}, 4},
+		{{0x11, 0x22, 0x00, 0x00, 0x00, 0x00}, 4},
+		{{0x11, 0x23, 0x03, 0x00, 0x00, 0x00}, 4},
+		{{0x11, 0x10, 0x00, 0x00, 0x00, 0x00}, 4},
+		{{0x11, 0x11, 0x64, 0x00, 0x00, 0x00}, 4},
+		{{0x11, 0x12, 0x00, 0x00, 0x00, 0x00}, 4},
+		{{0x11, 0x13, 0x91, 0x00, 0x00, 0x00}, 4},
+		{{0x11, 0x14, 0x01, 0x00, 0x00, 0x00}, 4},
+		{{0x11, 0x15, 0x20, 0x00, 0x00, 0x00}, 4},
+		{{0x11, 0x16, 0x01, 0x00, 0x00, 0x00}, 4},
+		{{0x11, 0x17, 0x60, 0x00, 0x00, 0x00}, 4},
+		{{0x11, 0x20, 0x00, 0x00, 0x00, 0x00}, 4},
+		{{0x11, 0x21, 0x2d, 0x00, 0x00, 0x00}, 4},
+		{{0x11, 0x22, 0x00, 0x00, 0x00, 0x00}, 4},
+		{{0x11, 0x23, 0x03, 0x00, 0x00, 0x00}, 4},
+		{{0x11, 0x25, 0x00, 0x00, 0x00, 0x00}, 4},
+		{{0x11, 0x26, 0x02, 0x00, 0x00, 0x00}, 4},
+		{{0x11, 0x27, 0x88, 0x00, 0x00, 0x00}, 4},
+		{{0x11, 0x30, 0x38, 0x00, 0x00, 0x00}, 4},
+		{{0x11, 0x31, 0x2a, 0x00, 0x00, 0x00}, 4},
+		{{0x11, 0x32, 0x2a, 0x00, 0x00, 0x00}, 4},
+		{{0x11, 0x33, 0x2a, 0x00, 0x00, 0x00}, 4},
+		{{0x11, 0x34, 0x02, 0x00, 0x00, 0x00}, 4},
+		{{0x11, 0x5b, 0x0a, 0x00, 0x00, 0x00}, 4},
+		{{0x13, 0x25, 0x01, 0x28, 0x00, 0x00}, 4}, /* real width */
+		{{0x13, 0x26, 0x01, 0x1e, 0x00, 0x00}, 4}, /* real height */
+		{{0x13, 0x28, 0x01, 0x0e, 0x00, 0x00}, 4},
+		{{0x13, 0x27, 0x01, 0x20, 0x00, 0x00}, 4},
+		{{0x13, 0x29, 0x01, 0x62, 0x00, 0x00}, 4},
+		{{0x13, 0x2c, 0x01, 0x02, 0x00, 0x00}, 4},
+		{{0x13, 0x2d, 0x01, 0x03, 0x00, 0x00}, 4},
+		{{0x13, 0x2e, 0x01, 0x0f, 0x00, 0x00}, 4},
+		{{0x13, 0x2f, 0x01, 0x0c, 0x00, 0x00}, 4},
+		{{0x11, 0x20, 0x00, 0x00, 0x00, 0x00}, 4},
+		{{0x11, 0x21, 0x2a, 0x00, 0x00, 0x00}, 4},
+		{{0x11, 0x22, 0x00, 0x00, 0x00, 0x00}, 4},
+		{{0x11, 0x23, 0x28, 0x00, 0x00, 0x00}, 4},
+		{{0x11, 0x10, 0x00, 0x00, 0x00, 0x00}, 4},
+		{{0x11, 0x11, 0x04, 0x00, 0x00, 0x00}, 4},
+		{{0x11, 0x12, 0x00, 0x00, 0x00, 0x00}, 4},
+		{{0x11, 0x13, 0x03, 0x00, 0x00, 0x00}, 4},
+		{{0x11, 0x14, 0x01, 0x00, 0x00, 0x00}, 4},
+		{{0x11, 0x15, 0xe0, 0x00, 0x00, 0x00}, 4},
+		{{0x11, 0x16, 0x02, 0x00, 0x00, 0x00}, 4},
+		{{0x11, 0x17, 0x80, 0x00, 0x00, 0x00}, 4},
+		{{0x1c, 0x20, 0x00, 0x2a, 0x00, 0x00}, 1},
+		{{0x1c, 0x20, 0x00, 0x2a, 0x00, 0x00}, 1},
+		{{0x20, 0x34, 0xa1, 0x00, 0x00, 0x00}, 0}
+		/* Camera should start to capture now. */
+	};
+
+	return run_start_commands(gspca_dev, genius_start_commands,
+				  ARRAY_SIZE(genius_start_commands));
+}
+
+static int start_vivitar_cam(struct gspca_dev *gspca_dev)
+{
+	struct init_command vivitar_start_commands[] = {
+		{{0x0c, 0x01, 0x00, 0x00, 0x00, 0x00}, 4},
+		{{0x13, 0x20, 0x01, 0x00, 0x00, 0x00}, 4},
+		{{0x13, 0x21, 0x01, 0x00, 0x00, 0x00}, 4},
+		{{0x13, 0x22, 0x01, 0x01, 0x00, 0x00}, 4},
+		{{0x13, 0x23, 0x01, 0x01, 0x00, 0x00}, 4},
+		{{0x13, 0x24, 0x01, 0x00, 0x00, 0x00}, 4},
+		{{0x13, 0x25, 0x01, 0x28, 0x00, 0x00}, 4},
+		{{0x13, 0x26, 0x01, 0x1e, 0x00, 0x00}, 4},
+		{{0x13, 0x27, 0x01, 0x20, 0x00, 0x00}, 4},
+		{{0x13, 0x28, 0x01, 0x0a, 0x00, 0x00}, 4},
+		/*
+		 * Above is changed from OEM 0x0b. Fixes Bayer tiling.
+		 * Presumably gives a vertical shift of one row.
+		 */
+		{{0x13, 0x29, 0x01, 0x20, 0x00, 0x00}, 4},
+		/* Above seems to do horizontal shift. */
+		{{0x13, 0x2a, 0x01, 0x00, 0x00, 0x00}, 4},
+		{{0x13, 0x2b, 0x01, 0x00, 0x00, 0x00}, 4},
+		{{0x13, 0x2c, 0x01, 0x02, 0x00, 0x00}, 4},
+		{{0x13, 0x2d, 0x01, 0x03, 0x00, 0x00}, 4},
+		{{0x13, 0x2e, 0x01, 0x0f, 0x00, 0x00}, 4},
+		{{0x13, 0x2f, 0x01, 0x0c, 0x00, 0x00}, 4},
+		/* Above three commands seem to relate to brightness. */
+		{{0x12, 0x34, 0x01, 0x00, 0x00, 0x00}, 4},
+		{{0x13, 0x34, 0x01, 0xa1, 0x00, 0x00}, 4},
+		{{0x13, 0x35, 0x01, 0x00, 0x00, 0x00}, 4},
+		{{0x1b, 0x12, 0x80, 0x00, 0x00, 0x00}, 1},
+		{{0x1b, 0x01, 0x77, 0x00, 0x00, 0x00}, 1},
+		{{0x1b, 0x02, 0x3a, 0x00, 0x00, 0x00}, 1},
+		{{0x1b, 0x12, 0x78, 0x00, 0x00, 0x00}, 1},
+		{{0x1b, 0x13, 0x00, 0x00, 0x00, 0x00}, 1},
+		{{0x1b, 0x14, 0x80, 0x00, 0x00, 0x00}, 1},
+		{{0x1b, 0x15, 0x34, 0x00, 0x00, 0x00}, 1},
+		{{0x1b, 0x1b, 0x04, 0x00, 0x00, 0x00}, 1},
+		{{0x1b, 0x20, 0x44, 0x00, 0x00, 0x00}, 1},
+		{{0x1b, 0x23, 0xee, 0x00, 0x00, 0x00}, 1},
+		{{0x1b, 0x26, 0xa0, 0x00, 0x00, 0x00}, 1},
+		{{0x1b, 0x27, 0x9a, 0x00, 0x00, 0x00}, 1},
+		{{0x1b, 0x28, 0xa0, 0x00, 0x00, 0x00}, 1},
+		{{0x1b, 0x29, 0x30, 0x00, 0x00, 0x00}, 1},
+		{{0x1b, 0x2a, 0x80, 0x00, 0x00, 0x00}, 1},
+		{{0x1b, 0x2b, 0x00, 0x00, 0x00, 0x00}, 1},
+		{{0x1b, 0x2f, 0x3d, 0x00, 0x00, 0x00}, 1},
+		{{0x1b, 0x30, 0x24, 0x00, 0x00, 0x00}, 1},
+		{{0x1b, 0x32, 0x86, 0x00, 0x00, 0x00}, 1},
+		{{0x1b, 0x60, 0xa9, 0x00, 0x00, 0x00}, 1},
+		{{0x1b, 0x61, 0x42, 0x00, 0x00, 0x00}, 1},
+		{{0x1b, 0x65, 0x00, 0x00, 0x00, 0x00}, 1},
+		{{0x1b, 0x69, 0x38, 0x00, 0x00, 0x00}, 1},
+		{{0x1b, 0x6f, 0x88, 0x00, 0x00, 0x00}, 1},
+		{{0x1b, 0x70, 0x0b, 0x00, 0x00, 0x00}, 1},
+		{{0x1b, 0x71, 0x00, 0x00, 0x00, 0x00}, 1},
+		{{0x1b, 0x74, 0x21, 0x00, 0x00, 0x00}, 1},
+		{{0x1b, 0x75, 0x86, 0x00, 0x00, 0x00}, 1},
+		{{0x1b, 0x76, 0x00, 0x00, 0x00, 0x00}, 1},
+		{{0x1b, 0x7d, 0xf3, 0x00, 0x00, 0x00}, 1},
+		{{0x1b, 0x17, 0x1c, 0x00, 0x00, 0x00}, 1},
+		{{0x1b, 0x18, 0xc0, 0x00, 0x00, 0x00}, 1},
+		{{0x1b, 0x19, 0x05, 0x00, 0x00, 0x00}, 1},
+		{{0x1b, 0x1a, 0xf6, 0x00, 0x00, 0x00}, 1},
+		/* {{0x13, 0x25, 0x01, 0x28, 0x00, 0x00}, 4},
+		{{0x13, 0x26, 0x01, 0x1e, 0x00, 0x00}, 4},
+		{{0x13, 0x28, 0x01, 0x0b, 0x00, 0x00}, 4}, */
+		{{0x20, 0x36, 0x06, 0x00, 0x00, 0x00}, 1},
+		{{0x1b, 0x10, 0x26, 0x00, 0x00, 0x00}, 1},
+		{{0x12, 0x27, 0x01, 0x00, 0x00, 0x00}, 4},
+		{{0x1b, 0x76, 0x03, 0x00, 0x00, 0x00}, 1},
+		{{0x20, 0x36, 0x05, 0x00, 0x00, 0x00}, 1},
+		{{0x1b, 0x00, 0x3f, 0x00, 0x00, 0x00}, 1},
+		/* Above is brightness; OEM driver setting is 0x10 */
+		{{0x12, 0x27, 0x01, 0x00, 0x00, 0x00}, 4},
+		{{0x20, 0x29, 0x30, 0x00, 0x00, 0x00}, 1},
+		{{0x20, 0x34, 0xa1, 0x00, 0x00, 0x00}, 1}
+	};
+
+	return run_start_commands(gspca_dev, vivitar_start_commands,
+				  ARRAY_SIZE(vivitar_start_commands));
+}
+
+static int sd_start(struct gspca_dev *gspca_dev)
+{
+	struct sd *sd = (struct sd *) gspca_dev;
+	int err_code;
+
+	sd->sof_read = 0;
+
+	switch (sd->model) {
+	case 0x7005:
+		err_code = start_genius_cam(gspca_dev);
+		break;
+	case 0x8001:
+		err_code = start_spy_cam(gspca_dev);
+		break;
+	case 0x8003:
+		err_code = start_cif_cam(gspca_dev);
+		break;
+	case 0x8008:
+		err_code = start_ms350_cam(gspca_dev);
+		break;
+	case 0x800a:
+		err_code = start_vivitar_cam(gspca_dev);
+		break;
+	default:
+		PDEBUG(D_ERR, "Starting unknown camera, please report this");
+		return -ENXIO;
+	}
+
+	return err_code;
+}
+
+static void sd_stopN(struct gspca_dev *gspca_dev)
+{
+	int result;
+	__u8 data[6];
+
+	result = sn9c2028_read1(gspca_dev);
+	if (result < 0)
+		PDEBUG(D_ERR, "Camera Stop read failed");
+
+	memset(data, 0, 6);
+	data[0] = 0x14;
+	result = sn9c2028_command(gspca_dev, data);
+	if (result < 0)
+		PDEBUG(D_ERR, "Camera Stop command failed");
+}
+
+/* Include sn9c2028 sof detection functions */
+#include "sn9c2028.h"
+
+static void sd_pkt_scan(struct gspca_dev *gspca_dev,
+			__u8 *data,			/* isoc packet */
+			int len)			/* iso packet length */
+{
+	unsigned char *sof;
+
+	sof = sn9c2028_find_sof(gspca_dev, data, len);
+	if (sof) {
+		int n;
+
+		/* finish decoding current frame */
+		n = sof - data;
+		if (n > sizeof sn9c2028_sof_marker)
+			n -= sizeof sn9c2028_sof_marker;
+		else
+			n = 0;
+		gspca_frame_add(gspca_dev, LAST_PACKET, data, n);
+		/* Start next frame. */
+		gspca_frame_add(gspca_dev, FIRST_PACKET,
+			sn9c2028_sof_marker, sizeof sn9c2028_sof_marker);
+		len -= sof - data;
+		data = sof;
+	}
+	gspca_frame_add(gspca_dev, INTER_PACKET, data, len);
+}
+
+/* sub-driver description */
+static const struct sd_desc sd_desc = {
+	.name = MODULE_NAME,
+	.ctrls = sd_ctrls,
+	.nctrls = ARRAY_SIZE(sd_ctrls),
+	.config = sd_config,
+	.init = sd_init,
+	.start = sd_start,
+	.stopN = sd_stopN,
+	.pkt_scan = sd_pkt_scan,
+};
+
+/* -- module initialisation -- */
+static const __devinitdata struct usb_device_id device_table[] = {
+	{USB_DEVICE(0x0458, 0x7005)}, /* Genius Smart 300, version 2 */
+	/* The Genius Smart is untested. I can't find an owner ! */
+	/* {USB_DEVICE(0x0c45, 0x8000)}, DC31VC, Don't know this camera */
+	{USB_DEVICE(0x0c45, 0x8001)}, /* Wild Planet digital spy cam */
+	{USB_DEVICE(0x0c45, 0x8003)}, /* Several small CIF cameras */
+	/* {USB_DEVICE(0x0c45, 0x8006)}, Unknown VGA camera */
+	{USB_DEVICE(0x0c45, 0x8008)}, /* Mini-Shotz ms-350 */
+	{USB_DEVICE(0x0c45, 0x800a)}, /* Vivicam 3350B */
+	{}
+};
+MODULE_DEVICE_TABLE(usb, device_table);
+
+/* -- device connect -- */
+static int sd_probe(struct usb_interface *intf,
+			const struct usb_device_id *id)
+{
+	return gspca_dev_probe(intf, id, &sd_desc, sizeof(struct sd),
+			       THIS_MODULE);
+}
+
+static struct usb_driver sd_driver = {
+	.name = MODULE_NAME,
+	.id_table = device_table,
+	.probe = sd_probe,
+	.disconnect = gspca_disconnect,
+#ifdef CONFIG_PM
+	.suspend = gspca_suspend,
+	.resume = gspca_resume,
+#endif
+};
+
+/* -- module insert / remove -- */
+static int __init sd_mod_init(void)
+{
+	int ret;
+
+	ret = usb_register(&sd_driver);
+	if (ret < 0)
+		return ret;
+	PDEBUG(D_PROBE, "registered");
+	return 0;
+}
+
+static void __exit sd_mod_exit(void)
+{
+	usb_deregister(&sd_driver);
+	PDEBUG(D_PROBE, "deregistered");
+}
+
+module_init(sd_mod_init);
+module_exit(sd_mod_exit);
diff --git a/drivers/media/video/gspca/sn9c2028.h b/drivers/media/video/gspca/sn9c2028.h
new file mode 100644
index 000000000000..8fd1d3e05665
--- /dev/null
+++ b/drivers/media/video/gspca/sn9c2028.h
@@ -0,0 +1,51 @@
+/*
+ * SN9C2028 common functions
+ *
+ * Copyright (C) 2009 Theodore Kilgore <kilgota@auburn,edu>
+ *
+ * Based closely upon the file gspca/pac_common.h
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+static const unsigned char sn9c2028_sof_marker[5] =
+	{ 0xff, 0xff, 0x00, 0xc4, 0xc4 };
+
+static unsigned char *sn9c2028_find_sof(struct gspca_dev *gspca_dev,
+					unsigned char *m, int len)
+{
+	struct sd *sd = (struct sd *) gspca_dev;
+	int i;
+
+	/* Search for the SOF marker (fixed part) in the header */
+	for (i = 0; i < len; i++) {
+		if (m[i] == sn9c2028_sof_marker[sd->sof_read]) {
+			sd->sof_read++;
+			if (sd->sof_read == sizeof(sn9c2028_sof_marker)) {
+				PDEBUG(D_FRAM,
+					"SOF found, bytes to analyze: %u."
+					" Frame starts at byte #%u",
+					len, i + 1);
+				sd->sof_read = 0;
+				return m + i + 1;
+			}
+		} else {
+			sd->sof_read = 0;
+		}
+	}
+
+	return NULL;
+}
diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h
index d4962a782b8a..1b06360af38d 100644
--- a/include/linux/videodev2.h
+++ b/include/linux/videodev2.h
@@ -362,6 +362,7 @@ struct v4l2_pix_format {
 #define V4L2_PIX_FMT_SPCA561  v4l2_fourcc('S', '5', '6', '1') /* compressed GBRG bayer */
 #define V4L2_PIX_FMT_PAC207   v4l2_fourcc('P', '2', '0', '7') /* compressed BGGR bayer */
 #define V4L2_PIX_FMT_MR97310A v4l2_fourcc('M', '3', '1', '0') /* compressed BGGR bayer */
+#define V4L2_PIX_FMT_SN9C2028 v4l2_fourcc('S', 'O', 'N', 'X') /* compressed GBRG bayer */
 #define V4L2_PIX_FMT_SQ905C   v4l2_fourcc('9', '0', '5', 'C') /* compressed RGGB bayer */
 #define V4L2_PIX_FMT_PJPG     v4l2_fourcc('P', 'J', 'P', 'G') /* Pixart 73xx JPEG */
 #define V4L2_PIX_FMT_OV511    v4l2_fourcc('O', '5', '1', '1') /* ov511 JPEG */
-- 
cgit v1.2.3


From 54e8bc5d64a651e2fb8b2366637e6a7d920a4c70 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Thu, 14 Jan 2010 09:37:18 -0300
Subject: V4L/DVB (14003): gspca_cpai1: New gspca subdriver for CPIA CPiA
 version 1 cams

This new driver supports USB PIA CPiA version 1 cams, replacing the
old v4l1 driver.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/media/video/Kconfig        |    7 +-
 drivers/media/video/gspca/Kconfig  |   11 +
 drivers/media/video/gspca/Makefile |    2 +
 drivers/media/video/gspca/cpia1.c  | 2022 ++++++++++++++++++++++++++++++++++++
 include/linux/videodev2.h          |    1 +
 5 files changed, 2042 insertions(+), 1 deletion(-)
 create mode 100644 drivers/media/video/gspca/cpia1.c

(limited to 'include/linux')

diff --git a/drivers/media/video/Kconfig b/drivers/media/video/Kconfig
index 2f83be766d9f..1773941a1a66 100644
--- a/drivers/media/video/Kconfig
+++ b/drivers/media/video/Kconfig
@@ -638,9 +638,14 @@ config VIDEO_W9966
 	  information.
 
 config VIDEO_CPIA
-	tristate "CPiA Video For Linux"
+	tristate "CPiA Video For Linux (DEPRECATED)"
 	depends on VIDEO_V4L1
+	default n
 	---help---
+	  This driver is DEPRECATED please use the gspca cpia1 module
+	  instead. Note that you need atleast version 0.6.4 of libv4l for
+	  the cpia1 gspca module.
+
 	  This is the video4linux driver for cameras based on Vision's CPiA
 	  (Colour Processor Interface ASIC), such as the Creative Labs Video
 	  Blaster Webcam II. If you have one of these cameras, say Y here
diff --git a/drivers/media/video/gspca/Kconfig b/drivers/media/video/gspca/Kconfig
index 20b0f62fe77f..e0060c1f0544 100644
--- a/drivers/media/video/gspca/Kconfig
+++ b/drivers/media/video/gspca/Kconfig
@@ -39,6 +39,17 @@ config USB_GSPCA_CONEX
 	  To compile this driver as a module, choose M here: the
 	  module will be called gspca_conex.
 
+config USB_GSPCA_CPIA1
+	tristate "cpia CPiA (version 1) Camera Driver"
+	depends on VIDEO_V4L2 && USB_GSPCA
+	help
+	  Say Y here if you want support for USB cameras based on the cpia
+	  CPiA chip. Note that you need atleast version 0.6.4 of libv4l for
+	  applications to understand the videoformat generated by this driver.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called gspca_cpia1.
+
 config USB_GSPCA_ETOMS
 	tristate "Etoms USB Camera Driver"
 	depends on VIDEO_V4L2 && USB_GSPCA
diff --git a/drivers/media/video/gspca/Makefile b/drivers/media/video/gspca/Makefile
index 643722046749..6e4cf1ce01c9 100644
--- a/drivers/media/video/gspca/Makefile
+++ b/drivers/media/video/gspca/Makefile
@@ -1,6 +1,7 @@
 obj-$(CONFIG_USB_GSPCA)          += gspca_main.o
 obj-$(CONFIG_USB_GSPCA_BENQ)     += gspca_benq.o
 obj-$(CONFIG_USB_GSPCA_CONEX)    += gspca_conex.o
+obj-$(CONFIG_USB_GSPCA_CPIA1)    += gspca_cpia1.o
 obj-$(CONFIG_USB_GSPCA_ETOMS)    += gspca_etoms.o
 obj-$(CONFIG_USB_GSPCA_FINEPIX)  += gspca_finepix.o
 obj-$(CONFIG_USB_GSPCA_JEILINJ)  += gspca_jeilinj.o
@@ -35,6 +36,7 @@ obj-$(CONFIG_USB_GSPCA_ZC3XX)    += gspca_zc3xx.o
 gspca_main-objs     := gspca.o
 gspca_benq-objs     := benq.o
 gspca_conex-objs    := conex.o
+gspca_cpia1-objs    := cpia1.o
 gspca_etoms-objs    := etoms.o
 gspca_finepix-objs  := finepix.o
 gspca_jeilinj-objs  := jeilinj.o
diff --git a/drivers/media/video/gspca/cpia1.c b/drivers/media/video/gspca/cpia1.c
new file mode 100644
index 000000000000..82945ed5cbe5
--- /dev/null
+++ b/drivers/media/video/gspca/cpia1.c
@@ -0,0 +1,2022 @@
+/*
+ * cpia CPiA (1) gspca driver
+ *
+ * Copyright (C) 2010 Hans de Goede <hdgoede@redhat.com>
+ *
+ * This module is adapted from the in kernel v4l1 cpia driver which is :
+ *
+ * (C) Copyright 1999-2000 Peter Pregler
+ * (C) Copyright 1999-2000 Scott J. Bertin
+ * (C) Copyright 1999-2000 Johannes Erdfelt <johannes@erdfelt.com>
+ * (C) Copyright 2000 STMicroelectronics
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#define MODULE_NAME "cpia1"
+
+#include "gspca.h"
+
+MODULE_AUTHOR("Hans de Goede <hdgoede@redhat.com>");
+MODULE_DESCRIPTION("Vision CPiA");
+MODULE_LICENSE("GPL");
+
+/* constant value's */
+#define MAGIC_0		0x19
+#define MAGIC_1		0x68
+#define DATA_IN		0xC0
+#define DATA_OUT	0x40
+#define VIDEOSIZE_QCIF	0	/* 176x144 */
+#define VIDEOSIZE_CIF	1	/* 352x288 */
+#define SUBSAMPLE_420	0
+#define SUBSAMPLE_422	1
+#define YUVORDER_YUYV	0
+#define YUVORDER_UYVY	1
+#define NOT_COMPRESSED	0
+#define COMPRESSED	1
+#define NO_DECIMATION	0
+#define DECIMATION_ENAB	1
+#define EOI		0xff	/* End Of Image */
+#define EOL		0xfd	/* End Of Line */
+#define FRAME_HEADER_SIZE	64
+
+/* Image grab modes */
+#define CPIA_GRAB_SINGLE	0
+#define CPIA_GRAB_CONTINEOUS	1
+
+/* Compression parameters */
+#define CPIA_COMPRESSION_NONE	0
+#define CPIA_COMPRESSION_AUTO	1
+#define CPIA_COMPRESSION_MANUAL	2
+#define CPIA_COMPRESSION_TARGET_QUALITY         0
+#define CPIA_COMPRESSION_TARGET_FRAMERATE       1
+
+/* Return offsets for GetCameraState */
+#define SYSTEMSTATE	0
+#define GRABSTATE	1
+#define STREAMSTATE	2
+#define FATALERROR	3
+#define CMDERROR	4
+#define DEBUGFLAGS	5
+#define VPSTATUS	6
+#define ERRORCODE	7
+
+/* SystemState */
+#define UNINITIALISED_STATE	0
+#define PASS_THROUGH_STATE	1
+#define LO_POWER_STATE		2
+#define HI_POWER_STATE		3
+#define WARM_BOOT_STATE		4
+
+/* GrabState */
+#define GRAB_IDLE		0
+#define GRAB_ACTIVE		1
+#define GRAB_DONE		2
+
+/* StreamState */
+#define STREAM_NOT_READY	0
+#define STREAM_READY		1
+#define STREAM_OPEN		2
+#define STREAM_PAUSED		3
+#define STREAM_FINISHED		4
+
+/* Fatal Error, CmdError, and DebugFlags */
+#define CPIA_FLAG	  1
+#define SYSTEM_FLAG	  2
+#define INT_CTRL_FLAG	  4
+#define PROCESS_FLAG	  8
+#define COM_FLAG	 16
+#define VP_CTRL_FLAG	 32
+#define CAPTURE_FLAG	 64
+#define DEBUG_FLAG	128
+
+/* VPStatus */
+#define VP_STATE_OK			0x00
+
+#define VP_STATE_FAILED_VIDEOINIT	0x01
+#define VP_STATE_FAILED_AECACBINIT	0x02
+#define VP_STATE_AEC_MAX		0x04
+#define VP_STATE_ACB_BMAX		0x08
+
+#define VP_STATE_ACB_RMIN		0x10
+#define VP_STATE_ACB_GMIN		0x20
+#define VP_STATE_ACB_RMAX		0x40
+#define VP_STATE_ACB_GMAX		0x80
+
+/* default (minimum) compensation values */
+#define COMP_RED        220
+#define COMP_GREEN1     214
+#define COMP_GREEN2     COMP_GREEN1
+#define COMP_BLUE       230
+
+/* exposure status */
+#define EXPOSURE_VERY_LIGHT 0
+#define EXPOSURE_LIGHT      1
+#define EXPOSURE_NORMAL     2
+#define EXPOSURE_DARK       3
+#define EXPOSURE_VERY_DARK  4
+
+#define CPIA_MODULE_CPIA			(0 << 5)
+#define CPIA_MODULE_SYSTEM			(1 << 5)
+#define CPIA_MODULE_VP_CTRL			(5 << 5)
+#define CPIA_MODULE_CAPTURE			(6 << 5)
+#define CPIA_MODULE_DEBUG			(7 << 5)
+
+#define INPUT (DATA_IN << 8)
+#define OUTPUT (DATA_OUT << 8)
+
+#define CPIA_COMMAND_GetCPIAVersion	(INPUT | CPIA_MODULE_CPIA | 1)
+#define CPIA_COMMAND_GetPnPID		(INPUT | CPIA_MODULE_CPIA | 2)
+#define CPIA_COMMAND_GetCameraStatus	(INPUT | CPIA_MODULE_CPIA | 3)
+#define CPIA_COMMAND_GotoHiPower	(OUTPUT | CPIA_MODULE_CPIA | 4)
+#define CPIA_COMMAND_GotoLoPower	(OUTPUT | CPIA_MODULE_CPIA | 5)
+#define CPIA_COMMAND_GotoSuspend	(OUTPUT | CPIA_MODULE_CPIA | 7)
+#define CPIA_COMMAND_GotoPassThrough	(OUTPUT | CPIA_MODULE_CPIA | 8)
+#define CPIA_COMMAND_ModifyCameraStatus	(OUTPUT | CPIA_MODULE_CPIA | 10)
+
+#define CPIA_COMMAND_ReadVCRegs		(INPUT | CPIA_MODULE_SYSTEM | 1)
+#define CPIA_COMMAND_WriteVCReg		(OUTPUT | CPIA_MODULE_SYSTEM | 2)
+#define CPIA_COMMAND_ReadMCPorts	(INPUT | CPIA_MODULE_SYSTEM | 3)
+#define CPIA_COMMAND_WriteMCPort	(OUTPUT | CPIA_MODULE_SYSTEM | 4)
+#define CPIA_COMMAND_SetBaudRate	(OUTPUT | CPIA_MODULE_SYSTEM | 5)
+#define CPIA_COMMAND_SetECPTiming	(OUTPUT | CPIA_MODULE_SYSTEM | 6)
+#define CPIA_COMMAND_ReadIDATA		(INPUT | CPIA_MODULE_SYSTEM | 7)
+#define CPIA_COMMAND_WriteIDATA		(OUTPUT | CPIA_MODULE_SYSTEM | 8)
+#define CPIA_COMMAND_GenericCall	(OUTPUT | CPIA_MODULE_SYSTEM | 9)
+#define CPIA_COMMAND_I2CStart		(OUTPUT | CPIA_MODULE_SYSTEM | 10)
+#define CPIA_COMMAND_I2CStop		(OUTPUT | CPIA_MODULE_SYSTEM | 11)
+#define CPIA_COMMAND_I2CWrite		(OUTPUT | CPIA_MODULE_SYSTEM | 12)
+#define CPIA_COMMAND_I2CRead		(INPUT | CPIA_MODULE_SYSTEM | 13)
+
+#define CPIA_COMMAND_GetVPVersion	(INPUT | CPIA_MODULE_VP_CTRL | 1)
+#define CPIA_COMMAND_ResetFrameCounter	(INPUT | CPIA_MODULE_VP_CTRL | 2)
+#define CPIA_COMMAND_SetColourParams	(OUTPUT | CPIA_MODULE_VP_CTRL | 3)
+#define CPIA_COMMAND_SetExposure	(OUTPUT | CPIA_MODULE_VP_CTRL | 4)
+#define CPIA_COMMAND_SetColourBalance	(OUTPUT | CPIA_MODULE_VP_CTRL | 6)
+#define CPIA_COMMAND_SetSensorFPS	(OUTPUT | CPIA_MODULE_VP_CTRL | 7)
+#define CPIA_COMMAND_SetVPDefaults	(OUTPUT | CPIA_MODULE_VP_CTRL | 8)
+#define CPIA_COMMAND_SetApcor		(OUTPUT | CPIA_MODULE_VP_CTRL | 9)
+#define CPIA_COMMAND_SetFlickerCtrl	(OUTPUT | CPIA_MODULE_VP_CTRL | 10)
+#define CPIA_COMMAND_SetVLOffset	(OUTPUT | CPIA_MODULE_VP_CTRL | 11)
+#define CPIA_COMMAND_GetColourParams	(INPUT | CPIA_MODULE_VP_CTRL | 16)
+#define CPIA_COMMAND_GetColourBalance	(INPUT | CPIA_MODULE_VP_CTRL | 17)
+#define CPIA_COMMAND_GetExposure	(INPUT | CPIA_MODULE_VP_CTRL | 18)
+#define CPIA_COMMAND_SetSensorMatrix	(OUTPUT | CPIA_MODULE_VP_CTRL | 19)
+#define CPIA_COMMAND_ColourBars		(OUTPUT | CPIA_MODULE_VP_CTRL | 25)
+#define CPIA_COMMAND_ReadVPRegs		(INPUT | CPIA_MODULE_VP_CTRL | 30)
+#define CPIA_COMMAND_WriteVPReg		(OUTPUT | CPIA_MODULE_VP_CTRL | 31)
+
+#define CPIA_COMMAND_GrabFrame		(OUTPUT | CPIA_MODULE_CAPTURE | 1)
+#define CPIA_COMMAND_UploadFrame	(OUTPUT | CPIA_MODULE_CAPTURE | 2)
+#define CPIA_COMMAND_SetGrabMode	(OUTPUT | CPIA_MODULE_CAPTURE | 3)
+#define CPIA_COMMAND_InitStreamCap	(OUTPUT | CPIA_MODULE_CAPTURE | 4)
+#define CPIA_COMMAND_FiniStreamCap	(OUTPUT | CPIA_MODULE_CAPTURE | 5)
+#define CPIA_COMMAND_StartStreamCap	(OUTPUT | CPIA_MODULE_CAPTURE | 6)
+#define CPIA_COMMAND_EndStreamCap	(OUTPUT | CPIA_MODULE_CAPTURE | 7)
+#define CPIA_COMMAND_SetFormat		(OUTPUT | CPIA_MODULE_CAPTURE | 8)
+#define CPIA_COMMAND_SetROI		(OUTPUT | CPIA_MODULE_CAPTURE | 9)
+#define CPIA_COMMAND_SetCompression	(OUTPUT | CPIA_MODULE_CAPTURE | 10)
+#define CPIA_COMMAND_SetCompressionTarget (OUTPUT | CPIA_MODULE_CAPTURE | 11)
+#define CPIA_COMMAND_SetYUVThresh	(OUTPUT | CPIA_MODULE_CAPTURE | 12)
+#define CPIA_COMMAND_SetCompressionParams (OUTPUT | CPIA_MODULE_CAPTURE | 13)
+#define CPIA_COMMAND_DiscardFrame	(OUTPUT | CPIA_MODULE_CAPTURE | 14)
+#define CPIA_COMMAND_GrabReset		(OUTPUT | CPIA_MODULE_CAPTURE | 15)
+
+#define CPIA_COMMAND_OutputRS232	(OUTPUT | CPIA_MODULE_DEBUG | 1)
+#define CPIA_COMMAND_AbortProcess	(OUTPUT | CPIA_MODULE_DEBUG | 4)
+#define CPIA_COMMAND_SetDramPage	(OUTPUT | CPIA_MODULE_DEBUG | 5)
+#define CPIA_COMMAND_StartDramUpload	(OUTPUT | CPIA_MODULE_DEBUG | 6)
+#define CPIA_COMMAND_StartDummyDtream	(OUTPUT | CPIA_MODULE_DEBUG | 8)
+#define CPIA_COMMAND_AbortStream	(OUTPUT | CPIA_MODULE_DEBUG | 9)
+#define CPIA_COMMAND_DownloadDRAM	(OUTPUT | CPIA_MODULE_DEBUG | 10)
+#define CPIA_COMMAND_Null		(OUTPUT | CPIA_MODULE_DEBUG | 11)
+
+#define ROUND_UP_EXP_FOR_FLICKER 15
+
+/* Constants for automatic frame rate adjustment */
+#define MAX_EXP       302
+#define MAX_EXP_102   255
+#define LOW_EXP       140
+#define VERY_LOW_EXP   70
+#define TC             94
+#define	EXP_ACC_DARK   50
+#define	EXP_ACC_LIGHT  90
+#define HIGH_COMP_102 160
+#define MAX_COMP      239
+#define DARK_TIME       3
+#define LIGHT_TIME      3
+
+#define FIRMWARE_VERSION(x, y) (sd->params.version.firmwareVersion == (x) && \
+				sd->params.version.firmwareRevision == (y))
+
+/* Developer's Guide Table 5 p 3-34
+ * indexed by [mains][sensorFps.baserate][sensorFps.divisor]*/
+static u8 flicker_jumps[2][2][4] =
+{ { { 76, 38, 19, 9 }, { 92, 46, 23, 11 } },
+  { { 64, 32, 16, 8 }, { 76, 38, 19, 9} }
+};
+
+struct cam_params {
+	struct {
+		u8 firmwareVersion;
+		u8 firmwareRevision;
+		u8 vcVersion;
+		u8 vcRevision;
+	} version;
+	struct {
+		u16 vendor;
+		u16 product;
+		u16 deviceRevision;
+	} pnpID;
+	struct {
+		u8 vpVersion;
+		u8 vpRevision;
+		u16 cameraHeadID;
+	} vpVersion;
+	struct {
+		u8 systemState;
+		u8 grabState;
+		u8 streamState;
+		u8 fatalError;
+		u8 cmdError;
+		u8 debugFlags;
+		u8 vpStatus;
+		u8 errorCode;
+	} status;
+	struct {
+		u8 brightness;
+		u8 contrast;
+		u8 saturation;
+	} colourParams;
+	struct {
+		u8 gainMode;
+		u8 expMode;
+		u8 compMode;
+		u8 centreWeight;
+		u8 gain;
+		u8 fineExp;
+		u8 coarseExpLo;
+		u8 coarseExpHi;
+		u8 redComp;
+		u8 green1Comp;
+		u8 green2Comp;
+		u8 blueComp;
+	} exposure;
+	struct {
+		u8 balanceMode;
+		u8 redGain;
+		u8 greenGain;
+		u8 blueGain;
+	} colourBalance;
+	struct {
+		u8 divisor;
+		u8 baserate;
+	} sensorFps;
+	struct {
+		u8 gain1;
+		u8 gain2;
+		u8 gain4;
+		u8 gain8;
+	} apcor;
+	struct {
+		u8 disabled;
+		u8 flickerMode;
+		u8 coarseJump;
+		u8 allowableOverExposure;
+	} flickerControl;
+	struct {
+		u8 gain1;
+		u8 gain2;
+		u8 gain4;
+		u8 gain8;
+	} vlOffset;
+	struct {
+		u8 mode;
+		u8 decimation;
+	} compression;
+	struct {
+		u8 frTargeting;
+		u8 targetFR;
+		u8 targetQ;
+	} compressionTarget;
+	struct {
+		u8 yThreshold;
+		u8 uvThreshold;
+	} yuvThreshold;
+	struct {
+		u8 hysteresis;
+		u8 threshMax;
+		u8 smallStep;
+		u8 largeStep;
+		u8 decimationHysteresis;
+		u8 frDiffStepThresh;
+		u8 qDiffStepThresh;
+		u8 decimationThreshMod;
+	} compressionParams;
+	struct {
+		u8 videoSize;		/* CIF/QCIF */
+		u8 subSample;
+		u8 yuvOrder;
+	} format;
+	struct {                        /* Intel QX3 specific data */
+		u8 qx3_detected;        /* a QX3 is present */
+		u8 toplight;            /* top light lit , R/W */
+		u8 bottomlight;         /* bottom light lit, R/W */
+		u8 button;              /* snapshot button pressed (R/O) */
+		u8 cradled;             /* microscope is in cradle (R/O) */
+	} qx3;
+	struct {
+		u8 colStart;		/* skip first 8*colStart pixels */
+		u8 colEnd;		/* finish at 8*colEnd pixels */
+		u8 rowStart;		/* skip first 4*rowStart lines */
+		u8 rowEnd;		/* finish at 4*rowEnd lines */
+	} roi;
+	u8 ecpTiming;
+	u8 streamStartLine;
+};
+
+/* specific webcam descriptor */
+struct sd {
+	struct gspca_dev gspca_dev;		/* !! must be the first item */
+	struct cam_params params;		/* camera settings */
+
+	atomic_t cam_exposure;
+	atomic_t fps;
+	int exposure_count;
+	u8 exposure_status;
+	u8 mainsFreq;				/* 0 = 50hz, 1 = 60hz */
+	u8 first_frame;
+	u8 freq;
+};
+
+/* V4L2 controls supported by the driver */
+static int sd_setbrightness(struct gspca_dev *gspca_dev, __s32 val);
+static int sd_getbrightness(struct gspca_dev *gspca_dev, __s32 *val);
+static int sd_setcontrast(struct gspca_dev *gspca_dev, __s32 val);
+static int sd_getcontrast(struct gspca_dev *gspca_dev, __s32 *val);
+static int sd_setsaturation(struct gspca_dev *gspca_dev, __s32 val);
+static int sd_getsaturation(struct gspca_dev *gspca_dev, __s32 *val);
+static int sd_setfreq(struct gspca_dev *gspca_dev, __s32 val);
+static int sd_getfreq(struct gspca_dev *gspca_dev, __s32 *val);
+static int sd_setcomptarget(struct gspca_dev *gspca_dev, __s32 val);
+static int sd_getcomptarget(struct gspca_dev *gspca_dev, __s32 *val);
+
+static struct ctrl sd_ctrls[] = {
+	{
+	    {
+		.id      = V4L2_CID_BRIGHTNESS,
+		.type    = V4L2_CTRL_TYPE_INTEGER,
+		.name    = "Brightness",
+		.minimum = 0,
+		.maximum = 100,
+		.step = 1,
+#define BRIGHTNESS_DEF 50
+		.default_value = BRIGHTNESS_DEF,
+		.flags = 0,
+	    },
+	    .set = sd_setbrightness,
+	    .get = sd_getbrightness,
+	},
+	{
+	    {
+		.id      = V4L2_CID_CONTRAST,
+		.type    = V4L2_CTRL_TYPE_INTEGER,
+		.name    = "Contrast",
+		.minimum = 0,
+		.maximum = 96,
+		.step    = 8,
+#define CONTRAST_DEF 48
+		.default_value = CONTRAST_DEF,
+	    },
+	    .set = sd_setcontrast,
+	    .get = sd_getcontrast,
+	},
+	{
+	    {
+		.id      = V4L2_CID_SATURATION,
+		.type    = V4L2_CTRL_TYPE_INTEGER,
+		.name    = "Saturation",
+		.minimum = 0,
+		.maximum = 100,
+		.step    = 1,
+#define SATURATION_DEF 50
+		.default_value = SATURATION_DEF,
+	    },
+	    .set = sd_setsaturation,
+	    .get = sd_getsaturation,
+	},
+	{
+		{
+			.id	 = V4L2_CID_POWER_LINE_FREQUENCY,
+			.type    = V4L2_CTRL_TYPE_MENU,
+			.name    = "Light frequency filter",
+			.minimum = 0,
+			.maximum = 2,	/* 0: 0, 1: 50Hz, 2:60Hz */
+			.step    = 1,
+#define FREQ_DEF 1
+			.default_value = FREQ_DEF,
+		},
+		.set = sd_setfreq,
+		.get = sd_getfreq,
+	},
+	{
+		{
+#define V4L2_CID_COMP_TARGET V4L2_CID_PRIVATE_BASE
+			.id	 = V4L2_CID_COMP_TARGET,
+			.type    = V4L2_CTRL_TYPE_MENU,
+			.name    = "Compression Target",
+			.minimum = 0,
+			.maximum = 1,
+			.step    = 1,
+#define COMP_TARGET_DEF CPIA_COMPRESSION_TARGET_QUALITY
+			.default_value = COMP_TARGET_DEF,
+		},
+		.set = sd_setcomptarget,
+		.get = sd_getcomptarget,
+	},
+};
+
+static const struct v4l2_pix_format mode[] = {
+	{160, 120, V4L2_PIX_FMT_CPIA1, V4L2_FIELD_NONE,
+		/* The sizeimage is trial and error, as with low framerates
+		   the camera will pad out usb frames, making the image
+		   data larger then strictly necessary */
+		.bytesperline = 160,
+		.sizeimage = 65536,
+		.colorspace = V4L2_COLORSPACE_SRGB,
+		.priv = 3},
+	{176, 144, V4L2_PIX_FMT_CPIA1, V4L2_FIELD_NONE,
+		.bytesperline = 172,
+		.sizeimage = 65536,
+		.colorspace = V4L2_COLORSPACE_SRGB,
+		.priv = 2},
+	{320, 240, V4L2_PIX_FMT_CPIA1, V4L2_FIELD_NONE,
+		.bytesperline = 320,
+		.sizeimage = 262144,
+		.colorspace = V4L2_COLORSPACE_SRGB,
+		.priv = 1},
+	{352, 288, V4L2_PIX_FMT_CPIA1, V4L2_FIELD_NONE,
+		.bytesperline = 352,
+		.sizeimage = 262144,
+		.colorspace = V4L2_COLORSPACE_SRGB,
+		.priv = 0},
+};
+
+/**********************************************************************
+ *
+ * General functions
+ *
+ **********************************************************************/
+
+static int cpia_usb_transferCmd(struct gspca_dev *gspca_dev, u8 *command)
+{
+	u8 requesttype;
+	unsigned int pipe;
+	int ret, databytes = command[6] | (command[7] << 8);
+	/* Sometimes we see spurious EPIPE errors */
+	int retries = 3;
+
+	if (command[0] == DATA_IN) {
+		pipe = usb_rcvctrlpipe(gspca_dev->dev, 0);
+		requesttype = USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE;
+	} else if (command[0] == DATA_OUT) {
+		pipe = usb_sndctrlpipe(gspca_dev->dev, 0);
+		requesttype = USB_TYPE_VENDOR | USB_RECIP_DEVICE;
+	} else {
+		PDEBUG(D_ERR, "Unexpected first byte of command: %x",
+		       command[0]);
+		return -EINVAL;
+	}
+
+retry:
+	ret = usb_control_msg(gspca_dev->dev, pipe,
+			      command[1],
+			      requesttype,
+			      command[2] | (command[3] << 8),
+			      command[4] | (command[5] << 8),
+			      gspca_dev->usb_buf, databytes, 1000);
+
+	if (ret < 0)
+		PDEBUG(D_ERR, "usb_control_msg %02x, error %d", command[1],
+		       ret);
+
+	if (ret == -EPIPE && retries > 0) {
+		retries--;
+		goto retry;
+	}
+
+	return (ret < 0) ? ret : 0;
+}
+
+/* send an arbitrary command to the camera */
+static int do_command(struct gspca_dev *gspca_dev, u16 command,
+		      u8 a, u8 b, u8 c, u8 d)
+{
+	struct sd *sd = (struct sd *) gspca_dev;
+	int ret, datasize;
+	u8 cmd[8];
+
+	switch (command) {
+	case CPIA_COMMAND_GetCPIAVersion:
+	case CPIA_COMMAND_GetPnPID:
+	case CPIA_COMMAND_GetCameraStatus:
+	case CPIA_COMMAND_GetVPVersion:
+	case CPIA_COMMAND_GetColourParams:
+	case CPIA_COMMAND_GetColourBalance:
+	case CPIA_COMMAND_GetExposure:
+		datasize = 8;
+		break;
+	case CPIA_COMMAND_ReadMCPorts:
+	case CPIA_COMMAND_ReadVCRegs:
+		datasize = 4;
+		break;
+	default:
+		datasize = 0;
+		break;
+	}
+
+	cmd[0] = command >> 8;
+	cmd[1] = command & 0xff;
+	cmd[2] = a;
+	cmd[3] = b;
+	cmd[4] = c;
+	cmd[5] = d;
+	cmd[6] = datasize;
+	cmd[7] = 0;
+
+	ret = cpia_usb_transferCmd(gspca_dev, cmd);
+	if (ret)
+		return ret;
+
+	switch (command) {
+	case CPIA_COMMAND_GetCPIAVersion:
+		sd->params.version.firmwareVersion = gspca_dev->usb_buf[0];
+		sd->params.version.firmwareRevision = gspca_dev->usb_buf[1];
+		sd->params.version.vcVersion = gspca_dev->usb_buf[2];
+		sd->params.version.vcRevision = gspca_dev->usb_buf[3];
+		break;
+	case CPIA_COMMAND_GetPnPID:
+		sd->params.pnpID.vendor =
+			gspca_dev->usb_buf[0] | (gspca_dev->usb_buf[1] << 8);
+		sd->params.pnpID.product =
+			gspca_dev->usb_buf[2] | (gspca_dev->usb_buf[3] << 8);
+		sd->params.pnpID.deviceRevision =
+			gspca_dev->usb_buf[4] | (gspca_dev->usb_buf[5] << 8);
+		break;
+	case CPIA_COMMAND_GetCameraStatus:
+		sd->params.status.systemState = gspca_dev->usb_buf[0];
+		sd->params.status.grabState = gspca_dev->usb_buf[1];
+		sd->params.status.streamState = gspca_dev->usb_buf[2];
+		sd->params.status.fatalError = gspca_dev->usb_buf[3];
+		sd->params.status.cmdError = gspca_dev->usb_buf[4];
+		sd->params.status.debugFlags = gspca_dev->usb_buf[5];
+		sd->params.status.vpStatus = gspca_dev->usb_buf[6];
+		sd->params.status.errorCode = gspca_dev->usb_buf[7];
+		break;
+	case CPIA_COMMAND_GetVPVersion:
+		sd->params.vpVersion.vpVersion = gspca_dev->usb_buf[0];
+		sd->params.vpVersion.vpRevision = gspca_dev->usb_buf[1];
+		sd->params.vpVersion.cameraHeadID =
+			gspca_dev->usb_buf[2] | (gspca_dev->usb_buf[3] << 8);
+		break;
+	case CPIA_COMMAND_GetColourParams:
+		sd->params.colourParams.brightness = gspca_dev->usb_buf[0];
+		sd->params.colourParams.contrast = gspca_dev->usb_buf[1];
+		sd->params.colourParams.saturation = gspca_dev->usb_buf[2];
+		break;
+	case CPIA_COMMAND_GetColourBalance:
+		sd->params.colourBalance.redGain = gspca_dev->usb_buf[0];
+		sd->params.colourBalance.greenGain = gspca_dev->usb_buf[1];
+		sd->params.colourBalance.blueGain = gspca_dev->usb_buf[2];
+		break;
+	case CPIA_COMMAND_GetExposure:
+		sd->params.exposure.gain = gspca_dev->usb_buf[0];
+		sd->params.exposure.fineExp = gspca_dev->usb_buf[1];
+		sd->params.exposure.coarseExpLo = gspca_dev->usb_buf[2];
+		sd->params.exposure.coarseExpHi = gspca_dev->usb_buf[3];
+		sd->params.exposure.redComp = gspca_dev->usb_buf[4];
+		sd->params.exposure.green1Comp = gspca_dev->usb_buf[5];
+		sd->params.exposure.green2Comp = gspca_dev->usb_buf[6];
+		sd->params.exposure.blueComp = gspca_dev->usb_buf[7];
+		break;
+
+	case CPIA_COMMAND_ReadMCPorts:
+		if (!sd->params.qx3.qx3_detected)
+			break;
+		/* test button press */
+		sd->params.qx3.button = ((gspca_dev->usb_buf[1] & 0x02) == 0);
+		if (sd->params.qx3.button) {
+			/* button pressed - unlock the latch */
+			do_command(gspca_dev, CPIA_COMMAND_WriteMCPort,
+				   3, 0xDF, 0xDF, 0);
+			do_command(gspca_dev, CPIA_COMMAND_WriteMCPort,
+				   3, 0xFF, 0xFF, 0);
+		}
+
+		/* test whether microscope is cradled */
+		sd->params.qx3.cradled = ((gspca_dev->usb_buf[2] & 0x40) == 0);
+		break;
+	}
+
+	return 0;
+}
+
+/* send a command to the camera with an additional data transaction */
+static int do_command_extended(struct gspca_dev *gspca_dev, u16 command,
+			       u8 a, u8 b, u8 c, u8 d,
+			       u8 e, u8 f, u8 g, u8 h,
+			       u8 i, u8 j, u8 k, u8 l)
+{
+	u8 cmd[8];
+
+	cmd[0] = command >> 8;
+	cmd[1] = command & 0xff;
+	cmd[2] = a;
+	cmd[3] = b;
+	cmd[4] = c;
+	cmd[5] = d;
+	cmd[6] = 8;
+	cmd[7] = 0;
+	gspca_dev->usb_buf[0] = e;
+	gspca_dev->usb_buf[1] = f;
+	gspca_dev->usb_buf[2] = g;
+	gspca_dev->usb_buf[3] = h;
+	gspca_dev->usb_buf[4] = i;
+	gspca_dev->usb_buf[5] = j;
+	gspca_dev->usb_buf[6] = k;
+	gspca_dev->usb_buf[7] = l;
+
+	return cpia_usb_transferCmd(gspca_dev, cmd);
+}
+
+/*  find_over_exposure
+ *  Finds a suitable value of OverExposure for use with SetFlickerCtrl
+ *  Some calculation is required because this value changes with the brightness
+ *  set with SetColourParameters
+ *
+ *  Parameters: Brightness - last brightness value set with SetColourParameters
+ *
+ *  Returns: OverExposure value to use with SetFlickerCtrl
+ */
+#define FLICKER_MAX_EXPOSURE                    250
+#define FLICKER_ALLOWABLE_OVER_EXPOSURE         146
+#define FLICKER_BRIGHTNESS_CONSTANT             59
+static int find_over_exposure(int brightness)
+{
+	int MaxAllowableOverExposure, OverExposure;
+
+	MaxAllowableOverExposure = FLICKER_MAX_EXPOSURE - brightness -
+				   FLICKER_BRIGHTNESS_CONSTANT;
+
+	if (MaxAllowableOverExposure < FLICKER_ALLOWABLE_OVER_EXPOSURE)
+		OverExposure = MaxAllowableOverExposure;
+	else
+		OverExposure = FLICKER_ALLOWABLE_OVER_EXPOSURE;
+
+	return OverExposure;
+}
+#undef FLICKER_MAX_EXPOSURE
+#undef FLICKER_ALLOWABLE_OVER_EXPOSURE
+#undef FLICKER_BRIGHTNESS_CONSTANT
+
+/* initialise cam_data structure  */
+static void reset_camera_params(struct gspca_dev *gspca_dev)
+{
+	struct sd *sd = (struct sd *) gspca_dev;
+	struct cam_params *params = &sd->params;
+
+	/* The following parameter values are the defaults from
+	 * "Software Developer's Guide for CPiA Cameras".  Any changes
+	 * to the defaults are noted in comments. */
+	params->colourParams.brightness = BRIGHTNESS_DEF;
+	params->colourParams.contrast = CONTRAST_DEF;
+	params->colourParams.saturation = SATURATION_DEF;
+	params->exposure.gainMode = 4;
+	params->exposure.expMode = 2;		/* AEC */
+	params->exposure.compMode = 1;
+	params->exposure.centreWeight = 1;
+	params->exposure.gain = 0;
+	params->exposure.fineExp = 0;
+	params->exposure.coarseExpLo = 185;
+	params->exposure.coarseExpHi = 0;
+	params->exposure.redComp = COMP_RED;
+	params->exposure.green1Comp = COMP_GREEN1;
+	params->exposure.green2Comp = COMP_GREEN2;
+	params->exposure.blueComp = COMP_BLUE;
+	params->colourBalance.balanceMode = 2;	/* ACB */
+	params->colourBalance.redGain = 32;
+	params->colourBalance.greenGain = 6;
+	params->colourBalance.blueGain = 92;
+	params->apcor.gain1 = 0x18;
+	params->apcor.gain2 = 0x16;
+	params->apcor.gain4 = 0x24;
+	params->apcor.gain8 = 0x34;
+	params->flickerControl.flickerMode = 0;
+	params->flickerControl.disabled = 1;
+
+	params->flickerControl.coarseJump =
+		flicker_jumps[sd->mainsFreq]
+			     [params->sensorFps.baserate]
+			     [params->sensorFps.divisor];
+	params->flickerControl.allowableOverExposure =
+		find_over_exposure(params->colourParams.brightness);
+	params->vlOffset.gain1 = 20;
+	params->vlOffset.gain2 = 24;
+	params->vlOffset.gain4 = 26;
+	params->vlOffset.gain8 = 26;
+	params->compressionParams.hysteresis = 3;
+	params->compressionParams.threshMax = 11;
+	params->compressionParams.smallStep = 1;
+	params->compressionParams.largeStep = 3;
+	params->compressionParams.decimationHysteresis = 2;
+	params->compressionParams.frDiffStepThresh = 5;
+	params->compressionParams.qDiffStepThresh = 3;
+	params->compressionParams.decimationThreshMod = 2;
+	/* End of default values from Software Developer's Guide */
+
+	/* Set Sensor FPS to 15fps. This seems better than 30fps
+	 * for indoor lighting. */
+	params->sensorFps.divisor = 1;
+	params->sensorFps.baserate = 1;
+
+	params->yuvThreshold.yThreshold = 6; /* From windows driver */
+	params->yuvThreshold.uvThreshold = 6; /* From windows driver */
+
+	params->format.subSample = SUBSAMPLE_420;
+	params->format.yuvOrder = YUVORDER_YUYV;
+
+	params->compression.mode = CPIA_COMPRESSION_AUTO;
+	params->compression.decimation = NO_DECIMATION;
+
+	params->compressionTarget.frTargeting = COMP_TARGET_DEF;
+	params->compressionTarget.targetFR = 15; /* From windows driver */
+	params->compressionTarget.targetQ = 5; /* From windows driver */
+
+	params->qx3.qx3_detected = 0;
+	params->qx3.toplight = 0;
+	params->qx3.bottomlight = 0;
+	params->qx3.button = 0;
+	params->qx3.cradled = 0;
+}
+
+static void printstatus(struct cam_params *params)
+{
+	PDEBUG(D_PROBE, "status: %02x %02x %02x %02x %02x %02x %02x %02x",
+	       params->status.systemState, params->status.grabState,
+	       params->status.streamState, params->status.fatalError,
+	       params->status.cmdError, params->status.debugFlags,
+	       params->status.vpStatus, params->status.errorCode);
+}
+
+static int goto_low_power(struct gspca_dev *gspca_dev)
+{
+	struct sd *sd = (struct sd *) gspca_dev;
+	int ret;
+
+	ret = do_command(gspca_dev, CPIA_COMMAND_GotoLoPower, 0, 0, 0, 0);
+	if (ret)
+		return ret;
+
+	do_command(gspca_dev, CPIA_COMMAND_GetCameraStatus, 0, 0, 0, 0);
+	if (ret)
+		return ret;
+
+	if (sd->params.status.systemState != LO_POWER_STATE) {
+		if (sd->params.status.systemState != WARM_BOOT_STATE) {
+			PDEBUG(D_ERR,
+			       "unexpected state after lo power cmd: %02x",
+			       sd->params.status.systemState);
+			printstatus(&sd->params);
+		}
+		return -EIO;
+	}
+
+	PDEBUG(D_CONF, "camera now in LOW power state");
+	return 0;
+}
+
+static int goto_high_power(struct gspca_dev *gspca_dev)
+{
+	struct sd *sd = (struct sd *) gspca_dev;
+	int ret;
+
+	ret = do_command(gspca_dev, CPIA_COMMAND_GotoHiPower, 0, 0, 0, 0);
+	if (ret)
+		return ret;
+
+	msleep_interruptible(40);	/* windows driver does it too */
+
+	if (signal_pending(current))
+		return -EINTR;
+
+	do_command(gspca_dev, CPIA_COMMAND_GetCameraStatus, 0, 0, 0, 0);
+	if (ret)
+		return ret;
+
+	if (sd->params.status.systemState != HI_POWER_STATE) {
+		PDEBUG(D_ERR, "unexpected state after hi power cmd: %02x",
+			       sd->params.status.systemState);
+		printstatus(&sd->params);
+		return -EIO;
+	}
+
+	PDEBUG(D_CONF, "camera now in HIGH power state");
+	return 0;
+}
+
+static int get_version_information(struct gspca_dev *gspca_dev)
+{
+	int ret;
+
+	/* GetCPIAVersion */
+	ret = do_command(gspca_dev, CPIA_COMMAND_GetCPIAVersion, 0, 0, 0, 0);
+	if (ret)
+		return ret;
+
+	/* GetPnPID */
+	return do_command(gspca_dev, CPIA_COMMAND_GetPnPID, 0, 0, 0, 0);
+}
+
+static int save_camera_state(struct gspca_dev *gspca_dev)
+{
+	int ret;
+
+	ret = do_command(gspca_dev, CPIA_COMMAND_GetColourBalance, 0, 0, 0, 0);
+	if (ret)
+		return ret;
+
+	return do_command(gspca_dev, CPIA_COMMAND_GetExposure, 0, 0, 0, 0);
+}
+
+int command_setformat(struct gspca_dev *gspca_dev)
+{
+	struct sd *sd = (struct sd *) gspca_dev;
+	int ret;
+
+	ret = do_command(gspca_dev, CPIA_COMMAND_SetFormat,
+			 sd->params.format.videoSize,
+			 sd->params.format.subSample,
+			 sd->params.format.yuvOrder, 0);
+	if (ret)
+		return ret;
+
+	return do_command(gspca_dev, CPIA_COMMAND_SetROI,
+			  sd->params.roi.colStart, sd->params.roi.colEnd,
+			  sd->params.roi.rowStart, sd->params.roi.rowEnd);
+}
+
+int command_setcolourparams(struct gspca_dev *gspca_dev)
+{
+	struct sd *sd = (struct sd *) gspca_dev;
+	return do_command(gspca_dev, CPIA_COMMAND_SetColourParams,
+			  sd->params.colourParams.brightness,
+			  sd->params.colourParams.contrast,
+			  sd->params.colourParams.saturation, 0);
+}
+
+int command_setapcor(struct gspca_dev *gspca_dev)
+{
+	struct sd *sd = (struct sd *) gspca_dev;
+	return do_command(gspca_dev, CPIA_COMMAND_SetApcor,
+			  sd->params.apcor.gain1,
+			  sd->params.apcor.gain2,
+			  sd->params.apcor.gain4,
+			  sd->params.apcor.gain8);
+}
+
+int command_setvloffset(struct gspca_dev *gspca_dev)
+{
+	struct sd *sd = (struct sd *) gspca_dev;
+	return do_command(gspca_dev, CPIA_COMMAND_SetVLOffset,
+			  sd->params.vlOffset.gain1,
+			  sd->params.vlOffset.gain2,
+			  sd->params.vlOffset.gain4,
+			  sd->params.vlOffset.gain8);
+}
+
+int command_setexposure(struct gspca_dev *gspca_dev)
+{
+	struct sd *sd = (struct sd *) gspca_dev;
+	int ret;
+
+	ret = do_command_extended(gspca_dev, CPIA_COMMAND_SetExposure,
+				  sd->params.exposure.gainMode,
+				  1,
+				  sd->params.exposure.compMode,
+				  sd->params.exposure.centreWeight,
+				  sd->params.exposure.gain,
+				  sd->params.exposure.fineExp,
+				  sd->params.exposure.coarseExpLo,
+				  sd->params.exposure.coarseExpHi,
+				  sd->params.exposure.redComp,
+				  sd->params.exposure.green1Comp,
+				  sd->params.exposure.green2Comp,
+				  sd->params.exposure.blueComp);
+	if (ret)
+		return ret;
+
+	if (sd->params.exposure.expMode != 1) {
+		ret = do_command_extended(gspca_dev, CPIA_COMMAND_SetExposure,
+					  0,
+					  sd->params.exposure.expMode,
+					  0, 0,
+					  sd->params.exposure.gain,
+					  sd->params.exposure.fineExp,
+					  sd->params.exposure.coarseExpLo,
+					  sd->params.exposure.coarseExpHi,
+					  0, 0, 0, 0);
+	}
+
+	return ret;
+}
+
+int command_setcolourbalance(struct gspca_dev *gspca_dev)
+{
+	struct sd *sd = (struct sd *) gspca_dev;
+
+	if (sd->params.colourBalance.balanceMode == 1) {
+		int ret;
+
+		ret = do_command(gspca_dev, CPIA_COMMAND_SetColourBalance,
+				 1,
+				 sd->params.colourBalance.redGain,
+				 sd->params.colourBalance.greenGain,
+				 sd->params.colourBalance.blueGain);
+		if (ret)
+			return ret;
+
+		return do_command(gspca_dev, CPIA_COMMAND_SetColourBalance,
+				  3, 0, 0, 0);
+	}
+	if (sd->params.colourBalance.balanceMode == 2) {
+		return do_command(gspca_dev, CPIA_COMMAND_SetColourBalance,
+				  2, 0, 0, 0);
+	}
+	if (sd->params.colourBalance.balanceMode == 3) {
+		return do_command(gspca_dev, CPIA_COMMAND_SetColourBalance,
+				  3, 0, 0, 0);
+	}
+
+	return -EINVAL;
+}
+
+int command_setcompressiontarget(struct gspca_dev *gspca_dev)
+{
+	struct sd *sd = (struct sd *) gspca_dev;
+
+	return do_command(gspca_dev, CPIA_COMMAND_SetCompressionTarget,
+			  sd->params.compressionTarget.frTargeting,
+			  sd->params.compressionTarget.targetFR,
+			  sd->params.compressionTarget.targetQ, 0);
+}
+
+int command_setyuvtresh(struct gspca_dev *gspca_dev)
+{
+	struct sd *sd = (struct sd *) gspca_dev;
+
+	return do_command(gspca_dev, CPIA_COMMAND_SetYUVThresh,
+			  sd->params.yuvThreshold.yThreshold,
+			  sd->params.yuvThreshold.uvThreshold, 0, 0);
+}
+
+int command_setcompressionparams(struct gspca_dev *gspca_dev)
+{
+	struct sd *sd = (struct sd *) gspca_dev;
+
+	return do_command_extended(gspca_dev,
+			    CPIA_COMMAND_SetCompressionParams,
+			    0, 0, 0, 0,
+			    sd->params.compressionParams.hysteresis,
+			    sd->params.compressionParams.threshMax,
+			    sd->params.compressionParams.smallStep,
+			    sd->params.compressionParams.largeStep,
+			    sd->params.compressionParams.decimationHysteresis,
+			    sd->params.compressionParams.frDiffStepThresh,
+			    sd->params.compressionParams.qDiffStepThresh,
+			    sd->params.compressionParams.decimationThreshMod);
+}
+
+int command_setcompression(struct gspca_dev *gspca_dev)
+{
+	struct sd *sd = (struct sd *) gspca_dev;
+
+	return do_command(gspca_dev, CPIA_COMMAND_SetCompression,
+			  sd->params.compression.mode,
+			  sd->params.compression.decimation, 0, 0);
+}
+
+int command_setsensorfps(struct gspca_dev *gspca_dev)
+{
+	struct sd *sd = (struct sd *) gspca_dev;
+
+	return do_command(gspca_dev, CPIA_COMMAND_SetSensorFPS,
+			  sd->params.sensorFps.divisor,
+			  sd->params.sensorFps.baserate, 0, 0);
+}
+
+int command_setflickerctrl(struct gspca_dev *gspca_dev)
+{
+	struct sd *sd = (struct sd *) gspca_dev;
+
+	return do_command(gspca_dev, CPIA_COMMAND_SetFlickerCtrl,
+			  sd->params.flickerControl.flickerMode,
+			  sd->params.flickerControl.coarseJump,
+			  sd->params.flickerControl.allowableOverExposure,
+			  0);
+}
+
+int command_setecptiming(struct gspca_dev *gspca_dev)
+{
+	struct sd *sd = (struct sd *) gspca_dev;
+
+	return do_command(gspca_dev, CPIA_COMMAND_SetECPTiming,
+			  sd->params.ecpTiming, 0, 0, 0);
+}
+
+int command_pause(struct gspca_dev *gspca_dev)
+{
+	return do_command(gspca_dev, CPIA_COMMAND_EndStreamCap, 0, 0, 0, 0);
+}
+
+int command_resume(struct gspca_dev *gspca_dev)
+{
+	struct sd *sd = (struct sd *) gspca_dev;
+
+	return do_command(gspca_dev, CPIA_COMMAND_InitStreamCap,
+			  0, sd->params.streamStartLine, 0, 0);
+}
+
+int command_setlights(struct gspca_dev *gspca_dev)
+{
+	struct sd *sd = (struct sd *) gspca_dev;
+	int ret, p1, p2;
+
+	if (!sd->params.qx3.qx3_detected)
+		return 0;
+
+	p1 = (sd->params.qx3.bottomlight == 0) << 1;
+	p2 = (sd->params.qx3.toplight == 0) << 3;
+
+	ret = do_command(gspca_dev, CPIA_COMMAND_WriteVCReg,
+			 0x90, 0x8F, 0x50, 0);
+	if (ret)
+		return ret;
+
+	return do_command(gspca_dev, CPIA_COMMAND_WriteMCPort, 2, 0,
+			  p1 | p2 | 0xE0, 0);
+}
+
+static int set_flicker(struct gspca_dev *gspca_dev, int on, int apply)
+{
+	/* Everything in here is from the Windows driver */
+/* define for compgain calculation */
+#if 0
+#define COMPGAIN(base, curexp, newexp) \
+    (u8) ((((float) base - 128.0) * ((float) curexp / (float) newexp)) + 128.5)
+#define EXP_FROM_COMP(basecomp, curcomp, curexp) \
+    (u16)((float)curexp * (float)(u8)(curcomp + 128) / \
+    (float)(u8)(basecomp - 128))
+#else
+  /* equivalent functions without floating point math */
+#define COMPGAIN(base, curexp, newexp) \
+    (u8)(128 + (((u32)(2*(base-128)*curexp + newexp)) / (2 * newexp)))
+#define EXP_FROM_COMP(basecomp, curcomp, curexp) \
+    (u16)(((u32)(curexp * (u8)(curcomp + 128)) / (u8)(basecomp - 128)))
+#endif
+
+	struct sd *sd = (struct sd *) gspca_dev;
+	int currentexp = sd->params.exposure.coarseExpLo +
+			 sd->params.exposure.coarseExpHi * 256;
+	int ret, startexp;
+
+	if (on) {
+		int cj = sd->params.flickerControl.coarseJump;
+		sd->params.flickerControl.flickerMode = 1;
+		sd->params.flickerControl.disabled = 0;
+		if (sd->params.exposure.expMode != 2) {
+			sd->params.exposure.expMode = 2;
+			sd->exposure_status = EXPOSURE_NORMAL;
+		}
+		currentexp = currentexp << sd->params.exposure.gain;
+		sd->params.exposure.gain = 0;
+		/* round down current exposure to nearest value */
+		startexp = (currentexp + ROUND_UP_EXP_FOR_FLICKER) / cj;
+		if (startexp < 1)
+			startexp = 1;
+		startexp = (startexp * cj) - 1;
+		if (FIRMWARE_VERSION(1, 2))
+			while (startexp > MAX_EXP_102)
+				startexp -= cj;
+		else
+			while (startexp > MAX_EXP)
+				startexp -= cj;
+		sd->params.exposure.coarseExpLo = startexp & 0xff;
+		sd->params.exposure.coarseExpHi = startexp >> 8;
+		if (currentexp > startexp) {
+			if (currentexp > (2 * startexp))
+				currentexp = 2 * startexp;
+			sd->params.exposure.redComp =
+				COMPGAIN(COMP_RED, currentexp, startexp);
+			sd->params.exposure.green1Comp =
+				COMPGAIN(COMP_GREEN1, currentexp, startexp);
+			sd->params.exposure.green2Comp =
+				COMPGAIN(COMP_GREEN2, currentexp, startexp);
+			sd->params.exposure.blueComp =
+				COMPGAIN(COMP_BLUE, currentexp, startexp);
+		} else {
+			sd->params.exposure.redComp = COMP_RED;
+			sd->params.exposure.green1Comp = COMP_GREEN1;
+			sd->params.exposure.green2Comp = COMP_GREEN2;
+			sd->params.exposure.blueComp = COMP_BLUE;
+		}
+		if (FIRMWARE_VERSION(1, 2))
+			sd->params.exposure.compMode = 0;
+		else
+			sd->params.exposure.compMode = 1;
+
+		sd->params.apcor.gain1 = 0x18;
+		sd->params.apcor.gain2 = 0x18;
+		sd->params.apcor.gain4 = 0x16;
+		sd->params.apcor.gain8 = 0x14;
+	} else {
+		sd->params.flickerControl.flickerMode = 0;
+		sd->params.flickerControl.disabled = 1;
+		/* Average equivalent coarse for each comp channel */
+		startexp = EXP_FROM_COMP(COMP_RED,
+				sd->params.exposure.redComp, currentexp);
+		startexp += EXP_FROM_COMP(COMP_GREEN1,
+				sd->params.exposure.green1Comp, currentexp);
+		startexp += EXP_FROM_COMP(COMP_GREEN2,
+				sd->params.exposure.green2Comp, currentexp);
+		startexp += EXP_FROM_COMP(COMP_BLUE,
+				sd->params.exposure.blueComp, currentexp);
+		startexp = startexp >> 2;
+		while (startexp > MAX_EXP && sd->params.exposure.gain <
+		       sd->params.exposure.gainMode - 1) {
+			startexp = startexp >> 1;
+			++sd->params.exposure.gain;
+		}
+		if (FIRMWARE_VERSION(1, 2) && startexp > MAX_EXP_102)
+			startexp = MAX_EXP_102;
+		if (startexp > MAX_EXP)
+			startexp = MAX_EXP;
+		sd->params.exposure.coarseExpLo = startexp & 0xff;
+		sd->params.exposure.coarseExpHi = startexp >> 8;
+		sd->params.exposure.redComp = COMP_RED;
+		sd->params.exposure.green1Comp = COMP_GREEN1;
+		sd->params.exposure.green2Comp = COMP_GREEN2;
+		sd->params.exposure.blueComp = COMP_BLUE;
+		sd->params.exposure.compMode = 1;
+		sd->params.apcor.gain1 = 0x18;
+		sd->params.apcor.gain2 = 0x16;
+		sd->params.apcor.gain4 = 0x24;
+		sd->params.apcor.gain8 = 0x34;
+	}
+	sd->params.vlOffset.gain1 = 20;
+	sd->params.vlOffset.gain2 = 24;
+	sd->params.vlOffset.gain4 = 26;
+	sd->params.vlOffset.gain8 = 26;
+
+	if (apply) {
+		ret = command_setexposure(gspca_dev);
+		if (ret)
+			return ret;
+
+		ret = command_setapcor(gspca_dev);
+		if (ret)
+			return ret;
+
+		ret = command_setvloffset(gspca_dev);
+		if (ret)
+			return ret;
+
+		ret = command_setflickerctrl(gspca_dev);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+#undef EXP_FROM_COMP
+#undef COMPGAIN
+}
+
+/* monitor the exposure and adjust the sensor frame rate if needed */
+static void monitor_exposure(struct gspca_dev *gspca_dev)
+{
+	struct sd *sd = (struct sd *) gspca_dev;
+	u8 exp_acc, bcomp, gain, coarseL, cmd[8];
+	int ret, light_exp, dark_exp, very_dark_exp;
+	int old_exposure, new_exposure, framerate;
+	int setfps = 0, setexp = 0, setflicker = 0;
+
+	/* get necessary stats and register settings from camera */
+	/* do_command can't handle this, so do it ourselves */
+	cmd[0] = CPIA_COMMAND_ReadVPRegs >> 8;
+	cmd[1] = CPIA_COMMAND_ReadVPRegs & 0xff;
+	cmd[2] = 30;
+	cmd[3] = 4;
+	cmd[4] = 9;
+	cmd[5] = 8;
+	cmd[6] = 8;
+	cmd[7] = 0;
+	ret = cpia_usb_transferCmd(gspca_dev, cmd);
+	if (ret) {
+		PDEBUG(D_ERR, "ReadVPRegs(30,4,9,8) - failed: %d", ret);
+		return;
+	}
+	exp_acc = gspca_dev->usb_buf[0];
+	bcomp = gspca_dev->usb_buf[1];
+	gain = gspca_dev->usb_buf[2];
+	coarseL = gspca_dev->usb_buf[3];
+
+	light_exp = sd->params.colourParams.brightness +
+		    TC - 50 + EXP_ACC_LIGHT;
+	if (light_exp > 255)
+		light_exp = 255;
+	dark_exp = sd->params.colourParams.brightness +
+		   TC - 50 - EXP_ACC_DARK;
+	if (dark_exp < 0)
+		dark_exp = 0;
+	very_dark_exp = dark_exp / 2;
+
+	old_exposure = sd->params.exposure.coarseExpHi * 256 +
+		       sd->params.exposure.coarseExpLo;
+
+	if (!sd->params.flickerControl.disabled) {
+		/* Flicker control on */
+		int max_comp = FIRMWARE_VERSION(1, 2) ? MAX_COMP :
+							HIGH_COMP_102;
+		bcomp += 128;	/* decode */
+		if (bcomp >= max_comp && exp_acc < dark_exp) {
+			/* dark */
+			if (exp_acc < very_dark_exp) {
+				/* very dark */
+				if (sd->exposure_status == EXPOSURE_VERY_DARK)
+					++sd->exposure_count;
+				else {
+					sd->exposure_status =
+						EXPOSURE_VERY_DARK;
+					sd->exposure_count = 1;
+				}
+			} else {
+				/* just dark */
+				if (sd->exposure_status == EXPOSURE_DARK)
+					++sd->exposure_count;
+				else {
+					sd->exposure_status = EXPOSURE_DARK;
+					sd->exposure_count = 1;
+				}
+			}
+		} else if (old_exposure <= LOW_EXP || exp_acc > light_exp) {
+			/* light */
+			if (old_exposure <= VERY_LOW_EXP) {
+				/* very light */
+				if (sd->exposure_status == EXPOSURE_VERY_LIGHT)
+					++sd->exposure_count;
+				else {
+					sd->exposure_status =
+						EXPOSURE_VERY_LIGHT;
+					sd->exposure_count = 1;
+				}
+			} else {
+				/* just light */
+				if (sd->exposure_status == EXPOSURE_LIGHT)
+					++sd->exposure_count;
+				else {
+					sd->exposure_status = EXPOSURE_LIGHT;
+					sd->exposure_count = 1;
+				}
+			}
+		} else {
+			/* not dark or light */
+			sd->exposure_status = EXPOSURE_NORMAL;
+		}
+	} else {
+		/* Flicker control off */
+		if (old_exposure >= MAX_EXP && exp_acc < dark_exp) {
+			/* dark */
+			if (exp_acc < very_dark_exp) {
+				/* very dark */
+				if (sd->exposure_status == EXPOSURE_VERY_DARK)
+					++sd->exposure_count;
+				else {
+					sd->exposure_status =
+						EXPOSURE_VERY_DARK;
+					sd->exposure_count = 1;
+				}
+			} else {
+				/* just dark */
+				if (sd->exposure_status == EXPOSURE_DARK)
+					++sd->exposure_count;
+				else {
+					sd->exposure_status = EXPOSURE_DARK;
+					sd->exposure_count = 1;
+				}
+			}
+		} else if (old_exposure <= LOW_EXP || exp_acc > light_exp) {
+			/* light */
+			if (old_exposure <= VERY_LOW_EXP) {
+				/* very light */
+				if (sd->exposure_status == EXPOSURE_VERY_LIGHT)
+					++sd->exposure_count;
+				else {
+					sd->exposure_status =
+						EXPOSURE_VERY_LIGHT;
+					sd->exposure_count = 1;
+				}
+			} else {
+				/* just light */
+				if (sd->exposure_status == EXPOSURE_LIGHT)
+					++sd->exposure_count;
+				else {
+					sd->exposure_status = EXPOSURE_LIGHT;
+					sd->exposure_count = 1;
+				}
+			}
+		} else {
+			/* not dark or light */
+			sd->exposure_status = EXPOSURE_NORMAL;
+		}
+	}
+
+	framerate = atomic_read(&sd->fps);
+	if (framerate > 30 || framerate < 1)
+		framerate = 1;
+
+	if (!sd->params.flickerControl.disabled) {
+		/* Flicker control on */
+		if ((sd->exposure_status == EXPOSURE_VERY_DARK ||
+		     sd->exposure_status == EXPOSURE_DARK) &&
+		    sd->exposure_count >= DARK_TIME * framerate &&
+		    sd->params.sensorFps.divisor < 3) {
+
+			/* dark for too long */
+			++sd->params.sensorFps.divisor;
+			setfps = 1;
+
+			sd->params.flickerControl.coarseJump =
+				flicker_jumps[sd->mainsFreq]
+					     [sd->params.sensorFps.baserate]
+					     [sd->params.sensorFps.divisor];
+			setflicker = 1;
+
+			new_exposure = sd->params.flickerControl.coarseJump-1;
+			while (new_exposure < old_exposure / 2)
+				new_exposure +=
+					sd->params.flickerControl.coarseJump;
+			sd->params.exposure.coarseExpLo = new_exposure & 0xff;
+			sd->params.exposure.coarseExpHi = new_exposure >> 8;
+			setexp = 1;
+			sd->exposure_status = EXPOSURE_NORMAL;
+			PDEBUG(D_CONF, "Automatically decreasing sensor_fps");
+
+		} else if ((sd->exposure_status == EXPOSURE_VERY_LIGHT ||
+			    sd->exposure_status == EXPOSURE_LIGHT) &&
+			   sd->exposure_count >= LIGHT_TIME * framerate &&
+			   sd->params.sensorFps.divisor > 0) {
+
+			/* light for too long */
+			int max_exp = FIRMWARE_VERSION(1, 2) ? MAX_EXP_102 :
+							       MAX_EXP;
+			--sd->params.sensorFps.divisor;
+			setfps = 1;
+
+			sd->params.flickerControl.coarseJump =
+				flicker_jumps[sd->mainsFreq]
+					     [sd->params.sensorFps.baserate]
+					     [sd->params.sensorFps.divisor];
+			setflicker = 1;
+
+			new_exposure = sd->params.flickerControl.coarseJump-1;
+			while (new_exposure < 2 * old_exposure &&
+			       new_exposure +
+			       sd->params.flickerControl.coarseJump < max_exp)
+				new_exposure +=
+					sd->params.flickerControl.coarseJump;
+			sd->params.exposure.coarseExpLo = new_exposure & 0xff;
+			sd->params.exposure.coarseExpHi = new_exposure >> 8;
+			setexp = 1;
+			sd->exposure_status = EXPOSURE_NORMAL;
+			PDEBUG(D_CONF, "Automatically increasing sensor_fps");
+		}
+	} else {
+		/* Flicker control off */
+		if ((sd->exposure_status == EXPOSURE_VERY_DARK ||
+		     sd->exposure_status == EXPOSURE_DARK) &&
+		    sd->exposure_count >= DARK_TIME * framerate &&
+		    sd->params.sensorFps.divisor < 3) {
+
+			/* dark for too long */
+			++sd->params.sensorFps.divisor;
+			setfps = 1;
+
+			if (sd->params.exposure.gain > 0) {
+				--sd->params.exposure.gain;
+				setexp = 1;
+			}
+			sd->exposure_status = EXPOSURE_NORMAL;
+			PDEBUG(D_CONF, "Automatically decreasing sensor_fps");
+
+		} else if ((sd->exposure_status == EXPOSURE_VERY_LIGHT ||
+			    sd->exposure_status == EXPOSURE_LIGHT) &&
+			   sd->exposure_count >= LIGHT_TIME * framerate &&
+			   sd->params.sensorFps.divisor > 0) {
+
+			/* light for too long */
+			--sd->params.sensorFps.divisor;
+			setfps = 1;
+
+			if (sd->params.exposure.gain <
+			    sd->params.exposure.gainMode - 1) {
+				++sd->params.exposure.gain;
+				setexp = 1;
+			}
+			sd->exposure_status = EXPOSURE_NORMAL;
+			PDEBUG(D_CONF, "Automatically increasing sensor_fps");
+		}
+	}
+
+	if (setexp)
+		command_setexposure(gspca_dev);
+
+	if (setfps)
+		command_setsensorfps(gspca_dev);
+
+	if (setflicker)
+		command_setflickerctrl(gspca_dev);
+}
+
+/*-----------------------------------------------------------------*/
+/* if flicker is switched off, this function switches it back on.It checks,
+   however, that conditions are suitable before restarting it.
+   This should only be called for firmware version 1.2.
+
+   It also adjust the colour balance when an exposure step is detected - as
+   long as flicker is running
+*/
+static void restart_flicker(struct gspca_dev *gspca_dev)
+{
+	struct sd *sd = (struct sd *) gspca_dev;
+	int cam_exposure, old_exp;
+
+	if (!FIRMWARE_VERSION(1, 2))
+		return;
+
+	cam_exposure = atomic_read(&sd->cam_exposure);
+
+	if (sd->params.flickerControl.flickerMode == 0 ||
+	    cam_exposure == 0)
+		return;
+
+	old_exp = sd->params.exposure.coarseExpLo +
+		  sd->params.exposure.coarseExpHi*256;
+	/*
+	  see how far away camera exposure is from a valid
+	  flicker exposure value
+	*/
+	cam_exposure %= sd->params.flickerControl.coarseJump;
+	if (!sd->params.flickerControl.disabled &&
+	    cam_exposure <= sd->params.flickerControl.coarseJump - 3) {
+		/* Flicker control auto-disabled */
+		sd->params.flickerControl.disabled = 1;
+	}
+
+	if (sd->params.flickerControl.disabled &&
+	    old_exp > sd->params.flickerControl.coarseJump +
+		      ROUND_UP_EXP_FOR_FLICKER) {
+		/* exposure is now high enough to switch
+		   flicker control back on */
+		set_flicker(gspca_dev, 1, 1);
+	}
+}
+
+/* this function is called at probe time */
+static int sd_config(struct gspca_dev *gspca_dev,
+			const struct usb_device_id *id)
+{
+	struct cam *cam;
+
+	reset_camera_params(gspca_dev);
+
+	PDEBUG(D_PROBE, "cpia CPiA camera detected (vid/pid 0x%04X:0x%04X)",
+	       id->idVendor, id->idProduct);
+
+	cam = &gspca_dev->cam;
+	cam->cam_mode = mode;
+	cam->nmodes = ARRAY_SIZE(mode);
+
+	sd_setfreq(gspca_dev, FREQ_DEF);
+
+	return 0;
+}
+
+/* -- start the camera -- */
+static int sd_start(struct gspca_dev *gspca_dev)
+{
+	struct sd *sd = (struct sd *) gspca_dev;
+	int priv, ret;
+
+	/* Start the camera in low power mode */
+	if (goto_low_power(gspca_dev)) {
+		if (sd->params.status.systemState != WARM_BOOT_STATE) {
+			PDEBUG(D_ERR, "unexpected systemstate: %02x",
+			       sd->params.status.systemState);
+			printstatus(&sd->params);
+			return -ENODEV;
+		}
+
+		/* FIXME: this is just dirty trial and error */
+		ret = goto_high_power(gspca_dev);
+		if (ret)
+			return ret;
+
+		ret = do_command(gspca_dev, CPIA_COMMAND_DiscardFrame,
+				 0, 0, 0, 0);
+		if (ret)
+			return ret;
+
+		ret = goto_low_power(gspca_dev);
+		if (ret)
+			return ret;
+	}
+
+	/* procedure described in developer's guide p3-28 */
+
+	/* Check the firmware version. */
+	sd->params.version.firmwareVersion = 0;
+	get_version_information(gspca_dev);
+	if (sd->params.version.firmwareVersion != 1) {
+		PDEBUG(D_ERR, "only firmware version 1 is supported (got: %d)",
+		       sd->params.version.firmwareVersion);
+		return -ENODEV;
+	}
+
+	/* A bug in firmware 1-02 limits gainMode to 2 */
+	if (sd->params.version.firmwareRevision <= 2 &&
+	    sd->params.exposure.gainMode > 2) {
+		sd->params.exposure.gainMode = 2;
+	}
+
+	/* set QX3 detected flag */
+	sd->params.qx3.qx3_detected = (sd->params.pnpID.vendor == 0x0813 &&
+				       sd->params.pnpID.product == 0x0001);
+
+	/* The fatal error checking should be done after
+	 * the camera powers up (developer's guide p 3-38) */
+
+	/* Set streamState before transition to high power to avoid bug
+	 * in firmware 1-02 */
+	ret = do_command(gspca_dev, CPIA_COMMAND_ModifyCameraStatus,
+			 STREAMSTATE, 0, STREAM_NOT_READY, 0);
+	if (ret)
+		return ret;
+
+	/* GotoHiPower */
+	ret = goto_high_power(gspca_dev);
+	if (ret)
+		return ret;
+
+	/* Check the camera status */
+	ret = do_command(gspca_dev, CPIA_COMMAND_GetCameraStatus, 0, 0, 0, 0);
+	if (ret)
+		return ret;
+
+	if (sd->params.status.fatalError) {
+		PDEBUG(D_ERR, "fatal_error: %04x, vp_status: %04x",
+		       sd->params.status.fatalError,
+		       sd->params.status.vpStatus);
+		return -EIO;
+	}
+
+	/* VPVersion can't be retrieved before the camera is in HiPower,
+	 * so get it here instead of in get_version_information. */
+	ret = do_command(gspca_dev, CPIA_COMMAND_GetVPVersion, 0, 0, 0, 0);
+	if (ret)
+		return ret;
+
+	/* Determine video mode settings */
+	sd->params.streamStartLine = 120;
+
+	priv = gspca_dev->cam.cam_mode[gspca_dev->curr_mode].priv;
+	if (priv & 0x01) { /* crop */
+		sd->params.roi.colStart = 2;
+		sd->params.roi.rowStart = 6;
+	} else {
+		sd->params.roi.colStart = 0;
+		sd->params.roi.rowStart = 0;
+	}
+
+	if (priv & 0x02) { /* quarter */
+		sd->params.format.videoSize = VIDEOSIZE_QCIF;
+		sd->params.roi.colStart /= 2;
+		sd->params.roi.rowStart /= 2;
+		sd->params.streamStartLine /= 2;
+	} else
+		sd->params.format.videoSize = VIDEOSIZE_CIF;
+
+	sd->params.roi.colEnd = sd->params.roi.colStart +
+				(gspca_dev->width >> 3);
+	sd->params.roi.rowEnd = sd->params.roi.rowStart +
+				(gspca_dev->height >> 2);
+
+	/* And now set the camera to a known state */
+	ret = do_command(gspca_dev, CPIA_COMMAND_SetGrabMode,
+			 CPIA_GRAB_CONTINEOUS, 0, 0, 0);
+	if (ret)
+		return ret;
+	/* We start with compression disabled, as we need one uncompressed
+	   frame to handle later compressed frames */
+	ret = do_command(gspca_dev, CPIA_COMMAND_SetCompression,
+			 CPIA_COMPRESSION_NONE,
+			 NO_DECIMATION, 0, 0);
+	if (ret)
+		return ret;
+	ret = command_setcompressiontarget(gspca_dev);
+	if (ret)
+		return ret;
+	ret = command_setcolourparams(gspca_dev);
+	if (ret)
+		return ret;
+	ret = command_setformat(gspca_dev);
+	if (ret)
+		return ret;
+	ret = command_setyuvtresh(gspca_dev);
+	if (ret)
+		return ret;
+	ret = command_setecptiming(gspca_dev);
+	if (ret)
+		return ret;
+	ret = command_setcompressionparams(gspca_dev);
+	if (ret)
+		return ret;
+	ret = command_setexposure(gspca_dev);
+	if (ret)
+		return ret;
+	ret = command_setcolourbalance(gspca_dev);
+	if (ret)
+		return ret;
+	ret = command_setsensorfps(gspca_dev);
+	if (ret)
+		return ret;
+	ret = command_setapcor(gspca_dev);
+	if (ret)
+		return ret;
+	ret = command_setflickerctrl(gspca_dev);
+	if (ret)
+		return ret;
+	ret = command_setvloffset(gspca_dev);
+	if (ret)
+		return ret;
+
+	/* Start stream */
+	ret = command_resume(gspca_dev);
+	if (ret)
+		return ret;
+
+	/* Wait 6 frames before turning compression on for the sensor to get
+	   all settings and AEC/ACB to settle */
+	sd->first_frame = 6;
+	sd->exposure_status = EXPOSURE_NORMAL;
+	sd->exposure_count = 0;
+	atomic_set(&sd->cam_exposure, 0);
+	atomic_set(&sd->fps, 0);
+
+	return 0;
+}
+
+static void sd_stopN(struct gspca_dev *gspca_dev)
+{
+	command_pause(gspca_dev);
+
+	/* save camera state for later open (developers guide ch 3.5.3) */
+	save_camera_state(gspca_dev);
+
+	/* GotoLoPower */
+	goto_low_power(gspca_dev);
+
+	/* Update the camera status */
+	do_command(gspca_dev, CPIA_COMMAND_GetCameraStatus, 0, 0, 0, 0);
+}
+
+/* this function is called at probe and resume time */
+static int sd_init(struct gspca_dev *gspca_dev)
+{
+	struct sd *sd = (struct sd *) gspca_dev;
+	int ret;
+
+	/* Start / Stop the camera to make sure we are talking to
+	   a supported camera, and to get some information from it
+	   to print. */
+	ret = sd_start(gspca_dev);
+	if (ret)
+		return ret;
+
+	sd_stopN(gspca_dev);
+
+	PDEBUG(D_PROBE, "CPIA Version:             %d.%02d (%d.%d)",
+			sd->params.version.firmwareVersion,
+			sd->params.version.firmwareRevision,
+			sd->params.version.vcVersion,
+			sd->params.version.vcRevision);
+	PDEBUG(D_PROBE, "CPIA PnP-ID:              %04x:%04x:%04x",
+			sd->params.pnpID.vendor, sd->params.pnpID.product,
+			sd->params.pnpID.deviceRevision);
+	PDEBUG(D_PROBE, "VP-Version:               %d.%d %04x",
+			sd->params.vpVersion.vpVersion,
+			sd->params.vpVersion.vpRevision,
+			sd->params.vpVersion.cameraHeadID);
+
+	return 0;
+}
+
+static void sd_pkt_scan(struct gspca_dev *gspca_dev,
+			u8 *data,
+			int len)
+{
+	struct sd *sd = (struct sd *) gspca_dev;
+
+	/* Check for SOF */
+	if (len >= 64 &&
+	    data[0] == MAGIC_0 && data[1] == MAGIC_1 &&
+	    data[16] == sd->params.format.videoSize &&
+	    data[17] == sd->params.format.subSample &&
+	    data[18] == sd->params.format.yuvOrder &&
+	    data[24] == sd->params.roi.colStart &&
+	    data[25] == sd->params.roi.colEnd &&
+	    data[26] == sd->params.roi.rowStart &&
+	    data[27] == sd->params.roi.rowEnd) {
+		struct gspca_frame *frame = gspca_get_i_frame(gspca_dev);
+
+		atomic_set(&sd->cam_exposure, data[39] * 2);
+		atomic_set(&sd->fps, data[41]);
+
+		if (frame == NULL) {
+			gspca_dev->last_packet_type = DISCARD_PACKET;
+			return;
+		}
+
+		/* Check for proper EOF for last frame */
+		if ((frame->data_end - frame->data) > 4 &&
+		    frame->data_end[-4] == 0xff &&
+		    frame->data_end[-3] == 0xff &&
+		    frame->data_end[-2] == 0xff &&
+		    frame->data_end[-1] == 0xff)
+			gspca_frame_add(gspca_dev, LAST_PACKET,
+						NULL, 0);
+
+		gspca_frame_add(gspca_dev, FIRST_PACKET, data, len);
+		return;
+	}
+
+	gspca_frame_add(gspca_dev, INTER_PACKET, data, len);
+}
+
+static void sd_dq_callback(struct gspca_dev *gspca_dev)
+{
+	struct sd *sd = (struct sd *) gspca_dev;
+
+	/* Set the normal compression settings once we have captured a
+	   few uncompressed frames (and AEC has hopefully settled) */
+	if (sd->first_frame) {
+		sd->first_frame--;
+		if (sd->first_frame == 0)
+			command_setcompression(gspca_dev);
+	}
+
+	/* Switch flicker control back on if it got turned off */
+	restart_flicker(gspca_dev);
+
+	/* If AEC is enabled, monitor the exposure and
+	   adjust the sensor frame rate if needed */
+	if (sd->params.exposure.expMode == 2)
+		monitor_exposure(gspca_dev);
+
+	/* Update our knowledge of the camera state */
+	do_command(gspca_dev, CPIA_COMMAND_GetExposure, 0, 0, 0, 0);
+	if (sd->params.qx3.qx3_detected)
+		do_command(gspca_dev, CPIA_COMMAND_ReadMCPorts, 0, 0, 0, 0);
+}
+
+static int sd_setbrightness(struct gspca_dev *gspca_dev, __s32 val)
+{
+	struct sd *sd = (struct sd *) gspca_dev;
+	int ret;
+
+	sd->params.colourParams.brightness = val;
+	sd->params.flickerControl.allowableOverExposure =
+		find_over_exposure(sd->params.colourParams.brightness);
+	if (gspca_dev->streaming) {
+		ret = command_setcolourparams(gspca_dev);
+		if (ret)
+			return ret;
+		return command_setflickerctrl(gspca_dev);
+	}
+	return 0;
+}
+
+static int sd_getbrightness(struct gspca_dev *gspca_dev, __s32 *val)
+{
+	struct sd *sd = (struct sd *) gspca_dev;
+
+	*val = sd->params.colourParams.brightness;
+	return 0;
+}
+
+static int sd_setcontrast(struct gspca_dev *gspca_dev, __s32 val)
+{
+	struct sd *sd = (struct sd *) gspca_dev;
+
+	sd->params.colourParams.contrast = val;
+	if (gspca_dev->streaming)
+		return command_setcolourparams(gspca_dev);
+
+	return 0;
+}
+
+static int sd_getcontrast(struct gspca_dev *gspca_dev, __s32 *val)
+{
+	struct sd *sd = (struct sd *) gspca_dev;
+
+	*val = sd->params.colourParams.contrast;
+	return 0;
+}
+
+static int sd_setsaturation(struct gspca_dev *gspca_dev, __s32 val)
+{
+	struct sd *sd = (struct sd *) gspca_dev;
+
+	sd->params.colourParams.saturation = val;
+	if (gspca_dev->streaming)
+		return command_setcolourparams(gspca_dev);
+
+	return 0;
+}
+
+static int sd_getsaturation(struct gspca_dev *gspca_dev, __s32 *val)
+{
+	struct sd *sd = (struct sd *) gspca_dev;
+
+	*val = sd->params.colourParams.saturation;
+	return 0;
+}
+
+static int sd_setfreq(struct gspca_dev *gspca_dev, __s32 val)
+{
+	struct sd *sd = (struct sd *) gspca_dev;
+	int on;
+
+	switch (val) {
+	case 0:		/* V4L2_CID_POWER_LINE_FREQUENCY_DISABLED */
+		on = 0;
+		break;
+	case 1:		/* V4L2_CID_POWER_LINE_FREQUENCY_50HZ */
+		on = 1;
+		sd->mainsFreq = 0;
+		break;
+	case 2:		/* V4L2_CID_POWER_LINE_FREQUENCY_60HZ */
+		on = 1;
+		sd->mainsFreq = 1;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	sd->freq = val;
+	sd->params.flickerControl.coarseJump =
+		flicker_jumps[sd->mainsFreq]
+			     [sd->params.sensorFps.baserate]
+			     [sd->params.sensorFps.divisor];
+
+	return set_flicker(gspca_dev, on, gspca_dev->streaming);
+}
+
+static int sd_getfreq(struct gspca_dev *gspca_dev, __s32 *val)
+{
+	struct sd *sd = (struct sd *) gspca_dev;
+
+	*val = sd->freq;
+	return 0;
+}
+
+static int sd_setcomptarget(struct gspca_dev *gspca_dev, __s32 val)
+{
+	struct sd *sd = (struct sd *) gspca_dev;
+
+	sd->params.compressionTarget.frTargeting = val;
+	if (gspca_dev->streaming)
+		return command_setcompressiontarget(gspca_dev);
+
+	return 0;
+}
+
+static int sd_getcomptarget(struct gspca_dev *gspca_dev, __s32 *val)
+{
+	struct sd *sd = (struct sd *) gspca_dev;
+
+	*val = sd->params.compressionTarget.frTargeting;
+	return 0;
+}
+
+static int sd_querymenu(struct gspca_dev *gspca_dev,
+			struct v4l2_querymenu *menu)
+{
+	switch (menu->id) {
+	case V4L2_CID_POWER_LINE_FREQUENCY:
+		switch (menu->index) {
+		case 0:		/* V4L2_CID_POWER_LINE_FREQUENCY_DISABLED */
+			strcpy((char *) menu->name, "NoFliker");
+			return 0;
+		case 1:		/* V4L2_CID_POWER_LINE_FREQUENCY_50HZ */
+			strcpy((char *) menu->name, "50 Hz");
+			return 0;
+		case 2:		/* V4L2_CID_POWER_LINE_FREQUENCY_60HZ */
+			strcpy((char *) menu->name, "60 Hz");
+			return 0;
+		}
+		break;
+	case V4L2_CID_COMP_TARGET:
+		switch (menu->index) {
+		case CPIA_COMPRESSION_TARGET_QUALITY:
+			strcpy((char *) menu->name, "Quality");
+			return 0;
+		case CPIA_COMPRESSION_TARGET_FRAMERATE:
+			strcpy((char *) menu->name, "Framerate");
+			return 0;
+		}
+		break;
+	}
+	return -EINVAL;
+}
+
+/* sub-driver description */
+static const struct sd_desc sd_desc = {
+	.name = MODULE_NAME,
+	.ctrls = sd_ctrls,
+	.nctrls = ARRAY_SIZE(sd_ctrls),
+	.config = sd_config,
+	.init = sd_init,
+	.start = sd_start,
+	.stopN = sd_stopN,
+	.dq_callback = sd_dq_callback,
+	.pkt_scan = sd_pkt_scan,
+	.querymenu = sd_querymenu,
+};
+
+/* -- module initialisation -- */
+static const __devinitdata struct usb_device_id device_table[] = {
+	{USB_DEVICE(0x0553, 0x0002)},
+	{USB_DEVICE(0x0813, 0x0001)},
+	{}
+};
+MODULE_DEVICE_TABLE(usb, device_table);
+
+/* -- device connect -- */
+static int sd_probe(struct usb_interface *intf,
+			const struct usb_device_id *id)
+{
+	return gspca_dev_probe(intf, id, &sd_desc, sizeof(struct sd),
+				THIS_MODULE);
+}
+
+static struct usb_driver sd_driver = {
+	.name = MODULE_NAME,
+	.id_table = device_table,
+	.probe = sd_probe,
+	.disconnect = gspca_disconnect,
+#ifdef CONFIG_PM
+	.suspend = gspca_suspend,
+	.resume = gspca_resume,
+#endif
+};
+
+/* -- module insert / remove -- */
+static int __init sd_mod_init(void)
+{
+	int ret;
+	ret = usb_register(&sd_driver);
+	if (ret < 0)
+		return ret;
+	PDEBUG(D_PROBE, "registered");
+	return 0;
+}
+static void __exit sd_mod_exit(void)
+{
+	usb_deregister(&sd_driver);
+	PDEBUG(D_PROBE, "deregistered");
+}
+
+module_init(sd_mod_init);
+module_exit(sd_mod_exit);
diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h
index 1b06360af38d..3793d168b44d 100644
--- a/include/linux/videodev2.h
+++ b/include/linux/videodev2.h
@@ -350,6 +350,7 @@ struct v4l2_pix_format {
 #define V4L2_PIX_FMT_MPEG     v4l2_fourcc('M', 'P', 'E', 'G') /* MPEG-1/2/4    */
 
 /*  Vendor-specific formats   */
+#define V4L2_PIX_FMT_CPIA1    v4l2_fourcc('C', 'P', 'I', 'A') /* cpia1 YUV */
 #define V4L2_PIX_FMT_WNVA     v4l2_fourcc('W', 'N', 'V', 'A') /* Winnov hw compress */
 #define V4L2_PIX_FMT_SN9C10X  v4l2_fourcc('S', '9', '1', '0') /* SN9C10x compression */
 #define V4L2_PIX_FMT_SN9C20X_I420 v4l2_fourcc('S', '9', '2', '0') /* SN9C20x YUV 4:2:0 */
-- 
cgit v1.2.3


From 53823639173cc9e9a261f68f4abefe62364b86c6 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Sat, 23 Jan 2010 22:02:51 +0100
Subject: PM / Runtime: Add sysfs switch for disabling device run-time PM

Add new device sysfs attribute, power/control, allowing the user
space to block the run-time power management of the devices.  If this
attribute is set to "on", the driver of the device won't be able to power
manage it at run time (without breaking the rules) and the device will
always be in the full power state (except when the entire system goes
into a sleep state).

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Alan Stern <stern@rowland.harvard.edu>
---
 drivers/base/power/runtime.c | 45 ++++++++++++++++++++++++++++++++++++++
 drivers/base/power/sysfs.c   | 51 ++++++++++++++++++++++++++++++++++++++++++++
 include/linux/pm.h           |  1 +
 include/linux/pm_runtime.h   |  4 ++++
 4 files changed, 101 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
index f8b044e8aef7..626dd147b75f 100644
--- a/drivers/base/power/runtime.c
+++ b/drivers/base/power/runtime.c
@@ -1010,6 +1010,50 @@ void pm_runtime_enable(struct device *dev)
 }
 EXPORT_SYMBOL_GPL(pm_runtime_enable);
 
+/**
+ * pm_runtime_forbid - Block run-time PM of a device.
+ * @dev: Device to handle.
+ *
+ * Increase the device's usage count and clear its power.runtime_auto flag,
+ * so that it cannot be suspended at run time until pm_runtime_allow() is called
+ * for it.
+ */
+void pm_runtime_forbid(struct device *dev)
+{
+	spin_lock_irq(&dev->power.lock);
+	if (!dev->power.runtime_auto)
+		goto out;
+
+	dev->power.runtime_auto = false;
+	atomic_inc(&dev->power.usage_count);
+	__pm_runtime_resume(dev, false);
+
+ out:
+	spin_unlock_irq(&dev->power.lock);
+}
+EXPORT_SYMBOL_GPL(pm_runtime_forbid);
+
+/**
+ * pm_runtime_allow - Unblock run-time PM of a device.
+ * @dev: Device to handle.
+ *
+ * Decrease the device's usage count and set its power.runtime_auto flag.
+ */
+void pm_runtime_allow(struct device *dev)
+{
+	spin_lock_irq(&dev->power.lock);
+	if (dev->power.runtime_auto)
+		goto out;
+
+	dev->power.runtime_auto = true;
+	if (atomic_dec_and_test(&dev->power.usage_count))
+		__pm_runtime_idle(dev);
+
+ out:
+	spin_unlock_irq(&dev->power.lock);
+}
+EXPORT_SYMBOL_GPL(pm_runtime_allow);
+
 /**
  * pm_runtime_init - Initialize run-time PM fields in given device object.
  * @dev: Device object to initialize.
@@ -1028,6 +1072,7 @@ void pm_runtime_init(struct device *dev)
 
 	atomic_set(&dev->power.child_count, 0);
 	pm_suspend_ignore_children(dev, false);
+	dev->power.runtime_auto = true;
 
 	dev->power.request_pending = false;
 	dev->power.request = RPM_REQ_NONE;
diff --git a/drivers/base/power/sysfs.c b/drivers/base/power/sysfs.c
index 596aeecfdffe..c011ff15632c 100644
--- a/drivers/base/power/sysfs.c
+++ b/drivers/base/power/sysfs.c
@@ -4,9 +4,25 @@
 
 #include <linux/device.h>
 #include <linux/string.h>
+#include <linux/pm_runtime.h>
 #include "power.h"
 
 /*
+ *	control - Report/change current runtime PM setting of the device
+ *
+ *	Runtime power management of a device can be blocked with the help of
+ *	this attribute.  All devices have one of the following two values for
+ *	the power/control file:
+ *
+ *	 + "auto\n" to allow the device to be power managed at run time;
+ *	 + "on\n" to prevent the device from being power managed at run time;
+ *
+ *	The default for all devices is "auto", which means that devices may be
+ *	subject to automatic power management, depending on their drivers.
+ *	Changing this attribute to "on" prevents the driver from power managing
+ *	the device at run time.  Doing that while the device is suspended causes
+ *	it to be woken up.
+ *
  *	wakeup - Report/change current wakeup option for device
  *
  *	Some devices support "wakeup" events, which are hardware signals
@@ -43,6 +59,38 @@
 static const char enabled[] = "enabled";
 static const char disabled[] = "disabled";
 
+#ifdef CONFIG_PM_RUNTIME
+static const char ctrl_auto[] = "auto";
+static const char ctrl_on[] = "on";
+
+static ssize_t control_show(struct device *dev, struct device_attribute *attr,
+			    char *buf)
+{
+	return sprintf(buf, "%s\n",
+				dev->power.runtime_auto ? ctrl_auto : ctrl_on);
+}
+
+static ssize_t control_store(struct device * dev, struct device_attribute *attr,
+			     const char * buf, size_t n)
+{
+	char *cp;
+	int len = n;
+
+	cp = memchr(buf, '\n', n);
+	if (cp)
+		len = cp - buf;
+	if (len == sizeof ctrl_auto - 1 && strncmp(buf, ctrl_auto, len) == 0)
+		pm_runtime_allow(dev);
+	else if (len == sizeof ctrl_on - 1 && strncmp(buf, ctrl_on, len) == 0)
+		pm_runtime_forbid(dev);
+	else
+		return -EINVAL;
+	return n;
+}
+
+static DEVICE_ATTR(control, 0644, control_show, control_store);
+#endif
+
 static ssize_t
 wake_show(struct device * dev, struct device_attribute *attr, char * buf)
 {
@@ -79,6 +127,9 @@ static DEVICE_ATTR(wakeup, 0644, wake_show, wake_store);
 
 
 static struct attribute * power_attrs[] = {
+#ifdef CONFIG_PM_RUNTIME
+	&dev_attr_control.attr,
+#endif
 	&dev_attr_wakeup.attr,
 	NULL,
 };
diff --git a/include/linux/pm.h b/include/linux/pm.h
index 198b8f9fe05e..25b1eca8049d 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -430,6 +430,7 @@ struct dev_pm_info {
 	unsigned int		request_pending:1;
 	unsigned int		deferred_resume:1;
 	unsigned int		run_wake:1;
+	unsigned int		runtime_auto:1;
 	enum rpm_request	request;
 	enum rpm_status		runtime_status;
 	int			runtime_error;
diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h
index 370ce0a6fe4a..7d773aac5314 100644
--- a/include/linux/pm_runtime.h
+++ b/include/linux/pm_runtime.h
@@ -28,6 +28,8 @@ extern int __pm_runtime_set_status(struct device *dev, unsigned int status);
 extern int pm_runtime_barrier(struct device *dev);
 extern void pm_runtime_enable(struct device *dev);
 extern void __pm_runtime_disable(struct device *dev, bool check_resume);
+extern void pm_runtime_allow(struct device *dev);
+extern void pm_runtime_forbid(struct device *dev);
 
 static inline bool pm_children_suspended(struct device *dev)
 {
@@ -78,6 +80,8 @@ static inline int __pm_runtime_set_status(struct device *dev,
 static inline int pm_runtime_barrier(struct device *dev) { return 0; }
 static inline void pm_runtime_enable(struct device *dev) {}
 static inline void __pm_runtime_disable(struct device *dev, bool c) {}
+static inline void pm_runtime_allow(struct device *dev) {}
+static inline void pm_runtime_forbid(struct device *dev) {}
 
 static inline bool pm_children_suspended(struct device *dev) { return false; }
 static inline void pm_suspend_ignore_children(struct device *dev, bool en) {}
-- 
cgit v1.2.3


From 5af84b82701a96be4b033aaa51d86c72e2ded061 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Sat, 23 Jan 2010 22:23:32 +0100
Subject: PM: Asynchronous suspend and resume of devices

Theoretically, the total time of system sleep transitions (suspend
to RAM, hibernation) can be reduced by running suspend and resume
callbacks of device drivers in parallel with each other.  However,
there are dependencies between devices such that we're not allowed
to suspend the parent of a device before suspending the device
itself.  Analogously, we're not allowed to resume a device before
resuming its parent.

The most straightforward way to take these dependencies into accout
is to start the async threads used for suspending and resuming
devices at the core level, so that async_schedule() is called for
each suspend and resume callback supposed to be executed
asynchronously.

For this purpose, introduce a new device flag, power.async_suspend,
used to mark the devices whose suspend and resume callbacks are to be
executed asynchronously (ie. in parallel with the main suspend/resume
thread and possibly in parallel with each other) and helper function
device_enable_async_suspend() allowing one to set power.async_suspend
for given device (power.async_suspend is unset by default for all
devices).  For each device with the power.async_suspend flag set the
PM core will use async_schedule() to execute its suspend and resume
callbacks.

The async threads started for different devices as a result of
calling async_schedule() are synchronized with each other and with
the main suspend/resume thread with the help of completions, in the
following way:
(1) There is a completion, power.completion, for each device object.
(2) Each device's completion is reset before calling async_schedule()
    for the device or, in the case of devices with the
    power.async_suspend flags unset, before executing the device's
    suspend and resume callbacks.
(3) During suspend, right before running the bus type, device type
    and device class suspend callbacks for the device, the PM core
    waits for the completions of all the device's children to be
    completed.
(4) During resume, right before running the bus type, device type and
    device class resume callbacks for the device, the PM core waits
    for the completion of the device's parent to be completed.
(5) The PM core completes power.completion for each device right
    after the bus type, device type and device class suspend (or
    resume) callbacks executed for the device have returned.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
---
 drivers/base/power/main.c    | 115 ++++++++++++++++++++++++++++++++++++++++---
 include/linux/device.h       |   6 +++
 include/linux/pm.h           |   3 ++
 include/linux/resume-trace.h |   7 +++
 4 files changed, 125 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index 6ca5cdf63849..3b44c201ddad 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -25,6 +25,7 @@
 #include <linux/resume-trace.h>
 #include <linux/interrupt.h>
 #include <linux/sched.h>
+#include <linux/async.h>
 
 #include "../base.h"
 #include "power.h"
@@ -42,6 +43,7 @@
 LIST_HEAD(dpm_list);
 
 static DEFINE_MUTEX(dpm_list_mtx);
+static pm_message_t pm_transition;
 
 /*
  * Set once the preparation of devices for a PM transition has started, reset
@@ -56,6 +58,7 @@ static bool transition_started;
 void device_pm_init(struct device *dev)
 {
 	dev->power.status = DPM_ON;
+	init_completion(&dev->power.completion);
 	pm_runtime_init(dev);
 }
 
@@ -111,6 +114,7 @@ void device_pm_remove(struct device *dev)
 	pr_debug("PM: Removing info for %s:%s\n",
 		 dev->bus ? dev->bus->name : "No Bus",
 		 kobject_name(&dev->kobj));
+	complete_all(&dev->power.completion);
 	mutex_lock(&dpm_list_mtx);
 	list_del_init(&dev->power.entry);
 	mutex_unlock(&dpm_list_mtx);
@@ -187,6 +191,31 @@ static void initcall_debug_report(struct device *dev, ktime_t calltime,
 	}
 }
 
+/**
+ * dpm_wait - Wait for a PM operation to complete.
+ * @dev: Device to wait for.
+ * @async: If unset, wait only if the device's power.async_suspend flag is set.
+ */
+static void dpm_wait(struct device *dev, bool async)
+{
+	if (!dev)
+		return;
+
+	if (async || dev->power.async_suspend)
+		wait_for_completion(&dev->power.completion);
+}
+
+static int dpm_wait_fn(struct device *dev, void *async_ptr)
+{
+	dpm_wait(dev, *((bool *)async_ptr));
+	return 0;
+}
+
+static void dpm_wait_for_children(struct device *dev, bool async)
+{
+       device_for_each_child(dev, &async, dpm_wait_fn);
+}
+
 /**
  * pm_op - Execute the PM operation appropriate for given PM event.
  * @dev: Device to handle.
@@ -466,17 +495,19 @@ static int legacy_resume(struct device *dev, int (*cb)(struct device *dev))
 }
 
 /**
- * device_resume - Execute "resume" callbacks for given device.
+ * __device_resume - Execute "resume" callbacks for given device.
  * @dev: Device to handle.
  * @state: PM transition of the system being carried out.
+ * @async: If true, the device is being resumed asynchronously.
  */
-static int device_resume(struct device *dev, pm_message_t state)
+static int __device_resume(struct device *dev, pm_message_t state, bool async)
 {
 	int error = 0;
 
 	TRACE_DEVICE(dev);
 	TRACE_RESUME(0);
 
+	dpm_wait(dev->parent, async);
 	down(&dev->sem);
 
 	if (dev->bus) {
@@ -511,11 +542,36 @@ static int device_resume(struct device *dev, pm_message_t state)
 	}
  End:
 	up(&dev->sem);
+	complete_all(&dev->power.completion);
 
 	TRACE_RESUME(error);
 	return error;
 }
 
+static void async_resume(void *data, async_cookie_t cookie)
+{
+	struct device *dev = (struct device *)data;
+	int error;
+
+	error = __device_resume(dev, pm_transition, true);
+	if (error)
+		pm_dev_err(dev, pm_transition, " async", error);
+	put_device(dev);
+}
+
+static int device_resume(struct device *dev)
+{
+	INIT_COMPLETION(dev->power.completion);
+
+	if (dev->power.async_suspend && !pm_trace_is_enabled()) {
+		get_device(dev);
+		async_schedule(async_resume, dev);
+		return 0;
+	}
+
+	return __device_resume(dev, pm_transition, false);
+}
+
 /**
  * dpm_resume - Execute "resume" callbacks for non-sysdev devices.
  * @state: PM transition of the system being carried out.
@@ -530,6 +586,7 @@ static void dpm_resume(pm_message_t state)
 
 	INIT_LIST_HEAD(&list);
 	mutex_lock(&dpm_list_mtx);
+	pm_transition = state;
 	while (!list_empty(&dpm_list)) {
 		struct device *dev = to_device(dpm_list.next);
 
@@ -540,7 +597,7 @@ static void dpm_resume(pm_message_t state)
 			dev->power.status = DPM_RESUMING;
 			mutex_unlock(&dpm_list_mtx);
 
-			error = device_resume(dev, state);
+			error = device_resume(dev);
 
 			mutex_lock(&dpm_list_mtx);
 			if (error)
@@ -555,6 +612,7 @@ static void dpm_resume(pm_message_t state)
 	}
 	list_splice(&list, &dpm_list);
 	mutex_unlock(&dpm_list_mtx);
+	async_synchronize_full();
 	dpm_show_time(starttime, state, NULL);
 }
 
@@ -732,17 +790,24 @@ static int legacy_suspend(struct device *dev, pm_message_t state,
 	return error;
 }
 
+static int async_error;
+
 /**
  * device_suspend - Execute "suspend" callbacks for given device.
  * @dev: Device to handle.
  * @state: PM transition of the system being carried out.
+ * @async: If true, the device is being suspended asynchronously.
  */
-static int device_suspend(struct device *dev, pm_message_t state)
+static int __device_suspend(struct device *dev, pm_message_t state, bool async)
 {
 	int error = 0;
 
+	dpm_wait_for_children(dev, async);
 	down(&dev->sem);
 
+	if (async_error)
+		goto End;
+
 	if (dev->class) {
 		if (dev->class->pm) {
 			pm_dev_dbg(dev, state, "class ");
@@ -773,12 +838,44 @@ static int device_suspend(struct device *dev, pm_message_t state)
 			error = legacy_suspend(dev, state, dev->bus->suspend);
 		}
 	}
+
+	if (!error)
+		dev->power.status = DPM_OFF;
+
  End:
 	up(&dev->sem);
+	complete_all(&dev->power.completion);
 
 	return error;
 }
 
+static void async_suspend(void *data, async_cookie_t cookie)
+{
+	struct device *dev = (struct device *)data;
+	int error;
+
+	error = __device_suspend(dev, pm_transition, true);
+	if (error) {
+		pm_dev_err(dev, pm_transition, " async", error);
+		async_error = error;
+	}
+
+	put_device(dev);
+}
+
+static int device_suspend(struct device *dev)
+{
+	INIT_COMPLETION(dev->power.completion);
+
+	if (dev->power.async_suspend) {
+		get_device(dev);
+		async_schedule(async_suspend, dev);
+		return 0;
+	}
+
+	return __device_suspend(dev, pm_transition, false);
+}
+
 /**
  * dpm_suspend - Execute "suspend" callbacks for all non-sysdev devices.
  * @state: PM transition of the system being carried out.
@@ -791,13 +888,15 @@ static int dpm_suspend(pm_message_t state)
 
 	INIT_LIST_HEAD(&list);
 	mutex_lock(&dpm_list_mtx);
+	pm_transition = state;
+	async_error = 0;
 	while (!list_empty(&dpm_list)) {
 		struct device *dev = to_device(dpm_list.prev);
 
 		get_device(dev);
 		mutex_unlock(&dpm_list_mtx);
 
-		error = device_suspend(dev, state);
+		error = device_suspend(dev);
 
 		mutex_lock(&dpm_list_mtx);
 		if (error) {
@@ -805,13 +904,17 @@ static int dpm_suspend(pm_message_t state)
 			put_device(dev);
 			break;
 		}
-		dev->power.status = DPM_OFF;
 		if (!list_empty(&dev->power.entry))
 			list_move(&dev->power.entry, &list);
 		put_device(dev);
+		if (async_error)
+			break;
 	}
 	list_splice(&list, dpm_list.prev);
 	mutex_unlock(&dpm_list_mtx);
+	async_synchronize_full();
+	if (!error)
+		error = async_error;
 	if (!error)
 		dpm_show_time(starttime, state, NULL);
 	return error;
diff --git a/include/linux/device.h b/include/linux/device.h
index a62799f2ab00..70adc5f3f50a 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -472,6 +472,12 @@ static inline int device_is_registered(struct device *dev)
 	return dev->kobj.state_in_sysfs;
 }
 
+static inline void device_enable_async_suspend(struct device *dev)
+{
+	if (dev->power.status == DPM_ON)
+		dev->power.async_suspend = true;
+}
+
 void driver_init(void);
 
 /*
diff --git a/include/linux/pm.h b/include/linux/pm.h
index 25b1eca8049d..9c16cd20fc96 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -26,6 +26,7 @@
 #include <linux/spinlock.h>
 #include <linux/wait.h>
 #include <linux/timer.h>
+#include <linux/completion.h>
 
 /*
  * Callbacks for platform drivers to implement.
@@ -412,9 +413,11 @@ struct dev_pm_info {
 	pm_message_t		power_state;
 	unsigned int		can_wakeup:1;
 	unsigned int		should_wakeup:1;
+	unsigned		async_suspend:1;
 	enum dpm_state		status;		/* Owned by the PM core */
 #ifdef CONFIG_PM_SLEEP
 	struct list_head	entry;
+	struct completion	completion;
 #endif
 #ifdef CONFIG_PM_RUNTIME
 	struct timer_list	suspend_timer;
diff --git a/include/linux/resume-trace.h b/include/linux/resume-trace.h
index c9ba2fdf807d..bc8c3881c729 100644
--- a/include/linux/resume-trace.h
+++ b/include/linux/resume-trace.h
@@ -6,6 +6,11 @@
 
 extern int pm_trace_enabled;
 
+static inline int pm_trace_is_enabled(void)
+{
+       return pm_trace_enabled;
+}
+
 struct device;
 extern void set_trace_device(struct device *);
 extern void generate_resume_trace(const void *tracedata, unsigned int user);
@@ -17,6 +22,8 @@ extern void generate_resume_trace(const void *tracedata, unsigned int user);
 
 #else
 
+static inline int pm_trace_is_enabled(void) { return 0; }
+
 #define TRACE_DEVICE(dev) do { } while (0)
 #define TRACE_RESUME(dev) do { } while (0)
 
-- 
cgit v1.2.3


From 5a2eb8585f3b38e01e30aacaa8b985a1520a993d Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Sat, 23 Jan 2010 22:25:23 +0100
Subject: PM: Add facility for advanced testing of async suspend/resume

Add configuration switch CONFIG_PM_ADVANCED_DEBUG for compiling in
extra PM debugging/testing code allowing one to access some
PM-related attributes of devices from the user space via sysfs.

If CONFIG_PM_ADVANCED_DEBUG is set, add sysfs attribute power/async
for every device allowing the user space to access the device's
power.async_suspend flag and modify it, if desired.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
---
 Documentation/ABI/testing/sysfs-devices-power | 26 ++++++++++++++
 drivers/base/power/sysfs.c                    | 49 +++++++++++++++++++++++++++
 include/linux/device.h                        | 11 ++++++
 kernel/power/Kconfig                          | 14 ++++++++
 4 files changed, 100 insertions(+)

(limited to 'include/linux')

diff --git a/Documentation/ABI/testing/sysfs-devices-power b/Documentation/ABI/testing/sysfs-devices-power
index 431bfd7e65c7..6123c523bfd7 100644
--- a/Documentation/ABI/testing/sysfs-devices-power
+++ b/Documentation/ABI/testing/sysfs-devices-power
@@ -51,3 +51,29 @@ Description:
 		drivers.  Changing this attribute to "on" prevents the driver
 		from power managing the device at run time.  Doing that while
 		the device is suspended causes it to be woken up.
+
+What:		/sys/devices/.../power/async
+Date:		January 2009
+Contact:	Rafael J. Wysocki <rjw@sisk.pl>
+Description:
+		The /sys/devices/.../async attribute allows the user space to
+		enable or diasble the device's suspend and resume callbacks to
+		be executed asynchronously (ie. in separate threads, in parallel
+		with the main suspend/resume thread) during system-wide power
+		transitions (eg. suspend to RAM, hibernation).
+
+		All devices have one of the following two values for the
+		power/async file:
+
+		+ "enabled\n" to permit the asynchronous suspend/resume;
+		+ "disabled\n" to forbid it;
+
+		The value of this attribute may be changed by writing either
+		"enabled", or "disabled" to it.
+
+		It generally is unsafe to permit the asynchronous suspend/resume
+		of a device unless it is certain that all of the PM dependencies
+		of the device are known to the PM core.  However, for some
+		devices this attribute is set to "enabled" by bus type code or
+		device drivers and in that cases it should be safe to leave the
+		default value.
diff --git a/drivers/base/power/sysfs.c b/drivers/base/power/sysfs.c
index c011ff15632c..86fd9373447e 100644
--- a/drivers/base/power/sysfs.c
+++ b/drivers/base/power/sysfs.c
@@ -54,6 +54,24 @@
  *	wakeup events internally (unless they are disabled), keeping
  *	their hardware in low power modes whenever they're unused.  This
  *	saves runtime power, without requiring system-wide sleep states.
+ *
+ *	async - Report/change current async suspend setting for the device
+ *
+ *	Asynchronous suspend and resume of the device during system-wide power
+ *	state transitions can be enabled by writing "enabled" to this file.
+ *	Analogously, if "disabled" is written to this file, the device will be
+ *	suspended and resumed synchronously.
+ *
+ *	All devices have one of the following two values for power/async:
+ *
+ *	 + "enabled\n" to permit the asynchronous suspend/resume of the device;
+ *	 + "disabled\n" to forbid it;
+ *
+ *	NOTE: It generally is unsafe to permit the asynchronous suspend/resume
+ *	of a device unless it is certain that all of the PM dependencies of the
+ *	device are known to the PM core.  However, for some devices this
+ *	attribute is set to "enabled" by bus type code or device drivers and in
+ *	that cases it should be safe to leave the default value.
  */
 
 static const char enabled[] = "enabled";
@@ -125,12 +143,43 @@ wake_store(struct device * dev, struct device_attribute *attr,
 
 static DEVICE_ATTR(wakeup, 0644, wake_show, wake_store);
 
+#ifdef CONFIG_PM_SLEEP_ADVANCED_DEBUG
+static ssize_t async_show(struct device *dev, struct device_attribute *attr,
+			  char *buf)
+{
+	return sprintf(buf, "%s\n",
+			device_async_suspend_enabled(dev) ? enabled : disabled);
+}
+
+static ssize_t async_store(struct device *dev, struct device_attribute *attr,
+			   const char *buf, size_t n)
+{
+	char *cp;
+	int len = n;
+
+	cp = memchr(buf, '\n', n);
+	if (cp)
+		len = cp - buf;
+	if (len == sizeof enabled - 1 && strncmp(buf, enabled, len) == 0)
+		device_enable_async_suspend(dev);
+	else if (len == sizeof disabled - 1 && strncmp(buf, disabled, len) == 0)
+		device_disable_async_suspend(dev);
+	else
+		return -EINVAL;
+	return n;
+}
+
+static DEVICE_ATTR(async, 0644, async_show, async_store);
+#endif /* CONFIG_PM_SLEEP_ADVANCED_DEBUG */
 
 static struct attribute * power_attrs[] = {
 #ifdef CONFIG_PM_RUNTIME
 	&dev_attr_control.attr,
 #endif
 	&dev_attr_wakeup.attr,
+#ifdef CONFIG_PM_SLEEP_ADVANCED_DEBUG
+	&dev_attr_async.attr,
+#endif
 	NULL,
 };
 static struct attribute_group pm_attr_group = {
diff --git a/include/linux/device.h b/include/linux/device.h
index 70adc5f3f50a..b30527db3ac0 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -478,6 +478,17 @@ static inline void device_enable_async_suspend(struct device *dev)
 		dev->power.async_suspend = true;
 }
 
+static inline void device_disable_async_suspend(struct device *dev)
+{
+	if (dev->power.status == DPM_ON)
+		dev->power.async_suspend = false;
+}
+
+static inline bool device_async_suspend_enabled(struct device *dev)
+{
+	return !!dev->power.async_suspend;
+}
+
 void driver_init(void);
 
 /*
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index 4c9cffcf69c7..5c36ea9d55d2 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -27,6 +27,15 @@ config PM_DEBUG
 	code. This is helpful when debugging and reporting PM bugs, like
 	suspend support.
 
+config PM_ADVANCED_DEBUG
+	bool "Extra PM attributes in sysfs for low-level debugging/testing"
+	depends on PM_DEBUG
+	default n
+	---help---
+	Add extra sysfs attributes allowing one to access some Power Management
+	fields of device objects from user space.  If you are not a kernel
+	developer interested in debugging/testing Power Management, say "no".
+
 config PM_VERBOSE
 	bool "Verbose Power Management debugging"
 	depends on PM_DEBUG
@@ -85,6 +94,11 @@ config PM_SLEEP
 	depends on SUSPEND || HIBERNATION || XEN_SAVE_RESTORE
 	default y
 
+config PM_SLEEP_ADVANCED_DEBUG
+	bool
+	depends on PM_ADVANCED_DEBUG
+	default n
+
 config SUSPEND
 	bool "Suspend to RAM and standby"
 	depends on PM && ARCH_SUSPEND_POSSIBLE
-- 
cgit v1.2.3


From f8824cee405c62ba465b85365201166d9cf86a14 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Wed, 27 Jan 2010 23:47:38 +0100
Subject: PM: Allow device drivers to use dpm_wait()

There are some dependencies between devices (in particular, between
EHCI USB controllers and their OHCI/UHCI siblings) which are not
reflected by the structure of the device tree.  With synchronous
suspend and resume these dependencies are taken into accout
automatically, because the devices in question are always registered
in the right order, but to meet these constraints with asynchronous
suspend and resume the drivers of these devices will need to use
dpm_wait() in their suspend/resume routines, so introduce a helper
function allowing them to do that.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
---
 drivers/base/power/main.c | 11 +++++++++++
 include/linux/pm.h        |  2 ++
 2 files changed, 13 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index 6efef9fb23a1..0e26a6f6fd48 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -1046,3 +1046,14 @@ void __suspend_report_result(const char *function, void *fn, int ret)
 		printk(KERN_ERR "%s(): %pF returns %d\n", function, fn, ret);
 }
 EXPORT_SYMBOL_GPL(__suspend_report_result);
+
+/**
+ * device_pm_wait_for_dev - Wait for suspend/resume of a device to complete.
+ * @dev: Device to wait for.
+ * @subordinate: Device that needs to wait for @dev.
+ */
+void device_pm_wait_for_dev(struct device *subordinate, struct device *dev)
+{
+	dpm_wait(dev, subordinate->power.async_suspend);
+}
+EXPORT_SYMBOL_GPL(device_pm_wait_for_dev);
diff --git a/include/linux/pm.h b/include/linux/pm.h
index 9c16cd20fc96..e80df06ad22a 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -512,6 +512,7 @@ extern void __suspend_report_result(const char *function, void *fn, int ret);
 		__suspend_report_result(__func__, fn, ret);		\
 	} while (0)
 
+extern void device_pm_wait_for_dev(struct device *sub, struct device *dev);
 #else /* !CONFIG_PM_SLEEP */
 
 #define device_pm_lock() do {} while (0)
@@ -524,6 +525,7 @@ static inline int dpm_suspend_start(pm_message_t state)
 
 #define suspend_report_result(fn, ret)		do {} while (0)
 
+static inline void device_pm_wait_for_dev(struct device *a, struct device *b) {}
 #endif /* !CONFIG_PM_SLEEP */
 
 /* How to reorder dpm_list after device_move() */
-- 
cgit v1.2.3


From 6d19c009cc780c63de25a046509ebc9473809fd6 Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Fri, 12 Feb 2010 12:21:11 +0100
Subject: USB: implement non-tree resume ordering constraints for PCI host
 controllers

This patch (as1331) adds non-tree ordering constraints needed for
proper resume of PCI USB host controllers from hibernation.  The main
issue is that non-high-speed devices must not be resumed before the
high-speed root hub, because it is the ehci_bus_resume() routine which
takes care of handing the device connection over to the companion
controller.  If the device resume is attempted before the handover
then the device won't be found and it will be treated as though it had
disconnected.

The patch adds a new field to the usb_bus structure; for each
full/low-speed bus this field will contain a pointer to the companion
high-speed bus (if one exists).  It is used during normal device
resume; if the hs_companion pointer isn't NULL then we wait for the
root-hub device on the hs_companion bus.

A secondary issue is that an EHCI controlller shouldn't be resumed
before any of its companions.  On some machines I have observed
handovers failing if the companion controller is reinitialized after
the handover.  Thus, the EHCI resume routine must wait for the
companion controllers to be resumed.

The patch also fixes a small bug in usb_hcd_pci_probe(); an error path
jumps to the wrong label, causing a memory leak.

[rjw: Fixed compilation for CONFIG_PM_SLEEP unset.]

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
---
 drivers/usb/core/driver.c  |   8 +++
 drivers/usb/core/hcd-pci.c | 127 ++++++++++++++++++++++++++++++++++++++++++++-
 include/linux/usb.h        |   1 +
 3 files changed, 135 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/usb/core/driver.c b/drivers/usb/core/driver.c
index 60a45f1e3a67..f2f055eb6831 100644
--- a/drivers/usb/core/driver.c
+++ b/drivers/usb/core/driver.c
@@ -1022,6 +1022,14 @@ static int usb_resume_device(struct usb_device *udev, pm_message_t msg)
 		goto done;
 	}
 
+	/* Non-root devices on a full/low-speed bus must wait for their
+	 * companion high-speed root hub, in case a handoff is needed.
+	 */
+	if (!(msg.event & PM_EVENT_AUTO) && udev->parent &&
+			udev->bus->hs_companion)
+		device_pm_wait_for_dev(&udev->dev,
+				&udev->bus->hs_companion->root_hub->dev);
+
 	if (udev->quirks & USB_QUIRK_RESET_RESUME)
 		udev->reset_resume = 1;
 
diff --git a/drivers/usb/core/hcd-pci.c b/drivers/usb/core/hcd-pci.c
index 2dcf906df569..15286533c15a 100644
--- a/drivers/usb/core/hcd-pci.c
+++ b/drivers/usb/core/hcd-pci.c
@@ -19,6 +19,7 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/pci.h>
+#include <linux/pm_runtime.h>
 #include <linux/usb.h>
 
 #include <asm/io.h>
@@ -37,6 +38,122 @@
 
 /* PCI-based HCs are common, but plenty of non-PCI HCs are used too */
 
+#ifdef CONFIG_PM_SLEEP
+
+/* Coordinate handoffs between EHCI and companion controllers
+ * during system resume
+ */
+
+static DEFINE_MUTEX(companions_mutex);
+
+#define CL_UHCI		PCI_CLASS_SERIAL_USB_UHCI
+#define CL_OHCI		PCI_CLASS_SERIAL_USB_OHCI
+#define CL_EHCI		PCI_CLASS_SERIAL_USB_EHCI
+
+enum companion_action {
+	SET_HS_COMPANION, CLEAR_HS_COMPANION, WAIT_FOR_COMPANIONS
+};
+
+static void companion_common(struct pci_dev *pdev, struct usb_hcd *hcd,
+		enum companion_action action)
+{
+	struct pci_dev		*companion;
+	struct usb_hcd		*companion_hcd;
+	unsigned int		slot = PCI_SLOT(pdev->devfn);
+
+	/* Iterate through other PCI functions in the same slot.
+	 * If pdev is OHCI or UHCI then we are looking for EHCI, and
+	 * vice versa.
+	 */
+	companion = NULL;
+	for (;;) {
+		companion = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, companion);
+		if (!companion)
+			break;
+		if (companion->bus != pdev->bus ||
+				PCI_SLOT(companion->devfn) != slot)
+			continue;
+
+		companion_hcd = pci_get_drvdata(companion);
+		if (!companion_hcd)
+			continue;
+
+		/* For SET_HS_COMPANION, store a pointer to the EHCI bus in
+		 * the OHCI/UHCI companion bus structure.
+		 * For CLEAR_HS_COMPANION, clear the pointer to the EHCI bus
+		 * in the OHCI/UHCI companion bus structure.
+		 * For WAIT_FOR_COMPANIONS, wait until the OHCI/UHCI
+		 * companion controllers have fully resumed.
+		 */
+
+		if ((pdev->class == CL_OHCI || pdev->class == CL_UHCI) &&
+				companion->class == CL_EHCI) {
+			/* action must be SET_HS_COMPANION */
+			dev_dbg(&companion->dev, "HS companion for %s\n",
+					dev_name(&pdev->dev));
+			hcd->self.hs_companion = &companion_hcd->self;
+
+		} else if (pdev->class == CL_EHCI &&
+				(companion->class == CL_OHCI ||
+				companion->class == CL_UHCI)) {
+			switch (action) {
+			case SET_HS_COMPANION:
+				dev_dbg(&pdev->dev, "HS companion for %s\n",
+						dev_name(&companion->dev));
+				companion_hcd->self.hs_companion = &hcd->self;
+				break;
+			case CLEAR_HS_COMPANION:
+				companion_hcd->self.hs_companion = NULL;
+				break;
+			case WAIT_FOR_COMPANIONS:
+				device_pm_wait_for_dev(&pdev->dev,
+						&companion->dev);
+				break;
+			}
+		}
+	}
+}
+
+static void set_hs_companion(struct pci_dev *pdev, struct usb_hcd *hcd)
+{
+	mutex_lock(&companions_mutex);
+	dev_set_drvdata(&pdev->dev, hcd);
+	companion_common(pdev, hcd, SET_HS_COMPANION);
+	mutex_unlock(&companions_mutex);
+}
+
+static void clear_hs_companion(struct pci_dev *pdev, struct usb_hcd *hcd)
+{
+	mutex_lock(&companions_mutex);
+	dev_set_drvdata(&pdev->dev, NULL);
+
+	/* If pdev is OHCI or UHCI, just clear its hs_companion pointer */
+	if (pdev->class == CL_OHCI || pdev->class == CL_UHCI)
+		hcd->self.hs_companion = NULL;
+
+	/* Otherwise search for companion buses and clear their pointers */
+	else
+		companion_common(pdev, hcd, CLEAR_HS_COMPANION);
+	mutex_unlock(&companions_mutex);
+}
+
+static void wait_for_companions(struct pci_dev *pdev, struct usb_hcd *hcd)
+{
+	/* Only EHCI controllers need to wait.
+	 * No locking is needed because a controller cannot be resumed
+	 * while one of its companions is getting unbound.
+	 */
+	if (pdev->class == CL_EHCI)
+		companion_common(pdev, hcd, WAIT_FOR_COMPANIONS);
+}
+
+#else /* !CONFIG_PM_SLEEP */
+
+static inline void set_hs_companion(struct pci_dev *d, struct usb_hcd *h) {}
+static inline void clear_hs_companion(struct pci_dev *d, struct usb_hcd *h) {}
+static inline void wait_for_companions(struct pci_dev *d, struct usb_hcd *h) {}
+
+#endif /* !CONFIG_PM_SLEEP */
 
 /*-------------------------------------------------------------------------*/
 
@@ -123,7 +240,7 @@ int usb_hcd_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
 		if (region == PCI_ROM_RESOURCE) {
 			dev_dbg(&dev->dev, "no i/o regions available\n");
 			retval = -EBUSY;
-			goto err1;
+			goto err2;
 		}
 	}
 
@@ -132,6 +249,7 @@ int usb_hcd_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
 	retval = usb_add_hcd(hcd, dev->irq, IRQF_DISABLED | IRQF_SHARED);
 	if (retval != 0)
 		goto err4;
+	set_hs_companion(dev, hcd);
 	return retval;
 
  err4:
@@ -142,6 +260,7 @@ int usb_hcd_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
 	} else
 		release_region(hcd->rsrc_start, hcd->rsrc_len);
  err2:
+	clear_hs_companion(dev, hcd);
 	usb_put_hcd(hcd);
  err1:
 	pci_disable_device(dev);
@@ -180,6 +299,7 @@ void usb_hcd_pci_remove(struct pci_dev *dev)
 	} else {
 		release_region(hcd->rsrc_start, hcd->rsrc_len);
 	}
+	clear_hs_companion(dev, hcd);
 	usb_put_hcd(hcd);
 	pci_disable_device(dev);
 }
@@ -344,6 +464,11 @@ static int resume_common(struct device *dev, bool hibernated)
 	clear_bit(HCD_FLAG_SAW_IRQ, &hcd->flags);
 
 	if (hcd->driver->pci_resume) {
+		/* This call should be made only during system resume,
+		 * not during runtime resume.
+		 */
+		wait_for_companions(pci_dev, hcd);
+
 		retval = hcd->driver->pci_resume(hcd, hibernated);
 		if (retval) {
 			dev_err(dev, "PCI post-resume error %d!\n", retval);
diff --git a/include/linux/usb.h b/include/linux/usb.h
index d7ace1b80f09..332eaea61021 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -339,6 +339,7 @@ struct usb_bus {
 
 	struct usb_devmap devmap;	/* device address allocation map */
 	struct usb_device *root_hub;	/* Root hub */
+	struct usb_bus *hs_companion;	/* Companion EHCI bus, if any */
 	struct list_head bus_list;	/* list of busses */
 
 	int bandwidth_allocated;	/* on this bus: how much of the time
-- 
cgit v1.2.3


From 73bfa5f2f71efcdcaad8d18cbed96b9d7ed86948 Mon Sep 17 00:00:00 2001
From: Michael Holzheu <holzheu@linux.vnet.ibm.com>
Date: Fri, 26 Feb 2010 22:37:52 +0100
Subject: [S390] Define new s390 ELF note sections in elf.h

S390 ELF core dump currently only contains the PSW, the general purpose
registers, the floating point registers and the access registers stored
in PRSTATUS/PRFPREG note sections.
For analyzing s390 kernel problems additional registers are important.
In order to be able to include these registers to a kernel ELF core dump,
this patch adds the following five new note sections to elf.h:

* NT_S390_TIMER:   S390 timer register
* NT_S390_TODCMP:  S390 TOD comparator register
* NT_S390_TODPREG: S390 TOD programmable register
* NT_S390_CTRS:    S390 control registers
* NT_S390_PREFIX:  S390 prefix register

The new note sections have been already defined and accepted in the upstream
binutils package.

Signed-off-by: Michael Holzheu <holzheu@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 include/linux/elf.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/elf.h b/include/linux/elf.h
index 0cc4d55151b7..39ad4b230a4a 100644
--- a/include/linux/elf.h
+++ b/include/linux/elf.h
@@ -362,6 +362,11 @@ typedef struct elf64_shdr {
 #define NT_386_TLS	0x200		/* i386 TLS slots (struct user_desc) */
 #define NT_386_IOPERM	0x201		/* x86 io permission bitmap (1=deny) */
 #define NT_S390_HIGH_GPRS	0x300	/* s390 upper register halves */
+#define NT_S390_TIMER	0x301		/* s390 timer register */
+#define NT_S390_TODCMP	0x302		/* s390 TOD clock comparator register */
+#define NT_S390_TODPREG	0x303		/* s390 TOD programmable register */
+#define NT_S390_CTRS	0x304		/* s390 control registers */
+#define NT_S390_PREFIX	0x305		/* s390 prefix register */
 
 
 /* Note header in a PT_NOTE section */
-- 
cgit v1.2.3


From caf66e581172dc5032bb84841a91bc7b77ad9876 Mon Sep 17 00:00:00 2001
From: "John W. Linville" <linville@tuxdriver.com>
Date: Thu, 25 Feb 2010 12:02:45 -0500
Subject: netdevice.h: check for CONFIG_WLAN instead of CONFIG_WLAN_80211

In "wireless: remove WLAN_80211 and WLAN_PRE80211 from Kconfig" I
inadvertantly missed a line in include/linux/netdevice.h.  I thereby
effectively reverted "net: Set LL_MAX_HEADER properly for wireless." by
accident. :-(  Now we should check there for CONFIG_WLAN instead.

Signed-off-by: John W. Linville <linville@tuxdriver.com>
Reported-by: Christoph Egger <siccegge@stud.informatik.uni-erlangen.de>
Cc: stable@kernel.org
---
 include/linux/netdevice.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index a3fccc85b1a0..99914e6fa3fd 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -136,7 +136,7 @@ static inline bool dev_xmit_complete(int rc)
  *	used.
  */
 
-#if defined(CONFIG_WLAN_80211) || defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE)
+#if defined(CONFIG_WLAN) || defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE)
 # if defined(CONFIG_MAC80211_MESH)
 #  define LL_MAX_HEADER 128
 # else
-- 
cgit v1.2.3


From 0b1c87278a8c7e394022ec184a0b44a3886b6fde Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Fri, 26 Feb 2010 16:38:57 -0800
Subject: rcu: Make non-RCU_PROVE_LOCKING rcu_read_lock_sched_held() understand
 boot

Before the scheduler starts, all tasks are non-preemptible by
definition. So, during that time, rcu_read_lock_sched_held()
needs to always return "true".  This patch makes that be so
for RCU_PROVE_LOCKING=n.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: laijs@cn.fujitsu.com
Cc: dipankar@in.ibm.com
Cc: mathieu.desnoyers@polymtl.ca
Cc: josh@joshtriplett.org
Cc: dvhltc@us.ibm.com
Cc: niv@us.ibm.com
Cc: peterz@infradead.org
Cc: rostedt@goodmis.org
Cc: Valdis.Kletnieks@vt.edu
Cc: dhowells@redhat.com
LKML-Reference: <1267231138-27856-2-git-send-email-paulmck@linux.vnet.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/rcupdate.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index fcea332a8424..c84373626336 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -166,7 +166,7 @@ static inline int rcu_read_lock_bh_held(void)
 
 static inline int rcu_read_lock_sched_held(void)
 {
-	return preempt_count() != 0;
+	return preempt_count() != 0 || !rcu_scheduler_active;
 }
 
 #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
-- 
cgit v1.2.3


From a2835763e130c343ace5320c20d33c281e7097b7 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Fri, 26 Feb 2010 06:34:51 +0000
Subject: rtnetlink: handle rtnl_link netlink notifications manually

In order to support specifying device flags during device creation,
we must be able to roll back device registration in case setting the
flags fails without sending any notifications related to the device
to userspace.

This patch changes rollback_registered_many() and register_netdevice()
to manually send netlink notifications for devices not handled by
rtnl_link and allows to defer notifications for devices handled by
rtnl_link until setup is complete.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 7 ++++++-
 net/core/dev.c            | 8 +++++++-
 net/core/rtnetlink.c      | 4 +---
 3 files changed, 14 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 7a2aea56f195..1bfda90c2625 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -924,7 +924,12 @@ struct net_device {
 	       NETREG_UNREGISTERED,	/* completed unregister todo */
 	       NETREG_RELEASED,		/* called free_netdev */
 	       NETREG_DUMMY,		/* dummy device for NAPI poll */
-	} reg_state;
+	} reg_state:16;
+
+	enum {
+		RTNL_LINK_INITIALIZED,
+		RTNL_LINK_INITIALIZING,
+	} rtnl_link_state:16;
 
 	/* Called from unregister, can be used to call free_netdev */
 	void (*destructor)(struct net_device *dev);
diff --git a/net/core/dev.c b/net/core/dev.c
index eb7f1a4fefc6..75332b089529 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4865,6 +4865,10 @@ static void rollback_registered_many(struct list_head *head)
 		*/
 		call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
 
+		if (!dev->rtnl_link_ops ||
+		    dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
+			rtmsg_ifinfo(RTM_DELLINK, dev, ~0U);
+
 		/*
 		 *	Flush the unicast and multicast chains
 		 */
@@ -5091,7 +5095,9 @@ int register_netdevice(struct net_device *dev)
 	 *	Prevent userspace races by waiting until the network
 	 *	device is fully setup before sending notifications.
 	 */
-	rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U);
+	if (!dev->rtnl_link_ops ||
+	    dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
+		rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U);
 
 out:
 	return ret;
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index b7c7dfd86507..020e43bfef5f 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1425,9 +1425,6 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, voi
 	struct net_device *dev = ptr;
 
 	switch (event) {
-	case NETDEV_UNREGISTER:
-		rtmsg_ifinfo(RTM_DELLINK, dev, ~0U);
-		break;
 	case NETDEV_UP:
 	case NETDEV_DOWN:
 		rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING);
@@ -1437,6 +1434,7 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, voi
 	case NETDEV_REGISTER:
 	case NETDEV_CHANGE:
 	case NETDEV_GOING_DOWN:
+	case NETDEV_UNREGISTER:
 	case NETDEV_UNREGISTER_BATCH:
 		break;
 	default:
-- 
cgit v1.2.3


From bd38081160bb3d036db98472e537b6a7dd4da51a Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Fri, 26 Feb 2010 06:34:53 +0000
Subject: dev: support deferring device flag change notifications

Split dev_change_flags() into two functions: __dev_change_flags() to
perform the actual changes and __dev_notify_flags() to invoke netdevice
notifiers. This will be used by rtnl_link to defer netlink notifications
until the device has been fully configured.

This changes ordering of some operations, in particular:

- netlink notifications are sent after all changes have been performed.
  As a side effect this surpresses one unnecessary netlink message when
  the IFF_UP and other flags are changed simultaneously.

- The NETDEV_UP/NETDEV_DOWN and NETDEV_CHANGE notifiers are invoked
  after all changes have been performed. Their relative is unchanged.

- net_dmaengine_put() is invoked before the NETDEV_DOWN notifier instead
  of afterwards. This should not make any difference since both RX and TX
  are already shut down at this point.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |   2 +
 net/core/dev.c            | 163 +++++++++++++++++++++++++++++-----------------
 net/core/rtnetlink.c      |   2 -
 3 files changed, 106 insertions(+), 61 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 1bfda90c2625..c79a88be7c33 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1587,7 +1587,9 @@ extern int		dev_valid_name(const char *name);
 extern int		dev_ioctl(struct net *net, unsigned int cmd, void __user *);
 extern int		dev_ethtool(struct net *net, struct ifreq *);
 extern unsigned		dev_get_flags(const struct net_device *);
+extern int		__dev_change_flags(struct net_device *, unsigned int flags);
 extern int		dev_change_flags(struct net_device *, unsigned);
+extern void		__dev_notify_flags(struct net_device *, unsigned int old_flags);
 extern int		dev_change_name(struct net_device *, const char *);
 extern int		dev_set_alias(struct net_device *, const char *, size_t);
 extern int		dev_change_net_namespace(struct net_device *,
diff --git a/net/core/dev.c b/net/core/dev.c
index 75332b089529..e5972f7f7e1b 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1113,32 +1113,13 @@ void dev_load(struct net *net, const char *name)
 }
 EXPORT_SYMBOL(dev_load);
 
-/**
- *	dev_open	- prepare an interface for use.
- *	@dev:	device to open
- *
- *	Takes a device from down to up state. The device's private open
- *	function is invoked and then the multicast lists are loaded. Finally
- *	the device is moved into the up state and a %NETDEV_UP message is
- *	sent to the netdev notifier chain.
- *
- *	Calling this function on an active interface is a nop. On a failure
- *	a negative errno code is returned.
- */
-int dev_open(struct net_device *dev)
+static int __dev_open(struct net_device *dev)
 {
 	const struct net_device_ops *ops = dev->netdev_ops;
 	int ret;
 
 	ASSERT_RTNL();
 
-	/*
-	 *	Is it already up?
-	 */
-
-	if (dev->flags & IFF_UP)
-		return 0;
-
 	/*
 	 *	Is it even present?
 	 */
@@ -1187,36 +1168,57 @@ int dev_open(struct net_device *dev)
 		 *	Wakeup transmit queue engine
 		 */
 		dev_activate(dev);
-
-		/*
-		 *	... and announce new interface.
-		 */
-		call_netdevice_notifiers(NETDEV_UP, dev);
 	}
 
 	return ret;
 }
-EXPORT_SYMBOL(dev_open);
 
 /**
- *	dev_close - shutdown an interface.
- *	@dev: device to shutdown
+ *	dev_open	- prepare an interface for use.
+ *	@dev:	device to open
  *
- *	This function moves an active device into down state. A
- *	%NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
- *	is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
- *	chain.
+ *	Takes a device from down to up state. The device's private open
+ *	function is invoked and then the multicast lists are loaded. Finally
+ *	the device is moved into the up state and a %NETDEV_UP message is
+ *	sent to the netdev notifier chain.
+ *
+ *	Calling this function on an active interface is a nop. On a failure
+ *	a negative errno code is returned.
  */
-int dev_close(struct net_device *dev)
+int dev_open(struct net_device *dev)
+{
+	int ret;
+
+	/*
+	 *	Is it already up?
+	 */
+	if (dev->flags & IFF_UP)
+		return 0;
+
+	/*
+	 *	Open device
+	 */
+	ret = __dev_open(dev);
+	if (ret < 0)
+		return ret;
+
+	/*
+	 *	... and announce new interface.
+	 */
+	rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING);
+	call_netdevice_notifiers(NETDEV_UP, dev);
+
+	return ret;
+}
+EXPORT_SYMBOL(dev_open);
+
+static int __dev_close(struct net_device *dev)
 {
 	const struct net_device_ops *ops = dev->netdev_ops;
-	ASSERT_RTNL();
 
+	ASSERT_RTNL();
 	might_sleep();
 
-	if (!(dev->flags & IFF_UP))
-		return 0;
-
 	/*
 	 *	Tell people we are going down, so that they can
 	 *	prepare to death, when device is still operating.
@@ -1252,14 +1254,34 @@ int dev_close(struct net_device *dev)
 	dev->flags &= ~IFF_UP;
 
 	/*
-	 * Tell people we are down
+	 *	Shutdown NET_DMA
 	 */
-	call_netdevice_notifiers(NETDEV_DOWN, dev);
+	net_dmaengine_put();
+
+	return 0;
+}
+
+/**
+ *	dev_close - shutdown an interface.
+ *	@dev: device to shutdown
+ *
+ *	This function moves an active device into down state. A
+ *	%NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
+ *	is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
+ *	chain.
+ */
+int dev_close(struct net_device *dev)
+{
+	if (!(dev->flags & IFF_UP))
+		return 0;
+
+	__dev_close(dev);
 
 	/*
-	 *	Shutdown NET_DMA
+	 * Tell people we are down
 	 */
-	net_dmaengine_put();
+	rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING);
+	call_netdevice_notifiers(NETDEV_DOWN, dev);
 
 	return 0;
 }
@@ -4299,18 +4321,10 @@ unsigned dev_get_flags(const struct net_device *dev)
 }
 EXPORT_SYMBOL(dev_get_flags);
 
-/**
- *	dev_change_flags - change device settings
- *	@dev: device
- *	@flags: device state flags
- *
- *	Change settings on device based state flags. The flags are
- *	in the userspace exported format.
- */
-int dev_change_flags(struct net_device *dev, unsigned flags)
+int __dev_change_flags(struct net_device *dev, unsigned int flags)
 {
-	int ret, changes;
 	int old_flags = dev->flags;
+	int ret;
 
 	ASSERT_RTNL();
 
@@ -4341,17 +4355,12 @@ int dev_change_flags(struct net_device *dev, unsigned flags)
 
 	ret = 0;
 	if ((old_flags ^ flags) & IFF_UP) {	/* Bit is different  ? */
-		ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev);
+		ret = ((old_flags & IFF_UP) ? __dev_close : __dev_open)(dev);
 
 		if (!ret)
 			dev_set_rx_mode(dev);
 	}
 
-	if (dev->flags & IFF_UP &&
-	    ((old_flags ^ dev->flags) & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI |
-					  IFF_VOLATILE)))
-		call_netdevice_notifiers(NETDEV_CHANGE, dev);
-
 	if ((flags ^ dev->gflags) & IFF_PROMISC) {
 		int inc = (flags & IFF_PROMISC) ? 1 : -1;
 
@@ -4370,11 +4379,47 @@ int dev_change_flags(struct net_device *dev, unsigned flags)
 		dev_set_allmulti(dev, inc);
 	}
 
-	/* Exclude state transition flags, already notified */
-	changes = (old_flags ^ dev->flags) & ~(IFF_UP | IFF_RUNNING);
+	return ret;
+}
+
+void __dev_notify_flags(struct net_device *dev, unsigned int old_flags)
+{
+	unsigned int changes = dev->flags ^ old_flags;
+
+	if (changes & IFF_UP) {
+		if (dev->flags & IFF_UP)
+			call_netdevice_notifiers(NETDEV_UP, dev);
+		else
+			call_netdevice_notifiers(NETDEV_DOWN, dev);
+	}
+
+	if (dev->flags & IFF_UP &&
+	    (changes & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI | IFF_VOLATILE)))
+		call_netdevice_notifiers(NETDEV_CHANGE, dev);
+}
+
+/**
+ *	dev_change_flags - change device settings
+ *	@dev: device
+ *	@flags: device state flags
+ *
+ *	Change settings on device based state flags. The flags are
+ *	in the userspace exported format.
+ */
+int dev_change_flags(struct net_device *dev, unsigned flags)
+{
+	int ret, changes;
+	int old_flags = dev->flags;
+
+	ret = __dev_change_flags(dev, flags);
+	if (ret < 0)
+		return ret;
+
+	changes = old_flags ^ dev->flags;
 	if (changes)
 		rtmsg_ifinfo(RTM_NEWLINK, dev, changes);
 
+	__dev_notify_flags(dev, old_flags);
 	return ret;
 }
 EXPORT_SYMBOL(dev_change_flags);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 020e43bfef5f..c21ec4236dd0 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1427,8 +1427,6 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, voi
 	switch (event) {
 	case NETDEV_UP:
 	case NETDEV_DOWN:
-		rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING);
-		break;
 	case NETDEV_PRE_UP:
 	case NETDEV_POST_INIT:
 	case NETDEV_REGISTER:
-- 
cgit v1.2.3


From da3f5cf1f8ebb0fab5c5fd09adb189166594ad6c Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@openwrt.org>
Date: Tue, 23 Feb 2010 11:45:51 +0000
Subject: skbuff: align sk_buff::cb to 64 bit and close some potential holes

The alignment requirement for 64-bit load/store instructions on ARM is
implementation defined. Some CPUs (such as Marvell Feroceon) do not
generate an exception, if such an instruction is executed with an
address that is not 64 bit aligned. In such a case, the Feroceon
corrupts adjacent memory, which showed up in my tests as a crash in the
rx path of ath9k that only occured with CONFIG_XFRM set.

This crash happened, because the first field of the mac80211 rx status
info in the cb is an u64, and changing it corrupted the skb->sp field.

This patch also closes some potential pre-existing holes in the sk_buff
struct surrounding the cb[] area.

Signed-off-by: Felix Fietkau <nbd@openwrt.org>
Cc: stable@kernel.org
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index ba0f8e3a9cda..d266eeef522d 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -315,22 +315,23 @@ struct sk_buff {
 	struct sk_buff		*next;
 	struct sk_buff		*prev;
 
-	struct sock		*sk;
 	ktime_t			tstamp;
+
+	struct sock		*sk;
 	struct net_device	*dev;
 
-	unsigned long		_skb_dst;
-#ifdef CONFIG_XFRM
-	struct	sec_path	*sp;
-#endif
 	/*
 	 * This is the control buffer. It is free to use for every
 	 * layer. Please put your private variables there. If you
 	 * want to keep them across layers you have to do a skb_clone()
 	 * first. This is owned by whoever has the skb queued ATM.
 	 */
-	char			cb[48];
+	char			cb[48] __aligned(8);
 
+	unsigned long		_skb_dst;
+#ifdef CONFIG_XFRM
+	struct	sec_path	*sp;
+#endif
 	unsigned int		len,
 				data_len;
 	__u16			mac_len,
-- 
cgit v1.2.3


From 44ee63587dce85593c22497140db16f4e5027860 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 17 Feb 2010 10:50:50 +0900
Subject: percpu: Add __percpu sparse annotations to hw_breakpoint

Add __percpu sparse annotations to hw_breakpoint.

These annotations are to make sparse consider percpu variables to be
in a different address space and warn if accessed without going
through percpu accessors.  This patch doesn't affect normal builds.

In kernel/hw_breakpoint.c, per_cpu(nr_task_bp_pinned, cpu)'s will
trigger spurious noderef related warnings from sparse.  Changing it to
&per_cpu(nr_task_bp_pinned[0], cpu) will work around the problem but
deemed to ugly by the maintainer.  Leave it alone until better
solution can be found.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: K.Prasad <prasad@linux.vnet.ibm.com>
LKML-Reference: <4B7B4B7A.9050902@kernel.org>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
---
 include/linux/hw_breakpoint.h           |  8 ++++----
 kernel/hw_breakpoint.c                  | 10 +++++-----
 samples/hw_breakpoint/data_breakpoint.c |  6 +++---
 3 files changed, 12 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h
index 5977b724f7c6..c70d27af03f9 100644
--- a/include/linux/hw_breakpoint.h
+++ b/include/linux/hw_breakpoint.h
@@ -66,14 +66,14 @@ register_wide_hw_breakpoint_cpu(struct perf_event_attr *attr,
 				perf_overflow_handler_t	triggered,
 				int cpu);
 
-extern struct perf_event **
+extern struct perf_event * __percpu *
 register_wide_hw_breakpoint(struct perf_event_attr *attr,
 			    perf_overflow_handler_t triggered);
 
 extern int register_perf_hw_breakpoint(struct perf_event *bp);
 extern int __register_perf_hw_breakpoint(struct perf_event *bp);
 extern void unregister_hw_breakpoint(struct perf_event *bp);
-extern void unregister_wide_hw_breakpoint(struct perf_event **cpu_events);
+extern void unregister_wide_hw_breakpoint(struct perf_event * __percpu *cpu_events);
 
 extern int dbg_reserve_bp_slot(struct perf_event *bp);
 extern int dbg_release_bp_slot(struct perf_event *bp);
@@ -100,7 +100,7 @@ static inline struct perf_event *
 register_wide_hw_breakpoint_cpu(struct perf_event_attr *attr,
 				perf_overflow_handler_t	 triggered,
 				int cpu)		{ return NULL; }
-static inline struct perf_event **
+static inline struct perf_event * __percpu *
 register_wide_hw_breakpoint(struct perf_event_attr *attr,
 			    perf_overflow_handler_t triggered)	{ return NULL; }
 static inline int
@@ -109,7 +109,7 @@ static inline int
 __register_perf_hw_breakpoint(struct perf_event *bp) 	{ return -ENOSYS; }
 static inline void unregister_hw_breakpoint(struct perf_event *bp)	{ }
 static inline void
-unregister_wide_hw_breakpoint(struct perf_event **cpu_events)		{ }
+unregister_wide_hw_breakpoint(struct perf_event * __percpu *cpu_events)	{ }
 static inline int
 reserve_bp_slot(struct perf_event *bp)			{return -ENOSYS; }
 static inline void release_bp_slot(struct perf_event *bp) 		{ }
diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c
index 967e66143e11..6542eacb3fa5 100644
--- a/kernel/hw_breakpoint.c
+++ b/kernel/hw_breakpoint.c
@@ -413,17 +413,17 @@ EXPORT_SYMBOL_GPL(unregister_hw_breakpoint);
  *
  * @return a set of per_cpu pointers to perf events
  */
-struct perf_event **
+struct perf_event * __percpu *
 register_wide_hw_breakpoint(struct perf_event_attr *attr,
 			    perf_overflow_handler_t triggered)
 {
-	struct perf_event **cpu_events, **pevent, *bp;
+	struct perf_event * __percpu *cpu_events, **pevent, *bp;
 	long err;
 	int cpu;
 
 	cpu_events = alloc_percpu(typeof(*cpu_events));
 	if (!cpu_events)
-		return ERR_PTR(-ENOMEM);
+		return (void __percpu __force *)ERR_PTR(-ENOMEM);
 
 	get_online_cpus();
 	for_each_online_cpu(cpu) {
@@ -451,7 +451,7 @@ fail:
 	put_online_cpus();
 
 	free_percpu(cpu_events);
-	return ERR_PTR(err);
+	return (void __percpu __force *)ERR_PTR(err);
 }
 EXPORT_SYMBOL_GPL(register_wide_hw_breakpoint);
 
@@ -459,7 +459,7 @@ EXPORT_SYMBOL_GPL(register_wide_hw_breakpoint);
  * unregister_wide_hw_breakpoint - unregister a wide breakpoint in the kernel
  * @cpu_events: the per cpu set of events to unregister
  */
-void unregister_wide_hw_breakpoint(struct perf_event **cpu_events)
+void unregister_wide_hw_breakpoint(struct perf_event * __percpu *cpu_events)
 {
 	int cpu;
 	struct perf_event **pevent;
diff --git a/samples/hw_breakpoint/data_breakpoint.c b/samples/hw_breakpoint/data_breakpoint.c
index c69cbe9b2426..bd0f337afcab 100644
--- a/samples/hw_breakpoint/data_breakpoint.c
+++ b/samples/hw_breakpoint/data_breakpoint.c
@@ -34,7 +34,7 @@
 #include <linux/perf_event.h>
 #include <linux/hw_breakpoint.h>
 
-struct perf_event **sample_hbp;
+struct perf_event * __percpu *sample_hbp;
 
 static char ksym_name[KSYM_NAME_LEN] = "pid_max";
 module_param_string(ksym, ksym_name, KSYM_NAME_LEN, S_IRUGO);
@@ -61,8 +61,8 @@ static int __init hw_break_module_init(void)
 	attr.bp_type = HW_BREAKPOINT_W | HW_BREAKPOINT_R;
 
 	sample_hbp = register_wide_hw_breakpoint(&attr, sample_hbp_handler);
-	if (IS_ERR(sample_hbp)) {
-		ret = PTR_ERR(sample_hbp);
+	if (IS_ERR((void __force *)sample_hbp)) {
+		ret = PTR_ERR((void __force *)sample_hbp);
 		goto fail;
 	}
 
-- 
cgit v1.2.3


From dd8b1cf681eab40bc5afb67bdd06b2ca341f5669 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Sat, 27 Feb 2010 17:10:39 +0100
Subject: perf: Remove pointless breakpoint union

Remove pointless union in the breakpoint field of hw_perf_event.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
---
 include/linux/perf_event.h | 5 ++---
 lib/Kconfig.debug          | 8 +++++---
 2 files changed, 7 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 7b18b4fd5df7..04f06b4be297 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -487,9 +487,8 @@ struct hw_perf_event {
 			struct hrtimer	hrtimer;
 		};
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
-		union { /* breakpoint */
-			struct arch_hw_breakpoint	info;
-		};
+		/* breakpoint */
+		struct arch_hw_breakpoint	info;
 #endif
 	};
 	atomic64_t			prev_count;
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 65f964e7fe78..4dc24cc13f5c 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -520,11 +520,13 @@ config LOCK_STAT
 
 	 For more details, see Documentation/lockstat.txt
 
-	 You can analyze lock events with "perf lock", subcommand of perf.
-	 If you want to use "perf lock", you need to turn on CONFIG_EVENT_TRACING.
+	 This also enables lock events required by "perf lock",
+	 subcommand of perf.
+	 If you want to use "perf lock", you also need to turn on
+	 CONFIG_EVENT_TRACING.
 
 	 CONFIG_LOCK_STAT defines "contended" and "acquired" lock events.
- 	 (CONFIG_LOCKDEP defines "acquire" and "release" events.)
+	 (CONFIG_LOCKDEP defines "acquire" and "release" events.)
 
 config DEBUG_LOCKDEP
 	bool "Lock dependency engine debugging"
-- 
cgit v1.2.3


From 1883c79a57a5fe25309007590cccb1b2782c41b2 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Sat, 27 Feb 2010 14:53:08 -0800
Subject: rcu: Make task_subsys_state() RCU-lockdep checks handle boot-time use

It is apparently legal to invoke task_subsys_state() without RCU
protection during early boot time.  After all, there are no
concurrent tasks, so there can be no grace periods completing
concurrently.

But this does need an Acked-by from the cgroups folks.

Located-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: laijs@cn.fujitsu.com
Cc: dipankar@in.ibm.com
Cc: mathieu.desnoyers@polymtl.ca
Cc: josh@joshtriplett.org
Cc: dvhltc@us.ibm.com
Cc: niv@us.ibm.com
Cc: peterz@infradead.org
Cc: rostedt@goodmis.org
Cc: Valdis.Kletnieks@vt.edu
Cc: dhowells@redhat.com
LKML-Reference: <1267311188-16603-2-git-send-email-paulmck@linux.vnet.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/cgroup.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index c9bbcb2a75ae..a73e1ced09b8 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -489,6 +489,7 @@ static inline struct cgroup_subsys_state *task_subsys_state(
 {
 	return rcu_dereference_check(task->cgroups->subsys[subsys_id],
 				     rcu_read_lock_held() ||
+				     !rcu_scheduler_active ||
 				     cgroup_lock_is_held());
 }
 
-- 
cgit v1.2.3


From a2ce766238f72ff7337606c0bc96803c30c9e05c Mon Sep 17 00:00:00 2001
From: Matt Carlson <mcarlson@broadcom.com>
Date: Fri, 26 Feb 2010 14:04:39 +0000
Subject: pci: Add PCI LRDT tag size and section size

This patch adds a preprocessor constant to describe the PCI VPD large
resource data type tag size and an inline function to extract the large
resource section size from the large resource data type tag.

Signed-off-by: Matt Carlson <mcarlson@broadcom.com>
Signed-off-by: Michael Chan <mchan@broadcom.com>
Acked-by: Jesse Barnes <jbarnes@virtuousgeek.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bnx2.c  |  8 +++++---
 drivers/net/tg3.c   | 13 ++++++-------
 include/linux/pci.h | 14 ++++++++++++++
 3 files changed, 25 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c
index d3f739a295df..bb403887b549 100644
--- a/drivers/net/bnx2.c
+++ b/drivers/net/bnx2.c
@@ -7773,15 +7773,17 @@ bnx2_read_vpd_fw_ver(struct bnx2 *bp)
 		unsigned int block_end;
 
 		if (val == 0x82 || val == 0x91) {
-			i = (i + 3 + (data[i + 1] + (data[i + 2] << 8)));
+			i += PCI_VPD_LRDT_TAG_SIZE +
+			     pci_vpd_lrdt_size(&data[i]);
 			continue;
 		}
 
 		if (val != 0x90)
 			goto vpd_done;
 
-		block_end = (i + 3 + (data[i + 1] + (data[i + 2] << 8)));
-		i += 3;
+		block_end = (i + PCI_VPD_LRDT_TAG_SIZE +
+			    pci_vpd_lrdt_size(&data[i]));
+		i += PCI_VPD_LRDT_TAG_SIZE;
 
 		if (block_end > BNX2_VPD_LEN)
 			goto vpd_done;
diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index 0d06e4007f44..5fccbe459949 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -12590,19 +12590,18 @@ static void __devinit tg3_read_partno(struct tg3 *tp)
 		unsigned int block_end;
 
 		if (val == 0x82 || val == 0x91) {
-			i = (i + 3 +
-			     (vpd_data[i + 1] +
-			      (vpd_data[i + 2] << 8)));
+			i += PCI_VPD_LRDT_TAG_SIZE +
+			     pci_vpd_lrdt_size(&vpd_data[i]);
 			continue;
 		}
 
 		if (val != 0x90)
 			goto out_not_found;
 
-		block_end = (i + 3 +
-			     (vpd_data[i + 1] +
-			      (vpd_data[i + 2] << 8)));
-		i += 3;
+		block_end = i + PCI_VPD_LRDT_TAG_SIZE +
+			    pci_vpd_lrdt_size(&vpd_data[i]);
+
+		i += PCI_VPD_LRDT_TAG_SIZE;
 
 		if (block_end > TG3_NVM_VPD_LEN)
 			goto out_not_found;
diff --git a/include/linux/pci.h b/include/linux/pci.h
index e2575f86133a..6f62a499023f 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1349,5 +1349,19 @@ static inline bool pci_is_pcie(struct pci_dev *dev)
 
 void pci_request_acs(void);
 
+
+#define PCI_VPD_LRDT_TAG_SIZE	3
+
+/**
+ * pci_vpd_lrdt_size - Extracts the Large Resource Data Type length
+ * @lrdt: Pointer to the beginning of the Large Resource Data Type tag
+ *
+ * Returns the extracted Large Resource Data Type length.
+ */
+static inline u16 pci_vpd_lrdt_size(const u8 *lrdt)
+{
+	return (u16)lrdt[1] + ((u16)lrdt[2] << 8);
+}
+
 #endif /* __KERNEL__ */
 #endif /* LINUX_PCI_H */
-- 
cgit v1.2.3


From 7ad506fa1adc2da3d394c562f09b8e1b3026c402 Mon Sep 17 00:00:00 2001
From: Matt Carlson <mcarlson@broadcom.com>
Date: Fri, 26 Feb 2010 14:04:40 +0000
Subject: pci: Add large and small resource data type code

This patch introduces more VPD preprocessor definitions to identify some
small and large resource data type item names.  The patch then continues
to correct how the tg3 and bnx2 drivers search for the "read-only data"
large resource data type.

Signed-off-by: Matt Carlson <mcarlson@broadcom.com>
Signed-off-by: Michael Chan <mchan@broadcom.com>
Acked-by: Jesse Barnes <jbarnes@virtuousgeek.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bnx2.c  | 23 +++++++++++++++++------
 drivers/net/tg3.c   | 23 +++++++++++++++++------
 include/linux/pci.h | 34 +++++++++++++++++++++++++++++++++-
 3 files changed, 67 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c
index bb403887b549..084ef102b8c4 100644
--- a/drivers/net/bnx2.c
+++ b/drivers/net/bnx2.c
@@ -7772,15 +7772,26 @@ bnx2_read_vpd_fw_ver(struct bnx2 *bp)
 		unsigned char val = data[i];
 		unsigned int block_end;
 
-		if (val == 0x82 || val == 0x91) {
-			i += PCI_VPD_LRDT_TAG_SIZE +
-			     pci_vpd_lrdt_size(&data[i]);
+		if (val & PCI_VPD_LRDT) {
+			if (i + PCI_VPD_LRDT_TAG_SIZE > BNX2_VPD_LEN)
+				break;
+
+			if (val != PCI_VPD_LRDT_RO_DATA) {
+				i += PCI_VPD_LRDT_TAG_SIZE +
+				     pci_vpd_lrdt_size(&data[i]);
+
+				continue;
+			}
+		} else {
+			if ((val & PCI_VPD_SRDT_TIN_MASK) == PCI_VPD_STIN_END)
+				break;
+
+			i += PCI_VPD_SRDT_TAG_SIZE +
+			     pci_vpd_srdt_size(&data[i]);
+
 			continue;
 		}
 
-		if (val != 0x90)
-			goto vpd_done;
-
 		block_end = (i + PCI_VPD_LRDT_TAG_SIZE +
 			    pci_vpd_lrdt_size(&data[i]));
 		i += PCI_VPD_LRDT_TAG_SIZE;
diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index 5fccbe459949..ed57a62b3ac8 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -12589,15 +12589,26 @@ static void __devinit tg3_read_partno(struct tg3 *tp)
 		unsigned char val = vpd_data[i];
 		unsigned int block_end;
 
-		if (val == 0x82 || val == 0x91) {
-			i += PCI_VPD_LRDT_TAG_SIZE +
-			     pci_vpd_lrdt_size(&vpd_data[i]);
+		if (val & PCI_VPD_LRDT) {
+			if (i + PCI_VPD_LRDT_TAG_SIZE > TG3_NVM_VPD_LEN)
+				break;
+
+			if (val != PCI_VPD_LRDT_RO_DATA) {
+				i += PCI_VPD_LRDT_TAG_SIZE +
+				     pci_vpd_lrdt_size(&vpd_data[i]);
+
+				continue;
+			}
+		} else {
+			if ((val & PCI_VPD_SRDT_TIN_MASK) == PCI_VPD_STIN_END)
+				break;
+
+			i += PCI_VPD_SRDT_TAG_SIZE +
+			     pci_vpd_srdt_size(&vpd_data[i]);
+
 			continue;
 		}
 
-		if (val != 0x90)
-			goto out_not_found;
-
 		block_end = i + PCI_VPD_LRDT_TAG_SIZE +
 			    pci_vpd_lrdt_size(&vpd_data[i]);
 
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 6f62a499023f..198d062640b7 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1350,7 +1350,28 @@ static inline bool pci_is_pcie(struct pci_dev *dev)
 void pci_request_acs(void);
 
 
-#define PCI_VPD_LRDT_TAG_SIZE	3
+#define PCI_VPD_LRDT			0x80	/* Large Resource Data Type */
+#define PCI_VPD_LRDT_ID(x)		(x | PCI_VPD_LRDT)
+
+/* Large Resource Data Type Tag Item Names */
+#define PCI_VPD_LTIN_ID_STRING		0x02	/* Identifier String */
+#define PCI_VPD_LTIN_RO_DATA		0x10	/* Read-Only Data */
+#define PCI_VPD_LTIN_RW_DATA		0x11	/* Read-Write Data */
+
+#define PCI_VPD_LRDT_ID_STRING		PCI_VPD_LRDT_ID(PCI_VPD_LTIN_ID_STRING)
+#define PCI_VPD_LRDT_RO_DATA		PCI_VPD_LRDT_ID(PCI_VPD_LTIN_RO_DATA)
+#define PCI_VPD_LRDT_RW_DATA		PCI_VPD_LRDT_ID(PCI_VPD_LTIN_RW_DATA)
+
+/* Small Resource Data Type Tag Item Names */
+#define PCI_VPD_STIN_END		0x78	/* End */
+
+#define PCI_VPD_SRDT_END		PCI_VPD_STIN_END
+
+#define PCI_VPD_SRDT_TIN_MASK		0x78
+#define PCI_VPD_SRDT_LEN_MASK		0x07
+
+#define PCI_VPD_LRDT_TAG_SIZE		3
+#define PCI_VPD_SRDT_TAG_SIZE		1
 
 /**
  * pci_vpd_lrdt_size - Extracts the Large Resource Data Type length
@@ -1363,5 +1384,16 @@ static inline u16 pci_vpd_lrdt_size(const u8 *lrdt)
 	return (u16)lrdt[1] + ((u16)lrdt[2] << 8);
 }
 
+/**
+ * pci_vpd_srdt_size - Extracts the Small Resource Data Type length
+ * @lrdt: Pointer to the beginning of the Small Resource Data Type tag
+ *
+ * Returns the extracted Small Resource Data Type length.
+ */
+static inline u8 pci_vpd_srdt_size(const u8 *srdt)
+{
+	return (*srdt) & PCI_VPD_SRDT_LEN_MASK;
+}
+
 #endif /* __KERNEL__ */
 #endif /* LINUX_PCI_H */
-- 
cgit v1.2.3


From b55ac1b22690d2e5b02a61cf6d69c2d66969c79d Mon Sep 17 00:00:00 2001
From: Matt Carlson <mcarlson@broadcom.com>
Date: Fri, 26 Feb 2010 14:04:41 +0000
Subject: pci: Add helper to find a VPD resource data type

This patch adds the pci_vpd_find_tag() helper function to find VPD
resource data types in a buffer.

Signed-off-by: Matt Carlson <mcarlson@broadcom.com>
Signed-off-by: Michael Chan <mchan@broadcom.com>
Acked-by: Jesse Barnes <jbarnes@virtuousgeek.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bnx2.c   | 24 ++++--------------------
 drivers/net/tg3.c    | 26 +++++---------------------
 drivers/pci/Makefile |  2 +-
 drivers/pci/vpd.c    | 43 +++++++++++++++++++++++++++++++++++++++++++
 include/linux/pci.h  | 12 ++++++++++++
 5 files changed, 65 insertions(+), 42 deletions(-)
 create mode 100644 drivers/pci/vpd.c

(limited to 'include/linux')

diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c
index 084ef102b8c4..fd43feb5a350 100644
--- a/drivers/net/bnx2.c
+++ b/drivers/net/bnx2.c
@@ -7769,28 +7769,12 @@ bnx2_read_vpd_fw_ver(struct bnx2 *bp)
 	}
 
 	for (i = 0; i <= BNX2_VPD_LEN - 3; ) {
-		unsigned char val = data[i];
 		unsigned int block_end;
 
-		if (val & PCI_VPD_LRDT) {
-			if (i + PCI_VPD_LRDT_TAG_SIZE > BNX2_VPD_LEN)
-				break;
-
-			if (val != PCI_VPD_LRDT_RO_DATA) {
-				i += PCI_VPD_LRDT_TAG_SIZE +
-				     pci_vpd_lrdt_size(&data[i]);
-
-				continue;
-			}
-		} else {
-			if ((val & PCI_VPD_SRDT_TIN_MASK) == PCI_VPD_STIN_END)
-				break;
-
-			i += PCI_VPD_SRDT_TAG_SIZE +
-			     pci_vpd_srdt_size(&data[i]);
-
-			continue;
-		}
+		i = pci_vpd_find_tag(data, i, BNX2_VPD_LEN,
+				     PCI_VPD_LRDT_RO_DATA);
+		if (i < 0)
+			break;
 
 		block_end = (i + PCI_VPD_LRDT_TAG_SIZE +
 			    pci_vpd_lrdt_size(&data[i]));
diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index ed57a62b3ac8..76ad141ab448 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -12547,7 +12547,7 @@ skip_phy_reset:
 static void __devinit tg3_read_partno(struct tg3 *tp)
 {
 	unsigned char vpd_data[TG3_NVM_VPD_LEN];   /* in little-endian format */
-	unsigned int i;
+	int i;
 	u32 magic;
 
 	if ((tp->tg3_flags3 & TG3_FLG3_NO_NVRAM) ||
@@ -12586,28 +12586,12 @@ static void __devinit tg3_read_partno(struct tg3 *tp)
 
 	/* Now parse and find the part number. */
 	for (i = 0; i < TG3_NVM_VPD_LEN - 2; ) {
-		unsigned char val = vpd_data[i];
 		unsigned int block_end;
 
-		if (val & PCI_VPD_LRDT) {
-			if (i + PCI_VPD_LRDT_TAG_SIZE > TG3_NVM_VPD_LEN)
-				break;
-
-			if (val != PCI_VPD_LRDT_RO_DATA) {
-				i += PCI_VPD_LRDT_TAG_SIZE +
-				     pci_vpd_lrdt_size(&vpd_data[i]);
-
-				continue;
-			}
-		} else {
-			if ((val & PCI_VPD_SRDT_TIN_MASK) == PCI_VPD_STIN_END)
-				break;
-
-			i += PCI_VPD_SRDT_TAG_SIZE +
-			     pci_vpd_srdt_size(&vpd_data[i]);
-
-			continue;
-		}
+		i = pci_vpd_find_tag(vpd_data, i, TG3_NVM_VPD_LEN,
+				     PCI_VPD_LRDT_RO_DATA);
+		if (i < 0)
+			break;
 
 		block_end = i + PCI_VPD_LRDT_TAG_SIZE +
 			    pci_vpd_lrdt_size(&vpd_data[i]);
diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile
index 4df48d58eaa6..b2f6d777a084 100644
--- a/drivers/pci/Makefile
+++ b/drivers/pci/Makefile
@@ -4,7 +4,7 @@
 
 obj-y		+= access.o bus.o probe.o remove.o pci.o quirks.o \
 			pci-driver.o search.o pci-sysfs.o rom.o setup-res.o \
-			irq.o
+			irq.o vpd.o
 obj-$(CONFIG_PROC_FS) += proc.o
 obj-$(CONFIG_SYSFS) += slot.o
 
diff --git a/drivers/pci/vpd.c b/drivers/pci/vpd.c
new file mode 100644
index 000000000000..6bc554576f8d
--- /dev/null
+++ b/drivers/pci/vpd.c
@@ -0,0 +1,43 @@
+/*
+ * File:	vpd.c
+ * Purpose:	Provide PCI VPD support
+ *
+ * Copyright (C) 2010 Broadcom Corporation.
+ */
+
+#include <linux/pci.h>
+
+int pci_vpd_find_tag(const u8 *buf, unsigned int off, unsigned int len, u8 rdt)
+{
+	int i;
+
+	for (i = off; i < len; ) {
+		u8 val = buf[i];
+
+		if (val & PCI_VPD_LRDT) {
+			/* Don't return success of the tag isn't complete */
+			if (i + PCI_VPD_LRDT_TAG_SIZE > len)
+				break;
+
+			if (val == rdt)
+				return i;
+
+			i += PCI_VPD_LRDT_TAG_SIZE +
+			     pci_vpd_lrdt_size(&buf[i]);
+		} else {
+			u8 tag = val & ~PCI_VPD_SRDT_LEN_MASK;
+
+			if (tag == rdt)
+				return i;
+
+			if (tag == PCI_VPD_SRDT_END)
+				break;
+
+			i += PCI_VPD_SRDT_TAG_SIZE +
+			     pci_vpd_srdt_size(&buf[i]);
+		}
+	}
+
+	return -ENOENT;
+}
+EXPORT_SYMBOL_GPL(pci_vpd_find_tag);
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 198d062640b7..e30ceea7345b 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1395,5 +1395,17 @@ static inline u8 pci_vpd_srdt_size(const u8 *srdt)
 	return (*srdt) & PCI_VPD_SRDT_LEN_MASK;
 }
 
+/**
+ * pci_vpd_find_tag - Locates the Resource Data Type tag provided
+ * @buf: Pointer to buffered vpd data
+ * @off: The offset into the buffer at which to begin the search
+ * @len: The length of the vpd buffer
+ * @rdt: The Resource Data Type to search for
+ *
+ * Returns the index where the Resource Data Type was found or
+ * -ENOENT otherwise.
+ */
+int pci_vpd_find_tag(const u8 *buf, unsigned int off, unsigned int len, u8 rdt);
+
 #endif /* __KERNEL__ */
 #endif /* LINUX_PCI_H */
-- 
cgit v1.2.3


From e1d5bdabb94da89bdb3c3f2ee105cf61fca88ec8 Mon Sep 17 00:00:00 2001
From: Matt Carlson <mcarlson@broadcom.com>
Date: Fri, 26 Feb 2010 14:04:42 +0000
Subject: pci: Add VPD information field helper functions

This patch adds a preprocessor constant to describe the PCI VPD
information field header size and an inline function to extract the
size of the information field itself.

Signed-off-by: Matt Carlson <mcarlson@broadcom.com>
Signed-off-by: Michael Chan <mchan@broadcom.com>
Acked-by: Jesse Barnes <jbarnes@virtuousgeek.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bnx2.c  | 11 ++++++-----
 drivers/net/tg3.c   |  7 ++++---
 include/linux/pci.h | 13 +++++++++++++
 3 files changed, 23 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c
index fd43feb5a350..b808707f83ff 100644
--- a/drivers/net/bnx2.c
+++ b/drivers/net/bnx2.c
@@ -7784,14 +7784,15 @@ bnx2_read_vpd_fw_ver(struct bnx2 *bp)
 			goto vpd_done;
 
 		while (i < (block_end - 2)) {
-			int len = data[i + 2];
+			int len = pci_vpd_info_field_size(&data[i]);
 
-			if (i + 3 + len > block_end)
+			if (i + PCI_VPD_INFO_FLD_HDR_SIZE + len > block_end)
 				goto vpd_done;
 
 			if (data[i] == 'M' && data[i + 1] == 'N') {
 				if (len != 4 ||
-				    memcmp(&data[i + 3], "1028", 4))
+				    memcmp(&data[i + PCI_VPD_INFO_FLD_HDR_SIZE],
+					   "1028", 4))
 					goto vpd_done;
 				mn_match = true;
 
@@ -7800,9 +7801,9 @@ bnx2_read_vpd_fw_ver(struct bnx2 *bp)
 					goto vpd_done;
 
 				v0_len = len;
-				v0_str = &data[i + 3];
+				v0_str = &data[i + PCI_VPD_INFO_FLD_HDR_SIZE];
 			}
-			i += 3 + len;
+			i += PCI_VPD_INFO_FLD_HDR_SIZE + len;
 
 			if (mn_match && v0_str) {
 				memcpy(bp->fw_version, v0_str, v0_len);
diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index 76ad141ab448..f59f36910e98 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -12604,9 +12604,9 @@ static void __devinit tg3_read_partno(struct tg3 *tp)
 		while (i < (block_end - 2)) {
 			if (vpd_data[i + 0] == 'P' &&
 			    vpd_data[i + 1] == 'N') {
-				int partno_len = vpd_data[i + 2];
+				int partno_len = pci_vpd_info_field_size(&vpd_data[i]);
 
-				i += 3;
+				i += PCI_VPD_INFO_FLD_HDR_SIZE;
 				if (partno_len > TG3_BPN_SIZE ||
 				    (partno_len + i) > TG3_NVM_VPD_LEN)
 					goto out_not_found;
@@ -12617,7 +12617,8 @@ static void __devinit tg3_read_partno(struct tg3 *tp)
 				/* Success. */
 				return;
 			}
-			i += 3 + vpd_data[i + 2];
+			i += PCI_VPD_INFO_FLD_HDR_SIZE +
+			     pci_vpd_info_field_size(&vpd_data[i]);
 		}
 
 		/* Part number not found. */
diff --git a/include/linux/pci.h b/include/linux/pci.h
index e30ceea7345b..cfff32fc6e35 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1373,6 +1373,8 @@ void pci_request_acs(void);
 #define PCI_VPD_LRDT_TAG_SIZE		3
 #define PCI_VPD_SRDT_TAG_SIZE		1
 
+#define PCI_VPD_INFO_FLD_HDR_SIZE	3
+
 /**
  * pci_vpd_lrdt_size - Extracts the Large Resource Data Type length
  * @lrdt: Pointer to the beginning of the Large Resource Data Type tag
@@ -1395,6 +1397,17 @@ static inline u8 pci_vpd_srdt_size(const u8 *srdt)
 	return (*srdt) & PCI_VPD_SRDT_LEN_MASK;
 }
 
+/**
+ * pci_vpd_info_field_size - Extracts the information field length
+ * @lrdt: Pointer to the beginning of an information field header
+ *
+ * Returns the extracted information field length.
+ */
+static inline u8 pci_vpd_info_field_size(const u8 *info_field)
+{
+	return info_field[2];
+}
+
 /**
  * pci_vpd_find_tag - Locates the Resource Data Type tag provided
  * @buf: Pointer to buffered vpd data
-- 
cgit v1.2.3


From 4067a8541d397e9d6b443dd2ce0ecb78bfd991db Mon Sep 17 00:00:00 2001
From: Matt Carlson <mcarlson@broadcom.com>
Date: Fri, 26 Feb 2010 14:04:43 +0000
Subject: pci: Add helper to search for VPD keywords

This patch adds the pci_vpd_find_info_keyword() helper function to
find information field keywords within read-only and read-write large
resource data type sections.

Signed-off-by: Matt Carlson <mcarlson@broadcom.com>
Signed-off-by: Michael Chan <mchan@broadcom.com>
Acked-by: Jesse Barnes <jbarnes@virtuousgeek.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bnx2.c  | 54 +++++++++++++++++++++++++++++------------------------
 drivers/net/tg3.c   | 32 +++++++++++++------------------
 drivers/pci/vpd.c   | 18 ++++++++++++++++++
 include/linux/pci.h | 17 +++++++++++++++++
 4 files changed, 78 insertions(+), 43 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c
index b808707f83ff..bc23bfb687c3 100644
--- a/drivers/net/bnx2.c
+++ b/drivers/net/bnx2.c
@@ -7769,48 +7769,54 @@ bnx2_read_vpd_fw_ver(struct bnx2 *bp)
 	}
 
 	for (i = 0; i <= BNX2_VPD_LEN - 3; ) {
-		unsigned int block_end;
+		int j;
+		unsigned int block_end, rosize;
 
 		i = pci_vpd_find_tag(data, i, BNX2_VPD_LEN,
 				     PCI_VPD_LRDT_RO_DATA);
 		if (i < 0)
 			break;
 
-		block_end = (i + PCI_VPD_LRDT_TAG_SIZE +
-			    pci_vpd_lrdt_size(&data[i]));
+		rosize = pci_vpd_lrdt_size(&data[i]);
+		block_end = i + PCI_VPD_LRDT_TAG_SIZE + rosize;
 		i += PCI_VPD_LRDT_TAG_SIZE;
 
 		if (block_end > BNX2_VPD_LEN)
 			goto vpd_done;
 
-		while (i < (block_end - 2)) {
-			int len = pci_vpd_info_field_size(&data[i]);
+		j = pci_vpd_find_info_keyword(data, i, rosize,
+					      PCI_VPD_RO_KEYWORD_MFR_ID);
+		if (j > 0) {
+			int len = pci_vpd_info_field_size(&data[j]);
 
-			if (i + PCI_VPD_INFO_FLD_HDR_SIZE + len > block_end)
+			if (j + PCI_VPD_INFO_FLD_HDR_SIZE + len > block_end ||
+			    len != 4 ||
+			    memcmp(&data[j + PCI_VPD_INFO_FLD_HDR_SIZE],
+				   "1028", 4))
 				goto vpd_done;
 
-			if (data[i] == 'M' && data[i + 1] == 'N') {
-				if (len != 4 ||
-				    memcmp(&data[i + PCI_VPD_INFO_FLD_HDR_SIZE],
-					   "1028", 4))
-					goto vpd_done;
-				mn_match = true;
+			mn_match = true;
+		}
 
-			} else if (data[i] == 'V' && data[i + 1] == '0') {
-				if (len > BNX2_MAX_VER_SLEN)
-					goto vpd_done;
+		j = pci_vpd_find_info_keyword(data, i, rosize,
+					      PCI_VPD_RO_KEYWORD_VENDOR0);
+		if (j > 0) {
+			int len = pci_vpd_info_field_size(&data[j]);
 
-				v0_len = len;
-				v0_str = &data[i + PCI_VPD_INFO_FLD_HDR_SIZE];
-			}
-			i += PCI_VPD_INFO_FLD_HDR_SIZE + len;
-
-			if (mn_match && v0_str) {
-				memcpy(bp->fw_version, v0_str, v0_len);
-				bp->fw_version[v0_len] = ' ';
+			j += PCI_VPD_INFO_FLD_HDR_SIZE;
+			if (j + len > block_end || len > BNX2_MAX_VER_SLEN)
 				goto vpd_done;
-			}
+
+			v0_len = len;
+			v0_str = &data[j];
 		}
+
+		if (mn_match && v0_str) {
+			memcpy(bp->fw_version, v0_str, v0_len);
+			bp->fw_version[v0_len] = ' ';
+			goto vpd_done;
+		}
+
 		goto vpd_done;
 	}
 
diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index f59f36910e98..204c565caa5a 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -12586,39 +12586,33 @@ static void __devinit tg3_read_partno(struct tg3 *tp)
 
 	/* Now parse and find the part number. */
 	for (i = 0; i < TG3_NVM_VPD_LEN - 2; ) {
-		unsigned int block_end;
+		unsigned int block_end, rosize;
 
 		i = pci_vpd_find_tag(vpd_data, i, TG3_NVM_VPD_LEN,
 				     PCI_VPD_LRDT_RO_DATA);
 		if (i < 0)
 			break;
 
-		block_end = i + PCI_VPD_LRDT_TAG_SIZE +
-			    pci_vpd_lrdt_size(&vpd_data[i]);
-
+		rosize = pci_vpd_lrdt_size(&vpd_data[i]);
+		block_end = i + PCI_VPD_LRDT_TAG_SIZE + rosize;
 		i += PCI_VPD_LRDT_TAG_SIZE;
 
 		if (block_end > TG3_NVM_VPD_LEN)
 			goto out_not_found;
 
-		while (i < (block_end - 2)) {
-			if (vpd_data[i + 0] == 'P' &&
-			    vpd_data[i + 1] == 'N') {
-				int partno_len = pci_vpd_info_field_size(&vpd_data[i]);
+		i = pci_vpd_find_info_keyword(vpd_data, i, rosize,
+					      PCI_VPD_RO_KEYWORD_PARTNO);
+		if (i > 0) {
+			u8 len = pci_vpd_info_field_size(&vpd_data[i]);
 
-				i += PCI_VPD_INFO_FLD_HDR_SIZE;
-				if (partno_len > TG3_BPN_SIZE ||
-				    (partno_len + i) > TG3_NVM_VPD_LEN)
-					goto out_not_found;
+			i += PCI_VPD_INFO_FLD_HDR_SIZE;
+			if (len > TG3_BPN_SIZE ||
+			    (len + i) > TG3_NVM_VPD_LEN)
+				break;
 
-				memcpy(tp->board_part_number,
-				       &vpd_data[i], partno_len);
+			memcpy(tp->board_part_number, &vpd_data[i], len);
 
-				/* Success. */
-				return;
-			}
-			i += PCI_VPD_INFO_FLD_HDR_SIZE +
-			     pci_vpd_info_field_size(&vpd_data[i]);
+			return;
 		}
 
 		/* Part number not found. */
diff --git a/drivers/pci/vpd.c b/drivers/pci/vpd.c
index 6bc554576f8d..a5a5ca17cfe6 100644
--- a/drivers/pci/vpd.c
+++ b/drivers/pci/vpd.c
@@ -41,3 +41,21 @@ int pci_vpd_find_tag(const u8 *buf, unsigned int off, unsigned int len, u8 rdt)
 	return -ENOENT;
 }
 EXPORT_SYMBOL_GPL(pci_vpd_find_tag);
+
+int pci_vpd_find_info_keyword(const u8 *buf, unsigned int off,
+			      unsigned int len, const char *kw)
+{
+	int i;
+
+	for (i = off; i + PCI_VPD_INFO_FLD_HDR_SIZE <= off + len;) {
+		if (buf[i + 0] == kw[0] &&
+		    buf[i + 1] == kw[1])
+			return i;
+
+		i += PCI_VPD_INFO_FLD_HDR_SIZE +
+		     pci_vpd_info_field_size(&buf[i]);
+	}
+
+	return -ENOENT;
+}
+EXPORT_SYMBOL_GPL(pci_vpd_find_info_keyword);
diff --git a/include/linux/pci.h b/include/linux/pci.h
index cfff32fc6e35..1f4a52131c99 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1375,6 +1375,10 @@ void pci_request_acs(void);
 
 #define PCI_VPD_INFO_FLD_HDR_SIZE	3
 
+#define PCI_VPD_RO_KEYWORD_PARTNO	"PN"
+#define PCI_VPD_RO_KEYWORD_MFR_ID	"MN"
+#define PCI_VPD_RO_KEYWORD_VENDOR0	"V0"
+
 /**
  * pci_vpd_lrdt_size - Extracts the Large Resource Data Type length
  * @lrdt: Pointer to the beginning of the Large Resource Data Type tag
@@ -1420,5 +1424,18 @@ static inline u8 pci_vpd_info_field_size(const u8 *info_field)
  */
 int pci_vpd_find_tag(const u8 *buf, unsigned int off, unsigned int len, u8 rdt);
 
+/**
+ * pci_vpd_find_info_keyword - Locates an information field keyword in the VPD
+ * @buf: Pointer to buffered vpd data
+ * @off: The offset into the buffer at which to begin the search
+ * @len: The length of the buffer area, relative to off, in which to search
+ * @kw: The keyword to search for
+ *
+ * Returns the index where the information field keyword was found or
+ * -ENOENT otherwise.
+ */
+int pci_vpd_find_info_keyword(const u8 *buf, unsigned int off,
+			      unsigned int len, const char *kw);
+
 #endif /* __KERNEL__ */
 #endif /* LINUX_PCI_H */
-- 
cgit v1.2.3


From 76bd061f5c7b7550cdaed68ad6219ea7cee288fc Mon Sep 17 00:00:00 2001
From: "Steven J. Magnani" <steve@digidescorp.com>
Date: Sun, 28 Feb 2010 22:18:16 -0700
Subject: fsldma: Fix cookie issues

fsl_dma_update_completed_cookie() appears to calculate the last completed
cookie incorrectly in the corner case where DMA on cookie 1 is in progress
just following a cookie wrap.

Signed-off-by: Steven J. Magnani <steve@digidescorp.com>
Acked-by: Ira W. Snyder <iws@ovro.caltech.edu>
[dan.j.williams@intel.com: fix an integer overflow warning with INT_MAX]
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/dma/fsldma.c      | 5 ++++-
 include/linux/dmaengine.h | 2 ++
 2 files changed, 6 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c
index 6541ebf8bf63..bbb4be5a3ff4 100644
--- a/drivers/dma/fsldma.c
+++ b/drivers/dma/fsldma.c
@@ -819,8 +819,11 @@ static void fsl_dma_update_completed_cookie(struct fsldma_chan *chan)
 	desc = to_fsl_desc(chan->ld_running.prev);
 	if (dma_is_idle(chan))
 		cookie = desc->async_tx.cookie;
-	else
+	else {
 		cookie = desc->async_tx.cookie - 1;
+		if (unlikely(cookie < DMA_MIN_COOKIE))
+			cookie = DMA_MAX_COOKIE;
+	}
 
 	chan->completed_cookie = cookie;
 
diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index 78784982b33e..4d8d619f28bc 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -31,6 +31,8 @@
  * if dma_cookie_t is >0 it's a DMA request cookie, <0 it's an error code
  */
 typedef s32 dma_cookie_t;
+#define DMA_MIN_COOKIE	1
+#define DMA_MAX_COOKIE	INT_MAX
 
 #define dma_submit_error(cookie) ((cookie) < 0 ? 1 : 0)
 
-- 
cgit v1.2.3


From 6a9ee8af344e3bd7dbd61e67037096cdf7f83289 Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@linux.ie>
Date: Mon, 1 Feb 2010 15:38:10 +1000
Subject: vga_switcheroo: initial implementation (v15)

Many new laptops now come with 2 gpus, one to be used for low power
modes and one for gaming/on-ac applications. These GPUs are typically
wired to the laptop panel and VGA ports via a multiplexer unit which
is controlled via ACPI methods.

4 combinations of systems typically exist - with 2 ACPI methods.
Intel/ATI - Lenovo W500/T500 - use ATPX ACPI method
ATI/ATI - some ASUS - use ATPX ACPI Method
Intel/Nvidia - - use _DSM ACPI method
Nvidia/Nvidia -  - use _DSM ACPI method.

TODO:
This patch adds support for the ATPX method and initial bits
for the _DSM methods that need to written by someone with
access to the hardware.
Add a proper non-debugfs interface - need to get some proper
testing first.

v2: add power up/down support for both devices
on W500 puts i915/radeon into D3 and cuts power to radeon.

v3: redo probing methods, no DMI list, drm devices call to
register with switcheroo, it tries to find an ATPX method on
any device and once there is two devices + ATPX it inits the
switcher.

v4: ATPX msg handling using buffers - should work on more machines

v5: rearchitect after more mjg59 discussion - move ATPX handling to
    radeon driver.

v6: add file headers + initial nouveau bits (to be filled out).

v7: merge delayed switcher code.

v8: avoid suspend/resume of gpu that is off

v9: rearchitect - mjg59 is always right. - move all ATPX code to
radeon, should allow simpler DSM also proper ATRM handling

v10: add ATRM support for radeon BIOS, add mutex to lock vgasr_priv

v11: fix bug in resuming Intel for 2nd time.

v12: start fixing up nvidia code blindly.

v13: blindly guess at finishing nvidia code

v14: remove radeon audio hacks - fix up intel resume more like upstream

v15: clean up printks + remove unnecessary igd/dis pointers

mount debugfs

/sys/kernel/debug/vgaswitcheroo/switch - should exist if ATPX detected
 + 2 cards.

DIS - immediate change to discrete
IGD - immediate change to IGD
DDIS - delayed change to discrete
DIGD - delayed change to IGD
ON - turn on not in use
OFF - turn off not in use

Tested on W500 (Intel/ATI) and T500 (Intel/ATI)

Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/i915/i915_dma.c              |  35 +++
 drivers/gpu/drm/i915/i915_drv.c              |   4 +-
 drivers/gpu/drm/i915/i915_drv.h              |   2 +
 drivers/gpu/drm/i915/intel_fb.c              |   2 +
 drivers/gpu/drm/nouveau/nouveau_acpi.c       | 160 +++++++---
 drivers/gpu/drm/nouveau/nouveau_drv.c        |   9 +-
 drivers/gpu/drm/nouveau/nouveau_drv.h        |  19 +-
 drivers/gpu/drm/nouveau/nouveau_fbcon.c      |   2 +
 drivers/gpu/drm/nouveau/nouveau_state.c      |  32 +-
 drivers/gpu/drm/radeon/Makefile              |   3 +-
 drivers/gpu/drm/radeon/radeon.h              |   8 +
 drivers/gpu/drm/radeon/radeon_atpx_handler.c | 258 +++++++++++++++
 drivers/gpu/drm/radeon/radeon_bios.c         |  44 ++-
 drivers/gpu/drm/radeon/radeon_device.c       |  40 +++
 drivers/gpu/drm/radeon/radeon_drv.c          |   2 +
 drivers/gpu/drm/radeon/radeon_drv.h          |   3 +
 drivers/gpu/drm/radeon/radeon_fb.c           |   3 +
 drivers/gpu/drm/radeon/radeon_kms.c          |   3 +
 drivers/gpu/vga/Kconfig                      |  13 +
 drivers/gpu/vga/Makefile                     |   1 +
 drivers/gpu/vga/vga_switcheroo.c             | 453 +++++++++++++++++++++++++++
 drivers/video/console/fbcon.c                |  18 ++
 include/linux/fb.h                           |   2 +
 include/linux/vga_switcheroo.h               |  58 ++++
 24 files changed, 1104 insertions(+), 70 deletions(-)
 create mode 100644 drivers/gpu/drm/radeon/radeon_atpx_handler.c
 create mode 100644 drivers/gpu/vga/vga_switcheroo.c
 create mode 100644 include/linux/vga_switcheroo.h

(limited to 'include/linux')

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 2307f98349f7..42ca07f04a21 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -35,6 +35,7 @@
 #include "i915_drv.h"
 #include "i915_trace.h"
 #include <linux/vgaarb.h>
+#include <linux/vga_switcheroo.h>
 
 /* Really want an OS-independent resettable timer.  Would like to have
  * this loop run for (eg) 3 sec, but have the timer reset every time
@@ -1199,6 +1200,32 @@ static unsigned int i915_vga_set_decode(void *cookie, bool state)
 		return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
 }
 
+static void i915_switcheroo_set_state(struct pci_dev *pdev, enum vga_switcheroo_state state)
+{
+	struct drm_device *dev = pci_get_drvdata(pdev);
+	pm_message_t pmm = { .event = PM_EVENT_SUSPEND };
+	if (state == VGA_SWITCHEROO_ON) {
+		printk(KERN_INFO "i915: switched off\n");
+		/* i915 resume handler doesn't set to D0 */
+		pci_set_power_state(dev->pdev, PCI_D0);
+		i915_resume(dev);
+	} else {
+		printk(KERN_ERR "i915: switched off\n");
+		i915_suspend(dev, pmm);
+	}
+}
+
+static bool i915_switcheroo_can_switch(struct pci_dev *pdev)
+{
+	struct drm_device *dev = pci_get_drvdata(pdev);
+	bool can_switch;
+
+	spin_lock(&dev->count_lock);
+	can_switch = (dev->open_count == 0);
+	spin_unlock(&dev->count_lock);
+	return can_switch;
+}
+
 static int i915_load_modeset_init(struct drm_device *dev,
 				  unsigned long prealloc_start,
 				  unsigned long prealloc_size,
@@ -1260,6 +1287,12 @@ static int i915_load_modeset_init(struct drm_device *dev,
 	if (ret)
 		goto destroy_ringbuffer;
 
+	ret = vga_switcheroo_register_client(dev->pdev,
+					     i915_switcheroo_set_state,
+					     i915_switcheroo_can_switch);
+	if (ret)
+		goto destroy_ringbuffer;
+
 	intel_modeset_init(dev);
 
 	ret = drm_irq_install(dev);
@@ -1544,6 +1577,7 @@ int i915_driver_unload(struct drm_device *dev)
 			dev_priv->child_dev_num = 0;
 		}
 		drm_irq_uninstall(dev);
+		vga_switcheroo_unregister_client(dev->pdev);
 		vga_client_register(dev->pdev, NULL, NULL, NULL);
 	}
 
@@ -1611,6 +1645,7 @@ void i915_driver_lastclose(struct drm_device * dev)
 
 	if (!dev_priv || drm_core_check_feature(dev, DRIVER_MODESET)) {
 		drm_fb_helper_restore();
+		vga_switcheroo_process_delayed_switch();
 		return;
 	}
 
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index cf4cb3e9a0c2..fd739efe73ce 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -201,7 +201,7 @@ static int i915_drm_freeze(struct drm_device *dev)
 	return 0;
 }
 
-static int i915_suspend(struct drm_device *dev, pm_message_t state)
+int i915_suspend(struct drm_device *dev, pm_message_t state)
 {
 	int error;
 
@@ -255,7 +255,7 @@ static int i915_drm_thaw(struct drm_device *dev)
 	return error;
 }
 
-static int i915_resume(struct drm_device *dev)
+int i915_resume(struct drm_device *dev)
 {
 	if (pci_enable_device(dev->pdev))
 		return -EIO;
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index b99b6a841d95..d77e56651352 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -736,6 +736,8 @@ extern unsigned int i915_fbpercrtc;
 extern unsigned int i915_powersave;
 extern unsigned int i915_lvds_downclock;
 
+extern int i915_suspend(struct drm_device *dev, pm_message_t state);
+extern int i915_resume(struct drm_device *dev);
 extern void i915_save_display(struct drm_device *dev);
 extern void i915_restore_display(struct drm_device *dev);
 extern int i915_master_create(struct drm_device *dev, struct drm_master *master);
diff --git a/drivers/gpu/drm/i915/intel_fb.c b/drivers/gpu/drm/i915/intel_fb.c
index aaabbcbe5905..8cd791dc5b29 100644
--- a/drivers/gpu/drm/i915/intel_fb.c
+++ b/drivers/gpu/drm/i915/intel_fb.c
@@ -35,6 +35,7 @@
 #include <linux/delay.h>
 #include <linux/fb.h>
 #include <linux/init.h>
+#include <linux/vga_switcheroo.h>
 
 #include "drmP.h"
 #include "drm.h"
@@ -235,6 +236,7 @@ static int intelfb_create(struct drm_device *dev, uint32_t fb_width,
 			obj_priv->gtt_offset, fbo);
 
 	mutex_unlock(&dev->struct_mutex);
+	vga_switcheroo_client_fb_set(dev->pdev, info);
 	return 0;
 
 out_unpin:
diff --git a/drivers/gpu/drm/nouveau/nouveau_acpi.c b/drivers/gpu/drm/nouveau/nouveau_acpi.c
index 48227e744753..0e0730a53137 100644
--- a/drivers/gpu/drm/nouveau/nouveau_acpi.c
+++ b/drivers/gpu/drm/nouveau/nouveau_acpi.c
@@ -11,6 +11,8 @@
 #include "nouveau_drm.h"
 #include "nv50_display.h"
 
+#include <linux/vga_switcheroo.h>
+
 #define NOUVEAU_DSM_SUPPORTED 0x00
 #define NOUVEAU_DSM_SUPPORTED_FUNCTIONS 0x00
 
@@ -28,31 +30,30 @@
 #define NOUVEAU_DSM_POWER_SPEED 0x01
 #define NOUVEAU_DSM_POWER_STAMINA 0x02
 
-static int nouveau_dsm(struct drm_device *dev, int func, int arg, int *result)
-{
-	static char muid[] = {
-		0xA0, 0xA0, 0x95, 0x9D, 0x60, 0x00, 0x48, 0x4D,
-		0xB3, 0x4D, 0x7E, 0x5F, 0xEA, 0x12, 0x9F, 0xD4,
-	};
+static struct nouveau_dsm_priv {
+	bool dsm_detected;
+	acpi_handle dhandle;
+	acpi_handle dsm_handle;
+} nouveau_dsm_priv;
+
+static const char nouveau_dsm_muid[] = {
+	0xA0, 0xA0, 0x95, 0x9D, 0x60, 0x00, 0x48, 0x4D,
+	0xB3, 0x4D, 0x7E, 0x5F, 0xEA, 0x12, 0x9F, 0xD4,
+};
 
-	struct pci_dev *pdev = dev->pdev;
-	struct acpi_handle *handle;
+static int nouveau_dsm(acpi_handle handle, int func, int arg, int *result)
+{
 	struct acpi_buffer output = { ACPI_ALLOCATE_BUFFER, NULL };
 	struct acpi_object_list input;
 	union acpi_object params[4];
 	union acpi_object *obj;
 	int err;
 
-	handle = DEVICE_ACPI_HANDLE(&pdev->dev);
-
-	if (!handle)
-		return -ENODEV;
-
 	input.count = 4;
 	input.pointer = params;
 	params[0].type = ACPI_TYPE_BUFFER;
-	params[0].buffer.length = sizeof(muid);
-	params[0].buffer.pointer = (char *)muid;
+	params[0].buffer.length = sizeof(nouveau_dsm_muid);
+	params[0].buffer.pointer = (char *)nouveau_dsm_muid;
 	params[1].type = ACPI_TYPE_INTEGER;
 	params[1].integer.value = 0x00000102;
 	params[2].type = ACPI_TYPE_INTEGER;
@@ -62,7 +63,7 @@ static int nouveau_dsm(struct drm_device *dev, int func, int arg, int *result)
 
 	err = acpi_evaluate_object(handle, "_DSM", &input, &output);
 	if (err) {
-		NV_INFO(dev, "failed to evaluate _DSM: %d\n", err);
+		printk(KERN_INFO "failed to evaluate _DSM: %d\n", err);
 		return err;
 	}
 
@@ -86,40 +87,119 @@ static int nouveau_dsm(struct drm_device *dev, int func, int arg, int *result)
 	return 0;
 }
 
-int nouveau_hybrid_setup(struct drm_device *dev)
+static int nouveau_dsm_switch_mux(acpi_handle handle, int mux_id)
 {
-	int result;
-
-	if (nouveau_dsm(dev, NOUVEAU_DSM_POWER, NOUVEAU_DSM_POWER_STATE,
-								&result))
-		return -ENODEV;
-
-	NV_INFO(dev, "_DSM hardware status gave 0x%x\n", result);
-
-	if (result) { /* Ensure that the external GPU is enabled */
-		nouveau_dsm(dev, NOUVEAU_DSM_LED, NOUVEAU_DSM_LED_SPEED, NULL);
-		nouveau_dsm(dev, NOUVEAU_DSM_POWER, NOUVEAU_DSM_POWER_SPEED,
-									NULL);
-	} else { /* Stamina mode - disable the external GPU */
-		nouveau_dsm(dev, NOUVEAU_DSM_LED, NOUVEAU_DSM_LED_STAMINA,
-									NULL);
-		nouveau_dsm(dev, NOUVEAU_DSM_POWER, NOUVEAU_DSM_POWER_STAMINA,
-									NULL);
-	}
+	return nouveau_dsm(handle, NOUVEAU_DSM_LED, mux_id, NULL);
+}
+
+static int nouveau_dsm_set_discrete_state(acpi_handle handle, enum vga_switcheroo_state state)
+{
+	int arg;
+	if (state == VGA_SWITCHEROO_ON)
+		arg = NOUVEAU_DSM_POWER_SPEED;
+	else
+		arg = NOUVEAU_DSM_POWER_STAMINA;
+	nouveau_dsm(handle, NOUVEAU_DSM_POWER, arg, NULL);
+	return 0;
+}
+
+static int nouveau_dsm_switchto(enum vga_switcheroo_client_id id)
+{
+	if (id == VGA_SWITCHEROO_IGD)
+		return nouveau_dsm_switch_mux(nouveau_dsm_priv.dsm_handle, NOUVEAU_DSM_LED_STAMINA);
+	else
+		return nouveau_dsm_switch_mux(nouveau_dsm_priv.dsm_handle, NOUVEAU_DSM_LED_SPEED);
+}
 
+static int nouveau_dsm_power_state(enum vga_switcheroo_client_id id,
+				   enum vga_switcheroo_state state)
+{
+	if (id == VGA_SWITCHEROO_IGD)
+		return 0;
+
+	return nouveau_dsm_set_discrete_state(nouveau_dsm_priv.dsm_handle, state);
+}
+
+static int nouveau_dsm_init(void)
+{
 	return 0;
 }
 
-bool nouveau_dsm_probe(struct drm_device *dev)
+static int nouveau_dsm_get_client_id(struct pci_dev *pdev)
 {
-	int support = 0;
+	if (nouveau_dsm_priv.dhandle == DEVICE_ACPI_HANDLE(&pdev->dev))
+		return VGA_SWITCHEROO_IGD;
+	else
+		return VGA_SWITCHEROO_DIS;
+}
+
+static struct vga_switcheroo_handler nouveau_dsm_handler = {
+	.switchto = nouveau_dsm_switchto,
+	.power_state = nouveau_dsm_power_state,
+	.init = nouveau_dsm_init,
+	.get_client_id = nouveau_dsm_get_client_id,
+};
 
-	if (nouveau_dsm(dev, NOUVEAU_DSM_SUPPORTED,
-				NOUVEAU_DSM_SUPPORTED_FUNCTIONS, &support))
+static bool nouveau_dsm_pci_probe(struct pci_dev *pdev)
+{
+	acpi_handle dhandle, nvidia_handle;
+	acpi_status status;
+	int ret;
+	uint32_t result;
+
+	dhandle = DEVICE_ACPI_HANDLE(&pdev->dev);
+	if (!dhandle)
+		return false;
+	status = acpi_get_handle(dhandle, "_DSM", &nvidia_handle);
+	if (ACPI_FAILURE(status)) {
 		return false;
+	}
 
-	if (!support)
+	ret= nouveau_dsm(nvidia_handle, NOUVEAU_DSM_SUPPORTED,
+			 NOUVEAU_DSM_SUPPORTED_FUNCTIONS, &result);
+	if (ret < 0)
 		return false;
 
+	nouveau_dsm_priv.dhandle = dhandle;
+	nouveau_dsm_priv.dsm_handle = nvidia_handle;
 	return true;
 }
+
+static bool nouveau_dsm_detect(void)
+{
+	char acpi_method_name[255] = { 0 };
+	struct acpi_buffer buffer = {sizeof(acpi_method_name), acpi_method_name};
+	struct pci_dev *pdev = NULL;
+	int has_dsm = 0;
+	int vga_count = 0;
+	while ((pdev = pci_get_class(PCI_CLASS_DISPLAY_VGA << 8, pdev)) != NULL) {
+		vga_count++;
+
+		has_dsm |= (nouveau_dsm_pci_probe(pdev) == true);
+	}
+
+	if (vga_count == 2 && has_dsm) {
+		acpi_get_name(nouveau_dsm_priv.dsm_handle, ACPI_FULL_PATHNAME, &buffer);
+		printk(KERN_INFO "VGA switcheroo: detected DSM switching method %s handle\n",
+		       acpi_method_name);
+		nouveau_dsm_priv.dsm_detected = true;
+		return true;
+	}
+	return false;
+}
+
+void nouveau_register_dsm_handler(void)
+{
+	bool r;
+
+	r = nouveau_dsm_detect();
+	if (!r)
+		return;
+
+	vga_switcheroo_register_handler(&nouveau_dsm_handler);
+}
+
+void nouveau_unregister_dsm_handler(void)
+{
+	vga_switcheroo_unregister_handler();
+}
diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.c b/drivers/gpu/drm/nouveau/nouveau_drv.c
index da3b93b84502..f83ec65addba 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drv.c
+++ b/drivers/gpu/drm/nouveau/nouveau_drv.c
@@ -135,7 +135,7 @@ nouveau_pci_remove(struct pci_dev *pdev)
 	drm_put_dev(dev);
 }
 
-static int
+int
 nouveau_pci_suspend(struct pci_dev *pdev, pm_message_t pm_state)
 {
 	struct drm_device *dev = pci_get_drvdata(pdev);
@@ -233,7 +233,7 @@ out_abort:
 	return ret;
 }
 
-static int
+int
 nouveau_pci_resume(struct pci_dev *pdev)
 {
 	struct drm_device *dev = pci_get_drvdata(pdev);
@@ -402,8 +402,10 @@ static int __init nouveau_init(void)
 			nouveau_modeset = 1;
 	}
 
-	if (nouveau_modeset == 1)
+	if (nouveau_modeset == 1) {
 		driver.driver_features |= DRIVER_MODESET;
+		nouveau_register_dsm_handler();
+	}
 
 	return drm_init(&driver);
 }
@@ -411,6 +413,7 @@ static int __init nouveau_init(void)
 static void __exit nouveau_exit(void)
 {
 	drm_exit(&driver);
+	nouveau_unregister_dsm_handler();
 }
 
 module_init(nouveau_init);
diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h
index 1c15ef37b71c..85c05feab4f0 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drv.h
+++ b/drivers/gpu/drm/nouveau/nouveau_drv.h
@@ -614,7 +614,6 @@ struct drm_nouveau_private {
 	} susres;
 
 	struct backlight_device *backlight;
-	bool acpi_dsm;
 
 	struct nouveau_channel *evo;
 
@@ -682,6 +681,9 @@ extern int nouveau_ignorelid;
 extern int nouveau_nofbaccel;
 extern int nouveau_noaccel;
 
+extern int nouveau_pci_suspend(struct pci_dev *pdev, pm_message_t pm_state);
+extern int nouveau_pci_resume(struct pci_dev *pdev);
+
 /* nouveau_state.c */
 extern void nouveau_preclose(struct drm_device *dev, struct drm_file *);
 extern int  nouveau_load(struct drm_device *, unsigned long flags);
@@ -848,19 +850,8 @@ extern int  nouveau_dma_init(struct nouveau_channel *);
 extern int  nouveau_dma_wait(struct nouveau_channel *, int size);
 
 /* nouveau_acpi.c */
-#ifdef CONFIG_ACPI
-extern int nouveau_hybrid_setup(struct drm_device *dev);
-extern bool nouveau_dsm_probe(struct drm_device *dev);
-#else
-static inline int nouveau_hybrid_setup(struct drm_device *dev)
-{
-	return 0;
-}
-static inline bool nouveau_dsm_probe(struct drm_device *dev)
-{
-	return false;
-}
-#endif
+void nouveau_register_dsm_handler(void);
+void nouveau_unregister_dsm_handler(void);
 
 /* nouveau_backlight.c */
 #ifdef CONFIG_DRM_NOUVEAU_BACKLIGHT
diff --git a/drivers/gpu/drm/nouveau/nouveau_fbcon.c b/drivers/gpu/drm/nouveau/nouveau_fbcon.c
index ea879a2efef3..1ebf22b664dd 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fbcon.c
+++ b/drivers/gpu/drm/nouveau/nouveau_fbcon.c
@@ -36,6 +36,7 @@
 #include <linux/fb.h>
 #include <linux/init.h>
 #include <linux/screen_info.h>
+#include <linux/vga_switcheroo.h>
 
 #include "drmP.h"
 #include "drm.h"
@@ -370,6 +371,7 @@ nouveau_fbcon_create(struct drm_device *dev, uint32_t fb_width,
 						nvbo->bo.offset, nvbo);
 
 	mutex_unlock(&dev->struct_mutex);
+	vga_switcheroo_client_fb_set(dev->pdev, info);
 	return 0;
 
 out_unref:
diff --git a/drivers/gpu/drm/nouveau/nouveau_state.c b/drivers/gpu/drm/nouveau/nouveau_state.c
index a4851af5b05e..85d65b91389c 100644
--- a/drivers/gpu/drm/nouveau/nouveau_state.c
+++ b/drivers/gpu/drm/nouveau/nouveau_state.c
@@ -29,6 +29,7 @@
 #include "drm_sarea.h"
 #include "drm_crtc_helper.h"
 #include <linux/vgaarb.h>
+#include <linux/vga_switcheroo.h>
 
 #include "nouveau_drv.h"
 #include "nouveau_drm.h"
@@ -371,6 +372,30 @@ out_err:
 	return ret;
 }
 
+static void nouveau_switcheroo_set_state(struct pci_dev *pdev,
+					 enum vga_switcheroo_state state)
+{
+	pm_message_t pmm = { .event = PM_EVENT_SUSPEND };
+	if (state == VGA_SWITCHEROO_ON) {
+		printk(KERN_ERR "VGA switcheroo: switched nouveau on\n");
+		nouveau_pci_resume(pdev);
+	} else {
+		printk(KERN_ERR "VGA switcheroo: switched nouveau off\n");
+		nouveau_pci_suspend(pdev, pmm);
+	}
+}
+
+static bool nouveau_switcheroo_can_switch(struct pci_dev *pdev)
+{
+	struct drm_device *dev = pci_get_drvdata(pdev);
+	bool can_switch;
+
+	spin_lock(&dev->count_lock);
+	can_switch = (dev->open_count == 0);
+	spin_unlock(&dev->count_lock);
+	return can_switch;
+}
+
 int
 nouveau_card_init(struct drm_device *dev)
 {
@@ -384,6 +409,8 @@ nouveau_card_init(struct drm_device *dev)
 		return 0;
 
 	vga_client_register(dev->pdev, dev, NULL, nouveau_vga_set_decode);
+	vga_switcheroo_register_client(dev->pdev, nouveau_switcheroo_set_state,
+				       nouveau_switcheroo_can_switch);
 
 	/* Initialise internal driver API hooks */
 	ret = nouveau_init_engine_ptrs(dev);
@@ -617,11 +644,6 @@ int nouveau_load(struct drm_device *dev, unsigned long flags)
 	NV_DEBUG(dev, "vendor: 0x%X device: 0x%X class: 0x%X\n",
 		 dev->pci_vendor, dev->pci_device, dev->pdev->class);
 
-	dev_priv->acpi_dsm = nouveau_dsm_probe(dev);
-
-	if (dev_priv->acpi_dsm)
-		nouveau_hybrid_setup(dev);
-
 	dev_priv->wq = create_workqueue("nouveau");
 	if (!dev_priv->wq)
 		return -EINVAL;
diff --git a/drivers/gpu/drm/radeon/Makefile b/drivers/gpu/drm/radeon/Makefile
index 1cc7b937b1ea..8e62fe13e31c 100644
--- a/drivers/gpu/drm/radeon/Makefile
+++ b/drivers/gpu/drm/radeon/Makefile
@@ -54,7 +54,8 @@ radeon-y += radeon_device.o radeon_kms.o \
 	radeon_cs.o radeon_bios.o radeon_benchmark.o r100.o r300.o r420.o \
 	rs400.o rs600.o rs690.o rv515.o r520.o r600.o rv770.o radeon_test.o \
 	r200.o radeon_legacy_tv.o r600_cs.o r600_blit.o r600_blit_shaders.o \
-	r600_blit_kms.o radeon_pm.o atombios_dp.o r600_audio.o r600_hdmi.o
+	r600_blit_kms.o radeon_pm.o atombios_dp.o r600_audio.o r600_hdmi.o \
+	radeon_atpx_handler.o
 
 radeon-$(CONFIG_COMPAT) += radeon_ioc32.o
 
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index c0356bb193e5..a5dfb1557d3e 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -118,6 +118,10 @@ struct radeon_device;
 /*
  * BIOS.
  */
+#define ATRM_BIOS_PAGE 4096
+
+bool radeon_atrm_supported(struct pci_dev *pdev);
+int radeon_atrm_get_bios_chunk(uint8_t *bios, int offset, int len);
 bool radeon_get_bios(struct radeon_device *rdev);
 
 
@@ -838,6 +842,8 @@ struct radeon_device {
 	int			audio_bits_per_sample;
 	uint8_t			audio_status_bits;
 	uint8_t			audio_category_code;
+
+	bool powered_down;
 };
 
 int radeon_device_init(struct radeon_device *rdev,
@@ -1042,6 +1048,8 @@ extern void radeon_legacy_set_clock_gating(struct radeon_device *rdev, int enabl
 extern void radeon_atom_set_clock_gating(struct radeon_device *rdev, int enable);
 extern void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain);
 extern bool radeon_ttm_bo_is_radeon_bo(struct ttm_buffer_object *bo);
+extern int radeon_resume_kms(struct drm_device *dev);
+extern int radeon_suspend_kms(struct drm_device *dev, pm_message_t state);
 
 /* r100,rv100,rs100,rv200,rs200,r200,rv250,rs300,rv280 */
 struct r100_mc_save {
diff --git a/drivers/gpu/drm/radeon/radeon_atpx_handler.c b/drivers/gpu/drm/radeon/radeon_atpx_handler.c
new file mode 100644
index 000000000000..0ae52f19071d
--- /dev/null
+++ b/drivers/gpu/drm/radeon/radeon_atpx_handler.c
@@ -0,0 +1,258 @@
+/*
+ * Copyright (c) 2010 Red Hat Inc.
+ * Author : Dave Airlie <airlied@redhat.com>
+ *
+ * Licensed under GPLv2
+ *
+ * ATPX support for both Intel/ATI
+ */
+
+#include <linux/vga_switcheroo.h>
+#include <acpi/acpi.h>
+#include <acpi/acpi_bus.h>
+#include <linux/pci.h>
+
+#define ATPX_VERSION 0
+#define ATPX_GPU_PWR 2
+#define ATPX_MUX_SELECT 3
+
+#define ATPX_INTEGRATED 0
+#define ATPX_DISCRETE 1
+
+#define ATPX_MUX_IGD 0
+#define ATPX_MUX_DISCRETE 1
+
+static struct radeon_atpx_priv {
+	bool atpx_detected;
+	/* handle for device - and atpx */
+	acpi_handle dhandle;
+	acpi_handle atpx_handle;
+	acpi_handle atrm_handle;
+} radeon_atpx_priv;
+
+/* retrieve the ROM in 4k blocks */
+static int radeon_atrm_call(acpi_handle atrm_handle, uint8_t *bios,
+			    int offset, int len)
+{
+	acpi_status status;
+	union acpi_object atrm_arg_elements[2], *obj;
+	struct acpi_object_list atrm_arg;
+	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL};
+
+	atrm_arg.count = 2;
+	atrm_arg.pointer = &atrm_arg_elements[0];
+
+	atrm_arg_elements[0].type = ACPI_TYPE_INTEGER;
+	atrm_arg_elements[0].integer.value = offset;
+
+	atrm_arg_elements[1].type = ACPI_TYPE_INTEGER;
+	atrm_arg_elements[1].integer.value = len;
+
+	status = acpi_evaluate_object(atrm_handle, NULL, &atrm_arg, &buffer);
+	if (ACPI_FAILURE(status)) {
+		printk("failed to evaluate ATRM got %s\n", acpi_format_exception(status));
+		return -ENODEV;
+	}
+
+	obj = (union acpi_object *)buffer.pointer;
+	memcpy(bios+offset, obj->buffer.pointer, len);
+	kfree(buffer.pointer);
+	return len;
+}
+
+bool radeon_atrm_supported(struct pci_dev *pdev)
+{
+	/* get the discrete ROM only via ATRM */
+	if (!radeon_atpx_priv.atpx_detected)
+		return false;
+
+	if (radeon_atpx_priv.dhandle == DEVICE_ACPI_HANDLE(&pdev->dev))
+		return false;
+	return true;
+}
+
+
+int radeon_atrm_get_bios_chunk(uint8_t *bios, int offset, int len)
+{
+	return radeon_atrm_call(radeon_atpx_priv.atrm_handle, bios, offset, len);
+}
+
+static int radeon_atpx_get_version(acpi_handle handle)
+{
+	acpi_status status;
+	union acpi_object atpx_arg_elements[2], *obj;
+	struct acpi_object_list atpx_arg;
+	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
+
+	atpx_arg.count = 2;
+	atpx_arg.pointer = &atpx_arg_elements[0];
+
+	atpx_arg_elements[0].type = ACPI_TYPE_INTEGER;
+	atpx_arg_elements[0].integer.value = ATPX_VERSION;
+
+	atpx_arg_elements[1].type = ACPI_TYPE_INTEGER;
+	atpx_arg_elements[1].integer.value = ATPX_VERSION;
+
+	status = acpi_evaluate_object(handle, NULL, &atpx_arg, &buffer);
+	if (ACPI_FAILURE(status)) {
+		printk("%s: failed to call ATPX: %s\n", __func__, acpi_format_exception(status));
+		return -ENOSYS;
+	}
+	obj = (union acpi_object *)buffer.pointer;
+	if (obj && (obj->type == ACPI_TYPE_BUFFER))
+		printk(KERN_INFO "radeon atpx: version is %d\n", *((u8 *)(obj->buffer.pointer) + 2));
+	kfree(buffer.pointer);
+	return 0;
+}
+
+static int radeon_atpx_execute(acpi_handle handle, int cmd_id, u16 value)
+{
+	acpi_status status;
+	union acpi_object atpx_arg_elements[2];
+	struct acpi_object_list atpx_arg;
+	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
+	uint8_t buf[4] = {0};
+
+	if (!handle)
+		return -EINVAL;
+
+	atpx_arg.count = 2;
+	atpx_arg.pointer = &atpx_arg_elements[0];
+
+	atpx_arg_elements[0].type = ACPI_TYPE_INTEGER;
+	atpx_arg_elements[0].integer.value = cmd_id;
+
+	buf[2] = value & 0xff;
+	buf[3] = (value >> 8) & 0xff;
+
+	atpx_arg_elements[1].type = ACPI_TYPE_BUFFER;
+	atpx_arg_elements[1].buffer.length = 4;
+	atpx_arg_elements[1].buffer.pointer = buf;
+
+	status = acpi_evaluate_object(handle, NULL, &atpx_arg, &buffer);
+	if (ACPI_FAILURE(status)) {
+		printk("%s: failed to call ATPX: %s\n", __func__, acpi_format_exception(status));
+		return -ENOSYS;
+	}
+	kfree(buffer.pointer);
+
+	return 0;
+}
+
+static int radeon_atpx_set_discrete_state(acpi_handle handle, int state)
+{
+	return radeon_atpx_execute(handle, ATPX_GPU_PWR, state);
+}
+
+static int radeon_atpx_switch_mux(acpi_handle handle, int mux_id)
+{
+	return radeon_atpx_execute(handle, ATPX_MUX_SELECT, mux_id);
+}
+
+
+static int radeon_atpx_switchto(enum vga_switcheroo_client_id id)
+{
+	if (id == VGA_SWITCHEROO_IGD)
+		radeon_atpx_switch_mux(radeon_atpx_priv.atpx_handle, 0);
+	else
+		radeon_atpx_switch_mux(radeon_atpx_priv.atpx_handle, 1);
+	return 0;
+}
+
+static int radeon_atpx_power_state(enum vga_switcheroo_client_id id,
+				   enum vga_switcheroo_state state)
+{
+	/* on w500 ACPI can't change intel gpu state */
+	if (id == VGA_SWITCHEROO_IGD)
+		return 0;
+
+	radeon_atpx_set_discrete_state(radeon_atpx_priv.atpx_handle, state);
+	return 0;
+}
+
+static bool radeon_atpx_pci_probe_handle(struct pci_dev *pdev)
+{
+	acpi_handle dhandle, atpx_handle, atrm_handle;
+	acpi_status status;
+
+	dhandle = DEVICE_ACPI_HANDLE(&pdev->dev);
+	if (!dhandle)
+		return false;
+
+	status = acpi_get_handle(dhandle, "ATPX", &atpx_handle);
+	if (ACPI_FAILURE(status))
+		return false;
+
+	status = acpi_get_handle(dhandle, "ATRM", &atrm_handle);
+	if (ACPI_FAILURE(status))
+		return false;
+
+	radeon_atpx_priv.dhandle = dhandle;
+	radeon_atpx_priv.atpx_handle = atpx_handle;
+	radeon_atpx_priv.atrm_handle = atrm_handle;
+	return true;
+}
+
+static int radeon_atpx_init(void)
+{
+	/* set up the ATPX handle */
+
+	radeon_atpx_get_version(radeon_atpx_priv.atpx_handle);
+	return 0;
+}
+
+static int radeon_atpx_get_client_id(struct pci_dev *pdev)
+{
+	if (radeon_atpx_priv.dhandle == DEVICE_ACPI_HANDLE(&pdev->dev))
+		return VGA_SWITCHEROO_IGD;
+	else
+		return VGA_SWITCHEROO_DIS;
+}
+
+static struct vga_switcheroo_handler radeon_atpx_handler = {
+	.switchto = radeon_atpx_switchto,
+	.power_state = radeon_atpx_power_state,
+	.init = radeon_atpx_init,
+	.get_client_id = radeon_atpx_get_client_id,
+};
+
+static bool radeon_atpx_detect(void)
+{
+	char acpi_method_name[255] = { 0 };
+	struct acpi_buffer buffer = {sizeof(acpi_method_name), acpi_method_name};
+	struct pci_dev *pdev = NULL;
+	bool has_atpx = false;
+	int vga_count = 0;
+
+	while ((pdev = pci_get_class(PCI_CLASS_DISPLAY_VGA << 8, pdev)) != NULL) {
+		vga_count++;
+
+		has_atpx |= (radeon_atpx_pci_probe_handle(pdev) == true);
+	}
+
+	if (has_atpx && vga_count == 2) {
+		acpi_get_name(radeon_atpx_priv.atpx_handle, ACPI_FULL_PATHNAME, &buffer);
+		printk(KERN_INFO "VGA switcheroo: detected switching method %s handle\n",
+		       acpi_method_name);
+		radeon_atpx_priv.atpx_detected = true;
+		return true;
+	}
+	return false;
+}
+
+void radeon_register_atpx_handler(void)
+{
+	bool r;
+
+	/* detect if we have any ATPX + 2 VGA in the system */
+	r = radeon_atpx_detect();
+	if (!r)
+		return;
+
+	vga_switcheroo_register_handler(&radeon_atpx_handler);
+}
+
+void radeon_unregister_atpx_handler(void)
+{
+	vga_switcheroo_unregister_handler();
+}
diff --git a/drivers/gpu/drm/radeon/radeon_bios.c b/drivers/gpu/drm/radeon/radeon_bios.c
index 906921740c60..a34b909485b8 100644
--- a/drivers/gpu/drm/radeon/radeon_bios.c
+++ b/drivers/gpu/drm/radeon/radeon_bios.c
@@ -30,6 +30,7 @@
 #include "radeon.h"
 #include "atom.h"
 
+#include <linux/vga_switcheroo.h>
 /*
  * BIOS.
  */
@@ -62,7 +63,7 @@ static bool igp_read_bios_from_vram(struct radeon_device *rdev)
 		iounmap(bios);
 		return false;
 	}
-	memcpy(rdev->bios, bios, size);
+	memcpy_fromio(rdev->bios, bios, size);
 	iounmap(bios);
 	return true;
 }
@@ -93,6 +94,38 @@ static bool radeon_read_bios(struct radeon_device *rdev)
 	return true;
 }
 
+/* ATRM is used to get the BIOS on the discrete cards in
+ * dual-gpu systems.
+ */
+static bool radeon_atrm_get_bios(struct radeon_device *rdev)
+{
+	int ret;
+	int size = 64 * 1024;
+	int i;
+
+	if (!radeon_atrm_supported(rdev->pdev))
+		return false;
+
+	rdev->bios = kmalloc(size, GFP_KERNEL);
+	if (!rdev->bios) {
+		DRM_ERROR("Unable to allocate bios\n");
+		return false;
+	}
+
+	for (i = 0; i < size / ATRM_BIOS_PAGE; i++) {
+		ret = radeon_atrm_get_bios_chunk(rdev->bios,
+						 (i * ATRM_BIOS_PAGE),
+						 ATRM_BIOS_PAGE);
+		if (ret <= 0)
+			break;
+	}
+
+	if (i == 0 || rdev->bios[0] != 0x55 || rdev->bios[1] != 0xaa) {
+		kfree(rdev->bios);
+		return false;
+	}
+	return true;
+}
 static bool r700_read_disabled_bios(struct radeon_device *rdev)
 {
 	uint32_t viph_control;
@@ -388,16 +421,16 @@ static bool radeon_read_disabled_bios(struct radeon_device *rdev)
 		return legacy_read_disabled_bios(rdev);
 }
 
+
 bool radeon_get_bios(struct radeon_device *rdev)
 {
 	bool r;
 	uint16_t tmp;
 
-	if (rdev->flags & RADEON_IS_IGP) {
+	r = radeon_atrm_get_bios(rdev);
+	if (r == false)
 		r = igp_read_bios_from_vram(rdev);
-		if (r == false)
-			r = radeon_read_bios(rdev);
-	} else
+	if (r == false)
 		r = radeon_read_bios(rdev);
 	if (r == false) {
 		r = radeon_read_disabled_bios(rdev);
@@ -408,6 +441,7 @@ bool radeon_get_bios(struct radeon_device *rdev)
 		return false;
 	}
 	if (rdev->bios[0] != 0x55 || rdev->bios[1] != 0xaa) {
+		printk("BIOS signature incorrect %x %x\n", rdev->bios[0], rdev->bios[1]);
 		goto free_bios;
 	}
 
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index 768b1509fa03..cb8d9a1dd69c 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -30,6 +30,7 @@
 #include <drm/drm_crtc_helper.h>
 #include <drm/radeon_drm.h>
 #include <linux/vgaarb.h>
+#include <linux/vga_switcheroo.h>
 #include "radeon_reg.h"
 #include "radeon.h"
 #include "radeon_asic.h"
@@ -613,6 +614,36 @@ void radeon_check_arguments(struct radeon_device *rdev)
 	}
 }
 
+static void radeon_switcheroo_set_state(struct pci_dev *pdev, enum vga_switcheroo_state state)
+{
+	struct drm_device *dev = pci_get_drvdata(pdev);
+	struct radeon_device *rdev = dev->dev_private;
+	pm_message_t pmm = { .event = PM_EVENT_SUSPEND };
+	if (state == VGA_SWITCHEROO_ON) {
+		printk(KERN_INFO "radeon: switched on\n");
+		/* don't suspend or resume card normally */
+		rdev->powered_down = false;
+		radeon_resume_kms(dev);
+	} else {
+		printk(KERN_INFO "radeon: switched off\n");
+		radeon_suspend_kms(dev, pmm);
+		/* don't suspend or resume card normally */
+		rdev->powered_down = true;
+	}
+}
+
+static bool radeon_switcheroo_can_switch(struct pci_dev *pdev)
+{
+	struct drm_device *dev = pci_get_drvdata(pdev);
+	bool can_switch;
+
+	spin_lock(&dev->count_lock);
+	can_switch = (dev->open_count == 0);
+	spin_unlock(&dev->count_lock);
+	return can_switch;
+}
+
+
 int radeon_device_init(struct radeon_device *rdev,
 		       struct drm_device *ddev,
 		       struct pci_dev *pdev,
@@ -692,6 +723,9 @@ int radeon_device_init(struct radeon_device *rdev,
 	/* this will fail for cards that aren't VGA class devices, just
 	 * ignore it */
 	vga_client_register(rdev->pdev, rdev, NULL, radeon_vga_set_decode);
+	vga_switcheroo_register_client(rdev->pdev,
+				       radeon_switcheroo_set_state,
+				       radeon_switcheroo_can_switch);
 
 	r = radeon_init(rdev);
 	if (r)
@@ -723,6 +757,7 @@ void radeon_device_fini(struct radeon_device *rdev)
 	rdev->shutdown = true;
 	radeon_fini(rdev);
 	destroy_workqueue(rdev->wq);
+	vga_switcheroo_unregister_client(rdev->pdev);
 	vga_client_register(rdev->pdev, NULL, NULL, NULL);
 	iounmap(rdev->rmmio);
 	rdev->rmmio = NULL;
@@ -746,6 +781,8 @@ int radeon_suspend_kms(struct drm_device *dev, pm_message_t state)
 	}
 	rdev = dev->dev_private;
 
+	if (rdev->powered_down)
+		return 0;
 	/* unpin the front buffers */
 	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
 		struct radeon_framebuffer *rfb = to_radeon_framebuffer(crtc->fb);
@@ -791,6 +828,9 @@ int radeon_resume_kms(struct drm_device *dev)
 {
 	struct radeon_device *rdev = dev->dev_private;
 
+	if (rdev->powered_down)
+		return 0;
+
 	acquire_console_sem();
 	pci_set_power_state(dev->pdev, PCI_D0);
 	pci_restore_state(dev->pdev);
diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c
index 8ba3de7994d4..4ab53aa163b2 100644
--- a/drivers/gpu/drm/radeon/radeon_drv.c
+++ b/drivers/gpu/drm/radeon/radeon_drv.c
@@ -339,6 +339,7 @@ static int __init radeon_init(void)
 		driver = &kms_driver;
 		driver->driver_features |= DRIVER_MODESET;
 		driver->num_ioctls = radeon_max_kms_ioctl;
+		radeon_register_atpx_handler();
 	}
 	/* if the vga console setting is enabled still
 	 * let modprobe override it */
@@ -348,6 +349,7 @@ static int __init radeon_init(void)
 static void __exit radeon_exit(void)
 {
 	drm_exit(driver);
+	radeon_unregister_atpx_handler();
 }
 
 module_init(radeon_init);
diff --git a/drivers/gpu/drm/radeon/radeon_drv.h b/drivers/gpu/drm/radeon/radeon_drv.h
index c57ad606504d..736237195143 100644
--- a/drivers/gpu/drm/radeon/radeon_drv.h
+++ b/drivers/gpu/drm/radeon/radeon_drv.h
@@ -455,6 +455,9 @@ extern void r600_blit_swap(struct drm_device *dev,
 			   int sx, int sy, int dx, int dy,
 			   int w, int h, int src_pitch, int dst_pitch, int cpp);
 
+/* atpx handler */
+void radeon_register_atpx_handler(void);
+void radeon_unregister_atpx_handler(void);
 /* Flags for stats.boxes
  */
 #define RADEON_BOX_DMA_IDLE      0x1
diff --git a/drivers/gpu/drm/radeon/radeon_fb.c b/drivers/gpu/drm/radeon/radeon_fb.c
index d71e346e9ab5..561719223988 100644
--- a/drivers/gpu/drm/radeon/radeon_fb.c
+++ b/drivers/gpu/drm/radeon/radeon_fb.c
@@ -39,6 +39,8 @@
 
 #include "drm_fb_helper.h"
 
+#include <linux/vga_switcheroo.h>
+
 struct radeon_fb_device {
 	struct drm_fb_helper helper;
 	struct radeon_framebuffer	*rfb;
@@ -291,6 +293,7 @@ int radeonfb_create(struct drm_device *dev,
 	rfbdev->rdev = rdev;
 
 	mutex_unlock(&rdev->ddev->struct_mutex);
+	vga_switcheroo_client_fb_set(rdev->ddev->pdev, info);
 	return 0;
 
 out_unref:
diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c
index f23b05606eb5..5db7af6b91f4 100644
--- a/drivers/gpu/drm/radeon/radeon_kms.c
+++ b/drivers/gpu/drm/radeon/radeon_kms.c
@@ -30,6 +30,8 @@
 #include "radeon.h"
 #include "radeon_drm.h"
 
+#include <linux/vga_switcheroo.h>
+
 int radeon_driver_unload_kms(struct drm_device *dev)
 {
 	struct radeon_device *rdev = dev->dev_private;
@@ -136,6 +138,7 @@ int radeon_driver_firstopen_kms(struct drm_device *dev)
 
 void radeon_driver_lastclose_kms(struct drm_device *dev)
 {
+	vga_switcheroo_process_delayed_switch();
 }
 
 int radeon_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
diff --git a/drivers/gpu/vga/Kconfig b/drivers/gpu/vga/Kconfig
index 790e675b13eb..6116a0196214 100644
--- a/drivers/gpu/vga/Kconfig
+++ b/drivers/gpu/vga/Kconfig
@@ -8,3 +8,16 @@ config VGA_ARB
 	  are accessed at same time they need some kind of coordination. Please
 	  see Documentation/vgaarbiter.txt for more details. Select this to
 	  enable VGA arbiter.
+
+config VGA_SWITCHEROO
+	bool "Laptop Hybrid Grapics - GPU switching support"
+	default y
+	depends on X86
+	depends on ACPI
+	help
+	  Many laptops released in 2008/9/10 have two gpus with a multiplxer
+	  to switch between them. This adds support for dynamic switching when
+          X isn't running and delayed switching until the next logoff. This
+	  features is called hybrid graphics, ATI PowerXpress, and Nvidia
+	  HybridPower.
+
diff --git a/drivers/gpu/vga/Makefile b/drivers/gpu/vga/Makefile
index 7cc8c1ed645b..14ca30b75d0a 100644
--- a/drivers/gpu/vga/Makefile
+++ b/drivers/gpu/vga/Makefile
@@ -1 +1,2 @@
 obj-$(CONFIG_VGA_ARB)  += vgaarb.o
+obj-$(CONFIG_VGA_SWITCHEROO) += vga_switcheroo.o
diff --git a/drivers/gpu/vga/vga_switcheroo.c b/drivers/gpu/vga/vga_switcheroo.c
new file mode 100644
index 000000000000..a3f587a0aba9
--- /dev/null
+++ b/drivers/gpu/vga/vga_switcheroo.c
@@ -0,0 +1,453 @@
+/*
+ * Copyright (c) 2010 Red Hat Inc.
+ * Author : Dave Airlie <airlied@redhat.com>
+ *
+ *
+ * Licensed under GPLv2
+ *
+ * vga_switcheroo.c - Support for laptop with dual GPU using one set of outputs
+
+ Switcher interface - methods require for ATPX and DCM
+ - switchto - this throws the output MUX switch
+ - discrete_set_power - sets the power state for the discrete card
+
+ GPU driver interface
+ - set_gpu_state - this should do the equiv of s/r for the card
+		  - this should *not* set the discrete power state
+ - switch_check  - check if the device is in a position to switch now
+ */
+
+#include <linux/module.h>
+#include <linux/dmi.h>
+#include <linux/seq_file.h>
+#include <linux/uaccess.h>
+#include <linux/fs.h>
+#include <linux/debugfs.h>
+#include <linux/fb.h>
+
+#include <acpi/acpi.h>
+#include <acpi/acpi_bus.h>
+
+#include <linux/pci.h>
+#include <linux/vga_switcheroo.h>
+
+struct vga_switcheroo_client {
+	struct pci_dev *pdev;
+	struct fb_info *fb_info;
+	int pwr_state;
+	void (*set_gpu_state)(struct pci_dev *pdev, enum vga_switcheroo_state);
+	bool (*can_switch)(struct pci_dev *pdev);
+	int id;
+	bool active;
+};
+
+static DEFINE_MUTEX(vgasr_mutex);
+
+struct vgasr_priv {
+
+	bool active;
+	bool delayed_switch_active;
+	enum vga_switcheroo_client_id delayed_client_id;
+
+	struct dentry *debugfs_root;
+	struct dentry *switch_file;
+
+	int registered_clients;
+	struct vga_switcheroo_client clients[VGA_SWITCHEROO_MAX_CLIENTS];
+
+	struct vga_switcheroo_handler *handler;
+};
+
+static int vga_switcheroo_debugfs_init(struct vgasr_priv *priv);
+static void vga_switcheroo_debugfs_fini(struct vgasr_priv *priv);
+
+/* only one switcheroo per system */
+static struct vgasr_priv vgasr_priv;
+
+int vga_switcheroo_register_handler(struct vga_switcheroo_handler *handler)
+{
+	mutex_lock(&vgasr_mutex);
+	if (vgasr_priv.handler) {
+		mutex_unlock(&vgasr_mutex);
+		return -EINVAL;
+	}
+
+	vgasr_priv.handler = handler;
+	mutex_unlock(&vgasr_mutex);
+	return 0;
+}
+EXPORT_SYMBOL(vga_switcheroo_register_handler);
+
+void vga_switcheroo_unregister_handler(void)
+{
+	mutex_lock(&vgasr_mutex);
+	vgasr_priv.handler = NULL;
+	mutex_unlock(&vgasr_mutex);
+}
+EXPORT_SYMBOL(vga_switcheroo_unregister_handler);
+
+static void vga_switcheroo_enable(void)
+{
+	int i;
+	int ret;
+	/* call the handler to init */
+	vgasr_priv.handler->init();
+
+	for (i = 0; i < VGA_SWITCHEROO_MAX_CLIENTS; i++) {
+		ret = vgasr_priv.handler->get_client_id(vgasr_priv.clients[i].pdev);
+		if (ret < 0)
+			return;
+
+		vgasr_priv.clients[i].id = ret;
+	}
+	vga_switcheroo_debugfs_init(&vgasr_priv);
+	vgasr_priv.active = true;
+}
+
+int vga_switcheroo_register_client(struct pci_dev *pdev,
+				   void (*set_gpu_state)(struct pci_dev *pdev, enum vga_switcheroo_state),
+				   bool (*can_switch)(struct pci_dev *pdev))
+{
+	int index;
+
+	mutex_lock(&vgasr_mutex);
+	/* don't do IGD vs DIS here */
+	if (vgasr_priv.registered_clients & 1)
+		index = 1;
+	else
+		index = 0;
+
+	vgasr_priv.clients[index].pwr_state = VGA_SWITCHEROO_ON;
+	vgasr_priv.clients[index].pdev = pdev;
+	vgasr_priv.clients[index].set_gpu_state = set_gpu_state;
+	vgasr_priv.clients[index].can_switch = can_switch;
+	vgasr_priv.clients[index].id = -1;
+	if (pdev->resource[PCI_ROM_RESOURCE].flags & IORESOURCE_ROM_SHADOW)
+		vgasr_priv.clients[index].active = true;
+
+	vgasr_priv.registered_clients |= (1 << index);
+
+	/* if we get two clients + handler */
+	if (vgasr_priv.registered_clients == 0x3 && vgasr_priv.handler) {
+		printk(KERN_INFO "vga_switcheroo: enabled\n");
+		vga_switcheroo_enable();
+	}
+	mutex_unlock(&vgasr_mutex);
+	return 0;
+}
+EXPORT_SYMBOL(vga_switcheroo_register_client);
+
+void vga_switcheroo_unregister_client(struct pci_dev *pdev)
+{
+	int i;
+
+	mutex_lock(&vgasr_mutex);
+	for (i = 0; i < VGA_SWITCHEROO_MAX_CLIENTS; i++) {
+		if (vgasr_priv.clients[i].pdev == pdev) {
+			vgasr_priv.registered_clients &= ~(1 << i);
+			break;
+		}
+	}
+
+	printk(KERN_INFO "vga_switcheroo: disabled\n");
+	vga_switcheroo_debugfs_fini(&vgasr_priv);
+	vgasr_priv.active = false;
+	mutex_unlock(&vgasr_mutex);
+}
+EXPORT_SYMBOL(vga_switcheroo_unregister_client);
+
+void vga_switcheroo_client_fb_set(struct pci_dev *pdev,
+				 struct fb_info *info)
+{
+	int i;
+
+	mutex_lock(&vgasr_mutex);
+	for (i = 0; i < VGA_SWITCHEROO_MAX_CLIENTS; i++) {
+		if (vgasr_priv.clients[i].pdev == pdev) {
+			vgasr_priv.clients[i].fb_info = info;
+			break;
+		}
+	}
+	mutex_unlock(&vgasr_mutex);
+}
+EXPORT_SYMBOL(vga_switcheroo_client_fb_set);
+
+static int vga_switcheroo_show(struct seq_file *m, void *v)
+{
+	int i;
+	mutex_lock(&vgasr_mutex);
+	for (i = 0; i < VGA_SWITCHEROO_MAX_CLIENTS; i++) {
+		seq_printf(m, "%d:%c:%s:%s\n", i,
+			   vgasr_priv.clients[i].active ? '+' : ' ',
+			   vgasr_priv.clients[i].pwr_state ? "Pwr" : "Off",
+			   pci_name(vgasr_priv.clients[i].pdev));
+	}
+	mutex_unlock(&vgasr_mutex);
+	return 0;
+}
+
+static int vga_switcheroo_debugfs_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, vga_switcheroo_show, NULL);
+}
+
+static int vga_switchon(struct vga_switcheroo_client *client)
+{
+	int ret;
+
+	ret = vgasr_priv.handler->power_state(client->id, VGA_SWITCHEROO_ON);
+	/* call the driver callback to turn on device */
+	client->set_gpu_state(client->pdev, VGA_SWITCHEROO_ON);
+	client->pwr_state = VGA_SWITCHEROO_ON;
+	return 0;
+}
+
+static int vga_switchoff(struct vga_switcheroo_client *client)
+{
+	/* call the driver callback to turn off device */
+	client->set_gpu_state(client->pdev, VGA_SWITCHEROO_OFF);
+	vgasr_priv.handler->power_state(client->id, VGA_SWITCHEROO_OFF);
+	client->pwr_state = VGA_SWITCHEROO_OFF;
+	return 0;
+}
+
+static int vga_switchto(struct vga_switcheroo_client *new_client)
+{
+	int ret;
+	int i;
+	struct vga_switcheroo_client *active = NULL;
+
+	if (new_client->active == true)
+		return 0;
+
+	for (i = 0; i < VGA_SWITCHEROO_MAX_CLIENTS; i++) {
+		if (vgasr_priv.clients[i].active == true) {
+			active = &vgasr_priv.clients[i];
+			break;
+		}
+	}
+	if (!active)
+		return 0;
+
+	/* power up the first device */
+	ret = pci_enable_device(new_client->pdev);
+	if (ret)
+		return ret;
+
+	if (new_client->pwr_state == VGA_SWITCHEROO_OFF)
+		vga_switchon(new_client);
+
+	/* swap shadow resource to denote boot VGA device has changed so X starts on new device */
+	active->active = false;
+
+	active->pdev->resource[PCI_ROM_RESOURCE].flags &= ~IORESOURCE_ROM_SHADOW;
+	new_client->pdev->resource[PCI_ROM_RESOURCE].flags |= IORESOURCE_ROM_SHADOW;
+
+	if (new_client->fb_info) {
+		struct fb_event event;
+		event.info = new_client->fb_info;
+		fb_notifier_call_chain(FB_EVENT_REMAP_ALL_CONSOLE, &event);
+	}
+
+	ret = vgasr_priv.handler->switchto(new_client->id);
+	if (ret)
+		return ret;
+
+	if (active->pwr_state == VGA_SWITCHEROO_ON)
+		vga_switchoff(active);
+
+	new_client->active = true;
+	return 0;
+}
+
+static ssize_t
+vga_switcheroo_debugfs_write(struct file *filp, const char __user *ubuf,
+			     size_t cnt, loff_t *ppos)
+{
+	char usercmd[64];
+	const char *pdev_name;
+	int i, ret;
+	bool delay = false, can_switch;
+	int client_id = -1;
+	struct vga_switcheroo_client *client = NULL;
+
+	if (cnt > 63)
+		cnt = 63;
+
+	if (copy_from_user(usercmd, ubuf, cnt))
+		return -EFAULT;
+
+	mutex_lock(&vgasr_mutex);
+
+	if (!vgasr_priv.active)
+		return -EINVAL;
+
+	/* pwr off the device not in use */
+	if (strncmp(usercmd, "OFF", 3) == 0) {
+		for (i = 0; i < VGA_SWITCHEROO_MAX_CLIENTS; i++) {
+			if (vgasr_priv.clients[i].active)
+				continue;
+			if (vgasr_priv.clients[i].pwr_state == VGA_SWITCHEROO_ON)
+				vga_switchoff(&vgasr_priv.clients[i]);
+		}
+		goto out;
+	}
+	/* pwr on the device not in use */
+	if (strncmp(usercmd, "ON", 2) == 0) {
+		for (i = 0; i < VGA_SWITCHEROO_MAX_CLIENTS; i++) {
+			if (vgasr_priv.clients[i].active)
+				continue;
+			if (vgasr_priv.clients[i].pwr_state == VGA_SWITCHEROO_OFF)
+				vga_switchon(&vgasr_priv.clients[i]);
+		}
+		goto out;
+	}
+
+	/* request a delayed switch - test can we switch now */
+	if (strncmp(usercmd, "DIGD", 4) == 0) {
+		client_id = VGA_SWITCHEROO_IGD;
+		delay = true;
+	}
+
+	if (strncmp(usercmd, "DDIS", 4) == 0) {
+		client_id = VGA_SWITCHEROO_DIS;
+		delay = true;
+	}
+
+	if (strncmp(usercmd, "IGD", 3) == 0)
+		client_id = VGA_SWITCHEROO_IGD;
+
+	if (strncmp(usercmd, "DIS", 3) == 0)
+		client_id = VGA_SWITCHEROO_DIS;
+
+	if (client_id == -1)
+		goto out;
+
+	for (i = 0; i < VGA_SWITCHEROO_MAX_CLIENTS; i++) {
+		if (vgasr_priv.clients[i].id == client_id) {
+			client = &vgasr_priv.clients[i];
+			break;
+		}
+	}
+
+	vgasr_priv.delayed_switch_active = false;
+	/* okay we want a switch - test if devices are willing to switch */
+	can_switch = true;
+	for (i = 0; i < VGA_SWITCHEROO_MAX_CLIENTS; i++) {
+		can_switch = vgasr_priv.clients[i].can_switch(vgasr_priv.clients[i].pdev);
+		if (can_switch == false) {
+			printk(KERN_ERR "vga_switcheroo: client %d refused switch\n", i);
+			break;
+		}
+	}
+
+	if (can_switch == false && delay == false)
+		goto out;
+
+	if (can_switch == true) {
+		pdev_name = pci_name(client->pdev);
+		ret = vga_switchto(client);
+		if (ret)
+			printk(KERN_ERR "vga_switcheroo: switching failed %d\n", ret);
+	} else {
+		printk(KERN_INFO "vga_switcheroo: setting delayed switch to client %d\n", client->id);
+		vgasr_priv.delayed_switch_active = true;
+		vgasr_priv.delayed_client_id = client_id;
+
+		/* we should at least power up the card to
+		   make the switch faster */
+		if (client->pwr_state == VGA_SWITCHEROO_OFF)
+			vga_switchon(client);
+	}
+
+out:
+	mutex_unlock(&vgasr_mutex);
+	return cnt;
+}
+
+static const struct file_operations vga_switcheroo_debugfs_fops = {
+	.owner = THIS_MODULE,
+	.open = vga_switcheroo_debugfs_open,
+	.write = vga_switcheroo_debugfs_write,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+static void vga_switcheroo_debugfs_fini(struct vgasr_priv *priv)
+{
+	if (priv->switch_file) {
+		debugfs_remove(priv->switch_file);
+		priv->switch_file = NULL;
+	}
+	if (priv->debugfs_root) {
+		debugfs_remove(priv->debugfs_root);
+		priv->debugfs_root = NULL;
+	}
+}
+
+static int vga_switcheroo_debugfs_init(struct vgasr_priv *priv)
+{
+	/* already initialised */
+	if (priv->debugfs_root)
+		return 0;
+	priv->debugfs_root = debugfs_create_dir("vgaswitcheroo", NULL);
+
+	if (!priv->debugfs_root) {
+		printk(KERN_ERR "vga_switcheroo: Cannot create /sys/kernel/debug/vgaswitcheroo\n");
+		goto fail;
+	}
+
+	priv->switch_file = debugfs_create_file("switch", 0644,
+						priv->debugfs_root, NULL, &vga_switcheroo_debugfs_fops);
+	if (!priv->switch_file) {
+		printk(KERN_ERR "vga_switcheroo: cannot create /sys/kernel/debug/vgaswitcheroo/switch\n");
+		goto fail;
+	}
+	return 0;
+fail:
+	vga_switcheroo_debugfs_fini(priv);
+	return -1;
+}
+
+int vga_switcheroo_process_delayed_switch(void)
+{
+	struct vga_switcheroo_client *client = NULL;
+	const char *pdev_name;
+	bool can_switch = true;
+	int i;
+	int ret;
+	int err = -EINVAL;
+
+	mutex_lock(&vgasr_mutex);
+	if (!vgasr_priv.delayed_switch_active)
+		goto err;
+
+	printk(KERN_INFO "vga_switcheroo: processing delayed switch to %d\n", vgasr_priv.delayed_client_id);
+
+	for (i = 0; i < VGA_SWITCHEROO_MAX_CLIENTS; i++) {
+		if (vgasr_priv.clients[i].id == vgasr_priv.delayed_client_id)
+			client = &vgasr_priv.clients[i];
+		can_switch = vgasr_priv.clients[i].can_switch(vgasr_priv.clients[i].pdev);
+		if (can_switch == false) {
+			printk(KERN_ERR "vga_switcheroo: client %d refused switch\n", i);
+			break;
+		}
+	}
+
+	if (can_switch == false || client == NULL)
+		goto err;
+
+	pdev_name = pci_name(client->pdev);
+	ret = vga_switchto(client);
+	if (ret)
+		printk(KERN_ERR "vga_switcheroo: delayed switching failed %d\n", ret);
+
+	vgasr_priv.delayed_switch_active = false;
+	err = 0;
+err:
+	mutex_unlock(&vgasr_mutex);
+	return err;
+}
+EXPORT_SYMBOL(vga_switcheroo_process_delayed_switch);
+
diff --git a/drivers/video/console/fbcon.c b/drivers/video/console/fbcon.c
index 3681c6a88212..b0a3fa00706d 100644
--- a/drivers/video/console/fbcon.c
+++ b/drivers/video/console/fbcon.c
@@ -3025,6 +3025,20 @@ static int fbcon_fb_unregistered(struct fb_info *info)
 	return 0;
 }
 
+static void fbcon_remap_all(int idx)
+{
+	int i;
+	for (i = first_fb_vc; i <= last_fb_vc; i++)
+		set_con2fb_map(i, idx, 0);
+
+	if (con_is_bound(&fb_con)) {
+		printk(KERN_INFO "fbcon: Remapping primary device, "
+		       "fb%i, to tty %i-%i\n", idx,
+		       first_fb_vc + 1, last_fb_vc + 1);
+		info_idx = idx;
+	}
+}
+
 #ifdef CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY
 static void fbcon_select_primary(struct fb_info *info)
 {
@@ -3225,6 +3239,10 @@ static int fbcon_event_notify(struct notifier_block *self,
 		caps = event->data;
 		fbcon_get_requirement(info, caps);
 		break;
+	case FB_EVENT_REMAP_ALL_CONSOLE:
+		idx = info->node;
+		fbcon_remap_all(idx);
+		break;
 	}
 done:
 	return ret;
diff --git a/include/linux/fb.h b/include/linux/fb.h
index 369767bd873e..c10163b4c40e 100644
--- a/include/linux/fb.h
+++ b/include/linux/fb.h
@@ -543,6 +543,8 @@ struct fb_cursor_user {
 #define FB_EVENT_GET_REQ                0x0D
 /*      Unbind from the console if possible */
 #define FB_EVENT_FB_UNBIND              0x0E
+/*      CONSOLE-SPECIFIC: remap all consoles to new fb - for vga switcheroo */
+#define FB_EVENT_REMAP_ALL_CONSOLE      0x0F
 
 struct fb_event {
 	struct fb_info *info;
diff --git a/include/linux/vga_switcheroo.h b/include/linux/vga_switcheroo.h
new file mode 100644
index 000000000000..4b58ab1e8612
--- /dev/null
+++ b/include/linux/vga_switcheroo.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2010 Red Hat Inc.
+ * Author : Dave Airlie <airlied@redhat.com>
+ *
+ * Licensed under GPLv2
+ *
+ * vga_switcheroo.h - Support for laptop with dual GPU using one set of outputs
+ */
+
+#include <acpi/acpi.h>
+#include <linux/fb.h>
+
+enum vga_switcheroo_state {
+	VGA_SWITCHEROO_OFF,
+	VGA_SWITCHEROO_ON,
+};
+
+enum vga_switcheroo_client_id {
+	VGA_SWITCHEROO_IGD,
+	VGA_SWITCHEROO_DIS,
+	VGA_SWITCHEROO_MAX_CLIENTS,
+};
+
+struct vga_switcheroo_handler {
+	int (*switchto)(enum vga_switcheroo_client_id id);
+	int (*power_state)(enum vga_switcheroo_client_id id,
+			   enum vga_switcheroo_state state);
+	int (*init)(void);
+	int (*get_client_id)(struct pci_dev *pdev);
+};
+
+
+#if defined(CONFIG_VGA_SWITCHEROO)
+void vga_switcheroo_unregister_client(struct pci_dev *dev);
+int vga_switcheroo_register_client(struct pci_dev *dev,
+				   void (*set_gpu_state)(struct pci_dev *dev, enum vga_switcheroo_state),
+				   bool (*can_switch)(struct pci_dev *dev));
+
+void vga_switcheroo_client_fb_set(struct pci_dev *dev,
+				  struct fb_info *info);
+
+int vga_switcheroo_register_handler(struct vga_switcheroo_handler *handler);
+void vga_switcheroo_unregister_handler(void);
+
+int vga_switcheroo_process_delayed_switch(void);
+
+#else
+
+static inline void vga_switcheroo_unregister_client(struct pci_dev *dev) {}
+static inline int vga_switcheroo_register_client(struct pci_dev *dev,
+					  void (*set_gpu_state)(struct pci_dev *dev, enum vga_switcheroo_state),
+					  bool (*can_switch)(struct pci_dev *dev)) { return 0; }
+static inline void vga_switcheroo_client_fb_set(struct pci_dev *dev, struct fb_info *info) {}
+static inline int vga_switcheroo_register_handler(struct vga_switcheroo_handler *handler) { return 0; }
+static inline void vga_switcheroo_unregister_handler(void) {}
+static inline int vga_switcheroo_process_delayed_switch(void) { return 0; }
+
+#endif
-- 
cgit v1.2.3


From 8edb381d6705811b278527907a5ae2a9c4db8074 Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Mon, 1 Mar 2010 21:50:01 +1100
Subject: vga_switcheroo: fix build on platforms with no ACPI

radeon was always including the atpx code unnecessarily, also core
switcheroo was including acpi headers.

Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/nouveau/nouveau_drv.h        |  5 +++++
 drivers/gpu/drm/radeon/Makefile              |  3 ++-
 drivers/gpu/drm/radeon/radeon.h              | 11 +++++++++++
 drivers/gpu/drm/radeon/radeon_atpx_handler.c |  1 -
 drivers/gpu/drm/radeon/radeon_drv.h          |  6 ++++++
 drivers/gpu/vga/vga_switcheroo.c             |  3 ---
 include/linux/vga_switcheroo.h               |  1 -
 7 files changed, 24 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h
index a2e24f252e84..f5b3cbe7dc80 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drv.h
+++ b/drivers/gpu/drm/nouveau/nouveau_drv.h
@@ -852,8 +852,13 @@ extern int  nouveau_dma_init(struct nouveau_channel *);
 extern int  nouveau_dma_wait(struct nouveau_channel *, int slots, int size);
 
 /* nouveau_acpi.c */
+#if defined(CONFIG_VGA_SWITCHEROO)
 void nouveau_register_dsm_handler(void);
 void nouveau_unregister_dsm_handler(void);
+#else
+static inline void nouveau_register_dsm_handler(void) {}
+static inline void nouveau_unregister_dsm_handler(void) {}
+#endif
 
 /* nouveau_backlight.c */
 #ifdef CONFIG_DRM_NOUVEAU_BACKLIGHT
diff --git a/drivers/gpu/drm/radeon/Makefile b/drivers/gpu/drm/radeon/Makefile
index 0a4d526e4f44..0adf49eea7fa 100644
--- a/drivers/gpu/drm/radeon/Makefile
+++ b/drivers/gpu/drm/radeon/Makefile
@@ -60,8 +60,9 @@ radeon-y += radeon_device.o radeon_kms.o \
 	rs400.o rs600.o rs690.o rv515.o r520.o r600.o rv770.o radeon_test.o \
 	r200.o radeon_legacy_tv.o r600_cs.o r600_blit.o r600_blit_shaders.o \
 	r600_blit_kms.o radeon_pm.o atombios_dp.o r600_audio.o r600_hdmi.o \
-	evergreen.o radeon_atpx_handler.o
+	evergreen.o
 
 radeon-$(CONFIG_COMPAT) += radeon_ioc32.o
+radeon-$(CONFIG_VGA_SWITCHEROO) += radone_atpx_handler.o
 
 obj-$(CONFIG_DRM_RADEON)+= radeon.o
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index ad9d55f94398..829e26e8a4bb 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -121,8 +121,19 @@ struct radeon_device;
  */
 #define ATRM_BIOS_PAGE 4096
 
+#if defined(CONFIG_VGA_SWITCHEROO)
 bool radeon_atrm_supported(struct pci_dev *pdev);
 int radeon_atrm_get_bios_chunk(uint8_t *bios, int offset, int len);
+#else
+static inline bool radeon_atrm_supported(struct pci_dev *pdev)
+{
+	return false;
+}
+
+static inline int radeon_atrm_get_bios_chunk(uint8_t *bios, int offset, int len){
+	return -EINVAL;
+}
+#endif
 bool radeon_get_bios(struct radeon_device *rdev);
 
 
diff --git a/drivers/gpu/drm/radeon/radeon_atpx_handler.c b/drivers/gpu/drm/radeon/radeon_atpx_handler.c
index 0ae52f19071d..3f557c4151e0 100644
--- a/drivers/gpu/drm/radeon/radeon_atpx_handler.c
+++ b/drivers/gpu/drm/radeon/radeon_atpx_handler.c
@@ -6,7 +6,6 @@
  *
  * ATPX support for both Intel/ATI
  */
-
 #include <linux/vga_switcheroo.h>
 #include <acpi/acpi.h>
 #include <acpi/acpi_bus.h>
diff --git a/drivers/gpu/drm/radeon/radeon_drv.h b/drivers/gpu/drm/radeon/radeon_drv.h
index 4fe16461bb1b..ec55f2b23c22 100644
--- a/drivers/gpu/drm/radeon/radeon_drv.h
+++ b/drivers/gpu/drm/radeon/radeon_drv.h
@@ -463,8 +463,14 @@ extern void r600_blit_swap(struct drm_device *dev,
 			   int w, int h, int src_pitch, int dst_pitch, int cpp);
 
 /* atpx handler */
+#if defined(CONFIG_VGA_SWITCHEROO)
 void radeon_register_atpx_handler(void);
 void radeon_unregister_atpx_handler(void);
+#else
+static inline void radeon_register_atpx_handler(void) {}
+static inline void radeon_unregister_atpx_handler(void) {}
+#endif
+
 /* Flags for stats.boxes
  */
 #define RADEON_BOX_DMA_IDLE      0x1
diff --git a/drivers/gpu/vga/vga_switcheroo.c b/drivers/gpu/vga/vga_switcheroo.c
index a3f587a0aba9..d6d1149d525d 100644
--- a/drivers/gpu/vga/vga_switcheroo.c
+++ b/drivers/gpu/vga/vga_switcheroo.c
@@ -25,9 +25,6 @@
 #include <linux/debugfs.h>
 #include <linux/fb.h>
 
-#include <acpi/acpi.h>
-#include <acpi/acpi_bus.h>
-
 #include <linux/pci.h>
 #include <linux/vga_switcheroo.h>
 
diff --git a/include/linux/vga_switcheroo.h b/include/linux/vga_switcheroo.h
index 4b58ab1e8612..ae9ab13b963d 100644
--- a/include/linux/vga_switcheroo.h
+++ b/include/linux/vga_switcheroo.h
@@ -7,7 +7,6 @@
  * vga_switcheroo.h - Support for laptop with dual GPU using one set of outputs
  */
 
-#include <acpi/acpi.h>
 #include <linux/fb.h>
 
 enum vga_switcheroo_state {
-- 
cgit v1.2.3


From d2e82add832f9c95376d004d565c5e164c99b9ec Mon Sep 17 00:00:00 2001
From: Ville Syrjälä <ville.syrjala@nokia.com>
Date: Tue, 23 Feb 2010 23:36:25 +0100
Subject: OMAP: DSS2: OMAPFB: install omapfb.h
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

omapfb has several custom ioctls so user space needs
the header in order to utilize them.

Signed-off-by: Ville Syrjälä <ville.syrjala@nokia.com>
Signed-off-by: Tomi Valkeinen <tomi.valkeinen@nokia.com>
---
 include/linux/Kbuild | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index 756f831cbdd5..aa80d7a2dee7 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -125,6 +125,7 @@ header-y += nfs2.h
 header-y += nfs4_mount.h
 header-y += nfs_mount.h
 header-y += nl80211.h
+header-y += omapfb.h
 header-y += param.h
 header-y += pci_regs.h
 header-y += perf_event.h
-- 
cgit v1.2.3


From 1ccaba3056796ab1f933736d763ffcd1958866cd Mon Sep 17 00:00:00 2001
From: Abhijith Das <adas@redhat.com>
Date: Thu, 10 Dec 2009 18:52:54 -0500
Subject: GFS2: Remove old, unused linked list code from quota

This is the kernel portion of the patch-set for upstream gfs2,
to remove the quota-linked-list stuff and replace it with
fiemap-based traversal of the quota file.

The corresponding userland fixes have been pushed to
STABLE3 and master branches of cluster.git and gfs2-utils.git
respectively (Refer Red Hat bug #536902).

Signed-off-by: Abhi Das <adas@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 include/linux/gfs2_ondisk.h | 30 +-----------------------------
 1 file changed, 1 insertion(+), 29 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/gfs2_ondisk.h b/include/linux/gfs2_ondisk.h
index 81f90a59cda6..4f4462974c14 100644
--- a/include/linux/gfs2_ondisk.h
+++ b/include/linux/gfs2_ondisk.h
@@ -179,33 +179,6 @@ struct gfs2_rgrp {
 	__u8 rg_reserved[80]; /* Several fields from gfs1 now reserved */
 };
 
-/*
- * quota linked list: user quotas and group quotas form two separate 
- * singly linked lists. ll_next stores uids or gids of next quotas in the 
- * linked list.
-
-Given the uid/gid, how to calculate the quota file offsets for the corresponding
-gfs2_quota structures on disk:
-
-for user quotas, given uid,
-offset = uid * sizeof(struct gfs2_quota);
-
-for group quotas, given gid,
-offset = (gid * sizeof(struct gfs2_quota)) + sizeof(struct gfs2_quota);
-
-
-  uid:0   gid:0       uid:12   gid:12      uid:17   gid:17     uid:5142 gid:5142
-+-------+-------+    +-------+-------+    +-------+- - - -+    +- - - -+-------+
-| valid | valid | :: | valid | valid | :: | valid | inval | :: | inval | valid |
-+-------+-------+    +-------+-------+    +-------+- - - -+    +- - - -+-------+
-next:12   next:12    next:17 next:5142    next:NULL                    next:NULL
-    |       |            |       |            |<-- user quota list         |
-     \______|___________/ \______|___________/         group quota list -->|
-            |                    |                                         |
-             \__________________/ \_______________________________________/
-
-*/
-
 /*
  * quota structure
  */
@@ -214,8 +187,7 @@ struct gfs2_quota {
 	__be64 qu_limit;
 	__be64 qu_warn;
 	__be64 qu_value;
-	__be32 qu_ll_next; /* location of next quota in list */
-	__u8 qu_reserved[60];
+	__u8 qu_reserved[64];
 };
 
 /*
-- 
cgit v1.2.3


From 46a26bf55714c1e2f17e34683292a389acb8e601 Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti <mtosatti@redhat.com>
Date: Wed, 23 Dec 2009 14:35:16 -0200
Subject: KVM: modify memslots layout in struct kvm

Have a pointer to an allocated region inside struct kvm.

[alex: fix ppc book 3s]

Signed-off-by: Alexander Graf <agraf@suse.de>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/ia64/kvm/kvm-ia64.c  | 10 ++++++----
 arch/powerpc/kvm/book3s.c |  2 +-
 arch/x86/kvm/mmu.c        | 11 ++++++-----
 arch/x86/kvm/vmx.c        |  4 ++--
 arch/x86/kvm/x86.c        |  4 ++--
 include/linux/kvm_host.h  | 12 ++++++++----
 virt/kvm/iommu.c          | 18 ++++++++++++------
 virt/kvm/kvm_main.c       | 36 +++++++++++++++++++++++-------------
 8 files changed, 60 insertions(+), 37 deletions(-)

(limited to 'include/linux')

diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index 5fdeec5fddcf..1ca1dbf48117 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -1377,12 +1377,14 @@ static void free_kvm(struct kvm *kvm)
 
 static void kvm_release_vm_pages(struct kvm *kvm)
 {
+	struct kvm_memslots *slots;
 	struct kvm_memory_slot *memslot;
 	int i, j;
 	unsigned long base_gfn;
 
-	for (i = 0; i < kvm->nmemslots; i++) {
-		memslot = &kvm->memslots[i];
+	slots = kvm->memslots;
+	for (i = 0; i < slots->nmemslots; i++) {
+		memslot = &slots->memslots[i];
 		base_gfn = memslot->base_gfn;
 
 		for (j = 0; j < memslot->npages; j++) {
@@ -1802,7 +1804,7 @@ static int kvm_ia64_sync_dirty_log(struct kvm *kvm,
 	if (log->slot >= KVM_MEMORY_SLOTS)
 		goto out;
 
-	memslot = &kvm->memslots[log->slot];
+	memslot = &kvm->memslots->memslots[log->slot];
 	r = -ENOENT;
 	if (!memslot->dirty_bitmap)
 		goto out;
@@ -1840,7 +1842,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 	/* If nothing is dirty, don't bother messing with page tables. */
 	if (is_dirty) {
 		kvm_flush_remote_tlbs(kvm);
-		memslot = &kvm->memslots[log->slot];
+		memslot = &kvm->memslots->memslots[log->slot];
 		n = ALIGN(memslot->npages, BITS_PER_LONG) / 8;
 		memset(memslot->dirty_bitmap, 0, n);
 	}
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 803505d3e455..bb8873dcb20f 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -865,7 +865,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 
 	/* If nothing is dirty, don't bother messing with page tables. */
 	if (is_dirty) {
-		memslot = &kvm->memslots[log->slot];
+		memslot = &kvm->memslots->memslots[log->slot];
 
 		ga = memslot->base_gfn << PAGE_SHIFT;
 		ga_end = ga + (memslot->npages << PAGE_SHIFT);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 4f499d7f7106..81f84d326a84 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -807,13 +807,14 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
 {
 	int i, j;
 	int retval = 0;
+	struct kvm_memslots *slots = kvm->memslots;
 
 	/*
 	 * If mmap_sem isn't taken, we can look the memslots with only
 	 * the mmu_lock by skipping over the slots with userspace_addr == 0.
 	 */
-	for (i = 0; i < kvm->nmemslots; i++) {
-		struct kvm_memory_slot *memslot = &kvm->memslots[i];
+	for (i = 0; i < slots->nmemslots; i++) {
+		struct kvm_memory_slot *memslot = &slots->memslots[i];
 		unsigned long start = memslot->userspace_addr;
 		unsigned long end;
 
@@ -3021,8 +3022,8 @@ unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm)
 	unsigned int nr_mmu_pages;
 	unsigned int  nr_pages = 0;
 
-	for (i = 0; i < kvm->nmemslots; i++)
-		nr_pages += kvm->memslots[i].npages;
+	for (i = 0; i < kvm->memslots->nmemslots; i++)
+		nr_pages += kvm->memslots->memslots[i].npages;
 
 	nr_mmu_pages = nr_pages * KVM_PERMILLE_MMU_PAGES / 1000;
 	nr_mmu_pages = max(nr_mmu_pages,
@@ -3295,7 +3296,7 @@ static int count_rmaps(struct kvm_vcpu *vcpu)
 	int i, j, k;
 
 	for (i = 0; i < KVM_MEMORY_SLOTS; ++i) {
-		struct kvm_memory_slot *m = &vcpu->kvm->memslots[i];
+		struct kvm_memory_slot *m = &vcpu->kvm->memslots->memslots[i];
 		struct kvm_rmap_desc *d;
 
 		for (j = 0; j < m->npages; ++j) {
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 74a66f0c00b4..18698799e365 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1503,8 +1503,8 @@ static void enter_pmode(struct kvm_vcpu *vcpu)
 static gva_t rmode_tss_base(struct kvm *kvm)
 {
 	if (!kvm->arch.tss_addr) {
-		gfn_t base_gfn = kvm->memslots[0].base_gfn +
-				 kvm->memslots[0].npages - 3;
+		gfn_t base_gfn = kvm->memslots->memslots[0].base_gfn +
+				 kvm->memslots->memslots[0].npages - 3;
 		return base_gfn << PAGE_SHIFT;
 	}
 	return kvm->arch.tss_addr;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 8798504ace11..3b81cb9da8b8 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2427,7 +2427,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 		spin_lock(&kvm->mmu_lock);
 		kvm_mmu_slot_remove_write_access(kvm, log->slot);
 		spin_unlock(&kvm->mmu_lock);
-		memslot = &kvm->memslots[log->slot];
+		memslot = &kvm->memslots->memslots[log->slot];
 		n = ALIGN(memslot->npages, BITS_PER_LONG) / 8;
 		memset(memslot->dirty_bitmap, 0, n);
 	}
@@ -5223,7 +5223,7 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
 				int user_alloc)
 {
 	int npages = mem->memory_size >> PAGE_SHIFT;
-	struct kvm_memory_slot *memslot = &kvm->memslots[mem->slot];
+	struct kvm_memory_slot *memslot = &kvm->memslots->memslots[mem->slot];
 
 	/*To keep backward compatibility with older userspace,
 	 *x86 needs to hanlde !user_alloc case.
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index bd5a616d9373..782bfb185f8a 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -150,14 +150,18 @@ struct kvm_irq_routing_table {};
 
 #endif
 
+struct kvm_memslots {
+	int nmemslots;
+	struct kvm_memory_slot memslots[KVM_MEMORY_SLOTS +
+					KVM_PRIVATE_MEM_SLOTS];
+};
+
 struct kvm {
 	spinlock_t mmu_lock;
 	spinlock_t requests_lock;
 	struct rw_semaphore slots_lock;
 	struct mm_struct *mm; /* userspace tied to this vm */
-	int nmemslots;
-	struct kvm_memory_slot memslots[KVM_MEMORY_SLOTS +
-					KVM_PRIVATE_MEM_SLOTS];
+	struct kvm_memslots *memslots;
 #ifdef CONFIG_KVM_APIC_ARCHITECTURE
 	u32 bsp_vcpu_id;
 	struct kvm_vcpu *bsp_vcpu;
@@ -482,7 +486,7 @@ static inline void kvm_guest_exit(void)
 
 static inline int memslot_id(struct kvm *kvm, struct kvm_memory_slot *slot)
 {
-	return slot - kvm->memslots;
+	return slot - kvm->memslots->memslots;
 }
 
 static inline gpa_t gfn_to_gpa(gfn_t gfn)
diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c
index 15147583abd1..bc697a66a883 100644
--- a/virt/kvm/iommu.c
+++ b/virt/kvm/iommu.c
@@ -76,10 +76,13 @@ unmap_pages:
 static int kvm_iommu_map_memslots(struct kvm *kvm)
 {
 	int i, r = 0;
+	struct kvm_memslots *slots;
 
-	for (i = 0; i < kvm->nmemslots; i++) {
-		r = kvm_iommu_map_pages(kvm, kvm->memslots[i].base_gfn,
-					kvm->memslots[i].npages);
+	slots = kvm->memslots;
+
+	for (i = 0; i < slots->nmemslots; i++) {
+		r = kvm_iommu_map_pages(kvm, slots->memslots[i].base_gfn,
+					slots->memslots[i].npages);
 		if (r)
 			break;
 	}
@@ -210,10 +213,13 @@ static void kvm_iommu_put_pages(struct kvm *kvm,
 static int kvm_iommu_unmap_memslots(struct kvm *kvm)
 {
 	int i;
+	struct kvm_memslots *slots;
+
+	slots = kvm->memslots;
 
-	for (i = 0; i < kvm->nmemslots; i++) {
-		kvm_iommu_put_pages(kvm, kvm->memslots[i].base_gfn,
-				    kvm->memslots[i].npages);
+	for (i = 0; i < slots->nmemslots; i++) {
+		kvm_iommu_put_pages(kvm, slots->memslots[i].base_gfn,
+				    slots->memslots[i].npages);
 	}
 
 	return 0;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index bc23b8e0609b..86dd8f3d29c9 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -375,12 +375,16 @@ static struct kvm *kvm_create_vm(void)
 	INIT_HLIST_HEAD(&kvm->irq_ack_notifier_list);
 #endif
 
+	r = -ENOMEM;
+	kvm->memslots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
+	if (!kvm->memslots)
+		goto out_err;
+
 #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
 	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
-	if (!page) {
-		r = -ENOMEM;
+	if (!page)
 		goto out_err;
-	}
+
 	kvm->coalesced_mmio_ring =
 			(struct kvm_coalesced_mmio_ring *)page_address(page);
 #endif
@@ -416,6 +420,7 @@ out:
 out_err:
 	hardware_disable_all();
 out_err_nodisable:
+	kfree(kvm->memslots);
 	kfree(kvm);
 	return ERR_PTR(r);
 }
@@ -450,9 +455,12 @@ static void kvm_free_physmem_slot(struct kvm_memory_slot *free,
 void kvm_free_physmem(struct kvm *kvm)
 {
 	int i;
+	struct kvm_memslots *slots = kvm->memslots;
+
+	for (i = 0; i < slots->nmemslots; ++i)
+		kvm_free_physmem_slot(&slots->memslots[i], NULL);
 
-	for (i = 0; i < kvm->nmemslots; ++i)
-		kvm_free_physmem_slot(&kvm->memslots[i], NULL);
+	kfree(kvm->memslots);
 }
 
 static void kvm_destroy_vm(struct kvm *kvm)
@@ -533,7 +541,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
 	if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr)
 		goto out;
 
-	memslot = &kvm->memslots[mem->slot];
+	memslot = &kvm->memslots->memslots[mem->slot];
 	base_gfn = mem->guest_phys_addr >> PAGE_SHIFT;
 	npages = mem->memory_size >> PAGE_SHIFT;
 
@@ -554,7 +562,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
 	/* Check for overlaps */
 	r = -EEXIST;
 	for (i = 0; i < KVM_MEMORY_SLOTS; ++i) {
-		struct kvm_memory_slot *s = &kvm->memslots[i];
+		struct kvm_memory_slot *s = &kvm->memslots->memslots[i];
 
 		if (s == memslot || !s->npages)
 			continue;
@@ -656,8 +664,8 @@ skip_lpage:
 		kvm_arch_flush_shadow(kvm);
 
 	spin_lock(&kvm->mmu_lock);
-	if (mem->slot >= kvm->nmemslots)
-		kvm->nmemslots = mem->slot + 1;
+	if (mem->slot >= kvm->memslots->nmemslots)
+		kvm->memslots->nmemslots = mem->slot + 1;
 
 	*memslot = new;
 	spin_unlock(&kvm->mmu_lock);
@@ -727,7 +735,7 @@ int kvm_get_dirty_log(struct kvm *kvm,
 	if (log->slot >= KVM_MEMORY_SLOTS)
 		goto out;
 
-	memslot = &kvm->memslots[log->slot];
+	memslot = &kvm->memslots->memslots[log->slot];
 	r = -ENOENT;
 	if (!memslot->dirty_bitmap)
 		goto out;
@@ -781,9 +789,10 @@ EXPORT_SYMBOL_GPL(kvm_is_error_hva);
 struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn)
 {
 	int i;
+	struct kvm_memslots *slots = kvm->memslots;
 
-	for (i = 0; i < kvm->nmemslots; ++i) {
-		struct kvm_memory_slot *memslot = &kvm->memslots[i];
+	for (i = 0; i < slots->nmemslots; ++i) {
+		struct kvm_memory_slot *memslot = &slots->memslots[i];
 
 		if (gfn >= memslot->base_gfn
 		    && gfn < memslot->base_gfn + memslot->npages)
@@ -802,10 +811,11 @@ struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn)
 int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn)
 {
 	int i;
+	struct kvm_memslots *slots = kvm->memslots;
 
 	gfn = unalias_gfn(kvm, gfn);
 	for (i = 0; i < KVM_MEMORY_SLOTS; ++i) {
-		struct kvm_memory_slot *memslot = &kvm->memslots[i];
+		struct kvm_memory_slot *memslot = &slots->memslots[i];
 
 		if (gfn >= memslot->base_gfn
 		    && gfn < memslot->base_gfn + memslot->npages)
-- 
cgit v1.2.3


From f7784b8ec9b6a041fa828cfbe9012fe51933f5ac Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti <mtosatti@redhat.com>
Date: Wed, 23 Dec 2009 14:35:18 -0200
Subject: KVM: split kvm_arch_set_memory_region into prepare and commit

Required for SRCU convertion later.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/ia64/kvm/kvm-ia64.c   | 16 +++++++++++----
 arch/powerpc/kvm/powerpc.c | 18 ++++++++++++----
 arch/s390/kvm/kvm-s390.c   | 25 +++++++++++++++--------
 arch/x86/kvm/x86.c         | 51 ++++++++++++++++++++++++++--------------------
 include/linux/kvm_host.h   |  7 ++++++-
 virt/kvm/kvm_main.c        | 12 +++++------
 6 files changed, 82 insertions(+), 47 deletions(-)

(limited to 'include/linux')

diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index 1ca1dbf48117..0757c7027986 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -1578,15 +1578,15 @@ out:
 	return r;
 }
 
-int kvm_arch_set_memory_region(struct kvm *kvm,
-		struct kvm_userspace_memory_region *mem,
+int kvm_arch_prepare_memory_region(struct kvm *kvm,
+		struct kvm_memory_slot *memslot,
 		struct kvm_memory_slot old,
+		struct kvm_userspace_memory_region *mem,
 		int user_alloc)
 {
 	unsigned long i;
 	unsigned long pfn;
-	int npages = mem->memory_size >> PAGE_SHIFT;
-	struct kvm_memory_slot *memslot = &kvm->memslots[mem->slot];
+	int npages = memslot->npages;
 	unsigned long base_gfn = memslot->base_gfn;
 
 	if (base_gfn + npages > (KVM_MAX_MEM_SIZE >> PAGE_SHIFT))
@@ -1610,6 +1610,14 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
 	return 0;
 }
 
+void kvm_arch_commit_memory_region(struct kvm *kvm,
+		struct kvm_userspace_memory_region *mem,
+		struct kvm_memory_slot old,
+		int user_alloc)
+{
+	return;
+}
+
 void kvm_arch_flush_shadow(struct kvm *kvm)
 {
 	kvm_flush_remote_tlbs(kvm);
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index f06cf93b178e..4633e7850dd2 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -165,14 +165,24 @@ long kvm_arch_dev_ioctl(struct file *filp,
 	return -EINVAL;
 }
 
-int kvm_arch_set_memory_region(struct kvm *kvm,
-                               struct kvm_userspace_memory_region *mem,
-                               struct kvm_memory_slot old,
-                               int user_alloc)
+int kvm_arch_prepare_memory_region(struct kvm *kvm,
+                                   struct kvm_memory_slot *memslot,
+                                   struct kvm_memory_slot old,
+                                   struct kvm_userspace_memory_region *mem,
+                                   int user_alloc)
 {
 	return 0;
 }
 
+void kvm_arch_commit_memory_region(struct kvm *kvm,
+               struct kvm_userspace_memory_region *mem,
+               struct kvm_memory_slot old,
+               int user_alloc)
+{
+       return;
+}
+
+
 void kvm_arch_flush_shadow(struct kvm *kvm)
 {
 }
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 3fa0a10e4668..c8002193d9d4 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -690,14 +690,12 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 }
 
 /* Section: memory related */
-int kvm_arch_set_memory_region(struct kvm *kvm,
-				struct kvm_userspace_memory_region *mem,
-				struct kvm_memory_slot old,
-				int user_alloc)
+int kvm_arch_prepare_memory_region(struct kvm *kvm,
+				   struct kvm_memory_slot *memslot,
+				   struct kvm_memory_slot old,
+				   struct kvm_userspace_memory_region *mem,
+				   int user_alloc)
 {
-	int i;
-	struct kvm_vcpu *vcpu;
-
 	/* A few sanity checks. We can have exactly one memory slot which has
 	   to start at guest virtual zero and which has to be located at a
 	   page boundary in userland and which has to end at a page boundary.
@@ -720,14 +718,23 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
 	if (!user_alloc)
 		return -EINVAL;
 
+	return 0;
+}
+
+void kvm_arch_commit_memory_region(struct kvm *kvm,
+				struct kvm_userspace_memory_region *mem,
+				struct kvm_memory_slot old,
+				int user_alloc)
+{
+	int i;
+	struct kvm_vcpu *vcpu;
+
 	/* request update of sie control block for all available vcpus */
 	kvm_for_each_vcpu(i, vcpu, kvm) {
 		if (test_and_set_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests))
 			continue;
 		kvm_s390_inject_sigp_stop(vcpu, ACTION_RELOADVCPU_ON_STOP);
 	}
-
-	return 0;
 }
 
 void kvm_arch_flush_shadow(struct kvm *kvm)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 1ce833191430..43da65feed49 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5228,13 +5228,13 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
 	kfree(kvm);
 }
 
-int kvm_arch_set_memory_region(struct kvm *kvm,
-				struct kvm_userspace_memory_region *mem,
+int kvm_arch_prepare_memory_region(struct kvm *kvm,
+				struct kvm_memory_slot *memslot,
 				struct kvm_memory_slot old,
+				struct kvm_userspace_memory_region *mem,
 				int user_alloc)
 {
-	int npages = mem->memory_size >> PAGE_SHIFT;
-	struct kvm_memory_slot *memslot = &kvm->memslots->memslots[mem->slot];
+	int npages = memslot->npages;
 
 	/*To keep backward compatibility with older userspace,
 	 *x86 needs to hanlde !user_alloc case.
@@ -5254,26 +5254,35 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
 			if (IS_ERR((void *)userspace_addr))
 				return PTR_ERR((void *)userspace_addr);
 
-			/* set userspace_addr atomically for kvm_hva_to_rmapp */
-			spin_lock(&kvm->mmu_lock);
 			memslot->userspace_addr = userspace_addr;
-			spin_unlock(&kvm->mmu_lock);
-		} else {
-			if (!old.user_alloc && old.rmap) {
-				int ret;
-
-				down_write(&current->mm->mmap_sem);
-				ret = do_munmap(current->mm, old.userspace_addr,
-						old.npages * PAGE_SIZE);
-				up_write(&current->mm->mmap_sem);
-				if (ret < 0)
-					printk(KERN_WARNING
-				       "kvm_vm_ioctl_set_memory_region: "
-				       "failed to munmap memory\n");
-			}
 		}
 	}
 
+
+	return 0;
+}
+
+void kvm_arch_commit_memory_region(struct kvm *kvm,
+				struct kvm_userspace_memory_region *mem,
+				struct kvm_memory_slot old,
+				int user_alloc)
+{
+
+	int npages = mem->memory_size >> PAGE_SHIFT;
+
+	if (!user_alloc && !old.user_alloc && old.rmap && !npages) {
+		int ret;
+
+		down_write(&current->mm->mmap_sem);
+		ret = do_munmap(current->mm, old.userspace_addr,
+				old.npages * PAGE_SIZE);
+		up_write(&current->mm->mmap_sem);
+		if (ret < 0)
+			printk(KERN_WARNING
+			       "kvm_vm_ioctl_set_memory_region: "
+			       "failed to munmap memory\n");
+	}
+
 	spin_lock(&kvm->mmu_lock);
 	if (!kvm->arch.n_requested_mmu_pages) {
 		unsigned int nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm);
@@ -5282,8 +5291,6 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
 
 	kvm_mmu_slot_remove_write_access(kvm, mem->slot);
 	spin_unlock(&kvm->mmu_lock);
-
-	return 0;
 }
 
 void kvm_arch_flush_shadow(struct kvm *kvm)
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 782bfb185f8a..3c44687b3425 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -253,7 +253,12 @@ int kvm_set_memory_region(struct kvm *kvm,
 int __kvm_set_memory_region(struct kvm *kvm,
 			    struct kvm_userspace_memory_region *mem,
 			    int user_alloc);
-int kvm_arch_set_memory_region(struct kvm *kvm,
+int kvm_arch_prepare_memory_region(struct kvm *kvm,
+				struct kvm_memory_slot *memslot,
+				struct kvm_memory_slot old,
+				struct kvm_userspace_memory_region *mem,
+				int user_alloc);
+void kvm_arch_commit_memory_region(struct kvm *kvm,
 				struct kvm_userspace_memory_region *mem,
 				struct kvm_memory_slot old,
 				int user_alloc);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 86dd8f3d29c9..c9f6cfe83120 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -663,6 +663,10 @@ skip_lpage:
 	if (!npages)
 		kvm_arch_flush_shadow(kvm);
 
+	r = kvm_arch_prepare_memory_region(kvm, &new, old, mem, user_alloc);
+	if (r)
+		goto out_free;
+
 	spin_lock(&kvm->mmu_lock);
 	if (mem->slot >= kvm->memslots->nmemslots)
 		kvm->memslots->nmemslots = mem->slot + 1;
@@ -670,13 +674,7 @@ skip_lpage:
 	*memslot = new;
 	spin_unlock(&kvm->mmu_lock);
 
-	r = kvm_arch_set_memory_region(kvm, mem, old, user_alloc);
-	if (r) {
-		spin_lock(&kvm->mmu_lock);
-		*memslot = old;
-		spin_unlock(&kvm->mmu_lock);
-		goto out_free;
-	}
+	kvm_arch_commit_memory_region(kvm, mem, old, user_alloc);
 
 	kvm_free_physmem_slot(&old, npages ? &new : NULL);
 	/* Slot deletion case: we have to update the current slot */
-- 
cgit v1.2.3


From 506f0d6f9c40ae7d9634acf3c26358810f42c24a Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti <mtosatti@redhat.com>
Date: Wed, 23 Dec 2009 14:35:19 -0200
Subject: KVM: introduce gfn_to_pfn_memslot

Which takes a memslot pointer instead of using kvm->memslots.

To be used by SRCU convertion later.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 include/linux/kvm_host.h |  2 ++
 virt/kvm/kvm_main.c      | 33 +++++++++++++++++++++++++--------
 2 files changed, 27 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 3c44687b3425..f1f78deece10 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -273,6 +273,8 @@ void kvm_set_page_dirty(struct page *page);
 void kvm_set_page_accessed(struct page *page);
 
 pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn);
+pfn_t gfn_to_pfn_memslot(struct kvm *kvm,
+			 struct kvm_memory_slot *slot, gfn_t gfn);
 void kvm_release_pfn_dirty(pfn_t);
 void kvm_release_pfn_clean(pfn_t pfn);
 void kvm_set_pfn_dirty(pfn_t pfn);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index c9f6cfe83120..4e2321c733f7 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -835,21 +835,14 @@ unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
 }
 EXPORT_SYMBOL_GPL(gfn_to_hva);
 
-pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn)
+static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr)
 {
 	struct page *page[1];
-	unsigned long addr;
 	int npages;
 	pfn_t pfn;
 
 	might_sleep();
 
-	addr = gfn_to_hva(kvm, gfn);
-	if (kvm_is_error_hva(addr)) {
-		get_page(bad_page);
-		return page_to_pfn(bad_page);
-	}
-
 	npages = get_user_pages_fast(addr, 1, 1, page);
 
 	if (unlikely(npages != 1)) {
@@ -874,8 +867,32 @@ pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn)
 	return pfn;
 }
 
+pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn)
+{
+	unsigned long addr;
+
+	addr = gfn_to_hva(kvm, gfn);
+	if (kvm_is_error_hva(addr)) {
+		get_page(bad_page);
+		return page_to_pfn(bad_page);
+	}
+
+	return hva_to_pfn(kvm, addr);
+}
 EXPORT_SYMBOL_GPL(gfn_to_pfn);
 
+static unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn)
+{
+	return (slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE);
+}
+
+pfn_t gfn_to_pfn_memslot(struct kvm *kvm,
+			 struct kvm_memory_slot *slot, gfn_t gfn)
+{
+	unsigned long addr = gfn_to_hva_memslot(slot, gfn);
+	return hva_to_pfn(kvm, addr);
+}
+
 struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
 {
 	pfn_t pfn;
-- 
cgit v1.2.3


From 3ad26d8139a82b0510b1e0435ee82ae461d33401 Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti <mtosatti@redhat.com>
Date: Wed, 23 Dec 2009 14:35:20 -0200
Subject: KVM: use gfn_to_pfn_memslot in kvm_iommu_map_pages

So its possible to iommu map a memslot before making it visible to
kvm.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 include/linux/kvm_host.h |  3 +--
 virt/kvm/iommu.c         | 13 ++++++-------
 virt/kvm/kvm_main.c      |  2 +-
 3 files changed, 8 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index f1f78deece10..9af240387fe6 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -440,8 +440,7 @@ void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id);
 #define KVM_IOMMU_CACHE_COHERENCY	0x1
 
 #ifdef CONFIG_IOMMU_API
-int kvm_iommu_map_pages(struct kvm *kvm, gfn_t base_gfn,
-			unsigned long npages);
+int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot);
 int kvm_iommu_map_guest(struct kvm *kvm);
 int kvm_iommu_unmap_guest(struct kvm *kvm);
 int kvm_assign_device(struct kvm *kvm,
diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c
index bc697a66a883..cf567d8033db 100644
--- a/virt/kvm/iommu.c
+++ b/virt/kvm/iommu.c
@@ -32,10 +32,10 @@ static int kvm_iommu_unmap_memslots(struct kvm *kvm);
 static void kvm_iommu_put_pages(struct kvm *kvm,
 				gfn_t base_gfn, unsigned long npages);
 
-int kvm_iommu_map_pages(struct kvm *kvm,
-			gfn_t base_gfn, unsigned long npages)
+int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot)
 {
-	gfn_t gfn = base_gfn;
+	gfn_t gfn = slot->base_gfn;
+	unsigned long npages = slot->npages;
 	pfn_t pfn;
 	int i, r = 0;
 	struct iommu_domain *domain = kvm->arch.iommu_domain;
@@ -54,7 +54,7 @@ int kvm_iommu_map_pages(struct kvm *kvm,
 		if (iommu_iova_to_phys(domain, gfn_to_gpa(gfn)))
 			continue;
 
-		pfn = gfn_to_pfn(kvm, gfn);
+		pfn = gfn_to_pfn_memslot(kvm, slot, gfn);
 		r = iommu_map_range(domain,
 				    gfn_to_gpa(gfn),
 				    pfn_to_hpa(pfn),
@@ -69,7 +69,7 @@ int kvm_iommu_map_pages(struct kvm *kvm,
 	return 0;
 
 unmap_pages:
-	kvm_iommu_put_pages(kvm, base_gfn, i);
+	kvm_iommu_put_pages(kvm, slot->base_gfn, i);
 	return r;
 }
 
@@ -81,8 +81,7 @@ static int kvm_iommu_map_memslots(struct kvm *kvm)
 	slots = kvm->memslots;
 
 	for (i = 0; i < slots->nmemslots; i++) {
-		r = kvm_iommu_map_pages(kvm, slots->memslots[i].base_gfn,
-					slots->memslots[i].npages);
+		r = kvm_iommu_map_pages(kvm, &slots->memslots[i]);
 		if (r)
 			break;
 	}
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 4e2321c733f7..87d296d8b270 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -684,7 +684,7 @@ skip_lpage:
 	spin_unlock(&kvm->mmu_lock);
 #ifdef CONFIG_DMAR
 	/* map the pages in iommu page table */
-	r = kvm_iommu_map_pages(kvm, base_gfn, npages);
+	r = kvm_iommu_map_pages(kvm, memslot);
 	if (r)
 		goto out;
 #endif
-- 
cgit v1.2.3


From bc6678a33d9b952981a8e44a4f876c3ad64ca4d8 Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti <mtosatti@redhat.com>
Date: Wed, 23 Dec 2009 14:35:21 -0200
Subject: KVM: introduce kvm->srcu and convert kvm_set_memory_region to SRCU
 update

Use two steps for memslot deletion: mark the slot invalid (which stops
instantiation of new shadow pages for that slot, but allows destruction),
then instantiate the new empty slot.

Also simplifies kvm_handle_hva locking.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/ia64/kvm/kvm-ia64.c |   4 +-
 arch/x86/kvm/mmu.c       |  28 +++++-----
 arch/x86/kvm/vmx.c       |   6 +-
 include/linux/kvm.h      |   2 +-
 include/linux/kvm_host.h |   7 +--
 virt/kvm/assigned-dev.c  |   8 +--
 virt/kvm/iommu.c         |   4 +-
 virt/kvm/kvm_main.c      | 141 +++++++++++++++++++++++++++++++++++------------
 8 files changed, 136 insertions(+), 64 deletions(-)

(limited to 'include/linux')

diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index 0757c7027986..b2e4d16dd39e 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -1382,7 +1382,7 @@ static void kvm_release_vm_pages(struct kvm *kvm)
 	int i, j;
 	unsigned long base_gfn;
 
-	slots = kvm->memslots;
+	slots = rcu_dereference(kvm->memslots);
 	for (i = 0; i < slots->nmemslots; i++) {
 		memslot = &slots->memslots[i];
 		base_gfn = memslot->base_gfn;
@@ -1837,6 +1837,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 	struct kvm_memory_slot *memslot;
 	int is_dirty = 0;
 
+	down_write(&kvm->slots_lock);
 	spin_lock(&kvm->arch.dirty_log_lock);
 
 	r = kvm_ia64_sync_dirty_log(kvm, log);
@@ -1856,6 +1857,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 	}
 	r = 0;
 out:
+	up_write(&kvm->slots_lock);
 	spin_unlock(&kvm->arch.dirty_log_lock);
 	return r;
 }
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 81f84d326a84..f8bf42a25995 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -29,6 +29,7 @@
 #include <linux/swap.h>
 #include <linux/hugetlb.h>
 #include <linux/compiler.h>
+#include <linux/srcu.h>
 
 #include <asm/page.h>
 #include <asm/cmpxchg.h>
@@ -807,21 +808,15 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
 {
 	int i, j;
 	int retval = 0;
-	struct kvm_memslots *slots = kvm->memslots;
+	struct kvm_memslots *slots;
+
+	slots = rcu_dereference(kvm->memslots);
 
-	/*
-	 * If mmap_sem isn't taken, we can look the memslots with only
-	 * the mmu_lock by skipping over the slots with userspace_addr == 0.
-	 */
 	for (i = 0; i < slots->nmemslots; i++) {
 		struct kvm_memory_slot *memslot = &slots->memslots[i];
 		unsigned long start = memslot->userspace_addr;
 		unsigned long end;
 
-		/* mmu_lock protects userspace_addr */
-		if (!start)
-			continue;
-
 		end = start + (memslot->npages << PAGE_SHIFT);
 		if (hva >= start && hva < end) {
 			gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT;
@@ -1617,7 +1612,7 @@ static void mmu_unshadow(struct kvm *kvm, gfn_t gfn)
 
 static void page_header_update_slot(struct kvm *kvm, void *pte, gfn_t gfn)
 {
-	int slot = memslot_id(kvm, gfn_to_memslot(kvm, gfn));
+	int slot = memslot_id(kvm, gfn);
 	struct kvm_mmu_page *sp = page_header(__pa(pte));
 
 	__set_bit(slot, sp->slot_bitmap);
@@ -3021,9 +3016,11 @@ unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm)
 	int i;
 	unsigned int nr_mmu_pages;
 	unsigned int  nr_pages = 0;
+	struct kvm_memslots *slots;
 
-	for (i = 0; i < kvm->memslots->nmemslots; i++)
-		nr_pages += kvm->memslots->memslots[i].npages;
+	slots = rcu_dereference(kvm->memslots);
+	for (i = 0; i < slots->nmemslots; i++)
+		nr_pages += slots->memslots[i].npages;
 
 	nr_mmu_pages = nr_pages * KVM_PERMILLE_MMU_PAGES / 1000;
 	nr_mmu_pages = max(nr_mmu_pages,
@@ -3293,10 +3290,12 @@ static void audit_mappings(struct kvm_vcpu *vcpu)
 static int count_rmaps(struct kvm_vcpu *vcpu)
 {
 	int nmaps = 0;
-	int i, j, k;
+	int i, j, k, idx;
 
+	idx = srcu_read_lock(&kvm->srcu);
+	slots = rcu_dereference(kvm->memslots);
 	for (i = 0; i < KVM_MEMORY_SLOTS; ++i) {
-		struct kvm_memory_slot *m = &vcpu->kvm->memslots->memslots[i];
+		struct kvm_memory_slot *m = &slots->memslots[i];
 		struct kvm_rmap_desc *d;
 
 		for (j = 0; j < m->npages; ++j) {
@@ -3319,6 +3318,7 @@ static int count_rmaps(struct kvm_vcpu *vcpu)
 			}
 		}
 	}
+	srcu_read_unlock(&kvm->srcu, idx);
 	return nmaps;
 }
 
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 18698799e365..f1cae7d6113d 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1503,7 +1503,11 @@ static void enter_pmode(struct kvm_vcpu *vcpu)
 static gva_t rmode_tss_base(struct kvm *kvm)
 {
 	if (!kvm->arch.tss_addr) {
-		gfn_t base_gfn = kvm->memslots->memslots[0].base_gfn +
+		struct kvm_memslots *slots;
+		gfn_t base_gfn;
+
+		slots = rcu_dereference(kvm->memslots);
+		base_gfn = kvm->memslots->memslots[0].base_gfn +
 				 kvm->memslots->memslots[0].npages - 3;
 		return base_gfn << PAGE_SHIFT;
 	}
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index a24de0b1858e..f2feef68ffd6 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -103,7 +103,7 @@ struct kvm_userspace_memory_region {
 
 /* for kvm_memory_region::flags */
 #define KVM_MEM_LOG_DIRTY_PAGES  1UL
-
+#define KVM_MEMSLOT_INVALID      (1UL << 1)
 
 /* for KVM_IRQ_LINE */
 struct kvm_irq_level {
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 9af240387fe6..93bd30701ca7 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -162,6 +162,7 @@ struct kvm {
 	struct rw_semaphore slots_lock;
 	struct mm_struct *mm; /* userspace tied to this vm */
 	struct kvm_memslots *memslots;
+	struct srcu_struct srcu;
 #ifdef CONFIG_KVM_APIC_ARCHITECTURE
 	u32 bsp_vcpu_id;
 	struct kvm_vcpu *bsp_vcpu;
@@ -275,6 +276,7 @@ void kvm_set_page_accessed(struct page *page);
 pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn);
 pfn_t gfn_to_pfn_memslot(struct kvm *kvm,
 			 struct kvm_memory_slot *slot, gfn_t gfn);
+int memslot_id(struct kvm *kvm, gfn_t gfn);
 void kvm_release_pfn_dirty(pfn_t);
 void kvm_release_pfn_clean(pfn_t pfn);
 void kvm_set_pfn_dirty(pfn_t pfn);
@@ -490,11 +492,6 @@ static inline void kvm_guest_exit(void)
 	current->flags &= ~PF_VCPU;
 }
 
-static inline int memslot_id(struct kvm *kvm, struct kvm_memory_slot *slot)
-{
-	return slot - kvm->memslots->memslots;
-}
-
 static inline gpa_t gfn_to_gpa(gfn_t gfn)
 {
 	return (gpa_t)gfn << PAGE_SHIFT;
diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c
index f73de631e3ee..f51e684dd238 100644
--- a/virt/kvm/assigned-dev.c
+++ b/virt/kvm/assigned-dev.c
@@ -504,12 +504,12 @@ out:
 static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
 				      struct kvm_assigned_pci_dev *assigned_dev)
 {
-	int r = 0;
+	int r = 0, idx;
 	struct kvm_assigned_dev_kernel *match;
 	struct pci_dev *dev;
 
 	mutex_lock(&kvm->lock);
-	down_read(&kvm->slots_lock);
+	idx = srcu_read_lock(&kvm->srcu);
 
 	match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
 				      assigned_dev->assigned_dev_id);
@@ -573,7 +573,7 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
 	}
 
 out:
-	up_read(&kvm->slots_lock);
+	srcu_read_unlock(&kvm->srcu, idx);
 	mutex_unlock(&kvm->lock);
 	return r;
 out_list_del:
@@ -585,7 +585,7 @@ out_put:
 	pci_dev_put(dev);
 out_free:
 	kfree(match);
-	up_read(&kvm->slots_lock);
+	srcu_read_unlock(&kvm->srcu, idx);
 	mutex_unlock(&kvm->lock);
 	return r;
 }
diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c
index cf567d8033db..65a51432c8e5 100644
--- a/virt/kvm/iommu.c
+++ b/virt/kvm/iommu.c
@@ -78,7 +78,7 @@ static int kvm_iommu_map_memslots(struct kvm *kvm)
 	int i, r = 0;
 	struct kvm_memslots *slots;
 
-	slots = kvm->memslots;
+	slots = rcu_dereference(kvm->memslots);
 
 	for (i = 0; i < slots->nmemslots; i++) {
 		r = kvm_iommu_map_pages(kvm, &slots->memslots[i]);
@@ -214,7 +214,7 @@ static int kvm_iommu_unmap_memslots(struct kvm *kvm)
 	int i;
 	struct kvm_memslots *slots;
 
-	slots = kvm->memslots;
+	slots = rcu_dereference(kvm->memslots);
 
 	for (i = 0; i < slots->nmemslots; i++) {
 		kvm_iommu_put_pages(kvm, slots->memslots[i].base_gfn,
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 87d296d8b270..2bb24a814fdf 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -44,6 +44,7 @@
 #include <linux/bitops.h>
 #include <linux/spinlock.h>
 #include <linux/compat.h>
+#include <linux/srcu.h>
 
 #include <asm/processor.h>
 #include <asm/io.h>
@@ -213,7 +214,7 @@ static void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn,
 					     unsigned long address)
 {
 	struct kvm *kvm = mmu_notifier_to_kvm(mn);
-	int need_tlb_flush;
+	int need_tlb_flush, idx;
 
 	/*
 	 * When ->invalidate_page runs, the linux pte has been zapped
@@ -233,10 +234,12 @@ static void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn,
 	 * pte after kvm_unmap_hva returned, without noticing the page
 	 * is going to be freed.
 	 */
+	idx = srcu_read_lock(&kvm->srcu);
 	spin_lock(&kvm->mmu_lock);
 	kvm->mmu_notifier_seq++;
 	need_tlb_flush = kvm_unmap_hva(kvm, address);
 	spin_unlock(&kvm->mmu_lock);
+	srcu_read_unlock(&kvm->srcu, idx);
 
 	/* we've to flush the tlb before the pages can be freed */
 	if (need_tlb_flush)
@@ -250,11 +253,14 @@ static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn,
 					pte_t pte)
 {
 	struct kvm *kvm = mmu_notifier_to_kvm(mn);
+	int idx;
 
+	idx = srcu_read_lock(&kvm->srcu);
 	spin_lock(&kvm->mmu_lock);
 	kvm->mmu_notifier_seq++;
 	kvm_set_spte_hva(kvm, address, pte);
 	spin_unlock(&kvm->mmu_lock);
+	srcu_read_unlock(&kvm->srcu, idx);
 }
 
 static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
@@ -263,8 +269,9 @@ static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
 						    unsigned long end)
 {
 	struct kvm *kvm = mmu_notifier_to_kvm(mn);
-	int need_tlb_flush = 0;
+	int need_tlb_flush = 0, idx;
 
+	idx = srcu_read_lock(&kvm->srcu);
 	spin_lock(&kvm->mmu_lock);
 	/*
 	 * The count increase must become visible at unlock time as no
@@ -275,6 +282,7 @@ static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
 	for (; start < end; start += PAGE_SIZE)
 		need_tlb_flush |= kvm_unmap_hva(kvm, start);
 	spin_unlock(&kvm->mmu_lock);
+	srcu_read_unlock(&kvm->srcu, idx);
 
 	/* we've to flush the tlb before the pages can be freed */
 	if (need_tlb_flush)
@@ -312,11 +320,13 @@ static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn,
 					      unsigned long address)
 {
 	struct kvm *kvm = mmu_notifier_to_kvm(mn);
-	int young;
+	int young, idx;
 
+	idx = srcu_read_lock(&kvm->srcu);
 	spin_lock(&kvm->mmu_lock);
 	young = kvm_age_hva(kvm, address);
 	spin_unlock(&kvm->mmu_lock);
+	srcu_read_unlock(&kvm->srcu, idx);
 
 	if (young)
 		kvm_flush_remote_tlbs(kvm);
@@ -379,11 +389,15 @@ static struct kvm *kvm_create_vm(void)
 	kvm->memslots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
 	if (!kvm->memslots)
 		goto out_err;
+	if (init_srcu_struct(&kvm->srcu))
+		goto out_err;
 
 #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
 	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
-	if (!page)
+	if (!page) {
+		cleanup_srcu_struct(&kvm->srcu);
 		goto out_err;
+	}
 
 	kvm->coalesced_mmio_ring =
 			(struct kvm_coalesced_mmio_ring *)page_address(page);
@@ -391,6 +405,7 @@ static struct kvm *kvm_create_vm(void)
 
 	r = kvm_init_mmu_notifier(kvm);
 	if (r) {
+		cleanup_srcu_struct(&kvm->srcu);
 #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
 		put_page(page);
 #endif
@@ -480,6 +495,7 @@ static void kvm_destroy_vm(struct kvm *kvm)
 #else
 	kvm_arch_flush_shadow(kvm);
 #endif
+	cleanup_srcu_struct(&kvm->srcu);
 	kvm_arch_destroy_vm(kvm);
 	hardware_disable_all();
 	mmdrop(mm);
@@ -521,12 +537,13 @@ int __kvm_set_memory_region(struct kvm *kvm,
 			    struct kvm_userspace_memory_region *mem,
 			    int user_alloc)
 {
-	int r;
+	int r, flush_shadow = 0;
 	gfn_t base_gfn;
 	unsigned long npages;
 	unsigned long i;
 	struct kvm_memory_slot *memslot;
 	struct kvm_memory_slot old, new;
+	struct kvm_memslots *slots, *old_memslots;
 
 	r = -EINVAL;
 	/* General sanity checks */
@@ -588,15 +605,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
 		memset(new.rmap, 0, npages * sizeof(*new.rmap));
 
 		new.user_alloc = user_alloc;
-		/*
-		 * hva_to_rmmap() serialzies with the mmu_lock and to be
-		 * safe it has to ignore memslots with !user_alloc &&
-		 * !userspace_addr.
-		 */
-		if (user_alloc)
-			new.userspace_addr = mem->userspace_addr;
-		else
-			new.userspace_addr = 0;
+		new.userspace_addr = mem->userspace_addr;
 	}
 	if (!npages)
 		goto skip_lpage;
@@ -651,8 +660,9 @@ skip_lpage:
 		if (!new.dirty_bitmap)
 			goto out_free;
 		memset(new.dirty_bitmap, 0, dirty_bytes);
+		/* destroy any largepage mappings for dirty tracking */
 		if (old.npages)
-			kvm_arch_flush_shadow(kvm);
+			flush_shadow = 1;
 	}
 #else  /* not defined CONFIG_S390 */
 	new.user_alloc = user_alloc;
@@ -660,34 +670,72 @@ skip_lpage:
 		new.userspace_addr = mem->userspace_addr;
 #endif /* not defined CONFIG_S390 */
 
-	if (!npages)
+	if (!npages) {
+		r = -ENOMEM;
+		slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
+		if (!slots)
+			goto out_free;
+		memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));
+		if (mem->slot >= slots->nmemslots)
+			slots->nmemslots = mem->slot + 1;
+		slots->memslots[mem->slot].flags |= KVM_MEMSLOT_INVALID;
+
+		old_memslots = kvm->memslots;
+		rcu_assign_pointer(kvm->memslots, slots);
+		synchronize_srcu_expedited(&kvm->srcu);
+		/* From this point no new shadow pages pointing to a deleted
+		 * memslot will be created.
+		 *
+		 * validation of sp->gfn happens in:
+		 * 	- gfn_to_hva (kvm_read_guest, gfn_to_pfn)
+		 * 	- kvm_is_visible_gfn (mmu_check_roots)
+		 */
 		kvm_arch_flush_shadow(kvm);
+		kfree(old_memslots);
+	}
 
 	r = kvm_arch_prepare_memory_region(kvm, &new, old, mem, user_alloc);
 	if (r)
 		goto out_free;
 
-	spin_lock(&kvm->mmu_lock);
-	if (mem->slot >= kvm->memslots->nmemslots)
-		kvm->memslots->nmemslots = mem->slot + 1;
+#ifdef CONFIG_DMAR
+	/* map the pages in iommu page table */
+	if (npages) {
+		r = kvm_iommu_map_pages(kvm, &new);
+		if (r)
+			goto out_free;
+	}
+#endif
 
-	*memslot = new;
-	spin_unlock(&kvm->mmu_lock);
+	r = -ENOMEM;
+	slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
+	if (!slots)
+		goto out_free;
+	memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));
+	if (mem->slot >= slots->nmemslots)
+		slots->nmemslots = mem->slot + 1;
+
+	/* actual memory is freed via old in kvm_free_physmem_slot below */
+	if (!npages) {
+		new.rmap = NULL;
+		new.dirty_bitmap = NULL;
+		for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i)
+			new.lpage_info[i] = NULL;
+	}
+
+	slots->memslots[mem->slot] = new;
+	old_memslots = kvm->memslots;
+	rcu_assign_pointer(kvm->memslots, slots);
+	synchronize_srcu_expedited(&kvm->srcu);
 
 	kvm_arch_commit_memory_region(kvm, mem, old, user_alloc);
 
-	kvm_free_physmem_slot(&old, npages ? &new : NULL);
-	/* Slot deletion case: we have to update the current slot */
-	spin_lock(&kvm->mmu_lock);
-	if (!npages)
-		*memslot = old;
-	spin_unlock(&kvm->mmu_lock);
-#ifdef CONFIG_DMAR
-	/* map the pages in iommu page table */
-	r = kvm_iommu_map_pages(kvm, memslot);
-	if (r)
-		goto out;
-#endif
+	kvm_free_physmem_slot(&old, &new);
+	kfree(old_memslots);
+
+	if (flush_shadow)
+		kvm_arch_flush_shadow(kvm);
+
 	return 0;
 
 out_free:
@@ -787,7 +835,7 @@ EXPORT_SYMBOL_GPL(kvm_is_error_hva);
 struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn)
 {
 	int i;
-	struct kvm_memslots *slots = kvm->memslots;
+	struct kvm_memslots *slots = rcu_dereference(kvm->memslots);
 
 	for (i = 0; i < slots->nmemslots; ++i) {
 		struct kvm_memory_slot *memslot = &slots->memslots[i];
@@ -809,12 +857,15 @@ struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn)
 int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn)
 {
 	int i;
-	struct kvm_memslots *slots = kvm->memslots;
+	struct kvm_memslots *slots = rcu_dereference(kvm->memslots);
 
 	gfn = unalias_gfn(kvm, gfn);
 	for (i = 0; i < KVM_MEMORY_SLOTS; ++i) {
 		struct kvm_memory_slot *memslot = &slots->memslots[i];
 
+		if (memslot->flags & KVM_MEMSLOT_INVALID)
+			continue;
+
 		if (gfn >= memslot->base_gfn
 		    && gfn < memslot->base_gfn + memslot->npages)
 			return 1;
@@ -823,13 +874,31 @@ int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn)
 }
 EXPORT_SYMBOL_GPL(kvm_is_visible_gfn);
 
+int memslot_id(struct kvm *kvm, gfn_t gfn)
+{
+	int i;
+	struct kvm_memslots *slots = rcu_dereference(kvm->memslots);
+	struct kvm_memory_slot *memslot = NULL;
+
+	gfn = unalias_gfn(kvm, gfn);
+	for (i = 0; i < slots->nmemslots; ++i) {
+		memslot = &slots->memslots[i];
+
+		if (gfn >= memslot->base_gfn
+		    && gfn < memslot->base_gfn + memslot->npages)
+			break;
+	}
+
+	return memslot - slots->memslots;
+}
+
 unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
 {
 	struct kvm_memory_slot *slot;
 
 	gfn = unalias_gfn(kvm, gfn);
 	slot = gfn_to_memslot_unaliased(kvm, gfn);
-	if (!slot)
+	if (!slot || slot->flags & KVM_MEMSLOT_INVALID)
 		return bad_hva();
 	return (slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE);
 }
-- 
cgit v1.2.3


From a983fb238728e1123177e8058d4f644b949a7d05 Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti <mtosatti@redhat.com>
Date: Wed, 23 Dec 2009 14:35:23 -0200
Subject: KVM: x86: switch kvm_set_memory_alias to SRCU update

Using a similar two-step procedure as for memslots.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  4 +++
 arch/x86/kvm/x86.c              | 60 ++++++++++++++++++++++++++++++++++-------
 include/linux/kvm_host.h        |  6 +++++
 virt/kvm/kvm_main.c             |  4 +--
 4 files changed, 63 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 7cdcb3d0f770..6c8c7c578c46 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -368,8 +368,12 @@ struct kvm_mem_alias {
 	gfn_t base_gfn;
 	unsigned long npages;
 	gfn_t target_gfn;
+#define KVM_ALIAS_INVALID     1UL
+	unsigned long flags;
 };
 
+#define KVM_ARCH_HAS_UNALIAS_INSTANTIATION
+
 struct kvm_mem_aliases {
 	struct kvm_mem_alias aliases[KVM_ALIAS_SLOTS];
 	int naliases;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e7488350ca16..28127c936c3b 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -38,6 +38,7 @@
 #include <linux/intel-iommu.h>
 #include <linux/cpufreq.h>
 #include <linux/user-return-notifier.h>
+#include <linux/srcu.h>
 #include <trace/events/kvm.h>
 #undef TRACE_INCLUDE_FILE
 #define CREATE_TRACE_POINTS
@@ -2223,11 +2224,32 @@ static int kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm)
 	return kvm->arch.n_alloc_mmu_pages;
 }
 
+gfn_t unalias_gfn_instantiation(struct kvm *kvm, gfn_t gfn)
+{
+	int i;
+	struct kvm_mem_alias *alias;
+	struct kvm_mem_aliases *aliases;
+
+	aliases = rcu_dereference(kvm->arch.aliases);
+
+	for (i = 0; i < aliases->naliases; ++i) {
+		alias = &aliases->aliases[i];
+		if (alias->flags & KVM_ALIAS_INVALID)
+			continue;
+		if (gfn >= alias->base_gfn
+		    && gfn < alias->base_gfn + alias->npages)
+			return alias->target_gfn + gfn - alias->base_gfn;
+	}
+	return gfn;
+}
+
 gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
 {
 	int i;
 	struct kvm_mem_alias *alias;
-	struct kvm_mem_aliases *aliases = kvm->arch.aliases;
+	struct kvm_mem_aliases *aliases;
+
+	aliases = rcu_dereference(kvm->arch.aliases);
 
 	for (i = 0; i < aliases->naliases; ++i) {
 		alias = &aliases->aliases[i];
@@ -2248,7 +2270,7 @@ static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm,
 {
 	int r, n;
 	struct kvm_mem_alias *p;
-	struct kvm_mem_aliases *aliases;
+	struct kvm_mem_aliases *aliases, *old_aliases;
 
 	r = -EINVAL;
 	/* General sanity checks */
@@ -2265,28 +2287,48 @@ static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm,
 	    < alias->target_phys_addr)
 		goto out;
 
+	r = -ENOMEM;
+	aliases = kzalloc(sizeof(struct kvm_mem_aliases), GFP_KERNEL);
+	if (!aliases)
+		goto out;
+
 	down_write(&kvm->slots_lock);
-	spin_lock(&kvm->mmu_lock);
 
-	aliases = kvm->arch.aliases;
+	/* invalidate any gfn reference in case of deletion/shrinking */
+	memcpy(aliases, kvm->arch.aliases, sizeof(struct kvm_mem_aliases));
+	aliases->aliases[alias->slot].flags |= KVM_ALIAS_INVALID;
+	old_aliases = kvm->arch.aliases;
+	rcu_assign_pointer(kvm->arch.aliases, aliases);
+	synchronize_srcu_expedited(&kvm->srcu);
+	kvm_mmu_zap_all(kvm);
+	kfree(old_aliases);
+
+	r = -ENOMEM;
+	aliases = kzalloc(sizeof(struct kvm_mem_aliases), GFP_KERNEL);
+	if (!aliases)
+		goto out_unlock;
+
+	memcpy(aliases, kvm->arch.aliases, sizeof(struct kvm_mem_aliases));
 
 	p = &aliases->aliases[alias->slot];
 	p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT;
 	p->npages = alias->memory_size >> PAGE_SHIFT;
 	p->target_gfn = alias->target_phys_addr >> PAGE_SHIFT;
+	p->flags &= ~(KVM_ALIAS_INVALID);
 
 	for (n = KVM_ALIAS_SLOTS; n > 0; --n)
 		if (aliases->aliases[n - 1].npages)
 			break;
 	aliases->naliases = n;
 
-	spin_unlock(&kvm->mmu_lock);
-	kvm_mmu_zap_all(kvm);
+	old_aliases = kvm->arch.aliases;
+	rcu_assign_pointer(kvm->arch.aliases, aliases);
+	synchronize_srcu_expedited(&kvm->srcu);
+	kfree(old_aliases);
+	r = 0;
 
+out_unlock:
 	up_write(&kvm->slots_lock);
-
-	return 0;
-
 out:
 	return r;
 }
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 93bd30701ca7..20941c0f4045 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -266,6 +266,8 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
 void kvm_disable_largepages(void);
 void kvm_arch_flush_shadow(struct kvm *kvm);
 gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn);
+gfn_t unalias_gfn_instantiation(struct kvm *kvm, gfn_t gfn);
+
 struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn);
 unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn);
 void kvm_release_page_clean(struct page *page);
@@ -539,6 +541,10 @@ static inline int mmu_notifier_retry(struct kvm_vcpu *vcpu, unsigned long mmu_se
 }
 #endif
 
+#ifndef KVM_ARCH_HAS_UNALIAS_INSTANTIATION
+#define unalias_gfn_instantiation unalias_gfn
+#endif
+
 #ifdef CONFIG_HAVE_KVM_IRQCHIP
 
 #define KVM_MAX_IRQ_ROUTES 1024
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 2bb24a814fdf..c680f7b64c6f 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -859,7 +859,7 @@ int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn)
 	int i;
 	struct kvm_memslots *slots = rcu_dereference(kvm->memslots);
 
-	gfn = unalias_gfn(kvm, gfn);
+	gfn = unalias_gfn_instantiation(kvm, gfn);
 	for (i = 0; i < KVM_MEMORY_SLOTS; ++i) {
 		struct kvm_memory_slot *memslot = &slots->memslots[i];
 
@@ -896,7 +896,7 @@ unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
 {
 	struct kvm_memory_slot *slot;
 
-	gfn = unalias_gfn(kvm, gfn);
+	gfn = unalias_gfn_instantiation(kvm, gfn);
 	slot = gfn_to_memslot_unaliased(kvm, gfn);
 	if (!slot || slot->flags & KVM_MEMSLOT_INVALID)
 		return bad_hva();
-- 
cgit v1.2.3


From e93f8a0f821e290ac5149830110a5f704db7a1fc Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti <mtosatti@redhat.com>
Date: Wed, 23 Dec 2009 14:35:24 -0200
Subject: KVM: convert io_bus to SRCU

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/ia64/kvm/kvm-ia64.c  |   4 +-
 arch/x86/kvm/i8254.c      |   6 +--
 arch/x86/kvm/i8259.c      |   4 +-
 arch/x86/kvm/x86.c        |  13 +++---
 include/linux/kvm_host.h  |  27 ++++++------
 virt/kvm/coalesced_mmio.c |   4 +-
 virt/kvm/eventfd.c        |   8 ++--
 virt/kvm/ioapic.c         |   4 +-
 virt/kvm/kvm_main.c       | 106 +++++++++++++++++++++++++++-------------------
 9 files changed, 101 insertions(+), 75 deletions(-)

(limited to 'include/linux')

diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index b2e4d16dd39e..d0ad538f0083 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -241,10 +241,10 @@ static int handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	return 0;
 mmio:
 	if (p->dir)
-		r = kvm_io_bus_read(&vcpu->kvm->mmio_bus, p->addr,
+		r = kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, p->addr,
 				    p->size, &p->data);
 	else
-		r = kvm_io_bus_write(&vcpu->kvm->mmio_bus, p->addr,
+		r = kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, p->addr,
 				     p->size, &p->data);
 	if (r)
 		printk(KERN_ERR"kvm: No iodevice found! addr:%lx\n", p->addr);
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 15578f180e59..4b433de02e5b 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -645,13 +645,13 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags)
 	kvm_register_irq_mask_notifier(kvm, 0, &pit->mask_notifier);
 
 	kvm_iodevice_init(&pit->dev, &pit_dev_ops);
-	ret = __kvm_io_bus_register_dev(&kvm->pio_bus, &pit->dev);
+	ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, &pit->dev);
 	if (ret < 0)
 		goto fail;
 
 	if (flags & KVM_PIT_SPEAKER_DUMMY) {
 		kvm_iodevice_init(&pit->speaker_dev, &speaker_dev_ops);
-		ret = __kvm_io_bus_register_dev(&kvm->pio_bus,
+		ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS,
 						&pit->speaker_dev);
 		if (ret < 0)
 			goto fail_unregister;
@@ -660,7 +660,7 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags)
 	return pit;
 
 fail_unregister:
-	__kvm_io_bus_unregister_dev(&kvm->pio_bus, &pit->dev);
+	kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &pit->dev);
 
 fail:
 	if (pit->irq_source_id >= 0)
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
index d057c0cbd245..b7d145b20953 100644
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@@ -533,7 +533,9 @@ struct kvm_pic *kvm_create_pic(struct kvm *kvm)
 	 * Initialize PIO device
 	 */
 	kvm_iodevice_init(&s->dev, &picdev_ops);
-	ret = kvm_io_bus_register_dev(kvm, &kvm->pio_bus, &s->dev);
+	down_write(&kvm->slots_lock);
+	ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, &s->dev);
+	up_write(&kvm->slots_lock);
 	if (ret < 0) {
 		kfree(s);
 		return NULL;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 28127c936c3b..9b42673df4af 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2851,7 +2851,7 @@ static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len,
 	    !kvm_iodevice_write(&vcpu->arch.apic->dev, addr, len, v))
 		return 0;
 
-	return kvm_io_bus_write(&vcpu->kvm->mmio_bus, addr, len, v);
+	return kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, addr, len, v);
 }
 
 static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v)
@@ -2860,7 +2860,7 @@ static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v)
 	    !kvm_iodevice_read(&vcpu->arch.apic->dev, addr, len, v))
 		return 0;
 
-	return kvm_io_bus_read(&vcpu->kvm->mmio_bus, addr, len, v);
+	return kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, addr, len, v);
 }
 
 static int kvm_read_guest_virt(gva_t addr, void *val, unsigned int bytes,
@@ -3345,11 +3345,12 @@ static int kernel_pio(struct kvm_vcpu *vcpu, void *pd)
 	int r;
 
 	if (vcpu->arch.pio.in)
-		r = kvm_io_bus_read(&vcpu->kvm->pio_bus, vcpu->arch.pio.port,
+		r = kvm_io_bus_read(vcpu->kvm, KVM_PIO_BUS, vcpu->arch.pio.port,
 				    vcpu->arch.pio.size, pd);
 	else
-		r = kvm_io_bus_write(&vcpu->kvm->pio_bus, vcpu->arch.pio.port,
-				     vcpu->arch.pio.size, pd);
+		r = kvm_io_bus_write(vcpu->kvm, KVM_PIO_BUS,
+				     vcpu->arch.pio.port, vcpu->arch.pio.size,
+				     pd);
 	return r;
 }
 
@@ -3360,7 +3361,7 @@ static int pio_string_write(struct kvm_vcpu *vcpu)
 	int i, r = 0;
 
 	for (i = 0; i < io->cur_count; i++) {
-		if (kvm_io_bus_write(&vcpu->kvm->pio_bus,
+		if (kvm_io_bus_write(vcpu->kvm, KVM_PIO_BUS,
 				     io->port, io->size, pd)) {
 			r = -EOPNOTSUPP;
 			break;
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 20941c0f4045..5e9cb902550b 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -57,20 +57,20 @@ struct kvm_io_bus {
 	struct kvm_io_device *devs[NR_IOBUS_DEVS];
 };
 
-void kvm_io_bus_init(struct kvm_io_bus *bus);
-void kvm_io_bus_destroy(struct kvm_io_bus *bus);
-int kvm_io_bus_write(struct kvm_io_bus *bus, gpa_t addr, int len,
-		     const void *val);
-int kvm_io_bus_read(struct kvm_io_bus *bus, gpa_t addr, int len,
+enum kvm_bus {
+	KVM_MMIO_BUS,
+	KVM_PIO_BUS,
+	KVM_NR_BUSES
+};
+
+int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
+		     int len, const void *val);
+int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, int len,
 		    void *val);
-int __kvm_io_bus_register_dev(struct kvm_io_bus *bus,
-			       struct kvm_io_device *dev);
-int kvm_io_bus_register_dev(struct kvm *kvm, struct kvm_io_bus *bus,
+int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx,
 			    struct kvm_io_device *dev);
-void __kvm_io_bus_unregister_dev(struct kvm_io_bus *bus,
-				 struct kvm_io_device *dev);
-void kvm_io_bus_unregister_dev(struct kvm *kvm, struct kvm_io_bus *bus,
-			       struct kvm_io_device *dev);
+int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
+			      struct kvm_io_device *dev);
 
 struct kvm_vcpu {
 	struct kvm *kvm;
@@ -171,8 +171,7 @@ struct kvm {
 	atomic_t online_vcpus;
 	struct list_head vm_list;
 	struct mutex lock;
-	struct kvm_io_bus mmio_bus;
-	struct kvm_io_bus pio_bus;
+	struct kvm_io_bus *buses[KVM_NR_BUSES];
 #ifdef CONFIG_HAVE_KVM_EVENTFD
 	struct {
 		spinlock_t        lock;
diff --git a/virt/kvm/coalesced_mmio.c b/virt/kvm/coalesced_mmio.c
index d68e6c68e0ff..a736a93ca7b7 100644
--- a/virt/kvm/coalesced_mmio.c
+++ b/virt/kvm/coalesced_mmio.c
@@ -110,7 +110,9 @@ int kvm_coalesced_mmio_init(struct kvm *kvm)
 	dev->kvm = kvm;
 	kvm->coalesced_mmio_dev = dev;
 
-	ret = kvm_io_bus_register_dev(kvm, &kvm->mmio_bus, &dev->dev);
+	down_write(&kvm->slots_lock);
+	ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, &dev->dev);
+	up_write(&kvm->slots_lock);
 	if (ret < 0)
 		goto out_free_dev;
 
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index a9d3fc6c681c..315a586ec4d5 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -463,7 +463,7 @@ static int
 kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
 {
 	int                       pio = args->flags & KVM_IOEVENTFD_FLAG_PIO;
-	struct kvm_io_bus        *bus = pio ? &kvm->pio_bus : &kvm->mmio_bus;
+	enum kvm_bus              bus_idx = pio ? KVM_PIO_BUS : KVM_MMIO_BUS;
 	struct _ioeventfd        *p;
 	struct eventfd_ctx       *eventfd;
 	int                       ret;
@@ -518,7 +518,7 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
 
 	kvm_iodevice_init(&p->dev, &ioeventfd_ops);
 
-	ret = __kvm_io_bus_register_dev(bus, &p->dev);
+	ret = kvm_io_bus_register_dev(kvm, bus_idx, &p->dev);
 	if (ret < 0)
 		goto unlock_fail;
 
@@ -542,7 +542,7 @@ static int
 kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
 {
 	int                       pio = args->flags & KVM_IOEVENTFD_FLAG_PIO;
-	struct kvm_io_bus        *bus = pio ? &kvm->pio_bus : &kvm->mmio_bus;
+	enum kvm_bus              bus_idx = pio ? KVM_PIO_BUS : KVM_MMIO_BUS;
 	struct _ioeventfd        *p, *tmp;
 	struct eventfd_ctx       *eventfd;
 	int                       ret = -ENOENT;
@@ -565,7 +565,7 @@ kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
 		if (!p->wildcard && p->datamatch != args->datamatch)
 			continue;
 
-		__kvm_io_bus_unregister_dev(bus, &p->dev);
+		kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev);
 		ioeventfd_release(p);
 		ret = 0;
 		break;
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index 38a2d20b89de..f326a6f301cc 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -372,7 +372,9 @@ int kvm_ioapic_init(struct kvm *kvm)
 	kvm_ioapic_reset(ioapic);
 	kvm_iodevice_init(&ioapic->dev, &ioapic_mmio_ops);
 	ioapic->kvm = kvm;
-	ret = kvm_io_bus_register_dev(kvm, &kvm->mmio_bus, &ioapic->dev);
+	down_write(&kvm->slots_lock);
+	ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, &ioapic->dev);
+	up_write(&kvm->slots_lock);
 	if (ret < 0)
 		kfree(ioapic);
 
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index c680f7b64c6f..659bc12ad16a 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -85,6 +85,8 @@ static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl,
 static int hardware_enable_all(void);
 static void hardware_disable_all(void);
 
+static void kvm_io_bus_destroy(struct kvm_io_bus *bus);
+
 static bool kvm_rebooting;
 
 static bool largepages_enabled = true;
@@ -367,7 +369,7 @@ static int kvm_init_mmu_notifier(struct kvm *kvm)
 
 static struct kvm *kvm_create_vm(void)
 {
-	int r = 0;
+	int r = 0, i;
 	struct kvm *kvm = kvm_arch_create_vm();
 #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
 	struct page *page;
@@ -391,6 +393,14 @@ static struct kvm *kvm_create_vm(void)
 		goto out_err;
 	if (init_srcu_struct(&kvm->srcu))
 		goto out_err;
+	for (i = 0; i < KVM_NR_BUSES; i++) {
+		kvm->buses[i] = kzalloc(sizeof(struct kvm_io_bus),
+					GFP_KERNEL);
+		if (!kvm->buses[i]) {
+			cleanup_srcu_struct(&kvm->srcu);
+			goto out_err;
+		}
+	}
 
 #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
 	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
@@ -416,11 +426,9 @@ static struct kvm *kvm_create_vm(void)
 	atomic_inc(&kvm->mm->mm_count);
 	spin_lock_init(&kvm->mmu_lock);
 	spin_lock_init(&kvm->requests_lock);
-	kvm_io_bus_init(&kvm->pio_bus);
 	kvm_eventfd_init(kvm);
 	mutex_init(&kvm->lock);
 	mutex_init(&kvm->irq_lock);
-	kvm_io_bus_init(&kvm->mmio_bus);
 	init_rwsem(&kvm->slots_lock);
 	atomic_set(&kvm->users_count, 1);
 	spin_lock(&kvm_lock);
@@ -435,6 +443,8 @@ out:
 out_err:
 	hardware_disable_all();
 out_err_nodisable:
+	for (i = 0; i < KVM_NR_BUSES; i++)
+		kfree(kvm->buses[i]);
 	kfree(kvm->memslots);
 	kfree(kvm);
 	return ERR_PTR(r);
@@ -480,6 +490,7 @@ void kvm_free_physmem(struct kvm *kvm)
 
 static void kvm_destroy_vm(struct kvm *kvm)
 {
+	int i;
 	struct mm_struct *mm = kvm->mm;
 
 	kvm_arch_sync_events(kvm);
@@ -487,8 +498,8 @@ static void kvm_destroy_vm(struct kvm *kvm)
 	list_del(&kvm->vm_list);
 	spin_unlock(&kvm_lock);
 	kvm_free_irq_routing(kvm);
-	kvm_io_bus_destroy(&kvm->pio_bus);
-	kvm_io_bus_destroy(&kvm->mmio_bus);
+	for (i = 0; i < KVM_NR_BUSES; i++)
+		kvm_io_bus_destroy(kvm->buses[i]);
 	kvm_coalesced_mmio_free(kvm);
 #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
 	mmu_notifier_unregister(&kvm->mmu_notifier, kvm->mm);
@@ -1949,12 +1960,7 @@ static struct notifier_block kvm_reboot_notifier = {
 	.priority = 0,
 };
 
-void kvm_io_bus_init(struct kvm_io_bus *bus)
-{
-	memset(bus, 0, sizeof(*bus));
-}
-
-void kvm_io_bus_destroy(struct kvm_io_bus *bus)
+static void kvm_io_bus_destroy(struct kvm_io_bus *bus)
 {
 	int i;
 
@@ -1963,13 +1969,15 @@ void kvm_io_bus_destroy(struct kvm_io_bus *bus)
 
 		kvm_iodevice_destructor(pos);
 	}
+	kfree(bus);
 }
 
 /* kvm_io_bus_write - called under kvm->slots_lock */
-int kvm_io_bus_write(struct kvm_io_bus *bus, gpa_t addr,
+int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
 		     int len, const void *val)
 {
 	int i;
+	struct kvm_io_bus *bus = rcu_dereference(kvm->buses[bus_idx]);
 	for (i = 0; i < bus->dev_count; i++)
 		if (!kvm_iodevice_write(bus->devs[i], addr, len, val))
 			return 0;
@@ -1977,59 +1985,71 @@ int kvm_io_bus_write(struct kvm_io_bus *bus, gpa_t addr,
 }
 
 /* kvm_io_bus_read - called under kvm->slots_lock */
-int kvm_io_bus_read(struct kvm_io_bus *bus, gpa_t addr, int len, void *val)
+int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
+		    int len, void *val)
 {
 	int i;
+	struct kvm_io_bus *bus = rcu_dereference(kvm->buses[bus_idx]);
+
 	for (i = 0; i < bus->dev_count; i++)
 		if (!kvm_iodevice_read(bus->devs[i], addr, len, val))
 			return 0;
 	return -EOPNOTSUPP;
 }
 
-int kvm_io_bus_register_dev(struct kvm *kvm, struct kvm_io_bus *bus,
-			     struct kvm_io_device *dev)
+/* Caller must have write lock on slots_lock. */
+int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx,
+			    struct kvm_io_device *dev)
 {
-	int ret;
-
-	down_write(&kvm->slots_lock);
-	ret = __kvm_io_bus_register_dev(bus, dev);
-	up_write(&kvm->slots_lock);
+	struct kvm_io_bus *new_bus, *bus;
 
-	return ret;
-}
-
-/* An unlocked version. Caller must have write lock on slots_lock. */
-int __kvm_io_bus_register_dev(struct kvm_io_bus *bus,
-			      struct kvm_io_device *dev)
-{
+	bus = kvm->buses[bus_idx];
 	if (bus->dev_count > NR_IOBUS_DEVS-1)
 		return -ENOSPC;
 
-	bus->devs[bus->dev_count++] = dev;
+	new_bus = kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL);
+	if (!new_bus)
+		return -ENOMEM;
+	memcpy(new_bus, bus, sizeof(struct kvm_io_bus));
+	new_bus->devs[new_bus->dev_count++] = dev;
+	rcu_assign_pointer(kvm->buses[bus_idx], new_bus);
+	synchronize_srcu_expedited(&kvm->srcu);
+	kfree(bus);
 
 	return 0;
 }
 
-void kvm_io_bus_unregister_dev(struct kvm *kvm,
-			       struct kvm_io_bus *bus,
-			       struct kvm_io_device *dev)
+/* Caller must have write lock on slots_lock. */
+int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
+			      struct kvm_io_device *dev)
 {
-	down_write(&kvm->slots_lock);
-	__kvm_io_bus_unregister_dev(bus, dev);
-	up_write(&kvm->slots_lock);
-}
+	int i, r;
+	struct kvm_io_bus *new_bus, *bus;
 
-/* An unlocked version. Caller must have write lock on slots_lock. */
-void __kvm_io_bus_unregister_dev(struct kvm_io_bus *bus,
-				 struct kvm_io_device *dev)
-{
-	int i;
+	new_bus = kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL);
+	if (!new_bus)
+		return -ENOMEM;
 
-	for (i = 0; i < bus->dev_count; i++)
-		if (bus->devs[i] == dev) {
-			bus->devs[i] = bus->devs[--bus->dev_count];
+	bus = kvm->buses[bus_idx];
+	memcpy(new_bus, bus, sizeof(struct kvm_io_bus));
+
+	r = -ENOENT;
+	for (i = 0; i < new_bus->dev_count; i++)
+		if (new_bus->devs[i] == dev) {
+			r = 0;
+			new_bus->devs[i] = new_bus->devs[--new_bus->dev_count];
 			break;
 		}
+
+	if (r) {
+		kfree(new_bus);
+		return r;
+	}
+
+	rcu_assign_pointer(kvm->buses[bus_idx], new_bus);
+	synchronize_srcu_expedited(&kvm->srcu);
+	kfree(bus);
+	return r;
 }
 
 static struct notifier_block kvm_cpu_notifier = {
-- 
cgit v1.2.3


From f656ce0185cabbbb0cf96877306879661297c7ad Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti <mtosatti@redhat.com>
Date: Wed, 23 Dec 2009 14:35:25 -0200
Subject: KVM: switch vcpu context to use SRCU

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/ia64/kvm/kvm-ia64.c | 15 ++++++---------
 arch/s390/kvm/kvm-s390.h | 10 +++++++---
 arch/x86/kvm/mmu.c       |  7 +++----
 arch/x86/kvm/vmx.c       |  6 +++---
 arch/x86/kvm/x86.c       | 43 ++++++++++++++++++++++++-------------------
 include/linux/kvm_host.h |  2 ++
 6 files changed, 45 insertions(+), 38 deletions(-)

(limited to 'include/linux')

diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index d0ad538f0083..d5e384641275 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -636,12 +636,9 @@ static void kvm_vcpu_post_transition(struct kvm_vcpu *vcpu)
 static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
 	union context *host_ctx, *guest_ctx;
-	int r;
+	int r, idx;
 
-	/*
-	 * down_read() may sleep and return with interrupts enabled
-	 */
-	down_read(&vcpu->kvm->slots_lock);
+	idx = srcu_read_lock(&vcpu->kvm->srcu);
 
 again:
 	if (signal_pending(current)) {
@@ -663,7 +660,7 @@ again:
 	if (r < 0)
 		goto vcpu_run_fail;
 
-	up_read(&vcpu->kvm->slots_lock);
+	srcu_read_unlock(&vcpu->kvm->srcu, idx);
 	kvm_guest_enter();
 
 	/*
@@ -687,7 +684,7 @@ again:
 	kvm_guest_exit();
 	preempt_enable();
 
-	down_read(&vcpu->kvm->slots_lock);
+	idx = srcu_read_lock(&vcpu->kvm->srcu);
 
 	r = kvm_handle_exit(kvm_run, vcpu);
 
@@ -697,10 +694,10 @@ again:
 	}
 
 out:
-	up_read(&vcpu->kvm->slots_lock);
+	srcu_read_unlock(&vcpu->kvm->srcu, idx);
 	if (r > 0) {
 		kvm_resched(vcpu);
-		down_read(&vcpu->kvm->slots_lock);
+		idx = srcu_read_lock(&vcpu->kvm->srcu);
 		goto again;
 	}
 
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index 06cce8285ba0..60f09ab3672c 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -67,10 +67,14 @@ static inline long kvm_s390_vcpu_get_memsize(struct kvm_vcpu *vcpu)
 
 static inline void kvm_s390_vcpu_set_mem(struct kvm_vcpu *vcpu)
 {
+	int idx;
 	struct kvm_memory_slot *mem;
+	struct kvm_memslots *memslots;
 
-	down_read(&vcpu->kvm->slots_lock);
-	mem = &vcpu->kvm->memslots[0];
+	idx = srcu_read_lock(&vcpu->kvm->srcu);
+	memslots = rcu_dereference(vcpu->kvm->memslots);
+
+	mem = &memslots->memslots[0];
 
 	vcpu->arch.sie_block->gmsor = mem->userspace_addr;
 	vcpu->arch.sie_block->gmslm =
@@ -78,7 +82,7 @@ static inline void kvm_s390_vcpu_set_mem(struct kvm_vcpu *vcpu)
 		(mem->npages << PAGE_SHIFT) +
 		VIRTIODESCSPACE - 1ul;
 
-	up_read(&vcpu->kvm->slots_lock);
+	srcu_read_unlock(&vcpu->kvm->srcu, idx);
 }
 
 /* implemented in priv.c */
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index f8bf42a25995..25aabd00aa01 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2933,10 +2933,9 @@ static int mmu_shrink(int nr_to_scan, gfp_t gfp_mask)
 	spin_lock(&kvm_lock);
 
 	list_for_each_entry(kvm, &vm_list, vm_list) {
-		int npages;
+		int npages, idx;
 
-		if (!down_read_trylock(&kvm->slots_lock))
-			continue;
+		idx = srcu_read_lock(&kvm->srcu);
 		spin_lock(&kvm->mmu_lock);
 		npages = kvm->arch.n_alloc_mmu_pages -
 			 kvm->arch.n_free_mmu_pages;
@@ -2949,7 +2948,7 @@ static int mmu_shrink(int nr_to_scan, gfp_t gfp_mask)
 		nr_to_scan--;
 
 		spin_unlock(&kvm->mmu_lock);
-		up_read(&kvm->slots_lock);
+		srcu_read_unlock(&kvm->srcu, idx);
 	}
 	if (kvm_freed)
 		list_move_tail(&kvm_freed->vm_list, &vm_list);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index f1cae7d6113d..22ab7137d1d0 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2478,10 +2478,10 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	u64 msr;
-	int ret;
+	int ret, idx;
 
 	vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP));
-	down_read(&vcpu->kvm->slots_lock);
+	idx = srcu_read_lock(&vcpu->kvm->srcu);
 	if (!init_rmode(vmx->vcpu.kvm)) {
 		ret = -ENOMEM;
 		goto out;
@@ -2589,7 +2589,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
 	vmx->emulation_required = 0;
 
 out:
-	up_read(&vcpu->kvm->slots_lock);
+	srcu_read_unlock(&vcpu->kvm->srcu, idx);
 	return ret;
 }
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 9b42673df4af..53bc06a68105 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1306,15 +1306,15 @@ static int __msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs,
 		    int (*do_msr)(struct kvm_vcpu *vcpu,
 				  unsigned index, u64 *data))
 {
-	int i;
+	int i, idx;
 
 	vcpu_load(vcpu);
 
-	down_read(&vcpu->kvm->slots_lock);
+	idx = srcu_read_lock(&vcpu->kvm->srcu);
 	for (i = 0; i < msrs->nmsrs; ++i)
 		if (do_msr(vcpu, entries[i].index, &entries[i].data))
 			break;
-	up_read(&vcpu->kvm->slots_lock);
+	srcu_read_unlock(&vcpu->kvm->srcu, idx);
 
 	vcpu_put(vcpu);
 
@@ -3900,14 +3900,15 @@ static void vapic_enter(struct kvm_vcpu *vcpu)
 static void vapic_exit(struct kvm_vcpu *vcpu)
 {
 	struct kvm_lapic *apic = vcpu->arch.apic;
+	int idx;
 
 	if (!apic || !apic->vapic_addr)
 		return;
 
-	down_read(&vcpu->kvm->slots_lock);
+	idx = srcu_read_lock(&vcpu->kvm->srcu);
 	kvm_release_page_dirty(apic->vapic_page);
 	mark_page_dirty(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT);
-	up_read(&vcpu->kvm->slots_lock);
+	srcu_read_unlock(&vcpu->kvm->srcu, idx);
 }
 
 static void update_cr8_intercept(struct kvm_vcpu *vcpu)
@@ -4036,7 +4037,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 		kvm_lapic_sync_to_vapic(vcpu);
 	}
 
-	up_read(&vcpu->kvm->slots_lock);
+	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
 
 	kvm_guest_enter();
 
@@ -4078,7 +4079,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 
 	preempt_enable();
 
-	down_read(&vcpu->kvm->slots_lock);
+	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
 
 	/*
 	 * Profile KVM exit RIPs:
@@ -4100,6 +4101,7 @@ out:
 static int __vcpu_run(struct kvm_vcpu *vcpu)
 {
 	int r;
+	struct kvm *kvm = vcpu->kvm;
 
 	if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED)) {
 		pr_debug("vcpu %d received sipi with vector # %x\n",
@@ -4111,7 +4113,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
 		vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
 	}
 
-	down_read(&vcpu->kvm->slots_lock);
+	vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
 	vapic_enter(vcpu);
 
 	r = 1;
@@ -4119,9 +4121,9 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
 		if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE)
 			r = vcpu_enter_guest(vcpu);
 		else {
-			up_read(&vcpu->kvm->slots_lock);
+			srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
 			kvm_vcpu_block(vcpu);
-			down_read(&vcpu->kvm->slots_lock);
+			vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
 			if (test_and_clear_bit(KVM_REQ_UNHALT, &vcpu->requests))
 			{
 				switch(vcpu->arch.mp_state) {
@@ -4156,13 +4158,13 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
 			++vcpu->stat.signal_exits;
 		}
 		if (need_resched()) {
-			up_read(&vcpu->kvm->slots_lock);
+			srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
 			kvm_resched(vcpu);
-			down_read(&vcpu->kvm->slots_lock);
+			vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
 		}
 	}
 
-	up_read(&vcpu->kvm->slots_lock);
+	srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
 	post_kvm_run_save(vcpu);
 
 	vapic_exit(vcpu);
@@ -4201,10 +4203,10 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 		vcpu->mmio_read_completed = 1;
 		vcpu->mmio_needed = 0;
 
-		down_read(&vcpu->kvm->slots_lock);
+		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
 		r = emulate_instruction(vcpu, vcpu->arch.mmio_fault_cr2, 0,
 					EMULTYPE_NO_DECODE);
-		up_read(&vcpu->kvm->slots_lock);
+		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
 		if (r == EMULATE_DO_MMIO) {
 			/*
 			 * Read-modify-write.  Back to userspace.
@@ -4967,11 +4969,12 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
 {
 	unsigned long vaddr = tr->linear_address;
 	gpa_t gpa;
+	int idx;
 
 	vcpu_load(vcpu);
-	down_read(&vcpu->kvm->slots_lock);
+	idx = srcu_read_lock(&vcpu->kvm->srcu);
 	gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, vaddr);
-	up_read(&vcpu->kvm->slots_lock);
+	srcu_read_unlock(&vcpu->kvm->srcu, idx);
 	tr->physical_address = gpa;
 	tr->valid = gpa != UNMAPPED_GVA;
 	tr->writeable = 1;
@@ -5223,11 +5226,13 @@ fail:
 
 void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
 {
+	int idx;
+
 	kfree(vcpu->arch.mce_banks);
 	kvm_free_lapic(vcpu);
-	down_read(&vcpu->kvm->slots_lock);
+	idx = srcu_read_lock(&vcpu->kvm->srcu);
 	kvm_mmu_destroy(vcpu);
-	up_read(&vcpu->kvm->slots_lock);
+	srcu_read_unlock(&vcpu->kvm->srcu, idx);
 	free_page((unsigned long)vcpu->arch.pio_data);
 }
 
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 5e9cb902550b..0bb9aa295e6c 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -83,6 +83,8 @@ struct kvm_vcpu {
 	struct kvm_run *run;
 	unsigned long requests;
 	unsigned long guest_debug;
+	int srcu_idx;
+
 	int fpu_active;
 	int guest_fpu_loaded;
 	wait_queue_head_t wq;
-- 
cgit v1.2.3


From 79fac95ecfa3969aab8119d37ccd7226165f933a Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti <mtosatti@redhat.com>
Date: Wed, 23 Dec 2009 14:35:26 -0200
Subject: KVM: convert slots_lock to a mutex

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/ia64/kvm/kvm-ia64.c  |  4 ++--
 arch/powerpc/kvm/book3s.c |  4 ++--
 arch/x86/kvm/i8254.c      |  2 +-
 arch/x86/kvm/i8259.c      |  4 ++--
 arch/x86/kvm/vmx.c        |  8 ++++----
 arch/x86/kvm/x86.c        | 16 ++++++++--------
 include/linux/kvm_host.h  |  2 +-
 virt/kvm/coalesced_mmio.c | 14 +++++++-------
 virt/kvm/eventfd.c        | 10 +++++-----
 virt/kvm/ioapic.c         |  4 ++--
 virt/kvm/kvm_main.c       | 10 +++++-----
 11 files changed, 39 insertions(+), 39 deletions(-)

(limited to 'include/linux')

diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index d5e384641275..e6ac549f8d55 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -1834,7 +1834,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 	struct kvm_memory_slot *memslot;
 	int is_dirty = 0;
 
-	down_write(&kvm->slots_lock);
+	mutex_lock(&kvm->slots_lock);
 	spin_lock(&kvm->arch.dirty_log_lock);
 
 	r = kvm_ia64_sync_dirty_log(kvm, log);
@@ -1854,7 +1854,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 	}
 	r = 0;
 out:
-	up_write(&kvm->slots_lock);
+	mutex_unlock(&kvm->slots_lock);
 	spin_unlock(&kvm->arch.dirty_log_lock);
 	return r;
 }
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index bb8873dcb20f..492dcc198dd3 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -857,7 +857,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 	int is_dirty = 0;
 	int r, n;
 
-	down_write(&kvm->slots_lock);
+	mutex_lock(&kvm->slots_lock);
 
 	r = kvm_get_dirty_log(kvm, log, &is_dirty);
 	if (r)
@@ -879,7 +879,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 
 	r = 0;
 out:
-	up_write(&kvm->slots_lock);
+	mutex_unlock(&kvm->slots_lock);
 	return r;
 }
 
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 4b433de02e5b..6a74246f80c6 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -605,7 +605,7 @@ static const struct kvm_io_device_ops speaker_dev_ops = {
 	.write    = speaker_ioport_write,
 };
 
-/* Caller must have writers lock on slots_lock */
+/* Caller must hold slots_lock */
 struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags)
 {
 	struct kvm_pit *pit;
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
index b7d145b20953..d5753a75d58c 100644
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@@ -533,9 +533,9 @@ struct kvm_pic *kvm_create_pic(struct kvm *kvm)
 	 * Initialize PIO device
 	 */
 	kvm_iodevice_init(&s->dev, &picdev_ops);
-	down_write(&kvm->slots_lock);
+	mutex_lock(&kvm->slots_lock);
 	ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, &s->dev);
-	up_write(&kvm->slots_lock);
+	mutex_unlock(&kvm->slots_lock);
 	if (ret < 0) {
 		kfree(s);
 		return NULL;
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 22ab7137d1d0..f04e2ff21383 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2223,7 +2223,7 @@ static int alloc_apic_access_page(struct kvm *kvm)
 	struct kvm_userspace_memory_region kvm_userspace_mem;
 	int r = 0;
 
-	down_write(&kvm->slots_lock);
+	mutex_lock(&kvm->slots_lock);
 	if (kvm->arch.apic_access_page)
 		goto out;
 	kvm_userspace_mem.slot = APIC_ACCESS_PAGE_PRIVATE_MEMSLOT;
@@ -2236,7 +2236,7 @@ static int alloc_apic_access_page(struct kvm *kvm)
 
 	kvm->arch.apic_access_page = gfn_to_page(kvm, 0xfee00);
 out:
-	up_write(&kvm->slots_lock);
+	mutex_unlock(&kvm->slots_lock);
 	return r;
 }
 
@@ -2245,7 +2245,7 @@ static int alloc_identity_pagetable(struct kvm *kvm)
 	struct kvm_userspace_memory_region kvm_userspace_mem;
 	int r = 0;
 
-	down_write(&kvm->slots_lock);
+	mutex_lock(&kvm->slots_lock);
 	if (kvm->arch.ept_identity_pagetable)
 		goto out;
 	kvm_userspace_mem.slot = IDENTITY_PAGETABLE_PRIVATE_MEMSLOT;
@@ -2260,7 +2260,7 @@ static int alloc_identity_pagetable(struct kvm *kvm)
 	kvm->arch.ept_identity_pagetable = gfn_to_page(kvm,
 			kvm->arch.ept_identity_map_addr >> PAGE_SHIFT);
 out:
-	up_write(&kvm->slots_lock);
+	mutex_unlock(&kvm->slots_lock);
 	return r;
 }
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 53bc06a68105..aff3479867a8 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2208,14 +2208,14 @@ static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm,
 	if (kvm_nr_mmu_pages < KVM_MIN_ALLOC_MMU_PAGES)
 		return -EINVAL;
 
-	down_write(&kvm->slots_lock);
+	mutex_lock(&kvm->slots_lock);
 	spin_lock(&kvm->mmu_lock);
 
 	kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages);
 	kvm->arch.n_requested_mmu_pages = kvm_nr_mmu_pages;
 
 	spin_unlock(&kvm->mmu_lock);
-	up_write(&kvm->slots_lock);
+	mutex_unlock(&kvm->slots_lock);
 	return 0;
 }
 
@@ -2292,7 +2292,7 @@ static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm,
 	if (!aliases)
 		goto out;
 
-	down_write(&kvm->slots_lock);
+	mutex_lock(&kvm->slots_lock);
 
 	/* invalidate any gfn reference in case of deletion/shrinking */
 	memcpy(aliases, kvm->arch.aliases, sizeof(struct kvm_mem_aliases));
@@ -2328,7 +2328,7 @@ static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm,
 	r = 0;
 
 out_unlock:
-	up_write(&kvm->slots_lock);
+	mutex_unlock(&kvm->slots_lock);
 out:
 	return r;
 }
@@ -2462,7 +2462,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 	unsigned long is_dirty = 0;
 	unsigned long *dirty_bitmap = NULL;
 
-	down_write(&kvm->slots_lock);
+	mutex_lock(&kvm->slots_lock);
 
 	r = -EINVAL;
 	if (log->slot >= KVM_MEMORY_SLOTS)
@@ -2512,7 +2512,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 out_free:
 	vfree(dirty_bitmap);
 out:
-	up_write(&kvm->slots_lock);
+	mutex_unlock(&kvm->slots_lock);
 	return r;
 }
 
@@ -2625,7 +2625,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
 				   sizeof(struct kvm_pit_config)))
 			goto out;
 	create_pit:
-		down_write(&kvm->slots_lock);
+		mutex_lock(&kvm->slots_lock);
 		r = -EEXIST;
 		if (kvm->arch.vpit)
 			goto create_pit_unlock;
@@ -2634,7 +2634,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
 		if (kvm->arch.vpit)
 			r = 0;
 	create_pit_unlock:
-		up_write(&kvm->slots_lock);
+		mutex_unlock(&kvm->slots_lock);
 		break;
 	case KVM_IRQ_LINE_STATUS:
 	case KVM_IRQ_LINE: {
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 0bb9aa295e6c..bb0314ea9267 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -161,7 +161,7 @@ struct kvm_memslots {
 struct kvm {
 	spinlock_t mmu_lock;
 	spinlock_t requests_lock;
-	struct rw_semaphore slots_lock;
+	struct mutex slots_lock;
 	struct mm_struct *mm; /* userspace tied to this vm */
 	struct kvm_memslots *memslots;
 	struct srcu_struct srcu;
diff --git a/virt/kvm/coalesced_mmio.c b/virt/kvm/coalesced_mmio.c
index a736a93ca7b7..5de6594260cb 100644
--- a/virt/kvm/coalesced_mmio.c
+++ b/virt/kvm/coalesced_mmio.c
@@ -110,9 +110,9 @@ int kvm_coalesced_mmio_init(struct kvm *kvm)
 	dev->kvm = kvm;
 	kvm->coalesced_mmio_dev = dev;
 
-	down_write(&kvm->slots_lock);
+	mutex_lock(&kvm->slots_lock);
 	ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, &dev->dev);
-	up_write(&kvm->slots_lock);
+	mutex_unlock(&kvm->slots_lock);
 	if (ret < 0)
 		goto out_free_dev;
 
@@ -140,16 +140,16 @@ int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm,
 	if (dev == NULL)
 		return -EINVAL;
 
-	down_write(&kvm->slots_lock);
+	mutex_lock(&kvm->slots_lock);
 	if (dev->nb_zones >= KVM_COALESCED_MMIO_ZONE_MAX) {
-		up_write(&kvm->slots_lock);
+		mutex_unlock(&kvm->slots_lock);
 		return -ENOBUFS;
 	}
 
 	dev->zone[dev->nb_zones] = *zone;
 	dev->nb_zones++;
 
-	up_write(&kvm->slots_lock);
+	mutex_unlock(&kvm->slots_lock);
 	return 0;
 }
 
@@ -163,7 +163,7 @@ int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm,
 	if (dev == NULL)
 		return -EINVAL;
 
-	down_write(&kvm->slots_lock);
+	mutex_lock(&kvm->slots_lock);
 
 	i = dev->nb_zones;
 	while(i) {
@@ -181,7 +181,7 @@ int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm,
 		i--;
 	}
 
-	up_write(&kvm->slots_lock);
+	mutex_unlock(&kvm->slots_lock);
 
 	return 0;
 }
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 315a586ec4d5..486c604365d9 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -508,7 +508,7 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
 	else
 		p->wildcard = true;
 
-	down_write(&kvm->slots_lock);
+	mutex_lock(&kvm->slots_lock);
 
 	/* Verify that there isnt a match already */
 	if (ioeventfd_check_collision(kvm, p)) {
@@ -524,12 +524,12 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
 
 	list_add_tail(&p->list, &kvm->ioeventfds);
 
-	up_write(&kvm->slots_lock);
+	mutex_unlock(&kvm->slots_lock);
 
 	return 0;
 
 unlock_fail:
-	up_write(&kvm->slots_lock);
+	mutex_unlock(&kvm->slots_lock);
 
 fail:
 	kfree(p);
@@ -551,7 +551,7 @@ kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
 	if (IS_ERR(eventfd))
 		return PTR_ERR(eventfd);
 
-	down_write(&kvm->slots_lock);
+	mutex_lock(&kvm->slots_lock);
 
 	list_for_each_entry_safe(p, tmp, &kvm->ioeventfds, list) {
 		bool wildcard = !(args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH);
@@ -571,7 +571,7 @@ kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
 		break;
 	}
 
-	up_write(&kvm->slots_lock);
+	mutex_unlock(&kvm->slots_lock);
 
 	eventfd_ctx_put(eventfd);
 
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index f326a6f301cc..f01392f51e86 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -372,9 +372,9 @@ int kvm_ioapic_init(struct kvm *kvm)
 	kvm_ioapic_reset(ioapic);
 	kvm_iodevice_init(&ioapic->dev, &ioapic_mmio_ops);
 	ioapic->kvm = kvm;
-	down_write(&kvm->slots_lock);
+	mutex_lock(&kvm->slots_lock);
 	ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, &ioapic->dev);
-	up_write(&kvm->slots_lock);
+	mutex_unlock(&kvm->slots_lock);
 	if (ret < 0)
 		kfree(ioapic);
 
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 659bc12ad16a..2b7cd6c0d9ca 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -429,7 +429,7 @@ static struct kvm *kvm_create_vm(void)
 	kvm_eventfd_init(kvm);
 	mutex_init(&kvm->lock);
 	mutex_init(&kvm->irq_lock);
-	init_rwsem(&kvm->slots_lock);
+	mutex_init(&kvm->slots_lock);
 	atomic_set(&kvm->users_count, 1);
 	spin_lock(&kvm_lock);
 	list_add(&kvm->vm_list, &vm_list);
@@ -763,9 +763,9 @@ int kvm_set_memory_region(struct kvm *kvm,
 {
 	int r;
 
-	down_write(&kvm->slots_lock);
+	mutex_lock(&kvm->slots_lock);
 	r = __kvm_set_memory_region(kvm, mem, user_alloc);
-	up_write(&kvm->slots_lock);
+	mutex_unlock(&kvm->slots_lock);
 	return r;
 }
 EXPORT_SYMBOL_GPL(kvm_set_memory_region);
@@ -1997,7 +1997,7 @@ int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
 	return -EOPNOTSUPP;
 }
 
-/* Caller must have write lock on slots_lock. */
+/* Caller must hold slots_lock. */
 int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx,
 			    struct kvm_io_device *dev)
 {
@@ -2019,7 +2019,7 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx,
 	return 0;
 }
 
-/* Caller must have write lock on slots_lock. */
+/* Caller must hold slots_lock. */
 int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
 			      struct kvm_io_device *dev)
 {
-- 
cgit v1.2.3


From 02daab21d94dc4cf01b2fd09863d59a436900322 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Wed, 30 Dec 2009 12:40:26 +0200
Subject: KVM: Lazify fpu activation and deactivation

Defer fpu deactivation as much as possible - if the guest fpu is loaded, keep
it loaded until the next heavyweight exit (where we are forced to unload it).
This reduces unnecessary exits.

We also defer fpu activation on clts; while clts signals the intent to use the
fpu, we can't be sure the guest will actually use it.

Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/kvm/svm.c              | 35 +++++++++++++++++++++--------------
 arch/x86/kvm/vmx.c              | 25 +++++++++----------------
 arch/x86/kvm/x86.c              |  7 ++++++-
 include/linux/kvm_host.h        |  1 +
 5 files changed, 38 insertions(+), 31 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 693046a7a12d..93bee7abb71c 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -506,6 +506,7 @@ struct kvm_x86_ops {
 	void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg);
 	unsigned long (*get_rflags)(struct kvm_vcpu *vcpu);
 	void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags);
+	void (*fpu_deactivate)(struct kvm_vcpu *vcpu);
 
 	void (*tlb_flush)(struct kvm_vcpu *vcpu);
 
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 3899c2d19830..5b336a80f31e 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -984,17 +984,11 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 	if (npt_enabled)
 		goto set;
 
-	if (kvm_read_cr0_bits(vcpu, X86_CR0_TS) && !(cr0 & X86_CR0_TS)) {
-		svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR);
-		vcpu->fpu_active = 1;
-	}
-
 	vcpu->arch.cr0 = cr0;
 	cr0 |= X86_CR0_PG | X86_CR0_WP;
-	if (!vcpu->fpu_active) {
-		svm->vmcb->control.intercept_exceptions |= (1 << NM_VECTOR);
+
+	if (!vcpu->fpu_active)
 		cr0 |= X86_CR0_TS;
-	}
 set:
 	/*
 	 * re-enable caching here because the QEMU bios
@@ -1250,6 +1244,8 @@ static int nm_interception(struct vcpu_svm *svm)
 	svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR);
 	if (!kvm_read_cr0_bits(&svm->vcpu, X86_CR0_TS))
 		svm->vmcb->save.cr0 &= ~X86_CR0_TS;
+	else
+		svm->vmcb->save.cr0 |= X86_CR0_TS;
 	svm->vcpu.fpu_active = 1;
 
 	return 1;
@@ -2586,6 +2582,8 @@ static void svm_flush_tlb(struct kvm_vcpu *vcpu)
 
 static void svm_prepare_guest_switch(struct kvm_vcpu *vcpu)
 {
+	if (npt_enabled)
+		vcpu->fpu_active = 1;
 }
 
 static inline void sync_cr8_to_lapic(struct kvm_vcpu *vcpu)
@@ -2805,12 +2803,6 @@ static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root)
 
 	svm->vmcb->save.cr3 = root;
 	force_new_asid(vcpu);
-
-	if (vcpu->fpu_active) {
-		svm->vmcb->control.intercept_exceptions |= (1 << NM_VECTOR);
-		svm->vmcb->save.cr0 |= X86_CR0_TS;
-		vcpu->fpu_active = 0;
-	}
 }
 
 static int is_disabled(void)
@@ -2926,6 +2918,20 @@ static bool svm_rdtscp_supported(void)
 	return false;
 }
 
+static void svm_fpu_deactivate(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_svm *svm = to_svm(vcpu);
+
+	if (npt_enabled) {
+		/* hack: npt requires active fpu at this time */
+		vcpu->fpu_active = 1;
+		return;
+	}
+
+	svm->vmcb->control.intercept_exceptions |= 1 << NM_VECTOR;
+	svm->vmcb->save.cr0 |= X86_CR0_TS;
+}
+
 static struct kvm_x86_ops svm_x86_ops = {
 	.cpu_has_kvm_support = has_svm,
 	.disabled_by_bios = is_disabled,
@@ -2967,6 +2973,7 @@ static struct kvm_x86_ops svm_x86_ops = {
 	.cache_reg = svm_cache_reg,
 	.get_rflags = svm_get_rflags,
 	.set_rflags = svm_set_rflags,
+	.fpu_deactivate = svm_fpu_deactivate,
 
 	.tlb_flush = svm_flush_tlb,
 
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index dbcdb55094f7..d11be3fb7c80 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -66,7 +66,7 @@ module_param(emulate_invalid_guest_state, bool, S_IRUGO);
 #define KVM_GUEST_CR0_MASK						\
 	(KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE)
 #define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST				\
-	(X86_CR0_WP | X86_CR0_NE | X86_CR0_TS | X86_CR0_MP)
+	(X86_CR0_WP | X86_CR0_NE | X86_CR0_MP)
 #define KVM_VM_CR0_ALWAYS_ON						\
 	(KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE)
 #define KVM_CR4_GUEST_OWNED_BITS				      \
@@ -579,9 +579,8 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
 {
 	u32 eb;
 
-	eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR);
-	if (!vcpu->fpu_active)
-		eb |= 1u << NM_VECTOR;
+	eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR)
+		| (1u << NM_VECTOR);
 	/*
 	 * Unconditionally intercept #DB so we can maintain dr6 without
 	 * reading it every exit.
@@ -595,6 +594,8 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
 		eb = ~0;
 	if (enable_ept)
 		eb &= ~(1u << PF_VECTOR); /* bypass_guest_pf = 0 */
+	if (vcpu->fpu_active)
+		eb &= ~(1u << NM_VECTOR);
 	vmcs_write32(EXCEPTION_BITMAP, eb);
 }
 
@@ -806,9 +807,6 @@ static void vmx_fpu_activate(struct kvm_vcpu *vcpu)
 
 static void vmx_fpu_deactivate(struct kvm_vcpu *vcpu)
 {
-	if (!vcpu->fpu_active)
-		return;
-	vcpu->fpu_active = 0;
 	vmcs_set_bits(GUEST_CR0, X86_CR0_TS);
 	update_exception_bitmap(vcpu);
 }
@@ -1737,8 +1735,6 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 	else
 		hw_cr0 = (cr0 & ~KVM_GUEST_CR0_MASK) | KVM_VM_CR0_ALWAYS_ON;
 
-	vmx_fpu_deactivate(vcpu);
-
 	if (vmx->rmode.vm86_active && (cr0 & X86_CR0_PE))
 		enter_pmode(vcpu);
 
@@ -1757,12 +1753,12 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 	if (enable_ept)
 		ept_update_paging_mode_cr0(&hw_cr0, cr0, vcpu);
 
+	if (!vcpu->fpu_active)
+		hw_cr0 |= X86_CR0_TS;
+
 	vmcs_writel(CR0_READ_SHADOW, cr0);
 	vmcs_writel(GUEST_CR0, hw_cr0);
 	vcpu->arch.cr0 = cr0;
-
-	if (!(cr0 & X86_CR0_TS) || !(cr0 & X86_CR0_PE))
-		vmx_fpu_activate(vcpu);
 }
 
 static u64 construct_eptp(unsigned long root_hpa)
@@ -1793,8 +1789,6 @@ static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
 
 	vmx_flush_tlb(vcpu);
 	vmcs_writel(GUEST_CR3, guest_cr3);
-	if (kvm_read_cr0_bits(vcpu, X86_CR0_PE))
-		vmx_fpu_deactivate(vcpu);
 }
 
 static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
@@ -3002,11 +2996,9 @@ static int handle_cr(struct kvm_vcpu *vcpu)
 		};
 		break;
 	case 2: /* clts */
-		vmx_fpu_deactivate(vcpu);
 		vcpu->arch.cr0 &= ~X86_CR0_TS;
 		vmcs_writel(CR0_READ_SHADOW, kvm_read_cr0(vcpu));
 		trace_kvm_cr_write(0, kvm_read_cr0(vcpu));
-		vmx_fpu_activate(vcpu);
 		skip_emulated_instruction(vcpu);
 		return 1;
 	case 1: /*mov from cr*/
@@ -4127,6 +4119,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
 	.cache_reg = vmx_cache_reg,
 	.get_rflags = vmx_get_rflags,
 	.set_rflags = vmx_set_rflags,
+	.fpu_deactivate = vmx_fpu_deactivate,
 
 	.tlb_flush = vmx_flush_tlb,
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 748b15d8e46d..1de2ad7a004d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1509,8 +1509,8 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 
 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 {
-	kvm_x86_ops->vcpu_put(vcpu);
 	kvm_put_guest_fpu(vcpu);
+	kvm_x86_ops->vcpu_put(vcpu);
 }
 
 static int is_efer_nx(void)
@@ -4006,6 +4006,10 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 			r = 0;
 			goto out;
 		}
+		if (test_and_clear_bit(KVM_REQ_DEACTIVATE_FPU, &vcpu->requests)) {
+			vcpu->fpu_active = 0;
+			kvm_x86_ops->fpu_deactivate(vcpu);
+		}
 	}
 
 	preempt_disable();
@@ -5075,6 +5079,7 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
 	kvm_fx_save(&vcpu->arch.guest_fx_image);
 	kvm_fx_restore(&vcpu->arch.host_fx_image);
 	++vcpu->stat.fpu_reload;
+	set_bit(KVM_REQ_DEACTIVATE_FPU, &vcpu->requests);
 }
 EXPORT_SYMBOL_GPL(kvm_put_guest_fpu);
 
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index bb0314ea9267..dfde04b0d453 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -38,6 +38,7 @@
 #define KVM_REQ_MMU_SYNC           7
 #define KVM_REQ_KVMCLOCK_UPDATE    8
 #define KVM_REQ_KICK               9
+#define KVM_REQ_DEACTIVATE_FPU    10
 
 #define KVM_USERSPACE_IRQ_SOURCE_ID	0
 
-- 
cgit v1.2.3


From 55cd8e5a4edb8e235163ffe8264b9aaa8d7c050f Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Sun, 17 Jan 2010 15:51:22 +0200
Subject: KVM: Implement bare minimum of HYPER-V MSRs

Minimum HYPER-V implementation should have GUEST_OS_ID, HYPERCALL and
VP_INDEX MSRs.

[avi: fix build on i386]

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Vadim Rozenfeld <vrozenfe@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |   4 +
 arch/x86/include/asm/kvm_para.h |   1 +
 arch/x86/kvm/trace.h            |  32 +++++++
 arch/x86/kvm/x86.c              | 193 +++++++++++++++++++++++++++++++++++++++-
 include/linux/kvm.h             |   1 +
 5 files changed, 230 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 93bee7abb71c..67d19e422006 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -413,6 +413,10 @@ struct kvm_arch {
 	s64 kvmclock_offset;
 
 	struct kvm_xen_hvm_config xen_hvm_config;
+
+	/* fields used by HYPER-V emulation */
+	u64 hv_guest_os_id;
+	u64 hv_hypercall;
 };
 
 struct kvm_vm_stat {
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index c584076a47f4..ffae1420e7d7 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -2,6 +2,7 @@
 #define _ASM_X86_KVM_PARA_H
 
 #include <linux/types.h>
+#include <asm/hyperv.h>
 
 /* This CPUID returns the signature 'KVMKVMKVM' in ebx, ecx, and edx.  It
  * should be used to determine that a VM is running under KVM.
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index 816e0449db0b..1cb3d0e990f3 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -55,6 +55,38 @@ TRACE_EVENT(kvm_hypercall,
 		 __entry->a3)
 );
 
+/*
+ * Tracepoint for hypercall.
+ */
+TRACE_EVENT(kvm_hv_hypercall,
+	TP_PROTO(__u16 code, bool fast, __u16 rep_cnt, __u16 rep_idx,
+		 __u64 ingpa, __u64 outgpa),
+	TP_ARGS(code, fast, rep_cnt, rep_idx, ingpa, outgpa),
+
+	TP_STRUCT__entry(
+		__field(	__u16, 		code		)
+		__field(	bool,		fast		)
+		__field(	__u16,		rep_cnt		)
+		__field(	__u16,		rep_idx		)
+		__field(	__u64,		ingpa		)
+		__field(	__u64,		outgpa		)
+	),
+
+	TP_fast_assign(
+		__entry->code		= code;
+		__entry->fast		= fast;
+		__entry->rep_cnt	= rep_cnt;
+		__entry->rep_idx	= rep_idx;
+		__entry->ingpa		= ingpa;
+		__entry->outgpa		= outgpa;
+	),
+
+	TP_printk("code 0x%x %s cnt 0x%x idx 0x%x in 0x%llx out 0x%llx",
+		  __entry->code, __entry->fast ? "fast" : "slow",
+		  __entry->rep_cnt, __entry->rep_idx,  __entry->ingpa,
+		  __entry->outgpa)
+);
+
 /*
  * Tracepoint for PIO.
  */
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 1ad34d185da9..480137db4770 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -622,9 +622,10 @@ static inline u32 bit(int bitno)
  * kvm-specific. Those are put in the beginning of the list.
  */
 
-#define KVM_SAVE_MSRS_BEGIN	2
+#define KVM_SAVE_MSRS_BEGIN	4
 static u32 msrs_to_save[] = {
 	MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
+	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
 	MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
 	MSR_K6_STAR,
 #ifdef CONFIG_X86_64
@@ -1004,6 +1005,74 @@ out:
 	return r;
 }
 
+static bool kvm_hv_hypercall_enabled(struct kvm *kvm)
+{
+	return kvm->arch.hv_hypercall & HV_X64_MSR_HYPERCALL_ENABLE;
+}
+
+static bool kvm_hv_msr_partition_wide(u32 msr)
+{
+	bool r = false;
+	switch (msr) {
+	case HV_X64_MSR_GUEST_OS_ID:
+	case HV_X64_MSR_HYPERCALL:
+		r = true;
+		break;
+	}
+
+	return r;
+}
+
+static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
+{
+	struct kvm *kvm = vcpu->kvm;
+
+	switch (msr) {
+	case HV_X64_MSR_GUEST_OS_ID:
+		kvm->arch.hv_guest_os_id = data;
+		/* setting guest os id to zero disables hypercall page */
+		if (!kvm->arch.hv_guest_os_id)
+			kvm->arch.hv_hypercall &= ~HV_X64_MSR_HYPERCALL_ENABLE;
+		break;
+	case HV_X64_MSR_HYPERCALL: {
+		u64 gfn;
+		unsigned long addr;
+		u8 instructions[4];
+
+		/* if guest os id is not set hypercall should remain disabled */
+		if (!kvm->arch.hv_guest_os_id)
+			break;
+		if (!(data & HV_X64_MSR_HYPERCALL_ENABLE)) {
+			kvm->arch.hv_hypercall = data;
+			break;
+		}
+		gfn = data >> HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT;
+		addr = gfn_to_hva(kvm, gfn);
+		if (kvm_is_error_hva(addr))
+			return 1;
+		kvm_x86_ops->patch_hypercall(vcpu, instructions);
+		((unsigned char *)instructions)[3] = 0xc3; /* ret */
+		if (copy_to_user((void __user *)addr, instructions, 4))
+			return 1;
+		kvm->arch.hv_hypercall = data;
+		break;
+	}
+	default:
+		pr_unimpl(vcpu, "HYPER-V unimplemented wrmsr: 0x%x "
+			  "data 0x%llx\n", msr, data);
+		return 1;
+	}
+	return 0;
+}
+
+static int set_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 data)
+{
+	pr_unimpl(vcpu, "HYPER-V unimplemented wrmsr: 0x%x data 0x%llx\n",
+		  msr, data);
+
+	return 1;
+}
+
 int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 {
 	switch (msr) {
@@ -1118,6 +1187,16 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 		pr_unimpl(vcpu, "unimplemented perfctr wrmsr: "
 			"0x%x data 0x%llx\n", msr, data);
 		break;
+	case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15:
+		if (kvm_hv_msr_partition_wide(msr)) {
+			int r;
+			mutex_lock(&vcpu->kvm->lock);
+			r = set_msr_hyperv_pw(vcpu, msr, data);
+			mutex_unlock(&vcpu->kvm->lock);
+			return r;
+		} else
+			return set_msr_hyperv(vcpu, msr, data);
+		break;
 	default:
 		if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr))
 			return xen_hvm_config(vcpu, data);
@@ -1217,6 +1296,48 @@ static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
 	return 0;
 }
 
+static int get_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
+{
+	u64 data = 0;
+	struct kvm *kvm = vcpu->kvm;
+
+	switch (msr) {
+	case HV_X64_MSR_GUEST_OS_ID:
+		data = kvm->arch.hv_guest_os_id;
+		break;
+	case HV_X64_MSR_HYPERCALL:
+		data = kvm->arch.hv_hypercall;
+		break;
+	default:
+		pr_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
+		return 1;
+	}
+
+	*pdata = data;
+	return 0;
+}
+
+static int get_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
+{
+	u64 data = 0;
+
+	switch (msr) {
+	case HV_X64_MSR_VP_INDEX: {
+		int r;
+		struct kvm_vcpu *v;
+		kvm_for_each_vcpu(r, v, vcpu->kvm)
+			if (v == vcpu)
+				data = r;
+		break;
+	}
+	default:
+		pr_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
+		return 1;
+	}
+	*pdata = data;
+	return 0;
+}
+
 int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
 {
 	u64 data;
@@ -1283,6 +1404,16 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
 	case MSR_IA32_MCG_STATUS:
 	case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1:
 		return get_msr_mce(vcpu, msr, pdata);
+	case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15:
+		if (kvm_hv_msr_partition_wide(msr)) {
+			int r;
+			mutex_lock(&vcpu->kvm->lock);
+			r = get_msr_hyperv_pw(vcpu, msr, pdata);
+			mutex_unlock(&vcpu->kvm->lock);
+			return r;
+		} else
+			return get_msr_hyperv(vcpu, msr, pdata);
+		break;
 	default:
 		if (!ignore_msrs) {
 			pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr);
@@ -1398,6 +1529,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_XEN_HVM:
 	case KVM_CAP_ADJUST_CLOCK:
 	case KVM_CAP_VCPU_EVENTS:
+	case KVM_CAP_HYPERV:
 		r = 1;
 		break;
 	case KVM_CAP_COALESCED_MMIO:
@@ -3618,11 +3750,70 @@ static inline gpa_t hc_gpa(struct kvm_vcpu *vcpu, unsigned long a0,
 		return a0 | ((gpa_t)a1 << 32);
 }
 
+int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
+{
+	u64 param, ingpa, outgpa, ret;
+	uint16_t code, rep_idx, rep_cnt, res = HV_STATUS_SUCCESS, rep_done = 0;
+	bool fast, longmode;
+	int cs_db, cs_l;
+
+	/*
+	 * hypercall generates UD from non zero cpl and real mode
+	 * per HYPER-V spec
+	 */
+	if (kvm_x86_ops->get_cpl(vcpu) != 0 ||
+	    !kvm_read_cr0_bits(vcpu, X86_CR0_PE)) {
+		kvm_queue_exception(vcpu, UD_VECTOR);
+		return 0;
+	}
+
+	kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
+	longmode = is_long_mode(vcpu) && cs_l == 1;
+
+	if (!longmode) {
+		param = (kvm_register_read(vcpu, VCPU_REGS_RDX) << 32) |
+			(kvm_register_read(vcpu, VCPU_REGS_RAX) & 0xffffff);
+		ingpa = (kvm_register_read(vcpu, VCPU_REGS_RBX) << 32) |
+			(kvm_register_read(vcpu, VCPU_REGS_RCX) & 0xffffff);
+		outgpa = (kvm_register_read(vcpu, VCPU_REGS_RDI) << 32) |
+			(kvm_register_read(vcpu, VCPU_REGS_RSI) & 0xffffff);
+	}
+#ifdef CONFIG_X86_64
+	else {
+		param = kvm_register_read(vcpu, VCPU_REGS_RCX);
+		ingpa = kvm_register_read(vcpu, VCPU_REGS_RDX);
+		outgpa = kvm_register_read(vcpu, VCPU_REGS_R8);
+	}
+#endif
+
+	code = param & 0xffff;
+	fast = (param >> 16) & 0x1;
+	rep_cnt = (param >> 32) & 0xfff;
+	rep_idx = (param >> 48) & 0xfff;
+
+	trace_kvm_hv_hypercall(code, fast, rep_cnt, rep_idx, ingpa, outgpa);
+
+	res = HV_STATUS_INVALID_HYPERCALL_CODE;
+
+	ret = res | (((u64)rep_done & 0xfff) << 32);
+	if (longmode) {
+		kvm_register_write(vcpu, VCPU_REGS_RAX, ret);
+	} else {
+		kvm_register_write(vcpu, VCPU_REGS_RDX, ret >> 32);
+		kvm_register_write(vcpu, VCPU_REGS_RAX, ret & 0xffffffff);
+	}
+
+	return 1;
+}
+
 int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
 {
 	unsigned long nr, a0, a1, a2, a3, ret;
 	int r = 1;
 
+	if (kvm_hv_hypercall_enabled(vcpu->kvm))
+		return kvm_hv_hypercall(vcpu);
+
 	nr = kvm_register_read(vcpu, VCPU_REGS_RAX);
 	a0 = kvm_register_read(vcpu, VCPU_REGS_RBX);
 	a1 = kvm_register_read(vcpu, VCPU_REGS_RCX);
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index f2feef68ffd6..e227cbae70ad 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -497,6 +497,7 @@ struct kvm_ioeventfd {
 #endif
 #define KVM_CAP_S390_PSW 42
 #define KVM_CAP_PPC_SEGSTATE 43
+#define KVM_CAP_HYPERV 44
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
-- 
cgit v1.2.3


From 10388a07164c1512b3a3d0273b9adc230f82790e Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Sun, 17 Jan 2010 15:51:23 +0200
Subject: KVM: Add HYPER-V apic access MSRs

Implement HYPER-V apic MSRs. Spec defines three MSRs that speed-up
access to EOI/TPR/ICR apic registers for PV guests.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Vadim Rozenfeld <vrozenfe@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  2 ++
 arch/x86/kvm/lapic.c            | 31 ++++++++++++++++++++++++++++++
 arch/x86/kvm/lapic.h            |  8 ++++++++
 arch/x86/kvm/x86.c              | 42 +++++++++++++++++++++++++++++++++++++----
 include/linux/kvm.h             |  1 +
 5 files changed, 80 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 67d19e422006..a1f0b5dd7d75 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -363,6 +363,8 @@ struct kvm_vcpu_arch {
 	/* used for guest single stepping over the given code position */
 	u16 singlestep_cs;
 	unsigned long singlestep_rip;
+	/* fields used by HYPER-V emulation */
+	u64 hv_vapic;
 };
 
 struct kvm_mem_alias {
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index ba8c045da782..4b224f90087b 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1246,3 +1246,34 @@ int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data)
 
 	return 0;
 }
+
+int kvm_hv_vapic_msr_write(struct kvm_vcpu *vcpu, u32 reg, u64 data)
+{
+	struct kvm_lapic *apic = vcpu->arch.apic;
+
+	if (!irqchip_in_kernel(vcpu->kvm))
+		return 1;
+
+	/* if this is ICR write vector before command */
+	if (reg == APIC_ICR)
+		apic_reg_write(apic, APIC_ICR2, (u32)(data >> 32));
+	return apic_reg_write(apic, reg, (u32)data);
+}
+
+int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 reg, u64 *data)
+{
+	struct kvm_lapic *apic = vcpu->arch.apic;
+	u32 low, high = 0;
+
+	if (!irqchip_in_kernel(vcpu->kvm))
+		return 1;
+
+	if (apic_reg_read(apic, reg, 4, &low))
+		return 1;
+	if (reg == APIC_ICR)
+		apic_reg_read(apic, APIC_ICR2, 4, &high);
+
+	*data = (((u64)high) << 32) | low;
+
+	return 0;
+}
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 40010b09c4aa..f5fe32c5edad 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -48,4 +48,12 @@ void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu);
 
 int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data);
 int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data);
+
+int kvm_hv_vapic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data);
+int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data);
+
+static inline bool kvm_hv_vapic_assist_page_enabled(struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.hv_vapic & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE;
+}
 #endif
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 480137db4770..552be51e4d84 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -622,10 +622,11 @@ static inline u32 bit(int bitno)
  * kvm-specific. Those are put in the beginning of the list.
  */
 
-#define KVM_SAVE_MSRS_BEGIN	4
+#define KVM_SAVE_MSRS_BEGIN	5
 static u32 msrs_to_save[] = {
 	MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
 	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
+	HV_X64_MSR_APIC_ASSIST_PAGE,
 	MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
 	MSR_K6_STAR,
 #ifdef CONFIG_X86_64
@@ -1067,10 +1068,36 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 
 static int set_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 {
-	pr_unimpl(vcpu, "HYPER-V unimplemented wrmsr: 0x%x data 0x%llx\n",
-		  msr, data);
+	switch (msr) {
+	case HV_X64_MSR_APIC_ASSIST_PAGE: {
+		unsigned long addr;
 
-	return 1;
+		if (!(data & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE)) {
+			vcpu->arch.hv_vapic = data;
+			break;
+		}
+		addr = gfn_to_hva(vcpu->kvm, data >>
+				  HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT);
+		if (kvm_is_error_hva(addr))
+			return 1;
+		if (clear_user((void __user *)addr, PAGE_SIZE))
+			return 1;
+		vcpu->arch.hv_vapic = data;
+		break;
+	}
+	case HV_X64_MSR_EOI:
+		return kvm_hv_vapic_msr_write(vcpu, APIC_EOI, data);
+	case HV_X64_MSR_ICR:
+		return kvm_hv_vapic_msr_write(vcpu, APIC_ICR, data);
+	case HV_X64_MSR_TPR:
+		return kvm_hv_vapic_msr_write(vcpu, APIC_TASKPRI, data);
+	default:
+		pr_unimpl(vcpu, "HYPER-V unimplemented wrmsr: 0x%x "
+			  "data 0x%llx\n", msr, data);
+		return 1;
+	}
+
+	return 0;
 }
 
 int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
@@ -1330,6 +1357,12 @@ static int get_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
 				data = r;
 		break;
 	}
+	case HV_X64_MSR_EOI:
+		return kvm_hv_vapic_msr_read(vcpu, APIC_EOI, pdata);
+	case HV_X64_MSR_ICR:
+		return kvm_hv_vapic_msr_read(vcpu, APIC_ICR, pdata);
+	case HV_X64_MSR_TPR:
+		return kvm_hv_vapic_msr_read(vcpu, APIC_TASKPRI, pdata);
 	default:
 		pr_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
 		return 1;
@@ -1530,6 +1563,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_ADJUST_CLOCK:
 	case KVM_CAP_VCPU_EVENTS:
 	case KVM_CAP_HYPERV:
+	case KVM_CAP_HYPERV_VAPIC:
 		r = 1;
 		break;
 	case KVM_CAP_COALESCED_MMIO:
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index e227cbae70ad..5ce61738dc30 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -498,6 +498,7 @@ struct kvm_ioeventfd {
 #define KVM_CAP_S390_PSW 42
 #define KVM_CAP_PPC_SEGSTATE 43
 #define KVM_CAP_HYPERV 44
+#define KVM_CAP_HYPERV_VAPIC 45
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
-- 
cgit v1.2.3


From c25bc1638a1211f57cccbabdd8b732813b852340 Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Sun, 17 Jan 2010 15:51:24 +0200
Subject: KVM: Implement NotifyLongSpinWait HYPER-V hypercall

Windows issues this hypercall after guest was spinning on a spinlock
for too many iterations.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Vadim Rozenfeld <vrozenfe@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/x86.c  | 10 +++++++++-
 include/linux/kvm.h |  1 +
 2 files changed, 10 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 552be51e4d84..9f72a443455b 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1564,6 +1564,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_VCPU_EVENTS:
 	case KVM_CAP_HYPERV:
 	case KVM_CAP_HYPERV_VAPIC:
+	case KVM_CAP_HYPERV_SPIN:
 		r = 1;
 		break;
 	case KVM_CAP_COALESCED_MMIO:
@@ -3827,7 +3828,14 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
 
 	trace_kvm_hv_hypercall(code, fast, rep_cnt, rep_idx, ingpa, outgpa);
 
-	res = HV_STATUS_INVALID_HYPERCALL_CODE;
+	switch (code) {
+	case HV_X64_HV_NOTIFY_LONG_SPIN_WAIT:
+		kvm_vcpu_on_spin(vcpu);
+		break;
+	default:
+		res = HV_STATUS_INVALID_HYPERCALL_CODE;
+		break;
+	}
 
 	ret = res | (((u64)rep_done & 0xfff) << 32);
 	if (longmode) {
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 5ce61738dc30..4c4937e7f65f 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -499,6 +499,7 @@ struct kvm_ioeventfd {
 #define KVM_CAP_PPC_SEGSTATE 43
 #define KVM_CAP_HYPERV 44
 #define KVM_CAP_HYPERV_VAPIC 45
+#define KVM_CAP_HYPERV_SPIN 46
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
-- 
cgit v1.2.3


From ab9f4ecbb6d39a18e300a0d10a4968c37404aa76 Mon Sep 17 00:00:00 2001
From: "Zhai, Edwin" <edwin.zhai@intel.com>
Date: Fri, 29 Jan 2010 14:38:44 +0800
Subject: KVM: enable PCI multiple-segments for pass-through device

Enable optional parameter (default 0) - PCI segment (or domain) besides
BDF, when assigning PCI device to guest.

Signed-off-by: Zhai Edwin <edwin.zhai@intel.com>
Acked-by: Chris Wright <chrisw@sous-sol.org>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/kvm/x86.c       | 1 +
 include/linux/kvm.h      | 4 +++-
 include/linux/kvm_host.h | 1 +
 virt/kvm/assigned-dev.c  | 4 +++-
 virt/kvm/iommu.c         | 9 ++++++---
 5 files changed, 14 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index d47ceda7a928..0bf3df527afc 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1569,6 +1569,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_HYPERV:
 	case KVM_CAP_HYPERV_VAPIC:
 	case KVM_CAP_HYPERV_SPIN:
+	case KVM_CAP_PCI_SEGMENT:
 		r = 1;
 		break;
 	case KVM_CAP_COALESCED_MMIO:
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 4c4937e7f65f..dfa54be881f4 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -500,6 +500,7 @@ struct kvm_ioeventfd {
 #define KVM_CAP_HYPERV 44
 #define KVM_CAP_HYPERV_VAPIC 45
 #define KVM_CAP_HYPERV_SPIN 46
+#define KVM_CAP_PCI_SEGMENT 47
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -694,8 +695,9 @@ struct kvm_assigned_pci_dev {
 	__u32 busnr;
 	__u32 devfn;
 	__u32 flags;
+	__u32 segnr;
 	union {
-		__u32 reserved[12];
+		__u32 reserved[11];
 	};
 };
 
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index dfde04b0d453..665c37063f30 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -400,6 +400,7 @@ struct kvm_assigned_dev_kernel {
 	struct work_struct interrupt_work;
 	struct list_head list;
 	int assigned_dev_id;
+	int host_segnr;
 	int host_busnr;
 	int host_devfn;
 	unsigned int entries_nr;
diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c
index f51e684dd238..057e2cca6af5 100644
--- a/virt/kvm/assigned-dev.c
+++ b/virt/kvm/assigned-dev.c
@@ -526,7 +526,8 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
 		r = -ENOMEM;
 		goto out;
 	}
-	dev = pci_get_bus_and_slot(assigned_dev->busnr,
+	dev = pci_get_domain_bus_and_slot(assigned_dev->segnr,
+				   assigned_dev->busnr,
 				   assigned_dev->devfn);
 	if (!dev) {
 		printk(KERN_INFO "%s: host device not found\n", __func__);
@@ -548,6 +549,7 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
 	pci_reset_function(dev);
 
 	match->assigned_dev_id = assigned_dev->assigned_dev_id;
+	match->host_segnr = assigned_dev->segnr;
 	match->host_busnr = assigned_dev->busnr;
 	match->host_devfn = assigned_dev->devfn;
 	match->flags = assigned_dev->flags;
diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c
index 65a51432c8e5..80fd3ad3b2de 100644
--- a/virt/kvm/iommu.c
+++ b/virt/kvm/iommu.c
@@ -106,7 +106,8 @@ int kvm_assign_device(struct kvm *kvm,
 
 	r = iommu_attach_device(domain, &pdev->dev);
 	if (r) {
-		printk(KERN_ERR "assign device %x:%x.%x failed",
+		printk(KERN_ERR "assign device %x:%x:%x.%x failed",
+			pci_domain_nr(pdev->bus),
 			pdev->bus->number,
 			PCI_SLOT(pdev->devfn),
 			PCI_FUNC(pdev->devfn));
@@ -127,7 +128,8 @@ int kvm_assign_device(struct kvm *kvm,
 			goto out_unmap;
 	}
 
-	printk(KERN_DEBUG "assign device: host bdf = %x:%x:%x\n",
+	printk(KERN_DEBUG "assign device %x:%x:%x.%x\n",
+		assigned_dev->host_segnr,
 		assigned_dev->host_busnr,
 		PCI_SLOT(assigned_dev->host_devfn),
 		PCI_FUNC(assigned_dev->host_devfn));
@@ -154,7 +156,8 @@ int kvm_deassign_device(struct kvm *kvm,
 
 	iommu_detach_device(domain, &pdev->dev);
 
-	printk(KERN_DEBUG "deassign device: host bdf = %x:%x:%x\n",
+	printk(KERN_DEBUG "deassign device %x:%x:%x.%x\n",
+		assigned_dev->host_segnr,
 		assigned_dev->host_busnr,
 		PCI_SLOT(assigned_dev->host_devfn),
 		PCI_FUNC(assigned_dev->host_devfn));
-- 
cgit v1.2.3


From 8f0b1ab6fb045a1324d9435ba00c2940783b0041 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Thu, 28 Jan 2010 12:37:56 +0100
Subject: KVM: Introduce kvm_host_page_size

This patch introduces a generic function to find out the
host page size for a given gfn. This function is needed by
the kvm iommu code. This patch also simplifies the x86
host_mapping_level function.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/mmu.c       | 18 ++----------------
 include/linux/kvm_host.h |  1 +
 virt/kvm/kvm_main.c      | 25 +++++++++++++++++++++++++
 3 files changed, 28 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index dc4d954efacd..913ef4b7939a 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -468,24 +468,10 @@ static int has_wrprotected_page(struct kvm *kvm,
 
 static int host_mapping_level(struct kvm *kvm, gfn_t gfn)
 {
-	unsigned long page_size = PAGE_SIZE;
-	struct vm_area_struct *vma;
-	unsigned long addr;
+	unsigned long page_size;
 	int i, ret = 0;
 
-	addr = gfn_to_hva(kvm, gfn);
-	if (kvm_is_error_hva(addr))
-		return PT_PAGE_TABLE_LEVEL;
-
-	down_read(&current->mm->mmap_sem);
-	vma = find_vma(current->mm, addr);
-	if (!vma)
-		goto out;
-
-	page_size = vma_kernel_pagesize(vma);
-
-out:
-	up_read(&current->mm->mmap_sem);
+	page_size = kvm_host_page_size(kvm, gfn);
 
 	for (i = PT_PAGE_TABLE_LEVEL;
 	     i < (PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES); ++i) {
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 665c37063f30..3145b281de9d 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -300,6 +300,7 @@ int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len);
 int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len);
 struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn);
 int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn);
+unsigned long kvm_host_page_size(struct kvm *kvm, gfn_t gfn);
 void mark_page_dirty(struct kvm *kvm, gfn_t gfn);
 
 void kvm_vcpu_block(struct kvm_vcpu *vcpu);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 2b0974a14835..0a360c26cc34 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -45,6 +45,7 @@
 #include <linux/spinlock.h>
 #include <linux/compat.h>
 #include <linux/srcu.h>
+#include <linux/hugetlb.h>
 
 #include <asm/processor.h>
 #include <asm/io.h>
@@ -867,6 +868,30 @@ int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn)
 }
 EXPORT_SYMBOL_GPL(kvm_is_visible_gfn);
 
+unsigned long kvm_host_page_size(struct kvm *kvm, gfn_t gfn)
+{
+	struct vm_area_struct *vma;
+	unsigned long addr, size;
+
+	size = PAGE_SIZE;
+
+	addr = gfn_to_hva(kvm, gfn);
+	if (kvm_is_error_hva(addr))
+		return PAGE_SIZE;
+
+	down_read(&current->mm->mmap_sem);
+	vma = find_vma(current->mm, addr);
+	if (!vma)
+		goto out;
+
+	size = vma_kernel_pagesize(vma);
+
+out:
+	up_read(&current->mm->mmap_sem);
+
+	return size;
+}
+
 int memslot_id(struct kvm *kvm, gfn_t gfn)
 {
 	int i;
-- 
cgit v1.2.3


From 70e335e16882df5b5d6971022e63c3603a1e8c23 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Thu, 18 Feb 2010 11:25:22 +0200
Subject: KVM: Convert kvm->requests_lock to raw_spinlock_t

The code relies on kvm->requests_lock inhibiting preemption.

Noted by Jan Kiszka.

Signed-off-by: Avi Kivity <avi@redhat.com>
---
 include/linux/kvm_host.h | 2 +-
 virt/kvm/kvm_main.c      | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 3145b281de9d..a3fd0f91d943 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -161,7 +161,7 @@ struct kvm_memslots {
 
 struct kvm {
 	spinlock_t mmu_lock;
-	spinlock_t requests_lock;
+	raw_spinlock_t requests_lock;
 	struct mutex slots_lock;
 	struct mm_struct *mm; /* userspace tied to this vm */
 	struct kvm_memslots *memslots;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 0a360c26cc34..548f9253c195 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -138,7 +138,7 @@ static bool make_all_cpus_request(struct kvm *kvm, unsigned int req)
 
 	zalloc_cpumask_var(&cpus, GFP_ATOMIC);
 
-	spin_lock(&kvm->requests_lock);
+	raw_spin_lock(&kvm->requests_lock);
 	me = smp_processor_id();
 	kvm_for_each_vcpu(i, vcpu, kvm) {
 		if (test_and_set_bit(req, &vcpu->requests))
@@ -153,7 +153,7 @@ static bool make_all_cpus_request(struct kvm *kvm, unsigned int req)
 		smp_call_function_many(cpus, ack_flush, NULL, 1);
 	else
 		called = false;
-	spin_unlock(&kvm->requests_lock);
+	raw_spin_unlock(&kvm->requests_lock);
 	free_cpumask_var(cpus);
 	return called;
 }
@@ -409,7 +409,7 @@ static struct kvm *kvm_create_vm(void)
 	kvm->mm = current->mm;
 	atomic_inc(&kvm->mm->mm_count);
 	spin_lock_init(&kvm->mmu_lock);
-	spin_lock_init(&kvm->requests_lock);
+	raw_spin_lock_init(&kvm->requests_lock);
 	kvm_eventfd_init(kvm);
 	mutex_init(&kvm->lock);
 	mutex_init(&kvm->irq_lock);
-- 
cgit v1.2.3


From d2be1651b736002e0c76d7095d6c0ba77b4a897c Mon Sep 17 00:00:00 2001
From: Jan Kiszka <jan.kiszka@siemens.com>
Date: Tue, 23 Feb 2010 17:47:57 +0100
Subject: KVM: x86: Add KVM_CAP_X86_ROBUST_SINGLESTEP

This marks the guest single-step API improvement of 94fe45da and
91586a3b with a capability flag to allow reliable detection by user
space.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Cc: stable@kernel.org (2.6.33)
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/x86.c  | 1 +
 include/linux/kvm.h | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index c3d2acbbb91b..e46282a56565 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1570,6 +1570,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_HYPERV_VAPIC:
 	case KVM_CAP_HYPERV_SPIN:
 	case KVM_CAP_PCI_SEGMENT:
+	case KVM_CAP_X86_ROBUST_SINGLESTEP:
 		r = 1;
 		break;
 	case KVM_CAP_COALESCED_MMIO:
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index dfa54be881f4..60df9c84ecae 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -501,6 +501,7 @@ struct kvm_ioeventfd {
 #define KVM_CAP_HYPERV_VAPIC 45
 #define KVM_CAP_HYPERV_SPIN 46
 #define KVM_CAP_PCI_SEGMENT 47
+#define KVM_CAP_X86_ROBUST_SINGLESTEP 51
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
-- 
cgit v1.2.3


From 27943620cbd960f710a385ff4a538e14ed3f1922 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 19 Jan 2010 10:49:19 +0900
Subject: libata: implement spurious irq handling for SFF and apply it to piix

Traditional IDE interface sucks in that it doesn't have a reliable IRQ
pending bit, so if the controller raises IRQ while the driver is
expecting it not to, the IRQ won't be cleared and eventually the IRQ
line will be killed by interrupt subsystem.  Some controllers have
non-standard mechanism to indicate IRQ pending so that this condition
can be detected and worked around.

This patch adds an optional operation ->sff_irq_check() which will be
called for each port from the ata_sff_interrupt() if an unexpected
interrupt is received.  If the operation returns %true,
->sff_check_status() and ->sff_irq_clear() will be cleared for the
port.  Note that this doesn't mark the interrupt as handled so it
won't prevent IRQ subsystem from killing the IRQ if this mechanism
fails to clear the spurious IRQ.

This patch also implements ->sff_irq_check() for ata_piix.  Note that
this adds slight overhead to shared IRQ operation as IRQs which are
destined for other controllers will trigger extra register accesses to
check whether IDE interrupt is pending but this solves rare screaming
IRQ cases and for some curious reason also helps weird BIOS related
glitch on Samsung n130 as reported in bko#14314.

  http://bugzilla.kernel.org/show_bug.cgi?id=14314

* piix_base_ops dropped as suggested by Sergei.

* Spurious IRQ detection doesn't kick in anymore if polling qc is in
  progress.  This provides less protection but some controllers have
  possible data corruption issues if the wrong register is accessed
  while a command is in progress.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Johannes Stezenbach <js@sig21.net>
Reported-by: Hans Werner <hwerner4@gmx.de>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Cc: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
---
 drivers/ata/ata_piix.c   | 20 +++++++++++++++-----
 drivers/ata/libata-sff.c | 35 ++++++++++++++++++++++++++++++++---
 include/linux/libata.h   |  1 +
 3 files changed, 48 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ata/ata_piix.c b/drivers/ata/ata_piix.c
index b5f614b9c245..c33806654e46 100644
--- a/drivers/ata/ata_piix.c
+++ b/drivers/ata/ata_piix.c
@@ -173,6 +173,7 @@ static int piix_sidpr_scr_read(struct ata_link *link,
 			       unsigned int reg, u32 *val);
 static int piix_sidpr_scr_write(struct ata_link *link,
 				unsigned int reg, u32 val);
+static bool piix_irq_check(struct ata_port *ap);
 #ifdef CONFIG_PM
 static int piix_pci_device_suspend(struct pci_dev *pdev, pm_message_t mesg);
 static int piix_pci_device_resume(struct pci_dev *pdev);
@@ -317,8 +318,13 @@ static struct scsi_host_template piix_sht = {
 	ATA_BMDMA_SHT(DRV_NAME),
 };
 
-static struct ata_port_operations piix_pata_ops = {
+static struct ata_port_operations piix_sata_ops = {
 	.inherits		= &ata_bmdma32_port_ops,
+	.sff_irq_check		= piix_irq_check,
+};
+
+static struct ata_port_operations piix_pata_ops = {
+	.inherits		= &piix_sata_ops,
 	.cable_detect		= ata_cable_40wire,
 	.set_piomode		= piix_set_piomode,
 	.set_dmamode		= piix_set_dmamode,
@@ -336,10 +342,6 @@ static struct ata_port_operations ich_pata_ops = {
 	.set_dmamode		= ich_set_dmamode,
 };
 
-static struct ata_port_operations piix_sata_ops = {
-	.inherits		= &ata_bmdma32_port_ops,
-};
-
 static struct ata_port_operations piix_sidpr_sata_ops = {
 	.inherits		= &piix_sata_ops,
 	.hardreset		= sata_std_hardreset,
@@ -970,6 +972,14 @@ static int piix_sidpr_scr_write(struct ata_link *link,
 	return 0;
 }
 
+static bool piix_irq_check(struct ata_port *ap)
+{
+	if (unlikely(!ap->ioaddr.bmdma_addr))
+		return false;
+
+	return ap->ops->bmdma_status(ap) & ATA_DMA_INTR;
+}
+
 #ifdef CONFIG_PM
 static int piix_broken_suspend(void)
 {
diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c
index c62ed13b0596..c2661ea70330 100644
--- a/drivers/ata/libata-sff.c
+++ b/drivers/ata/libata-sff.c
@@ -1763,7 +1763,7 @@ irqreturn_t ata_sff_interrupt(int irq, void *dev_instance)
 {
 	struct ata_host *host = dev_instance;
 	unsigned int i;
-	unsigned int handled = 0;
+	unsigned int handled = 0, polling = 0;
 	unsigned long flags;
 
 	/* TODO: make _irqsave conditional on x86 PCI IDE legacy mode */
@@ -1777,8 +1777,37 @@ irqreturn_t ata_sff_interrupt(int irq, void *dev_instance)
 			continue;
 
 		qc = ata_qc_from_tag(ap, ap->link.active_tag);
-		if (qc && !(qc->tf.flags & ATA_TFLAG_POLLING))
-			handled |= ata_sff_host_intr(ap, qc);
+		if (qc) {
+			if (!(qc->tf.flags & ATA_TFLAG_POLLING))
+				handled |= ata_sff_host_intr(ap, qc);
+			else
+				polling |= 1 << i;
+		}
+	}
+
+	/*
+	 * If no port was expecting IRQ but the controller is actually
+	 * asserting IRQ line, nobody cared will ensue.  Check IRQ
+	 * pending status if available and clear spurious IRQ.
+	 */
+	if (!handled) {
+		for (i = 0; i < host->n_ports; i++) {
+			struct ata_port *ap = host->ports[i];
+
+			if (polling & (1 << i))
+				continue;
+
+			if (!ap->ops->sff_irq_check ||
+			    !ap->ops->sff_irq_check(ap))
+				continue;
+
+			if (printk_ratelimit())
+				ata_port_printk(ap, KERN_INFO,
+						"clearing spurious IRQ\n");
+
+			ap->ops->sff_check_status(ap);
+			ap->ops->sff_irq_clear(ap);
+		}
 	}
 
 	spin_unlock_irqrestore(&host->lock, flags);
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 73112250862c..5fb888462359 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -857,6 +857,7 @@ struct ata_port_operations {
 	unsigned int (*sff_data_xfer)(struct ata_device *dev,
 			unsigned char *buf, unsigned int buflen, int rw);
 	u8   (*sff_irq_on)(struct ata_port *);
+	bool (*sff_irq_check)(struct ata_port *);
 	void (*sff_irq_clear)(struct ata_port *);
 
 	void (*bmdma_setup)(struct ata_queued_cmd *qc);
-- 
cgit v1.2.3


From 16ea0fc98d53c72cb4e1a9edcb685a87e3a81430 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@linux.intel.com>
Date: Tue, 23 Feb 2010 02:26:06 -0500
Subject: libata: Pass host flags into the pci helper

This allows parallel scan and the like to be set without having to stop
using the existing full helper functions. This patch merely adds the argument
and fixes up the callers. It doesn't undo the special cases already in the
tree or add any new parallel callers.

Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
---
 drivers/ata/ata_generic.c       | 2 +-
 drivers/ata/libata-sff.c        | 6 ++++--
 drivers/ata/pata_acpi.c         | 2 +-
 drivers/ata/pata_ali.c          | 2 +-
 drivers/ata/pata_amd.c          | 2 +-
 drivers/ata/pata_artop.c        | 2 +-
 drivers/ata/pata_atiixp.c       | 2 +-
 drivers/ata/pata_cmd640.c       | 2 +-
 drivers/ata/pata_cmd64x.c       | 2 +-
 drivers/ata/pata_cs5530.c       | 2 +-
 drivers/ata/pata_cs5535.c       | 2 +-
 drivers/ata/pata_cs5536.c       | 2 +-
 drivers/ata/pata_cypress.c      | 2 +-
 drivers/ata/pata_efar.c         | 2 +-
 drivers/ata/pata_hpt366.c       | 2 +-
 drivers/ata/pata_hpt37x.c       | 2 +-
 drivers/ata/pata_hpt3x2n.c      | 2 +-
 drivers/ata/pata_it8213.c       | 2 +-
 drivers/ata/pata_it821x.c       | 2 +-
 drivers/ata/pata_jmicron.c      | 2 +-
 drivers/ata/pata_marvell.c      | 2 +-
 drivers/ata/pata_netcell.c      | 2 +-
 drivers/ata/pata_ns87410.c      | 2 +-
 drivers/ata/pata_ns87415.c      | 2 +-
 drivers/ata/pata_oldpiix.c      | 2 +-
 drivers/ata/pata_opti.c         | 2 +-
 drivers/ata/pata_optidma.c      | 2 +-
 drivers/ata/pata_pdc202xx_old.c | 2 +-
 drivers/ata/pata_piccolo.c      | 2 +-
 drivers/ata/pata_radisys.c      | 2 +-
 drivers/ata/pata_rz1000.c       | 2 +-
 drivers/ata/pata_sc1200.c       | 2 +-
 drivers/ata/pata_serverworks.c  | 2 +-
 drivers/ata/pata_sil680.c       | 2 +-
 drivers/ata/pata_sis.c          | 2 +-
 drivers/ata/pata_sl82c105.c     | 2 +-
 drivers/ata/pata_triflex.c      | 2 +-
 drivers/ata/pata_via.c          | 2 +-
 drivers/staging/phison/phison.c | 2 +-
 include/linux/libata.h          | 4 ++--
 40 files changed, 44 insertions(+), 42 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ata/ata_generic.c b/drivers/ata/ata_generic.c
index 12e26c3c68e3..33fb614f9784 100644
--- a/drivers/ata/ata_generic.c
+++ b/drivers/ata/ata_generic.c
@@ -155,7 +155,7 @@ static int ata_generic_init_one(struct pci_dev *dev, const struct pci_device_id
 			return rc;
 		pcim_pin_device(dev);
 	}
-	return ata_pci_sff_init_one(dev, ppi, &generic_sht, NULL);
+	return ata_pci_sff_init_one(dev, ppi, &generic_sht, NULL, 0);
 }
 
 static struct pci_device_id ata_generic[] = {
diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c
index c2661ea70330..02441fd57e9e 100644
--- a/drivers/ata/libata-sff.c
+++ b/drivers/ata/libata-sff.c
@@ -3037,6 +3037,7 @@ EXPORT_SYMBOL_GPL(ata_pci_sff_activate_host);
  *	@ppi: array of port_info, must be enough for two ports
  *	@sht: scsi_host_template to use when registering the host
  *	@host_priv: host private_data
+ *	@hflag: host flags
  *
  *	This is a helper function which can be called from a driver's
  *	xxx_init_one() probe function if the hardware uses traditional
@@ -3057,8 +3058,8 @@ EXPORT_SYMBOL_GPL(ata_pci_sff_activate_host);
  *	Zero on success, negative on errno-based value on error.
  */
 int ata_pci_sff_init_one(struct pci_dev *pdev,
-			 const struct ata_port_info * const *ppi,
-			 struct scsi_host_template *sht, void *host_priv)
+		 const struct ata_port_info * const *ppi,
+		 struct scsi_host_template *sht, void *host_priv, int hflag)
 {
 	struct device *dev = &pdev->dev;
 	const struct ata_port_info *pi = NULL;
@@ -3093,6 +3094,7 @@ int ata_pci_sff_init_one(struct pci_dev *pdev,
 	if (rc)
 		goto out;
 	host->private_data = host_priv;
+	host->flags |= hflag;
 
 	pci_set_master(pdev);
 	rc = ata_pci_sff_activate_host(host, ata_sff_interrupt, sht);
diff --git a/drivers/ata/pata_acpi.c b/drivers/ata/pata_acpi.c
index d8f35fe44421..294f3020a78a 100644
--- a/drivers/ata/pata_acpi.c
+++ b/drivers/ata/pata_acpi.c
@@ -259,7 +259,7 @@ static int pacpi_init_one (struct pci_dev *pdev, const struct pci_device_id *id)
 			return rc;
 		pcim_pin_device(pdev);
 	}
-	return ata_pci_sff_init_one(pdev, ppi, &pacpi_sht, NULL);
+	return ata_pci_sff_init_one(pdev, ppi, &pacpi_sht, NULL, 0);
 }
 
 static const struct pci_device_id pacpi_pci_tbl[] = {
diff --git a/drivers/ata/pata_ali.c b/drivers/ata/pata_ali.c
index 63ba48c9af33..dc61b72f751c 100644
--- a/drivers/ata/pata_ali.c
+++ b/drivers/ata/pata_ali.c
@@ -583,7 +583,7 @@ static int ali_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
 	        	ppi[0] = &info_20_udma;
 	}
 
-	return ata_pci_sff_init_one(pdev, ppi, &ali_sht, NULL);
+	return ata_pci_sff_init_one(pdev, ppi, &ali_sht, NULL, 0);
 }
 
 #ifdef CONFIG_PM
diff --git a/drivers/ata/pata_amd.c b/drivers/ata/pata_amd.c
index 567f3f72774e..d95eca9c547e 100644
--- a/drivers/ata/pata_amd.c
+++ b/drivers/ata/pata_amd.c
@@ -574,7 +574,7 @@ static int amd_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
 	}
 
 	/* And fire it up */
-	return ata_pci_sff_init_one(pdev, ppi, &amd_sht, hpriv);
+	return ata_pci_sff_init_one(pdev, ppi, &amd_sht, hpriv, 0);
 }
 
 #ifdef CONFIG_PM
diff --git a/drivers/ata/pata_artop.c b/drivers/ata/pata_artop.c
index d332cfdb0f30..4d066d6c30fa 100644
--- a/drivers/ata/pata_artop.c
+++ b/drivers/ata/pata_artop.c
@@ -421,7 +421,7 @@ static int artop_init_one (struct pci_dev *pdev, const struct pci_device_id *id)
 
 	BUG_ON(ppi[0] == NULL);
 
-	return ata_pci_sff_init_one(pdev, ppi, &artop_sht, NULL);
+	return ata_pci_sff_init_one(pdev, ppi, &artop_sht, NULL, 0);
 }
 
 static const struct pci_device_id artop_pci_tbl[] = {
diff --git a/drivers/ata/pata_atiixp.c b/drivers/ata/pata_atiixp.c
index ae4454d4e955..f697b5b880d3 100644
--- a/drivers/ata/pata_atiixp.c
+++ b/drivers/ata/pata_atiixp.c
@@ -237,7 +237,7 @@ static int atiixp_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
 		if (!pci_test_config_bits(pdev, &atiixp_enable_bits[i]))
 			ppi[i] = &ata_dummy_port_info;
 
-	return ata_pci_sff_init_one(pdev, ppi, &atiixp_sht, NULL);
+	return ata_pci_sff_init_one(pdev, ppi, &atiixp_sht, NULL, 0);
 }
 
 static const struct pci_device_id atiixp[] = {
diff --git a/drivers/ata/pata_cmd640.c b/drivers/ata/pata_cmd640.c
index 5acf9fa9b39f..6cd5d5dd9e3b 100644
--- a/drivers/ata/pata_cmd640.c
+++ b/drivers/ata/pata_cmd640.c
@@ -223,7 +223,7 @@ static int cmd640_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
 
 	cmd640_hardware_init(pdev);
 
-	return ata_pci_sff_init_one(pdev, ppi, &cmd640_sht, NULL);
+	return ata_pci_sff_init_one(pdev, ppi, &cmd640_sht, NULL, 0);
 }
 
 #ifdef CONFIG_PM
diff --git a/drivers/ata/pata_cmd64x.c b/drivers/ata/pata_cmd64x.c
index b40bf0f29975..4c81a71b8877 100644
--- a/drivers/ata/pata_cmd64x.c
+++ b/drivers/ata/pata_cmd64x.c
@@ -367,7 +367,7 @@ static int cmd64x_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
 	pci_write_config_byte(pdev, UDIDETCR0, 0xF0);
 #endif
 
-	return ata_pci_sff_init_one(pdev, ppi, &cmd64x_sht, NULL);
+	return ata_pci_sff_init_one(pdev, ppi, &cmd64x_sht, NULL, 0);
 }
 
 #ifdef CONFIG_PM
diff --git a/drivers/ata/pata_cs5530.c b/drivers/ata/pata_cs5530.c
index c974b05e4129..738ad2e14a97 100644
--- a/drivers/ata/pata_cs5530.c
+++ b/drivers/ata/pata_cs5530.c
@@ -324,7 +324,7 @@ static int cs5530_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
 		ppi[1] = &info_palmax_secondary;
 
 	/* Now kick off ATA set up */
-	return ata_pci_sff_init_one(pdev, ppi, &cs5530_sht, NULL);
+	return ata_pci_sff_init_one(pdev, ppi, &cs5530_sht, NULL, 0);
 }
 
 #ifdef CONFIG_PM
diff --git a/drivers/ata/pata_cs5535.c b/drivers/ata/pata_cs5535.c
index c50c3a42b10b..a02e6459fdcc 100644
--- a/drivers/ata/pata_cs5535.c
+++ b/drivers/ata/pata_cs5535.c
@@ -198,7 +198,7 @@ static int cs5535_init_one(struct pci_dev *dev, const struct pci_device_id *id)
 	rdmsr(ATAC_CH0D1_PIO, timings, dummy);
 	if (CS5535_BAD_PIO(timings))
 		wrmsr(ATAC_CH0D1_PIO, 0xF7F4F7F4UL, 0);
-	return ata_pci_sff_init_one(dev, ppi, &cs5535_sht, NULL);
+	return ata_pci_sff_init_one(dev, ppi, &cs5535_sht, NULL, 0);
 }
 
 static const struct pci_device_id cs5535[] = {
diff --git a/drivers/ata/pata_cs5536.c b/drivers/ata/pata_cs5536.c
index ffee3978ec83..914ae3506ff5 100644
--- a/drivers/ata/pata_cs5536.c
+++ b/drivers/ata/pata_cs5536.c
@@ -260,7 +260,7 @@ static int cs5536_init_one(struct pci_dev *dev, const struct pci_device_id *id)
 		return -ENODEV;
 	}
 
-	return ata_pci_sff_init_one(dev, ppi, &cs5536_sht, NULL);
+	return ata_pci_sff_init_one(dev, ppi, &cs5536_sht, NULL, 0);
 }
 
 static const struct pci_device_id cs5536[] = {
diff --git a/drivers/ata/pata_cypress.c b/drivers/ata/pata_cypress.c
index ff6c37de5a12..0fcc096b8dac 100644
--- a/drivers/ata/pata_cypress.c
+++ b/drivers/ata/pata_cypress.c
@@ -138,7 +138,7 @@ static int cy82c693_init_one(struct pci_dev *pdev, const struct pci_device_id *i
 	if (PCI_FUNC(pdev->devfn) != 1)
 		return -ENODEV;
 
-	return ata_pci_sff_init_one(pdev, ppi, &cy82c693_sht, NULL);
+	return ata_pci_sff_init_one(pdev, ppi, &cy82c693_sht, NULL, 0);
 }
 
 static const struct pci_device_id cy82c693[] = {
diff --git a/drivers/ata/pata_efar.c b/drivers/ata/pata_efar.c
index 89076d58111b..5556163c80de 100644
--- a/drivers/ata/pata_efar.c
+++ b/drivers/ata/pata_efar.c
@@ -262,7 +262,7 @@ static int efar_init_one (struct pci_dev *pdev, const struct pci_device_id *ent)
 		dev_printk(KERN_DEBUG, &pdev->dev,
 			   "version " DRV_VERSION "\n");
 
-	return ata_pci_sff_init_one(pdev, ppi, &efar_sht, NULL);
+	return ata_pci_sff_init_one(pdev, ppi, &efar_sht, NULL, 0);
 }
 
 static const struct pci_device_id efar_pci_tbl[] = {
diff --git a/drivers/ata/pata_hpt366.c b/drivers/ata/pata_hpt366.c
index 3ec88f2c8665..af49bfb57247 100644
--- a/drivers/ata/pata_hpt366.c
+++ b/drivers/ata/pata_hpt366.c
@@ -361,7 +361,7 @@ static int hpt36x_init_one(struct pci_dev *dev, const struct pci_device_id *id)
 			break;
 	}
 	/* Now kick off ATA set up */
-	return ata_pci_sff_init_one(dev, ppi, &hpt36x_sht, hpriv);
+	return ata_pci_sff_init_one(dev, ppi, &hpt36x_sht, hpriv, 0);
 }
 
 #ifdef CONFIG_PM
diff --git a/drivers/ata/pata_hpt37x.c b/drivers/ata/pata_hpt37x.c
index 228dc1a8992f..8839307a64cf 100644
--- a/drivers/ata/pata_hpt37x.c
+++ b/drivers/ata/pata_hpt37x.c
@@ -987,7 +987,7 @@ static int hpt37x_init_one(struct pci_dev *dev, const struct pci_device_id *id)
 	}
 
 	/* Now kick off ATA set up */
-	return ata_pci_sff_init_one(dev, ppi, &hpt37x_sht, private_data);
+	return ata_pci_sff_init_one(dev, ppi, &hpt37x_sht, private_data, 0);
 }
 
 static const struct pci_device_id hpt37x[] = {
diff --git a/drivers/ata/pata_hpt3x2n.c b/drivers/ata/pata_hpt3x2n.c
index 4a291221f277..01457b266f3d 100644
--- a/drivers/ata/pata_hpt3x2n.c
+++ b/drivers/ata/pata_hpt3x2n.c
@@ -548,7 +548,7 @@ static int hpt3x2n_init_one(struct pci_dev *dev, const struct pci_device_id *id)
 		outb(inb(iobase + 0x9c) | 0x04, iobase + 0x9c);
 
 	/* Now kick off ATA set up */
-	return ata_pci_sff_init_one(dev, ppi, &hpt3x2n_sht, hpriv);
+	return ata_pci_sff_init_one(dev, ppi, &hpt3x2n_sht, hpriv, 0);
 }
 
 static const struct pci_device_id hpt3x2n[] = {
diff --git a/drivers/ata/pata_it8213.c b/drivers/ata/pata_it8213.c
index 8f3325adceb3..f971f0de88e6 100644
--- a/drivers/ata/pata_it8213.c
+++ b/drivers/ata/pata_it8213.c
@@ -273,7 +273,7 @@ static int it8213_init_one (struct pci_dev *pdev, const struct pci_device_id *en
 		dev_printk(KERN_DEBUG, &pdev->dev,
 			   "version " DRV_VERSION "\n");
 
-	return ata_pci_sff_init_one(pdev, ppi, &it8213_sht, NULL);
+	return ata_pci_sff_init_one(pdev, ppi, &it8213_sht, NULL, 0);
 }
 
 static const struct pci_device_id it8213_pci_tbl[] = {
diff --git a/drivers/ata/pata_it821x.c b/drivers/ata/pata_it821x.c
index edc5c1fed150..9bde1cb5f981 100644
--- a/drivers/ata/pata_it821x.c
+++ b/drivers/ata/pata_it821x.c
@@ -932,7 +932,7 @@ static int it821x_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
 		else
 			ppi[0] = &info_smart;
 	}
-	return ata_pci_sff_init_one(pdev, ppi, &it821x_sht, NULL);
+	return ata_pci_sff_init_one(pdev, ppi, &it821x_sht, NULL, 0);
 }
 
 #ifdef CONFIG_PM
diff --git a/drivers/ata/pata_jmicron.c b/drivers/ata/pata_jmicron.c
index 3a1474ac8838..565e01e6ac7c 100644
--- a/drivers/ata/pata_jmicron.c
+++ b/drivers/ata/pata_jmicron.c
@@ -144,7 +144,7 @@ static int jmicron_init_one (struct pci_dev *pdev, const struct pci_device_id *i
 	};
 	const struct ata_port_info *ppi[] = { &info, NULL };
 
-	return ata_pci_sff_init_one(pdev, ppi, &jmicron_sht, NULL);
+	return ata_pci_sff_init_one(pdev, ppi, &jmicron_sht, NULL, 0);
 }
 
 static const struct pci_device_id jmicron_pci_tbl[] = {
diff --git a/drivers/ata/pata_marvell.c b/drivers/ata/pata_marvell.c
index b351d81813f3..e8ca02e5a71d 100644
--- a/drivers/ata/pata_marvell.c
+++ b/drivers/ata/pata_marvell.c
@@ -153,7 +153,7 @@ static int marvell_init_one (struct pci_dev *pdev, const struct pci_device_id *i
 		return -ENODEV;
 	}
 #endif
-	return ata_pci_sff_init_one(pdev, ppi, &marvell_sht, NULL);
+	return ata_pci_sff_init_one(pdev, ppi, &marvell_sht, NULL, 0);
 }
 
 static const struct pci_device_id marvell_pci_tbl[] = {
diff --git a/drivers/ata/pata_netcell.c b/drivers/ata/pata_netcell.c
index f0d52f72f5bb..94f979a7f4f7 100644
--- a/drivers/ata/pata_netcell.c
+++ b/drivers/ata/pata_netcell.c
@@ -82,7 +82,7 @@ static int netcell_init_one (struct pci_dev *pdev, const struct pci_device_id *e
 	ata_pci_bmdma_clear_simplex(pdev);
 
 	/* And let the library code do the work */
-	return ata_pci_sff_init_one(pdev, port_info, &netcell_sht, NULL);
+	return ata_pci_sff_init_one(pdev, port_info, &netcell_sht, NULL, 0);
 }
 
 static const struct pci_device_id netcell_pci_tbl[] = {
diff --git a/drivers/ata/pata_ns87410.c b/drivers/ata/pata_ns87410.c
index ca53fac06717..2110863bb3db 100644
--- a/drivers/ata/pata_ns87410.c
+++ b/drivers/ata/pata_ns87410.c
@@ -148,7 +148,7 @@ static int ns87410_init_one(struct pci_dev *dev, const struct pci_device_id *id)
 		.port_ops = &ns87410_port_ops
 	};
 	const struct ata_port_info *ppi[] = { &info, NULL };
-	return ata_pci_sff_init_one(dev, ppi, &ns87410_sht, NULL);
+	return ata_pci_sff_init_one(dev, ppi, &ns87410_sht, NULL, 0);
 }
 
 static const struct pci_device_id ns87410[] = {
diff --git a/drivers/ata/pata_ns87415.c b/drivers/ata/pata_ns87415.c
index 061aa1c41a48..830431f036a1 100644
--- a/drivers/ata/pata_ns87415.c
+++ b/drivers/ata/pata_ns87415.c
@@ -380,7 +380,7 @@ static int ns87415_init_one (struct pci_dev *pdev, const struct pci_device_id *e
 
 	ns87415_fixup(pdev);
 
-	return ata_pci_sff_init_one(pdev, ppi, &ns87415_sht, NULL);
+	return ata_pci_sff_init_one(pdev, ppi, &ns87415_sht, NULL, 0);
 }
 
 static const struct pci_device_id ns87415_pci_tbl[] = {
diff --git a/drivers/ata/pata_oldpiix.c b/drivers/ata/pata_oldpiix.c
index 9a8687db6b2d..5f6aba7eb0dd 100644
--- a/drivers/ata/pata_oldpiix.c
+++ b/drivers/ata/pata_oldpiix.c
@@ -248,7 +248,7 @@ static int oldpiix_init_one (struct pci_dev *pdev, const struct pci_device_id *e
 		dev_printk(KERN_DEBUG, &pdev->dev,
 			   "version " DRV_VERSION "\n");
 
-	return ata_pci_sff_init_one(pdev, ppi, &oldpiix_sht, NULL);
+	return ata_pci_sff_init_one(pdev, ppi, &oldpiix_sht, NULL, 0);
 }
 
 static const struct pci_device_id oldpiix_pci_tbl[] = {
diff --git a/drivers/ata/pata_opti.c b/drivers/ata/pata_opti.c
index 99eddda2d2e5..00c5a02a94fc 100644
--- a/drivers/ata/pata_opti.c
+++ b/drivers/ata/pata_opti.c
@@ -172,7 +172,7 @@ static int opti_init_one(struct pci_dev *dev, const struct pci_device_id *id)
 	if (!printed_version++)
 		dev_printk(KERN_DEBUG, &dev->dev, "version " DRV_VERSION "\n");
 
-	return ata_pci_sff_init_one(dev, ppi, &opti_sht, NULL);
+	return ata_pci_sff_init_one(dev, ppi, &opti_sht, NULL, 0);
 }
 
 static const struct pci_device_id opti[] = {
diff --git a/drivers/ata/pata_optidma.c b/drivers/ata/pata_optidma.c
index 86885a445f97..76b7d12b1e8d 100644
--- a/drivers/ata/pata_optidma.c
+++ b/drivers/ata/pata_optidma.c
@@ -429,7 +429,7 @@ static int optidma_init_one(struct pci_dev *dev, const struct pci_device_id *id)
 	if (optiplus_with_udma(dev))
 		ppi[0] = &info_82c700_udma;
 
-	return ata_pci_sff_init_one(dev, ppi, &optidma_sht, NULL);
+	return ata_pci_sff_init_one(dev, ppi, &optidma_sht, NULL, 0);
 }
 
 static const struct pci_device_id optidma[] = {
diff --git a/drivers/ata/pata_pdc202xx_old.c b/drivers/ata/pata_pdc202xx_old.c
index 29111205185a..9ac0897cf8b0 100644
--- a/drivers/ata/pata_pdc202xx_old.c
+++ b/drivers/ata/pata_pdc202xx_old.c
@@ -337,7 +337,7 @@ static int pdc202xx_init_one(struct pci_dev *dev, const struct pci_device_id *id
 				return -ENODEV;
 		}
 	}
-	return ata_pci_sff_init_one(dev, ppi, &pdc202xx_sht, NULL);
+	return ata_pci_sff_init_one(dev, ppi, &pdc202xx_sht, NULL, 0);
 }
 
 static const struct pci_device_id pdc202xx[] = {
diff --git a/drivers/ata/pata_piccolo.c b/drivers/ata/pata_piccolo.c
index bfe0180f3efa..981615414849 100644
--- a/drivers/ata/pata_piccolo.c
+++ b/drivers/ata/pata_piccolo.c
@@ -95,7 +95,7 @@ static int ata_tosh_init_one(struct pci_dev *dev, const struct pci_device_id *id
 	};
 	const struct ata_port_info *ppi[] = { &info, &ata_dummy_port_info };
 	/* Just one port for the moment */
-	return ata_pci_sff_init_one(dev, ppi, &tosh_sht, NULL);
+	return ata_pci_sff_init_one(dev, ppi, &tosh_sht, NULL, 0);
 }
 
 static struct pci_device_id ata_tosh[] = {
diff --git a/drivers/ata/pata_radisys.c b/drivers/ata/pata_radisys.c
index 4fd25e737d9a..fc9602229acb 100644
--- a/drivers/ata/pata_radisys.c
+++ b/drivers/ata/pata_radisys.c
@@ -227,7 +227,7 @@ static int radisys_init_one (struct pci_dev *pdev, const struct pci_device_id *e
 		dev_printk(KERN_DEBUG, &pdev->dev,
 			   "version " DRV_VERSION "\n");
 
-	return ata_pci_sff_init_one(pdev, ppi, &radisys_sht, NULL);
+	return ata_pci_sff_init_one(pdev, ppi, &radisys_sht, NULL, 0);
 }
 
 static const struct pci_device_id radisys_pci_tbl[] = {
diff --git a/drivers/ata/pata_rz1000.c b/drivers/ata/pata_rz1000.c
index 2932998fc4c6..4a454a88aa9d 100644
--- a/drivers/ata/pata_rz1000.c
+++ b/drivers/ata/pata_rz1000.c
@@ -95,7 +95,7 @@ static int rz1000_init_one (struct pci_dev *pdev, const struct pci_device_id *en
 	printk_once(KERN_DEBUG DRV_NAME " version " DRV_VERSION "\n");
 
 	if (rz1000_fifo_disable(pdev) == 0)
-		return ata_pci_sff_init_one(pdev, ppi, &rz1000_sht, NULL);
+		return ata_pci_sff_init_one(pdev, ppi, &rz1000_sht, NULL, 0);
 
 	printk(KERN_ERR DRV_NAME ": failed to disable read-ahead on chipset..\n");
 	/* Not safe to use so skip */
diff --git a/drivers/ata/pata_sc1200.c b/drivers/ata/pata_sc1200.c
index 3bbed8322ecf..dfecc6f964b0 100644
--- a/drivers/ata/pata_sc1200.c
+++ b/drivers/ata/pata_sc1200.c
@@ -237,7 +237,7 @@ static int sc1200_init_one(struct pci_dev *dev, const struct pci_device_id *id)
 	};
 	const struct ata_port_info *ppi[] = { &info, NULL };
 
-	return ata_pci_sff_init_one(dev, ppi, &sc1200_sht, NULL);
+	return ata_pci_sff_init_one(dev, ppi, &sc1200_sht, NULL, 0);
 }
 
 static const struct pci_device_id sc1200[] = {
diff --git a/drivers/ata/pata_serverworks.c b/drivers/ata/pata_serverworks.c
index c85976720a0c..9524d54035f7 100644
--- a/drivers/ata/pata_serverworks.c
+++ b/drivers/ata/pata_serverworks.c
@@ -460,7 +460,7 @@ static int serverworks_init_one(struct pci_dev *pdev, const struct pci_device_id
 	if (pdev->device == PCI_DEVICE_ID_SERVERWORKS_CSB5IDE)
 		ata_pci_bmdma_clear_simplex(pdev);
 
-	return ata_pci_sff_init_one(pdev, ppi, &serverworks_sht, NULL);
+	return ata_pci_sff_init_one(pdev, ppi, &serverworks_sht, NULL, 0);
 }
 
 #ifdef CONFIG_PM
diff --git a/drivers/ata/pata_sil680.c b/drivers/ata/pata_sil680.c
index a2ace48a4610..c6c589c23ffc 100644
--- a/drivers/ata/pata_sil680.c
+++ b/drivers/ata/pata_sil680.c
@@ -356,7 +356,7 @@ static int __devinit sil680_init_one(struct pci_dev *pdev,
 				 IRQF_SHARED, &sil680_sht);
 
 use_ioports:
-	return ata_pci_sff_init_one(pdev, ppi, &sil680_sht, NULL);
+	return ata_pci_sff_init_one(pdev, ppi, &sil680_sht, NULL, 0);
 }
 
 #ifdef CONFIG_PM
diff --git a/drivers/ata/pata_sis.c b/drivers/ata/pata_sis.c
index 5c30d56dec84..b6708032f321 100644
--- a/drivers/ata/pata_sis.c
+++ b/drivers/ata/pata_sis.c
@@ -826,7 +826,7 @@ static int sis_init_one (struct pci_dev *pdev, const struct pci_device_id *ent)
 
 	sis_fixup(pdev, chipset);
 
-	return ata_pci_sff_init_one(pdev, ppi, &sis_sht, chipset);
+	return ata_pci_sff_init_one(pdev, ppi, &sis_sht, chipset, 0);
 }
 
 #ifdef CONFIG_PM
diff --git a/drivers/ata/pata_sl82c105.c b/drivers/ata/pata_sl82c105.c
index 29f733c32066..733b042a7469 100644
--- a/drivers/ata/pata_sl82c105.c
+++ b/drivers/ata/pata_sl82c105.c
@@ -316,7 +316,7 @@ static int sl82c105_init_one(struct pci_dev *dev, const struct pci_device_id *id
 	val |= CTRL_P0EN | CTRL_P0F16 | CTRL_P1F16;
 	pci_write_config_dword(dev, 0x40, val);
 
-	return ata_pci_sff_init_one(dev, ppi, &sl82c105_sht, NULL);
+	return ata_pci_sff_init_one(dev, ppi, &sl82c105_sht, NULL, 0);
 }
 
 static const struct pci_device_id sl82c105[] = {
diff --git a/drivers/ata/pata_triflex.c b/drivers/ata/pata_triflex.c
index f1f13ff222fd..48f50600ed2a 100644
--- a/drivers/ata/pata_triflex.c
+++ b/drivers/ata/pata_triflex.c
@@ -201,7 +201,7 @@ static int triflex_init_one(struct pci_dev *dev, const struct pci_device_id *id)
 	if (!printed_version++)
 		dev_printk(KERN_DEBUG, &dev->dev, "version " DRV_VERSION "\n");
 
-	return ata_pci_sff_init_one(dev, ppi, &triflex_sht, NULL);
+	return ata_pci_sff_init_one(dev, ppi, &triflex_sht, NULL, 0);
 }
 
 static const struct pci_device_id triflex[] = {
diff --git a/drivers/ata/pata_via.c b/drivers/ata/pata_via.c
index 6356377231fd..3059ec017de3 100644
--- a/drivers/ata/pata_via.c
+++ b/drivers/ata/pata_via.c
@@ -624,7 +624,7 @@ static int via_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
 	}
 
 	/* We have established the device type, now fire it up */
-	return ata_pci_sff_init_one(pdev, ppi, &via_sht, (void *)config);
+	return ata_pci_sff_init_one(pdev, ppi, &via_sht, (void *)config, 0);
 }
 
 #ifdef CONFIG_PM
diff --git a/drivers/staging/phison/phison.c b/drivers/staging/phison/phison.c
index 3817d7497049..fcba78d21636 100644
--- a/drivers/staging/phison/phison.c
+++ b/drivers/staging/phison/phison.c
@@ -62,7 +62,7 @@ static int phison_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
 	};
 	const struct ata_port_info *ppi[] = { &info, NULL };
 
-	ret = ata_pci_sff_init_one(pdev, ppi, &phison_sht, NULL);
+	ret = ata_pci_sff_init_one(pdev, ppi, &phison_sht, NULL, 0);
 
 	dev_dbg(&pdev->dev, "phison_init_one(), ret = %x\n", ret);
 
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 5fb888462359..f8ea71e6d0e2 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -1643,8 +1643,8 @@ extern int ata_pci_sff_activate_host(struct ata_host *host,
 				     irq_handler_t irq_handler,
 				     struct scsi_host_template *sht);
 extern int ata_pci_sff_init_one(struct pci_dev *pdev,
-				const struct ata_port_info * const * ppi,
-				struct scsi_host_template *sht, void *host_priv);
+		const struct ata_port_info * const * ppi,
+		struct scsi_host_template *sht, void *host_priv, int hflags);
 #endif /* CONFIG_PCI */
 
 /**
-- 
cgit v1.2.3


From 4b7d1c0509d0d07edc731f990791dc5518e51617 Mon Sep 17 00:00:00 2001
From: Ben Gardner <gardner.ben@gmail.com>
Date: Tue, 23 Feb 2010 12:41:22 -0600
Subject: ata: Detect Delkin Devices compact flash

I have a Delkin Devices compact flash card that isn't being recognized using the
SATA/PATA drivers.
The card is recognized and works with the deprecated ATA drivers.

The error I am seeing is:
ata1.00: failed to IDENTIFY (device reports invalid type, err_mask=0x0)

I tracked it down to ata_id_is_cfa() in include/linux/ata.h.
The Delkin card has id[0] set to 0x844a and id[83] set to 0.
This isn't what the kernel expects and is probably incorrect.

The simplest work-around is to add a check for 0x844a to ata_id_is_cfa().

Signed-off-by: Ben Gardner <gardner.ben@gmail.com>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
---
 include/linux/ata.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/ata.h b/include/linux/ata.h
index 20f31567ccee..b4c85e2adef5 100644
--- a/include/linux/ata.h
+++ b/include/linux/ata.h
@@ -841,7 +841,8 @@ static inline int ata_id_current_chs_valid(const u16 *id)
 
 static inline int ata_id_is_cfa(const u16 *id)
 {
-	if (id[ATA_ID_CONFIG] == 0x848A)	/* Traditional CF */
+	if ((id[ATA_ID_CONFIG] == 0x848A) ||	/* Traditional CF */
+	    (id[ATA_ID_CONFIG] == 0x844A))	/* Delkin Devices CF */
 		return 1;
 	/*
 	 * CF specs don't require specific value in the word 0 anymore and yet
-- 
cgit v1.2.3


From 73a19e4c0301908ce6346715fd08a74308451f5a Mon Sep 17 00:00:00 2001
From: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Date: Tue, 2 Mar 2010 11:39:15 +0900
Subject: serial: sh-sci: Add DMA support.

Support using DMA for sending and receiving data over SCI(F) interfaces of
various SH SoCs.

Signed-off-by: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 drivers/serial/Kconfig     |   4 +
 drivers/serial/sh-sci.c    | 618 +++++++++++++++++++++++++++++++++++++++++----
 include/linux/serial_sci.h |   6 +
 3 files changed, 582 insertions(+), 46 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/serial/Kconfig b/drivers/serial/Kconfig
index 888a0ce91c4b..11ebe862457b 100644
--- a/drivers/serial/Kconfig
+++ b/drivers/serial/Kconfig
@@ -1009,6 +1009,10 @@ config SERIAL_SH_SCI_CONSOLE
 	depends on SERIAL_SH_SCI=y
 	select SERIAL_CORE_CONSOLE
 
+config SERIAL_SH_SCI_DMA
+	bool "DMA support"
+	depends on SERIAL_SH_SCI && SH_DMAE && EXPERIMENTAL
+
 config SERIAL_PNX8XXX
 	bool "Enable PNX8XXX SoCs' UART Support"
 	depends on MIPS && (SOC_PNX8550 || SOC_PNX833X)
diff --git a/drivers/serial/sh-sci.c b/drivers/serial/sh-sci.c
index 42f3333c4ad0..f3841cd8fc5d 100644
--- a/drivers/serial/sh-sci.c
+++ b/drivers/serial/sh-sci.c
@@ -48,6 +48,9 @@
 #include <linux/ctype.h>
 #include <linux/err.h>
 #include <linux/list.h>
+#include <linux/dmaengine.h>
+#include <linux/scatterlist.h>
+#include <linux/timer.h>
 
 #ifdef CONFIG_SUPERH
 #include <asm/sh_bios.h>
@@ -84,6 +87,27 @@ struct sci_port {
 	struct clk		*dclk;
 
 	struct list_head	node;
+	struct dma_chan			*chan_tx;
+	struct dma_chan			*chan_rx;
+#ifdef CONFIG_SERIAL_SH_SCI_DMA
+	struct device			*dma_dev;
+	enum sh_dmae_slave_chan_id	slave_tx;
+	enum sh_dmae_slave_chan_id	slave_rx;
+	struct dma_async_tx_descriptor	*desc_tx;
+	struct dma_async_tx_descriptor	*desc_rx[2];
+	dma_cookie_t			cookie_tx;
+	dma_cookie_t			cookie_rx[2];
+	dma_cookie_t			active_rx;
+	struct scatterlist		sg_tx;
+	unsigned int			sg_len_tx;
+	struct scatterlist		sg_rx[2];
+	size_t				buf_len_rx;
+	struct sh_dmae_slave		param_tx;
+	struct sh_dmae_slave		param_rx;
+	struct work_struct		work_tx;
+	struct work_struct		work_rx;
+	struct timer_list		rx_timer;
+#endif
 };
 
 struct sh_sci_priv {
@@ -269,29 +293,44 @@ static inline void sci_init_pins(struct uart_port *port, unsigned int cflag)
     defined(CONFIG_CPU_SUBTYPE_SH7780) || \
     defined(CONFIG_CPU_SUBTYPE_SH7785) || \
     defined(CONFIG_CPU_SUBTYPE_SH7786)
-static inline int scif_txroom(struct uart_port *port)
+static int scif_txfill(struct uart_port *port)
 {
-	return SCIF_TXROOM_MAX - (sci_in(port, SCTFDR) & 0xff);
+	return sci_in(port, SCTFDR) & 0xff;
 }
 
-static inline int scif_rxroom(struct uart_port *port)
+static int scif_txroom(struct uart_port *port)
+{
+	return SCIF_TXROOM_MAX - scif_txfill(port);
+}
+
+static int scif_rxfill(struct uart_port *port)
 {
 	return sci_in(port, SCRFDR) & 0xff;
 }
 #elif defined(CONFIG_CPU_SUBTYPE_SH7763)
-static inline int scif_txroom(struct uart_port *port)
+static int scif_txfill(struct uart_port *port)
 {
-	if ((port->mapbase == 0xffe00000) ||
-	    (port->mapbase == 0xffe08000)) {
+	if (port->mapbase == 0xffe00000 ||
+	    port->mapbase == 0xffe08000)
 		/* SCIF0/1*/
-		return SCIF_TXROOM_MAX - (sci_in(port, SCTFDR) & 0xff);
-	} else {
+		return sci_in(port, SCTFDR) & 0xff;
+	else
 		/* SCIF2 */
-		return SCIF2_TXROOM_MAX - (sci_in(port, SCFDR) >> 8);
-	}
+		return sci_in(port, SCFDR) >> 8;
+}
+
+static int scif_txroom(struct uart_port *port)
+{
+	if (port->mapbase == 0xffe00000 ||
+	    port->mapbase == 0xffe08000)
+		/* SCIF0/1*/
+		return SCIF_TXROOM_MAX - scif_txfill(port);
+	else
+		/* SCIF2 */
+		return SCIF2_TXROOM_MAX - scif_txfill(port);
 }
 
-static inline int scif_rxroom(struct uart_port *port)
+static int scif_rxfill(struct uart_port *port)
 {
 	if ((port->mapbase == 0xffe00000) ||
 	    (port->mapbase == 0xffe08000)) {
@@ -303,23 +342,33 @@ static inline int scif_rxroom(struct uart_port *port)
 	}
 }
 #else
-static inline int scif_txroom(struct uart_port *port)
+static int scif_txfill(struct uart_port *port)
 {
-	return SCIF_TXROOM_MAX - (sci_in(port, SCFDR) >> 8);
+	return sci_in(port, SCFDR) >> 8;
 }
 
-static inline int scif_rxroom(struct uart_port *port)
+static int scif_txroom(struct uart_port *port)
+{
+	return SCIF_TXROOM_MAX - scif_txfill(port);
+}
+
+static int scif_rxfill(struct uart_port *port)
 {
 	return sci_in(port, SCFDR) & SCIF_RFDC_MASK;
 }
 #endif
 
-static inline int sci_txroom(struct uart_port *port)
+static int sci_txfill(struct uart_port *port)
 {
-	return (sci_in(port, SCxSR) & SCI_TDRE) != 0;
+	return !(sci_in(port, SCxSR) & SCI_TDRE);
 }
 
-static inline int sci_rxroom(struct uart_port *port)
+static int sci_txroom(struct uart_port *port)
+{
+	return !sci_txfill(port);
+}
+
+static int sci_rxfill(struct uart_port *port)
 {
 	return (sci_in(port, SCxSR) & SCxSR_RDxF(port)) != 0;
 }
@@ -406,9 +455,9 @@ static inline void sci_receive_chars(struct uart_port *port)
 
 	while (1) {
 		if (port->type == PORT_SCI)
-			count = sci_rxroom(port);
+			count = sci_rxfill(port);
 		else
-			count = scif_rxroom(port);
+			count = scif_rxfill(port);
 
 		/* Don't copy more bytes than there is room for in the buffer */
 		count = tty_buffer_request_room(tty, count);
@@ -453,10 +502,10 @@ static inline void sci_receive_chars(struct uart_port *port)
 				}
 
 				/* Store data and status */
-				if (status&SCxSR_FER(port)) {
+				if (status & SCxSR_FER(port)) {
 					flag = TTY_FRAME;
 					dev_notice(port->dev, "frame error\n");
-				} else if (status&SCxSR_PER(port)) {
+				} else if (status & SCxSR_PER(port)) {
 					flag = TTY_PARITY;
 					dev_notice(port->dev, "parity error\n");
 				} else
@@ -618,13 +667,39 @@ static inline int sci_handle_breaks(struct uart_port *port)
 	return copied;
 }
 
-static irqreturn_t sci_rx_interrupt(int irq, void *port)
+static irqreturn_t sci_rx_interrupt(int irq, void *ptr)
 {
+#ifdef CONFIG_SERIAL_SH_SCI_DMA
+	struct uart_port *port = ptr;
+	struct sci_port *s = to_sci_port(port);
+
+	if (s->chan_rx) {
+		unsigned long tout;
+		u16 scr = sci_in(port, SCSCR);
+		u16 ssr = sci_in(port, SCxSR);
+
+		/* Disable future Rx interrupts */
+		sci_out(port, SCSCR, scr & ~SCI_CTRL_FLAGS_RIE);
+		/* Clear current interrupt */
+		sci_out(port, SCxSR, ssr & ~(1 | SCxSR_RDxF(port)));
+		/* Calculate delay for 1.5 DMA buffers */
+		tout = (port->timeout - HZ / 50) * s->buf_len_rx * 3 /
+			port->fifosize / 2;
+		dev_dbg(port->dev, "Rx IRQ: setup timeout in %u ms\n",
+			tout * 1000 / HZ);
+		if (tout < 2)
+			tout = 2;
+		mod_timer(&s->rx_timer, jiffies + tout);
+
+		return IRQ_HANDLED;
+	}
+#endif
+
 	/* I think sci_receive_chars has to be called irrespective
 	 * of whether the I_IXOFF is set, otherwise, how is the interrupt
 	 * to be disabled?
 	 */
-	sci_receive_chars(port);
+	sci_receive_chars(ptr);
 
 	return IRQ_HANDLED;
 }
@@ -680,6 +755,7 @@ static irqreturn_t sci_mpxed_interrupt(int irq, void *ptr)
 {
 	unsigned short ssr_status, scr_status, err_enabled;
 	struct uart_port *port = ptr;
+	struct sci_port *s = to_sci_port(port);
 	irqreturn_t ret = IRQ_NONE;
 
 	ssr_status = sci_in(port, SCxSR);
@@ -687,10 +763,15 @@ static irqreturn_t sci_mpxed_interrupt(int irq, void *ptr)
 	err_enabled = scr_status & (SCI_CTRL_FLAGS_REIE | SCI_CTRL_FLAGS_RIE);
 
 	/* Tx Interrupt */
-	if ((ssr_status & SCxSR_TDxE(port)) && (scr_status & SCI_CTRL_FLAGS_TIE))
+	if ((ssr_status & SCxSR_TDxE(port)) && (scr_status & SCI_CTRL_FLAGS_TIE) &&
+	    !s->chan_tx)
 		ret = sci_tx_interrupt(irq, ptr);
-	/* Rx Interrupt */
-	if ((ssr_status & SCxSR_RDxF(port)) && (scr_status & SCI_CTRL_FLAGS_RIE))
+	/*
+	 * Rx Interrupt: if we're using DMA, the DMA controller clears RDF /
+	 * DR flags
+	 */
+	if (((ssr_status & SCxSR_RDxF(port)) || s->chan_rx) &&
+	    (scr_status & SCI_CTRL_FLAGS_RIE))
 		ret = sci_rx_interrupt(irq, ptr);
 	/* Error Interrupt */
 	if ((ssr_status & SCxSR_ERRORS(port)) && err_enabled)
@@ -699,6 +780,10 @@ static irqreturn_t sci_mpxed_interrupt(int irq, void *ptr)
 	if ((ssr_status & SCxSR_BRK(port)) && err_enabled)
 		ret = sci_br_interrupt(irq, ptr);
 
+	WARN_ONCE(ret == IRQ_NONE,
+		  "%s: %d IRQ %d, status %x, control %x\n", __func__,
+		  irq, port->line, ssr_status, scr_status);
+
 	return ret;
 }
 
@@ -800,7 +885,9 @@ static void sci_free_irq(struct sci_port *port)
 static unsigned int sci_tx_empty(struct uart_port *port)
 {
 	unsigned short status = sci_in(port, SCxSR);
-	return status & SCxSR_TEND(port) ? TIOCSER_TEMT : 0;
+	unsigned short in_tx_fifo = scif_txfill(port);
+
+	return (status & SCxSR_TEND(port)) && !in_tx_fifo ? TIOCSER_TEMT : 0;
 }
 
 static void sci_set_mctrl(struct uart_port *port, unsigned int mctrl)
@@ -812,16 +899,299 @@ static void sci_set_mctrl(struct uart_port *port, unsigned int mctrl)
 
 static unsigned int sci_get_mctrl(struct uart_port *port)
 {
-	/* This routine is used for geting signals of: DTR, DCD, DSR, RI,
+	/* This routine is used for getting signals of: DTR, DCD, DSR, RI,
 	   and CTS/RTS */
 
 	return TIOCM_DTR | TIOCM_RTS | TIOCM_DSR;
 }
 
+#ifdef CONFIG_SERIAL_SH_SCI_DMA
+static void sci_dma_tx_complete(void *arg)
+{
+	struct sci_port *s = arg;
+	struct uart_port *port = &s->port;
+	struct circ_buf *xmit = &port->state->xmit;
+	unsigned long flags;
+
+	dev_dbg(port->dev, "%s(%d)\n", __func__, port->line);
+
+	spin_lock_irqsave(&port->lock, flags);
+
+	xmit->tail += s->sg_tx.length;
+	xmit->tail &= UART_XMIT_SIZE - 1;
+
+	port->icount.tx += s->sg_tx.length;
+
+	async_tx_ack(s->desc_tx);
+	s->cookie_tx = -EINVAL;
+	s->desc_tx = NULL;
+
+	spin_unlock_irqrestore(&port->lock, flags);
+
+	if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
+		uart_write_wakeup(port);
+
+	if (uart_circ_chars_pending(xmit))
+		schedule_work(&s->work_tx);
+}
+
+/* Locking: called with port lock held */
+static int sci_dma_rx_push(struct sci_port *s, struct tty_struct *tty,
+			   size_t count)
+{
+	struct uart_port *port = &s->port;
+	int i, active, room;
+
+	room = tty_buffer_request_room(tty, count);
+
+	if (s->active_rx == s->cookie_rx[0]) {
+		active = 0;
+	} else if (s->active_rx == s->cookie_rx[1]) {
+		active = 1;
+	} else {
+		dev_err(port->dev, "cookie %d not found!\n", s->active_rx);
+		return 0;
+	}
+
+	if (room < count)
+		dev_warn(port->dev, "Rx overrun: dropping %u bytes\n",
+			 count - room);
+	if (!room)
+		return room;
+
+	for (i = 0; i < room; i++)
+		tty_insert_flip_char(tty, ((u8 *)sg_virt(&s->sg_rx[active]))[i],
+				     TTY_NORMAL);
+
+	port->icount.rx += room;
+
+	return room;
+}
+
+static void sci_dma_rx_complete(void *arg)
+{
+	struct sci_port *s = arg;
+	struct uart_port *port = &s->port;
+	struct tty_struct *tty = port->state->port.tty;
+	unsigned long flags;
+	int count;
+
+	dev_dbg(port->dev, "%s(%d)\n", __func__, port->line);
+
+	spin_lock_irqsave(&port->lock, flags);
+
+	count = sci_dma_rx_push(s, tty, s->buf_len_rx);
+
+	mod_timer(&s->rx_timer, jiffies + msecs_to_jiffies(5));
+
+	spin_unlock_irqrestore(&port->lock, flags);
+
+	if (count)
+		tty_flip_buffer_push(tty);
+
+	schedule_work(&s->work_rx);
+}
+
+static void sci_start_rx(struct uart_port *port);
+static void sci_start_tx(struct uart_port *port);
+
+static void sci_rx_dma_release(struct sci_port *s, bool enable_pio)
+{
+	struct dma_chan *chan = s->chan_rx;
+	struct uart_port *port = &s->port;
+	unsigned long flags;
+
+	s->chan_rx = NULL;
+	s->cookie_rx[0] = s->cookie_rx[1] = -EINVAL;
+	dma_release_channel(chan);
+	dma_free_coherent(port->dev, s->buf_len_rx * 2,
+			  sg_virt(&s->sg_rx[0]), sg_dma_address(&s->sg_rx[0]));
+	if (enable_pio)
+		sci_start_rx(port);
+}
+
+static void sci_tx_dma_release(struct sci_port *s, bool enable_pio)
+{
+	struct dma_chan *chan = s->chan_tx;
+	struct uart_port *port = &s->port;
+	unsigned long flags;
+
+	s->chan_tx = NULL;
+	s->cookie_tx = -EINVAL;
+	dma_release_channel(chan);
+	if (enable_pio)
+		sci_start_tx(port);
+}
+
+static void sci_submit_rx(struct sci_port *s)
+{
+	struct dma_chan *chan = s->chan_rx;
+	int i;
+
+	for (i = 0; i < 2; i++) {
+		struct scatterlist *sg = &s->sg_rx[i];
+		struct dma_async_tx_descriptor *desc;
+
+		desc = chan->device->device_prep_slave_sg(chan,
+			sg, 1, DMA_FROM_DEVICE, DMA_PREP_INTERRUPT);
+
+		if (desc) {
+			s->desc_rx[i] = desc;
+			desc->callback = sci_dma_rx_complete;
+			desc->callback_param = s;
+			s->cookie_rx[i] = desc->tx_submit(desc);
+		}
+
+		if (!desc || s->cookie_rx[i] < 0) {
+			if (i) {
+				async_tx_ack(s->desc_rx[0]);
+				s->cookie_rx[0] = -EINVAL;
+			}
+			if (desc) {
+				async_tx_ack(desc);
+				s->cookie_rx[i] = -EINVAL;
+			}
+			dev_warn(s->port.dev,
+				 "failed to re-start DMA, using PIO\n");
+			sci_rx_dma_release(s, true);
+			return;
+		}
+	}
+
+	s->active_rx = s->cookie_rx[0];
+
+	dma_async_issue_pending(chan);
+}
+
+static void work_fn_rx(struct work_struct *work)
+{
+	struct sci_port *s = container_of(work, struct sci_port, work_rx);
+	struct uart_port *port = &s->port;
+	struct dma_async_tx_descriptor *desc;
+	int new;
+
+	if (s->active_rx == s->cookie_rx[0]) {
+		new = 0;
+	} else if (s->active_rx == s->cookie_rx[1]) {
+		new = 1;
+	} else {
+		dev_err(port->dev, "cookie %d not found!\n", s->active_rx);
+		return;
+	}
+	desc = s->desc_rx[new];
+
+	if (dma_async_is_tx_complete(s->chan_rx, s->active_rx, NULL, NULL) !=
+	    DMA_SUCCESS) {
+		/* Handle incomplete DMA receive */
+		struct tty_struct *tty = port->state->port.tty;
+		struct dma_chan *chan = s->chan_rx;
+		struct sh_desc *sh_desc = container_of(desc, struct sh_desc,
+						       async_tx);
+		unsigned long flags;
+		int count;
+
+		chan->device->device_terminate_all(chan);
+		dev_dbg(port->dev, "Read %u bytes with cookie %d\n",
+			sh_desc->partial, sh_desc->cookie);
+
+		spin_lock_irqsave(&port->lock, flags);
+		count = sci_dma_rx_push(s, tty, sh_desc->partial);
+		spin_unlock_irqrestore(&port->lock, flags);
+
+		if (count)
+			tty_flip_buffer_push(tty);
+
+		sci_submit_rx(s);
+
+		return;
+	}
+
+	s->cookie_rx[new] = desc->tx_submit(desc);
+	if (s->cookie_rx[new] < 0) {
+		dev_warn(port->dev, "Failed submitting Rx DMA descriptor\n");
+		sci_rx_dma_release(s, true);
+		return;
+	}
+
+	dev_dbg(port->dev, "%s: cookie %d #%d\n", __func__,
+		s->cookie_rx[new], new);
+
+	s->active_rx = s->cookie_rx[!new];
+}
+
+static void work_fn_tx(struct work_struct *work)
+{
+	struct sci_port *s = container_of(work, struct sci_port, work_tx);
+	struct dma_async_tx_descriptor *desc;
+	struct dma_chan *chan = s->chan_tx;
+	struct uart_port *port = &s->port;
+	struct circ_buf *xmit = &port->state->xmit;
+	struct scatterlist *sg = &s->sg_tx;
+
+	/*
+	 * DMA is idle now.
+	 * Port xmit buffer is already mapped, and it is one page... Just adjust
+	 * offsets and lengths. Since it is a circular buffer, we have to
+	 * transmit till the end, and then the rest. Take the port lock to get a
+	 * consistent xmit buffer state.
+	 */
+	spin_lock_irq(&port->lock);
+	sg->offset = xmit->tail & (UART_XMIT_SIZE - 1);
+	sg->dma_address = (sg_dma_address(sg) & ~(UART_XMIT_SIZE - 1)) +
+		sg->offset;
+	sg->length = min((int)CIRC_CNT(xmit->head, xmit->tail, UART_XMIT_SIZE),
+		CIRC_CNT_TO_END(xmit->head, xmit->tail, UART_XMIT_SIZE));
+	sg->dma_length = sg->length;
+	spin_unlock_irq(&port->lock);
+
+	BUG_ON(!sg->length);
+
+	desc = chan->device->device_prep_slave_sg(chan,
+			sg, s->sg_len_tx, DMA_TO_DEVICE,
+			DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+	if (!desc) {
+		/* switch to PIO */
+		sci_tx_dma_release(s, true);
+		return;
+	}
+
+	dma_sync_sg_for_device(port->dev, sg, 1, DMA_TO_DEVICE);
+
+	spin_lock_irq(&port->lock);
+	s->desc_tx = desc;
+	desc->callback = sci_dma_tx_complete;
+	desc->callback_param = s;
+	spin_unlock_irq(&port->lock);
+	s->cookie_tx = desc->tx_submit(desc);
+	if (s->cookie_tx < 0) {
+		dev_warn(port->dev, "Failed submitting Tx DMA descriptor\n");
+		/* switch to PIO */
+		sci_tx_dma_release(s, true);
+		return;
+	}
+
+	dev_dbg(port->dev, "%s: %p: %d...%d, cookie %d\n", __func__,
+		xmit->buf, xmit->tail, xmit->head, s->cookie_tx);
+
+	dma_async_issue_pending(chan);
+}
+#endif
+
 static void sci_start_tx(struct uart_port *port)
 {
 	unsigned short ctrl;
 
+#ifdef CONFIG_SERIAL_SH_SCI_DMA
+	struct sci_port *s = to_sci_port(port);
+
+	if (s->chan_tx) {
+		if (!uart_circ_empty(&s->port.state->xmit) && s->cookie_tx < 0)
+			schedule_work(&s->work_tx);
+
+		return;
+	}
+#endif
+
 	/* Set TIE (Transmit Interrupt Enable) bit in SCSCR */
 	ctrl = sci_in(port, SCSCR);
 	ctrl |= SCI_CTRL_FLAGS_TIE;
@@ -838,13 +1208,12 @@ static void sci_stop_tx(struct uart_port *port)
 	sci_out(port, SCSCR, ctrl);
 }
 
-static void sci_start_rx(struct uart_port *port, unsigned int tty_start)
+static void sci_start_rx(struct uart_port *port)
 {
-	unsigned short ctrl;
+	unsigned short ctrl = SCI_CTRL_FLAGS_RIE | SCI_CTRL_FLAGS_REIE;
 
 	/* Set RIE (Receive Interrupt Enable) bit in SCSCR */
-	ctrl = sci_in(port, SCSCR);
-	ctrl |= SCI_CTRL_FLAGS_RIE | SCI_CTRL_FLAGS_REIE;
+	ctrl |= sci_in(port, SCSCR);
 	sci_out(port, SCSCR, ctrl);
 }
 
@@ -868,16 +1237,154 @@ static void sci_break_ctl(struct uart_port *port, int break_state)
 	/* Nothing here yet .. */
 }
 
+#ifdef CONFIG_SERIAL_SH_SCI_DMA
+static bool filter(struct dma_chan *chan, void *slave)
+{
+	struct sh_dmae_slave *param = slave;
+
+	dev_dbg(chan->device->dev, "%s: slave ID %d\n", __func__,
+		param->slave_id);
+
+	if (param->dma_dev == chan->device->dev) {
+		chan->private = param;
+		return true;
+	} else {
+		return false;
+	}
+}
+
+static void rx_timer_fn(unsigned long arg)
+{
+	struct sci_port *s = (struct sci_port *)arg;
+	struct uart_port *port = &s->port;
+
+	u16 scr = sci_in(port, SCSCR);
+	sci_out(port, SCSCR, scr | SCI_CTRL_FLAGS_RIE);
+	dev_dbg(port->dev, "DMA Rx timed out\n");
+	schedule_work(&s->work_rx);
+}
+
+static void sci_request_dma(struct uart_port *port)
+{
+	struct sci_port *s = to_sci_port(port);
+	struct sh_dmae_slave *param;
+	struct dma_chan *chan;
+	dma_cap_mask_t mask;
+	int nent;
+
+	dev_dbg(port->dev, "%s: port %d DMA %p\n", __func__,
+		port->line, s->dma_dev);
+
+	if (!s->dma_dev)
+		return;
+
+	dma_cap_zero(mask);
+	dma_cap_set(DMA_SLAVE, mask);
+
+	param = &s->param_tx;
+
+	/* Slave ID, e.g., SHDMA_SLAVE_SCIF0_TX */
+	param->slave_id = s->slave_tx;
+	param->dma_dev = s->dma_dev;
+
+	s->cookie_tx = -EINVAL;
+	chan = dma_request_channel(mask, filter, param);
+	dev_dbg(port->dev, "%s: TX: got channel %p\n", __func__, chan);
+	if (chan) {
+		s->chan_tx = chan;
+		sg_init_table(&s->sg_tx, 1);
+		/* UART circular tx buffer is an aligned page. */
+		BUG_ON((int)port->state->xmit.buf & ~PAGE_MASK);
+		sg_set_page(&s->sg_tx, virt_to_page(port->state->xmit.buf),
+			    UART_XMIT_SIZE, (int)port->state->xmit.buf & ~PAGE_MASK);
+		nent = dma_map_sg(port->dev, &s->sg_tx, 1, DMA_TO_DEVICE);
+		if (!nent)
+			sci_tx_dma_release(s, false);
+		else
+			dev_dbg(port->dev, "%s: mapped %d@%p to %x\n", __func__,
+				sg_dma_len(&s->sg_tx),
+				port->state->xmit.buf, sg_dma_address(&s->sg_tx));
+
+		s->sg_len_tx = nent;
+
+		INIT_WORK(&s->work_tx, work_fn_tx);
+	}
+
+	param = &s->param_rx;
+
+	/* Slave ID, e.g., SHDMA_SLAVE_SCIF0_RX */
+	param->slave_id = s->slave_rx;
+	param->dma_dev = s->dma_dev;
+
+	chan = dma_request_channel(mask, filter, param);
+	dev_dbg(port->dev, "%s: RX: got channel %p\n", __func__, chan);
+	if (chan) {
+		dma_addr_t dma[2];
+		void *buf[2];
+		int i;
+
+		s->chan_rx = chan;
+
+		s->buf_len_rx = 2 * max(16, (int)port->fifosize);
+		buf[0] = dma_alloc_coherent(port->dev, s->buf_len_rx * 2,
+					    &dma[0], GFP_KERNEL);
+
+		if (!buf[0]) {
+			dev_warn(port->dev,
+				 "failed to allocate dma buffer, using PIO\n");
+			sci_rx_dma_release(s, true);
+			return;
+		}
+
+		buf[1] = buf[0] + s->buf_len_rx;
+		dma[1] = dma[0] + s->buf_len_rx;
+
+		for (i = 0; i < 2; i++) {
+			struct scatterlist *sg = &s->sg_rx[i];
+
+			sg_init_table(sg, 1);
+			sg_set_page(sg, virt_to_page(buf[i]), s->buf_len_rx,
+				    (int)buf[i] & ~PAGE_MASK);
+			sg->dma_address = dma[i];
+			sg->dma_length = sg->length;
+		}
+
+		INIT_WORK(&s->work_rx, work_fn_rx);
+		setup_timer(&s->rx_timer, rx_timer_fn, (unsigned long)s);
+
+		sci_submit_rx(s);
+	}
+}
+
+static void sci_free_dma(struct uart_port *port)
+{
+	struct sci_port *s = to_sci_port(port);
+
+	if (!s->dma_dev)
+		return;
+
+	if (s->chan_tx)
+		sci_tx_dma_release(s, false);
+	if (s->chan_rx)
+		sci_rx_dma_release(s, false);
+}
+#endif
+
 static int sci_startup(struct uart_port *port)
 {
 	struct sci_port *s = to_sci_port(port);
 
+	dev_dbg(port->dev, "%s(%d)\n", __func__, port->line);
+
 	if (s->enable)
 		s->enable(port);
 
 	sci_request_irq(s);
+#ifdef CONFIG_SERIAL_SH_SCI_DMA
+	sci_request_dma(port);
+#endif
 	sci_start_tx(port);
-	sci_start_rx(port, 1);
+	sci_start_rx(port);
 
 	return 0;
 }
@@ -886,8 +1393,13 @@ static void sci_shutdown(struct uart_port *port)
 {
 	struct sci_port *s = to_sci_port(port);
 
+	dev_dbg(port->dev, "%s(%d)\n", __func__, port->line);
+
 	sci_stop_rx(port);
 	sci_stop_tx(port);
+#ifdef CONFIG_SERIAL_SH_SCI_DMA
+	sci_free_dma(port);
+#endif
 	sci_free_irq(s);
 
 	if (s->disable)
@@ -937,6 +1449,9 @@ static void sci_set_termios(struct uart_port *port, struct ktermios *termios,
 
 	sci_out(port, SCSMR, smr_val);
 
+	dev_dbg(port->dev, "%s: SMR %x, t %x, SCSCR %x\n", __func__, smr_val, t,
+		SCSCR_INIT(port));
+
 	if (t > 0) {
 		if (t >= 256) {
 			sci_out(port, SCSMR, (sci_in(port, SCSMR) & ~3) | 1);
@@ -954,7 +1469,7 @@ static void sci_set_termios(struct uart_port *port, struct ktermios *termios,
 	sci_out(port, SCSCR, SCSCR_INIT(port));
 
 	if ((termios->c_cflag & CREAD) != 0)
-		sci_start_rx(port, 0);
+		sci_start_rx(port);
 }
 
 static const char *sci_type(struct uart_port *port)
@@ -1049,19 +1564,21 @@ static void __devinit sci_init_single(struct platform_device *dev,
 				      unsigned int index,
 				      struct plat_sci_port *p)
 {
-	sci_port->port.ops	= &sci_uart_ops;
-	sci_port->port.iotype	= UPIO_MEM;
-	sci_port->port.line	= index;
+	struct uart_port *port = &sci_port->port;
+
+	port->ops	= &sci_uart_ops;
+	port->iotype	= UPIO_MEM;
+	port->line	= index;
 
 	switch (p->type) {
 	case PORT_SCIFA:
-		sci_port->port.fifosize = 64;
+		port->fifosize = 64;
 		break;
 	case PORT_SCIF:
-		sci_port->port.fifosize = 16;
+		port->fifosize = 16;
 		break;
 	default:
-		sci_port->port.fifosize = 1;
+		port->fifosize = 1;
 		break;
 	}
 
@@ -1070,19 +1587,28 @@ static void __devinit sci_init_single(struct platform_device *dev,
 		sci_port->dclk = clk_get(&dev->dev, "peripheral_clk");
 		sci_port->enable = sci_clk_enable;
 		sci_port->disable = sci_clk_disable;
-		sci_port->port.dev = &dev->dev;
+		port->dev = &dev->dev;
 	}
 
 	sci_port->break_timer.data = (unsigned long)sci_port;
 	sci_port->break_timer.function = sci_break_timer;
 	init_timer(&sci_port->break_timer);
 
-	sci_port->port.mapbase	= p->mapbase;
-	sci_port->port.membase	= p->membase;
+	port->mapbase	= p->mapbase;
+	port->membase	= p->membase;
 
-	sci_port->port.irq	= p->irqs[SCIx_TXI_IRQ];
-	sci_port->port.flags	= p->flags;
-	sci_port->type		= sci_port->port.type = p->type;
+	port->irq	= p->irqs[SCIx_TXI_IRQ];
+	port->flags	= p->flags;
+	sci_port->type	= port->type = p->type;
+
+#ifdef CONFIG_SERIAL_SH_SCI_DMA
+	sci_port->dma_dev	= p->dma_dev;
+	sci_port->slave_tx	= p->dma_slave_tx;
+	sci_port->slave_rx	= p->dma_slave_rx;
+
+	dev_dbg(port->dev, "%s: DMA device %p, tx %d, rx %d\n", __func__,
+		p->dma_dev, p->dma_slave_tx, p->dma_slave_rx);
+#endif
 
 	memcpy(&sci_port->irqs, &p->irqs, sizeof(p->irqs));
 }
diff --git a/include/linux/serial_sci.h b/include/linux/serial_sci.h
index 1c297ddc9d5a..1b177d29a7f0 100644
--- a/include/linux/serial_sci.h
+++ b/include/linux/serial_sci.h
@@ -2,6 +2,7 @@
 #define __LINUX_SERIAL_SCI_H
 
 #include <linux/serial_core.h>
+#include <asm/dmaengine.h>
 
 /*
  * Generic header for SuperH SCI(F) (used by sh/sh64/h8300 and related parts)
@@ -16,6 +17,8 @@ enum {
 	SCIx_NR_IRQS,
 };
 
+struct device;
+
 /*
  * Platform device specific platform_data struct
  */
@@ -26,6 +29,9 @@ struct plat_sci_port {
 	unsigned int	type;			/* SCI / SCIF / IRDA */
 	upf_t		flags;			/* UPF_* flags */
 	char		*clk;			/* clock string */
+	struct device	*dma_dev;
+	enum sh_dmae_slave_chan_id dma_slave_tx;
+	enum sh_dmae_slave_chan_id dma_slave_rx;
 };
 
 #endif /* __LINUX_SERIAL_SCI_H */
-- 
cgit v1.2.3


From 4ab408dea0f0dba4dec0555f4f35b7ae703f5e91 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 1 Mar 2010 03:09:26 +0000
Subject: net: fix protocol sk_buff field

Commit e992cd9b72a18 (kmemcheck: make bitfield annotations truly no-ops
when disabled) allows us to revert a workaround we did in the past to
not add holes in sk_buff structure.

This patch partially reverts commit 14d18a81b5171
(net: fix kmemcheck annotations) so that sparse doesnt complain:

include/linux/skbuff.h:357:41: error: invalid bitfield specifier for
type restricted __be16.

Reported-by: Stefan Richter <stefanr@s5r6.in-berlin.de>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index d266eeef522d..03f816a9b659 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -355,8 +355,8 @@ struct sk_buff {
 				ipvs_property:1,
 				peeked:1,
 				nf_trace:1;
-	__be16			protocol:16;
 	kmemcheck_bitfield_end(flags1);
+	__be16			protocol;
 
 	void			(*destructor)(struct sk_buff *skb);
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
-- 
cgit v1.2.3


From b5527a7766f0505dc72efe3cefe5e9dea826f611 Mon Sep 17 00:00:00 2001
From: Jean Delvare <khali@linux-fr.org>
Date: Tue, 2 Mar 2010 12:23:42 +0100
Subject: i2c: Add SMBus alert support

SMBus alert support. The SMBus alert protocol allows several SMBus
slave devices to share a single interrupt pin on the SMBus master,
while still allowing the master to know which slave triggered the
interrupt.

This is based on preliminary work by David Brownell. The key
difference between David's implementation and mine is that his was
part of i2c-core, while mine is split into a separate, standalone
module named i2c-smbus. The i2c-smbus module is meant to include
support for all SMBus extensions to the I2C protocol in the future.

The benefit of this approach is a zero cost for I2C bus segments which
do not need SMBus alert support. Where David's implementation
increased the size of struct i2c_adapter by 7% (40 bytes on i386),
mine doesn't touch it. Where David's implementation added over 150
lines of code to i2c-core (+10%), mine doesn't touch it. The only
change that touches all the users of the i2c subsystem is a new
callback in struct i2c_driver (common to both implementations.) I seem
to remember Trent was worried about the footprint of David'd
implementation, hopefully mine addresses the issue.

Signed-off-by: Jean Delvare <khali@linux-fr.org>
Acked-by: Jonathan Cameron <jic23@cam.ac.uk>
Cc: David Brownell <dbrownell@users.sourceforge.net>
Cc: Trent Piepho <tpiepho@freescale.com>
---
 Documentation/i2c/smbus-protocol |  16 +++
 drivers/i2c/Makefile             |   2 +-
 drivers/i2c/i2c-smbus.c          | 263 +++++++++++++++++++++++++++++++++++++++
 include/linux/i2c-smbus.h        |  50 ++++++++
 include/linux/i2c.h              |   7 ++
 5 files changed, 337 insertions(+), 1 deletion(-)
 create mode 100644 drivers/i2c/i2c-smbus.c
 create mode 100644 include/linux/i2c-smbus.h

(limited to 'include/linux')

diff --git a/Documentation/i2c/smbus-protocol b/Documentation/i2c/smbus-protocol
index 9df47441f0e7..7c19d1a2bea0 100644
--- a/Documentation/i2c/smbus-protocol
+++ b/Documentation/i2c/smbus-protocol
@@ -185,6 +185,22 @@ the protocol. All ARP communications use slave address 0x61 and
 require PEC checksums.
 
 
+SMBus Alert
+===========
+
+SMBus Alert was introduced in Revision 1.0 of the specification.
+
+The SMBus alert protocol allows several SMBus slave devices to share a
+single interrupt pin on the SMBus master, while still allowing the master
+to know which slave triggered the interrupt.
+
+This is implemented the following way in the Linux kernel:
+* I2C bus drivers which support SMBus alert should call
+  i2c_setup_smbus_alert() to setup SMBus alert support.
+* I2C drivers for devices which can trigger SMBus alerts should implement
+  the optional alert() callback.
+
+
 I2C Block Transactions
 ======================
 
diff --git a/drivers/i2c/Makefile b/drivers/i2c/Makefile
index ba26e6cbe74e..7111c93bd3e6 100644
--- a/drivers/i2c/Makefile
+++ b/drivers/i2c/Makefile
@@ -3,7 +3,7 @@
 #
 
 obj-$(CONFIG_I2C_BOARDINFO)	+= i2c-boardinfo.o
-obj-$(CONFIG_I2C)		+= i2c-core.o
+obj-$(CONFIG_I2C)		+= i2c-core.o i2c-smbus.o
 obj-$(CONFIG_I2C_CHARDEV)	+= i2c-dev.o
 obj-y				+= busses/ chips/ algos/
 
diff --git a/drivers/i2c/i2c-smbus.c b/drivers/i2c/i2c-smbus.c
new file mode 100644
index 000000000000..421278221243
--- /dev/null
+++ b/drivers/i2c/i2c-smbus.c
@@ -0,0 +1,263 @@
+/*
+ * i2c-smbus.c - SMBus extensions to the I2C protocol
+ *
+ * Copyright (C) 2008 David Brownell
+ * Copyright (C) 2010 Jean Delvare <khali@linux-fr.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/semaphore.h>
+#include <linux/interrupt.h>
+#include <linux/workqueue.h>
+#include <linux/i2c.h>
+#include <linux/i2c-smbus.h>
+
+struct i2c_smbus_alert {
+	unsigned int		alert_edge_triggered:1;
+	int			irq;
+	struct work_struct	alert;
+	struct i2c_client	*ara;		/* Alert response address */
+};
+
+struct alert_data {
+	unsigned short		addr;
+	u8			flag:1;
+};
+
+/* If this is the alerting device, notify its driver */
+static int smbus_do_alert(struct device *dev, void *addrp)
+{
+	struct i2c_client *client = i2c_verify_client(dev);
+	struct alert_data *data = addrp;
+
+	if (!client || client->addr != data->addr)
+		return 0;
+	if (client->flags & I2C_CLIENT_TEN)
+		return 0;
+
+	/*
+	 * Drivers should either disable alerts, or provide at least
+	 * a minimal handler.  Lock so client->driver won't change.
+	 */
+	down(&dev->sem);
+	if (client->driver) {
+		if (client->driver->alert)
+			client->driver->alert(client, data->flag);
+		else
+			dev_warn(&client->dev, "no driver alert()!\n");
+	} else
+		dev_dbg(&client->dev, "alert with no driver\n");
+	up(&dev->sem);
+
+	/* Stop iterating after we find the device */
+	return -EBUSY;
+}
+
+/*
+ * The alert IRQ handler needs to hand work off to a task which can issue
+ * SMBus calls, because those sleeping calls can't be made in IRQ context.
+ */
+static void smbus_alert(struct work_struct *work)
+{
+	struct i2c_smbus_alert *alert;
+	struct i2c_client *ara;
+	unsigned short prev_addr = 0;	/* Not a valid address */
+
+	alert = container_of(work, struct i2c_smbus_alert, alert);
+	ara = alert->ara;
+
+	for (;;) {
+		s32 status;
+		struct alert_data data;
+
+		/*
+		 * Devices with pending alerts reply in address order, low
+		 * to high, because of slave transmit arbitration.  After
+		 * responding, an SMBus device stops asserting SMBALERT#.
+		 *
+		 * Note that SMBus 2.0 reserves 10-bit addresess for future
+		 * use.  We neither handle them, nor try to use PEC here.
+		 */
+		status = i2c_smbus_read_byte(ara);
+		if (status < 0)
+			break;
+
+		data.flag = status & 1;
+		data.addr = status >> 1;
+
+		if (data.addr == prev_addr) {
+			dev_warn(&ara->dev, "Duplicate SMBALERT# from dev "
+				"0x%02x, skipping\n", data.addr);
+			break;
+		}
+		dev_dbg(&ara->dev, "SMBALERT# from dev 0x%02x, flag %d\n",
+			data.addr, data.flag);
+
+		/* Notify driver for the device which issued the alert */
+		device_for_each_child(&ara->adapter->dev, &data,
+				      smbus_do_alert);
+		prev_addr = data.addr;
+	}
+
+	/* We handled all alerts; re-enable level-triggered IRQs */
+	if (!alert->alert_edge_triggered)
+		enable_irq(alert->irq);
+}
+
+static irqreturn_t smbalert_irq(int irq, void *d)
+{
+	struct i2c_smbus_alert *alert = d;
+
+	/* Disable level-triggered IRQs until we handle them */
+	if (!alert->alert_edge_triggered)
+		disable_irq_nosync(irq);
+
+	schedule_work(&alert->alert);
+	return IRQ_HANDLED;
+}
+
+/* Setup SMBALERT# infrastructure */
+static int smbalert_probe(struct i2c_client *ara,
+			  const struct i2c_device_id *id)
+{
+	struct i2c_smbus_alert_setup *setup = ara->dev.platform_data;
+	struct i2c_smbus_alert *alert;
+	struct i2c_adapter *adapter = ara->adapter;
+	int res;
+
+	alert = kzalloc(sizeof(struct i2c_smbus_alert), GFP_KERNEL);
+	if (!alert)
+		return -ENOMEM;
+
+	alert->alert_edge_triggered = setup->alert_edge_triggered;
+	alert->irq = setup->irq;
+	INIT_WORK(&alert->alert, smbus_alert);
+	alert->ara = ara;
+
+	if (setup->irq > 0) {
+		res = devm_request_irq(&ara->dev, setup->irq, smbalert_irq,
+				       0, "smbus_alert", alert);
+		if (res) {
+			kfree(alert);
+			return res;
+		}
+	}
+
+	i2c_set_clientdata(ara, alert);
+	dev_info(&adapter->dev, "supports SMBALERT#, %s trigger\n",
+		 setup->alert_edge_triggered ? "edge" : "level");
+
+	return 0;
+}
+
+/* IRQ resource is managed so it is freed automatically */
+static int smbalert_remove(struct i2c_client *ara)
+{
+	struct i2c_smbus_alert *alert = i2c_get_clientdata(ara);
+
+	cancel_work_sync(&alert->alert);
+
+	i2c_set_clientdata(ara, NULL);
+	kfree(alert);
+	return 0;
+}
+
+static const struct i2c_device_id smbalert_ids[] = {
+	{ "smbus_alert", 0 },
+	{ /* LIST END */ }
+};
+MODULE_DEVICE_TABLE(i2c, smbalert_ids);
+
+static struct i2c_driver smbalert_driver = {
+	.driver = {
+		.name	= "smbus_alert",
+	},
+	.probe		= smbalert_probe,
+	.remove		= smbalert_remove,
+	.id_table	= smbalert_ids,
+};
+
+/**
+ * i2c_setup_smbus_alert - Setup SMBus alert support
+ * @adapter: the target adapter
+ * @setup: setup data for the SMBus alert handler
+ * Context: can sleep
+ *
+ * Setup handling of the SMBus alert protocol on a given I2C bus segment.
+ *
+ * Handling can be done either through our IRQ handler, or by the
+ * adapter (from its handler, periodic polling, or whatever).
+ *
+ * NOTE that if we manage the IRQ, we *MUST* know if it's level or
+ * edge triggered in order to hand it to the workqueue correctly.
+ * If triggering the alert seems to wedge the system, you probably
+ * should have said it's level triggered.
+ *
+ * This returns the ara client, which should be saved for later use with
+ * i2c_handle_smbus_alert() and ultimately i2c_unregister_device(); or NULL
+ * to indicate an error.
+ */
+struct i2c_client *i2c_setup_smbus_alert(struct i2c_adapter *adapter,
+					 struct i2c_smbus_alert_setup *setup)
+{
+	struct i2c_board_info ara_board_info = {
+		I2C_BOARD_INFO("smbus_alert", 0x0c),
+		.platform_data = setup,
+	};
+
+	return i2c_new_device(adapter, &ara_board_info);
+}
+EXPORT_SYMBOL_GPL(i2c_setup_smbus_alert);
+
+/**
+ * i2c_handle_smbus_alert - Handle an SMBus alert
+ * @ara: the ARA client on the relevant adapter
+ * Context: can't sleep
+ *
+ * Helper function to be called from an I2C bus driver's interrupt
+ * handler. It will schedule the alert work, in turn calling the
+ * corresponding I2C device driver's alert function.
+ *
+ * It is assumed that ara is a valid i2c client previously returned by
+ * i2c_setup_smbus_alert().
+ */
+int i2c_handle_smbus_alert(struct i2c_client *ara)
+{
+	struct i2c_smbus_alert *alert = i2c_get_clientdata(ara);
+
+	return schedule_work(&alert->alert);
+}
+EXPORT_SYMBOL_GPL(i2c_handle_smbus_alert);
+
+static int __init i2c_smbus_init(void)
+{
+	return i2c_add_driver(&smbalert_driver);
+}
+
+static void __exit i2c_smbus_exit(void)
+{
+	i2c_del_driver(&smbalert_driver);
+}
+
+module_init(i2c_smbus_init);
+module_exit(i2c_smbus_exit);
+
+MODULE_AUTHOR("Jean Delvare <khali@linux-fr.org>");
+MODULE_DESCRIPTION("SMBus protocol extensions support");
+MODULE_LICENSE("GPL");
diff --git a/include/linux/i2c-smbus.h b/include/linux/i2c-smbus.h
new file mode 100644
index 000000000000..63f57a8c8b31
--- /dev/null
+++ b/include/linux/i2c-smbus.h
@@ -0,0 +1,50 @@
+/*
+ * i2c-smbus.h - SMBus extensions to the I2C protocol
+ *
+ * Copyright (C) 2010 Jean Delvare <khali@linux-fr.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef _LINUX_I2C_SMBUS_H
+#define _LINUX_I2C_SMBUS_H
+
+#include <linux/i2c.h>
+
+
+/**
+ * i2c_smbus_alert_setup - platform data for the smbus_alert i2c client
+ * @alert_edge_triggered: whether the alert interrupt is edge (1) or level (0)
+ *		triggered
+ * @irq: IRQ number, if the smbus_alert driver should take care of interrupt
+ *		handling
+ *
+ * If irq is not specified, the smbus_alert driver doesn't take care of
+ * interrupt handling. In that case it is up to the I2C bus driver to either
+ * handle the interrupts or to poll for alerts.
+ *
+ * If irq is specified then it it crucial that alert_edge_triggered is
+ * properly set.
+ */
+struct i2c_smbus_alert_setup {
+	unsigned int		alert_edge_triggered:1;
+	int			irq;
+};
+
+struct i2c_client *i2c_setup_smbus_alert(struct i2c_adapter *adapter,
+					 struct i2c_smbus_alert_setup *setup);
+int i2c_handle_smbus_alert(struct i2c_client *ara);
+
+#endif /* _LINUX_I2C_SMBUS_H */
diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index 02fc617782ef..476abd09c921 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -152,6 +152,13 @@ struct i2c_driver {
 	int (*suspend)(struct i2c_client *, pm_message_t mesg);
 	int (*resume)(struct i2c_client *);
 
+	/* Alert callback, for example for the SMBus alert protocol.
+	 * The format and meaning of the data value depends on the protocol.
+	 * For the SMBus alert protocol, there is a single bit of data passed
+	 * as the alert response's low bit ("event flag").
+	 */
+	void (*alert)(struct i2c_client *, unsigned int data);
+
 	/* a ioctl like command that can be used to perform specific functions
 	 * with the device.
 	 */
-- 
cgit v1.2.3


From 0c43ea544c1086fbbed5a6c99ea58eb64674ea8f Mon Sep 17 00:00:00 2001
From: Zhangfei Gao <zgao6@marvell.com>
Date: Tue, 2 Mar 2010 12:23:49 +0100
Subject: i2c: Document the message size limit

i2c_master_send & i2c_master_recv do not support more than 64 kb
transfer, since msg.len is u16.

Signed-off-by: Zhangfei Gao <zgao6@marvell.com>
Signed-off-by: Jean Delvare <khali@linux-fr.org>
---
 Documentation/i2c/writing-clients | 5 +++--
 drivers/i2c/i2c-core.c            | 4 ++--
 include/linux/i2c.h               | 1 +
 3 files changed, 6 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/i2c/writing-clients b/Documentation/i2c/writing-clients
index 0a74603eb671..3219ee0dbfef 100644
--- a/Documentation/i2c/writing-clients
+++ b/Documentation/i2c/writing-clients
@@ -318,8 +318,9 @@ Plain I2C communication
 These routines read and write some bytes from/to a client. The client
 contains the i2c address, so you do not have to include it. The second
 parameter contains the bytes to read/write, the third the number of bytes
-to read/write (must be less than the length of the buffer.) Returned is
-the actual number of bytes read/written.
+to read/write (must be less than the length of the buffer, also should be
+less than 64k since msg.len is u16.) Returned is the actual number of bytes
+read/written.
 
 	int i2c_transfer(struct i2c_adapter *adap, struct i2c_msg *msg,
 			 int num);
diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c
index 4131698008b9..3202a86f420e 100644
--- a/drivers/i2c/i2c-core.c
+++ b/drivers/i2c/i2c-core.c
@@ -1183,7 +1183,7 @@ EXPORT_SYMBOL(i2c_transfer);
  * i2c_master_send - issue a single I2C message in master transmit mode
  * @client: Handle to slave device
  * @buf: Data that will be written to the slave
- * @count: How many bytes to write
+ * @count: How many bytes to write, must be less than 64k since msg.len is u16
  *
  * Returns negative errno, or else the number of bytes written.
  */
@@ -1210,7 +1210,7 @@ EXPORT_SYMBOL(i2c_master_send);
  * i2c_master_recv - issue a single I2C message in master receive mode
  * @client: Handle to slave device
  * @buf: Where to store data read from slave
- * @count: How many bytes to read
+ * @count: How many bytes to read, must be less than 64k since msg.len is u16
  *
  * Returns negative errno, or else the number of bytes read.
  */
diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index 476abd09c921..0a5da639b327 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -53,6 +53,7 @@ struct i2c_board_info;
  * on a bus (or read from them). Apart from two basic transfer functions to
  * transmit one message at a time, a more complex version can be used to
  * transmit an arbitrary number of messages without interruption.
+ * @count must be be less than 64k since msg.len is u16.
  */
 extern int i2c_master_send(struct i2c_client *client, const char *buf,
 			   int count);
-- 
cgit v1.2.3


From 320ebf09cbb6d01954c9a060266aa8e0d27f4638 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Tue, 2 Mar 2010 12:35:37 +0100
Subject: perf, x86: Restrict the ANY flag

The ANY flag can show SMT data of another task (like 'top'),
so we want to disable it when system-wide profiling is
disabled.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/perf_event.c |  3 +++
 include/linux/perf_event.h       | 15 +++++++++++++++
 kernel/perf_event.c              | 15 ---------------
 3 files changed, 18 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 6531b4bdb22d..aab2e1ce9dee 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -503,6 +503,9 @@ static int __hw_perf_event_init(struct perf_event *event)
 	 */
 	if (attr->type == PERF_TYPE_RAW) {
 		hwc->config |= x86_pmu.raw_event(attr->config);
+		if ((hwc->config & ARCH_PERFMON_EVENTSEL_ANY) &&
+		    perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
+			return -EACCES;
 		return 0;
 	}
 
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 04f06b4be297..90e0521b1690 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -857,6 +857,21 @@ extern int sysctl_perf_event_paranoid;
 extern int sysctl_perf_event_mlock;
 extern int sysctl_perf_event_sample_rate;
 
+static inline bool perf_paranoid_tracepoint_raw(void)
+{
+	return sysctl_perf_event_paranoid > -1;
+}
+
+static inline bool perf_paranoid_cpu(void)
+{
+	return sysctl_perf_event_paranoid > 0;
+}
+
+static inline bool perf_paranoid_kernel(void)
+{
+	return sysctl_perf_event_paranoid > 1;
+}
+
 extern void perf_event_init(void);
 extern void perf_tp_event(int event_id, u64 addr, u64 count, void *record, int entry_size);
 extern void perf_bp_event(struct perf_event *event, void *data);
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index a661e7991865..482d5e1d3764 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -56,21 +56,6 @@ static atomic_t nr_task_events __read_mostly;
  */
 int sysctl_perf_event_paranoid __read_mostly = 1;
 
-static inline bool perf_paranoid_tracepoint_raw(void)
-{
-	return sysctl_perf_event_paranoid > -1;
-}
-
-static inline bool perf_paranoid_cpu(void)
-{
-	return sysctl_perf_event_paranoid > 0;
-}
-
-static inline bool perf_paranoid_kernel(void)
-{
-	return sysctl_perf_event_paranoid > 1;
-}
-
 int sysctl_perf_event_mlock __read_mostly = 512; /* 'free' kb per user */
 
 /*
-- 
cgit v1.2.3


From 3082a2b7b1af1b1508c1c3fa589566064f926f40 Mon Sep 17 00:00:00 2001
From: Matthew Garrett <mjg@redhat.com>
Date: Tue, 16 Feb 2010 16:36:25 -0500
Subject: rfkill: Add support for KEY_RFKILL

Add support for handling KEY_RFKILL in the rfkill input module. This
simply toggles the state of all rfkill devices. The comment in rfkill.h
is also updated to reflect that RFKILL_TYPE_ALL may be used inside the
kernel.

Signed-off-by: Matthew Garrett <mjg@redhat.com>
Acked-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/rfkill.h | 2 +-
 net/rfkill/input.c     | 8 ++++++++
 2 files changed, 9 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/rfkill.h b/include/linux/rfkill.h
index 97059d08a626..4f82326eb294 100644
--- a/include/linux/rfkill.h
+++ b/include/linux/rfkill.h
@@ -29,7 +29,7 @@
 /**
  * enum rfkill_type - type of rfkill switch.
  *
- * @RFKILL_TYPE_ALL: toggles all switches (userspace only)
+ * @RFKILL_TYPE_ALL: toggles all switches (requests only - not a switch type)
  * @RFKILL_TYPE_WLAN: switch is on a 802.11 wireless network device.
  * @RFKILL_TYPE_BLUETOOTH: switch is on a bluetooth device.
  * @RFKILL_TYPE_UWB: switch is on a ultra wideband device.
diff --git a/net/rfkill/input.c b/net/rfkill/input.c
index a7295ad5f9cb..3713d7ecab96 100644
--- a/net/rfkill/input.c
+++ b/net/rfkill/input.c
@@ -212,6 +212,9 @@ static void rfkill_event(struct input_handle *handle, unsigned int type,
 		case KEY_WIMAX:
 			rfkill_schedule_toggle(RFKILL_TYPE_WIMAX);
 			break;
+		case KEY_RFKILL:
+			rfkill_schedule_toggle(RFKILL_TYPE_ALL);
+			break;
 		}
 	} else if (type == EV_SW && code == SW_RFKILL_ALL)
 		rfkill_schedule_evsw_rfkillall(data);
@@ -294,6 +297,11 @@ static const struct input_device_id rfkill_ids[] = {
 		.evbit = { BIT_MASK(EV_KEY) },
 		.keybit = { [BIT_WORD(KEY_WIMAX)] = BIT_MASK(KEY_WIMAX) },
 	},
+	{
+		.flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_KEYBIT,
+		.evbit = { BIT_MASK(EV_KEY) },
+		.keybit = { [BIT_WORD(KEY_RFKILL)] = BIT_MASK(KEY_RFKILL) },
+	},
 	{
 		.flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_SWBIT,
 		.evbit = { BIT(EV_SW) },
-- 
cgit v1.2.3


From ac6ec5b1de5d1d5afcbe88d73c05df71dca0ac39 Mon Sep 17 00:00:00 2001
From: "Ira W. Snyder" <iws@ovro.caltech.edu>
Date: Mon, 21 Dec 2009 16:26:45 -0800
Subject: serial: 8250_pci: add support for MCS9865 / SYBA 6x Serial Port Card

This patch is heavily based on an earlier patch found on the linux-serial
mailing list [1], written by Darius Augulis.

The previous incarnation of this patch only supported a 2x serial port
card.  I have added support for my SYBA 6x serial port card, and tested on
x86.

[1]: http://marc.info/?l=linux-serial&m=124975806304760

Signed-off-by: Ira W. Snyder <iws@ovro.caltech.edu>
Cc: Darius Augulis <augulis.darius@gmail.com>
Cc: Greg KH <greg@kroah.com>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/parport/parport_pc.c |  6 ++++++
 drivers/serial/8250_pci.c    | 22 +++++++++++++++++++++-
 include/linux/pci_ids.h      |  1 +
 3 files changed, 28 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/parport/parport_pc.c b/drivers/parport/parport_pc.c
index ad113b0f62db..0950fa40684f 100644
--- a/drivers/parport/parport_pc.c
+++ b/drivers/parport/parport_pc.c
@@ -2908,6 +2908,7 @@ enum parport_pc_pci_cards {
 	netmos_9805,
 	netmos_9815,
 	netmos_9901,
+	netmos_9865,
 	quatech_sppxp100,
 };
 
@@ -2989,6 +2990,7 @@ static struct parport_pc_pci {
 	/* netmos_9805 */               { 1, { { 0, -1 }, } },
 	/* netmos_9815 */               { 2, { { 0, -1 }, { 2, -1 }, } },
 	/* netmos_9901 */               { 1, { { 0, -1 }, } },
+	/* netmos_9865 */               { 1, { { 0, -1 }, } },
 	/* quatech_sppxp100 */		{ 1, { { 0, 1 }, } },
 };
 
@@ -3092,6 +3094,10 @@ static const struct pci_device_id parport_pc_pci_tbl[] = {
 	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, netmos_9815 },
 	{ PCI_VENDOR_ID_NETMOS, PCI_DEVICE_ID_NETMOS_9901,
 	  0xA000, 0x2000, 0, 0, netmos_9901 },
+	{ PCI_VENDOR_ID_NETMOS, PCI_DEVICE_ID_NETMOS_9865,
+	  0xA000, 0x1000, 0, 0, netmos_9865 },
+	{ PCI_VENDOR_ID_NETMOS, PCI_DEVICE_ID_NETMOS_9865,
+	  0xA000, 0x2000, 0, 0, netmos_9865 },
 	/* Quatech SPPXP-100 Parallel port PCI ExpressCard */
 	{ PCI_VENDOR_ID_QUATECH, PCI_DEVICE_ID_QUATECH_SPPXP_100,
 	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, quatech_sppxp100 },
diff --git a/drivers/serial/8250_pci.c b/drivers/serial/8250_pci.c
index b28af13c45a1..8b18c3ce3898 100644
--- a/drivers/serial/8250_pci.c
+++ b/drivers/serial/8250_pci.c
@@ -760,7 +760,8 @@ static int pci_netmos_init(struct pci_dev *dev)
 	/* subdevice 0x00PS means <P> parallel, <S> serial */
 	unsigned int num_serial = dev->subsystem_device & 0xf;
 
-	if (dev->device == PCI_DEVICE_ID_NETMOS_9901)
+	if ((dev->device == PCI_DEVICE_ID_NETMOS_9901) ||
+		(dev->device == PCI_DEVICE_ID_NETMOS_9865))
 		return 0;
 	if (dev->subsystem_vendor == PCI_VENDOR_ID_IBM &&
 			dev->subsystem_device == 0x0299)
@@ -1479,6 +1480,7 @@ enum pci_board_num_t {
 
 	pbn_b0_bt_1_115200,
 	pbn_b0_bt_2_115200,
+	pbn_b0_bt_4_115200,
 	pbn_b0_bt_8_115200,
 
 	pbn_b0_bt_1_460800,
@@ -1703,6 +1705,12 @@ static struct pciserial_board pci_boards[] __devinitdata = {
 		.base_baud	= 115200,
 		.uart_offset	= 8,
 	},
+	[pbn_b0_bt_4_115200] = {
+		.flags		= FL_BASE0|FL_BASE_BARS,
+		.num_ports	= 4,
+		.base_baud	= 115200,
+		.uart_offset	= 8,
+	},
 	[pbn_b0_bt_8_115200] = {
 		.flags		= FL_BASE0|FL_BASE_BARS,
 		.num_ports	= 8,
@@ -3648,6 +3656,18 @@ static struct pci_device_id serial_pci_tbl[] = {
 		0xA000, 0x1000,
 		0, 0, pbn_b0_1_115200 },
 
+	/*
+	 * Best Connectivity PCI Multi I/O cards
+	 */
+
+	{	PCI_VENDOR_ID_NETMOS, PCI_DEVICE_ID_NETMOS_9865,
+		0xA000, 0x1000,
+		0, 0, pbn_b0_1_115200 },
+
+	{	PCI_VENDOR_ID_NETMOS, PCI_DEVICE_ID_NETMOS_9865,
+		0xA000, 0x3004,
+		0, 0, pbn_b0_bt_4_115200 },
+
 	/*
 	 * These entries match devices with class COMMUNICATION_SERIAL,
 	 * COMMUNICATION_MODEM or COMMUNICATION_MULTISERIAL
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 0be824320580..3ec4003f5e64 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2697,6 +2697,7 @@
 #define PCI_DEVICE_ID_NETMOS_9835	0x9835
 #define PCI_DEVICE_ID_NETMOS_9845	0x9845
 #define PCI_DEVICE_ID_NETMOS_9855	0x9855
+#define PCI_DEVICE_ID_NETMOS_9865	0x9865
 #define PCI_DEVICE_ID_NETMOS_9901	0x9901
 
 #define PCI_VENDOR_ID_3COM_2		0xa727
-- 
cgit v1.2.3


From 2a52fcb54fdf4b557730022aefcc794d567591fb Mon Sep 17 00:00:00 2001
From: Kiros Yeh <kiros@korenix.com>
Date: Mon, 21 Dec 2009 16:26:48 -0800
Subject: serial: add support for Korenix JetCard

Add different model (with a different PCI ID) to support Korenix JetCard.

Signed-off-by: Kiros Yeh <kiros@korenix.com>
Acked-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/serial/8250_pci.c | 9 +++++++++
 include/linux/pci_ids.h   | 2 ++
 2 files changed, 11 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/serial/8250_pci.c b/drivers/serial/8250_pci.c
index 8b18c3ce3898..01c012da4e26 100644
--- a/drivers/serial/8250_pci.c
+++ b/drivers/serial/8250_pci.c
@@ -3199,6 +3199,15 @@ static struct pci_device_id serial_pci_tbl[] = {
 		0x1208, 0x0004, 0, 0,
 		pbn_b0_4_921600 },
 
+	{	PCI_VENDOR_ID_KORENIX, PCI_DEVICE_ID_KORENIX_JETCARDF2,
+		0x1204, 0x0004, 0, 0,
+		pbn_b0_4_921600 },
+	{	PCI_VENDOR_ID_KORENIX, PCI_DEVICE_ID_KORENIX_JETCARDF2,
+		0x1208, 0x0004, 0, 0,
+		pbn_b0_4_921600 },
+	{	PCI_VENDOR_ID_KORENIX, PCI_DEVICE_ID_KORENIX_JETCARDF3,
+		0x1208, 0x0004, 0, 0,
+		pbn_b0_4_921600 },
 	/*
 	 * Dell Remote Access Card 4 - Tim_T_Murphy@Dell.com
 	 */
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 3ec4003f5e64..e91b1fc03e7a 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2333,6 +2333,8 @@
 #define PCI_VENDOR_ID_KORENIX		0x1982
 #define PCI_DEVICE_ID_KORENIX_JETCARDF0	0x1600
 #define PCI_DEVICE_ID_KORENIX_JETCARDF1	0x16ff
+#define PCI_DEVICE_ID_KORENIX_JETCARDF2	0x1700
+#define PCI_DEVICE_ID_KORENIX_JETCARDF3	0x17ff
 
 #define PCI_VENDOR_ID_QMI		0x1a32
 
-- 
cgit v1.2.3


From d9661adfb8e53a7647360140af3b92284cbe52d4 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@linux.intel.com>
Date: Thu, 18 Feb 2010 16:43:47 +0000
Subject: tty: Keep the default buffering to sub-page units

We allocate during interrupts so while our buffering is normally diced up
small anyway on some hardware at speed we can pressure the VM excessively
for page pairs. We don't really need big buffers to be linear so don't try
so hard.

In order to make this work well we will tidy up excess callers to request_room,
which cannot itself enforce this break up.

Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/char/tty_buffer.c |  6 ++++--
 include/linux/tty.h       | 10 ++++++++++
 2 files changed, 14 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/char/tty_buffer.c b/drivers/char/tty_buffer.c
index 66fa4e10d76b..f27c4d6d956e 100644
--- a/drivers/char/tty_buffer.c
+++ b/drivers/char/tty_buffer.c
@@ -247,7 +247,8 @@ int tty_insert_flip_string(struct tty_struct *tty, const unsigned char *chars,
 {
 	int copied = 0;
 	do {
-		int space = tty_buffer_request_room(tty, size - copied);
+		int goal = min(size - copied, TTY_BUFFER_PAGE);
+		int space = tty_buffer_request_room(tty, goal);
 		struct tty_buffer *tb = tty->buf.tail;
 		/* If there is no space then tb may be NULL */
 		if (unlikely(space == 0))
@@ -283,7 +284,8 @@ int tty_insert_flip_string_flags(struct tty_struct *tty,
 {
 	int copied = 0;
 	do {
-		int space = tty_buffer_request_room(tty, size - copied);
+		int goal = min(size - copied, TTY_BUFFER_PAGE);
+		int space = tty_buffer_request_room(tty, goal);
 		struct tty_buffer *tb = tty->buf.tail;
 		/* If there is no space then tb may be NULL */
 		if (unlikely(space == 0))
diff --git a/include/linux/tty.h b/include/linux/tty.h
index 6abfcf5b5887..d96e5882f129 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -68,6 +68,16 @@ struct tty_buffer {
 	unsigned long data[0];
 };
 
+/*
+ * We default to dicing tty buffer allocations to this many characters
+ * in order to avoid multiple page allocations. We assume tty_buffer itself
+ * is under 256 bytes. See tty_buffer_find for the allocation logic this
+ * must match
+ */
+
+#define TTY_BUFFER_PAGE		((PAGE_SIZE  - 256) / 2)
+
+
 struct tty_bufhead {
 	struct delayed_work work;
 	spinlock_t lock;
-- 
cgit v1.2.3


From eec9fe7d1ab4a0dfac4cb43047a7657fffd0002f Mon Sep 17 00:00:00 2001
From: Ari Entlich <atrigent@ccs.neu.edu>
Date: Fri, 19 Feb 2010 09:37:55 -0500
Subject: tty: Add a new VT mode which is like VT_PROCESS but doesn't require a
 VT_RELDISP ioctl call

This new VT mode (VT_PROCESS_AUTO) does everything that VT_PROCESS does
except that it doesn't wait for a VT_RELDISP ioctl before switching
away from a VT with that mode.

If the X server eventually uses this new mode, debugging and crash
recovery should become easier. This is because even when currently in
the VT of a frozen X server it would still be possible to switch out
by doing SysRq-r and then CTRL-<number of a text vt>, sshing in and
doing chvt <number of a text vt>, or any other method of VT switching.
The general concensus on #xorg-devel seems to be that it should be
safe to use this with X now that we have KMS.

This also moves the VT_ACKACQ define to a more appropriate place,
for clarity's sake.

Signed-off-by: Ari Entlich <atrigent@ccs.neu.edu>
Acked-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/char/vt_ioctl.c | 39 ++++++++++++++++++++-------------------
 include/linux/vt.h      |  3 ++-
 2 files changed, 22 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/char/vt_ioctl.c b/drivers/char/vt_ioctl.c
index 6aa10284104a..87778dcf8727 100644
--- a/drivers/char/vt_ioctl.c
+++ b/drivers/char/vt_ioctl.c
@@ -888,7 +888,7 @@ int vt_ioctl(struct tty_struct *tty, struct file * file,
 			ret = -EFAULT;
 			goto out;
 		}
-		if (tmp.mode != VT_AUTO && tmp.mode != VT_PROCESS) {
+		if (tmp.mode != VT_AUTO && tmp.mode != VT_PROCESS && tmp.mode != VT_PROCESS_AUTO) {
 			ret = -EINVAL;
 			goto out;
 		}
@@ -1622,7 +1622,7 @@ static void complete_change_console(struct vc_data *vc)
 	 * telling it that it has acquired. Also check if it has died and
 	 * clean up (similar to logic employed in change_console())
 	 */
-	if (vc->vt_mode.mode == VT_PROCESS) {
+	if (vc->vt_mode.mode == VT_PROCESS || vc->vt_mode.mode == VT_PROCESS_AUTO) {
 		/*
 		 * Send the signal as privileged - kill_pid() will
 		 * tell us if the process has gone or something else
@@ -1682,7 +1682,7 @@ void change_console(struct vc_data *new_vc)
 	 * vt to auto control.
 	 */
 	vc = vc_cons[fg_console].d;
-	if (vc->vt_mode.mode == VT_PROCESS) {
+	if (vc->vt_mode.mode == VT_PROCESS || vc->vt_mode.mode == VT_PROCESS_AUTO) {
 		/*
 		 * Send the signal as privileged - kill_pid() will
 		 * tell us if the process has gone or something else
@@ -1693,27 +1693,28 @@ void change_console(struct vc_data *new_vc)
 		 */
 		vc->vt_newvt = new_vc->vc_num;
 		if (kill_pid(vc->vt_pid, vc->vt_mode.relsig, 1) == 0) {
+			if(vc->vt_mode.mode == VT_PROCESS)
+				/*
+				 * It worked. Mark the vt to switch to and
+				 * return. The process needs to send us a
+				 * VT_RELDISP ioctl to complete the switch.
+				 */
+				return;
+		} else {
 			/*
-			 * It worked. Mark the vt to switch to and
-			 * return. The process needs to send us a
-			 * VT_RELDISP ioctl to complete the switch.
+			 * The controlling process has died, so we revert back to
+			 * normal operation. In this case, we'll also change back
+			 * to KD_TEXT mode. I'm not sure if this is strictly correct
+			 * but it saves the agony when the X server dies and the screen
+			 * remains blanked due to KD_GRAPHICS! It would be nice to do
+			 * this outside of VT_PROCESS but there is no single process
+			 * to account for and tracking tty count may be undesirable.
 			 */
-			return;
+			reset_vc(vc);
 		}
 
 		/*
-		 * The controlling process has died, so we revert back to
-		 * normal operation. In this case, we'll also change back
-		 * to KD_TEXT mode. I'm not sure if this is strictly correct
-		 * but it saves the agony when the X server dies and the screen
-		 * remains blanked due to KD_GRAPHICS! It would be nice to do
-		 * this outside of VT_PROCESS but there is no single process
-		 * to account for and tracking tty count may be undesirable.
-		 */
-		reset_vc(vc);
-
-		/*
-		 * Fall through to normal (VT_AUTO) handling of the switch...
+		 * Fall through to normal (VT_AUTO and VT_PROCESS_AUTO) handling of the switch...
 		 */
 	}
 
diff --git a/include/linux/vt.h b/include/linux/vt.h
index d5dd0bc408fd..778b7b2a47d4 100644
--- a/include/linux/vt.h
+++ b/include/linux/vt.h
@@ -27,7 +27,7 @@ struct vt_mode {
 #define VT_SETMODE	0x5602	/* set mode of active vt */
 #define		VT_AUTO		0x00	/* auto vt switching */
 #define		VT_PROCESS	0x01	/* process controls switching */
-#define		VT_ACKACQ	0x02	/* acknowledge switch */
+#define		VT_PROCESS_AUTO 0x02	/* process is notified of switching */
 
 struct vt_stat {
 	unsigned short v_active;	/* active vt */
@@ -38,6 +38,7 @@ struct vt_stat {
 #define VT_SENDSIG	0x5604	/* signal to send to bitmask of vts */
 
 #define VT_RELDISP	0x5605	/* release display */
+#define		VT_ACKACQ	0x02	/* acknowledge switch */
 
 #define VT_ACTIVATE	0x5606	/* make vt active */
 #define VT_WAITACTIVE	0x5607	/* wait for vt active */
-- 
cgit v1.2.3


From e9a20171dfa0aa134d2211126d1310f2daea52cf Mon Sep 17 00:00:00 2001
From: Felipe Balbi <felipe.balbi@nokia.com>
Date: Thu, 17 Dec 2009 13:01:36 +0200
Subject: USB: otg: add notifier support

The notifier will be used to communicate usb events
to other drivers like the charger chip.

This can be used as source of information to kick
usb charger detection as described by the USB
Battery Charging Specification 1.1 and/or to
pass bMaxPower field of selected usb_configuration
to charger chip in order to use that information
as input current on the charging profile
setup.

Signed-off-by: Felipe Balbi <felipe.balbi@nokia.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/usb/otg.h | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/usb/otg.h b/include/linux/usb/otg.h
index 52bb917641f0..6c0b676b27d8 100644
--- a/include/linux/usb/otg.h
+++ b/include/linux/usb/otg.h
@@ -9,6 +9,8 @@
 #ifndef __LINUX_USB_OTG_H
 #define __LINUX_USB_OTG_H
 
+#include <linux/notifier.h>
+
 /* OTG defines lots of enumeration states before device reset */
 enum usb_otg_state {
 	OTG_STATE_UNDEFINED = 0,
@@ -33,6 +35,14 @@ enum usb_otg_state {
 	OTG_STATE_A_VBUS_ERR,
 };
 
+enum usb_xceiv_events {
+	USB_EVENT_NONE,         /* no events or cable disconnected */
+	USB_EVENT_VBUS,         /* vbus valid event */
+	USB_EVENT_ID,           /* id was grounded */
+	USB_EVENT_CHARGER,      /* usb dedicated charger */
+	USB_EVENT_ENUMERATED,   /* gadget driver enumerated */
+};
+
 #define USB_OTG_PULLUP_ID		(1 << 0)
 #define USB_OTG_PULLDOWN_DP		(1 << 1)
 #define USB_OTG_PULLDOWN_DM		(1 << 2)
@@ -70,6 +80,9 @@ struct otg_transceiver {
 	struct otg_io_access_ops	*io_ops;
 	void __iomem			*io_priv;
 
+	/* for notification of usb_xceiv_events */
+	struct blocking_notifier_head	notifier;
+
 	/* to pass extra port status to the root hub */
 	u16			port_status;
 	u16			port_change;
@@ -203,6 +216,18 @@ otg_start_srp(struct otg_transceiver *otg)
 	return otg->start_srp(otg);
 }
 
+/* notifiers */
+static inline int
+otg_register_notifier(struct otg_transceiver *otg, struct notifier_block *nb)
+{
+	return blocking_notifier_chain_register(&otg->notifier, nb);
+}
+
+static inline void
+otg_unregister_notifier(struct otg_transceiver *otg, struct notifier_block *nb)
+{
+	blocking_notifier_chain_unregister(&otg->notifier, nb);
+}
 
 /* for OTG controller drivers (and maybe other stuff) */
 extern int usb_bus_start_enum(struct usb_bus *bus, unsigned port_num);
-- 
cgit v1.2.3


From 5d3987796c7a747e5ed3ded1eb64a9632d52a1a4 Mon Sep 17 00:00:00 2001
From: Oliver Neukum <oliver@neukum.org>
Date: Fri, 18 Dec 2009 12:14:21 +0100
Subject: USB: storage: Never reset devices that will morph to an old mode

Some devices must be switched to a new mode to fully use them.
A reset would make them revert to the old mode. Therefore a reset
must not be used for error handling with such devices.

Signed-off-by: Oliver Neukum <oliver@neukum.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/core/quirks.c       | 3 ++-
 drivers/usb/storage/transport.c | 6 ++++++
 include/linux/usb/quirks.h      | 3 +++
 3 files changed, 11 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c
index ab93918d9207..0b689224394b 100644
--- a/drivers/usb/core/quirks.c
+++ b/drivers/usb/core/quirks.c
@@ -120,6 +120,7 @@ void usb_detect_quirks(struct usb_device *udev)
 	 * for all devices.  It will affect things like hub resets
 	 * and EMF-related port disables.
 	 */
-	udev->persist_enabled = 1;
+	if (!(udev->quirks & USB_QUIRK_RESET_MORPHS))
+		udev->persist_enabled = 1;
 #endif	/* CONFIG_PM */
 }
diff --git a/drivers/usb/storage/transport.c b/drivers/usb/storage/transport.c
index cc313d16d727..468038126e5e 100644
--- a/drivers/usb/storage/transport.c
+++ b/drivers/usb/storage/transport.c
@@ -47,6 +47,8 @@
 #include <linux/errno.h>
 #include <linux/slab.h>
 
+#include <linux/usb/quirks.h>
+
 #include <scsi/scsi.h>
 #include <scsi/scsi_eh.h>
 #include <scsi/scsi_device.h>
@@ -1297,6 +1299,10 @@ int usb_stor_port_reset(struct us_data *us)
 {
 	int result;
 
+	/*for these devices we must use the class specific method */
+	if (us->pusb_dev->quirks & USB_QUIRK_RESET_MORPHS)
+		return -EPERM;
+
 	result = usb_lock_device_for_reset(us->pusb_dev, us->pusb_intf);
 	if (result < 0)
 		US_DEBUGP("unable to lock device for reset: %d\n", result);
diff --git a/include/linux/usb/quirks.h b/include/linux/usb/quirks.h
index 2526f3bbd273..0a555dd131fc 100644
--- a/include/linux/usb/quirks.h
+++ b/include/linux/usb/quirks.h
@@ -19,4 +19,7 @@
 /* device can't handle its Configuration or Interface strings */
 #define USB_QUIRK_CONFIG_INTF_STRINGS	0x00000008
 
+/*device will morph if reset, don't use reset for handling errors */
+#define USB_QUIRK_RESET_MORPHS		0x00000010
+
 #endif /* __LINUX_USB_QUIRKS_H */
-- 
cgit v1.2.3


From 551cdbbeb118bd5ed301f8749aef69219284399b Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@suse.de>
Date: Thu, 14 Jan 2010 11:08:04 -0800
Subject: USB: rename USB_SPEED_VARIABLE to USB_SPEED_WIRELESS

It's really the wireless speed, so rename the thing to make
more sense.  Based on a recommendation from David Vrabel

Cc: David Vrabel <david.vrabel@csr.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/staging/usbip/vhci_sysfs.c | 2 +-
 drivers/usb/core/hub.c             | 6 +++---
 drivers/usb/core/sysfs.c           | 2 +-
 drivers/usb/core/urb.c             | 6 +++---
 drivers/usb/host/xhci-mem.c        | 4 ++--
 drivers/usb/wusbcore/devconnect.c  | 2 +-
 include/linux/usb/ch9.h            | 2 +-
 7 files changed, 12 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/staging/usbip/vhci_sysfs.c b/drivers/staging/usbip/vhci_sysfs.c
index d8992d10d555..f6e34e03c8e4 100644
--- a/drivers/staging/usbip/vhci_sysfs.c
+++ b/drivers/staging/usbip/vhci_sysfs.c
@@ -144,7 +144,7 @@ static int valid_args(__u32 rhport, enum usb_device_speed speed)
 	case USB_SPEED_LOW:
 	case USB_SPEED_FULL:
 	case USB_SPEED_HIGH:
-	case USB_SPEED_VARIABLE:
+	case USB_SPEED_WIRELESS:
 		break;
 	default:
 		usbip_uerr("speed %d\n", speed);
diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index 6f84d383ecee..4986ff628465 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -1990,7 +1990,7 @@ static int hub_port_wait_reset(struct usb_hub *hub, int port1,
 		if (!(portstatus & USB_PORT_STAT_RESET) &&
 		    (portstatus & USB_PORT_STAT_ENABLE)) {
 			if (hub_is_wusb(hub))
-				udev->speed = USB_SPEED_VARIABLE;
+				udev->speed = USB_SPEED_WIRELESS;
 			else if (portstatus & USB_PORT_STAT_HIGH_SPEED)
 				udev->speed = USB_SPEED_HIGH;
 			else if (portstatus & USB_PORT_STAT_LOW_SPEED)
@@ -2689,7 +2689,7 @@ hub_port_init (struct usb_hub *hub, struct usb_device *udev, int port1,
 	 */
 	switch (udev->speed) {
 	case USB_SPEED_SUPER:
-	case USB_SPEED_VARIABLE:	/* fixed at 512 */
+	case USB_SPEED_WIRELESS:	/* fixed at 512 */
 		udev->ep0.desc.wMaxPacketSize = cpu_to_le16(512);
 		break;
 	case USB_SPEED_HIGH:		/* fixed at 64 */
@@ -2717,7 +2717,7 @@ hub_port_init (struct usb_hub *hub, struct usb_device *udev, int port1,
 	case USB_SPEED_SUPER:
 				speed = "super";
 				break;
-	case USB_SPEED_VARIABLE:
+	case USB_SPEED_WIRELESS:
 				speed = "variable";
 				type = "Wireless ";
 				break;
diff --git a/drivers/usb/core/sysfs.c b/drivers/usb/core/sysfs.c
index b1725abf6c7b..1b3c00b3ca3f 100644
--- a/drivers/usb/core/sysfs.c
+++ b/drivers/usb/core/sysfs.c
@@ -115,7 +115,7 @@ show_speed(struct device *dev, struct device_attribute *attr, char *buf)
 	case USB_SPEED_HIGH:
 		speed = "480";
 		break;
-	case USB_SPEED_VARIABLE:
+	case USB_SPEED_WIRELESS:
 		speed = "480";
 		break;
 	case USB_SPEED_SUPER:
diff --git a/drivers/usb/core/urb.c b/drivers/usb/core/urb.c
index e2bd153cbd89..27080561a1c2 100644
--- a/drivers/usb/core/urb.c
+++ b/drivers/usb/core/urb.c
@@ -437,7 +437,7 @@ int usb_submit_urb(struct urb *urb, gfp_t mem_flags)
 	case USB_ENDPOINT_XFER_INT:
 		/* too small? */
 		switch (dev->speed) {
-		case USB_SPEED_VARIABLE:
+		case USB_SPEED_WIRELESS:
 			if (urb->interval < 6)
 				return -EINVAL;
 			break;
@@ -453,7 +453,7 @@ int usb_submit_urb(struct urb *urb, gfp_t mem_flags)
 			if (urb->interval > (1 << 15))
 				return -EINVAL;
 			max = 1 << 15;
-		case USB_SPEED_VARIABLE:
+		case USB_SPEED_WIRELESS:
 			if (urb->interval > 16)
 				return -EINVAL;
 			break;
@@ -480,7 +480,7 @@ int usb_submit_urb(struct urb *urb, gfp_t mem_flags)
 		default:
 			return -EINVAL;
 		}
-		if (dev->speed != USB_SPEED_VARIABLE) {
+		if (dev->speed != USB_SPEED_WIRELESS) {
 			/* Round down to a power of 2, no more than max */
 			urb->interval = min(max, 1 << ilog2(urb->interval));
 		}
diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c
index 8045bc69083d..49f7d72f8b1b 100644
--- a/drivers/usb/host/xhci-mem.c
+++ b/drivers/usb/host/xhci-mem.c
@@ -454,7 +454,7 @@ int xhci_setup_addressable_virt_dev(struct xhci_hcd *xhci, struct usb_device *ud
 	case USB_SPEED_LOW:
 		slot_ctx->dev_info |= (u32) SLOT_SPEED_LS;
 		break;
-	case USB_SPEED_VARIABLE:
+	case USB_SPEED_WIRELESS:
 		xhci_dbg(xhci, "FIXME xHCI doesn't support wireless speeds\n");
 		return -EINVAL;
 		break;
@@ -498,7 +498,7 @@ int xhci_setup_addressable_virt_dev(struct xhci_hcd *xhci, struct usb_device *ud
 	case USB_SPEED_LOW:
 		ep0_ctx->ep_info2 |= MAX_PACKET(8);
 		break;
-	case USB_SPEED_VARIABLE:
+	case USB_SPEED_WIRELESS:
 		xhci_dbg(xhci, "FIXME xHCI doesn't support wireless speeds\n");
 		return -EINVAL;
 		break;
diff --git a/drivers/usb/wusbcore/devconnect.c b/drivers/usb/wusbcore/devconnect.c
index dced419f7aba..1c918286159c 100644
--- a/drivers/usb/wusbcore/devconnect.c
+++ b/drivers/usb/wusbcore/devconnect.c
@@ -868,7 +868,7 @@ static struct usb_wireless_cap_descriptor wusb_cap_descr_default = {
  * reference that we'll drop.
  *
  * First we need to determine if the device is a WUSB device (else we
- * ignore it). For that we use the speed setting (USB_SPEED_VARIABLE)
+ * ignore it). For that we use the speed setting (USB_SPEED_WIRELESS)
  * [FIXME: maybe we'd need something more definitive]. If so, we track
  * it's usb_busd and from there, the WUSB HC.
  *
diff --git a/include/linux/usb/ch9.h b/include/linux/usb/ch9.h
index 94012e649d86..e58369ff8168 100644
--- a/include/linux/usb/ch9.h
+++ b/include/linux/usb/ch9.h
@@ -775,7 +775,7 @@ enum usb_device_speed {
 	USB_SPEED_UNKNOWN = 0,			/* enumerating */
 	USB_SPEED_LOW, USB_SPEED_FULL,		/* usb 1.1 */
 	USB_SPEED_HIGH,				/* usb 2.0 */
-	USB_SPEED_VARIABLE,			/* wireless (usb 2.5) */
+	USB_SPEED_WIRELESS,			/* wireless (usb 2.5) */
 	USB_SPEED_SUPER,			/* usb 3.0 */
 };
 
-- 
cgit v1.2.3


From 5fc4e77911f457b6aa910c704eebe3a58d334116 Mon Sep 17 00:00:00 2001
From: Ajay Kumar Gupta <ajay.gupta@ti.com>
Date: Mon, 28 Dec 2009 13:40:42 +0200
Subject: usb: musb: Add 'extvbus' in musb_hdrc_platform_data

Some of the board might use external Vbus power supply on musb
interface which would require to program ULPI_BUSCONTROL register.

Adding 'extvbus' flag which can be set from such boards which will
be checked at musb driver files before programming ULPI_BUSCONTROL.

Signed-off-by: Ajay Kumar Gupta <ajay.gupta@ti.com>
Signed-off-by: Felipe Balbi <felipe.balbi@nokia.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/musb/musb_core.c | 8 ++++++++
 drivers/usb/musb/musb_regs.h | 5 +++++
 include/linux/usb/musb.h     | 3 +++
 3 files changed, 16 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/usb/musb/musb_core.c b/drivers/usb/musb/musb_core.c
index 4c8962f976b2..074d380bf883 100644
--- a/drivers/usb/musb/musb_core.c
+++ b/drivers/usb/musb/musb_core.c
@@ -2031,6 +2031,7 @@ bad_config:
 	/* host side needs more setup */
 	if (is_host_enabled(musb)) {
 		struct usb_hcd	*hcd = musb_to_hcd(musb);
+		u8 busctl;
 
 		otg_set_host(musb->xceiv, &hcd->self);
 
@@ -2038,6 +2039,13 @@ bad_config:
 			hcd->self.otg_port = 1;
 		musb->xceiv->host = &hcd->self;
 		hcd->power_budget = 2 * (plat->power ? : 250);
+
+		/* program PHY to use external vBus if required */
+		if (plat->extvbus) {
+			busctl = musb_readb(musb->mregs, MUSB_ULPI_BUSCONTROL);
+			busctl |= MUSB_ULPI_USE_EXTVBUS;
+			musb_writeb(musb->mregs, MUSB_ULPI_BUSCONTROL, busctl);
+		}
 	}
 
 	/* For the host-only role, we can activate right away.
diff --git a/drivers/usb/musb/musb_regs.h b/drivers/usb/musb/musb_regs.h
index 473a94ef905f..9a8621ac5ac2 100644
--- a/drivers/usb/musb/musb_regs.h
+++ b/drivers/usb/musb/musb_regs.h
@@ -72,6 +72,10 @@
 #define MUSB_DEVCTL_HR		0x02
 #define MUSB_DEVCTL_SESSION	0x01
 
+/* MUSB ULPI VBUSCONTROL */
+#define MUSB_ULPI_USE_EXTVBUS	0x01
+#define MUSB_ULPI_USE_EXTVBUSIND 0x02
+
 /* TESTMODE */
 #define MUSB_TEST_FORCE_HOST	0x80
 #define MUSB_TEST_FIFO_ACCESS	0x40
@@ -246,6 +250,7 @@
 
 /* REVISIT: vctrl/vstatus: optional vendor utmi+phy register at 0x68 */
 #define MUSB_HWVERS		0x6C	/* 8 bit */
+#define MUSB_ULPI_BUSCONTROL	0x70	/* 8 bit */
 
 #define MUSB_EPINFO		0x78	/* 8 bit */
 #define MUSB_RAMINFO		0x79	/* 8 bit */
diff --git a/include/linux/usb/musb.h b/include/linux/usb/musb.h
index d43755669261..4b7f8fa252f0 100644
--- a/include/linux/usb/musb.h
+++ b/include/linux/usb/musb.h
@@ -76,6 +76,9 @@ struct musb_hdrc_platform_data {
 	/* (HOST or OTG) msec/2 after VBUS on till power good */
 	u8		potpgt;
 
+	/* (HOST or OTG) program PHY for external Vbus */
+	unsigned	extvbus:1;
+
 	/* Power the device on or off */
 	int		(*set_power)(int state);
 
-- 
cgit v1.2.3


From 088f7fec8a0e683db72fd8826c5d3ab914e197b1 Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Fri, 8 Jan 2010 12:56:54 -0500
Subject: USB: implement usb_enable_autosuspend

This patch (as1326) adds usb_enable_autosuspend() and
usb_disable_autosuspend() routines for use by drivers.  If a driver
knows that its device can handle suspends and resumes correctly, it
can enable autosuspend all by itself.  This is equivalent to the user
writing "auto" to the device's power/level attribute.

The implementation differs slightly from what it used to be.  Now
autosuspend is disabled simply by doing usb_autoresume_device() (to
increment the usage counter) and enabled by doing
usb_autosuspend_device() (to decrement the usage counter).

The set_level() attribute method is updated to use the new routines,
and the USB Power-Management documentation is updated.

The patch adds a usb_enable_autosuspend() call to the hub driver's
probe routine, allowing the special-case code for hubs in quirks.c to
be removed.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 Documentation/usb/power-management.txt | 18 +++++++++++++++
 drivers/usb/core/driver.c              | 42 ++++++++++++++++++++++++++++++++++
 drivers/usb/core/hub.c                 |  3 +++
 drivers/usb/core/quirks.c              |  9 ++++----
 drivers/usb/core/sysfs.c               | 23 ++++++-------------
 include/linux/usb.h                    |  8 +++++++
 6 files changed, 82 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/usb/power-management.txt b/Documentation/usb/power-management.txt
index 3bf6818c8cf5..e3fa189c257a 100644
--- a/Documentation/usb/power-management.txt
+++ b/Documentation/usb/power-management.txt
@@ -229,6 +229,11 @@ necessary operations by hand or add them to a udev script.  You can
 also change the idle-delay time; 2 seconds is not the best choice for
 every device.
 
+If a driver knows that its device has proper suspend/resume support,
+it can enable autosuspend all by itself.  For example, the video
+driver for a laptop's webcam might do this, since these devices are
+rarely used and so should normally be autosuspended.
+
 Sometimes it turns out that even when a device does work okay with
 autosuspend there are still problems.  For example, there are
 experimental patches adding autosuspend support to the usbhid driver,
@@ -384,6 +389,19 @@ autosuspend, there's no delay for an autoresume.
 	Other parts of the driver interface
 	-----------------------------------
 
+Drivers can enable autosuspend for their devices by calling
+
+	usb_enable_autosuspend(struct usb_device *udev);
+
+in their probe() routine, if they know that the device is capable of
+suspending and resuming correctly.  This is exactly equivalent to
+writing "auto" to the device's power/level attribute.  Likewise,
+drivers can disable autosuspend by calling
+
+	usb_disable_autosuspend(struct usb_device *udev);
+
+This is exactly the same as writing "on" to the power/level attribute.
+
 Sometimes a driver needs to make sure that remote wakeup is enabled
 during autosuspend.  For example, there's not much point
 autosuspending a keyboard if the user can't cause the keyboard to do a
diff --git a/drivers/usb/core/driver.c b/drivers/usb/core/driver.c
index 2b39583040d0..057eeab06004 100644
--- a/drivers/usb/core/driver.c
+++ b/drivers/usb/core/driver.c
@@ -1415,6 +1415,48 @@ static int usb_resume_both(struct usb_device *udev, pm_message_t msg)
 
 #ifdef CONFIG_USB_SUSPEND
 
+/**
+ * usb_enable_autosuspend - allow a USB device to be autosuspended
+ * @udev: the USB device which may be autosuspended
+ *
+ * This routine allows @udev to be autosuspended.  An autosuspend won't
+ * take place until the autosuspend_delay has elapsed and all the other
+ * necessary conditions are satisfied.
+ *
+ * The caller must hold @udev's device lock.
+ */
+int usb_enable_autosuspend(struct usb_device *udev)
+{
+	if (udev->autosuspend_disabled) {
+		udev->autosuspend_disabled = 0;
+		usb_autosuspend_device(udev);
+	}
+	return 0;
+}
+EXPORT_SYMBOL_GPL(usb_enable_autosuspend);
+
+/**
+ * usb_disable_autosuspend - prevent a USB device from being autosuspended
+ * @udev: the USB device which may not be autosuspended
+ *
+ * This routine prevents @udev from being autosuspended and wakes it up
+ * if it is already autosuspended.
+ *
+ * The caller must hold @udev's device lock.
+ */
+int usb_disable_autosuspend(struct usb_device *udev)
+{
+	int rc = 0;
+
+	if (!udev->autosuspend_disabled) {
+		rc = usb_autoresume_device(udev);
+		if (rc == 0)
+			udev->autosuspend_disabled = 1;
+	}
+	return rc;
+}
+EXPORT_SYMBOL_GPL(usb_disable_autosuspend);
+
 /* Internal routine to adjust a device's usage counter and change
  * its autosuspend state.
  */
diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index bfa6123bbdb5..746f26f222ab 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -1224,6 +1224,9 @@ static int hub_probe(struct usb_interface *intf, const struct usb_device_id *id)
 	desc = intf->cur_altsetting;
 	hdev = interface_to_usbdev(intf);
 
+	/* Hubs have proper suspend/resume support */
+	usb_enable_autosuspend(hdev);
+
 	if (hdev->level == MAX_TOPO_LEVEL) {
 		dev_err(&intf->dev,
 			"Unsupported bus topology: hub nested too deep\n");
diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c
index 0b689224394b..4314f259524b 100644
--- a/drivers/usb/core/quirks.c
+++ b/drivers/usb/core/quirks.c
@@ -103,11 +103,10 @@ void usb_detect_quirks(struct usb_device *udev)
 		dev_dbg(&udev->dev, "USB quirks for this device: %x\n",
 				udev->quirks);
 
-	/* By default, disable autosuspend for all non-hubs */
-#ifdef	CONFIG_USB_SUSPEND
-	if (udev->descriptor.bDeviceClass != USB_CLASS_HUB)
-		udev->autosuspend_disabled = 1;
-#endif
+	/* By default, disable autosuspend for all devices.  The hub driver
+	 * will enable it for hubs.
+	 */
+	usb_disable_autosuspend(udev);
 
 	/* For the present, all devices default to USB-PERSIST enabled */
 #if 0		/* was: #ifdef CONFIG_PM */
diff --git a/drivers/usb/core/sysfs.c b/drivers/usb/core/sysfs.c
index 5a1a0e2b6474..313e241f5ccc 100644
--- a/drivers/usb/core/sysfs.c
+++ b/drivers/usb/core/sysfs.c
@@ -389,34 +389,25 @@ set_level(struct device *dev, struct device_attribute *attr,
 	struct usb_device *udev = to_usb_device(dev);
 	int len = count;
 	char *cp;
-	int rc = 0;
-	int old_autosuspend_disabled;
+	int rc;
 
 	cp = memchr(buf, '\n', count);
 	if (cp)
 		len = cp - buf;
 
 	usb_lock_device(udev);
-	old_autosuspend_disabled = udev->autosuspend_disabled;
 
-	/* Setting the flags without calling usb_pm_lock is a subject to
-	 * races, but who cares...
-	 */
 	if (len == sizeof on_string - 1 &&
-			strncmp(buf, on_string, len) == 0) {
-		udev->autosuspend_disabled = 1;
-		rc = usb_external_resume_device(udev, PMSG_USER_RESUME);
+			strncmp(buf, on_string, len) == 0)
+		rc = usb_disable_autosuspend(udev);
 
-	} else if (len == sizeof auto_string - 1 &&
-			strncmp(buf, auto_string, len) == 0) {
-		udev->autosuspend_disabled = 0;
-		rc = usb_external_resume_device(udev, PMSG_USER_RESUME);
+	else if (len == sizeof auto_string - 1 &&
+			strncmp(buf, auto_string, len) == 0)
+		rc = usb_enable_autosuspend(udev);
 
-	} else
+	else
 		rc = -EINVAL;
 
-	if (rc)
-		udev->autosuspend_disabled = old_autosuspend_disabled;
 	usb_unlock_device(udev);
 	return (rc < 0 ? rc : count);
 }
diff --git a/include/linux/usb.h b/include/linux/usb.h
index 332eaea61021..e6419ac89ea2 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -542,6 +542,9 @@ extern struct usb_device *usb_find_device(u16 vendor_id, u16 product_id);
 
 /* USB autosuspend and autoresume */
 #ifdef CONFIG_USB_SUSPEND
+extern int usb_enable_autosuspend(struct usb_device *udev);
+extern int usb_disable_autosuspend(struct usb_device *udev);
+
 extern int usb_autopm_get_interface(struct usb_interface *intf);
 extern void usb_autopm_put_interface(struct usb_interface *intf);
 extern int usb_autopm_get_interface_async(struct usb_interface *intf);
@@ -565,6 +568,11 @@ static inline void usb_mark_last_busy(struct usb_device *udev)
 
 #else
 
+static inline int usb_enable_autosuspend(struct usb_device *udev)
+{ return 0; }
+static inline int usb_disable_autosuspend(struct usb_device *udev)
+{ return 0; }
+
 static inline int usb_autopm_get_interface(struct usb_interface *intf)
 { return 0; }
 static inline int usb_autopm_get_interface_async(struct usb_interface *intf)
-- 
cgit v1.2.3


From 9bbdf1e0afe771ca7650f9f476769310bee9d8f3 Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Fri, 8 Jan 2010 12:57:28 -0500
Subject: USB: convert to the runtime PM framework

This patch (as1329) converts the USB stack over to the PM core's
runtime PM framework.  This involves numerous changes throughout
usbcore, especially to hub.c and driver.c.  Perhaps the most notable
change is that CONFIG_USB_SUSPEND now depends on CONFIG_PM_RUNTIME
instead of CONFIG_PM.

Several fields in the usb_device and usb_interface structures are no
longer needed.  Some code which used to depend on CONFIG_USB_PM now
depends on CONFIG_USB_SUSPEND (requiring some rearrangement of header
files).

The only visible change in behavior should be that following a system
sleep (resume from RAM or resume from hibernation), autosuspended USB
devices will be resumed just like everything else.  They won't remain
suspended.  But if they aren't in use then they will naturally
autosuspend again in a few seconds.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 Documentation/usb/power-management.txt | 217 +++------
 drivers/usb/core/Kconfig               |   4 +-
 drivers/usb/core/driver.c              | 845 ++++++++++++++-------------------
 drivers/usb/core/hcd.c                 |  13 +-
 drivers/usb/core/hcd.h                 |  10 +-
 drivers/usb/core/hub.c                 |  65 +--
 drivers/usb/core/message.c             |   1 -
 drivers/usb/core/usb.c                 |  35 +-
 drivers/usb/core/usb.h                 |  49 +-
 drivers/usb/misc/usbtest.c             |   4 -
 include/linux/usb.h                    |  31 +-
 11 files changed, 490 insertions(+), 784 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/usb/power-management.txt b/Documentation/usb/power-management.txt
index e3fa189c257a..2790ad48cfc2 100644
--- a/Documentation/usb/power-management.txt
+++ b/Documentation/usb/power-management.txt
@@ -2,7 +2,7 @@
 
 		 Alan Stern <stern@rowland.harvard.edu>
 
-			    November 10, 2009
+			    December 11, 2009
 
 
@@ -29,9 +29,9 @@ covered to some extent (see Documentation/power/*.txt for more
 information about system PM).
 
 Note: Dynamic PM support for USB is present only if the kernel was
-built with CONFIG_USB_SUSPEND enabled.  System PM support is present
-only if the kernel was built with CONFIG_SUSPEND or CONFIG_HIBERNATION
-enabled.
+built with CONFIG_USB_SUSPEND enabled (which depends on
+CONFIG_PM_RUNTIME).  System PM support is present only if the kernel
+was built with CONFIG_SUSPEND or CONFIG_HIBERNATION enabled.
 
 
 	What is Remote Wakeup?
@@ -326,64 +326,63 @@ driver does so by calling these six functions:
 	void usb_autopm_get_interface_no_resume(struct usb_interface *intf);
 	void usb_autopm_put_interface_no_suspend(struct usb_interface *intf);
 
-The functions work by maintaining a counter in the usb_interface
-structure.  When intf->pm_usage_count is > 0 then the interface is
-deemed to be busy, and the kernel will not autosuspend the interface's
-device.  When intf->pm_usage_count is <= 0 then the interface is
-considered to be idle, and the kernel may autosuspend the device.
+The functions work by maintaining a usage counter in the
+usb_interface's embedded device structure.  When the counter is > 0
+then the interface is deemed to be busy, and the kernel will not
+autosuspend the interface's device.  When the usage counter is = 0
+then the interface is considered to be idle, and the kernel may
+autosuspend the device.
 
-(There is a similar pm_usage_count field in struct usb_device,
+(There is a similar usage counter field in struct usb_device,
 associated with the device itself rather than any of its interfaces.
-This field is used only by the USB core.)
-
-Drivers must not modify intf->pm_usage_count directly; its value
-should be changed only be using the functions listed above.  Drivers
-are responsible for insuring that the overall change to pm_usage_count
-during their lifetime balances out to 0 (it may be necessary for the
-disconnect method to call usb_autopm_put_interface() one or more times
-to fulfill this requirement).  The first two routines use the PM mutex
-in struct usb_device for mutual exclusion; drivers using the async
-routines are responsible for their own synchronization and mutual
-exclusion.
-
-	usb_autopm_get_interface() increments pm_usage_count and
-	attempts an autoresume if the new value is > 0 and the
-	device is suspended.
-
-	usb_autopm_put_interface() decrements pm_usage_count and
-	attempts an autosuspend if the new value is <= 0 and the
-	device isn't suspended.
+This counter is used only by the USB core.)
+
+Drivers need not be concerned about balancing changes to the usage
+counter; the USB core will undo any remaining "get"s when a driver
+is unbound from its interface.  As a corollary, drivers must not call
+any of the usb_autopm_* functions after their diconnect() routine has
+returned.
+
+Drivers using the async routines are responsible for their own
+synchronization and mutual exclusion.
+
+	usb_autopm_get_interface() increments the usage counter and
+	does an autoresume if the device is suspended.  If the
+	autoresume fails, the counter is decremented back.
+
+	usb_autopm_put_interface() decrements the usage counter and
+	attempts an autosuspend if the new value is = 0.
 
 	usb_autopm_get_interface_async() and
 	usb_autopm_put_interface_async() do almost the same things as
-	their non-async counterparts.  The differences are: they do
-	not acquire the PM mutex, and they use a workqueue to do their
+	their non-async counterparts.  The big difference is that they
+	use a workqueue to do the resume or suspend part of their
 	jobs.  As a result they can be called in an atomic context,
 	such as an URB's completion handler, but when they return the
-	device will not generally not yet be in the desired state.
+	device will generally not yet be in the desired state.
 
 	usb_autopm_get_interface_no_resume() and
 	usb_autopm_put_interface_no_suspend() merely increment or
-	decrement the pm_usage_count value; they do not attempt to
-	carry out an autoresume or an autosuspend.  Hence they can be
-	called in an atomic context.
+	decrement the usage counter; they do not attempt to carry out
+	an autoresume or an autosuspend.  Hence they can be called in
+	an atomic context.
 
-The conventional usage pattern is that a driver calls
+The simplest usage pattern is that a driver calls
 usb_autopm_get_interface() in its open routine and
-usb_autopm_put_interface() in its close or release routine.  But
-other patterns are possible.
+usb_autopm_put_interface() in its close or release routine.  But other
+patterns are possible.
 
 The autosuspend attempts mentioned above will often fail for one
 reason or another.  For example, the power/level attribute might be
 set to "on", or another interface in the same device might not be
 idle.  This is perfectly normal.  If the reason for failure was that
-the device hasn't been idle for long enough, a delayed workqueue
-routine is automatically set up to carry out the operation when the
-autosuspend idle-delay has expired.
+the device hasn't been idle for long enough, a timer is scheduled to
+carry out the operation automatically when the autosuspend idle-delay
+has expired.
 
 Autoresume attempts also can fail, although failure would mean that
 the device is no longer present or operating properly.  Unlike
-autosuspend, there's no delay for an autoresume.
+autosuspend, there's no idle-delay for an autoresume.
 
 
 	Other parts of the driver interface
@@ -413,26 +412,27 @@ though, setting this flag won't cause the kernel to autoresume it.
 Normally a driver would set this flag in its probe method, at which
 time the device is guaranteed not to be autosuspended.)
 
-The synchronous usb_autopm_* routines have to run in a sleepable
-process context; they must not be called from an interrupt handler or
-while holding a spinlock.  In fact, the entire autosuspend mechanism
-is not well geared toward interrupt-driven operation.  However there
-is one thing a driver can do in an interrupt handler:
+If a driver does its I/O asynchronously in interrupt context, it
+should call usb_autopm_get_interface_async() before starting output and
+usb_autopm_put_interface_async() when the output queue drains.  When
+it receives an input event, it should call
 
 	usb_mark_last_busy(struct usb_device *udev);
 
-This sets udev->last_busy to the current time.  udev->last_busy is the
-field used for idle-delay calculations; updating it will cause any
-pending autosuspend to be moved back.  The usb_autopm_* routines will
-also set the last_busy field to the current time.
-
-Calling urb_mark_last_busy() from within an URB completion handler is
-subject to races: The kernel may have just finished deciding the
-device has been idle for long enough but not yet gotten around to
-calling the driver's suspend method.  The driver would have to be
-responsible for synchronizing its suspend method with its URB
-completion handler and causing the autosuspend to fail with -EBUSY if
-an URB had completed too recently.
+in the event handler.  This sets udev->last_busy to the current time.
+udev->last_busy is the field used for idle-delay calculations;
+updating it will cause any pending autosuspend to be moved back.  Most
+of the usb_autopm_* routines will also set the last_busy field to the
+current time.
+
+Asynchronous operation is always subject to races.  For example, a
+driver may call one of the usb_autopm_*_interface_async() routines at
+a time when the core has just finished deciding the device has been
+idle for long enough but not yet gotten around to calling the driver's
+suspend method.  The suspend method must be responsible for
+synchronizing with the output request routine and the URB completion
+handler; it should cause autosuspends to fail with -EBUSY if the
+driver needs to use the device.
 
 External suspend calls should never be allowed to fail in this way,
 only autosuspend calls.  The driver can tell them apart by checking
@@ -440,75 +440,23 @@ the PM_EVENT_AUTO bit in the message.event argument to the suspend
 method; this bit will be set for internal PM events (autosuspend) and
 clear for external PM events.
 
-Many of the ingredients in the autosuspend framework are oriented
-towards interfaces: The usb_interface structure contains the
-pm_usage_cnt field, and the usb_autopm_* routines take an interface
-pointer as their argument.  But somewhat confusingly, a few of the
-pieces (i.e., usb_mark_last_busy()) use the usb_device structure
-instead.  Drivers need to keep this straight; they can call
-interface_to_usbdev() to find the device structure for a given
-interface.
 
+	Mutual exclusion
+	----------------
 
-	Locking requirements
-	--------------------
-
-All three suspend/resume methods are always called while holding the
-usb_device's PM mutex.  For external events -- but not necessarily for
-autosuspend or autoresume -- the device semaphore (udev->dev.sem) will
-also be held.  This implies that external suspend/resume events are
-mutually exclusive with calls to probe, disconnect, pre_reset, and
-post_reset; the USB core guarantees that this is true of internal
-suspend/resume events as well.
+For external events -- but not necessarily for autosuspend or
+autoresume -- the device semaphore (udev->dev.sem) will be held when a
+suspend or resume method is called.  This implies that external
+suspend/resume events are mutually exclusive with calls to probe,
+disconnect, pre_reset, and post_reset; the USB core guarantees that
+this is true of autosuspend/autoresume events as well.
 
 If a driver wants to block all suspend/resume calls during some
-critical section, it can simply acquire udev->pm_mutex. Note that
-calls to resume may be triggered indirectly. Block IO due to memory
-allocations can make the vm subsystem resume a device. Thus while
-holding this lock you must not allocate memory with GFP_KERNEL or
-GFP_NOFS.
-
-Alternatively, if the critical section might call some of the
-usb_autopm_* routines, the driver can avoid deadlock by doing:
-
-	down(&udev->dev.sem);
-	rc = usb_autopm_get_interface(intf);
-
-and at the end of the critical section:
-
-	if (!rc)
-		usb_autopm_put_interface(intf);
-	up(&udev->dev.sem);
-
-Holding the device semaphore will block all external PM calls, and the
-usb_autopm_get_interface() will prevent any internal PM calls, even if
-it fails.  (Exercise: Why?)
-
-The rules for locking order are:
-
-	Never acquire any device semaphore while holding any PM mutex.
-
-	Never acquire udev->pm_mutex while holding the PM mutex for
-	a device that isn't a descendant of udev.
-
-In other words, PM mutexes should only be acquired going up the device
-tree, and they should be acquired only after locking all the device
-semaphores you need to hold.  These rules don't matter to drivers very
-much; they usually affect just the USB core.
-
-Still, drivers do need to be careful.  For example, many drivers use a
-private mutex to synchronize their normal I/O activities with their
-disconnect method.  Now if the driver supports autosuspend then it
-must call usb_autopm_put_interface() from somewhere -- maybe from its
-close method.  It should make the call while holding the private mutex,
-since a driver shouldn't call any of the usb_autopm_* functions for an
-interface from which it has been unbound.
-
-But the usb_autpm_* routines always acquire the device's PM mutex, and
-consequently the locking order has to be: private mutex first, PM
-mutex second.  Since the suspend method is always called with the PM
-mutex held, it mustn't try to acquire the private mutex.  It has to
-synchronize with the driver's I/O activities in some other way.
+critical section, the best way is to lock the device and call
+usb_autopm_get_interface() (and do the reverse at the end of the
+critical section).  Holding the device semaphore will block all
+external PM calls, and the usb_autopm_get_interface() will prevent any
+internal PM calls, even if it fails.  (Exercise: Why?)
 
 
 	Interaction between dynamic PM and system PM
@@ -517,22 +465,11 @@ synchronize with the driver's I/O activities in some other way.
 Dynamic power management and system power management can interact in
 a couple of ways.
 
-Firstly, a device may already be manually suspended or autosuspended
-when a system suspend occurs.  Since system suspends are supposed to
-be as transparent as possible, the device should remain suspended
-following the system resume.  The 2.6.23 kernel obeys this principle
-for manually suspended devices but not for autosuspended devices; they
-do get resumed when the system wakes up.  (Presumably they will be
-autosuspended again after their idle-delay time expires.)  In later
-kernels this behavior will be fixed.
-
-(There is an exception.  If a device would undergo a reset-resume
-instead of a normal resume, and the device is enabled for remote
-wakeup, then the reset-resume takes place even if the device was
-already suspended when the system suspend began.  The justification is
-that a reset-resume is a kind of remote-wakeup event.  Or to put it
-another way, a device which needs a reset won't be able to generate
-normal remote-wakeup signals, so it ought to be resumed immediately.)
+Firstly, a device may already be autosuspended when a system suspend
+occurs.  Since system suspends are supposed to be as transparent as
+possible, the device should remain suspended following the system
+resume.  But this theory may not work out well in practice; over time
+the kernel's behavior in this regard has changed.
 
 Secondly, a dynamic power-management event may occur as a system
 suspend is underway.  The window for this is short, since system
diff --git a/drivers/usb/core/Kconfig b/drivers/usb/core/Kconfig
index ad925946f869..97a819c23ef3 100644
--- a/drivers/usb/core/Kconfig
+++ b/drivers/usb/core/Kconfig
@@ -91,8 +91,8 @@ config USB_DYNAMIC_MINORS
 	  If you are unsure about this, say N here.
 
 config USB_SUSPEND
-	bool "USB selective suspend/resume and wakeup"
-	depends on USB && PM
+	bool "USB runtime power management (suspend/resume and wakeup)"
+	depends on USB && PM_RUNTIME
 	help
 	  If you say Y here, you can use driver calls or the sysfs
 	  "power/level" file to suspend or resume individual USB
diff --git a/drivers/usb/core/driver.c b/drivers/usb/core/driver.c
index 638d54693a1c..6850ec6576f8 100644
--- a/drivers/usb/core/driver.c
+++ b/drivers/usb/core/driver.c
@@ -25,7 +25,7 @@
 #include <linux/device.h>
 #include <linux/usb.h>
 #include <linux/usb/quirks.h>
-#include <linux/workqueue.h>
+#include <linux/pm_runtime.h>
 #include "hcd.h"
 #include "usb.h"
 
@@ -221,7 +221,7 @@ static int usb_probe_device(struct device *dev)
 {
 	struct usb_device_driver *udriver = to_usb_device_driver(dev->driver);
 	struct usb_device *udev = to_usb_device(dev);
-	int error = -ENODEV;
+	int error = 0;
 
 	dev_dbg(dev, "%s\n", __func__);
 
@@ -230,18 +230,23 @@ static int usb_probe_device(struct device *dev)
 	/* The device should always appear to be in use
 	 * unless the driver suports autosuspend.
 	 */
-	udev->pm_usage_cnt = !(udriver->supports_autosuspend);
+	if (!udriver->supports_autosuspend)
+		error = usb_autoresume_device(udev);
 
-	error = udriver->probe(udev);
+	if (!error)
+		error = udriver->probe(udev);
 	return error;
 }
 
 /* called from driver core with dev locked */
 static int usb_unbind_device(struct device *dev)
 {
+	struct usb_device *udev = to_usb_device(dev);
 	struct usb_device_driver *udriver = to_usb_device_driver(dev->driver);
 
-	udriver->disconnect(to_usb_device(dev));
+	udriver->disconnect(udev);
+	if (!udriver->supports_autosuspend)
+		usb_autosuspend_device(udev);
 	return 0;
 }
 
@@ -293,17 +298,16 @@ static int usb_probe_interface(struct device *dev)
 	if (error)
 		return error;
 
-	/* Interface "power state" doesn't correspond to any hardware
-	 * state whatsoever.  We use it to record when it's bound to
-	 * a driver that may start I/0:  it's not frozen/quiesced.
-	 */
-	mark_active(intf);
 	intf->condition = USB_INTERFACE_BINDING;
 
-	/* The interface should always appear to be in use
-	 * unless the driver suports autosuspend.
+	/* Bound interfaces are initially active.  They are
+	 * runtime-PM-enabled only if the driver has autosuspend support.
+	 * They are sensitive to their children's power states.
 	 */
-	atomic_set(&intf->pm_usage_cnt, !driver->supports_autosuspend);
+	pm_runtime_set_active(dev);
+	pm_suspend_ignore_children(dev, false);
+	if (driver->supports_autosuspend)
+		pm_runtime_enable(dev);
 
 	/* Carry out a deferred switch to altsetting 0 */
 	if (intf->needs_altsetting0) {
@@ -323,10 +327,14 @@ static int usb_probe_interface(struct device *dev)
 	return error;
 
  err:
-	mark_quiesced(intf);
 	intf->needs_remote_wakeup = 0;
 	intf->condition = USB_INTERFACE_UNBOUND;
 	usb_cancel_queued_reset(intf);
+
+	/* Unbound interfaces are always runtime-PM-disabled and -suspended */
+	pm_runtime_disable(dev);
+	pm_runtime_set_suspended(dev);
+
 	usb_autosuspend_device(udev);
 	return error;
 }
@@ -376,9 +384,17 @@ static int usb_unbind_interface(struct device *dev)
 	usb_set_intfdata(intf, NULL);
 
 	intf->condition = USB_INTERFACE_UNBOUND;
-	mark_quiesced(intf);
 	intf->needs_remote_wakeup = 0;
 
+	/* Unbound interfaces are always runtime-PM-disabled and -suspended */
+	pm_runtime_disable(dev);
+	pm_runtime_set_suspended(dev);
+
+	/* Undo any residual pm_autopm_get_interface_* calls */
+	for (r = atomic_read(&intf->pm_usage_cnt); r > 0; --r)
+		usb_autopm_put_interface_no_suspend(intf);
+	atomic_set(&intf->pm_usage_cnt, 0);
+
 	if (!error)
 		usb_autosuspend_device(udev);
 
@@ -409,7 +425,6 @@ int usb_driver_claim_interface(struct usb_driver *driver,
 				struct usb_interface *iface, void *priv)
 {
 	struct device *dev = &iface->dev;
-	struct usb_device *udev = interface_to_usbdev(iface);
 	int retval = 0;
 
 	if (dev->driver)
@@ -419,11 +434,16 @@ int usb_driver_claim_interface(struct usb_driver *driver,
 	usb_set_intfdata(iface, priv);
 	iface->needs_binding = 0;
 
-	usb_pm_lock(udev);
 	iface->condition = USB_INTERFACE_BOUND;
-	mark_active(iface);
-	atomic_set(&iface->pm_usage_cnt, !driver->supports_autosuspend);
-	usb_pm_unlock(udev);
+
+	/* Bound interfaces are initially active.  They are
+	 * runtime-PM-enabled only if the driver has autosuspend support.
+	 * They are sensitive to their children's power states.
+	 */
+	pm_runtime_set_active(dev);
+	pm_suspend_ignore_children(dev, false);
+	if (driver->supports_autosuspend)
+		pm_runtime_enable(dev);
 
 	/* if interface was already added, bind now; else let
 	 * the future device_add() bind it, bypassing probe()
@@ -982,7 +1002,6 @@ static void do_unbind_rebind(struct usb_device *udev, int action)
 	}
 }
 
-/* Caller has locked udev's pm_mutex */
 static int usb_suspend_device(struct usb_device *udev, pm_message_t msg)
 {
 	struct usb_device_driver	*udriver;
@@ -1006,7 +1025,6 @@ static int usb_suspend_device(struct usb_device *udev, pm_message_t msg)
 	return status;
 }
 
-/* Caller has locked udev's pm_mutex */
 static int usb_resume_device(struct usb_device *udev, pm_message_t msg)
 {
 	struct usb_device_driver	*udriver;
@@ -1040,27 +1058,20 @@ static int usb_resume_device(struct usb_device *udev, pm_message_t msg)
 	return status;
 }
 
-/* Caller has locked intf's usb_device's pm mutex */
 static int usb_suspend_interface(struct usb_device *udev,
 		struct usb_interface *intf, pm_message_t msg)
 {
 	struct usb_driver	*driver;
 	int			status = 0;
 
-	/* with no hardware, USB interfaces only use FREEZE and ON states */
-	if (udev->state == USB_STATE_NOTATTACHED || !is_active(intf))
-		goto done;
-
-	/* This can happen; see usb_driver_release_interface() */
-	if (intf->condition == USB_INTERFACE_UNBOUND)
+	if (udev->state == USB_STATE_NOTATTACHED ||
+			intf->condition == USB_INTERFACE_UNBOUND)
 		goto done;
 	driver = to_usb_driver(intf->dev.driver);
 
 	if (driver->suspend) {
 		status = driver->suspend(intf, msg);
-		if (status == 0)
-			mark_quiesced(intf);
-		else if (!(msg.event & PM_EVENT_AUTO))
+		if (status && !(msg.event & PM_EVENT_AUTO))
 			dev_err(&intf->dev, "%s error %d\n",
 					"suspend", status);
 	} else {
@@ -1068,7 +1079,6 @@ static int usb_suspend_interface(struct usb_device *udev,
 		intf->needs_binding = 1;
 		dev_warn(&intf->dev, "no %s for driver %s?\n",
 				"suspend", driver->name);
-		mark_quiesced(intf);
 	}
 
  done:
@@ -1076,14 +1086,13 @@ static int usb_suspend_interface(struct usb_device *udev,
 	return status;
 }
 
-/* Caller has locked intf's usb_device's pm_mutex */
 static int usb_resume_interface(struct usb_device *udev,
 		struct usb_interface *intf, pm_message_t msg, int reset_resume)
 {
 	struct usb_driver	*driver;
 	int			status = 0;
 
-	if (udev->state == USB_STATE_NOTATTACHED || is_active(intf))
+	if (udev->state == USB_STATE_NOTATTACHED)
 		goto done;
 
 	/* Don't let autoresume interfere with unbinding */
@@ -1134,90 +1143,11 @@ static int usb_resume_interface(struct usb_device *udev,
 
 done:
 	dev_vdbg(&intf->dev, "%s: status %d\n", __func__, status);
-	if (status == 0 && intf->condition == USB_INTERFACE_BOUND)
-		mark_active(intf);
 
 	/* Later we will unbind the driver and/or reprobe, if necessary */
 	return status;
 }
 
-#ifdef	CONFIG_USB_SUSPEND
-
-/* Internal routine to check whether we may autosuspend a device. */
-static int autosuspend_check(struct usb_device *udev, int reschedule)
-{
-	int			i;
-	struct usb_interface	*intf;
-	unsigned long		suspend_time, j;
-
-	/* For autosuspend, fail fast if anything is in use or autosuspend
-	 * is disabled.  Also fail if any interfaces require remote wakeup
-	 * but it isn't available.
-	 */
-	if (udev->pm_usage_cnt > 0)
-		return -EBUSY;
-	if (udev->autosuspend_delay < 0 || udev->autosuspend_disabled)
-		return -EPERM;
-
-	suspend_time = udev->last_busy + udev->autosuspend_delay;
-	if (udev->actconfig) {
-		for (i = 0; i < udev->actconfig->desc.bNumInterfaces; i++) {
-			intf = udev->actconfig->interface[i];
-			if (!is_active(intf))
-				continue;
-			if (atomic_read(&intf->pm_usage_cnt) > 0)
-				return -EBUSY;
-			if (intf->needs_remote_wakeup &&
-					!udev->do_remote_wakeup) {
-				dev_dbg(&udev->dev, "remote wakeup needed "
-						"for autosuspend\n");
-				return -EOPNOTSUPP;
-			}
-
-			/* Don't allow autosuspend if the device will need
-			 * a reset-resume and any of its interface drivers
-			 * doesn't include support.
-			 */
-			if (udev->quirks & USB_QUIRK_RESET_RESUME) {
-				struct usb_driver *driver;
-
-				driver = to_usb_driver(intf->dev.driver);
-				if (!driver->reset_resume ||
-				    intf->needs_remote_wakeup)
-					return -EOPNOTSUPP;
-			}
-		}
-	}
-
-	/* If everything is okay but the device hasn't been idle for long
-	 * enough, queue a delayed autosuspend request.  If the device
-	 * _has_ been idle for long enough and the reschedule flag is set,
-	 * likewise queue a delayed (1 second) autosuspend request.
-	 */
-	j = jiffies;
-	if (time_before(j, suspend_time))
-		reschedule = 1;
-	else
-		suspend_time = j + HZ;
-	if (reschedule) {
-		if (!timer_pending(&udev->autosuspend.timer)) {
-			queue_delayed_work(ksuspend_usb_wq, &udev->autosuspend,
-				round_jiffies_up_relative(suspend_time - j));
-		}
-		return -EAGAIN;
-	}
-	return 0;
-}
-
-#else
-
-static inline int autosuspend_check(struct usb_device *udev, int reschedule)
-{
-	return 0;
-}
-
-#endif	/* CONFIG_USB_SUSPEND */
-
 /**
  * usb_suspend_both - suspend a USB device and its interfaces
  * @udev: the usb_device to suspend
@@ -1229,27 +1159,12 @@ static inline int autosuspend_check(struct usb_device *udev, int reschedule)
  * all the interfaces which were suspended are resumed so that they remain
  * in the same state as the device.
  *
- * If an autosuspend is in progress the routine checks first to make sure
- * that neither the device itself or any of its active interfaces is in use
- * (pm_usage_cnt is greater than 0).  If they are, the autosuspend fails.
- *
- * If the suspend succeeds, the routine recursively queues an autosuspend
- * request for @udev's parent device, thereby propagating the change up
- * the device tree.  If all of the parent's children are now suspended,
- * the parent will autosuspend in turn.
- *
- * The suspend method calls are subject to mutual exclusion under control
- * of @udev's pm_mutex.  Many of these calls are also under the protection
- * of @udev's device lock (including all requests originating outside the
- * USB subsystem), but autosuspend requests generated by a child device or
- * interface driver may not be.  Usbcore will insure that the method calls
- * do not arrive during bind, unbind, or reset operations.  However, drivers
- * must be prepared to handle suspend calls arriving at unpredictable times.
- * The only way to block such calls is to do an autoresume (preventing
- * autosuspends) while holding @udev's device lock (preventing outside
- * suspends).
- *
- * The caller must hold @udev->pm_mutex.
+ * Autosuspend requests originating from a child device or an interface
+ * driver may be made without the protection of @udev's device lock, but
+ * all other suspend calls will hold the lock.  Usbcore will insure that
+ * method calls do not arrive during bind, unbind, or reset operations.
+ * However drivers must be prepared to handle suspend calls arriving at
+ * unpredictable times.
  *
  * This routine can run only in process context.
  */
@@ -1258,20 +1173,11 @@ static int usb_suspend_both(struct usb_device *udev, pm_message_t msg)
 	int			status = 0;
 	int			i = 0;
 	struct usb_interface	*intf;
-	struct usb_device	*parent = udev->parent;
 
 	if (udev->state == USB_STATE_NOTATTACHED ||
 			udev->state == USB_STATE_SUSPENDED)
 		goto done;
 
-	udev->do_remote_wakeup = device_may_wakeup(&udev->dev);
-
-	if (msg.event & PM_EVENT_AUTO) {
-		status = autosuspend_check(udev, 0);
-		if (status < 0)
-			goto done;
-	}
-
 	/* Suspend all the interfaces and then udev itself */
 	if (udev->actconfig) {
 		for (; i < udev->actconfig->desc.bNumInterfaces; i++) {
@@ -1286,35 +1192,21 @@ static int usb_suspend_both(struct usb_device *udev, pm_message_t msg)
 
 	/* If the suspend failed, resume interfaces that did get suspended */
 	if (status != 0) {
-		pm_message_t msg2;
-
-		msg2.event = msg.event ^ (PM_EVENT_SUSPEND | PM_EVENT_RESUME);
+		msg.event ^= (PM_EVENT_SUSPEND | PM_EVENT_RESUME);
 		while (--i >= 0) {
 			intf = udev->actconfig->interface[i];
-			usb_resume_interface(udev, intf, msg2, 0);
+			usb_resume_interface(udev, intf, msg, 0);
 		}
 
-		/* Try another autosuspend when the interfaces aren't busy */
-		if (msg.event & PM_EVENT_AUTO)
-			autosuspend_check(udev, status == -EBUSY);
-
-	/* If the suspend succeeded then prevent any more URB submissions,
-	 * flush any outstanding URBs, and propagate the suspend up the tree.
+	/* If the suspend succeeded then prevent any more URB submissions
+	 * and flush any outstanding URBs.
 	 */
 	} else {
-		cancel_delayed_work(&udev->autosuspend);
 		udev->can_submit = 0;
 		for (i = 0; i < 16; ++i) {
 			usb_hcd_flush_endpoint(udev, udev->ep_out[i]);
 			usb_hcd_flush_endpoint(udev, udev->ep_in[i]);
 		}
-
-		/* If this is just a FREEZE or a PRETHAW, udev might
-		 * not really be suspended.  Only true suspends get
-		 * propagated up the device tree.
-		 */
-		if (parent && udev->state == USB_STATE_SUSPENDED)
-			usb_autosuspend_device(parent);
 	}
 
  done:
@@ -1331,23 +1223,12 @@ static int usb_suspend_both(struct usb_device *udev, pm_message_t msg)
  * the resume method for @udev and then calls the resume methods for all
  * the interface drivers in @udev.
  *
- * Before starting the resume, the routine calls itself recursively for
- * the parent device of @udev, thereby propagating the change up the device
- * tree and assuring that @udev will be able to resume.  If the parent is
- * unable to resume successfully, the routine fails.
- *
- * The resume method calls are subject to mutual exclusion under control
- * of @udev's pm_mutex.  Many of these calls are also under the protection
- * of @udev's device lock (including all requests originating outside the
- * USB subsystem), but autoresume requests generated by a child device or
- * interface driver may not be.  Usbcore will insure that the method calls
- * do not arrive during bind, unbind, or reset operations.  However, drivers
- * must be prepared to handle resume calls arriving at unpredictable times.
- * The only way to block such calls is to do an autoresume (preventing
- * other autoresumes) while holding @udev's device lock (preventing outside
- * resumes).
- *
- * The caller must hold @udev->pm_mutex.
+ * Autoresume requests originating from a child device or an interface
+ * driver may be made without the protection of @udev's device lock, but
+ * all other resume calls will hold the lock.  Usbcore will insure that
+ * method calls do not arrive during bind, unbind, or reset operations.
+ * However drivers must be prepared to handle resume calls arriving at
+ * unpredictable times.
  *
  * This routine can run only in process context.
  */
@@ -1356,48 +1237,18 @@ static int usb_resume_both(struct usb_device *udev, pm_message_t msg)
 	int			status = 0;
 	int			i;
 	struct usb_interface	*intf;
-	struct usb_device	*parent = udev->parent;
 
-	cancel_delayed_work(&udev->autosuspend);
 	if (udev->state == USB_STATE_NOTATTACHED) {
 		status = -ENODEV;
 		goto done;
 	}
 	udev->can_submit = 1;
 
-	/* Propagate the resume up the tree, if necessary */
-	if (udev->state == USB_STATE_SUSPENDED) {
-		if (parent) {
-			status = usb_autoresume_device(parent);
-			if (status == 0) {
-				status = usb_resume_device(udev, msg);
-				if (status || udev->state ==
-						USB_STATE_NOTATTACHED) {
-					usb_autosuspend_device(parent);
-
-					/* It's possible usb_resume_device()
-					 * failed after the port was
-					 * unsuspended, causing udev to be
-					 * logically disconnected.  We don't
-					 * want usb_disconnect() to autosuspend
-					 * the parent again, so tell it that
-					 * udev disconnected while still
-					 * suspended. */
-					if (udev->state ==
-							USB_STATE_NOTATTACHED)
-						udev->discon_suspended = 1;
-				}
-			}
-		} else {
-
-			/* We can't progagate beyond the USB subsystem,
-			 * so if a root hub's controller is suspended
-			 * then we're stuck. */
-			status = usb_resume_device(udev, msg);
-		}
-	} else if (udev->reset_resume)
+	/* Resume the device */
+	if (udev->state == USB_STATE_SUSPENDED || udev->reset_resume)
 		status = usb_resume_device(udev, msg);
 
+	/* Resume the interfaces */
 	if (status == 0 && udev->actconfig) {
 		for (i = 0; i < udev->actconfig->desc.bNumInterfaces; i++) {
 			intf = udev->actconfig->interface[i];
@@ -1413,104 +1264,46 @@ static int usb_resume_both(struct usb_device *udev, pm_message_t msg)
 	return status;
 }
 
-/**
- * usb_external_suspend_device - external suspend of a USB device and its interfaces
- * @udev: the usb_device to suspend
- * @msg: Power Management message describing this state transition
- *
- * This routine handles external suspend requests: ones not generated
- * internally by a USB driver (autosuspend) but rather coming from the user
- * (via sysfs) or the PM core (system sleep).  The suspend will be carried
- * out regardless of @udev's usage counter or those of its interfaces,
- * and regardless of whether or not remote wakeup is enabled.  Of course,
- * interface drivers still have the option of failing the suspend (if
- * there are unsuspended children, for example).
- *
- * The caller must hold @udev's device lock.
- */
-int usb_external_suspend_device(struct usb_device *udev, pm_message_t msg)
-{
-	int	status;
-
-	do_unbind_rebind(udev, DO_UNBIND);
-	usb_pm_lock(udev);
-	status = usb_suspend_both(udev, msg);
-	usb_pm_unlock(udev);
-	return status;
-}
-
-/**
- * usb_external_resume_device - external resume of a USB device and its interfaces
- * @udev: the usb_device to resume
- * @msg: Power Management message describing this state transition
- *
- * This routine handles external resume requests: ones not generated
- * internally by a USB driver (autoresume) but rather coming from the user
- * (via sysfs), the PM core (system resume), or the device itself (remote
- * wakeup).  @udev's usage counter is unaffected.
- *
- * The caller must hold @udev's device lock.
- */
-int usb_external_resume_device(struct usb_device *udev, pm_message_t msg)
-{
-	int	status;
-
-	usb_pm_lock(udev);
-	status = usb_resume_both(udev, msg);
-	udev->last_busy = jiffies;
-	usb_pm_unlock(udev);
-	if (status == 0)
-		do_unbind_rebind(udev, DO_REBIND);
-
-	/* Now that the device is awake, we can start trying to autosuspend
-	 * it again. */
-	if (status == 0)
-		usb_try_autosuspend_device(udev);
-	return status;
-}
-
+/* The device lock is held by the PM core */
 int usb_suspend(struct device *dev, pm_message_t msg)
 {
-	struct usb_device	*udev;
-
-	udev = to_usb_device(dev);
+	struct usb_device	*udev = to_usb_device(dev);
 
-	/* If udev is already suspended, we can skip this suspend and
-	 * we should also skip the upcoming system resume.  High-speed
-	 * root hubs are an exception; they need to resume whenever the
-	 * system wakes up in order for USB-PERSIST port handover to work
-	 * properly.
-	 */
-	if (udev->state == USB_STATE_SUSPENDED) {
-		if (udev->parent || udev->speed != USB_SPEED_HIGH)
-			udev->skip_sys_resume = 1;
-		return 0;
-	}
-
-	udev->skip_sys_resume = 0;
-	return usb_external_suspend_device(udev, msg);
+	do_unbind_rebind(udev, DO_UNBIND);
+	udev->do_remote_wakeup = device_may_wakeup(&udev->dev);
+	return usb_suspend_both(udev, msg);
 }
 
+/* The device lock is held by the PM core */
 int usb_resume(struct device *dev, pm_message_t msg)
 {
-	struct usb_device	*udev;
+	struct usb_device	*udev = to_usb_device(dev);
 	int			status;
 
-	udev = to_usb_device(dev);
+	/* For PM complete calls, all we do is rebind interfaces */
+	if (msg.event == PM_EVENT_ON) {
+		if (udev->state != USB_STATE_NOTATTACHED)
+			do_unbind_rebind(udev, DO_REBIND);
+		status = 0;
 
-	/* If udev->skip_sys_resume is set then udev was already suspended
-	 * when the system sleep started, so we don't want to resume it
-	 * during this system wakeup.
+	/* For all other calls, take the device back to full power and
+	 * tell the PM core in case it was autosuspended previously.
 	 */
-	if (udev->skip_sys_resume)
-		return 0;
-	status = usb_external_resume_device(udev, msg);
+	} else {
+		status = usb_resume_both(udev, msg);
+		if (status == 0) {
+			pm_runtime_disable(dev);
+			pm_runtime_set_active(dev);
+			pm_runtime_enable(dev);
+			udev->last_busy = jiffies;
+		}
+	}
 
 	/* Avoid PM error messages for devices disconnected while suspended
 	 * as we'll display regular disconnect messages just a bit later.
 	 */
 	if (status == -ENODEV)
-		return 0;
+		status = 0;
 	return status;
 }
 
@@ -1560,54 +1353,6 @@ int usb_disable_autosuspend(struct usb_device *udev)
 }
 EXPORT_SYMBOL_GPL(usb_disable_autosuspend);
 
-/* Internal routine to adjust a device's usage counter and change
- * its autosuspend state.
- */
-static int usb_autopm_do_device(struct usb_device *udev, int inc_usage_cnt)
-{
-	int	status = 0;
-
-	usb_pm_lock(udev);
-	udev->pm_usage_cnt += inc_usage_cnt;
-	WARN_ON(udev->pm_usage_cnt < 0);
-	if (inc_usage_cnt)
-		udev->last_busy = jiffies;
-	if (inc_usage_cnt >= 0 && udev->pm_usage_cnt > 0) {
-		if (udev->state == USB_STATE_SUSPENDED)
-			status = usb_resume_both(udev, PMSG_AUTO_RESUME);
-		if (status != 0)
-			udev->pm_usage_cnt -= inc_usage_cnt;
-		else if (inc_usage_cnt)
-			udev->last_busy = jiffies;
-	} else if (inc_usage_cnt <= 0 && udev->pm_usage_cnt <= 0) {
-		status = usb_suspend_both(udev, PMSG_AUTO_SUSPEND);
-	}
-	usb_pm_unlock(udev);
-	return status;
-}
-
-/* usb_autosuspend_work - callback routine to autosuspend a USB device */
-void usb_autosuspend_work(struct work_struct *work)
-{
-	struct usb_device *udev =
-		container_of(work, struct usb_device, autosuspend.work);
-
-	usb_autopm_do_device(udev, 0);
-}
-
-/* usb_autoresume_work - callback routine to autoresume a USB device */
-void usb_autoresume_work(struct work_struct *work)
-{
-	struct usb_device *udev =
-		container_of(work, struct usb_device, autoresume);
-
-	/* Wake it up, let the drivers do their thing, and then put it
-	 * back to sleep.
-	 */
-	if (usb_autopm_do_device(udev, 1) == 0)
-		usb_autopm_do_device(udev, -1);
-}
-
 /**
  * usb_autosuspend_device - delayed autosuspend of a USB device and its interfaces
  * @udev: the usb_device to autosuspend
@@ -1616,12 +1361,9 @@ void usb_autoresume_work(struct work_struct *work)
  * @udev and wants to allow it to autosuspend.  Examples would be when
  * @udev's device file in usbfs is closed or after a configuration change.
  *
- * @udev's usage counter is decremented.  If it or any of the usage counters
- * for an active interface is greater than 0, no autosuspend request will be
- * queued.  (If an interface driver does not support autosuspend then its
- * usage counter is permanently positive.)  Furthermore, if an interface
- * driver requires remote-wakeup capability during autosuspend but remote
- * wakeup is disabled, the autosuspend will fail.
+ * @udev's usage counter is decremented; if it drops to 0 and all the
+ * interfaces are inactive then a delayed autosuspend will be attempted.
+ * The attempt may fail (see autosuspend_check()).
  *
  * The caller must hold @udev's device lock.
  *
@@ -1631,9 +1373,11 @@ void usb_autosuspend_device(struct usb_device *udev)
 {
 	int	status;
 
-	status = usb_autopm_do_device(udev, -1);
-	dev_vdbg(&udev->dev, "%s: cnt %d\n",
-			__func__, udev->pm_usage_cnt);
+	udev->last_busy = jiffies;
+	status = pm_runtime_put_sync(&udev->dev);
+	dev_vdbg(&udev->dev, "%s: cnt %d -> %d\n",
+			__func__, atomic_read(&udev->dev.power.usage_count),
+			status);
 }
 
 /**
@@ -1643,9 +1387,9 @@ void usb_autosuspend_device(struct usb_device *udev)
  * This routine should be called when a core subsystem thinks @udev may
  * be ready to autosuspend.
  *
- * @udev's usage counter left unchanged.  If it or any of the usage counters
- * for an active interface is greater than 0, or autosuspend is not allowed
- * for any other reason, no autosuspend request will be queued.
+ * @udev's usage counter left unchanged.  If it is 0 and all the interfaces
+ * are inactive then an autosuspend will be attempted.  The attempt may
+ * fail or be delayed.
  *
  * The caller must hold @udev's device lock.
  *
@@ -1653,9 +1397,12 @@ void usb_autosuspend_device(struct usb_device *udev)
  */
 void usb_try_autosuspend_device(struct usb_device *udev)
 {
-	usb_autopm_do_device(udev, 0);
-	dev_vdbg(&udev->dev, "%s: cnt %d\n",
-			__func__, udev->pm_usage_cnt);
+	int	status;
+
+	status = pm_runtime_idle(&udev->dev);
+	dev_vdbg(&udev->dev, "%s: cnt %d -> %d\n",
+			__func__, atomic_read(&udev->dev.power.usage_count),
+			status);
 }
 
 /**
@@ -1664,9 +1411,9 @@ void usb_try_autosuspend_device(struct usb_device *udev)
  *
  * This routine should be called when a core subsystem wants to use @udev
  * and needs to guarantee that it is not suspended.  No autosuspend will
- * occur until usb_autosuspend_device is called.  (Note that this will not
- * prevent suspend events originating in the PM core.)  Examples would be
- * when @udev's device file in usbfs is opened or when a remote-wakeup
+ * occur until usb_autosuspend_device() is called.  (Note that this will
+ * not prevent suspend events originating in the PM core.)  Examples would
+ * be when @udev's device file in usbfs is opened or when a remote-wakeup
  * request is received.
  *
  * @udev's usage counter is incremented to prevent subsequent autosuspends.
@@ -1680,42 +1427,14 @@ int usb_autoresume_device(struct usb_device *udev)
 {
 	int	status;
 
-	status = usb_autopm_do_device(udev, 1);
-	dev_vdbg(&udev->dev, "%s: status %d cnt %d\n",
-			__func__, status, udev->pm_usage_cnt);
-	return status;
-}
-
-/* Internal routine to adjust an interface's usage counter and change
- * its device's autosuspend state.
- */
-static int usb_autopm_do_interface(struct usb_interface *intf,
-		int inc_usage_cnt)
-{
-	struct usb_device	*udev = interface_to_usbdev(intf);
-	int			status = 0;
-
-	usb_pm_lock(udev);
-	if (intf->condition == USB_INTERFACE_UNBOUND)
-		status = -ENODEV;
-	else {
-		atomic_add(inc_usage_cnt, &intf->pm_usage_cnt);
-		udev->last_busy = jiffies;
-		if (inc_usage_cnt >= 0 &&
-				atomic_read(&intf->pm_usage_cnt) > 0) {
-			if (udev->state == USB_STATE_SUSPENDED)
-				status = usb_resume_both(udev,
-						PMSG_AUTO_RESUME);
-			if (status != 0)
-				atomic_sub(inc_usage_cnt, &intf->pm_usage_cnt);
-			else
-				udev->last_busy = jiffies;
-		} else if (inc_usage_cnt <= 0 &&
-				atomic_read(&intf->pm_usage_cnt) <= 0) {
-			status = usb_suspend_both(udev, PMSG_AUTO_SUSPEND);
-		}
-	}
-	usb_pm_unlock(udev);
+	status = pm_runtime_get_sync(&udev->dev);
+	if (status < 0)
+		pm_runtime_put_sync(&udev->dev);
+	dev_vdbg(&udev->dev, "%s: cnt %d -> %d\n",
+			__func__, atomic_read(&udev->dev.power.usage_count),
+			status);
+	if (status > 0)
+		status = 0;
 	return status;
 }
 
@@ -1729,34 +1448,25 @@ static int usb_autopm_do_interface(struct usb_interface *intf,
  * closed.
  *
  * The routine decrements @intf's usage counter.  When the counter reaches
- * 0, a delayed autosuspend request for @intf's device is queued.  When
- * the delay expires, if @intf->pm_usage_cnt is still <= 0 along with all
- * the other usage counters for the sibling interfaces and @intf's
- * usb_device, the device and all its interfaces will be autosuspended.
- *
- * Note that @intf->pm_usage_cnt is owned by the interface driver.  The
- * core will not change its value other than the increment and decrement
- * in usb_autopm_get_interface and usb_autopm_put_interface.  The driver
- * may use this simple counter-oriented discipline or may set the value
- * any way it likes.
+ * 0, a delayed autosuspend request for @intf's device is attempted.  The
+ * attempt may fail (see autosuspend_check()).
  *
  * If the driver has set @intf->needs_remote_wakeup then autosuspend will
  * take place only if the device's remote-wakeup facility is enabled.
  *
- * Suspend method calls queued by this routine can arrive at any time
- * while @intf is resumed and its usage counter is equal to 0.  They are
- * not protected by the usb_device's lock but only by its pm_mutex.
- * Drivers must provide their own synchronization.
- *
  * This routine can run only in process context.
  */
 void usb_autopm_put_interface(struct usb_interface *intf)
 {
-	int	status;
+	struct usb_device	*udev = interface_to_usbdev(intf);
+	int			status;
 
-	status = usb_autopm_do_interface(intf, -1);
-	dev_vdbg(&intf->dev, "%s: status %d cnt %d\n",
-			__func__, status, atomic_read(&intf->pm_usage_cnt));
+	udev->last_busy = jiffies;
+	atomic_dec(&intf->pm_usage_cnt);
+	status = pm_runtime_put_sync(&intf->dev);
+	dev_vdbg(&intf->dev, "%s: cnt %d -> %d\n",
+			__func__, atomic_read(&intf->dev.power.usage_count),
+			status);
 }
 EXPORT_SYMBOL_GPL(usb_autopm_put_interface);
 
@@ -1764,11 +1474,11 @@ EXPORT_SYMBOL_GPL(usb_autopm_put_interface);
  * usb_autopm_put_interface_async - decrement a USB interface's PM-usage counter
  * @intf: the usb_interface whose counter should be decremented
  *
- * This routine does essentially the same thing as
- * usb_autopm_put_interface(): it decrements @intf's usage counter and
- * queues a delayed autosuspend request if the counter is <= 0.  The
- * difference is that it does not acquire the device's pm_mutex;
- * callers must handle all synchronization issues themselves.
+ * This routine does much the same thing as usb_autopm_put_interface():
+ * It decrements @intf's usage counter and schedules a delayed
+ * autosuspend request if the counter is <= 0.  The difference is that it
+ * does not perform any synchronization; callers should hold a private
+ * lock and handle all synchronization issues themselves.
  *
  * Typically a driver would call this routine during an URB's completion
  * handler, if no more URBs were pending.
@@ -1778,27 +1488,57 @@ EXPORT_SYMBOL_GPL(usb_autopm_put_interface);
 void usb_autopm_put_interface_async(struct usb_interface *intf)
 {
 	struct usb_device	*udev = interface_to_usbdev(intf);
+	unsigned long		last_busy;
 	int			status = 0;
 
-	if (intf->condition == USB_INTERFACE_UNBOUND) {
-		status = -ENODEV;
-	} else {
-		udev->last_busy = jiffies;
-		atomic_dec(&intf->pm_usage_cnt);
-		if (udev->autosuspend_disabled || udev->autosuspend_delay < 0)
-			status = -EPERM;
-		else if (atomic_read(&intf->pm_usage_cnt) <= 0 &&
-				!timer_pending(&udev->autosuspend.timer)) {
-			queue_delayed_work(ksuspend_usb_wq, &udev->autosuspend,
+	last_busy = udev->last_busy;
+	udev->last_busy = jiffies;
+	atomic_dec(&intf->pm_usage_cnt);
+	pm_runtime_put_noidle(&intf->dev);
+
+	if (!udev->autosuspend_disabled) {
+		/* Optimization: Don't schedule a delayed autosuspend if
+		 * the timer is already running and the expiration time
+		 * wouldn't change.
+		 *
+		 * We have to use the interface's timer.  Attempts to
+		 * schedule a suspend for the device would fail because
+		 * the interface is still active.
+		 */
+		if (intf->dev.power.timer_expires == 0 ||
+				round_jiffies_up(last_busy) !=
+				round_jiffies_up(jiffies)) {
+			status = pm_schedule_suspend(&intf->dev,
+					jiffies_to_msecs(
 					round_jiffies_up_relative(
-						udev->autosuspend_delay));
+						udev->autosuspend_delay)));
 		}
 	}
-	dev_vdbg(&intf->dev, "%s: status %d cnt %d\n",
-			__func__, status, atomic_read(&intf->pm_usage_cnt));
+	dev_vdbg(&intf->dev, "%s: cnt %d -> %d\n",
+			__func__, atomic_read(&intf->dev.power.usage_count),
+			status);
 }
 EXPORT_SYMBOL_GPL(usb_autopm_put_interface_async);
 
+/**
+ * usb_autopm_put_interface_no_suspend - decrement a USB interface's PM-usage counter
+ * @intf: the usb_interface whose counter should be decremented
+ *
+ * This routine decrements @intf's usage counter but does not carry out an
+ * autosuspend.
+ *
+ * This routine can run in atomic context.
+ */
+void usb_autopm_put_interface_no_suspend(struct usb_interface *intf)
+{
+	struct usb_device	*udev = interface_to_usbdev(intf);
+
+	udev->last_busy = jiffies;
+	atomic_dec(&intf->pm_usage_cnt);
+	pm_runtime_put_noidle(&intf->dev);
+}
+EXPORT_SYMBOL_GPL(usb_autopm_put_interface_no_suspend);
+
 /**
  * usb_autopm_get_interface - increment a USB interface's PM-usage counter
  * @intf: the usb_interface whose counter should be incremented
@@ -1811,25 +1551,8 @@ EXPORT_SYMBOL_GPL(usb_autopm_put_interface_async);
  * or @intf is unbound.  A typical example would be a character-device
  * driver when its device file is opened.
  *
- *
- * The routine increments @intf's usage counter.  (However if the
- * autoresume fails then the counter is re-decremented.)  So long as the
- * counter is greater than 0, autosuspend will not be allowed for @intf
- * or its usb_device.  When the driver is finished using @intf it should
- * call usb_autopm_put_interface() to decrement the usage counter and
- * queue a delayed autosuspend request (if the counter is <= 0).
- *
- *
- * Note that @intf->pm_usage_cnt is owned by the interface driver.  The
- * core will not change its value other than the increment and decrement
- * in usb_autopm_get_interface and usb_autopm_put_interface.  The driver
- * may use this simple counter-oriented discipline or may set the value
- * any way it likes.
- *
- * Resume method calls generated by this routine can arrive at any time
- * while @intf is suspended.  They are not protected by the usb_device's
- * lock but only by its pm_mutex.  Drivers must provide their own
- * synchronization.
+ * @intf's usage counter is incremented to prevent subsequent autosuspends.
+ * However if the autoresume fails then the counter is re-decremented.
  *
  * This routine can run only in process context.
  */
@@ -1837,9 +1560,16 @@ int usb_autopm_get_interface(struct usb_interface *intf)
 {
 	int	status;
 
-	status = usb_autopm_do_interface(intf, 1);
-	dev_vdbg(&intf->dev, "%s: status %d cnt %d\n",
-			__func__, status, atomic_read(&intf->pm_usage_cnt));
+	status = pm_runtime_get_sync(&intf->dev);
+	if (status < 0)
+		pm_runtime_put_sync(&intf->dev);
+	else
+		atomic_inc(&intf->pm_usage_cnt);
+	dev_vdbg(&intf->dev, "%s: cnt %d -> %d\n",
+			__func__, atomic_read(&intf->dev.power.usage_count),
+			status);
+	if (status > 0)
+		status = 0;
 	return status;
 }
 EXPORT_SYMBOL_GPL(usb_autopm_get_interface);
@@ -1849,41 +1579,201 @@ EXPORT_SYMBOL_GPL(usb_autopm_get_interface);
  * @intf: the usb_interface whose counter should be incremented
  *
  * This routine does much the same thing as
- * usb_autopm_get_interface(): it increments @intf's usage counter and
- * queues an autoresume request if the result is > 0.  The differences
- * are that it does not acquire the device's pm_mutex (callers must
- * handle all synchronization issues themselves), and it does not
- * autoresume the device directly (it only queues a request).  After a
- * successful call, the device will generally not yet be resumed.
+ * usb_autopm_get_interface(): It increments @intf's usage counter and
+ * queues an autoresume request if the device is suspended.  The
+ * differences are that it does not perform any synchronization (callers
+ * should hold a private lock and handle all synchronization issues
+ * themselves), and it does not autoresume the device directly (it only
+ * queues a request).  After a successful call, the device may not yet be
+ * resumed.
  *
  * This routine can run in atomic context.
  */
 int usb_autopm_get_interface_async(struct usb_interface *intf)
 {
-	struct usb_device	*udev = interface_to_usbdev(intf);
-	int			status = 0;
+	int		status = 0;
+	enum rpm_status	s;
 
-	if (intf->condition == USB_INTERFACE_UNBOUND)
-		status = -ENODEV;
-	else {
+	/* Don't request a resume unless the interface is already suspending
+	 * or suspended.  Doing so would force a running suspend timer to be
+	 * cancelled.
+	 */
+	pm_runtime_get_noresume(&intf->dev);
+	s = ACCESS_ONCE(intf->dev.power.runtime_status);
+	if (s == RPM_SUSPENDING || s == RPM_SUSPENDED)
+		status = pm_request_resume(&intf->dev);
+
+	if (status < 0 && status != -EINPROGRESS)
+		pm_runtime_put_noidle(&intf->dev);
+	else
 		atomic_inc(&intf->pm_usage_cnt);
-		if (atomic_read(&intf->pm_usage_cnt) > 0 &&
-				udev->state == USB_STATE_SUSPENDED)
-			queue_work(ksuspend_usb_wq, &udev->autoresume);
-	}
-	dev_vdbg(&intf->dev, "%s: status %d cnt %d\n",
-			__func__, status, atomic_read(&intf->pm_usage_cnt));
+	dev_vdbg(&intf->dev, "%s: cnt %d -> %d\n",
+			__func__, atomic_read(&intf->dev.power.usage_count),
+			status);
+	if (status > 0)
+		status = 0;
 	return status;
 }
 EXPORT_SYMBOL_GPL(usb_autopm_get_interface_async);
 
-#else
+/**
+ * usb_autopm_get_interface_no_resume - increment a USB interface's PM-usage counter
+ * @intf: the usb_interface whose counter should be incremented
+ *
+ * This routine increments @intf's usage counter but does not carry out an
+ * autoresume.
+ *
+ * This routine can run in atomic context.
+ */
+void usb_autopm_get_interface_no_resume(struct usb_interface *intf)
+{
+	struct usb_device	*udev = interface_to_usbdev(intf);
+
+	udev->last_busy = jiffies;
+	atomic_inc(&intf->pm_usage_cnt);
+	pm_runtime_get_noresume(&intf->dev);
+}
+EXPORT_SYMBOL_GPL(usb_autopm_get_interface_no_resume);
+
+/* Internal routine to check whether we may autosuspend a device. */
+static int autosuspend_check(struct usb_device *udev)
+{
+	int			i;
+	struct usb_interface	*intf;
+	unsigned long		suspend_time, j;
+
+	/* Fail if autosuspend is disabled, or any interfaces are in use, or
+	 * any interface drivers require remote wakeup but it isn't available.
+	 */
+	udev->do_remote_wakeup = device_may_wakeup(&udev->dev);
+	if (udev->actconfig) {
+		for (i = 0; i < udev->actconfig->desc.bNumInterfaces; i++) {
+			intf = udev->actconfig->interface[i];
+
+			/* We don't need to check interfaces that are
+			 * disabled for runtime PM.  Either they are unbound
+			 * or else their drivers don't support autosuspend
+			 * and so they are permanently active.
+			 */
+			if (intf->dev.power.disable_depth)
+				continue;
+			if (atomic_read(&intf->dev.power.usage_count) > 0)
+				return -EBUSY;
+			if (intf->needs_remote_wakeup &&
+					!udev->do_remote_wakeup) {
+				dev_dbg(&udev->dev, "remote wakeup needed "
+						"for autosuspend\n");
+				return -EOPNOTSUPP;
+			}
+
+			/* Don't allow autosuspend if the device will need
+			 * a reset-resume and any of its interface drivers
+			 * doesn't include support or needs remote wakeup.
+			 */
+			if (udev->quirks & USB_QUIRK_RESET_RESUME) {
+				struct usb_driver *driver;
+
+				driver = to_usb_driver(intf->dev.driver);
+				if (!driver->reset_resume ||
+						intf->needs_remote_wakeup)
+					return -EOPNOTSUPP;
+			}
+		}
+	}
+
+	/* If everything is okay but the device hasn't been idle for long
+	 * enough, queue a delayed autosuspend request.
+	 */
+	j = ACCESS_ONCE(jiffies);
+	suspend_time = udev->last_busy + udev->autosuspend_delay;
+	if (time_before(j, suspend_time)) {
+		pm_schedule_suspend(&udev->dev, jiffies_to_msecs(
+				round_jiffies_up_relative(suspend_time - j)));
+		return -EAGAIN;
+	}
+	return 0;
+}
+
+static int usb_runtime_suspend(struct device *dev)
+{
+	int	status = 0;
 
-void usb_autosuspend_work(struct work_struct *work)
-{}
+	/* A USB device can be suspended if it passes the various autosuspend
+	 * checks.  Runtime suspend for a USB device means suspending all the
+	 * interfaces and then the device itself.
+	 */
+	if (is_usb_device(dev)) {
+		struct usb_device	*udev = to_usb_device(dev);
+
+		if (autosuspend_check(udev) != 0)
+			return -EAGAIN;
+
+		status = usb_suspend_both(udev, PMSG_AUTO_SUSPEND);
+
+		/* If an interface fails the suspend, adjust the last_busy
+		 * time so that we don't get another suspend attempt right
+		 * away.
+		 */
+		if (status) {
+			udev->last_busy = jiffies +
+					(udev->autosuspend_delay == 0 ?
+						HZ/2 : 0);
+		}
+
+		/* Prevent the parent from suspending immediately after */
+		else if (udev->parent) {
+			udev->parent->last_busy = jiffies;
+		}
+	}
+
+	/* Runtime suspend for a USB interface doesn't mean anything. */
+	return status;
+}
+
+static int usb_runtime_resume(struct device *dev)
+{
+	/* Runtime resume for a USB device means resuming both the device
+	 * and all its interfaces.
+	 */
+	if (is_usb_device(dev)) {
+		struct usb_device	*udev = to_usb_device(dev);
+		int			status;
+
+		status = usb_resume_both(udev, PMSG_AUTO_RESUME);
+		udev->last_busy = jiffies;
+		return status;
+	}
+
+	/* Runtime resume for a USB interface doesn't mean anything. */
+	return 0;
+}
+
+static int usb_runtime_idle(struct device *dev)
+{
+	/* An idle USB device can be suspended if it passes the various
+	 * autosuspend checks.  An idle interface can be suspended at
+	 * any time.
+	 */
+	if (is_usb_device(dev)) {
+		struct usb_device	*udev = to_usb_device(dev);
+
+		if (autosuspend_check(udev) != 0)
+			return 0;
+	}
+
+	pm_runtime_suspend(dev);
+	return 0;
+}
+
+static struct dev_pm_ops usb_bus_pm_ops = {
+	.runtime_suspend =	usb_runtime_suspend,
+	.runtime_resume =	usb_runtime_resume,
+	.runtime_idle =		usb_runtime_idle,
+};
+
+#else
 
-void usb_autoresume_work(struct work_struct *work)
-{}
+#define usb_bus_pm_ops	(*(struct dev_pm_ops *) NULL)
 
 #endif /* CONFIG_USB_SUSPEND */
 
@@ -1891,4 +1781,5 @@ struct bus_type usb_bus_type = {
 	.name =		"usb",
 	.match =	usb_device_match,
 	.uevent =	usb_uevent,
+	.pm =		&usb_bus_pm_ops,
 };
diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c
index fc4290b6691c..b07ba051118d 100644
--- a/drivers/usb/core/hcd.c
+++ b/drivers/usb/core/hcd.c
@@ -39,6 +39,7 @@
 #include <linux/platform_device.h>
 #include <linux/workqueue.h>
 #include <linux/mutex.h>
+#include <linux/pm_runtime.h>
 
 #include <linux/usb.h>
 
@@ -1858,6 +1859,10 @@ int hcd_bus_resume(struct usb_device *rhdev, pm_message_t msg)
 	return status;
 }
 
+#endif	/* CONFIG_PM */
+
+#ifdef	CONFIG_USB_SUSPEND
+
 /* Workqueue routine for root-hub remote wakeup */
 static void hcd_resume_work(struct work_struct *work)
 {
@@ -1884,12 +1889,12 @@ void usb_hcd_resume_root_hub (struct usb_hcd *hcd)
 
 	spin_lock_irqsave (&hcd_root_hub_lock, flags);
 	if (hcd->rh_registered)
-		queue_work(ksuspend_usb_wq, &hcd->wakeup_work);
+		queue_work(pm_wq, &hcd->wakeup_work);
 	spin_unlock_irqrestore (&hcd_root_hub_lock, flags);
 }
 EXPORT_SYMBOL_GPL(usb_hcd_resume_root_hub);
 
-#endif
+#endif	/* CONFIG_USB_SUSPEND */
 
 /*-------------------------------------------------------------------------*/
 
@@ -2034,7 +2039,7 @@ struct usb_hcd *usb_create_hcd (const struct hc_driver *driver,
 	init_timer(&hcd->rh_timer);
 	hcd->rh_timer.function = rh_timer_func;
 	hcd->rh_timer.data = (unsigned long) hcd;
-#ifdef CONFIG_PM
+#ifdef CONFIG_USB_SUSPEND
 	INIT_WORK(&hcd->wakeup_work, hcd_resume_work);
 #endif
 	mutex_init(&hcd->bandwidth_mutex);
@@ -2234,7 +2239,7 @@ void usb_remove_hcd(struct usb_hcd *hcd)
 	hcd->rh_registered = 0;
 	spin_unlock_irq (&hcd_root_hub_lock);
 
-#ifdef CONFIG_PM
+#ifdef CONFIG_USB_SUSPEND
 	cancel_work_sync(&hcd->wakeup_work);
 #endif
 
diff --git a/drivers/usb/core/hcd.h b/drivers/usb/core/hcd.h
index 70a7e490f81b..8953ded69541 100644
--- a/drivers/usb/core/hcd.h
+++ b/drivers/usb/core/hcd.h
@@ -80,7 +80,7 @@ struct usb_hcd {
 
 	struct timer_list	rh_timer;	/* drives root-hub polling */
 	struct urb		*status_urb;	/* the current status urb */
-#ifdef CONFIG_PM
+#ifdef CONFIG_USB_SUSPEND
 	struct work_struct	wakeup_work;	/* for remote wakeup */
 #endif
 
@@ -464,16 +464,20 @@ extern int usb_find_interface_driver(struct usb_device *dev,
 #define usb_endpoint_out(ep_dir)	(!((ep_dir) & USB_DIR_IN))
 
 #ifdef CONFIG_PM
-extern void usb_hcd_resume_root_hub(struct usb_hcd *hcd);
 extern void usb_root_hub_lost_power(struct usb_device *rhdev);
 extern int hcd_bus_suspend(struct usb_device *rhdev, pm_message_t msg);
 extern int hcd_bus_resume(struct usb_device *rhdev, pm_message_t msg);
+#endif /* CONFIG_PM */
+
+#ifdef CONFIG_USB_SUSPEND
+extern void usb_hcd_resume_root_hub(struct usb_hcd *hcd);
 #else
 static inline void usb_hcd_resume_root_hub(struct usb_hcd *hcd)
 {
 	return;
 }
-#endif /* CONFIG_PM */
+#endif /* CONFIG_USB_SUSPEND */
+
 
 /*
  * USB device fs stuff
diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index 746f26f222ab..0e0a190bbd00 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -22,6 +22,7 @@
 #include <linux/kthread.h>
 #include <linux/mutex.h>
 #include <linux/freezer.h>
+#include <linux/pm_runtime.h>
 
 #include <asm/uaccess.h>
 #include <asm/byteorder.h>
@@ -71,7 +72,6 @@ struct usb_hub {
 
 	unsigned		mA_per_port;	/* current for each child */
 
-	unsigned		init_done:1;
 	unsigned		limited_power:1;
 	unsigned		quiescing:1;
 	unsigned		disconnected:1;
@@ -820,7 +820,6 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type)
 	}
  init3:
 	hub->quiescing = 0;
-	hub->init_done = 1;
 
 	status = usb_submit_urb(hub->urb, GFP_NOIO);
 	if (status < 0)
@@ -861,11 +860,6 @@ static void hub_quiesce(struct usb_hub *hub, enum hub_quiescing_type type)
 	int i;
 
 	cancel_delayed_work_sync(&hub->init_work);
-	if (!hub->init_done) {
-		hub->init_done = 1;
-		usb_autopm_put_interface_no_suspend(
-				to_usb_interface(hub->intfdev));
-	}
 
 	/* khubd and related activity won't re-trigger */
 	hub->quiescing = 1;
@@ -1405,10 +1399,8 @@ static void recursively_mark_NOTATTACHED(struct usb_device *udev)
 		if (udev->children[i])
 			recursively_mark_NOTATTACHED(udev->children[i]);
 	}
-	if (udev->state == USB_STATE_SUSPENDED) {
-		udev->discon_suspended = 1;
+	if (udev->state == USB_STATE_SUSPENDED)
 		udev->active_duration -= jiffies;
-	}
 	udev->state = USB_STATE_NOTATTACHED;
 }
 
@@ -1532,31 +1524,6 @@ static void update_address(struct usb_device *udev, int devnum)
 		udev->devnum = devnum;
 }
 
-#ifdef	CONFIG_USB_SUSPEND
-
-static void usb_stop_pm(struct usb_device *udev)
-{
-	/* Synchronize with the ksuspend thread to prevent any more
-	 * autosuspend requests from being submitted, and decrement
-	 * the parent's count of unsuspended children.
-	 */
-	usb_pm_lock(udev);
-	if (udev->parent && !udev->discon_suspended)
-		usb_autosuspend_device(udev->parent);
-	usb_pm_unlock(udev);
-
-	/* Stop any autosuspend or autoresume requests already submitted */
-	cancel_delayed_work_sync(&udev->autosuspend);
-	cancel_work_sync(&udev->autoresume);
-}
-
-#else
-
-static inline void usb_stop_pm(struct usb_device *udev)
-{ }
-
-#endif
-
 /**
  * usb_disconnect - disconnect a device (usbcore-internal)
  * @pdev: pointer to device being disconnected
@@ -1625,8 +1592,6 @@ void usb_disconnect(struct usb_device **pdev)
 	*pdev = NULL;
 	spin_unlock_irq(&device_state_lock);
 
-	usb_stop_pm(udev);
-
 	put_device(&udev->dev);
 }
 
@@ -1803,9 +1768,6 @@ int usb_new_device(struct usb_device *udev)
 	int err;
 
 	if (udev->parent) {
-		/* Increment the parent's count of unsuspended children */
-		usb_autoresume_device(udev->parent);
-
 		/* Initialize non-root-hub device wakeup to disabled;
 		 * device (un)configuration controls wakeup capable
 		 * sysfs power/wakeup controls wakeup enabled/disabled
@@ -1814,6 +1776,10 @@ int usb_new_device(struct usb_device *udev)
 		device_set_wakeup_enable(&udev->dev, 1);
 	}
 
+	/* Tell the runtime-PM framework the device is active */
+	pm_runtime_set_active(&udev->dev);
+	pm_runtime_enable(&udev->dev);
+
 	usb_detect_quirks(udev);
 	err = usb_enumerate_device(udev);	/* Read descriptors */
 	if (err < 0)
@@ -1844,7 +1810,8 @@ int usb_new_device(struct usb_device *udev)
 
 fail:
 	usb_set_device_state(udev, USB_STATE_NOTATTACHED);
-	usb_stop_pm(udev);
+	pm_runtime_disable(&udev->dev);
+	pm_runtime_set_suspended(&udev->dev);
 	return err;
 }
 
@@ -2408,8 +2375,11 @@ int usb_remote_wakeup(struct usb_device *udev)
 
 	if (udev->state == USB_STATE_SUSPENDED) {
 		dev_dbg(&udev->dev, "usb %sresume\n", "wakeup-");
-		usb_mark_last_busy(udev);
-		status = usb_external_resume_device(udev, PMSG_REMOTE_RESUME);
+		status = usb_autoresume_device(udev);
+		if (status == 0) {
+			/* Let the drivers do their thing, then... */
+			usb_autosuspend_device(udev);
+		}
 	}
 	return status;
 }
@@ -2446,11 +2416,6 @@ int usb_port_resume(struct usb_device *udev, pm_message_t msg)
 	return status;
 }
 
-int usb_remote_wakeup(struct usb_device *udev)
-{
-	return 0;
-}
-
 #endif
 
 static int hub_suspend(struct usb_interface *intf, pm_message_t msg)
@@ -3268,7 +3233,7 @@ static void hub_events(void)
 		 * disconnected while waiting for the lock to succeed. */
 		usb_lock_device(hdev);
 		if (unlikely(hub->disconnected))
-			goto loop2;
+			goto loop_disconnected;
 
 		/* If the hub has died, clean up after it */
 		if (hdev->state == USB_STATE_NOTATTACHED) {
@@ -3428,7 +3393,7 @@ static void hub_events(void)
 		 * kick_khubd() and allow autosuspend.
 		 */
 		usb_autopm_put_interface(intf);
- loop2:
+ loop_disconnected:
 		usb_unlock_device(hdev);
 		kref_put(&hub->kref, hub_release);
 
diff --git a/drivers/usb/core/message.c b/drivers/usb/core/message.c
index df73574a9cc9..73de41bb2546 100644
--- a/drivers/usb/core/message.c
+++ b/drivers/usb/core/message.c
@@ -1843,7 +1843,6 @@ free_interfaces:
 		intf->dev.dma_mask = dev->dev.dma_mask;
 		INIT_WORK(&intf->reset_ws, __usb_queue_reset_device);
 		device_initialize(&intf->dev);
-		mark_quiesced(intf);
 		dev_set_name(&intf->dev, "%d-%s:%d.%d",
 			dev->bus->busnum, dev->devpath,
 			configuration, alt->desc.bInterfaceNumber);
diff --git a/drivers/usb/core/usb.c b/drivers/usb/core/usb.c
index 0daff0d968ba..32966ccdff63 100644
--- a/drivers/usb/core/usb.c
+++ b/drivers/usb/core/usb.c
@@ -49,9 +49,6 @@ const char *usbcore_name = "usbcore";
 
 static int nousb;	/* Disable USB when built into kernel image */
 
-/* Workqueue for autosuspend and for remote wakeup of root hubs */
-struct workqueue_struct *ksuspend_usb_wq;
-
 #ifdef	CONFIG_USB_SUSPEND
 static int usb_autosuspend_delay = 2;		/* Default delay value,
 						 * in seconds */
@@ -264,23 +261,6 @@ static int usb_dev_uevent(struct device *dev, struct kobj_uevent_env *env)
 
 #ifdef	CONFIG_PM
 
-static int ksuspend_usb_init(void)
-{
-	/* This workqueue is supposed to be both freezable and
-	 * singlethreaded.  Its job doesn't justify running on more
-	 * than one CPU.
-	 */
-	ksuspend_usb_wq = create_freezeable_workqueue("ksuspend_usbd");
-	if (!ksuspend_usb_wq)
-		return -ENOMEM;
-	return 0;
-}
-
-static void ksuspend_usb_cleanup(void)
-{
-	destroy_workqueue(ksuspend_usb_wq);
-}
-
 /* USB device Power-Management thunks.
  * There's no need to distinguish here between quiescing a USB device
  * and powering it down; the generic_suspend() routine takes care of
@@ -296,7 +276,7 @@ static int usb_dev_prepare(struct device *dev)
 static void usb_dev_complete(struct device *dev)
 {
 	/* Currently used only for rebinding interfaces */
-	usb_resume(dev, PMSG_RESUME);	/* Message event is meaningless */
+	usb_resume(dev, PMSG_ON);	/* FIXME: change to PMSG_COMPLETE */
 }
 
 static int usb_dev_suspend(struct device *dev)
@@ -342,9 +322,7 @@ static const struct dev_pm_ops usb_device_pm_ops = {
 
 #else
 
-#define ksuspend_usb_init()	0
-#define ksuspend_usb_cleanup()	do {} while (0)
-#define usb_device_pm_ops	(*(struct dev_pm_ops *)0)
+#define usb_device_pm_ops	(*(struct dev_pm_ops *) NULL)
 
 #endif	/* CONFIG_PM */
 
@@ -472,9 +450,6 @@ struct usb_device *usb_alloc_dev(struct usb_device *parent,
 	INIT_LIST_HEAD(&dev->filelist);
 
 #ifdef	CONFIG_PM
-	mutex_init(&dev->pm_mutex);
-	INIT_DELAYED_WORK(&dev->autosuspend, usb_autosuspend_work);
-	INIT_WORK(&dev->autoresume, usb_autoresume_work);
 	dev->autosuspend_delay = usb_autosuspend_delay * HZ;
 	dev->connect_time = jiffies;
 	dev->active_duration = -jiffies;
@@ -1117,9 +1092,6 @@ static int __init usb_init(void)
 	if (retval)
 		goto out;
 
-	retval = ksuspend_usb_init();
-	if (retval)
-		goto out;
 	retval = bus_register(&usb_bus_type);
 	if (retval)
 		goto bus_register_failed;
@@ -1159,7 +1131,7 @@ major_init_failed:
 bus_notifier_failed:
 	bus_unregister(&usb_bus_type);
 bus_register_failed:
-	ksuspend_usb_cleanup();
+	usb_debugfs_cleanup();
 out:
 	return retval;
 }
@@ -1181,7 +1153,6 @@ static void __exit usb_exit(void)
 	usb_hub_cleanup();
 	bus_unregister_notifier(&usb_bus_type, &usb_bus_nb);
 	bus_unregister(&usb_bus_type);
-	ksuspend_usb_cleanup();
 	usb_debugfs_cleanup();
 }
 
diff --git a/drivers/usb/core/usb.h b/drivers/usb/core/usb.h
index 2b74a7f99c41..cd882203ad34 100644
--- a/drivers/usb/core/usb.h
+++ b/drivers/usb/core/usb.h
@@ -55,25 +55,8 @@ extern void usb_major_cleanup(void);
 extern int usb_suspend(struct device *dev, pm_message_t msg);
 extern int usb_resume(struct device *dev, pm_message_t msg);
 
-extern void usb_autosuspend_work(struct work_struct *work);
-extern void usb_autoresume_work(struct work_struct *work);
 extern int usb_port_suspend(struct usb_device *dev, pm_message_t msg);
 extern int usb_port_resume(struct usb_device *dev, pm_message_t msg);
-extern int usb_external_suspend_device(struct usb_device *udev,
-		pm_message_t msg);
-extern int usb_external_resume_device(struct usb_device *udev,
-		pm_message_t msg);
-extern int usb_remote_wakeup(struct usb_device *dev);
-
-static inline void usb_pm_lock(struct usb_device *udev)
-{
-	mutex_lock_nested(&udev->pm_mutex, udev->level);
-}
-
-static inline void usb_pm_unlock(struct usb_device *udev)
-{
-	mutex_unlock(&udev->pm_mutex);
-}
 
 #else
 
@@ -87,14 +70,6 @@ static inline int usb_port_resume(struct usb_device *udev, pm_message_t msg)
 	return 0;
 }
 
-static inline int usb_remote_wakeup(struct usb_device *udev)
-{
-	return 0;
-}
-
-static inline void usb_pm_lock(struct usb_device *udev) {}
-static inline void usb_pm_unlock(struct usb_device *udev) {}
-
 #endif
 
 #ifdef CONFIG_USB_SUSPEND
@@ -102,6 +77,7 @@ static inline void usb_pm_unlock(struct usb_device *udev) {}
 extern void usb_autosuspend_device(struct usb_device *udev);
 extern void usb_try_autosuspend_device(struct usb_device *udev);
 extern int usb_autoresume_device(struct usb_device *udev);
+extern int usb_remote_wakeup(struct usb_device *dev);
 
 #else
 
@@ -112,9 +88,13 @@ static inline int usb_autoresume_device(struct usb_device *udev)
 	return 0;
 }
 
+static inline int usb_remote_wakeup(struct usb_device *udev)
+{
+	return 0;
+}
+
 #endif
 
-extern struct workqueue_struct *ksuspend_usb_wq;
 extern struct bus_type usb_bus_type;
 extern struct device_type usb_device_type;
 extern struct device_type usb_if_device_type;
@@ -144,23 +124,6 @@ static inline int is_usb_device_driver(struct device_driver *drv)
 			for_devices;
 }
 
-/* Interfaces and their "power state" are owned by usbcore */
-
-static inline void mark_active(struct usb_interface *f)
-{
-	f->is_active = 1;
-}
-
-static inline void mark_quiesced(struct usb_interface *f)
-{
-	f->is_active = 0;
-}
-
-static inline int is_active(const struct usb_interface *f)
-{
-	return f->is_active;
-}
-
 
 /* for labeling diagnostics */
 extern const char *usbcore_name;
diff --git a/drivers/usb/misc/usbtest.c b/drivers/usb/misc/usbtest.c
index 3dab0c0b196f..707a87da77f8 100644
--- a/drivers/usb/misc/usbtest.c
+++ b/drivers/usb/misc/usbtest.c
@@ -1580,10 +1580,6 @@ usbtest_ioctl (struct usb_interface *intf, unsigned int code, void *buf)
 		return -ERESTARTSYS;
 
 	/* FIXME: What if a system sleep starts while a test is running? */
-	if (!intf->is_active) {
-		mutex_unlock(&dev->lock);
-		return -EHOSTUNREACH;
-	}
 
 	/* some devices, like ez-usb default devices, need a non-default
 	 * altsetting to have any active endpoints.  some tests change
diff --git a/include/linux/usb.h b/include/linux/usb.h
index e6419ac89ea2..ad50fc8a7ad3 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -122,7 +122,6 @@ enum usb_interface_condition {
  *	number from the USB core by calling usb_register_dev().
  * @condition: binding state of the interface: not bound, binding
  *	(in probe()), bound to a driver, or unbinding (in disconnect())
- * @is_active: flag set when the interface is bound and not suspended.
  * @sysfs_files_created: sysfs attributes exist
  * @ep_devs_created: endpoint child pseudo-devices exist
  * @unregistering: flag set when the interface is being unregistered
@@ -135,8 +134,7 @@ enum usb_interface_condition {
  * @dev: driver model's view of this device
  * @usb_dev: if an interface is bound to the USB major, this will point
  *	to the sysfs representation for that device.
- * @pm_usage_cnt: PM usage counter for this interface; autosuspend is not
- *	allowed unless the counter is 0.
+ * @pm_usage_cnt: PM usage counter for this interface
  * @reset_ws: Used for scheduling resets from atomic context.
  * @reset_running: set to 1 if the interface is currently running a
  *      queued reset so that usb_cancel_queued_reset() doesn't try to
@@ -184,7 +182,6 @@ struct usb_interface {
 	int minor;			/* minor number this interface is
 					 * bound to */
 	enum usb_interface_condition condition;		/* state of binding */
-	unsigned is_active:1;		/* the interface is not suspended */
 	unsigned sysfs_files_created:1;	/* the sysfs attributes exist */
 	unsigned ep_devs_created:1;	/* endpoint "devices" exist */
 	unsigned unregistering:1;	/* unregistration is in progress */
@@ -401,7 +398,6 @@ struct usb_tt;
  * @portnum: parent port number (origin 1)
  * @level: number of USB hub ancestors
  * @can_submit: URBs may be submitted
- * @discon_suspended: disconnected while suspended
  * @persist_enabled:  USB_PERSIST enabled for this device
  * @have_langid: whether string_langid is valid
  * @authorized: policy has said we can use it;
@@ -421,20 +417,15 @@ struct usb_tt;
  * @usbfs_dentry: usbfs dentry entry for the device
  * @maxchild: number of ports if hub
  * @children: child devices - USB devices that are attached to this hub
- * @pm_usage_cnt: usage counter for autosuspend
  * @quirks: quirks of the whole device
  * @urbnum: number of URBs submitted for the whole device
  * @active_duration: total time device is not suspended
- * @autosuspend: for delayed autosuspends
- * @autoresume: for autoresumes requested while in_interrupt
- * @pm_mutex: protects PM operations
  * @last_busy: time of last use
  * @autosuspend_delay: in jiffies
  * @connect_time: time device was first connected
  * @do_remote_wakeup:  remote wakeup should be enabled
  * @reset_resume: needs reset instead of resume
  * @autosuspend_disabled: autosuspend disabled by the user
- * @skip_sys_resume: skip the next system resume
  * @wusb_dev: if this is a Wireless USB device, link to the WUSB
  *	specific data for the device.
  * @slot_id: Slot ID assigned by xHCI
@@ -475,7 +466,6 @@ struct usb_device {
 	u8 level;
 
 	unsigned can_submit:1;
-	unsigned discon_suspended:1;
 	unsigned persist_enabled:1;
 	unsigned have_langid:1;
 	unsigned authorized:1;
@@ -499,17 +489,12 @@ struct usb_device {
 	int maxchild;
 	struct usb_device *children[USB_MAXCHILDREN];
 
-	int pm_usage_cnt;
 	u32 quirks;
 	atomic_t urbnum;
 
 	unsigned long active_duration;
 
 #ifdef CONFIG_PM
-	struct delayed_work autosuspend;
-	struct work_struct autoresume;
-	struct mutex pm_mutex;
-
 	unsigned long last_busy;
 	int autosuspend_delay;
 	unsigned long connect_time;
@@ -517,7 +502,6 @@ struct usb_device {
 	unsigned do_remote_wakeup:1;
 	unsigned reset_resume:1;
 	unsigned autosuspend_disabled:1;
-	unsigned skip_sys_resume:1;
 #endif
 	struct wusb_dev *wusb_dev;
 	int slot_id;
@@ -549,17 +533,8 @@ extern int usb_autopm_get_interface(struct usb_interface *intf);
 extern void usb_autopm_put_interface(struct usb_interface *intf);
 extern int usb_autopm_get_interface_async(struct usb_interface *intf);
 extern void usb_autopm_put_interface_async(struct usb_interface *intf);
-
-static inline void usb_autopm_get_interface_no_resume(
-		struct usb_interface *intf)
-{
-	atomic_inc(&intf->pm_usage_cnt);
-}
-static inline void usb_autopm_put_interface_no_suspend(
-		struct usb_interface *intf)
-{
-	atomic_dec(&intf->pm_usage_cnt);
-}
+extern void usb_autopm_get_interface_no_resume(struct usb_interface *intf);
+extern void usb_autopm_put_interface_no_suspend(struct usb_interface *intf);
 
 static inline void usb_mark_last_busy(struct usb_device *udev)
 {
-- 
cgit v1.2.3


From c58bfa6b97731590e42cba6bd13829c4e480992f Mon Sep 17 00:00:00 2001
From: Felipe Balbi <felipe.balbi@nokia.com>
Date: Thu, 21 Jan 2010 15:33:55 +0200
Subject: USB: musb: deprecate what we don't use

after 2.6.34, those fields will be removed from
struct musb_hdrc_platform_data, it's expected
that other architectures are fixed by then.

Signed-off-by: Felipe Balbi <felipe.balbi@nokia.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/usb/musb.h | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/usb/musb.h b/include/linux/usb/musb.h
index 4b7f8fa252f0..8e0f2bc8d8c9 100644
--- a/include/linux/usb/musb.h
+++ b/include/linux/usb/musb.h
@@ -30,26 +30,26 @@ struct musb_hdrc_eps_bits {
 struct musb_hdrc_config {
 	/* MUSB configuration-specific details */
 	unsigned	multipoint:1;	/* multipoint device */
-	unsigned	dyn_fifo:1;	/* supports dynamic fifo sizing */
-	unsigned	soft_con:1;	/* soft connect required */
-	unsigned	utm_16:1;	/* utm data witdh is 16 bits */
+	unsigned	dyn_fifo:1 __deprecated; /* supports dynamic fifo sizing */
+	unsigned	soft_con:1 __deprecated; /* soft connect required */
+	unsigned	utm_16:1 __deprecated; /* utm data witdh is 16 bits */
 	unsigned	big_endian:1;	/* true if CPU uses big-endian */
 	unsigned	mult_bulk_tx:1;	/* Tx ep required for multbulk pkts */
 	unsigned	mult_bulk_rx:1;	/* Rx ep required for multbulk pkts */
 	unsigned	high_iso_tx:1;	/* Tx ep required for HB iso */
 	unsigned	high_iso_rx:1;	/* Rx ep required for HD iso */
-	unsigned	dma:1;		/* supports DMA */
-	unsigned	vendor_req:1;	/* vendor registers required */
+	unsigned	dma:1 __deprecated; /* supports DMA */
+	unsigned	vendor_req:1 __deprecated; /* vendor registers required */
 
 	u8		num_eps;	/* number of endpoints _with_ ep0 */
-	u8		dma_channels;	/* number of dma channels */
+	u8		dma_channels __deprecated; /* number of dma channels */
 	u8		dyn_fifo_size;	/* dynamic size in bytes */
-	u8		vendor_ctrl;	/* vendor control reg width */
-	u8		vendor_stat;	/* vendor status reg witdh */
-	u8		dma_req_chan;	/* bitmask for required dma channels */
+	u8		vendor_ctrl __deprecated; /* vendor control reg width */
+	u8		vendor_stat __deprecated; /* vendor status reg witdh */
+	u8		dma_req_chan __deprecated; /* bitmask for required dma channels */
 	u8		ram_bits;	/* ram address size */
 
-	struct musb_hdrc_eps_bits *eps_bits;
+	struct musb_hdrc_eps_bits *eps_bits __deprecated;
 #ifdef CONFIG_BLACKFIN
         /* A GPIO controlling VRSEL in Blackfin */
         unsigned int    gpio_vrsel;
-- 
cgit v1.2.3


From 640e95abdfae9fef5949084c92e80c8f2f8b5ec5 Mon Sep 17 00:00:00 2001
From: Eirik Aanonsen <EAA@wprmedical.com>
Date: Fri, 5 Feb 2010 09:49:25 +0100
Subject: USB: atmel uaba: Adding invert vbus_pin

Adding vbus_pin_inverted so that the usb detect pin can be active high
or low depending on HW implementation also replaced the
gpio_get_value(udc->vbus_pin); with a call to vbus_is_present(udc); This
allows the driver to be loaded and save about 0,15W on the consumption.

Signed-off-by: Eirik Aanonsen <eaa@wprmedical.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 arch/avr32/mach-at32ap/at32ap700x.c | 7 +++++--
 drivers/usb/gadget/atmel_usba_udc.c | 5 +++--
 drivers/usb/gadget/atmel_usba_udc.h | 1 +
 include/linux/usb/atmel_usba_udc.h  | 1 +
 4 files changed, 10 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/arch/avr32/mach-at32ap/at32ap700x.c b/arch/avr32/mach-at32ap/at32ap700x.c
index b13d1879e51b..3a4bc1a18433 100644
--- a/arch/avr32/mach-at32ap/at32ap700x.c
+++ b/arch/avr32/mach-at32ap/at32ap700x.c
@@ -1770,10 +1770,13 @@ at32_add_device_usba(unsigned int id, struct usba_platform_data *data)
 					  ARRAY_SIZE(usba0_resource)))
 		goto out_free_pdev;
 
-	if (data)
+	if (data) {
 		usba_data.pdata.vbus_pin = data->vbus_pin;
-	else
+		usba_data.pdata.vbus_pin_inverted = data->vbus_pin_inverted;
+	} else {
 		usba_data.pdata.vbus_pin = -EINVAL;
+		usba_data.pdata.vbus_pin_inverted = -EINVAL;
+	}
 
 	data = &usba_data.pdata;
 	data->num_ep = ARRAY_SIZE(at32_usba_ep);
diff --git a/drivers/usb/gadget/atmel_usba_udc.c b/drivers/usb/gadget/atmel_usba_udc.c
index 976822f50c70..f79bdfe4bed9 100644
--- a/drivers/usb/gadget/atmel_usba_udc.c
+++ b/drivers/usb/gadget/atmel_usba_udc.c
@@ -320,7 +320,7 @@ static inline void usba_cleanup_debugfs(struct usba_udc *udc)
 static int vbus_is_present(struct usba_udc *udc)
 {
 	if (gpio_is_valid(udc->vbus_pin))
-		return gpio_get_value(udc->vbus_pin);
+		return gpio_get_value(udc->vbus_pin) ^ udc->vbus_pin_inverted;
 
 	/* No Vbus detection: Assume always present */
 	return 1;
@@ -1763,7 +1763,7 @@ static irqreturn_t usba_vbus_irq(int irq, void *devid)
 	if (!udc->driver)
 		goto out;
 
-	vbus = gpio_get_value(udc->vbus_pin);
+	vbus = vbus_is_present(udc);
 	if (vbus != udc->vbus_prev) {
 		if (vbus) {
 			toggle_bias(1);
@@ -2000,6 +2000,7 @@ static int __init usba_udc_probe(struct platform_device *pdev)
 	if (gpio_is_valid(pdata->vbus_pin)) {
 		if (!gpio_request(pdata->vbus_pin, "atmel_usba_udc")) {
 			udc->vbus_pin = pdata->vbus_pin;
+			udc->vbus_pin_inverted = pdata->vbus_pin_inverted;
 
 			ret = request_irq(gpio_to_irq(udc->vbus_pin),
 					usba_vbus_irq, 0,
diff --git a/drivers/usb/gadget/atmel_usba_udc.h b/drivers/usb/gadget/atmel_usba_udc.h
index f7baea307f0d..88a2e07a11a8 100644
--- a/drivers/usb/gadget/atmel_usba_udc.h
+++ b/drivers/usb/gadget/atmel_usba_udc.h
@@ -323,6 +323,7 @@ struct usba_udc {
 	struct platform_device *pdev;
 	int irq;
 	int vbus_pin;
+	int vbus_pin_inverted;
 	struct clk *pclk;
 	struct clk *hclk;
 
diff --git a/include/linux/usb/atmel_usba_udc.h b/include/linux/usb/atmel_usba_udc.h
index 6311fa2d9f82..baf41c8616e9 100644
--- a/include/linux/usb/atmel_usba_udc.h
+++ b/include/linux/usb/atmel_usba_udc.h
@@ -15,6 +15,7 @@ struct usba_ep_data {
 
 struct usba_platform_data {
 	int			vbus_pin;
+	int		 	vbus_pin_inverted;
 	int			num_ep;
 	struct usba_ep_data	ep[0];
 };
-- 
cgit v1.2.3


From efcbd3df079a6f8a8a2d5207c4e8429e02356c79 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Fri, 5 Feb 2010 18:09:49 -0800
Subject: USB: Extend and neaten dbg macros

Add format/argument validation for #ifndef DEBUG dbg macro
Neaten dbg macro definitions

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/usb.h        | 14 +++++++++-----
 include/linux/usb/serial.h | 13 +++++--------
 2 files changed, 14 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/usb.h b/include/linux/usb.h
index ad50fc8a7ad3..3492abf82e75 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -1566,14 +1566,18 @@ extern void usb_register_notify(struct notifier_block *nb);
 extern void usb_unregister_notify(struct notifier_block *nb);
 
 #ifdef DEBUG
-#define dbg(format, arg...) printk(KERN_DEBUG "%s: " format "\n" , \
-	__FILE__ , ## arg)
+#define dbg(format, arg...)						\
+	printk(KERN_DEBUG "%s: " format "\n", __FILE__, ##arg)
 #else
-#define dbg(format, arg...) do {} while (0)
+#define dbg(format, arg...)						\
+do {									\
+	if (0)								\
+		printk(KERN_DEBUG "%s: " format "\n", __FILE__, ##arg); \
+} while (0)
 #endif
 
-#define err(format, arg...) printk(KERN_ERR KBUILD_MODNAME ": " \
-	format "\n" , ## arg)
+#define err(format, arg...)					\
+	printk(KERN_ERR KBUILD_MODNAME ": " format "\n", ##arg)
 
 /* debugfs stuff */
 extern struct dentry *usb_debug_root;
diff --git a/include/linux/usb/serial.h b/include/linux/usb/serial.h
index 1819396ed501..0a458b861933 100644
--- a/include/linux/usb/serial.h
+++ b/include/linux/usb/serial.h
@@ -351,14 +351,11 @@ static inline void usb_serial_debug_data(int debug,
 
 /* Use our own dbg macro */
 #undef dbg
-#define dbg(format, arg...) \
-	do { \
-		if (debug) \
-			printk(KERN_DEBUG "%s: " format "\n" , __FILE__ , \
-				## arg); \
-	} while (0)
-
-
+#define dbg(format, arg...)						\
+do {									\
+	if (debug)							\
+		printk(KERN_DEBUG "%s: " format "\n", __FILE__, ##arg);	\
+} while (0)
 
 #endif /* __LINUX_USB_SERIAL_H */
 
-- 
cgit v1.2.3


From 6d61ae9112960a2b3ed3360602dfb3bfd357954f Mon Sep 17 00:00:00 2001
From: Dennis O'Brien <dennis.obrien@eqware.net>
Date: Mon, 15 Feb 2010 08:50:38 -0800
Subject: USB: vstusb.c: removal of driver for Vernier Software & Technology,
 Inc., devices and spectrometers

This patch removes the vstusb driver and support from the Linux tree.
This driver provided support for Vernier Software & Technology devices
and spectrometers (Ocean Optics). This driver is being replaced by a
user space - libusb - implementation.

Signed-off-by: Jim Collar <jim.collar@eqware.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 Documentation/ioctl/ioctl-number.txt |   1 -
 drivers/usb/misc/Kconfig             |  14 -
 drivers/usb/misc/Makefile            |   1 -
 drivers/usb/misc/vstusb.c            | 783 -----------------------------------
 include/linux/usb/Kbuild             |   1 -
 include/linux/usb/vstusb.h           |  71 ----
 6 files changed, 871 deletions(-)
 delete mode 100644 drivers/usb/misc/vstusb.c
 delete mode 100644 include/linux/usb/vstusb.h

(limited to 'include/linux')

diff --git a/Documentation/ioctl/ioctl-number.txt b/Documentation/ioctl/ioctl-number.txt
index 35cf64d4436d..35c9b51d20ea 100644
--- a/Documentation/ioctl/ioctl-number.txt
+++ b/Documentation/ioctl/ioctl-number.txt
@@ -139,7 +139,6 @@ Code  Seq#(hex)	Include File		Comments
 'K'	all	linux/kd.h
 'L'	00-1F	linux/loop.h		conflict!
 'L'	10-1F	drivers/scsi/mpt2sas/mpt2sas_ctl.h	conflict!
-'L'	20-2F	linux/usb/vstusb.h
 'L'	E0-FF	linux/ppdd.h		encrypted disk device driver
 					<http://linux01.gwdg.de/~alatham/ppdd.html>
 'M'	all	linux/soundcard.h	conflict!
diff --git a/drivers/usb/misc/Kconfig b/drivers/usb/misc/Kconfig
index ef9bbef7a88b..55660eaf947c 100644
--- a/drivers/usb/misc/Kconfig
+++ b/drivers/usb/misc/Kconfig
@@ -231,17 +231,3 @@ config USB_ISIGHTFW
 	  driver beforehand. Tools for doing so are available at
 	  http://bersace03.free.fr
 
-config USB_VST
-	tristate "USB VST driver"
-	depends on USB
-	help
-	  This driver is intended for Vernier Software Technologies
-	  bulk usb devices such as their Ocean-Optics spectrometers or
-	  Labquest.
-	  It is a bulk channel driver with configurable read and write
-	  timeouts.
-
-	  To compile this driver as a module, choose M here: the
-	  module will be called vstusb.
-
-
diff --git a/drivers/usb/misc/Makefile b/drivers/usb/misc/Makefile
index 36dd40dda1b3..717703e81425 100644
--- a/drivers/usb/misc/Makefile
+++ b/drivers/usb/misc/Makefile
@@ -22,7 +22,6 @@ obj-$(CONFIG_USB_TEST)		+= usbtest.o
 obj-$(CONFIG_USB_TRANCEVIBRATOR)	+= trancevibrator.o
 obj-$(CONFIG_USB_USS720)	+= uss720.o
 obj-$(CONFIG_USB_SEVSEG)	+= usbsevseg.o
-obj-$(CONFIG_USB_VST)		+= vstusb.o
 
 obj-$(CONFIG_USB_SISUSBVGA)	+= sisusbvga/
 
diff --git a/drivers/usb/misc/vstusb.c b/drivers/usb/misc/vstusb.c
deleted file mode 100644
index 874c81bb27b9..000000000000
--- a/drivers/usb/misc/vstusb.c
+++ /dev/null
@@ -1,783 +0,0 @@
-/*****************************************************************************
- *  File: drivers/usb/misc/vstusb.c
- *
- *  Purpose: Support for the bulk USB Vernier Spectrophotometers
- *
- *  Author:     Johnnie Peters
- *              Axian Consulting
- *              Beaverton, OR, USA 97005
- *
- *  Modified by:     EQware Engineering, Inc.
- *                   Oregon City, OR, USA 97045
- *
- *  Copyright:  2007, 2008
- *              Vernier Software & Technology
- *              Beaverton, OR, USA 97005
- *
- *  Web:        www.vernier.com
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License version 2 as
- *  published by the Free Software Foundation.
- *
- *****************************************************************************/
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/module.h>
-#include <linux/mutex.h>
-#include <linux/uaccess.h>
-#include <linux/usb.h>
-
-#include <linux/usb/vstusb.h>
-
-#define DRIVER_VERSION "VST USB Driver Version 1.5"
-#define DRIVER_DESC "Vernier Software Technology Bulk USB Driver"
-
-#ifdef CONFIG_USB_DYNAMIC_MINORS
-	#define VSTUSB_MINOR_BASE	0
-#else
-	#define VSTUSB_MINOR_BASE	199
-#endif
-
-#define USB_VENDOR_OCEANOPTICS	0x2457
-#define USB_VENDOR_VERNIER	0x08F7	/* Vernier Software & Technology */
-
-#define USB_PRODUCT_USB2000	0x1002
-#define USB_PRODUCT_ADC1000_FW	0x1003	/* firmware download (renumerates) */
-#define USB_PRODUCT_ADC1000	0x1004
-#define USB_PRODUCT_HR2000_FW	0x1009	/* firmware download (renumerates) */
-#define USB_PRODUCT_HR2000	0x100A
-#define USB_PRODUCT_HR4000_FW	0x1011	/* firmware download (renumerates) */
-#define USB_PRODUCT_HR4000	0x1012
-#define USB_PRODUCT_USB650	0x1014	/* "Red Tide" */
-#define USB_PRODUCT_QE65000	0x1018
-#define USB_PRODUCT_USB4000	0x1022
-#define USB_PRODUCT_USB325	0x1024	/* "Vernier Spectrometer" */
-
-#define USB_PRODUCT_LABPRO	0x0001
-#define USB_PRODUCT_LABQUEST	0x0005
-
-#define VST_MAXBUFFER		(64*1024)
-
-static const struct usb_device_id id_table[] = {
-	{ USB_DEVICE(USB_VENDOR_OCEANOPTICS, USB_PRODUCT_USB2000)},
-	{ USB_DEVICE(USB_VENDOR_OCEANOPTICS, USB_PRODUCT_HR4000)},
-	{ USB_DEVICE(USB_VENDOR_OCEANOPTICS, USB_PRODUCT_USB650)},
-	{ USB_DEVICE(USB_VENDOR_OCEANOPTICS, USB_PRODUCT_USB4000)},
-	{ USB_DEVICE(USB_VENDOR_OCEANOPTICS, USB_PRODUCT_USB325)},
-	{ USB_DEVICE(USB_VENDOR_VERNIER, USB_PRODUCT_LABQUEST)},
-	{ USB_DEVICE(USB_VENDOR_VERNIER, USB_PRODUCT_LABPRO)},
-	{},
-};
-
-MODULE_DEVICE_TABLE(usb, id_table);
-
-struct vstusb_device {
-	struct kref				kref;
-	struct mutex            lock;
-	struct usb_device       *usb_dev;
-	char                    present;
-	char                    isopen;
-	struct usb_anchor       submitted;
-	int                     rd_pipe;
-	int                     rd_timeout_ms;
-	int                     wr_pipe;
-	int                     wr_timeout_ms;
-};
-#define to_vst_dev(d) container_of(d, struct vstusb_device, kref)
-
-static struct usb_driver vstusb_driver;
-
-static void vstusb_delete(struct kref *kref)
-{
-	struct vstusb_device *vstdev = to_vst_dev(kref);
-
-	usb_put_dev(vstdev->usb_dev);
-	kfree(vstdev);
-}
-
-static int vstusb_open(struct inode *inode, struct file *file)
-{
-	struct vstusb_device *vstdev;
-	struct usb_interface *interface;
-
-	interface = usb_find_interface(&vstusb_driver, iminor(inode));
-
-	if (!interface) {
-		printk(KERN_ERR KBUILD_MODNAME
-		       ": %s - error, can't find device for minor %d\n",
-		       __func__, iminor(inode));
-		return -ENODEV;
-	}
-
-	vstdev = usb_get_intfdata(interface);
-
-	if (!vstdev)
-		return -ENODEV;
-
-	/* lock this device */
-	mutex_lock(&vstdev->lock);
-
-	/* can only open one time */
-	if ((!vstdev->present) || (vstdev->isopen)) {
-		mutex_unlock(&vstdev->lock);
-		return -EBUSY;
-	}
-
-	/* increment our usage count */
-	kref_get(&vstdev->kref);
-
-	vstdev->isopen = 1;
-
-	/* save device in the file's private structure */
-	file->private_data = vstdev;
-
-	dev_dbg(&vstdev->usb_dev->dev, "%s: opened\n", __func__);
-
-	mutex_unlock(&vstdev->lock);
-
-	return 0;
-}
-
-static int vstusb_release(struct inode *inode, struct file *file)
-{
-	struct vstusb_device *vstdev;
-
-	vstdev = file->private_data;
-
-	if (vstdev == NULL)
-		return -ENODEV;
-
-	mutex_lock(&vstdev->lock);
-
-	vstdev->isopen = 0;
-
-	dev_dbg(&vstdev->usb_dev->dev, "%s: released\n", __func__);
-
-	mutex_unlock(&vstdev->lock);
-
-	kref_put(&vstdev->kref, vstusb_delete);
-
-	return 0;
-}
-
-static void usb_api_blocking_completion(struct urb *urb)
-{
-	struct completion *completeit = urb->context;
-
-	complete(completeit);
-}
-
-static int vstusb_fill_and_send_urb(struct urb *urb,
-				    struct usb_device *usb_dev,
-				    unsigned int pipe, void *data,
-				    unsigned int len, struct completion *done)
-{
-	struct usb_host_endpoint *ep;
-	struct usb_host_endpoint **hostep;
-	unsigned int pipend;
-
-	int status;
-
-	hostep = usb_pipein(pipe) ? usb_dev->ep_in : usb_dev->ep_out;
-	pipend = usb_pipeendpoint(pipe);
-	ep = hostep[pipend];
-
-	if (!ep || (len == 0))
-		return -EINVAL;
-
-	if ((ep->desc.bmAttributes & USB_ENDPOINT_XFERTYPE_MASK)
-	    == USB_ENDPOINT_XFER_INT) {
-		pipe = (pipe & ~(3 << 30)) | (PIPE_INTERRUPT << 30);
-		usb_fill_int_urb(urb, usb_dev, pipe, data, len,
-				 (usb_complete_t)usb_api_blocking_completion,
-				 NULL, ep->desc.bInterval);
-	} else
-		usb_fill_bulk_urb(urb, usb_dev, pipe, data, len,
-				  (usb_complete_t)usb_api_blocking_completion,
-				  NULL);
-
-	init_completion(done);
-	urb->context = done;
-	urb->actual_length = 0;
-	status = usb_submit_urb(urb, GFP_KERNEL);
-
-	return status;
-}
-
-static int vstusb_complete_urb(struct urb *urb, struct completion *done,
-			       int timeout, int *actual_length)
-{
-	unsigned long expire;
-	int status;
-
-	expire = timeout ? msecs_to_jiffies(timeout) : MAX_SCHEDULE_TIMEOUT;
-	if (!wait_for_completion_interruptible_timeout(done, expire)) {
-		usb_kill_urb(urb);
-		status = urb->status == -ENOENT ? -ETIMEDOUT : urb->status;
-
-		dev_dbg(&urb->dev->dev,
-			"%s timed out on ep%d%s len=%d/%d, urb status = %d\n",
-			current->comm,
-			usb_pipeendpoint(urb->pipe),
-			usb_pipein(urb->pipe) ? "in" : "out",
-			urb->actual_length,
-			urb->transfer_buffer_length,
-			urb->status);
-
-	} else {
-		if (signal_pending(current)) {
-			/* if really an error */
-			if (urb->status && !((urb->status == -ENOENT)     ||
-					     (urb->status == -ECONNRESET) ||
-					     (urb->status == -ESHUTDOWN))) {
-				status = -EINTR;
-				usb_kill_urb(urb);
-			} else {
-				status = 0;
-			}
-
-			dev_dbg(&urb->dev->dev,
-				"%s: signal pending on ep%d%s len=%d/%d,"
-				"urb status = %d\n",
-				current->comm,
-				usb_pipeendpoint(urb->pipe),
-				usb_pipein(urb->pipe) ? "in" : "out",
-				urb->actual_length,
-				urb->transfer_buffer_length,
-				urb->status);
-
-		} else {
-			status = urb->status;
-		}
-	}
-
-	if (actual_length)
-		*actual_length = urb->actual_length;
-
-	return status;
-}
-
-static ssize_t vstusb_read(struct file *file, char __user *buffer,
-			   size_t count, loff_t *ppos)
-{
-	struct vstusb_device *vstdev;
-	int cnt = -1;
-	void *buf;
-	int retval = 0;
-
-	struct urb              *urb;
-	struct usb_device       *dev;
-	unsigned int            pipe;
-	int                     timeout;
-
-	DECLARE_COMPLETION_ONSTACK(done);
-
-	vstdev = file->private_data;
-
-	if (vstdev == NULL)
-		return -ENODEV;
-
-	/* verify that we actually want to read some data */
-	if ((count == 0) || (count > VST_MAXBUFFER))
-		return -EINVAL;
-
-	/* lock this object */
-	if (mutex_lock_interruptible(&vstdev->lock))
-		return -ERESTARTSYS;
-
-	/* anyone home */
-	if (!vstdev->present) {
-		mutex_unlock(&vstdev->lock);
-		printk(KERN_ERR KBUILD_MODNAME
-		       ": %s: device not present\n", __func__);
-		return -ENODEV;
-	}
-
-	/* pull out the necessary data */
-	dev =     vstdev->usb_dev;
-	pipe =    usb_rcvbulkpipe(dev, vstdev->rd_pipe);
-	timeout = vstdev->rd_timeout_ms;
-
-	buf = kmalloc(count, GFP_KERNEL);
-	if (buf == NULL) {
-		mutex_unlock(&vstdev->lock);
-		return -ENOMEM;
-	}
-
-	urb = usb_alloc_urb(0, GFP_KERNEL);
-	if (!urb) {
-		kfree(buf);
-		mutex_unlock(&vstdev->lock);
-		return -ENOMEM;
-	}
-
-	usb_anchor_urb(urb, &vstdev->submitted);
-	retval = vstusb_fill_and_send_urb(urb, dev, pipe, buf, count, &done);
-	mutex_unlock(&vstdev->lock);
-	if (retval) {
-		usb_unanchor_urb(urb);
-		dev_err(&dev->dev, "%s: error %d filling and sending urb %d\n",
-			__func__, retval, pipe);
-		goto exit;
-	}
-
-	retval = vstusb_complete_urb(urb, &done, timeout, &cnt);
-	if (retval) {
-		dev_err(&dev->dev, "%s: error %d completing urb %d\n",
-			__func__, retval, pipe);
-		goto exit;
-	}
-
-	if (copy_to_user(buffer, buf, cnt)) {
-		dev_err(&dev->dev, "%s: can't copy_to_user\n", __func__);
-		retval = -EFAULT;
-	} else {
-		retval = cnt;
-		dev_dbg(&dev->dev, "%s: read %d bytes from pipe %d\n",
-			__func__, cnt, pipe);
-	}
-
-exit:
-	usb_free_urb(urb);
-	kfree(buf);
-	return retval;
-}
-
-static ssize_t vstusb_write(struct file *file, const char __user *buffer,
-			    size_t count, loff_t *ppos)
-{
-	struct vstusb_device *vstdev;
-	int cnt = -1;
-	void *buf;
-	int retval = 0;
-
-	struct urb              *urb;
-	struct usb_device       *dev;
-	unsigned int            pipe;
-	int                     timeout;
-
-	DECLARE_COMPLETION_ONSTACK(done);
-
-	vstdev = file->private_data;
-
-	if (vstdev == NULL)
-		return -ENODEV;
-
-	/* verify that we actually have some data to write */
-	if ((count == 0) || (count > VST_MAXBUFFER))
-		return retval;
-
-	/* lock this object */
-	if (mutex_lock_interruptible(&vstdev->lock))
-		return -ERESTARTSYS;
-
-	/* anyone home */
-	if (!vstdev->present) {
-		mutex_unlock(&vstdev->lock);
-		printk(KERN_ERR KBUILD_MODNAME
-		       ": %s: device not present\n", __func__);
-		return -ENODEV;
-	}
-
-	/* pull out the necessary data */
-	dev =     vstdev->usb_dev;
-	pipe =    usb_sndbulkpipe(dev, vstdev->wr_pipe);
-	timeout = vstdev->wr_timeout_ms;
-
-	buf = kmalloc(count, GFP_KERNEL);
-	if (buf == NULL) {
-		mutex_unlock(&vstdev->lock);
-		return -ENOMEM;
-	}
-
-	urb = usb_alloc_urb(0, GFP_KERNEL);
-	if (!urb) {
-		kfree(buf);
-		mutex_unlock(&vstdev->lock);
-		return -ENOMEM;
-	}
-
-	if (copy_from_user(buf, buffer, count)) {
-		mutex_unlock(&vstdev->lock);
-		dev_err(&dev->dev, "%s: can't copy_from_user\n", __func__);
-		retval = -EFAULT;
-		goto exit;
-	}
-
-	usb_anchor_urb(urb, &vstdev->submitted);
-	retval = vstusb_fill_and_send_urb(urb, dev, pipe, buf, count, &done);
-	mutex_unlock(&vstdev->lock);
-	if (retval) {
-		usb_unanchor_urb(urb);
-		dev_err(&dev->dev, "%s: error %d filling and sending urb %d\n",
-			__func__, retval, pipe);
-		goto exit;
-	}
-
-	retval = vstusb_complete_urb(urb, &done, timeout, &cnt);
-	if (retval) {
-		dev_err(&dev->dev, "%s: error %d completing urb %d\n",
-			__func__, retval, pipe);
-		goto exit;
-	} else {
-		retval = cnt;
-		dev_dbg(&dev->dev, "%s: sent %d bytes to pipe %d\n",
-			__func__, cnt, pipe);
-	}
-
-exit:
-	usb_free_urb(urb);
-	kfree(buf);
-	return retval;
-}
-
-static long vstusb_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
-{
-	int retval = 0;
-	int cnt = -1;
-	void __user *data = (void __user *)arg;
-	struct vstusb_args usb_data;
-
-	struct vstusb_device *vstdev;
-	void *buffer = NULL; /* must be initialized. buffer is
-			      *	referenced on exit but not all
-			      * ioctls allocate it */
-
-	struct urb              *urb = NULL; /* must be initialized. urb is
-					      *	referenced on exit but not all
-					      * ioctls allocate it */
-	struct usb_device       *dev;
-	unsigned int            pipe;
-	int                     timeout;
-
-	DECLARE_COMPLETION_ONSTACK(done);
-
-	vstdev = file->private_data;
-
-	if (_IOC_TYPE(cmd) != VST_IOC_MAGIC) {
-		dev_warn(&vstdev->usb_dev->dev,
-			 "%s: ioctl command %x, bad ioctl magic %x, "
-			 "expected %x\n", __func__, cmd,
-			 _IOC_TYPE(cmd), VST_IOC_MAGIC);
-		return -EINVAL;
-	}
-
-	if (vstdev == NULL)
-		return -ENODEV;
-
-	if (copy_from_user(&usb_data, data, sizeof(struct vstusb_args))) {
-		dev_err(&vstdev->usb_dev->dev, "%s: can't copy_from_user\n",
-			__func__);
-		return -EFAULT;
-	}
-
-	/* lock this object */
-	if (mutex_lock_interruptible(&vstdev->lock)) {
-		retval = -ERESTARTSYS;
-		goto exit;
-	}
-
-	/* anyone home */
-	if (!vstdev->present) {
-		mutex_unlock(&vstdev->lock);
-		dev_err(&vstdev->usb_dev->dev, "%s: device not present\n",
-			__func__);
-		retval = -ENODEV;
-		goto exit;
-	}
-
-	/* pull out the necessary data */
-	dev = vstdev->usb_dev;
-
-	switch (cmd) {
-
-	case IOCTL_VSTUSB_CONFIG_RW:
-
-		vstdev->rd_pipe = usb_data.rd_pipe;
-		vstdev->rd_timeout_ms = usb_data.rd_timeout_ms;
-		vstdev->wr_pipe = usb_data.wr_pipe;
-		vstdev->wr_timeout_ms = usb_data.wr_timeout_ms;
-
-		mutex_unlock(&vstdev->lock);
-
-		dev_dbg(&dev->dev, "%s: setting pipes/timeouts, "
-			"rdpipe = %d, rdtimeout = %d, "
-			"wrpipe = %d, wrtimeout = %d\n", __func__,
-			vstdev->rd_pipe, vstdev->rd_timeout_ms,
-			vstdev->wr_pipe, vstdev->wr_timeout_ms);
-		break;
-
-	case IOCTL_VSTUSB_SEND_PIPE:
-
-		if ((usb_data.count == 0) || (usb_data.count > VST_MAXBUFFER)) {
-			mutex_unlock(&vstdev->lock);
-			retval = -EINVAL;
-			goto exit;
-		}
-
-		buffer = kmalloc(usb_data.count, GFP_KERNEL);
-		if (buffer == NULL) {
-			mutex_unlock(&vstdev->lock);
-			retval = -ENOMEM;
-			goto exit;
-		}
-
-		urb = usb_alloc_urb(0, GFP_KERNEL);
-		if (!urb) {
-			mutex_unlock(&vstdev->lock);
-			retval = -ENOMEM;
-			goto exit;
-		}
-
-		timeout = usb_data.timeout_ms;
-
-		pipe = usb_sndbulkpipe(dev, usb_data.pipe);
-
-		if (copy_from_user(buffer, usb_data.buffer, usb_data.count)) {
-			dev_err(&dev->dev, "%s: can't copy_from_user\n",
-				__func__);
-			mutex_unlock(&vstdev->lock);
-			retval = -EFAULT;
-			goto exit;
-		}
-
-		usb_anchor_urb(urb, &vstdev->submitted);
-		retval = vstusb_fill_and_send_urb(urb, dev, pipe, buffer,
-						  usb_data.count, &done);
-		mutex_unlock(&vstdev->lock);
-		if (retval) {
-			usb_unanchor_urb(urb);
-			dev_err(&dev->dev,
-				"%s: error %d filling and sending urb %d\n",
-				__func__, retval, pipe);
-			goto exit;
-		}
-
-		retval = vstusb_complete_urb(urb, &done, timeout, &cnt);
-		if (retval) {
-			dev_err(&dev->dev, "%s: error %d completing urb %d\n",
-				__func__, retval, pipe);
-		}
-
-		break;
-	case IOCTL_VSTUSB_RECV_PIPE:
-
-		if ((usb_data.count == 0) || (usb_data.count > VST_MAXBUFFER)) {
-			mutex_unlock(&vstdev->lock);
-			retval = -EINVAL;
-			goto exit;
-		}
-
-		buffer = kmalloc(usb_data.count, GFP_KERNEL);
-		if (buffer == NULL) {
-			mutex_unlock(&vstdev->lock);
-			retval = -ENOMEM;
-			goto exit;
-		}
-
-		urb = usb_alloc_urb(0, GFP_KERNEL);
-		if (!urb) {
-			mutex_unlock(&vstdev->lock);
-			retval = -ENOMEM;
-			goto exit;
-		}
-
-		timeout = usb_data.timeout_ms;
-
-		pipe = usb_rcvbulkpipe(dev, usb_data.pipe);
-
-		usb_anchor_urb(urb, &vstdev->submitted);
-		retval = vstusb_fill_and_send_urb(urb, dev, pipe, buffer,
-						  usb_data.count, &done);
-		mutex_unlock(&vstdev->lock);
-		if (retval) {
-			usb_unanchor_urb(urb);
-			dev_err(&dev->dev,
-				"%s: error %d filling and sending urb %d\n",
-				__func__, retval, pipe);
-			goto exit;
-		}
-
-		retval = vstusb_complete_urb(urb, &done, timeout, &cnt);
-		if (retval) {
-			dev_err(&dev->dev, "%s: error %d completing urb %d\n",
-				__func__, retval, pipe);
-			goto exit;
-		}
-
-		if (copy_to_user(usb_data.buffer, buffer, cnt)) {
-			dev_err(&dev->dev, "%s: can't copy_to_user\n",
-				__func__);
-			retval = -EFAULT;
-			goto exit;
-		}
-
-		usb_data.count = cnt;
-		if (copy_to_user(data, &usb_data, sizeof(struct vstusb_args))) {
-			dev_err(&dev->dev, "%s: can't copy_to_user\n",
-				__func__);
-			retval = -EFAULT;
-		} else {
-			dev_dbg(&dev->dev, "%s: recv %zd bytes from pipe %d\n",
-				__func__, usb_data.count, usb_data.pipe);
-		}
-
-		break;
-
-	default:
-		mutex_unlock(&vstdev->lock);
-		dev_warn(&dev->dev, "ioctl_vstusb: invalid ioctl cmd %x\n",
-			 cmd);
-		return -EINVAL;
-		break;
-	}
-exit:
-	usb_free_urb(urb);
-	kfree(buffer);
-	return retval;
-}
-
-static const struct file_operations vstusb_fops = {
-	.owner =                THIS_MODULE,
-	.read =                 vstusb_read,
-	.write =                vstusb_write,
-	.unlocked_ioctl =       vstusb_ioctl,
-	.compat_ioctl =         vstusb_ioctl,
-	.open =                 vstusb_open,
-	.release =              vstusb_release,
-};
-
-static struct usb_class_driver usb_vstusb_class = {
-	.name =         "usb/vstusb%d",
-	.fops =         &vstusb_fops,
-	.minor_base =   VSTUSB_MINOR_BASE,
-};
-
-static int vstusb_probe(struct usb_interface *intf,
-			const struct usb_device_id *id)
-{
-	struct usb_device *dev = interface_to_usbdev(intf);
-	struct vstusb_device *vstdev;
-	int i;
-	int retval = 0;
-
-	/* allocate memory for our device state and intialize it */
-
-	vstdev = kzalloc(sizeof(*vstdev), GFP_KERNEL);
-	if (vstdev == NULL)
-		return -ENOMEM;
-
-	/* must do usb_get_dev() prior to kref_init() since the kref_put()
-	 * release function will do a usb_put_dev() */
-	usb_get_dev(dev);
-	kref_init(&vstdev->kref);
-	mutex_init(&vstdev->lock);
-
-	i = dev->descriptor.bcdDevice;
-
-	dev_dbg(&intf->dev, "Version %1d%1d.%1d%1d found at address %d\n",
-		(i & 0xF000) >> 12, (i & 0xF00) >> 8,
-		(i & 0xF0) >> 4, (i & 0xF), dev->devnum);
-
-	vstdev->present = 1;
-	vstdev->isopen = 0;
-	vstdev->usb_dev = dev;
-	init_usb_anchor(&vstdev->submitted);
-
-	usb_set_intfdata(intf, vstdev);
-	retval = usb_register_dev(intf, &usb_vstusb_class);
-	if (retval) {
-		dev_err(&intf->dev,
-			"%s: Not able to get a minor for this device.\n",
-			__func__);
-		usb_set_intfdata(intf, NULL);
-		kref_put(&vstdev->kref, vstusb_delete);
-		return retval;
-	}
-
-	/* let the user know what node this device is now attached to */
-	dev_info(&intf->dev,
-		 "VST USB Device #%d now attached to major %d minor %d\n",
-		 (intf->minor - VSTUSB_MINOR_BASE), USB_MAJOR, intf->minor);
-
-	dev_info(&intf->dev, "%s, %s\n", DRIVER_DESC, DRIVER_VERSION);
-
-	return retval;
-}
-
-static void vstusb_disconnect(struct usb_interface *intf)
-{
-	struct vstusb_device *vstdev = usb_get_intfdata(intf);
-
-	usb_deregister_dev(intf, &usb_vstusb_class);
-	usb_set_intfdata(intf, NULL);
-
-	if (vstdev) {
-
-		mutex_lock(&vstdev->lock);
-		vstdev->present = 0;
-
-		usb_kill_anchored_urbs(&vstdev->submitted);
-
-		mutex_unlock(&vstdev->lock);
-
-		kref_put(&vstdev->kref, vstusb_delete);
-	}
-
-}
-
-static int vstusb_suspend(struct usb_interface *intf, pm_message_t message)
-{
-	struct vstusb_device *vstdev = usb_get_intfdata(intf);
-	int time;
-	if (!vstdev)
-		return 0;
-
-	mutex_lock(&vstdev->lock);
-	time = usb_wait_anchor_empty_timeout(&vstdev->submitted, 1000);
-	if (!time)
-		usb_kill_anchored_urbs(&vstdev->submitted);
-	mutex_unlock(&vstdev->lock);
-
-	return 0;
-}
-
-static int vstusb_resume(struct usb_interface *intf)
-{
-	return 0;
-}
-
-static struct usb_driver vstusb_driver = {
-	.name =         "vstusb",
-	.probe =        vstusb_probe,
-	.disconnect =   vstusb_disconnect,
-	.suspend =      vstusb_suspend,
-	.resume =       vstusb_resume,
-	.id_table = id_table,
-};
-
-static int __init vstusb_init(void)
-{
-	int rc;
-
-	rc = usb_register(&vstusb_driver);
-	if (rc)
-		printk(KERN_ERR "%s: failed to register (%d)", __func__, rc);
-
-	return rc;
-}
-
-static void __exit vstusb_exit(void)
-{
-	usb_deregister(&vstusb_driver);
-}
-
-module_init(vstusb_init);
-module_exit(vstusb_exit);
-
-MODULE_AUTHOR("Dennis O'Brien/Stephen Ware");
-MODULE_DESCRIPTION(DRIVER_VERSION);
-MODULE_LICENSE("GPL");
diff --git a/include/linux/usb/Kbuild b/include/linux/usb/Kbuild
index 54c446309a2a..29fd73b0bffc 100644
--- a/include/linux/usb/Kbuild
+++ b/include/linux/usb/Kbuild
@@ -5,4 +5,3 @@ header-y += gadgetfs.h
 header-y += midi.h
 header-y += g_printer.h
 header-y += tmc.h
-header-y += vstusb.h
diff --git a/include/linux/usb/vstusb.h b/include/linux/usb/vstusb.h
deleted file mode 100644
index 1cfac67191ff..000000000000
--- a/include/linux/usb/vstusb.h
+++ /dev/null
@@ -1,71 +0,0 @@
-/*****************************************************************************
- *  File: drivers/usb/misc/vstusb.h
- *
- *  Purpose: Support for the bulk USB Vernier Spectrophotometers
- *
- *  Author:     EQware Engineering, Inc.
- *              Oregon City, OR, USA 97045
- *
- *  Copyright:  2007, 2008
- *              Vernier Software & Technology
- *              Beaverton, OR, USA 97005
- *
- *  Web:        www.vernier.com
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License version 2 as
- *  published by the Free Software Foundation.
- *
- *****************************************************************************/
-/*****************************************************************************
- *
- *  The vstusb module is a standard usb 'client' driver running on top of the
- *  standard usb host controller stack.
- *
- *  In general, vstusb supports standard bulk usb pipes.  It supports multiple
- *  devices and multiple pipes per device.
- *
- *  The vstusb driver supports two interfaces:
- *  1 - ioctl SEND_PIPE/RECV_PIPE - a general bulk write/read msg
- *  	interface to any pipe with timeout support;
- *  2 - standard read/write with ioctl config - offers standard read/write
- *  	interface with ioctl configured pipes and timeouts.
- *
- *  Both interfaces can be signal from other process and will abort its i/o
- *  operation.
- *
- *  A timeout of 0 means NO timeout.  The user can still terminate the read via
- *  signal.
- *
- *  If using multiple threads with this driver, the user should ensure that
- *  any reads, writes, or ioctls are complete before closing the device.
- *  Changing read/write timeouts or pipes takes effect on next read/write.
- *
- *****************************************************************************/
-
-struct vstusb_args {
-	union {
-		/* this struct is used for IOCTL_VSTUSB_SEND_PIPE,	*
-		 * IOCTL_VSTUSB_RECV_PIPE, and read()/write() fops	*/
-		struct {
-			void __user	*buffer;
-			size_t          count;
-			unsigned int    timeout_ms;
-			int             pipe;
-		};
-
-		/* this one is used for IOCTL_VSTUSB_CONFIG_RW  	*/
-		struct {
-			int rd_pipe;
-			int rd_timeout_ms;
-			int wr_pipe;
-			int wr_timeout_ms;
-		};
-	};
-};
-
-#define VST_IOC_MAGIC 'L'
-#define VST_IOC_FIRST 0x20
-#define IOCTL_VSTUSB_SEND_PIPE	_IO(VST_IOC_MAGIC, VST_IOC_FIRST)
-#define IOCTL_VSTUSB_RECV_PIPE	_IO(VST_IOC_MAGIC, VST_IOC_FIRST + 1)
-#define IOCTL_VSTUSB_CONFIG_RW	_IO(VST_IOC_MAGIC, VST_IOC_FIRST + 2)
-- 
cgit v1.2.3


From 2832fc11f1360668482beec06dbcd631ae5f0cf1 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@linux.intel.com>
Date: Thu, 18 Feb 2010 16:43:54 +0000
Subject: USB: tty: Add a function to insert a string of characters with the
 same flag

The USB drivers often want to insert a series of bytes all with the same
flag set - provide a helper for this case.

Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/char/tty_buffer.c | 11 ++++++-----
 include/linux/tty_flip.h  |  7 ++++++-
 2 files changed, 12 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/char/tty_buffer.c b/drivers/char/tty_buffer.c
index 66fa4e10d76b..4c133459ab7e 100644
--- a/drivers/char/tty_buffer.c
+++ b/drivers/char/tty_buffer.c
@@ -231,9 +231,10 @@ int tty_buffer_request_room(struct tty_struct *tty, size_t size)
 EXPORT_SYMBOL_GPL(tty_buffer_request_room);
 
 /**
- *	tty_insert_flip_string	-	Add characters to the tty buffer
+ *	tty_insert_flip_string_fixed_flag - Add characters to the tty buffer
  *	@tty: tty structure
  *	@chars: characters
+ *	@flag: flag value for each character
  *	@size: size
  *
  *	Queue a series of bytes to the tty buffering. All the characters
@@ -242,8 +243,8 @@ EXPORT_SYMBOL_GPL(tty_buffer_request_room);
  *	Locking: Called functions may take tty->buf.lock
  */
 
-int tty_insert_flip_string(struct tty_struct *tty, const unsigned char *chars,
-				size_t size)
+int tty_insert_flip_string_fixed_flag(struct tty_struct *tty,
+		const unsigned char *chars, char flag, size_t size)
 {
 	int copied = 0;
 	do {
@@ -253,7 +254,7 @@ int tty_insert_flip_string(struct tty_struct *tty, const unsigned char *chars,
 		if (unlikely(space == 0))
 			break;
 		memcpy(tb->char_buf_ptr + tb->used, chars, space);
-		memset(tb->flag_buf_ptr + tb->used, TTY_NORMAL, space);
+		memset(tb->flag_buf_ptr + tb->used, flag, space);
 		tb->used += space;
 		copied += space;
 		chars += space;
@@ -262,7 +263,7 @@ int tty_insert_flip_string(struct tty_struct *tty, const unsigned char *chars,
 	} while (unlikely(size > copied));
 	return copied;
 }
-EXPORT_SYMBOL(tty_insert_flip_string);
+EXPORT_SYMBOL(tty_insert_flip_string_fixed_flag);
 
 /**
  *	tty_insert_flip_string_flags	-	Add characters to the tty buffer
diff --git a/include/linux/tty_flip.h b/include/linux/tty_flip.h
index eb677cf56106..9239d033a0a3 100644
--- a/include/linux/tty_flip.h
+++ b/include/linux/tty_flip.h
@@ -2,8 +2,8 @@
 #define _LINUX_TTY_FLIP_H
 
 extern int tty_buffer_request_room(struct tty_struct *tty, size_t size);
-extern int tty_insert_flip_string(struct tty_struct *tty, const unsigned char *chars, size_t size);
 extern int tty_insert_flip_string_flags(struct tty_struct *tty, const unsigned char *chars, const char *flags, size_t size);
+extern int tty_insert_flip_string_fixed_flag(struct tty_struct *tty, const unsigned char *chars, char flag, size_t size);
 extern int tty_prepare_flip_string(struct tty_struct *tty, unsigned char **chars, size_t size);
 extern int tty_prepare_flip_string_flags(struct tty_struct *tty, unsigned char **chars, char **flags, size_t size);
 void tty_schedule_flip(struct tty_struct *tty);
@@ -20,4 +20,9 @@ static inline int tty_insert_flip_char(struct tty_struct *tty,
 	return tty_insert_flip_string_flags(tty, &ch, &flag, 1);
 }
 
+static inline int tty_insert_flip_string(struct tty_struct *tty, const unsigned char *chars, size_t size)
+{
+	return tty_insert_flip_string_fixed_flag(tty, chars, TTY_NORMAL, size);
+}
+
 #endif /* _LINUX_TTY_FLIP_H */
-- 
cgit v1.2.3


From 84b6826306119dc3c41ef9d7ed6c408112f63301 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Tue, 1 Dec 2009 21:12:27 +0000
Subject: regulator: Add notifier event on regulator disable

The intended use case is for drivers which disable regulators to save
power but need to do some work to restore the hardware state when
restarting.  If the supplies are not actually disabled due to board
limits or sharing with other active devices this notifier allows the
driver to avoid unneeded reinitialisation, particularly when used with
runtime PM.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 drivers/regulator/core.c           | 7 +++++--
 include/linux/regulator/consumer.h | 4 +++-
 2 files changed, 8 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c
index b60a4c9f8f16..6d2ce8a05331 100644
--- a/drivers/regulator/core.c
+++ b/drivers/regulator/core.c
@@ -1341,6 +1341,9 @@ static int _regulator_disable(struct regulator_dev *rdev)
 				       __func__, rdev_get_name(rdev));
 				return ret;
 			}
+
+			_notifier_call_chain(rdev, REGULATOR_EVENT_DISABLE,
+					     NULL);
 		}
 
 		/* decrease our supplies ref count and disable if required */
@@ -1399,8 +1402,8 @@ static int _regulator_force_disable(struct regulator_dev *rdev)
 			return ret;
 		}
 		/* notify other consumers that power has been forced off */
-		_notifier_call_chain(rdev, REGULATOR_EVENT_FORCE_DISABLE,
-			NULL);
+		_notifier_call_chain(rdev, REGULATOR_EVENT_FORCE_DISABLE |
+			REGULATOR_EVENT_DISABLE, NULL);
 	}
 
 	/* decrease our supplies ref count and disable if required */
diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h
index 030d92255c7a..28c9fd020d39 100644
--- a/include/linux/regulator/consumer.h
+++ b/include/linux/regulator/consumer.h
@@ -89,8 +89,9 @@
  * REGULATION_OUT Regulator output is out of regulation.
  * FAIL           Regulator output has failed.
  * OVER_TEMP      Regulator over temp.
- * FORCE_DISABLE  Regulator shut down by software.
+ * FORCE_DISABLE  Regulator forcibly shut down by software.
  * VOLTAGE_CHANGE Regulator voltage changed.
+ * DISABLE        Regulator was disabled.
  *
  * NOTE: These events can be OR'ed together when passed into handler.
  */
@@ -102,6 +103,7 @@
 #define REGULATOR_EVENT_OVER_TEMP		0x10
 #define REGULATOR_EVENT_FORCE_DISABLE		0x20
 #define REGULATOR_EVENT_VOLTAGE_CHANGE		0x40
+#define REGULATOR_EVENT_DISABLE 		0x80
 
 struct regulator;
 
-- 
cgit v1.2.3


From 31aae2beeb3d601d556b6a8c39085940ad1e9f42 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Mon, 21 Dec 2009 12:21:52 +0000
Subject: regulator: Allow regulators to specify the time taken to ramp on
 enable

Regulators may sometimes take longer to enable than the control operation
used to do so, either because the regulator has ramp rate control used to
limit inrush current or because the control operation is very fast (GPIO
being the most common example of this).  In order to ensure that consumers
do not rely on the regulator before it is enabled provide an enable_time()
operation and have the core delay for that time before returning to the
caller.

This is implemented as a function since the ramp rate may be specified in
voltage per unit time and therefore the time depend on the configuration.
In future it would be desirable to allow the bulk operations to run the
delays for multiple enables in parallel but this is not currently supported.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 drivers/regulator/core.c         | 41 ++++++++++++++++++++++++++++++++++------
 include/linux/regulator/driver.h |  6 ++++++
 2 files changed, 41 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c
index 6d2ce8a05331..ca8e1642538b 100644
--- a/drivers/regulator/core.c
+++ b/drivers/regulator/core.c
@@ -19,6 +19,7 @@
 #include <linux/err.h>
 #include <linux/mutex.h>
 #include <linux/suspend.h>
+#include <linux/delay.h>
 #include <linux/regulator/consumer.h>
 #include <linux/regulator/driver.h>
 #include <linux/regulator/machine.h>
@@ -1084,6 +1085,13 @@ overflow_err:
 	return NULL;
 }
 
+static int _regulator_get_enable_time(struct regulator_dev *rdev)
+{
+	if (!rdev->desc->ops->enable_time)
+		return 0;
+	return rdev->desc->ops->enable_time(rdev);
+}
+
 /* Internal regulator request function */
 static struct regulator *_regulator_get(struct device *dev, const char *id,
 					int exclusive)
@@ -1251,7 +1259,7 @@ static int _regulator_can_change_status(struct regulator_dev *rdev)
 /* locks held by regulator_enable() */
 static int _regulator_enable(struct regulator_dev *rdev)
 {
-	int ret;
+	int ret, delay;
 
 	/* do we need to enable the supply regulator first */
 	if (rdev->supply) {
@@ -1275,13 +1283,34 @@ static int _regulator_enable(struct regulator_dev *rdev)
 			if (!_regulator_can_change_status(rdev))
 				return -EPERM;
 
-			if (rdev->desc->ops->enable) {
-				ret = rdev->desc->ops->enable(rdev);
-				if (ret < 0)
-					return ret;
-			} else {
+			if (!rdev->desc->ops->enable)
 				return -EINVAL;
+
+			/* Query before enabling in case configuration
+			 * dependant.  */
+			ret = _regulator_get_enable_time(rdev);
+			if (ret >= 0) {
+				delay = ret;
+			} else {
+				printk(KERN_WARNING
+					"%s: enable_time() failed for %s: %d\n",
+					__func__, rdev_get_name(rdev),
+					ret);
+				delay = 0;
 			}
+
+			/* Allow the regulator to ramp; it would be useful
+			 * to extend this for bulk operations so that the
+			 * regulators can ramp together.  */
+			ret = rdev->desc->ops->enable(rdev);
+			if (ret < 0)
+				return ret;
+
+			if (delay >= 1000)
+				mdelay(delay / 1000);
+			else if (delay)
+				udelay(delay);
+
 		} else if (ret < 0) {
 			printk(KERN_ERR "%s: is_enabled() failed for %s: %d\n",
 			       __func__, rdev_get_name(rdev), ret);
diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h
index 31f2055eae28..592cd7c642c2 100644
--- a/include/linux/regulator/driver.h
+++ b/include/linux/regulator/driver.h
@@ -58,6 +58,9 @@ enum regulator_status {
  * @get_optimum_mode: Get the most efficient operating mode for the regulator
  *                    when running with the specified parameters.
  *
+ * @enable_time: Time taken for the regulator voltage output voltage to
+ *               stabalise after being enabled, in microseconds.
+ *
  * @set_suspend_voltage: Set the voltage for the regulator when the system
  *                       is suspended.
  * @set_suspend_enable: Mark the regulator as enabled when the system is
@@ -93,6 +96,9 @@ struct regulator_ops {
 	int (*set_mode) (struct regulator_dev *, unsigned int mode);
 	unsigned int (*get_mode) (struct regulator_dev *);
 
+	/* Time taken to enable the regulator */
+	int (*enable_time) (struct regulator_dev *);
+
 	/* report regulator status ... most other accessors report
 	 * control inputs, this reports results of combining inputs
 	 * from Linux (and other sources) with the actual load.
-- 
cgit v1.2.3


From eda79a3041a2cada0d4ee9491c99c3874b322356 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@nokia.com>
Date: Tue, 12 Jan 2010 12:25:13 +0200
Subject: regulator: Add 'start-up time' to fixed voltage regulators

Add a field to specify a delay for the start-up time of
a fixed voltage regulator.

Signed-off-by: Adrian Hunter <adrian.hunter@nokia.com>
Acked-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Liam Girdwood <lrg@slimlogic.co.uk>
---
 drivers/regulator/fixed.c       | 5 +++++
 include/linux/regulator/fixed.h | 2 ++
 2 files changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/regulator/fixed.c b/drivers/regulator/fixed.c
index f9f516a3028a..ee3e7eb97b1c 100644
--- a/drivers/regulator/fixed.c
+++ b/drivers/regulator/fixed.c
@@ -24,12 +24,14 @@
 #include <linux/regulator/driver.h>
 #include <linux/regulator/fixed.h>
 #include <linux/gpio.h>
+#include <linux/delay.h>
 
 struct fixed_voltage_data {
 	struct regulator_desc desc;
 	struct regulator_dev *dev;
 	int microvolts;
 	int gpio;
+	unsigned startup_delay;
 	unsigned enable_high:1;
 	unsigned is_enabled:1;
 };
@@ -48,6 +50,8 @@ static int fixed_voltage_enable(struct regulator_dev *dev)
 	if (gpio_is_valid(data->gpio)) {
 		gpio_set_value_cansleep(data->gpio, data->enable_high);
 		data->is_enabled = 1;
+		if (data->startup_delay)
+			udelay(data->startup_delay);
 	}
 
 	return 0;
@@ -117,6 +121,7 @@ static int regulator_fixed_voltage_probe(struct platform_device *pdev)
 
 	drvdata->microvolts = config->microvolts;
 	drvdata->gpio = config->gpio;
+	drvdata->startup_delay = config->startup_delay;
 
 	if (gpio_is_valid(config->gpio)) {
 		drvdata->enable_high = config->enable_high;
diff --git a/include/linux/regulator/fixed.h b/include/linux/regulator/fixed.h
index e94a4a1c7c8a..ffd7d508e726 100644
--- a/include/linux/regulator/fixed.h
+++ b/include/linux/regulator/fixed.h
@@ -25,6 +25,7 @@ struct regulator_init_data;
  * @microvolts:		Output voltage of regulator
  * @gpio:		GPIO to use for enable control
  * 			set to -EINVAL if not used
+ * @startup_delay:	Start-up time in microseconds
  * @enable_high:	Polarity of enable GPIO
  *			1 = Active high, 0 = Active low
  * @enabled_at_boot:	Whether regulator has been enabled at
@@ -41,6 +42,7 @@ struct fixed_voltage_config {
 	const char *supply_name;
 	int microvolts;
 	int gpio;
+	unsigned startup_delay;
 	unsigned enable_high:1;
 	unsigned enabled_at_boot:1;
 	struct regulator_init_data *init_data;
-- 
cgit v1.2.3


From f4b97b36b7c6b2d4455f27d6371869f915cbe8fd Mon Sep 17 00:00:00 2001
From: Alberto Panizzo <maramaopercheseimorto@gmail.com>
Date: Tue, 19 Jan 2010 12:48:54 +0100
Subject: regulator: mc13783: consider Power Gates as digital regulators.

GPO regulators are digital outputs that can be enabled or disabled by a
dedicated bit in mc13783 POWERMISC register.
In this family can be count in also Power Gates (PWGT1 and 2): enabled by
a dedicated pin a Power Gate is an hardware driven supply where the output
(PWGTnDRV) follow this law:

 Bit PWGTxSPIEN | Pin PWGTxEN | PWGTxDRV |  Read Back
   0 = default  |             |          | PWGTxSPIEN
 ---------------+-------------+----------+------------
       1        |      x      |   Low    |     0
       0        |      0      |   High   |     1
       0        |      1      |   Low    |     0

As read back value of control bit reflects the PWGTxDRV state (not the
control value previously written) and mc13783 POWERMISC register contain
only regulator related bits, a dedicated function to manage these bits is
created here with the aim of tracing the real value of PWGTxSPIEN bits
and reproduce it on next writes.

All POWERMISC users _must_ use the new function to not accidentally
disable Power Gates supplies.

v2 changes:
-Better utilization of abstraction layers.
-Voltage query support. GPO's and PWGTxDRV are fixed voltage regulator
 with voltage value of 3.1V and 5.5V respectively.

Signed-off-by: Alberto Panizzo <maramaopercheseimorto@gmail.com>
Acked-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Liam Girdwood <lrg@slimlogic.co.uk>
---
 drivers/regulator/mc13783-regulator.c | 132 ++++++++++++++++++++++++++++++++--
 include/linux/mfd/mc13783.h           |   2 +
 2 files changed, 128 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/regulator/mc13783-regulator.c b/drivers/regulator/mc13783-regulator.c
index a40e35ab8555..f7b81845a196 100644
--- a/drivers/regulator/mc13783-regulator.c
+++ b/drivers/regulator/mc13783-regulator.c
@@ -82,6 +82,11 @@
 #define MC13783_REG_POWERMISC_GPO2EN			(1 << 8)
 #define MC13783_REG_POWERMISC_GPO3EN			(1 << 10)
 #define MC13783_REG_POWERMISC_GPO4EN			(1 << 12)
+#define MC13783_REG_POWERMISC_PWGT1SPIEN		(1 << 15)
+#define MC13783_REG_POWERMISC_PWGT2SPIEN		(1 << 16)
+
+#define MC13783_REG_POWERMISC_PWGTSPI_M			(3 << 15)
+
 
 struct mc13783_regulator {
 	struct regulator_desc desc;
@@ -161,8 +166,17 @@ static const int const mc13783_vrf_val[] = {
 	1500000, 1875000, 2700000, 2775000,
 };
 
+static const int const mc13783_gpo_val[] = {
+	3100000,
+};
+
+static const int const mc13783_pwgtdrv_val[] = {
+	5500000,
+};
+
 static struct regulator_ops mc13783_regulator_ops;
 static struct regulator_ops mc13783_fixed_regulator_ops;
+static struct regulator_ops mc13783_gpo_regulator_ops;
 
 #define MC13783_DEFINE(prefix, _name, _reg, _vsel_reg, _voltages)	\
 	[MC13783_ ## prefix ## _ ## _name] = {				\
@@ -197,17 +211,19 @@ static struct regulator_ops mc13783_fixed_regulator_ops;
 		.voltages =  _voltages,					\
 	}
 
-#define MC13783_GPO_DEFINE(prefix, _name, _reg)				\
+#define MC13783_GPO_DEFINE(prefix, _name, _reg,  _voltages)		\
 	[MC13783_ ## prefix ## _ ## _name] = {				\
 		.desc = {						\
 			.name = #prefix "_" #_name,			\
-			.ops = &mc13783_regulator_ops,			\
+			.n_voltages = ARRAY_SIZE(_voltages),		\
+			.ops = &mc13783_gpo_regulator_ops,		\
 			.type = REGULATOR_VOLTAGE,			\
 			.id = MC13783_ ## prefix ## _ ## _name,		\
 			.owner = THIS_MODULE,				\
 		},							\
 		.reg = MC13783_REG_ ## _reg,				\
 		.enable_bit = MC13783_REG_ ## _reg ## _ ## _name ## EN,	\
+		.voltages =  _voltages,					\
 	}
 
 #define MC13783_DEFINE_SW(_name, _reg, _vsel_reg, _voltages)		\
@@ -249,14 +265,17 @@ static struct mc13783_regulator mc13783_regulators[] = {
 			    mc13783_vmmc_val),
 	MC13783_DEFINE_REGU(VMMC2, REGULATORMODE1, REGULATORSETTING1,	\
 			    mc13783_vmmc_val),
-	MC13783_GPO_DEFINE(REGU, GPO1, POWERMISC),
-	MC13783_GPO_DEFINE(REGU, GPO2, POWERMISC),
-	MC13783_GPO_DEFINE(REGU, GPO3, POWERMISC),
-	MC13783_GPO_DEFINE(REGU, GPO4, POWERMISC),
+	MC13783_GPO_DEFINE(REGU, GPO1, POWERMISC, mc13783_gpo_val),
+	MC13783_GPO_DEFINE(REGU, GPO2, POWERMISC, mc13783_gpo_val),
+	MC13783_GPO_DEFINE(REGU, GPO3, POWERMISC, mc13783_gpo_val),
+	MC13783_GPO_DEFINE(REGU, GPO4, POWERMISC, mc13783_gpo_val),
+	MC13783_GPO_DEFINE(REGU, PWGT1SPI, POWERMISC, mc13783_pwgtdrv_val),
+	MC13783_GPO_DEFINE(REGU, PWGT2SPI, POWERMISC, mc13783_pwgtdrv_val),
 };
 
 struct mc13783_regulator_priv {
 	struct mc13783 *mc13783;
+	u32 powermisc_pwgt_state;
 	struct regulator_dev *regulators[];
 };
 
@@ -445,6 +464,107 @@ static struct regulator_ops mc13783_fixed_regulator_ops = {
 	.get_voltage = mc13783_fixed_regulator_get_voltage,
 };
 
+int mc13783_powermisc_rmw(struct mc13783_regulator_priv *priv, u32 mask,
+									u32 val)
+{
+	struct mc13783 *mc13783 = priv->mc13783;
+	int ret;
+	u32 valread;
+
+	BUG_ON(val & ~mask);
+
+	ret = mc13783_reg_read(mc13783, MC13783_REG_POWERMISC, &valread);
+	if (ret)
+		return ret;
+
+	/* Update the stored state for Power Gates. */
+	priv->powermisc_pwgt_state =
+				(priv->powermisc_pwgt_state & ~mask) | val;
+	priv->powermisc_pwgt_state &= MC13783_REG_POWERMISC_PWGTSPI_M;
+
+	/* Construct the new register value */
+	valread = (valread & ~mask) | val;
+	/* Overwrite the PWGTxEN with the stored version */
+	valread = (valread & ~MC13783_REG_POWERMISC_PWGTSPI_M) |
+						priv->powermisc_pwgt_state;
+
+	return mc13783_reg_write(mc13783, MC13783_REG_POWERMISC, valread);
+}
+
+static int mc13783_gpo_regulator_enable(struct regulator_dev *rdev)
+{
+	struct mc13783_regulator_priv *priv = rdev_get_drvdata(rdev);
+	int id = rdev_get_id(rdev);
+	int ret;
+	u32 en_val = mc13783_regulators[id].enable_bit;
+
+	dev_dbg(rdev_get_dev(rdev), "%s id: %d\n", __func__, id);
+
+	/* Power Gate enable value is 0 */
+	if (id == MC13783_REGU_PWGT1SPI ||
+	    id == MC13783_REGU_PWGT2SPI)
+		en_val = 0;
+
+	mc13783_lock(priv->mc13783);
+	ret = mc13783_powermisc_rmw(priv, mc13783_regulators[id].enable_bit,
+					en_val);
+	mc13783_unlock(priv->mc13783);
+
+	return ret;
+}
+
+static int mc13783_gpo_regulator_disable(struct regulator_dev *rdev)
+{
+	struct mc13783_regulator_priv *priv = rdev_get_drvdata(rdev);
+	int id = rdev_get_id(rdev);
+	int ret;
+	u32 dis_val = 0;
+
+	dev_dbg(rdev_get_dev(rdev), "%s id: %d\n", __func__, id);
+
+	/* Power Gate disable value is 1 */
+	if (id == MC13783_REGU_PWGT1SPI ||
+	    id == MC13783_REGU_PWGT2SPI)
+		dis_val = mc13783_regulators[id].enable_bit;
+
+	mc13783_lock(priv->mc13783);
+	ret = mc13783_powermisc_rmw(priv, mc13783_regulators[id].enable_bit,
+					dis_val);
+	mc13783_unlock(priv->mc13783);
+
+	return ret;
+}
+
+static int mc13783_gpo_regulator_is_enabled(struct regulator_dev *rdev)
+{
+	struct mc13783_regulator_priv *priv = rdev_get_drvdata(rdev);
+	int ret, id = rdev_get_id(rdev);
+	unsigned int val;
+
+	mc13783_lock(priv->mc13783);
+	ret = mc13783_reg_read(priv->mc13783, mc13783_regulators[id].reg, &val);
+	mc13783_unlock(priv->mc13783);
+
+	if (ret)
+		return ret;
+
+	/* Power Gates state is stored in powermisc_pwgt_state
+	 * where the meaning of bits is negated */
+	val = (val & ~MC13783_REG_POWERMISC_PWGTSPI_M) |
+	      (priv->powermisc_pwgt_state ^ MC13783_REG_POWERMISC_PWGTSPI_M);
+
+	return (val & mc13783_regulators[id].enable_bit) != 0;
+}
+
+static struct regulator_ops mc13783_gpo_regulator_ops = {
+	.enable = mc13783_gpo_regulator_enable,
+	.disable = mc13783_gpo_regulator_disable,
+	.is_enabled = mc13783_gpo_regulator_is_enabled,
+	.list_voltage = mc13783_regulator_list_voltage,
+	.set_voltage = mc13783_fixed_regulator_set_voltage,
+	.get_voltage = mc13783_fixed_regulator_get_voltage,
+};
+
 static int __devinit mc13783_regulator_probe(struct platform_device *pdev)
 {
 	struct mc13783_regulator_priv *priv;
diff --git a/include/linux/mfd/mc13783.h b/include/linux/mfd/mc13783.h
index 35680409b8cf..94cb51a64037 100644
--- a/include/linux/mfd/mc13783.h
+++ b/include/linux/mfd/mc13783.h
@@ -108,6 +108,8 @@ int mc13783_adc_do_conversion(struct mc13783 *mc13783, unsigned int mode,
 #define	MC13783_REGU_V2		28
 #define	MC13783_REGU_V3		29
 #define	MC13783_REGU_V4		30
+#define	MC13783_REGU_PWGT1SPI	31
+#define	MC13783_REGU_PWGT2SPI	32
 
 #define MC13783_IRQ_ADCDONE	0
 #define MC13783_IRQ_ADCBISDONE	1
-- 
cgit v1.2.3


From a71b797fdc672714bfff1fdc142042a95e97d7ba Mon Sep 17 00:00:00 2001
From: Haojian Zhuang <haojian.zhuang@marvell.com>
Date: Mon, 25 Jan 2010 10:24:09 -0500
Subject: regulator: enable max8649 regulator driver

Signed-off-by: Haojian Zhuang <haojian.zhuang@marvell.com>
Acked-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Liam Girdwood <lrg@slimlogic.co.uk>
---
 drivers/regulator/Kconfig         |   7 +
 drivers/regulator/Makefile        |   1 +
 drivers/regulator/max8649.c       | 408 ++++++++++++++++++++++++++++++++++++++
 include/linux/regulator/max8649.h |  44 ++++
 4 files changed, 460 insertions(+)
 create mode 100644 drivers/regulator/max8649.c
 create mode 100644 include/linux/regulator/max8649.h

(limited to 'include/linux')

diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig
index c565e0d87287..2bc01ee9d9f2 100644
--- a/drivers/regulator/Kconfig
+++ b/drivers/regulator/Kconfig
@@ -69,6 +69,13 @@ config REGULATOR_MAX1586
 	  regulator via I2C bus. The provided regulator is suitable
 	  for PXA27x chips to control VCC_CORE and VCC_USIM voltages.
 
+config REGULATOR_MAX8649
+	tristate "Maxim 8649 voltage regulator"
+	depends on I2C
+	help
+	  This driver controls a Maxim 8649 voltage output regulator via
+	  I2C bus.
+
 config REGULATOR_MAX8660
 	tristate "Maxim 8660/8661 voltage regulator"
 	depends on I2C
diff --git a/drivers/regulator/Makefile b/drivers/regulator/Makefile
index b3c806c79415..075835be4396 100644
--- a/drivers/regulator/Makefile
+++ b/drivers/regulator/Makefile
@@ -12,6 +12,7 @@ obj-$(CONFIG_REGULATOR_BQ24022) += bq24022.o
 obj-$(CONFIG_REGULATOR_LP3971) += lp3971.o
 obj-$(CONFIG_REGULATOR_MAX1586) += max1586.o
 obj-$(CONFIG_REGULATOR_TWL4030) += twl-regulator.o
+obj-$(CONFIG_REGULATOR_MAX8649)	+= max8649.o
 obj-$(CONFIG_REGULATOR_MAX8660) += max8660.o
 obj-$(CONFIG_REGULATOR_WM831X) += wm831x-dcdc.o
 obj-$(CONFIG_REGULATOR_WM831X) += wm831x-isink.o
diff --git a/drivers/regulator/max8649.c b/drivers/regulator/max8649.c
new file mode 100644
index 000000000000..3ebdf698c648
--- /dev/null
+++ b/drivers/regulator/max8649.c
@@ -0,0 +1,408 @@
+/*
+ * Regulators driver for Maxim max8649
+ *
+ * Copyright (C) 2009-2010 Marvell International Ltd.
+ *      Haojian Zhuang <haojian.zhuang@marvell.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/err.h>
+#include <linux/i2c.h>
+#include <linux/platform_device.h>
+#include <linux/regulator/driver.h>
+#include <linux/regulator/max8649.h>
+
+#define MAX8649_DCDC_VMIN	750000		/* uV */
+#define MAX8649_DCDC_VMAX	1380000		/* uV */
+#define MAX8649_DCDC_STEP	10000		/* uV */
+#define MAX8649_VOL_MASK	0x3f
+
+/* Registers */
+#define MAX8649_MODE0		0x00
+#define MAX8649_MODE1		0x01
+#define MAX8649_MODE2		0x02
+#define MAX8649_MODE3		0x03
+#define MAX8649_CONTROL		0x04
+#define MAX8649_SYNC		0x05
+#define MAX8649_RAMP		0x06
+#define MAX8649_CHIP_ID1	0x08
+#define MAX8649_CHIP_ID2	0x09
+
+/* Bits */
+#define MAX8649_EN_PD		(1 << 7)
+#define MAX8649_VID0_PD		(1 << 6)
+#define MAX8649_VID1_PD		(1 << 5)
+#define MAX8649_VID_MASK	(3 << 5)
+
+#define MAX8649_FORCE_PWM	(1 << 7)
+#define MAX8649_SYNC_EXTCLK	(1 << 6)
+
+#define MAX8649_EXT_MASK	(3 << 6)
+
+#define MAX8649_RAMP_MASK	(7 << 5)
+#define MAX8649_RAMP_DOWN	(1 << 1)
+
+struct max8649_regulator_info {
+	struct regulator_dev	*regulator;
+	struct i2c_client	*i2c;
+	struct device		*dev;
+	struct mutex		io_lock;
+
+	int		vol_reg;
+	unsigned	mode:2;	/* bit[1:0] = VID1, VID0 */
+	unsigned	extclk_freq:2;
+	unsigned	extclk:1;
+	unsigned	ramp_timing:3;
+	unsigned	ramp_down:1;
+};
+
+/* I2C operations */
+
+static inline int max8649_read_device(struct i2c_client *i2c,
+				      int reg, int bytes, void *dest)
+{
+	unsigned char data;
+	int ret;
+
+	data = (unsigned char)reg;
+	ret = i2c_master_send(i2c, &data, 1);
+	if (ret < 0)
+		return ret;
+	ret = i2c_master_recv(i2c, dest, bytes);
+	if (ret < 0)
+		return ret;
+	return 0;
+}
+
+static inline int max8649_write_device(struct i2c_client *i2c,
+				       int reg, int bytes, void *src)
+{
+	unsigned char buf[bytes + 1];
+	int ret;
+
+	buf[0] = (unsigned char)reg;
+	memcpy(&buf[1], src, bytes);
+
+	ret = i2c_master_send(i2c, buf, bytes + 1);
+	if (ret < 0)
+		return ret;
+	return 0;
+}
+
+static int max8649_reg_read(struct i2c_client *i2c, int reg)
+{
+	struct max8649_regulator_info *info = i2c_get_clientdata(i2c);
+	unsigned char data;
+	int ret;
+
+	mutex_lock(&info->io_lock);
+	ret = max8649_read_device(i2c, reg, 1, &data);
+	mutex_unlock(&info->io_lock);
+
+	if (ret < 0)
+		return ret;
+	return (int)data;
+}
+
+static int max8649_set_bits(struct i2c_client *i2c, int reg,
+			    unsigned char mask, unsigned char data)
+{
+	struct max8649_regulator_info *info = i2c_get_clientdata(i2c);
+	unsigned char value;
+	int ret;
+
+	mutex_lock(&info->io_lock);
+	ret = max8649_read_device(i2c, reg, 1, &value);
+	if (ret < 0)
+		goto out;
+	value &= ~mask;
+	value |= data;
+	ret = max8649_write_device(i2c, reg, 1, &value);
+out:
+	mutex_unlock(&info->io_lock);
+	return ret;
+}
+
+static inline int check_range(int min_uV, int max_uV)
+{
+	if ((min_uV < MAX8649_DCDC_VMIN) || (max_uV > MAX8649_DCDC_VMAX)
+		|| (min_uV > max_uV))
+		return -EINVAL;
+	return 0;
+}
+
+static int max8649_list_voltage(struct regulator_dev *rdev, unsigned index)
+{
+	return (MAX8649_DCDC_VMIN + index * MAX8649_DCDC_STEP);
+}
+
+static int max8649_get_voltage(struct regulator_dev *rdev)
+{
+	struct max8649_regulator_info *info = rdev_get_drvdata(rdev);
+	unsigned char data;
+	int ret;
+
+	ret = max8649_reg_read(info->i2c, info->vol_reg);
+	if (ret < 0)
+		return ret;
+	data = (unsigned char)ret & MAX8649_VOL_MASK;
+	return max8649_list_voltage(rdev, data);
+}
+
+static int max8649_set_voltage(struct regulator_dev *rdev,
+			       int min_uV, int max_uV)
+{
+	struct max8649_regulator_info *info = rdev_get_drvdata(rdev);
+	unsigned char data, mask;
+
+	if (check_range(min_uV, max_uV)) {
+		dev_err(info->dev, "invalid voltage range (%d, %d) uV\n",
+			min_uV, max_uV);
+		return -EINVAL;
+	}
+	data = (min_uV - MAX8649_DCDC_VMIN + MAX8649_DCDC_STEP - 1)
+		/ MAX8649_DCDC_STEP;
+	mask = MAX8649_VOL_MASK;
+
+	return max8649_set_bits(info->i2c, info->vol_reg, mask, data);
+}
+
+/* EN_PD means pulldown on EN input */
+static int max8649_enable(struct regulator_dev *rdev)
+{
+	struct max8649_regulator_info *info = rdev_get_drvdata(rdev);
+	return max8649_set_bits(info->i2c, MAX8649_CONTROL, MAX8649_EN_PD, 0);
+}
+
+/*
+ * Applied internal pulldown resistor on EN input pin.
+ * If pulldown EN pin outside, it would be better.
+ */
+static int max8649_disable(struct regulator_dev *rdev)
+{
+	struct max8649_regulator_info *info = rdev_get_drvdata(rdev);
+	return max8649_set_bits(info->i2c, MAX8649_CONTROL, MAX8649_EN_PD,
+				MAX8649_EN_PD);
+}
+
+static int max8649_is_enabled(struct regulator_dev *rdev)
+{
+	struct max8649_regulator_info *info = rdev_get_drvdata(rdev);
+	int ret;
+
+	ret = max8649_reg_read(info->i2c, MAX8649_CONTROL);
+	if (ret < 0)
+		return ret;
+	return !((unsigned char)ret & MAX8649_EN_PD);
+}
+
+static int max8649_enable_time(struct regulator_dev *rdev)
+{
+	struct max8649_regulator_info *info = rdev_get_drvdata(rdev);
+	int voltage, rate, ret;
+
+	/* get voltage */
+	ret = max8649_reg_read(info->i2c, info->vol_reg);
+	if (ret < 0)
+		return ret;
+	ret &= MAX8649_VOL_MASK;
+	voltage = max8649_list_voltage(rdev, (unsigned char)ret); /* uV */
+
+	/* get rate */
+	ret = max8649_reg_read(info->i2c, MAX8649_RAMP);
+	if (ret < 0)
+		return ret;
+	ret = (ret & MAX8649_RAMP_MASK) >> 5;
+	rate = (32 * 1000) >> ret;	/* uV/uS */
+
+	return (voltage / rate);
+}
+
+static int max8649_set_mode(struct regulator_dev *rdev, unsigned int mode)
+{
+	struct max8649_regulator_info *info = rdev_get_drvdata(rdev);
+
+	switch (mode) {
+	case REGULATOR_MODE_FAST:
+		max8649_set_bits(info->i2c, info->vol_reg, MAX8649_FORCE_PWM,
+				 MAX8649_FORCE_PWM);
+		break;
+	case REGULATOR_MODE_NORMAL:
+		max8649_set_bits(info->i2c, info->vol_reg,
+				 MAX8649_FORCE_PWM, 0);
+		break;
+	default:
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static unsigned int max8649_get_mode(struct regulator_dev *rdev)
+{
+	struct max8649_regulator_info *info = rdev_get_drvdata(rdev);
+	int ret;
+
+	ret = max8649_reg_read(info->i2c, info->vol_reg);
+	if (ret & MAX8649_FORCE_PWM)
+		return REGULATOR_MODE_FAST;
+	return REGULATOR_MODE_NORMAL;
+}
+
+static struct regulator_ops max8649_dcdc_ops = {
+	.set_voltage	= max8649_set_voltage,
+	.get_voltage	= max8649_get_voltage,
+	.list_voltage	= max8649_list_voltage,
+	.enable		= max8649_enable,
+	.disable	= max8649_disable,
+	.is_enabled	= max8649_is_enabled,
+	.enable_time	= max8649_enable_time,
+	.set_mode	= max8649_set_mode,
+	.get_mode	= max8649_get_mode,
+
+};
+
+static struct regulator_desc dcdc_desc = {
+	.name		= "max8649",
+	.ops		= &max8649_dcdc_ops,
+	.type		= REGULATOR_VOLTAGE,
+	.n_voltages	= 1 << 6,
+	.owner		= THIS_MODULE,
+};
+
+static int __devinit max8649_regulator_probe(struct i2c_client *client,
+					     const struct i2c_device_id *id)
+{
+	struct max8649_platform_data *pdata = client->dev.platform_data;
+	struct max8649_regulator_info *info = NULL;
+	unsigned char data;
+	int ret;
+
+	info = kzalloc(sizeof(struct max8649_regulator_info), GFP_KERNEL);
+	if (!info) {
+		dev_err(&client->dev, "No enough memory\n");
+		return -ENOMEM;
+	}
+
+	info->i2c = client;
+	info->dev = &client->dev;
+	mutex_init(&info->io_lock);
+	i2c_set_clientdata(client, info);
+
+	info->mode = pdata->mode;
+	switch (info->mode) {
+	case 0:
+		info->vol_reg = MAX8649_MODE0;
+		break;
+	case 1:
+		info->vol_reg = MAX8649_MODE1;
+		break;
+	case 2:
+		info->vol_reg = MAX8649_MODE2;
+		break;
+	case 3:
+		info->vol_reg = MAX8649_MODE3;
+		break;
+	default:
+		break;
+	}
+
+	ret = max8649_reg_read(info->i2c, MAX8649_CHIP_ID1);
+	if (ret < 0) {
+		dev_err(info->dev, "Failed to detect ID of MAX8649:%d\n",
+			ret);
+		goto out;
+	}
+	dev_info(info->dev, "Detected MAX8649 (ID:%x)\n", ret);
+
+	/* enable VID0 & VID1 */
+	max8649_set_bits(info->i2c, MAX8649_CONTROL, MAX8649_VID_MASK, 0);
+
+	/* enable/disable external clock synchronization */
+	info->extclk = pdata->extclk;
+	data = (info->extclk) ? MAX8649_SYNC_EXTCLK : 0;
+	max8649_set_bits(info->i2c, info->vol_reg, MAX8649_SYNC_EXTCLK, data);
+	if (info->extclk) {
+		/* set external clock frequency */
+		info->extclk_freq = pdata->extclk_freq;
+		max8649_set_bits(info->i2c, MAX8649_SYNC, MAX8649_EXT_MASK,
+				 info->extclk_freq);
+	}
+
+	if (pdata->ramp_timing) {
+		info->ramp_timing = pdata->ramp_timing;
+		max8649_set_bits(info->i2c, MAX8649_RAMP, MAX8649_RAMP_MASK,
+				 info->ramp_timing << 5);
+	}
+
+	info->ramp_down = pdata->ramp_down;
+	if (info->ramp_down) {
+		max8649_set_bits(info->i2c, MAX8649_RAMP, MAX8649_RAMP_DOWN,
+				 MAX8649_RAMP_DOWN);
+	}
+
+	info->regulator = regulator_register(&dcdc_desc, &client->dev,
+					     pdata->regulator, info);
+	if (IS_ERR(info->regulator)) {
+		dev_err(info->dev, "failed to register regulator %s\n",
+			dcdc_desc.name);
+		ret = PTR_ERR(info->regulator);
+		goto out;
+	}
+
+	dev_info(info->dev, "Max8649 regulator device is detected.\n");
+	return 0;
+out:
+	kfree(info);
+	return ret;
+}
+
+static int __devexit max8649_regulator_remove(struct i2c_client *client)
+{
+	struct max8649_regulator_info *info = i2c_get_clientdata(client);
+
+	if (info) {
+		if (info->regulator)
+			regulator_unregister(info->regulator);
+		kfree(info);
+	}
+	i2c_set_clientdata(client, NULL);
+
+	return 0;
+}
+
+static const struct i2c_device_id max8649_id[] = {
+	{ "max8649", 0 },
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, max8649_id);
+
+static struct i2c_driver max8649_driver = {
+	.probe		= max8649_regulator_probe,
+	.remove		= __devexit_p(max8649_regulator_remove),
+	.driver		= {
+		.name	= "max8649",
+	},
+	.id_table	= max8649_id,
+};
+
+static int __init max8649_init(void)
+{
+	return i2c_add_driver(&max8649_driver);
+}
+subsys_initcall(max8649_init);
+
+static void __exit max8649_exit(void)
+{
+	i2c_del_driver(&max8649_driver);
+}
+module_exit(max8649_exit);
+
+/* Module information */
+MODULE_DESCRIPTION("MAXIM 8649 voltage regulator driver");
+MODULE_AUTHOR("Haojian Zhuang <haojian.zhuang@marvell.com>");
+MODULE_LICENSE("GPL");
+
diff --git a/include/linux/regulator/max8649.h b/include/linux/regulator/max8649.h
new file mode 100644
index 000000000000..417d14ecd5cb
--- /dev/null
+++ b/include/linux/regulator/max8649.h
@@ -0,0 +1,44 @@
+/*
+ * Interface of Maxim max8649
+ *
+ * Copyright (C) 2009-2010 Marvell International Ltd.
+ *      Haojian Zhuang <haojian.zhuang@marvell.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __LINUX_REGULATOR_MAX8649_H
+#define	__LINUX_REGULATOR_MAX8649_H
+
+#include <linux/regulator/machine.h>
+
+enum {
+	MAX8649_EXTCLK_26MHZ = 0,
+	MAX8649_EXTCLK_13MHZ,
+	MAX8649_EXTCLK_19MHZ,	/* 19.2MHz */
+};
+
+enum {
+	MAX8649_RAMP_32MV = 0,
+	MAX8649_RAMP_16MV,
+	MAX8649_RAMP_8MV,
+	MAX8649_RAMP_4MV,
+	MAX8649_RAMP_2MV,
+	MAX8649_RAMP_1MV,
+	MAX8649_RAMP_0_5MV,
+	MAX8649_RAMP_0_25MV,
+};
+
+struct max8649_platform_data {
+	struct regulator_init_data *regulator;
+
+	unsigned	mode:2;		/* bit[1:0] = VID1,VID0 */
+	unsigned	extclk_freq:2;
+	unsigned	extclk:1;
+	unsigned	ramp_timing:3;
+	unsigned	ramp_down:1;
+};
+
+#endif	/* __LINUX_REGULATOR_MAX8649_H */
-- 
cgit v1.2.3


From 193cf4b99113a4550598ba9e8343e591fc062e23 Mon Sep 17 00:00:00 2001
From: Boaz Harrosh <bharrosh@panasas.com>
Date: Tue, 12 Jan 2010 16:18:08 +0200
Subject: libfs: Unexport and kill simple_prepare_write

Remove the EXPORT_UNUSED_SYMBOL of simple_prepare_write

Collapse simple_prepare_write into it's only caller, though
making it simpler and clearer to understand.

Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/libfs.c         | 22 ++++++----------------
 include/linux/fs.h |  2 --
 2 files changed, 6 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/fs/libfs.c b/fs/libfs.c
index cd88abdcb436..9e50bcf55857 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -338,28 +338,14 @@ int simple_readpage(struct file *file, struct page *page)
 	return 0;
 }
 
-int simple_prepare_write(struct file *file, struct page *page,
-			unsigned from, unsigned to)
-{
-	if (!PageUptodate(page)) {
-		if (to - from != PAGE_CACHE_SIZE)
-			zero_user_segments(page,
-				0, from,
-				to, PAGE_CACHE_SIZE);
-	}
-	return 0;
-}
-
 int simple_write_begin(struct file *file, struct address_space *mapping,
 			loff_t pos, unsigned len, unsigned flags,
 			struct page **pagep, void **fsdata)
 {
 	struct page *page;
 	pgoff_t index;
-	unsigned from;
 
 	index = pos >> PAGE_CACHE_SHIFT;
-	from = pos & (PAGE_CACHE_SIZE - 1);
 
 	page = grab_cache_page_write_begin(mapping, index, flags);
 	if (!page)
@@ -367,7 +353,12 @@ int simple_write_begin(struct file *file, struct address_space *mapping,
 
 	*pagep = page;
 
-	return simple_prepare_write(file, page, from, from+len);
+	if (!PageUptodate(page) && (len != PAGE_CACHE_SIZE)) {
+		unsigned from = pos & (PAGE_CACHE_SIZE - 1);
+
+		zero_user_segments(page, 0, from, from + len, PAGE_CACHE_SIZE);
+	}
+	return 0;
 }
 
 /**
@@ -864,7 +855,6 @@ EXPORT_SYMBOL(simple_getattr);
 EXPORT_SYMBOL(simple_link);
 EXPORT_SYMBOL(simple_lookup);
 EXPORT_SYMBOL(simple_pin_fs);
-EXPORT_UNUSED_SYMBOL(simple_prepare_write);
 EXPORT_SYMBOL(simple_readpage);
 EXPORT_SYMBOL(simple_release_fs);
 EXPORT_SYMBOL(simple_rename);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index ebb1cd5bc241..2b124c825e38 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2340,8 +2340,6 @@ extern int simple_rename(struct inode *, struct dentry *, struct inode *, struct
 extern int simple_sync_file(struct file *, struct dentry *, int);
 extern int simple_empty(struct dentry *);
 extern int simple_readpage(struct file *file, struct page *page);
-extern int simple_prepare_write(struct file *file, struct page *page,
-			unsigned offset, unsigned to);
 extern int simple_write_begin(struct file *file, struct address_space *mapping,
 			loff_t pos, unsigned len, unsigned flags,
 			struct page **pagep, void **fsdata);
-- 
cgit v1.2.3


From 270ba5f7c5dac0bfb564aa35a536fb31ad4075bd Mon Sep 17 00:00:00 2001
From: Richard Kennedy <richard@rsk.demon.co.uk>
Date: Tue, 26 Jan 2010 14:12:43 +0000
Subject: fs: re-order super_block to remove 16 bytes of padding on 64bit
 builds

re-order structure super_block to remove 16 bytes of alignment padding
on 64bit builds.

This shrinks the size of super_block from 712 to 696 bytes so requiring
one fewer 64 byte cache lines.

Signed-off-by: Richard Kennedy <richard@rsk.demon.co.uk>

-----
patch against 2.6.33-rc5
compiled & tested on x86_64 AMDX2 desktop machine.

I've been running with this patch applied for several weeks with no
problems.

regards
Richard
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/fs.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 2b124c825e38..aa76dae673eb 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1314,9 +1314,9 @@ extern spinlock_t sb_lock;
 struct super_block {
 	struct list_head	s_list;		/* Keep this first */
 	dev_t			s_dev;		/* search index; _not_ kdev_t */
-	unsigned long		s_blocksize;
-	unsigned char		s_blocksize_bits;
 	unsigned char		s_dirt;
+	unsigned char		s_blocksize_bits;
+	unsigned long		s_blocksize;
 	loff_t			s_maxbytes;	/* Max file size */
 	struct file_system_type	*s_type;
 	const struct super_operations	*s_op;
@@ -1357,16 +1357,16 @@ struct super_block {
 	void 			*s_fs_info;	/* Filesystem private info */
 	fmode_t			s_mode;
 
+	/* Granularity of c/m/atime in ns.
+	   Cannot be worse than a second */
+	u32		   s_time_gran;
+
 	/*
 	 * The next field is for VFS *only*. No filesystems have any business
 	 * even looking at it. You had been warned.
 	 */
 	struct mutex s_vfs_rename_mutex;	/* Kludge */
 
-	/* Granularity of c/m/atime in ns.
-	   Cannot be worse than a second */
-	u32		   s_time_gran;
-
 	/*
 	 * Filesystem subtype.  If non-empty the filesystem type field
 	 * in /proc/mounts will be "type.subtype"
-- 
cgit v1.2.3


From 2ecdc82ef0b03e67ce5ecee79d0d108177a704df Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 26 Jan 2010 17:27:20 +0100
Subject: kill unused invalidate_inode_pages helper

No one is calling this anymore as everyone has switched to
invalidate_mapping_pages long time ago.  Also update a few
references to it in comments.  nfs has two more, but I can't
easily figure what they are actually referring to, so I left
them as-is.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 drivers/usb/gadget/f_mass_storage.c | 2 +-
 drivers/usb/gadget/file_storage.c   | 2 +-
 include/linux/fs.h                  | 6 ------
 mm/filemap.c                        | 2 +-
 4 files changed, 3 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/gadget/f_mass_storage.c b/drivers/usb/gadget/f_mass_storage.c
index a37640eba434..77fcd1b697e8 100644
--- a/drivers/usb/gadget/f_mass_storage.c
+++ b/drivers/usb/gadget/f_mass_storage.c
@@ -1041,7 +1041,7 @@ static void invalidate_sub(struct fsg_lun *curlun)
 	unsigned long	rc;
 
 	rc = invalidate_mapping_pages(inode->i_mapping, 0, -1);
-	VLDBG(curlun, "invalidate_inode_pages -> %ld\n", rc);
+	VLDBG(curlun, "invalidate_mapping_pages -> %ld\n", rc);
 }
 
 static int do_verify(struct fsg_common *common)
diff --git a/drivers/usb/gadget/file_storage.c b/drivers/usb/gadget/file_storage.c
index 29dfb0277ffb..7dcdbda49cac 100644
--- a/drivers/usb/gadget/file_storage.c
+++ b/drivers/usb/gadget/file_storage.c
@@ -1448,7 +1448,7 @@ static void invalidate_sub(struct fsg_lun *curlun)
 	unsigned long	rc;
 
 	rc = invalidate_mapping_pages(inode->i_mapping, 0, -1);
-	VLDBG(curlun, "invalidate_inode_pages -> %ld\n", rc);
+	VLDBG(curlun, "invalidate_mapping_pages -> %ld\n", rc);
 }
 
 static int do_verify(struct fsg_dev *fsg)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index aa76dae673eb..d443c9dd3caa 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2058,12 +2058,6 @@ extern int invalidate_inodes(struct super_block *);
 unsigned long invalidate_mapping_pages(struct address_space *mapping,
 					pgoff_t start, pgoff_t end);
 
-static inline unsigned long __deprecated
-invalidate_inode_pages(struct address_space *mapping)
-{
-	return invalidate_mapping_pages(mapping, 0, ~0UL);
-}
-
 static inline void invalidate_remote_inode(struct inode *inode)
 {
 	if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
diff --git a/mm/filemap.c b/mm/filemap.c
index 698ea80f2102..148b52a5bb7e 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1117,7 +1117,7 @@ readpage:
 			if (!PageUptodate(page)) {
 				if (page->mapping == NULL) {
 					/*
-					 * invalidate_inode_pages got it
+					 * invalidate_mapping_pages got it
 					 */
 					unlock_page(page);
 					page_cache_release(page);
-- 
cgit v1.2.3


From 495d6c9c6595ec7b37910dfd42634839431d21fd Mon Sep 17 00:00:00 2001
From: Valerie Aurora <vaurora@redhat.com>
Date: Tue, 26 Jan 2010 14:20:47 -0500
Subject: VFS: Clean up shared mount flag propagation

The handling of mount flags in set_mnt_shared() got a little tangled
up during previous cleanups, with the following problems:

* MNT_PNODE_MASK is defined as a literal constant when it should be a
bitwise xor of other MNT_* flags
* set_mnt_shared() clears and then sets MNT_SHARED (part of MNT_PNODE_MASK)
* MNT_PNODE_MASK could use a comment in mount.h
* MNT_PNODE_MASK is a terrible name, change to MNT_SHARED_MASK

This patch fixes these problems.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namespace.c        |  2 +-
 fs/pnode.h            |  2 +-
 include/linux/mount.h | 11 ++++++++++-
 3 files changed, 12 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/fs/namespace.c b/fs/namespace.c
index 25c1dcf9e9eb..d25d4602ab50 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1538,7 +1538,7 @@ static int do_remount(struct path *path, int flags, int mnt_flags,
 		err = do_remount_sb(sb, flags, data, 0);
 	if (!err) {
 		spin_lock(&vfsmount_lock);
-		mnt_flags |= path->mnt->mnt_flags & MNT_PNODE_MASK;
+		mnt_flags |= path->mnt->mnt_flags & MNT_PROPAGATION_MASK;
 		path->mnt->mnt_flags = mnt_flags;
 		spin_unlock(&vfsmount_lock);
 	}
diff --git a/fs/pnode.h b/fs/pnode.h
index 6c7ef3252a26..1ea4ae1efcd3 100644
--- a/fs/pnode.h
+++ b/fs/pnode.h
@@ -25,7 +25,7 @@
 
 static inline void set_mnt_shared(struct vfsmount *mnt)
 {
-	mnt->mnt_flags &= ~MNT_PNODE_MASK;
+	mnt->mnt_flags &= ~MNT_SHARED_MASK;
 	mnt->mnt_flags |= MNT_SHARED;
 }
 
diff --git a/include/linux/mount.h b/include/linux/mount.h
index 5d5275364867..375d43a5d802 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -34,7 +34,16 @@ struct mnt_namespace;
 
 #define MNT_SHARED	0x1000	/* if the vfsmount is a shared mount */
 #define MNT_UNBINDABLE	0x2000	/* if the vfsmount is a unbindable mount */
-#define MNT_PNODE_MASK	0x3000	/* propagation flag mask */
+/*
+ * MNT_SHARED_MASK is the set of flags that should be cleared when a
+ * mount becomes shared.  Currently, this is only the flag that says a
+ * mount cannot be bind mounted, since this is how we create a mount
+ * that shares events with another mount.  If you add a new MNT_*
+ * flag, consider how it interacts with shared mounts.
+ */
+#define MNT_SHARED_MASK	(MNT_UNBINDABLE)
+#define MNT_PROPAGATION_MASK	(MNT_SHARED | MNT_UNBINDABLE)
+
 
 struct vfsmount {
 	struct list_head mnt_hash;
-- 
cgit v1.2.3


From 2096f759abcb42200a81d776f597362fd9265024 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 30 Jan 2010 13:16:21 -0500
Subject: New helper: path_is_under(path1, path2)

Analog of is_subdir for vfsmount,dentry pairs, moved from audit_tree.c

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/dcache.c         | 24 ++++++++++++++++++++++++
 include/linux/fs.h  |  1 +
 kernel/audit_tree.c | 51 ++++++++++++---------------------------------------
 3 files changed, 37 insertions(+), 39 deletions(-)

(limited to 'include/linux')

diff --git a/fs/dcache.c b/fs/dcache.c
index 4365998b8df4..74da947b160b 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -2191,6 +2191,30 @@ int is_subdir(struct dentry *new_dentry, struct dentry *old_dentry)
 	return result;
 }
 
+int path_is_under(struct path *path1, struct path *path2)
+{
+	struct vfsmount *mnt = path1->mnt;
+	struct dentry *dentry = path1->dentry;
+	int res;
+	spin_lock(&vfsmount_lock);
+	if (mnt != path2->mnt) {
+		for (;;) {
+			if (mnt->mnt_parent == mnt) {
+				spin_unlock(&vfsmount_lock);
+				return 0;
+			}
+			if (mnt->mnt_parent == path2->mnt)
+				break;
+			mnt = mnt->mnt_parent;
+		}
+		dentry = mnt->mnt_mountpoint;
+	}
+	res = is_subdir(dentry, path2->dentry);
+	spin_unlock(&vfsmount_lock);
+	return res;
+}
+EXPORT_SYMBOL(path_is_under);
+
 void d_genocide(struct dentry *root)
 {
 	struct dentry *this_parent = root;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index d443c9dd3caa..8d53bc17f93f 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2126,6 +2126,7 @@ extern struct file * open_exec(const char *);
  
 /* fs/dcache.c -- generic fs support functions */
 extern int is_subdir(struct dentry *, struct dentry *);
+extern int path_is_under(struct path *, struct path *);
 extern ino_t find_inode_number(struct dentry *, struct qstr *);
 
 #include <linux/err.h>
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index 4b05bd9479db..f09b42d9c32d 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -603,22 +603,6 @@ skip_it:
 	mutex_unlock(&audit_filter_mutex);
 }
 
-static int is_under(struct vfsmount *mnt, struct dentry *dentry,
-		    struct path *path)
-{
-	if (mnt != path->mnt) {
-		for (;;) {
-			if (mnt->mnt_parent == mnt)
-				return 0;
-			if (mnt->mnt_parent == path->mnt)
-					break;
-			mnt = mnt->mnt_parent;
-		}
-		dentry = mnt->mnt_mountpoint;
-	}
-	return is_subdir(dentry, path->dentry);
-}
-
 int audit_make_tree(struct audit_krule *rule, char *pathname, u32 op)
 {
 
@@ -714,29 +698,24 @@ int audit_tag_tree(char *old, char *new)
 {
 	struct list_head cursor, barrier;
 	int failed = 0;
-	struct path path;
+	struct path path1, path2;
 	struct vfsmount *tagged;
 	struct list_head list;
-	struct vfsmount *mnt;
-	struct dentry *dentry;
 	int err;
 
-	err = kern_path(new, 0, &path);
+	err = kern_path(new, 0, &path2);
 	if (err)
 		return err;
-	tagged = collect_mounts(&path);
-	path_put(&path);
+	tagged = collect_mounts(&path2);
+	path_put(&path2);
 	if (!tagged)
 		return -ENOMEM;
 
-	err = kern_path(old, 0, &path);
+	err = kern_path(old, 0, &path1);
 	if (err) {
 		drop_collected_mounts(tagged);
 		return err;
 	}
-	mnt = mntget(path.mnt);
-	dentry = dget(path.dentry);
-	path_put(&path);
 
 	list_add_tail(&list, &tagged->mnt_list);
 
@@ -747,6 +726,7 @@ int audit_tag_tree(char *old, char *new)
 	while (cursor.next != &tree_list) {
 		struct audit_tree *tree;
 		struct vfsmount *p;
+		int good_one = 0;
 
 		tree = container_of(cursor.next, struct audit_tree, list);
 		get_tree(tree);
@@ -754,23 +734,17 @@ int audit_tag_tree(char *old, char *new)
 		list_add(&cursor, &tree->list);
 		mutex_unlock(&audit_filter_mutex);
 
-		err = kern_path(tree->pathname, 0, &path);
-		if (err) {
-			put_tree(tree);
-			mutex_lock(&audit_filter_mutex);
-			continue;
+		err = kern_path(tree->pathname, 0, &path2);
+		if (!err) {
+			good_one = path_is_under(&path1, &path2);
+			path_put(&path2);
 		}
 
-		spin_lock(&vfsmount_lock);
-		if (!is_under(mnt, dentry, &path)) {
-			spin_unlock(&vfsmount_lock);
-			path_put(&path);
+		if (!good_one) {
 			put_tree(tree);
 			mutex_lock(&audit_filter_mutex);
 			continue;
 		}
-		spin_unlock(&vfsmount_lock);
-		path_put(&path);
 
 		list_for_each_entry(p, &list, mnt_list) {
 			failed = tag_chunk(p->mnt_root->d_inode, tree);
@@ -820,8 +794,7 @@ int audit_tag_tree(char *old, char *new)
 	list_del(&cursor);
 	list_del(&list);
 	mutex_unlock(&audit_filter_mutex);
-	dput(dentry);
-	mntput(mnt);
+	path_put(&path1);
 	drop_collected_mounts(tagged);
 	return failed;
 }
-- 
cgit v1.2.3


From 1f707137b55764740981d022d29c622832a61880 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 30 Jan 2010 22:51:25 -0500
Subject: new helper: iterate_mounts()

apply function to vfsmounts in set returned by collect_mounts(),
stop if it returns non-zero.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namespace.c      | 15 +++++++++++++++
 include/linux/fs.h  |  3 ++-
 kernel/audit_tree.c | 49 ++++++++++++++++---------------------------------
 3 files changed, 33 insertions(+), 34 deletions(-)

(limited to 'include/linux')

diff --git a/fs/namespace.c b/fs/namespace.c
index d25d4602ab50..d5906c19e08e 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1246,6 +1246,21 @@ void drop_collected_mounts(struct vfsmount *mnt)
 	release_mounts(&umount_list);
 }
 
+int iterate_mounts(int (*f)(struct vfsmount *, void *), void *arg,
+		   struct vfsmount *root)
+{
+	struct vfsmount *mnt;
+	int res = f(root, arg);
+	if (res)
+		return res;
+	list_for_each_entry(mnt, &root->mnt_list, mnt_list) {
+		res = f(mnt, arg);
+		if (res)
+			return res;
+	}
+	return 0;
+}
+
 static void cleanup_group_ids(struct vfsmount *mnt, struct vfsmount *end)
 {
 	struct vfsmount *p;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 8d53bc17f93f..e764f247d0ab 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1794,7 +1794,8 @@ extern int may_umount(struct vfsmount *);
 extern long do_mount(char *, char *, char *, unsigned long, void *);
 extern struct vfsmount *collect_mounts(struct path *);
 extern void drop_collected_mounts(struct vfsmount *);
-
+extern int iterate_mounts(int (*)(struct vfsmount *, void *), void *,
+			  struct vfsmount *);
 extern int vfs_statfs(struct dentry *, struct kstatfs *);
 
 extern int current_umask(void);
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index f09b42d9c32d..028e85663f27 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -548,6 +548,11 @@ int audit_remove_tree_rule(struct audit_krule *rule)
 	return 0;
 }
 
+static int compare_root(struct vfsmount *mnt, void *arg)
+{
+	return mnt->mnt_root->d_inode == arg;
+}
+
 void audit_trim_trees(void)
 {
 	struct list_head cursor;
@@ -559,7 +564,6 @@ void audit_trim_trees(void)
 		struct path path;
 		struct vfsmount *root_mnt;
 		struct node *node;
-		struct list_head list;
 		int err;
 
 		tree = container_of(cursor.next, struct audit_tree, list);
@@ -577,24 +581,16 @@ void audit_trim_trees(void)
 		if (!root_mnt)
 			goto skip_it;
 
-		list_add_tail(&list, &root_mnt->mnt_list);
 		spin_lock(&hash_lock);
 		list_for_each_entry(node, &tree->chunks, list) {
-			struct audit_chunk *chunk = find_chunk(node);
-			struct inode *inode = chunk->watch.inode;
-			struct vfsmount *mnt;
+			struct inode *inode = find_chunk(node)->watch.inode;
 			node->index |= 1U<<31;
-			list_for_each_entry(mnt, &list, mnt_list) {
-				if (mnt->mnt_root->d_inode == inode) {
-					node->index &= ~(1U<<31);
-					break;
-				}
-			}
+			if (iterate_mounts(compare_root, inode, root_mnt))
+				node->index &= ~(1U<<31);
 		}
 		spin_unlock(&hash_lock);
 		trim_marked(tree);
 		put_tree(tree);
-		list_del_init(&list);
 		drop_collected_mounts(root_mnt);
 skip_it:
 		mutex_lock(&audit_filter_mutex);
@@ -622,13 +618,17 @@ void audit_put_tree(struct audit_tree *tree)
 	put_tree(tree);
 }
 
+static int tag_mount(struct vfsmount *mnt, void *arg)
+{
+	return tag_chunk(mnt->mnt_root->d_inode, arg);
+}
+
 /* called with audit_filter_mutex */
 int audit_add_tree_rule(struct audit_krule *rule)
 {
 	struct audit_tree *seed = rule->tree, *tree;
 	struct path path;
-	struct vfsmount *mnt, *p;
-	struct list_head list;
+	struct vfsmount *mnt;
 	int err;
 
 	list_for_each_entry(tree, &tree_list, list) {
@@ -654,16 +654,9 @@ int audit_add_tree_rule(struct audit_krule *rule)
 		err = -ENOMEM;
 		goto Err;
 	}
-	list_add_tail(&list, &mnt->mnt_list);
 
 	get_tree(tree);
-	list_for_each_entry(p, &list, mnt_list) {
-		err = tag_chunk(p->mnt_root->d_inode, tree);
-		if (err)
-			break;
-	}
-
-	list_del(&list);
+	err = iterate_mounts(tag_mount, tree, mnt);
 	drop_collected_mounts(mnt);
 
 	if (!err) {
@@ -700,7 +693,6 @@ int audit_tag_tree(char *old, char *new)
 	int failed = 0;
 	struct path path1, path2;
 	struct vfsmount *tagged;
-	struct list_head list;
 	int err;
 
 	err = kern_path(new, 0, &path2);
@@ -717,15 +709,12 @@ int audit_tag_tree(char *old, char *new)
 		return err;
 	}
 
-	list_add_tail(&list, &tagged->mnt_list);
-
 	mutex_lock(&audit_filter_mutex);
 	list_add(&barrier, &tree_list);
 	list_add(&cursor, &barrier);
 
 	while (cursor.next != &tree_list) {
 		struct audit_tree *tree;
-		struct vfsmount *p;
 		int good_one = 0;
 
 		tree = container_of(cursor.next, struct audit_tree, list);
@@ -746,12 +735,7 @@ int audit_tag_tree(char *old, char *new)
 			continue;
 		}
 
-		list_for_each_entry(p, &list, mnt_list) {
-			failed = tag_chunk(p->mnt_root->d_inode, tree);
-			if (failed)
-				break;
-		}
-
+		failed = iterate_mounts(tag_mount, tree, tagged);
 		if (failed) {
 			put_tree(tree);
 			mutex_lock(&audit_filter_mutex);
@@ -792,7 +776,6 @@ int audit_tag_tree(char *old, char *new)
 	}
 	list_del(&barrier);
 	list_del(&cursor);
-	list_del(&list);
 	mutex_unlock(&audit_filter_mutex);
 	path_put(&path1);
 	drop_collected_mounts(tagged);
-- 
cgit v1.2.3


From 9f5596af44514f99e3a654a4f7cb813354b9e516 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 5 Feb 2010 00:40:25 -0500
Subject: take check for new events in namespace (guts of mounts_poll()) to
 namespace.c

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namespace.c                | 15 +++++++++++++++
 fs/proc/base.c                | 10 ++--------
 include/linux/mnt_namespace.h |  1 +
 3 files changed, 18 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/fs/namespace.c b/fs/namespace.c
index d5906c19e08e..970fe79d7867 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -737,6 +737,21 @@ static void m_stop(struct seq_file *m, void *v)
 	up_read(&namespace_sem);
 }
 
+int mnt_had_events(struct proc_mounts *p)
+{
+	struct mnt_namespace *ns = p->ns;
+	int res = 0;
+
+	spin_lock(&vfsmount_lock);
+	if (p->event != ns->event) {
+		p->event = ns->event;
+		res = 1;
+	}
+	spin_unlock(&vfsmount_lock);
+
+	return res;
+}
+
 struct proc_fs_info {
 	int flag;
 	const char *str;
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 58324c299165..746895ddfda1 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -647,17 +647,11 @@ static int mounts_release(struct inode *inode, struct file *file)
 static unsigned mounts_poll(struct file *file, poll_table *wait)
 {
 	struct proc_mounts *p = file->private_data;
-	struct mnt_namespace *ns = p->ns;
 	unsigned res = POLLIN | POLLRDNORM;
 
-	poll_wait(file, &ns->poll, wait);
-
-	spin_lock(&vfsmount_lock);
-	if (p->event != ns->event) {
-		p->event = ns->event;
+	poll_wait(file, &p->ns->poll, wait);
+	if (mnt_had_events(p))
 		res |= POLLERR | POLLPRI;
-	}
-	spin_unlock(&vfsmount_lock);
 
 	return res;
 }
diff --git a/include/linux/mnt_namespace.h b/include/linux/mnt_namespace.h
index d74785c2393a..0b89efc6f215 100644
--- a/include/linux/mnt_namespace.h
+++ b/include/linux/mnt_namespace.h
@@ -35,6 +35,7 @@ static inline void get_mnt_ns(struct mnt_namespace *ns)
 extern const struct seq_operations mounts_op;
 extern const struct seq_operations mountinfo_op;
 extern const struct seq_operations mountstats_op;
+extern int mnt_had_events(struct proc_mounts *);
 
 #endif
 #endif
-- 
cgit v1.2.3


From 47cd813f2984569570021ce3d34cdf9cb20aa6a2 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 5 Feb 2010 02:01:14 -0500
Subject: Take vfsmount_lock to fs/internal.h

no more users left outside of fs/*.c (and very few outside of
fs/namespace.c, actually)

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/internal.h         | 2 ++
 include/linux/mount.h | 1 -
 2 files changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/fs/internal.h b/fs/internal.h
index e96a1667d749..8a03a5447bdf 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -70,6 +70,8 @@ extern struct vfsmount *copy_tree(struct vfsmount *, struct dentry *, int);
 
 extern void __init mnt_init(void);
 
+extern spinlock_t vfsmount_lock;
+
 /*
  * fs_struct.c
  */
diff --git a/include/linux/mount.h b/include/linux/mount.h
index 375d43a5d802..163896137ab5 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -132,7 +132,6 @@ extern int do_add_mount(struct vfsmount *newmnt, struct path *path,
 
 extern void mark_mounts_for_expiry(struct list_head *mounts);
 
-extern spinlock_t vfsmount_lock;
 extern dev_t name_to_dev_t(char *name);
 
 #endif /* _LINUX_MOUNT_H */
-- 
cgit v1.2.3


From 8089352a13b785d4e0df63d87bd2b71c76bb9aee Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 5 Feb 2010 09:30:46 -0500
Subject: Mirror MS_KERNMOUNT in ->mnt_flags

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namespace.c        | 2 +-
 fs/super.c            | 3 +++
 include/linux/mount.h | 2 ++
 3 files changed, 6 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/fs/namespace.c b/fs/namespace.c
index b0b15cc2117c..ffa3843404e0 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1701,7 +1701,7 @@ int do_add_mount(struct vfsmount *newmnt, struct path *path,
 {
 	int err;
 
-	mnt_flags &= ~(MNT_SHARED | MNT_WRITE_HOLD);
+	mnt_flags &= ~(MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL);
 
 	down_write(&namespace_sem);
 	/* Something was mounted here while we slept */
diff --git a/fs/super.c b/fs/super.c
index 903896ec7c73..f35ac6022109 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -937,6 +937,9 @@ vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void
 	if (!mnt)
 		goto out;
 
+	if (flags & MS_KERNMOUNT)
+		mnt->mnt_flags = MNT_INTERNAL;
+
 	if (data && !(type->fs_flags & FS_BINARY_MOUNTDATA)) {
 		secdata = alloc_secdata();
 		if (!secdata)
diff --git a/include/linux/mount.h b/include/linux/mount.h
index 163896137ab5..ca726ebf50a3 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -45,6 +45,8 @@ struct mnt_namespace;
 #define MNT_PROPAGATION_MASK	(MNT_SHARED | MNT_UNBINDABLE)
 
 
+#define MNT_INTERNAL	0x4000
+
 struct vfsmount {
 	struct list_head mnt_hash;
 	struct vfsmount *mnt_parent;	/* fs we are mounted on */
-- 
cgit v1.2.3


From db1f05bb85d7966b9176e293f3ceead1cb8b5d79 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Wed, 10 Feb 2010 12:15:53 +0100
Subject: vfs: add NOFOLLOW flag to umount(2)

Add a new UMOUNT_NOFOLLOW flag to umount(2).  This is needed to prevent
symlink attacks in unprivileged unmounts (fuse, samba, ncpfs).

Additionally, return -EINVAL if an unknown flag is used (and specify
an explicitly unused flag: UMOUNT_UNUSED).  This makes it possible for
the caller to determine if a flag is supported or not.

CC: Eugene Teo <eugene@redhat.com>
CC: Michael Kerrisk <mtk.manpages@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namespace.c     | 9 ++++++++-
 include/linux/fs.h | 2 ++
 2 files changed, 10 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/fs/namespace.c b/fs/namespace.c
index ffa3843404e0..8174c8ab5c70 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1136,8 +1136,15 @@ SYSCALL_DEFINE2(umount, char __user *, name, int, flags)
 {
 	struct path path;
 	int retval;
+	int lookup_flags = 0;
 
-	retval = user_path(name, &path);
+	if (flags & ~(MNT_FORCE | MNT_DETACH | MNT_EXPIRE | UMOUNT_NOFOLLOW))
+		return -EINVAL;
+
+	if (!(flags & UMOUNT_NOFOLLOW))
+		lookup_flags |= LOOKUP_FOLLOW;
+
+	retval = user_path_at(AT_FDCWD, name, lookup_flags, &path);
 	if (retval)
 		goto out;
 	retval = -EINVAL;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index e764f247d0ab..5b3182c7eb5f 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1305,6 +1305,8 @@ extern int send_sigurg(struct fown_struct *fown);
 #define MNT_FORCE	0x00000001	/* Attempt to forcibily umount */
 #define MNT_DETACH	0x00000002	/* Just detach from the tree */
 #define MNT_EXPIRE	0x00000004	/* Mark for expiry */
+#define UMOUNT_NOFOLLOW	0x00000008	/* Don't follow symlink on umount */
+#define UMOUNT_UNUSED	0x80000000	/* Flag guaranteed to be unused */
 
 extern struct list_head super_blocks;
 extern spinlock_t sb_lock;
-- 
cgit v1.2.3


From db1466b3e1bd1727375cdbfcbea4bcce2f860f61 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Wed, 3 Mar 2010 07:46:56 -0800
Subject: rcu: Use wrapper function instead of exporting tasklist_lock

Lockdep-RCU commit d11c563d exported tasklist_lock, which is not
a good thing.  This patch instead exports a function that uses
lockdep to check whether tasklist_lock is held.

Suggested-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: laijs@cn.fujitsu.com
Cc: dipankar@in.ibm.com
Cc: mathieu.desnoyers@polymtl.ca
Cc: josh@joshtriplett.org
Cc: dvhltc@us.ibm.com
Cc: niv@us.ibm.com
Cc: peterz@infradead.org
Cc: rostedt@goodmis.org
Cc: Valdis.Kletnieks@vt.edu
Cc: dhowells@redhat.com
Cc: Christoph Hellwig <hch@lst.de>
LKML-Reference: <1267631219-8713-1-git-send-email-paulmck@linux.vnet.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/cred.h  | 2 +-
 include/linux/sched.h | 4 ++++
 kernel/exit.c         | 2 +-
 kernel/fork.c         | 9 ++++++++-
 kernel/pid.c          | 4 +++-
 5 files changed, 17 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cred.h b/include/linux/cred.h
index 4db09f89b637..52507c3e1387 100644
--- a/include/linux/cred.h
+++ b/include/linux/cred.h
@@ -280,7 +280,7 @@ static inline void put_cred(const struct cred *_cred)
  * task or by holding tasklist_lock to prevent it from being unlinked.
  */
 #define __task_cred(task) \
-	((const struct cred *)(rcu_dereference_check((task)->real_cred, rcu_read_lock_held() || lockdep_is_held(&tasklist_lock))))
+	((const struct cred *)(rcu_dereference_check((task)->real_cred, rcu_read_lock_held() || lockdep_tasklist_lock_is_held())))
 
 /**
  * get_task_cred - Get another task's objective credentials
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 0eef87b58ea5..a47af2064dcc 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -258,6 +258,10 @@ extern spinlock_t mmlist_lock;
 
 struct task_struct;
 
+#ifdef CONFIG_PROVE_RCU
+extern int lockdep_tasklist_lock_is_held(void);
+#endif /* #ifdef CONFIG_PROVE_RCU */
+
 extern void sched_init(void);
 extern void sched_init_smp(void);
 extern asmlinkage void schedule_tail(struct task_struct *prev);
diff --git a/kernel/exit.c b/kernel/exit.c
index 45ed043b8bf5..fed3a4db6f04 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -87,7 +87,7 @@ static void __exit_signal(struct task_struct *tsk)
 
 	sighand = rcu_dereference_check(tsk->sighand,
 					rcu_read_lock_held() ||
-					lockdep_is_held(&tasklist_lock));
+					lockdep_tasklist_lock_is_held());
 	spin_lock(&sighand->siglock);
 
 	posix_cpu_timers_exit(tsk);
diff --git a/kernel/fork.c b/kernel/fork.c
index 17bbf093356d..8691c540a470 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -86,7 +86,14 @@ int max_threads;		/* tunable limit on nr_threads */
 DEFINE_PER_CPU(unsigned long, process_counts) = 0;
 
 __cacheline_aligned DEFINE_RWLOCK(tasklist_lock);  /* outer */
-EXPORT_SYMBOL_GPL(tasklist_lock);
+
+#ifdef CONFIG_PROVE_RCU
+int lockdep_tasklist_lock_is_held(void)
+{
+	return lockdep_is_held(&tasklist_lock);
+}
+EXPORT_SYMBOL_GPL(lockdep_tasklist_lock_is_held);
+#endif /* #ifdef CONFIG_PROVE_RCU */
 
 int nr_processes(void)
 {
diff --git a/kernel/pid.c b/kernel/pid.c
index b08e697cd83f..b6064405f367 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -367,7 +367,9 @@ struct task_struct *pid_task(struct pid *pid, enum pid_type type)
 	struct task_struct *result = NULL;
 	if (pid) {
 		struct hlist_node *first;
-		first = rcu_dereference_check(pid->tasks[type].first, rcu_read_lock_held() || lockdep_is_held(&tasklist_lock));
+		first = rcu_dereference_check(pid->tasks[type].first,
+					      rcu_read_lock_held() ||
+					      lockdep_tasklist_lock_is_held());
 		if (first)
 			result = hlist_entry(first, struct task_struct, pids[(type)].node);
 	}
-- 
cgit v1.2.3


From 5ed42b8113667c06a6ff2c72717395b5044d30a1 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Wed, 3 Mar 2010 07:46:58 -0800
Subject: rcu, cgroup: Relax the check in task_subsys_state() as early boot is
 now handled by lockdep-RCU

This patch removes the check for !rcu_scheduler_active because
this check has been incorporated into rcu_dereference_check().

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: laijs@cn.fujitsu.com
Cc: dipankar@in.ibm.com
Cc: mathieu.desnoyers@polymtl.ca
Cc: josh@joshtriplett.org
Cc: dvhltc@us.ibm.com
Cc: niv@us.ibm.com
Cc: peterz@infradead.org
Cc: rostedt@goodmis.org
Cc: Valdis.Kletnieks@vt.edu
Cc: dhowells@redhat.com
LKML-Reference: <1267631219-8713-3-git-send-email-paulmck@linux.vnet.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/cgroup.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index a73e1ced09b8..c9bbcb2a75ae 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -489,7 +489,6 @@ static inline struct cgroup_subsys_state *task_subsys_state(
 {
 	return rcu_dereference_check(task->cgroups->subsys[subsys_id],
 				     rcu_read_lock_held() ||
-				     !rcu_scheduler_active ||
 				     cgroup_lock_is_held());
 }
 
-- 
cgit v1.2.3


From e6033e3b307fcfae08408e0673266db38392bda4 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Wed, 3 Mar 2010 17:50:16 -0800
Subject: rcu: Make rcu_read_lock_sched_held() handle !PREEMPT

The rcu_read_lock_sched_held() needs to unconditionally return
the value "1" in a !PREEMPT kernel, because under !PREEMPT,
-all- kernel code is implicitly preempt-disabled.  This patch
makes this happen.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: laijs@cn.fujitsu.com
Cc: dipankar@in.ibm.com
Cc: mathieu.desnoyers@polymtl.ca
Cc: josh@joshtriplett.org
Cc: dvhltc@us.ibm.com
Cc: niv@us.ibm.com
Cc: peterz@infradead.org
Cc: rostedt@goodmis.org
Cc: Valdis.Kletnieks@vt.edu
Cc: dhowells@redhat.com
LKML-Reference: <1267667418-32233-1-git-send-email-paulmck@linux.vnet.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/rcupdate.h | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index c84373626336..e22960ecb71a 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -136,6 +136,7 @@ static inline int rcu_read_lock_bh_held(void)
  * can prove otherwise.  Note that disabling of preemption (including
  * disabling irqs) counts as an RCU-sched read-side critical section.
  */
+#ifdef CONFIG_PREEMPT
 static inline int rcu_read_lock_sched_held(void)
 {
 	int lockdep_opinion = 0;
@@ -144,6 +145,12 @@ static inline int rcu_read_lock_sched_held(void)
 		lockdep_opinion = lock_is_held(&rcu_sched_lock_map);
 	return lockdep_opinion || preempt_count() != 0 || !rcu_scheduler_active;
 }
+#else /* #ifdef CONFIG_PREEMPT */
+static inline int rcu_read_lock_sched_held(void)
+{
+	return 1;
+}
+#endif /* #else #ifdef CONFIG_PREEMPT */
 
 #else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
 
@@ -164,10 +171,17 @@ static inline int rcu_read_lock_bh_held(void)
 	return 1;
 }
 
+#ifdef CONFIG_PREEMPT
 static inline int rcu_read_lock_sched_held(void)
 {
 	return preempt_count() != 0 || !rcu_scheduler_active;
 }
+#else /* #ifdef CONFIG_PREEMPT */
+static inline int rcu_read_lock_sched_held(void)
+{
+	return 1;
+}
+#endif /* #else #ifdef CONFIG_PREEMPT */
 
 #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
 
-- 
cgit v1.2.3


From 54dbf96c921513bf98484a20ef366d51944a4c4d Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Wed, 3 Mar 2010 07:46:57 -0800
Subject: rcu: Suppress RCU lockdep warnings during early boot

RCU is used during very early boot, before RCU and lockdep have
been initialized.  So make the underlying primitives
(rcu_read_lock_held(), rcu_read_lock_bh_held(),
rcu_read_lock_sched_held(), and rcu_dereference_check()) check
for early boot via the rcu_scheduler_active flag.  This will
suppress false positives.

Also introduce a debug_lockdep_rcu_enabled() static inline
helper function, which tags the CONTINUE_PROVE_RCU case as
likely(), as suggested by Ingo Molnar.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: laijs@cn.fujitsu.com
Cc: dipankar@in.ibm.com
Cc: mathieu.desnoyers@polymtl.ca
Cc: josh@joshtriplett.org
Cc: dvhltc@us.ibm.com
Cc: niv@us.ibm.com
Cc: peterz@infradead.org
Cc: rostedt@goodmis.org
Cc: Valdis.Kletnieks@vt.edu
Cc: dhowells@redhat.com
LKML-Reference: <1267631219-8713-2-git-send-email-paulmck@linux.vnet.ibm.com>
[ v2: removed incomplete debug_lockdep_rcu_update() bits ]
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/rcupdate.h | 31 ++++++++++++++++++++++---------
 1 file changed, 22 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index e22960ecb71a..75921b83c0ab 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -97,6 +97,11 @@ extern struct lockdep_map rcu_sched_lock_map;
 # define rcu_read_release_sched() \
 		lock_release(&rcu_sched_lock_map, 1, _THIS_IP_)
 
+static inline int debug_lockdep_rcu_enabled(void)
+{
+	return likely(rcu_scheduler_active && debug_locks);
+}
+
 /**
  * rcu_read_lock_held - might we be in RCU read-side critical section?
  *
@@ -104,12 +109,14 @@ extern struct lockdep_map rcu_sched_lock_map;
  * an RCU read-side critical section.  In absence of CONFIG_PROVE_LOCKING,
  * this assumes we are in an RCU read-side critical section unless it can
  * prove otherwise.
+ *
+ * Check rcu_scheduler_active to prevent false positives during boot.
  */
 static inline int rcu_read_lock_held(void)
 {
-	if (debug_locks)
-		return lock_is_held(&rcu_lock_map);
-	return 1;
+	if (!debug_lockdep_rcu_enabled())
+		return 1;
+	return lock_is_held(&rcu_lock_map);
 }
 
 /**
@@ -119,12 +126,14 @@ static inline int rcu_read_lock_held(void)
  * an RCU-bh read-side critical section.  In absence of CONFIG_PROVE_LOCKING,
  * this assumes we are in an RCU-bh read-side critical section unless it can
  * prove otherwise.
+ *
+ * Check rcu_scheduler_active to prevent false positives during boot.
  */
 static inline int rcu_read_lock_bh_held(void)
 {
-	if (debug_locks)
-		return lock_is_held(&rcu_bh_lock_map);
-	return 1;
+	if (!debug_lockdep_rcu_enabled())
+		return 1;
+	return lock_is_held(&rcu_bh_lock_map);
 }
 
 /**
@@ -135,15 +144,19 @@ static inline int rcu_read_lock_bh_held(void)
  * this assumes we are in an RCU-sched read-side critical section unless it
  * can prove otherwise.  Note that disabling of preemption (including
  * disabling irqs) counts as an RCU-sched read-side critical section.
+ *
+ * Check rcu_scheduler_active to prevent false positives during boot.
  */
 #ifdef CONFIG_PREEMPT
 static inline int rcu_read_lock_sched_held(void)
 {
 	int lockdep_opinion = 0;
 
+	if (!debug_lockdep_rcu_enabled())
+		return 1;
 	if (debug_locks)
 		lockdep_opinion = lock_is_held(&rcu_sched_lock_map);
-	return lockdep_opinion || preempt_count() != 0 || !rcu_scheduler_active;
+	return lockdep_opinion || preempt_count() != 0;
 }
 #else /* #ifdef CONFIG_PREEMPT */
 static inline int rcu_read_lock_sched_held(void)
@@ -174,7 +187,7 @@ static inline int rcu_read_lock_bh_held(void)
 #ifdef CONFIG_PREEMPT
 static inline int rcu_read_lock_sched_held(void)
 {
-	return preempt_count() != 0 || !rcu_scheduler_active;
+	return !rcu_scheduler_active || preempt_count() != 0;
 }
 #else /* #ifdef CONFIG_PREEMPT */
 static inline int rcu_read_lock_sched_held(void)
@@ -198,7 +211,7 @@ static inline int rcu_read_lock_sched_held(void)
  */
 #define rcu_dereference_check(p, c) \
 	({ \
-		if (debug_locks && !(c)) \
+		if (debug_lockdep_rcu_enabled() && !(c)) \
 			lockdep_rcu_dereference(__FILE__, __LINE__); \
 		rcu_dereference_raw(p); \
 	})
-- 
cgit v1.2.3


From 9df93939b735dd273e49cbee290b9f4738500ef4 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Wed, 6 Jan 2010 21:58:48 +0100
Subject: ext3: Use bitops to read/modify EXT3_I(inode)->i_state

At several places we modify EXT3_I(inode)->i_state without holding i_mutex
(ext3_release_file, ext3_bmap, ext3_journalled_writepage, ext3_do_update_inode,
...). These modifications are racy and we can lose updates to i_state. So
convert handling of i_state to use bitops which are atomic.

Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/ext3/file.c            |  4 ++--
 fs/ext3/inode.c           | 18 +++++++++---------
 fs/ext3/xattr.c           | 14 +++++++-------
 include/linux/ext3_fs.h   | 33 +++++++++++++++++++++++++--------
 include/linux/ext3_fs_i.h |  2 +-
 5 files changed, 44 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/fs/ext3/file.c b/fs/ext3/file.c
index 388bbdfa0b4e..a86d3302cdc2 100644
--- a/fs/ext3/file.c
+++ b/fs/ext3/file.c
@@ -33,9 +33,9 @@
  */
 static int ext3_release_file (struct inode * inode, struct file * filp)
 {
-	if (EXT3_I(inode)->i_state & EXT3_STATE_FLUSH_ON_CLOSE) {
+	if (ext3_test_inode_state(inode, EXT3_STATE_FLUSH_ON_CLOSE)) {
 		filemap_flush(inode->i_mapping);
-		EXT3_I(inode)->i_state &= ~EXT3_STATE_FLUSH_ON_CLOSE;
+		ext3_clear_inode_state(inode, EXT3_STATE_FLUSH_ON_CLOSE);
 	}
 	/* if we are the last writer on the inode, drop the block reservation */
 	if ((filp->f_mode & FMODE_WRITE) &&
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 455e6e6e5cb9..44b53386ab8b 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -1378,7 +1378,7 @@ static int ext3_journalled_write_end(struct file *file,
 	 */
 	if (pos + len > inode->i_size && ext3_can_truncate(inode))
 		ext3_orphan_add(handle, inode);
-	EXT3_I(inode)->i_state |= EXT3_STATE_JDATA;
+	ext3_set_inode_state(inode, EXT3_STATE_JDATA);
 	if (inode->i_size > EXT3_I(inode)->i_disksize) {
 		EXT3_I(inode)->i_disksize = inode->i_size;
 		ret2 = ext3_mark_inode_dirty(handle, inode);
@@ -1417,7 +1417,7 @@ static sector_t ext3_bmap(struct address_space *mapping, sector_t block)
 	journal_t *journal;
 	int err;
 
-	if (EXT3_I(inode)->i_state & EXT3_STATE_JDATA) {
+	if (ext3_test_inode_state(inode, EXT3_STATE_JDATA)) {
 		/*
 		 * This is a REALLY heavyweight approach, but the use of
 		 * bmap on dirty files is expected to be extremely rare:
@@ -1436,7 +1436,7 @@ static sector_t ext3_bmap(struct address_space *mapping, sector_t block)
 		 * everything they get.
 		 */
 
-		EXT3_I(inode)->i_state &= ~EXT3_STATE_JDATA;
+		ext3_clear_inode_state(inode, EXT3_STATE_JDATA);
 		journal = EXT3_JOURNAL(inode);
 		journal_lock_updates(journal);
 		err = journal_flush(journal);
@@ -1670,7 +1670,7 @@ static int ext3_journalled_writepage(struct page *page,
 				PAGE_CACHE_SIZE, NULL, write_end_fn);
 		if (ret == 0)
 			ret = err;
-		EXT3_I(inode)->i_state |= EXT3_STATE_JDATA;
+		ext3_set_inode_state(inode, EXT3_STATE_JDATA);
 		unlock_page(page);
 	} else {
 		/*
@@ -2402,7 +2402,7 @@ void ext3_truncate(struct inode *inode)
 		goto out_notrans;
 
 	if (inode->i_size == 0 && ext3_should_writeback_data(inode))
-		ei->i_state |= EXT3_STATE_FLUSH_ON_CLOSE;
+		ext3_set_inode_state(inode, EXT3_STATE_FLUSH_ON_CLOSE);
 
 	/*
 	 * We have to lock the EOF page here, because lock_page() nests
@@ -2721,7 +2721,7 @@ int ext3_get_inode_loc(struct inode *inode, struct ext3_iloc *iloc)
 {
 	/* We have all inode data except xattrs in memory here. */
 	return __ext3_get_inode_loc(inode, iloc,
-		!(EXT3_I(inode)->i_state & EXT3_STATE_XATTR));
+		!ext3_test_inode_state(inode, EXT3_STATE_XATTR));
 }
 
 void ext3_set_inode_flags(struct inode *inode)
@@ -2893,7 +2893,7 @@ struct inode *ext3_iget(struct super_block *sb, unsigned long ino)
 					EXT3_GOOD_OLD_INODE_SIZE +
 					ei->i_extra_isize;
 			if (*magic == cpu_to_le32(EXT3_XATTR_MAGIC))
-				 ei->i_state |= EXT3_STATE_XATTR;
+				 ext3_set_inode_state(inode, EXT3_STATE_XATTR);
 		}
 	} else
 		ei->i_extra_isize = 0;
@@ -2955,7 +2955,7 @@ again:
 
 	/* For fields not not tracking in the in-memory inode,
 	 * initialise them to zero for new inodes. */
-	if (ei->i_state & EXT3_STATE_NEW)
+	if (ext3_test_inode_state(inode, EXT3_STATE_NEW))
 		memset(raw_inode, 0, EXT3_SB(inode->i_sb)->s_inode_size);
 
 	ext3_get_inode_flags(ei);
@@ -3052,7 +3052,7 @@ again:
 	rc = ext3_journal_dirty_metadata(handle, bh);
 	if (!err)
 		err = rc;
-	ei->i_state &= ~EXT3_STATE_NEW;
+	ext3_clear_inode_state(inode, EXT3_STATE_NEW);
 
 	atomic_set(&ei->i_sync_tid, handle->h_transaction->t_tid);
 out_brelse:
diff --git a/fs/ext3/xattr.c b/fs/ext3/xattr.c
index 66895ccf76c7..2d2fb2a85961 100644
--- a/fs/ext3/xattr.c
+++ b/fs/ext3/xattr.c
@@ -274,7 +274,7 @@ ext3_xattr_ibody_get(struct inode *inode, int name_index, const char *name,
 	void *end;
 	int error;
 
-	if (!(EXT3_I(inode)->i_state & EXT3_STATE_XATTR))
+	if (!ext3_test_inode_state(inode, EXT3_STATE_XATTR))
 		return -ENODATA;
 	error = ext3_get_inode_loc(inode, &iloc);
 	if (error)
@@ -403,7 +403,7 @@ ext3_xattr_ibody_list(struct dentry *dentry, char *buffer, size_t buffer_size)
 	void *end;
 	int error;
 
-	if (!(EXT3_I(inode)->i_state & EXT3_STATE_XATTR))
+	if (!ext3_test_inode_state(inode, EXT3_STATE_XATTR))
 		return 0;
 	error = ext3_get_inode_loc(inode, &iloc);
 	if (error)
@@ -882,7 +882,7 @@ ext3_xattr_ibody_find(struct inode *inode, struct ext3_xattr_info *i,
 	is->s.base = is->s.first = IFIRST(header);
 	is->s.here = is->s.first;
 	is->s.end = (void *)raw_inode + EXT3_SB(inode->i_sb)->s_inode_size;
-	if (EXT3_I(inode)->i_state & EXT3_STATE_XATTR) {
+	if (ext3_test_inode_state(inode, EXT3_STATE_XATTR)) {
 		error = ext3_xattr_check_names(IFIRST(header), is->s.end);
 		if (error)
 			return error;
@@ -914,10 +914,10 @@ ext3_xattr_ibody_set(handle_t *handle, struct inode *inode,
 	header = IHDR(inode, ext3_raw_inode(&is->iloc));
 	if (!IS_LAST_ENTRY(s->first)) {
 		header->h_magic = cpu_to_le32(EXT3_XATTR_MAGIC);
-		EXT3_I(inode)->i_state |= EXT3_STATE_XATTR;
+		ext3_set_inode_state(inode, EXT3_STATE_XATTR);
 	} else {
 		header->h_magic = cpu_to_le32(0);
-		EXT3_I(inode)->i_state &= ~EXT3_STATE_XATTR;
+		ext3_clear_inode_state(inode, EXT3_STATE_XATTR);
 	}
 	return 0;
 }
@@ -967,10 +967,10 @@ ext3_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
 	if (error)
 		goto cleanup;
 
-	if (EXT3_I(inode)->i_state & EXT3_STATE_NEW) {
+	if (ext3_test_inode_state(inode, EXT3_STATE_NEW)) {
 		struct ext3_inode *raw_inode = ext3_raw_inode(&is.iloc);
 		memset(raw_inode, 0, EXT3_SB(inode->i_sb)->s_inode_size);
-		EXT3_I(inode)->i_state &= ~EXT3_STATE_NEW;
+		ext3_clear_inode_state(inode, EXT3_STATE_NEW);
 	}
 
 	error = ext3_xattr_ibody_find(inode, &i, &is);
diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h
index 6b049030fbe6..e6590f8f0b3c 100644
--- a/include/linux/ext3_fs.h
+++ b/include/linux/ext3_fs.h
@@ -202,14 +202,6 @@ static inline __u32 ext3_mask_flags(umode_t mode, __u32 flags)
 		return flags & EXT3_OTHER_FLMASK;
 }
 
-/*
- * Inode dynamic state flags
- */
-#define EXT3_STATE_JDATA		0x00000001 /* journaled data exists */
-#define EXT3_STATE_NEW			0x00000002 /* inode is newly created */
-#define EXT3_STATE_XATTR		0x00000004 /* has in-inode xattrs */
-#define EXT3_STATE_FLUSH_ON_CLOSE	0x00000008
-
 /* Used to pass group descriptor data when online resize is done */
 struct ext3_new_group_input {
 	__u32 group;            /* Group number for this data */
@@ -560,6 +552,31 @@ static inline int ext3_valid_inum(struct super_block *sb, unsigned long ino)
 		(ino >= EXT3_FIRST_INO(sb) &&
 		 ino <= le32_to_cpu(EXT3_SB(sb)->s_es->s_inodes_count));
 }
+
+/*
+ * Inode dynamic state flags
+ */
+enum {
+	EXT3_STATE_JDATA,		/* journaled data exists */
+	EXT3_STATE_NEW,			/* inode is newly created */
+	EXT3_STATE_XATTR,		/* has in-inode xattrs */
+	EXT3_STATE_FLUSH_ON_CLOSE,	/* flush dirty pages on close */
+};
+
+static inline int ext3_test_inode_state(struct inode *inode, int bit)
+{
+	return test_bit(bit, &EXT3_I(inode)->i_state);
+}
+
+static inline void ext3_set_inode_state(struct inode *inode, int bit)
+{
+	set_bit(bit, &EXT3_I(inode)->i_state);
+}
+
+static inline void ext3_clear_inode_state(struct inode *inode, int bit)
+{
+	clear_bit(bit, &EXT3_I(inode)->i_state);
+}
 #else
 /* Assume that user mode programs are passing in an ext3fs superblock, not
  * a kernel struct super_block.  This will allow us to call the feature-test
diff --git a/include/linux/ext3_fs_i.h b/include/linux/ext3_fs_i.h
index 93e7428156ba..7679acdb519a 100644
--- a/include/linux/ext3_fs_i.h
+++ b/include/linux/ext3_fs_i.h
@@ -87,7 +87,7 @@ struct ext3_inode_info {
 	 * near to their parent directory's inode.
 	 */
 	__u32	i_block_group;
-	__u32	i_state;		/* Dynamic state flags for ext3 */
+	unsigned long	i_state;	/* Dynamic state flags for ext3 */
 
 	/* block reservation info */
 	struct ext3_block_alloc_info *i_block_alloc_info;
-- 
cgit v1.2.3


From c7e8d4d6dceeb6fd236991f590d3fa6f97c59874 Mon Sep 17 00:00:00 2001
From: Christoph Egger <siccegge@stud.informatik.uni-erlangen.de>
Date: Fri, 5 Feb 2010 14:13:33 +0100
Subject: jbd[2]: remove references to BUFFER_DEBUG

CONFIG_BUFFER_DEBUG seems to have been removed from the documentation
somewhere around 2.4.15 and seemingly hasn't been available even
longer. It is, however, still referenced at one place from the jbd
code (one is a copy of the other header). Time to clean it up

Signed-off-by: Christoph Egger <siccegge@stud.informatik.uni-erlangen.de>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 include/linux/jbd.h  | 11 -----------
 include/linux/jbd2.h | 11 -----------
 2 files changed, 22 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/jbd.h b/include/linux/jbd.h
index 331530cd3cc6..f3aa59cb675d 100644
--- a/include/linux/jbd.h
+++ b/include/linux/jbd.h
@@ -246,19 +246,8 @@ typedef struct journal_superblock_s
 
 #define J_ASSERT(assert)	BUG_ON(!(assert))
 
-#if defined(CONFIG_BUFFER_DEBUG)
-void buffer_assertion_failure(struct buffer_head *bh);
-#define J_ASSERT_BH(bh, expr)						\
-	do {								\
-		if (!(expr))						\
-			buffer_assertion_failure(bh);			\
-		J_ASSERT(expr);						\
-	} while (0)
-#define J_ASSERT_JH(jh, expr)	J_ASSERT_BH(jh2bh(jh), expr)
-#else
 #define J_ASSERT_BH(bh, expr)	J_ASSERT(expr)
 #define J_ASSERT_JH(jh, expr)	J_ASSERT(expr)
-#endif
 
 #if defined(JBD_PARANOID_IOFAIL)
 #define J_EXPECT(expr, why...)		J_ASSERT(expr)
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index 638ce4554c76..4cf619161ed0 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -284,19 +284,8 @@ typedef struct journal_superblock_s
 
 #define J_ASSERT(assert)	BUG_ON(!(assert))
 
-#if defined(CONFIG_BUFFER_DEBUG)
-void buffer_assertion_failure(struct buffer_head *bh);
-#define J_ASSERT_BH(bh, expr)						\
-	do {								\
-		if (!(expr))						\
-			buffer_assertion_failure(bh);			\
-		J_ASSERT(expr);						\
-	} while (0)
-#define J_ASSERT_JH(jh, expr)	J_ASSERT_BH(jh2bh(jh), expr)
-#else
 #define J_ASSERT_BH(bh, expr)	J_ASSERT(expr)
 #define J_ASSERT_JH(jh, expr)	J_ASSERT(expr)
-#endif
 
 #if defined(JBD2_PARANOID_IOFAIL)
 #define J_EXPECT(expr, why...)		J_ASSERT(expr)
-- 
cgit v1.2.3


From c469070aea5a0ada45a836937c776fd3083dae2b Mon Sep 17 00:00:00 2001
From: Dmitry Monakhov <dmonakhov@openvz.org>
Date: Tue, 9 Feb 2010 17:53:36 +0100
Subject: quota: manage reserved space when quota is not active [v2]

Since we implemented generic reserved space management interface,
then it is possible to account reserved space even when quota
is not active (similar to i_blocks/i_bytes).

Without this patch following testcase result in massive comlain from
WARN_ON in dquot_claim_space()

TEST_CASE:
mount /dev/sdb /mnt -oquota
dd if=/dev/zero of=/mnt/test bs=1M count=1
quotaon /mnt
# fs_reserved_spave == 1Mb
# quota_reserved_space == 0, because quota was disabled
dd if=/dev/zero of=/mnt/test seek=1 bs=1M count=1
# fs_reserved_spave == 2Mb
# quota_reserved_space == 1Mb
sync  # ->dquot_claim_space() -> WARN_ON

Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/quota/dquot.c         | 10 ++++++----
 include/linux/quotaops.h | 11 +++++++++--
 2 files changed, 15 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index f6eaf0d8fd6a..f11255b18b58 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -1354,28 +1354,30 @@ static qsize_t *inode_reserved_space(struct inode * inode)
 	return inode->i_sb->dq_op->get_reserved_space(inode);
 }
 
-static void inode_add_rsv_space(struct inode *inode, qsize_t number)
+void inode_add_rsv_space(struct inode *inode, qsize_t number)
 {
 	spin_lock(&inode->i_lock);
 	*inode_reserved_space(inode) += number;
 	spin_unlock(&inode->i_lock);
 }
+EXPORT_SYMBOL(inode_add_rsv_space);
 
-
-static void inode_claim_rsv_space(struct inode *inode, qsize_t number)
+void inode_claim_rsv_space(struct inode *inode, qsize_t number)
 {
 	spin_lock(&inode->i_lock);
 	*inode_reserved_space(inode) -= number;
 	__inode_add_bytes(inode, number);
 	spin_unlock(&inode->i_lock);
 }
+EXPORT_SYMBOL(inode_claim_rsv_space);
 
-static void inode_sub_rsv_space(struct inode *inode, qsize_t number)
+void inode_sub_rsv_space(struct inode *inode, qsize_t number)
 {
 	spin_lock(&inode->i_lock);
 	*inode_reserved_space(inode) -= number;
 	spin_unlock(&inode->i_lock);
 }
+EXPORT_SYMBOL(inode_sub_rsv_space);
 
 static qsize_t inode_get_rsv_space(struct inode *inode)
 {
diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h
index 3ebb23153640..a529d86e7e73 100644
--- a/include/linux/quotaops.h
+++ b/include/linux/quotaops.h
@@ -26,6 +26,10 @@ static inline void writeout_quota_sb(struct super_block *sb, int type)
 		sb->s_qcop->quota_sync(sb, type);
 }
 
+void inode_add_rsv_space(struct inode *inode, qsize_t number);
+void inode_claim_rsv_space(struct inode *inode, qsize_t number);
+void inode_sub_rsv_space(struct inode *inode, qsize_t number);
+
 int dquot_initialize(struct inode *inode, int type);
 int dquot_drop(struct inode *inode);
 struct dquot *dqget(struct super_block *sb, unsigned int id, int type);
@@ -42,7 +46,6 @@ int dquot_alloc_inode(const struct inode *inode, qsize_t number);
 int dquot_reserve_space(struct inode *inode, qsize_t number, int prealloc);
 int dquot_claim_space(struct inode *inode, qsize_t number);
 void dquot_release_reserved_space(struct inode *inode, qsize_t number);
-qsize_t dquot_get_reserved_space(struct inode *inode);
 
 int dquot_free_space(struct inode *inode, qsize_t number);
 int dquot_free_inode(const struct inode *inode, qsize_t number);
@@ -199,6 +202,8 @@ static inline int vfs_dq_reserve_space(struct inode *inode, qsize_t nr)
 		if (inode->i_sb->dq_op->reserve_space(inode, nr, 0) == NO_QUOTA)
 			return 1;
 	}
+	else
+		inode_add_rsv_space(inode, nr);
 	return 0;
 }
 
@@ -221,7 +226,7 @@ static inline int vfs_dq_claim_space(struct inode *inode, qsize_t nr)
 		if (inode->i_sb->dq_op->claim_space(inode, nr) == NO_QUOTA)
 			return 1;
 	} else
-		inode_add_bytes(inode, nr);
+		inode_claim_rsv_space(inode, nr);
 
 	mark_inode_dirty(inode);
 	return 0;
@@ -235,6 +240,8 @@ void vfs_dq_release_reservation_space(struct inode *inode, qsize_t nr)
 {
 	if (sb_any_quota_active(inode->i_sb))
 		inode->i_sb->dq_op->release_rsv(inode, nr);
+	else
+		inode_sub_rsv_space(inode, nr);
 }
 
 static inline void vfs_dq_free_space_nodirty(struct inode *inode, qsize_t nr)
-- 
cgit v1.2.3


From 8c4e4acd660a09e571a71583b5bbe1eee700c9ad Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Tue, 16 Feb 2010 03:44:51 -0500
Subject: quota: clean up Q_XQUOTASYNC

Currently Q_XQUOTASYNC calls into the quota_sync method, but XFS does something
entirely different in it than the rest of the filesystems.  xfs_quota which
calls Q_XQUOTASYNC expects an asynchronous data writeout to flush delayed
allocations, while the "VFS" quota support wants to flush changes to the quota
file.

So make Q_XQUOTASYNC call into the writeback code directly and make the
quota_sync method optional as XFS doesn't need in the sense expected by the
rest of the quota code.

GFS2 was using limited XFS-style quota and has a quota_sync method fitting
neither the style used by vfs_quota_sync nor xfs_fs_quota_sync.  I left it
in for now as per discussion with Steve it expects to be called from the
sync path this way.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/quota/quota.c                | 11 +++++++----
 fs/xfs/linux-2.6/xfs_quotaops.c | 15 ---------------
 include/linux/quotaops.h        |  2 +-
 3 files changed, 8 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/fs/quota/quota.c b/fs/quota/quota.c
index d0efe302b1c1..3d31228082ea 100644
--- a/fs/quota/quota.c
+++ b/fs/quota/quota.c
@@ -18,6 +18,7 @@
 #include <linux/capability.h>
 #include <linux/quotaops.h>
 #include <linux/types.h>
+#include <linux/writeback.h>
 #include <net/netlink.h>
 #include <net/genetlink.h>
 
@@ -52,7 +53,7 @@ void sync_quota_sb(struct super_block *sb, int type)
 {
 	int cnt;
 
-	if (!sb->s_qcop->quota_sync)
+	if (!sb->s_qcop || !sb->s_qcop->quota_sync)
 		return;
 
 	sb->s_qcop->quota_sync(sb, type);
@@ -318,9 +319,11 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id,
 	case Q_XGETQUOTA:
 		return quota_getxquota(sb, type, id, addr);
 	case Q_XQUOTASYNC:
-		if (!sb->s_qcop->quota_sync)
-			return -ENOSYS;
-		return sb->s_qcop->quota_sync(sb, type);
+		/* caller already holds s_umount */
+		if (sb->s_flags & MS_RDONLY)
+			return -EROFS;
+		writeback_inodes_sb(sb);
+		return 0;
 	default:
 		return -EINVAL;
 	}
diff --git a/fs/xfs/linux-2.6/xfs_quotaops.c b/fs/xfs/linux-2.6/xfs_quotaops.c
index 3d4a0c84d634..07d67c624922 100644
--- a/fs/xfs/linux-2.6/xfs_quotaops.c
+++ b/fs/xfs/linux-2.6/xfs_quotaops.c
@@ -43,20 +43,6 @@ xfs_quota_type(int type)
 	}
 }
 
-STATIC int
-xfs_fs_quota_sync(
-	struct super_block	*sb,
-	int			type)
-{
-	struct xfs_mount	*mp = XFS_M(sb);
-
-	if (sb->s_flags & MS_RDONLY)
-		return -EROFS;
-	if (!XFS_IS_QUOTA_RUNNING(mp))
-		return -ENOSYS;
-	return -xfs_sync_data(mp, 0);
-}
-
 STATIC int
 xfs_fs_get_xstate(
 	struct super_block	*sb,
@@ -151,7 +137,6 @@ xfs_fs_set_xquota(
 }
 
 const struct quotactl_ops xfs_quotactl_operations = {
-	.quota_sync		= xfs_fs_quota_sync,
 	.get_xstate		= xfs_fs_get_xstate,
 	.set_xstate		= xfs_fs_set_xstate,
 	.get_xquota		= xfs_fs_get_xquota,
diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h
index a529d86e7e73..69d26bc0f884 100644
--- a/include/linux/quotaops.h
+++ b/include/linux/quotaops.h
@@ -22,7 +22,7 @@ static inline struct quota_info *sb_dqopt(struct super_block *sb)
 void sync_quota_sb(struct super_block *sb, int type);
 static inline void writeout_quota_sb(struct super_block *sb, int type)
 {
-	if (sb->s_qcop->quota_sync)
+	if (sb->s_qcop && sb->s_qcop->quota_sync)
 		sb->s_qcop->quota_sync(sb, type);
 }
 
-- 
cgit v1.2.3


From 5fb324ad24febe57a8a2e62903dcb7bad546ea71 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Tue, 16 Feb 2010 03:44:52 -0500
Subject: quota: move code from sync_quota_sb into vfs_quota_sync

Currenly sync_quota_sb does a lot of sync and truncate action that only
applies to "VFS" style quotas and is actively harmful for the sync
performance in XFS.  Move it into vfs_quota_sync and add a wait parameter
to ->quota_sync to tell if we need it or not.

My audit of the GFS2 code says it's also not needed given the way GFS2
implements quotas, but I'd be happy if this can get a detailed review.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/gfs2/quota.c          |  9 +++++++--
 fs/gfs2/quota.h          |  2 +-
 fs/gfs2/super.c          |  2 +-
 fs/gfs2/sys.c            |  2 +-
 fs/quota/dquot.c         | 29 ++++++++++++++++++++++++++++-
 fs/quota/quota.c         | 46 +++++-----------------------------------------
 fs/sync.c                | 14 +++++++-------
 include/linux/quota.h    |  2 +-
 include/linux/quotaops.h | 17 +----------------
 9 files changed, 52 insertions(+), 71 deletions(-)

(limited to 'include/linux')

diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index e3bf6eab8750..6dbcbad6ab17 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -1083,7 +1083,7 @@ void gfs2_quota_change(struct gfs2_inode *ip, s64 change,
 	}
 }
 
-int gfs2_quota_sync(struct super_block *sb, int type)
+int gfs2_quota_sync(struct super_block *sb, int type, int wait)
 {
 	struct gfs2_sbd *sdp = sb->s_fs_info;
 	struct gfs2_quota_data **qda;
@@ -1127,6 +1127,11 @@ int gfs2_quota_sync(struct super_block *sb, int type)
 	return error;
 }
 
+static int gfs2_quota_sync_timeo(struct super_block *sb, int type)
+{
+	return gfs2_quota_sync(sb, type, 0);
+}
+
 int gfs2_quota_refresh(struct gfs2_sbd *sdp, int user, u32 id)
 {
 	struct gfs2_quota_data *qd;
@@ -1382,7 +1387,7 @@ int gfs2_quotad(void *data)
 					   &tune->gt_statfs_quantum);
 
 		/* Update quota file */
-		quotad_check_timeo(sdp, "sync", gfs2_quota_sync, t,
+		quotad_check_timeo(sdp, "sync", gfs2_quota_sync_timeo, t,
 				   &quotad_timeo, &tune->gt_quota_quantum);
 
 		/* Check for & recover partially truncated inodes */
diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h
index e271fa07ad02..195f60c8bd14 100644
--- a/fs/gfs2/quota.h
+++ b/fs/gfs2/quota.h
@@ -25,7 +25,7 @@ extern int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid);
 extern void gfs2_quota_change(struct gfs2_inode *ip, s64 change,
 			      u32 uid, u32 gid);
 
-extern int gfs2_quota_sync(struct super_block *sb, int type);
+extern int gfs2_quota_sync(struct super_block *sb, int type, int wait);
 extern int gfs2_quota_refresh(struct gfs2_sbd *sdp, int user, u32 id);
 
 extern int gfs2_quota_init(struct gfs2_sbd *sdp);
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index b9dd3da22c0a..a8c2bcd0fcc8 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -764,7 +764,7 @@ static int gfs2_make_fs_ro(struct gfs2_sbd *sdp)
 	int error;
 
 	flush_workqueue(gfs2_delete_workqueue);
-	gfs2_quota_sync(sdp->sd_vfs, 0);
+	gfs2_quota_sync(sdp->sd_vfs, 0, 1);
 	gfs2_statfs_sync(sdp->sd_vfs, 0);
 
 	error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, GL_NOCACHE,
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index 0dc34621f6a6..4496cc37a0fa 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -167,7 +167,7 @@ static ssize_t quota_sync_store(struct gfs2_sbd *sdp, const char *buf,
 	if (simple_strtol(buf, NULL, 0) != 1)
 		return -EINVAL;
 
-	gfs2_quota_sync(sdp->sd_vfs, 0);
+	gfs2_quota_sync(sdp->sd_vfs, 0, 1);
 	return len;
 }
 
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 6c849de5dc8f..4c2213f7ed36 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -570,7 +570,7 @@ out:
 }
 EXPORT_SYMBOL(dquot_scan_active);
 
-int vfs_quota_sync(struct super_block *sb, int type)
+int vfs_quota_sync(struct super_block *sb, int type, int wait)
 {
 	struct list_head *dirty;
 	struct dquot *dquot;
@@ -615,6 +615,33 @@ int vfs_quota_sync(struct super_block *sb, int type)
 	spin_unlock(&dq_list_lock);
 	mutex_unlock(&dqopt->dqonoff_mutex);
 
+	if (!wait || (sb_dqopt(sb)->flags & DQUOT_QUOTA_SYS_FILE))
+		return 0;
+
+	/* This is not very clever (and fast) but currently I don't know about
+	 * any other simple way of getting quota data to disk and we must get
+	 * them there for userspace to be visible... */
+	if (sb->s_op->sync_fs)
+		sb->s_op->sync_fs(sb, 1);
+	sync_blockdev(sb->s_bdev);
+
+	/*
+	 * Now when everything is written we can discard the pagecache so
+	 * that userspace sees the changes.
+	 */
+	mutex_lock(&sb_dqopt(sb)->dqonoff_mutex);
+	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
+		if (type != -1 && cnt != type)
+			continue;
+		if (!sb_has_quota_active(sb, cnt))
+			continue;
+		mutex_lock_nested(&sb_dqopt(sb)->files[cnt]->i_mutex,
+				  I_MUTEX_QUOTA);
+		truncate_inode_pages(&sb_dqopt(sb)->files[cnt]->i_data, 0);
+		mutex_unlock(&sb_dqopt(sb)->files[cnt]->i_mutex);
+	}
+	mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex);
+
 	return 0;
 }
 EXPORT_SYMBOL(vfs_quota_sync);
diff --git a/fs/quota/quota.c b/fs/quota/quota.c
index 3d31228082ea..0593b229656c 100644
--- a/fs/quota/quota.c
+++ b/fs/quota/quota.c
@@ -48,44 +48,6 @@ static int check_quotactl_permission(struct super_block *sb, int type, int cmd,
 	return security_quotactl(cmd, type, id, sb);
 }
 
-#ifdef CONFIG_QUOTA
-void sync_quota_sb(struct super_block *sb, int type)
-{
-	int cnt;
-
-	if (!sb->s_qcop || !sb->s_qcop->quota_sync)
-		return;
-
-	sb->s_qcop->quota_sync(sb, type);
-
-	if (sb_dqopt(sb)->flags & DQUOT_QUOTA_SYS_FILE)
-		return;
-	/* This is not very clever (and fast) but currently I don't know about
-	 * any other simple way of getting quota data to disk and we must get
-	 * them there for userspace to be visible... */
-	if (sb->s_op->sync_fs)
-		sb->s_op->sync_fs(sb, 1);
-	sync_blockdev(sb->s_bdev);
-
-	/*
-	 * Now when everything is written we can discard the pagecache so
-	 * that userspace sees the changes.
-	 */
-	mutex_lock(&sb_dqopt(sb)->dqonoff_mutex);
-	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
-		if (type != -1 && cnt != type)
-			continue;
-		if (!sb_has_quota_active(sb, cnt))
-			continue;
-		mutex_lock_nested(&sb_dqopt(sb)->files[cnt]->i_mutex,
-				  I_MUTEX_QUOTA);
-		truncate_inode_pages(&sb_dqopt(sb)->files[cnt]->i_data, 0);
-		mutex_unlock(&sb_dqopt(sb)->files[cnt]->i_mutex);
-	}
-	mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex);
-}
-#endif
-
 static int quota_sync_all(int type)
 {
 	struct super_block *sb;
@@ -101,6 +63,9 @@ static int quota_sync_all(int type)
 	spin_lock(&sb_lock);
 restart:
 	list_for_each_entry(sb, &super_blocks, s_list) {
+		if (!sb->s_qcop || !sb->s_qcop->quota_sync)
+			continue;
+
 		/* This test just improves performance so it needn't be
 		 * reliable... */
 		for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
@@ -119,7 +84,7 @@ restart:
 		spin_unlock(&sb_lock);
 		down_read(&sb->s_umount);
 		if (sb->s_root)
-			sync_quota_sb(sb, type);
+			sb->s_qcop->quota_sync(sb, type, 1);
 		up_read(&sb->s_umount);
 		spin_lock(&sb_lock);
 		if (__put_super_and_need_restart(sb))
@@ -306,8 +271,7 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id,
 	case Q_SYNC:
 		if (!sb->s_qcop->quota_sync)
 			return -ENOSYS;
-		sync_quota_sb(sb, type);
-		return 0;
+		return sb->s_qcop->quota_sync(sb, type, 1);
 	case Q_XQUOTAON:
 	case Q_XQUOTAOFF:
 	case Q_XQUOTARM:
diff --git a/fs/sync.c b/fs/sync.c
index 418727a2a239..f557d71cb097 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -34,14 +34,14 @@ static int __sync_filesystem(struct super_block *sb, int wait)
 	if (!sb->s_bdi)
 		return 0;
 
-	/* Avoid doing twice syncing and cache pruning for quota sync */
-	if (!wait) {
-		writeout_quota_sb(sb, -1);
-		writeback_inodes_sb(sb);
-	} else {
-		sync_quota_sb(sb, -1);
+	if (sb->s_qcop && sb->s_qcop->quota_sync)
+		sb->s_qcop->quota_sync(sb, -1, wait);
+
+	if (wait)
 		sync_inodes_sb(sb);
-	}
+	else
+		writeback_inodes_sb(sb);
+
 	if (sb->s_op->sync_fs)
 		sb->s_op->sync_fs(sb, wait);
 	return __sync_blockdev(sb->s_bdev, wait);
diff --git a/include/linux/quota.h b/include/linux/quota.h
index a6861f117480..570348cbccb1 100644
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -324,7 +324,7 @@ struct dquot_operations {
 struct quotactl_ops {
 	int (*quota_on)(struct super_block *, int, int, char *, int);
 	int (*quota_off)(struct super_block *, int, int);
-	int (*quota_sync)(struct super_block *, int);
+	int (*quota_sync)(struct super_block *, int, int);
 	int (*get_info)(struct super_block *, int, struct if_dqinfo *);
 	int (*set_info)(struct super_block *, int, struct if_dqinfo *);
 	int (*get_dqblk)(struct super_block *, int, qid_t, struct if_dqblk *);
diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h
index 69d26bc0f884..8cfd0d44c994 100644
--- a/include/linux/quotaops.h
+++ b/include/linux/quotaops.h
@@ -19,13 +19,6 @@ static inline struct quota_info *sb_dqopt(struct super_block *sb)
 /*
  * declaration of quota_function calls in kernel.
  */
-void sync_quota_sb(struct super_block *sb, int type);
-static inline void writeout_quota_sb(struct super_block *sb, int type)
-{
-	if (sb->s_qcop && sb->s_qcop->quota_sync)
-		sb->s_qcop->quota_sync(sb, type);
-}
-
 void inode_add_rsv_space(struct inode *inode, qsize_t number);
 void inode_claim_rsv_space(struct inode *inode, qsize_t number);
 void inode_sub_rsv_space(struct inode *inode, qsize_t number);
@@ -67,7 +60,7 @@ int vfs_quota_on_mount(struct super_block *sb, char *qf_name,
  	int format_id, int type);
 int vfs_quota_off(struct super_block *sb, int type, int remount);
 int vfs_quota_disable(struct super_block *sb, int type, unsigned int flags);
-int vfs_quota_sync(struct super_block *sb, int type);
+int vfs_quota_sync(struct super_block *sb, int type, int wait);
 int vfs_get_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii);
 int vfs_set_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii);
 int vfs_get_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *di);
@@ -340,14 +333,6 @@ static inline void vfs_dq_free_inode(struct inode *inode)
 {
 }
 
-static inline void sync_quota_sb(struct super_block *sb, int type)
-{
-}
-
-static inline void writeout_quota_sb(struct super_block *sb, int type)
-{
-}
-
 static inline int vfs_dq_off(struct super_block *sb, int remount)
 {
 	return 0;
-- 
cgit v1.2.3


From ad1e6e8da9fe8cb7ecfde8eabacedc3b50fceae4 Mon Sep 17 00:00:00 2001
From: Dmitry Monakhov <dmonakhov@openvz.org>
Date: Tue, 16 Feb 2010 08:31:49 +0300
Subject: quota: sb_quota state flags cleanup

- remove hardcoded USRQUOTA/GRPQUOTA flags
- convert int to bool for appropriate functions

Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/quota/dquot.c         |  3 ++-
 include/linux/quota.h    | 15 +++++++--------
 include/linux/quotaops.h | 31 +++++++++++++++++--------------
 3 files changed, 26 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 4c2213f7ed36..5a831dc5ab28 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -1301,7 +1301,7 @@ int dquot_initialize(struct inode *inode, int type)
 {
 	unsigned int id = 0;
 	int cnt, ret = 0;
-	struct dquot *got[MAXQUOTAS] = { NULL, NULL };
+	struct dquot *got[MAXQUOTAS];
 	struct super_block *sb = inode->i_sb;
 	qsize_t rsv;
 
@@ -1312,6 +1312,7 @@ int dquot_initialize(struct inode *inode, int type)
 
 	/* First get references to structures we might need. */
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
+		got[cnt] = NULL;
 		if (type != -1 && cnt != type)
 			continue;
 		switch (cnt) {
diff --git a/include/linux/quota.h b/include/linux/quota.h
index 570348cbccb1..92547a57e25a 100644
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -357,26 +357,25 @@ enum {
 #define DQUOT_STATE_FLAGS	(DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED | \
 				 DQUOT_SUSPENDED)
 /* Other quota flags */
-#define DQUOT_QUOTA_SYS_FILE	(1 << 6)	/* Quota file is a special
+#define DQUOT_STATE_LAST	(_DQUOT_STATE_FLAGS * MAXQUOTAS)
+#define DQUOT_QUOTA_SYS_FILE	(1 << DQUOT_STATE_LAST)
+						/* Quota file is a special
 						 * system file and user cannot
 						 * touch it. Filesystem is
 						 * responsible for setting
 						 * S_NOQUOTA, S_NOATIME flags
 						 */
-#define DQUOT_NEGATIVE_USAGE	(1 << 7)	/* Allow negative quota usage */
+#define DQUOT_NEGATIVE_USAGE	(1 << (DQUOT_STATE_LAST + 1))
+					       /* Allow negative quota usage */
 
 static inline unsigned int dquot_state_flag(unsigned int flags, int type)
 {
-	if (type == USRQUOTA)
-		return flags;
-	return flags << _DQUOT_STATE_FLAGS;
+	return flags << _DQUOT_STATE_FLAGS * type;
 }
 
 static inline unsigned int dquot_generic_flag(unsigned int flags, int type)
 {
-	if (type == USRQUOTA)
-		return flags;
-	return flags >> _DQUOT_STATE_FLAGS;
+	return (flags >> _DQUOT_STATE_FLAGS * type) & DQUOT_STATE_FLAGS;
 }
 
 #ifdef CONFIG_QUOTA_NETLINK_INTERFACE
diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h
index 8cfd0d44c994..e563a20cff4f 100644
--- a/include/linux/quotaops.h
+++ b/include/linux/quotaops.h
@@ -79,53 +79,56 @@ static inline struct mem_dqinfo *sb_dqinfo(struct super_block *sb, int type)
  * Functions for checking status of quota
  */
 
-static inline int sb_has_quota_usage_enabled(struct super_block *sb, int type)
+static inline bool sb_has_quota_usage_enabled(struct super_block *sb, int type)
 {
 	return sb_dqopt(sb)->flags &
 				dquot_state_flag(DQUOT_USAGE_ENABLED, type);
 }
 
-static inline int sb_has_quota_limits_enabled(struct super_block *sb, int type)
+static inline bool sb_has_quota_limits_enabled(struct super_block *sb, int type)
 {
 	return sb_dqopt(sb)->flags &
 				dquot_state_flag(DQUOT_LIMITS_ENABLED, type);
 }
 
-static inline int sb_has_quota_suspended(struct super_block *sb, int type)
+static inline bool sb_has_quota_suspended(struct super_block *sb, int type)
 {
 	return sb_dqopt(sb)->flags &
 				dquot_state_flag(DQUOT_SUSPENDED, type);
 }
 
-static inline int sb_any_quota_suspended(struct super_block *sb)
+static inline unsigned sb_any_quota_suspended(struct super_block *sb)
 {
-	return sb_has_quota_suspended(sb, USRQUOTA) ||
-		sb_has_quota_suspended(sb, GRPQUOTA);
+	unsigned type, tmsk = 0;
+	for (type = 0; type < MAXQUOTAS; type++)
+		tmsk |= sb_has_quota_suspended(sb, type) << type;
+	return tmsk;
 }
 
 /* Does kernel know about any quota information for given sb + type? */
-static inline int sb_has_quota_loaded(struct super_block *sb, int type)
+static inline bool sb_has_quota_loaded(struct super_block *sb, int type)
 {
 	/* Currently if anything is on, then quota usage is on as well */
 	return sb_has_quota_usage_enabled(sb, type);
 }
 
-static inline int sb_any_quota_loaded(struct super_block *sb)
+static inline unsigned sb_any_quota_loaded(struct super_block *sb)
 {
-	return sb_has_quota_loaded(sb, USRQUOTA) ||
-		sb_has_quota_loaded(sb, GRPQUOTA);
+	unsigned type, tmsk = 0;
+	for (type = 0; type < MAXQUOTAS; type++)
+		tmsk |= sb_has_quota_loaded(sb, type) << type;
+	return	tmsk;
 }
 
-static inline int sb_has_quota_active(struct super_block *sb, int type)
+static inline bool sb_has_quota_active(struct super_block *sb, int type)
 {
 	return sb_has_quota_loaded(sb, type) &&
 	       !sb_has_quota_suspended(sb, type);
 }
 
-static inline int sb_any_quota_active(struct super_block *sb)
+static inline unsigned sb_any_quota_active(struct super_block *sb)
 {
-	return sb_has_quota_active(sb, USRQUOTA) ||
-	       sb_has_quota_active(sb, GRPQUOTA);
+	return sb_any_quota_loaded(sb) & ~sb_any_quota_suspended(sb);
 }
 
 /*
-- 
cgit v1.2.3


From 8ddd69d6df4758bf0cab981481af24cc84419567 Mon Sep 17 00:00:00 2001
From: Dmitry Monakhov <dmonakhov@openvz.org>
Date: Tue, 16 Feb 2010 08:31:50 +0300
Subject: quota: generalize quota transfer interface

Current quota transfer interface support only uid/gid.
This patch extend interface in order to support various quotas types
The goal is accomplished without changes in most frequently used
vfs_dq_transfer() func.

Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/quota/dquot.c         | 32 ++++++++++++++++++++------------
 include/linux/quota.h    |  2 +-
 include/linux/quotaops.h |  2 +-
 3 files changed, 22 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 5a831dc5ab28..4d2041fddefc 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -1695,15 +1695,13 @@ EXPORT_SYMBOL(dquot_free_inode);
  * This operation can block, but only after everything is updated
  * A transaction must be started when entering this function.
  */
-int dquot_transfer(struct inode *inode, struct iattr *iattr)
+int dquot_transfer(struct inode *inode, qid_t *chid, unsigned long mask)
 {
 	qsize_t space, cur_space;
 	qsize_t rsv_space = 0;
 	struct dquot *transfer_from[MAXQUOTAS];
 	struct dquot *transfer_to[MAXQUOTAS];
 	int cnt, ret = QUOTA_OK;
-	int chuid = iattr->ia_valid & ATTR_UID && inode->i_uid != iattr->ia_uid,
-	    chgid = iattr->ia_valid & ATTR_GID && inode->i_gid != iattr->ia_gid;
 	char warntype_to[MAXQUOTAS];
 	char warntype_from_inodes[MAXQUOTAS], warntype_from_space[MAXQUOTAS];
 
@@ -1717,13 +1715,10 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr)
 		transfer_to[cnt] = NULL;
 		warntype_to[cnt] = QUOTA_NL_NOWARN;
 	}
-	if (chuid)
-		transfer_to[USRQUOTA] = dqget(inode->i_sb, iattr->ia_uid,
-					      USRQUOTA);
-	if (chgid)
-		transfer_to[GRPQUOTA] = dqget(inode->i_sb, iattr->ia_gid,
-					      GRPQUOTA);
-
+	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
+		if (mask & (1 << cnt))
+			transfer_to[cnt] = dqget(inode->i_sb, chid[cnt], cnt);
+	}
 	down_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
 	if (IS_NOQUOTA(inode)) {	/* File without quota accounting? */
 		up_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
@@ -1799,12 +1794,25 @@ over_quota:
 }
 EXPORT_SYMBOL(dquot_transfer);
 
-/* Wrapper for transferring ownership of an inode */
+/* Wrapper for transferring ownership of an inode for uid/gid only
+ * Called from FSXXX_setattr()
+ */
 int vfs_dq_transfer(struct inode *inode, struct iattr *iattr)
 {
+	qid_t chid[MAXQUOTAS];
+	unsigned long mask = 0;
+
+	if (iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) {
+		mask |= 1 << USRQUOTA;
+		chid[USRQUOTA] = iattr->ia_uid;
+	}
+	if (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid) {
+		mask |= 1 << GRPQUOTA;
+		chid[GRPQUOTA] = iattr->ia_gid;
+	}
 	if (sb_any_quota_active(inode->i_sb) && !IS_NOQUOTA(inode)) {
 		vfs_dq_init(inode);
-		if (inode->i_sb->dq_op->transfer(inode, iattr) == NO_QUOTA)
+		if (inode->i_sb->dq_op->transfer(inode, chid, mask) == NO_QUOTA)
 			return 1;
 	}
 	return 0;
diff --git a/include/linux/quota.h b/include/linux/quota.h
index 92547a57e25a..edf34f2fe87d 100644
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -301,7 +301,7 @@ struct dquot_operations {
 	int (*alloc_inode) (const struct inode *, qsize_t);
 	int (*free_space) (struct inode *, qsize_t);
 	int (*free_inode) (const struct inode *, qsize_t);
-	int (*transfer) (struct inode *, struct iattr *);
+	int (*transfer) (struct inode *, qid_t *, unsigned long);
 	int (*write_dquot) (struct dquot *);		/* Ordinary dquot write */
 	struct dquot *(*alloc_dquot)(struct super_block *, int);	/* Allocate memory for new dquot */
 	void (*destroy_dquot)(struct dquot *);		/* Free memory for dquot */
diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h
index e563a20cff4f..e1cae204b5d9 100644
--- a/include/linux/quotaops.h
+++ b/include/linux/quotaops.h
@@ -43,7 +43,7 @@ void dquot_release_reserved_space(struct inode *inode, qsize_t number);
 int dquot_free_space(struct inode *inode, qsize_t number);
 int dquot_free_inode(const struct inode *inode, qsize_t number);
 
-int dquot_transfer(struct inode *inode, struct iattr *iattr);
+int dquot_transfer(struct inode *inode, qid_t *chid, unsigned long mask);
 int dquot_commit(struct dquot *dquot);
 int dquot_acquire(struct dquot *dquot);
 int dquot_release(struct dquot *dquot);
-- 
cgit v1.2.3


From 5dd4056db84387975140ff2568eaa0406f07985e Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Wed, 3 Mar 2010 09:05:00 -0500
Subject: dquot: cleanup space allocation / freeing routines

Get rid of the alloc_space, free_space, reserve_space, claim_space and
release_rsv dquot operations - they are always called from the filesystem
and if a filesystem really needs their own (which none currently does)
it can just call into it's own routine directly.

Move shared logic into the common __dquot_alloc_space,
dquot_claim_space_nodirty and __dquot_free_space low-level methods,
and rationalize the wrappers around it to move as much as possible
code into the common block for CONFIG_QUOTA vs not.  Also rename
all these helpers to be named dquot_* instead of vfs_dq_*.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 Documentation/filesystems/Locking |   6 +-
 fs/ext2/balloc.c                  |  12 ++-
 fs/ext2/xattr.c                   |  10 +-
 fs/ext3/balloc.c                  |  11 +-
 fs/ext3/inode.c                   |   2 +-
 fs/ext3/super.c                   |   2 -
 fs/ext3/xattr.c                   |   8 +-
 fs/ext4/inode.c                   |  20 ++--
 fs/ext4/mballoc.c                 |   6 +-
 fs/ext4/super.c                   |   5 -
 fs/ext4/xattr.c                   |   8 +-
 fs/jfs/jfs_dtree.c                |  28 ++---
 fs/jfs/jfs_extent.c               |  16 +--
 fs/jfs/jfs_xtree.c                |  21 ++--
 fs/jfs/xattr.c                    |  17 ++--
 fs/ocfs2/alloc.c                  |  13 ++-
 fs/ocfs2/aops.c                   |  11 +-
 fs/ocfs2/dir.c                    |  37 +++----
 fs/ocfs2/file.c                   |  11 +-
 fs/ocfs2/namei.c                  |   9 +-
 fs/ocfs2/quota_global.c           |   2 -
 fs/quota/dquot.c                  |  79 +++++----------
 fs/reiserfs/bitmap.c              |  10 +-
 fs/reiserfs/stree.c               |  20 ++--
 fs/reiserfs/super.c               |   2 -
 fs/udf/balloc.c                   |  35 ++++---
 fs/ufs/balloc.c                   |  24 +++--
 include/linux/quota.h             |   8 --
 include/linux/quotaops.h          | 208 +++++++++++---------------------------
 29 files changed, 258 insertions(+), 383 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index 18b9d0ca0630..1192fde11638 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -462,9 +462,7 @@ in sys_read() and friends.
 prototypes:
 	int (*initialize) (struct inode *, int);
 	int (*drop) (struct inode *);
-	int (*alloc_space) (struct inode *, qsize_t, int);
 	int (*alloc_inode) (const struct inode *, unsigned long);
-	int (*free_space) (struct inode *, qsize_t);
 	int (*free_inode) (const struct inode *, unsigned long);
 	int (*transfer) (struct inode *, struct iattr *);
 	int (*write_dquot) (struct dquot *);
@@ -481,9 +479,7 @@ What filesystem should expect from the generic quota functions:
 		FS recursion	Held locks when called
 initialize:	yes		maybe dqonoff_sem
 drop:		yes		-
-alloc_space:	->mark_dirty()	-
 alloc_inode:	->mark_dirty()	-
-free_space:	->mark_dirty()	-
 free_inode:	->mark_dirty()	-
 transfer:	yes		-
 write_dquot:	yes		dqonoff_sem or dqptr_sem
@@ -495,7 +491,7 @@ write_info:	yes		dqonoff_sem
 FS recursion means calling ->quota_read() and ->quota_write() from superblock
 operations.
 
-->alloc_space(), ->alloc_inode(), ->free_space(), ->free_inode() are called
+->alloc_inode(), ->free_inode() are called
 only directly by the filesystem and do not call any fs functions only
 the ->mark_dirty() operation.
 
diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c
index 7f8d2e5a7ea6..1d081f0cfec2 100644
--- a/fs/ext2/balloc.c
+++ b/fs/ext2/balloc.c
@@ -570,7 +570,7 @@ do_more:
 error_return:
 	brelse(bitmap_bh);
 	release_blocks(sb, freed);
-	vfs_dq_free_block(inode, freed);
+	dquot_free_block(inode, freed);
 }
 
 /**
@@ -1236,6 +1236,7 @@ ext2_fsblk_t ext2_new_blocks(struct inode *inode, ext2_fsblk_t goal,
 	unsigned short windowsz = 0;
 	unsigned long ngroups;
 	unsigned long num = *count;
+	int ret;
 
 	*errp = -ENOSPC;
 	sb = inode->i_sb;
@@ -1247,8 +1248,9 @@ ext2_fsblk_t ext2_new_blocks(struct inode *inode, ext2_fsblk_t goal,
 	/*
 	 * Check quota for allocation of this block.
 	 */
-	if (vfs_dq_alloc_block(inode, num)) {
-		*errp = -EDQUOT;
+	ret = dquot_alloc_block(inode, num);
+	if (ret) {
+		*errp = ret;
 		return 0;
 	}
 
@@ -1409,7 +1411,7 @@ allocated:
 
 	*errp = 0;
 	brelse(bitmap_bh);
-	vfs_dq_free_block(inode, *count-num);
+	dquot_free_block(inode, *count-num);
 	*count = num;
 	return ret_block;
 
@@ -1420,7 +1422,7 @@ out:
 	 * Undo the block allocation
 	 */
 	if (!performed_allocation)
-		vfs_dq_free_block(inode, *count);
+		dquot_free_block(inode, *count);
 	brelse(bitmap_bh);
 	return 0;
 }
diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c
index 904f00642f84..e44dc92609be 100644
--- a/fs/ext2/xattr.c
+++ b/fs/ext2/xattr.c
@@ -644,8 +644,8 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
 				   the inode.  */
 				ea_bdebug(new_bh, "reusing block");
 
-				error = -EDQUOT;
-				if (vfs_dq_alloc_block(inode, 1)) {
+				error = dquot_alloc_block(inode, 1);
+				if (error) {
 					unlock_buffer(new_bh);
 					goto cleanup;
 				}
@@ -702,7 +702,7 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
 		 * as if nothing happened and cleanup the unused block */
 		if (error && error != -ENOSPC) {
 			if (new_bh && new_bh != old_bh)
-				vfs_dq_free_block(inode, 1);
+				dquot_free_block(inode, 1);
 			goto cleanup;
 		}
 	} else
@@ -734,7 +734,7 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
 			le32_add_cpu(&HDR(old_bh)->h_refcount, -1);
 			if (ce)
 				mb_cache_entry_release(ce);
-			vfs_dq_free_block(inode, 1);
+			dquot_free_block(inode, 1);
 			mark_buffer_dirty(old_bh);
 			ea_bdebug(old_bh, "refcount now=%d",
 				le32_to_cpu(HDR(old_bh)->h_refcount));
@@ -797,7 +797,7 @@ ext2_xattr_delete_inode(struct inode *inode)
 		mark_buffer_dirty(bh);
 		if (IS_SYNC(inode))
 			sync_dirty_buffer(bh);
-		vfs_dq_free_block(inode, 1);
+		dquot_free_block(inode, 1);
 	}
 	EXT2_I(inode)->i_file_acl = 0;
 
diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c
index 27967f92e820..161da2d3f890 100644
--- a/fs/ext3/balloc.c
+++ b/fs/ext3/balloc.c
@@ -676,7 +676,7 @@ void ext3_free_blocks(handle_t *handle, struct inode *inode,
 	}
 	ext3_free_blocks_sb(handle, sb, block, count, &dquot_freed_blocks);
 	if (dquot_freed_blocks)
-		vfs_dq_free_block(inode, dquot_freed_blocks);
+		dquot_free_block(inode, dquot_freed_blocks);
 	return;
 }
 
@@ -1502,8 +1502,9 @@ ext3_fsblk_t ext3_new_blocks(handle_t *handle, struct inode *inode,
 	/*
 	 * Check quota for allocation of this block.
 	 */
-	if (vfs_dq_alloc_block(inode, num)) {
-		*errp = -EDQUOT;
+	err = dquot_alloc_block(inode, num);
+	if (err) {
+		*errp = err;
 		return 0;
 	}
 
@@ -1713,7 +1714,7 @@ allocated:
 
 	*errp = 0;
 	brelse(bitmap_bh);
-	vfs_dq_free_block(inode, *count-num);
+	dquot_free_block(inode, *count-num);
 	*count = num;
 	return ret_block;
 
@@ -1728,7 +1729,7 @@ out:
 	 * Undo the block allocation
 	 */
 	if (!performed_allocation)
-		vfs_dq_free_block(inode, *count);
+		dquot_free_block(inode, *count);
 	brelse(bitmap_bh);
 	return 0;
 }
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index eda9121d7d57..20f02d69365c 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -3336,7 +3336,7 @@ int ext3_mark_inode_dirty(handle_t *handle, struct inode *inode)
  * i_size has been changed by generic_commit_write() and we thus need
  * to include the updated inode in the current transaction.
  *
- * Also, vfs_dq_alloc_space() will always dirty the inode when blocks
+ * Also, dquot_alloc_space() will always dirty the inode when blocks
  * are allocated to the file.
  *
  * If the inode is marked synchronous, we don't honour that here - doing
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 5c54e5f685d4..8c13910a3782 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -752,9 +752,7 @@ static ssize_t ext3_quota_write(struct super_block *sb, int type,
 static const struct dquot_operations ext3_quota_operations = {
 	.initialize	= dquot_initialize,
 	.drop		= dquot_drop,
-	.alloc_space	= dquot_alloc_space,
 	.alloc_inode	= dquot_alloc_inode,
-	.free_space	= dquot_free_space,
 	.free_inode	= dquot_free_inode,
 	.transfer	= dquot_transfer,
 	.write_dquot	= ext3_write_dquot,
diff --git a/fs/ext3/xattr.c b/fs/ext3/xattr.c
index 2d2fb2a85961..534a94c3a933 100644
--- a/fs/ext3/xattr.c
+++ b/fs/ext3/xattr.c
@@ -500,7 +500,7 @@ ext3_xattr_release_block(handle_t *handle, struct inode *inode,
 		error = ext3_journal_dirty_metadata(handle, bh);
 		if (IS_SYNC(inode))
 			handle->h_sync = 1;
-		vfs_dq_free_block(inode, 1);
+		dquot_free_block(inode, 1);
 		ea_bdebug(bh, "refcount now=%d; releasing",
 			  le32_to_cpu(BHDR(bh)->h_refcount));
 		if (ce)
@@ -775,8 +775,8 @@ inserted:
 			else {
 				/* The old block is released after updating
 				   the inode. */
-				error = -EDQUOT;
-				if (vfs_dq_alloc_block(inode, 1))
+				error = dquot_alloc_block(inode, 1);
+				if (error)
 					goto cleanup;
 				error = ext3_journal_get_write_access(handle,
 								      new_bh);
@@ -850,7 +850,7 @@ cleanup:
 	return error;
 
 cleanup_dquot:
-	vfs_dq_free_block(inode, 1);
+	dquot_free_block(inode, 1);
 	goto cleanup;
 
 bad_block:
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index e11952404e02..9f607ea411c8 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1093,9 +1093,9 @@ void ext4_da_update_reserve_space(struct inode *inode,
 
 	/* Update quota subsystem */
 	if (quota_claim) {
-		vfs_dq_claim_block(inode, used);
+		dquot_claim_block(inode, used);
 		if (mdb_free)
-			vfs_dq_release_reservation_block(inode, mdb_free);
+			dquot_release_reservation_block(inode, mdb_free);
 	} else {
 		/*
 		 * We did fallocate with an offset that is already delayed
@@ -1106,8 +1106,8 @@ void ext4_da_update_reserve_space(struct inode *inode,
 		 * that
 		 */
 		if (allocated_meta_blocks)
-			vfs_dq_claim_block(inode, allocated_meta_blocks);
-		vfs_dq_release_reservation_block(inode, mdb_free + used);
+			dquot_claim_block(inode, allocated_meta_blocks);
+		dquot_release_reservation_block(inode, mdb_free + used);
 	}
 
 	/*
@@ -1836,6 +1836,7 @@ static int ext4_da_reserve_space(struct inode *inode, sector_t lblock)
 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
 	struct ext4_inode_info *ei = EXT4_I(inode);
 	unsigned long md_needed, md_reserved;
+	int ret;
 
 	/*
 	 * recalculate the amount of metadata blocks to reserve
@@ -1853,11 +1854,12 @@ repeat:
 	 * later. Real quota accounting is done at pages writeout
 	 * time.
 	 */
-	if (vfs_dq_reserve_block(inode, md_needed + 1))
-		return -EDQUOT;
+	ret = dquot_reserve_block(inode, md_needed + 1);
+	if (ret)
+		return ret;
 
 	if (ext4_claim_free_blocks(sbi, md_needed + 1)) {
-		vfs_dq_release_reservation_block(inode, md_needed + 1);
+		dquot_release_reservation_block(inode, md_needed + 1);
 		if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
 			yield();
 			goto repeat;
@@ -1914,7 +1916,7 @@ static void ext4_da_release_space(struct inode *inode, int to_free)
 
 	spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
 
-	vfs_dq_release_reservation_block(inode, to_free);
+	dquot_release_reservation_block(inode, to_free);
 }
 
 static void ext4_da_page_release_reservation(struct page *page,
@@ -5641,7 +5643,7 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode)
  * i_size has been changed by generic_commit_write() and we thus need
  * to include the updated inode in the current transaction.
  *
- * Also, vfs_dq_alloc_block() will always dirty the inode when blocks
+ * Also, dquot_alloc_block() will always dirty the inode when blocks
  * are allocated to the file.
  *
  * If the inode is marked synchronous, we don't honour that here - doing
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index d34afad3e137..0b905781e8e6 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -4254,7 +4254,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
 			return 0;
 		}
 		reserv_blks = ar->len;
-		while (ar->len && vfs_dq_alloc_block(ar->inode, ar->len)) {
+		while (ar->len && dquot_alloc_block(ar->inode, ar->len)) {
 			ar->flags |= EXT4_MB_HINT_NOPREALLOC;
 			ar->len--;
 		}
@@ -4331,7 +4331,7 @@ out2:
 	kmem_cache_free(ext4_ac_cachep, ac);
 out1:
 	if (inquota && ar->len < inquota)
-		vfs_dq_free_block(ar->inode, inquota - ar->len);
+		dquot_free_block(ar->inode, inquota - ar->len);
 out3:
 	if (!ar->len) {
 		if (!EXT4_I(ar->inode)->i_delalloc_reserved_flag)
@@ -4646,7 +4646,7 @@ do_more:
 	sb->s_dirt = 1;
 error_return:
 	if (freed)
-		vfs_dq_free_block(inode, freed);
+		dquot_free_block(inode, freed);
 	brelse(bitmap_bh);
 	ext4_std_error(sb, err);
 	if (ac)
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 735c20d5fd56..fa8f4deda652 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1014,15 +1014,10 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type,
 static const struct dquot_operations ext4_quota_operations = {
 	.initialize	= dquot_initialize,
 	.drop		= dquot_drop,
-	.alloc_space	= dquot_alloc_space,
-	.reserve_space	= dquot_reserve_space,
-	.claim_space	= dquot_claim_space,
-	.release_rsv	= dquot_release_reserved_space,
 #ifdef CONFIG_QUOTA
 	.get_reserved_space = ext4_get_reserved_space,
 #endif
 	.alloc_inode	= dquot_alloc_inode,
-	.free_space	= dquot_free_space,
 	.free_inode	= dquot_free_inode,
 	.transfer	= dquot_transfer,
 	.write_dquot	= ext4_write_dquot,
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index f3a2f7ed45aa..ab3a95ee5e7e 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -494,7 +494,7 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
 		error = ext4_handle_dirty_metadata(handle, inode, bh);
 		if (IS_SYNC(inode))
 			ext4_handle_sync(handle);
-		vfs_dq_free_block(inode, 1);
+		dquot_free_block(inode, 1);
 		ea_bdebug(bh, "refcount now=%d; releasing",
 			  le32_to_cpu(BHDR(bh)->h_refcount));
 		if (ce)
@@ -787,8 +787,8 @@ inserted:
 			else {
 				/* The old block is released after updating
 				   the inode. */
-				error = -EDQUOT;
-				if (vfs_dq_alloc_block(inode, 1))
+				error = dquot_alloc_block(inode, 1);
+				if (error)
 					goto cleanup;
 				error = ext4_journal_get_write_access(handle,
 								      new_bh);
@@ -876,7 +876,7 @@ cleanup:
 	return error;
 
 cleanup_dquot:
-	vfs_dq_free_block(inode, 1);
+	dquot_free_block(inode, 1);
 	goto cleanup;
 
 bad_block:
diff --git a/fs/jfs/jfs_dtree.c b/fs/jfs/jfs_dtree.c
index 925871e9887b..0e4623be70ce 100644
--- a/fs/jfs/jfs_dtree.c
+++ b/fs/jfs/jfs_dtree.c
@@ -381,10 +381,10 @@ static u32 add_index(tid_t tid, struct inode *ip, s64 bn, int slot)
 		 * It's time to move the inline table to an external
 		 * page and begin to build the xtree
 		 */
-		if (vfs_dq_alloc_block(ip, sbi->nbperpage))
+		if (dquot_alloc_block(ip, sbi->nbperpage))
 			goto clean_up;
 		if (dbAlloc(ip, 0, sbi->nbperpage, &xaddr)) {
-			vfs_dq_free_block(ip, sbi->nbperpage);
+			dquot_free_block(ip, sbi->nbperpage);
 			goto clean_up;
 		}
 
@@ -408,7 +408,7 @@ static u32 add_index(tid_t tid, struct inode *ip, s64 bn, int slot)
 			memcpy(&jfs_ip->i_dirtable, temp_table,
 			       sizeof (temp_table));
 			dbFree(ip, xaddr, sbi->nbperpage);
-			vfs_dq_free_block(ip, sbi->nbperpage);
+			dquot_free_block(ip, sbi->nbperpage);
 			goto clean_up;
 		}
 		ip->i_size = PSIZE;
@@ -1027,10 +1027,9 @@ static int dtSplitUp(tid_t tid,
 			n = xlen;
 
 		/* Allocate blocks to quota. */
-		if (vfs_dq_alloc_block(ip, n)) {
-			rc = -EDQUOT;
+		rc = dquot_alloc_block(ip, n);
+		if (rc)
 			goto extendOut;
-		}
 		quota_allocation += n;
 
 		if ((rc = dbReAlloc(sbi->ipbmap, xaddr, (s64) xlen,
@@ -1308,7 +1307,7 @@ static int dtSplitUp(tid_t tid,
 
 	/* Rollback quota allocation */
 	if (rc && quota_allocation)
-		vfs_dq_free_block(ip, quota_allocation);
+		dquot_free_block(ip, quota_allocation);
 
       dtSplitUp_Exit:
 
@@ -1369,9 +1368,10 @@ static int dtSplitPage(tid_t tid, struct inode *ip, struct dtsplit * split,
 		return -EIO;
 
 	/* Allocate blocks to quota. */
-	if (vfs_dq_alloc_block(ip, lengthPXD(pxd))) {
+	rc = dquot_alloc_block(ip, lengthPXD(pxd));
+	if (rc) {
 		release_metapage(rmp);
-		return -EDQUOT;
+		return rc;
 	}
 
 	jfs_info("dtSplitPage: ip:0x%p smp:0x%p rmp:0x%p", ip, smp, rmp);
@@ -1892,6 +1892,7 @@ static int dtSplitRoot(tid_t tid,
 	struct dt_lock *dtlck;
 	struct tlock *tlck;
 	struct lv *lv;
+	int rc;
 
 	/* get split root page */
 	smp = split->mp;
@@ -1916,9 +1917,10 @@ static int dtSplitRoot(tid_t tid,
 	rp = rmp->data;
 
 	/* Allocate blocks to quota. */
-	if (vfs_dq_alloc_block(ip, lengthPXD(pxd))) {
+	rc = dquot_alloc_block(ip, lengthPXD(pxd));
+	if (rc) {
 		release_metapage(rmp);
-		return -EDQUOT;
+		return rc;
 	}
 
 	BT_MARK_DIRTY(rmp, ip);
@@ -2287,7 +2289,7 @@ static int dtDeleteUp(tid_t tid, struct inode *ip,
 	xlen = lengthPXD(&fp->header.self);
 
 	/* Free quota allocation. */
-	vfs_dq_free_block(ip, xlen);
+	dquot_free_block(ip, xlen);
 
 	/* free/invalidate its buffer page */
 	discard_metapage(fmp);
@@ -2363,7 +2365,7 @@ static int dtDeleteUp(tid_t tid, struct inode *ip,
 				xlen = lengthPXD(&p->header.self);
 
 				/* Free quota allocation */
-				vfs_dq_free_block(ip, xlen);
+				dquot_free_block(ip, xlen);
 
 				/* free/invalidate its buffer page */
 				discard_metapage(mp);
diff --git a/fs/jfs/jfs_extent.c b/fs/jfs/jfs_extent.c
index 41d6045dbeb0..5d3bbd10f8db 100644
--- a/fs/jfs/jfs_extent.c
+++ b/fs/jfs/jfs_extent.c
@@ -141,10 +141,11 @@ extAlloc(struct inode *ip, s64 xlen, s64 pno, xad_t * xp, bool abnr)
 	}
 
 	/* Allocate blocks to quota. */
-	if (vfs_dq_alloc_block(ip, nxlen)) {
+	rc = dquot_alloc_block(ip, nxlen);
+	if (rc) {
 		dbFree(ip, nxaddr, (s64) nxlen);
 		mutex_unlock(&JFS_IP(ip)->commit_mutex);
-		return -EDQUOT;
+		return rc;
 	}
 
 	/* determine the value of the extent flag */
@@ -164,7 +165,7 @@ extAlloc(struct inode *ip, s64 xlen, s64 pno, xad_t * xp, bool abnr)
 	 */
 	if (rc) {
 		dbFree(ip, nxaddr, nxlen);
-		vfs_dq_free_block(ip, nxlen);
+		dquot_free_block(ip, nxlen);
 		mutex_unlock(&JFS_IP(ip)->commit_mutex);
 		return (rc);
 	}
@@ -256,10 +257,11 @@ int extRealloc(struct inode *ip, s64 nxlen, xad_t * xp, bool abnr)
 		goto exit;
 
 	/* Allocat blocks to quota. */
-	if (vfs_dq_alloc_block(ip, nxlen)) {
+	rc = dquot_alloc_block(ip, nxlen);
+	if (rc) {
 		dbFree(ip, nxaddr, (s64) nxlen);
 		mutex_unlock(&JFS_IP(ip)->commit_mutex);
-		return -EDQUOT;
+		return rc;
 	}
 
 	delta = nxlen - xlen;
@@ -297,7 +299,7 @@ int extRealloc(struct inode *ip, s64 nxlen, xad_t * xp, bool abnr)
 		/* extend the extent */
 		if ((rc = xtExtend(0, ip, xoff + xlen, (int) nextend, 0))) {
 			dbFree(ip, xaddr + xlen, delta);
-			vfs_dq_free_block(ip, nxlen);
+			dquot_free_block(ip, nxlen);
 			goto exit;
 		}
 	} else {
@@ -308,7 +310,7 @@ int extRealloc(struct inode *ip, s64 nxlen, xad_t * xp, bool abnr)
 		 */
 		if ((rc = xtTailgate(0, ip, xoff, (int) ntail, nxaddr, 0))) {
 			dbFree(ip, nxaddr, nxlen);
-			vfs_dq_free_block(ip, nxlen);
+			dquot_free_block(ip, nxlen);
 			goto exit;
 		}
 	}
diff --git a/fs/jfs/jfs_xtree.c b/fs/jfs/jfs_xtree.c
index d654a6458648..6c50871e6220 100644
--- a/fs/jfs/jfs_xtree.c
+++ b/fs/jfs/jfs_xtree.c
@@ -585,10 +585,10 @@ int xtInsert(tid_t tid,		/* transaction id */
 			hint = addressXAD(xad) + lengthXAD(xad) - 1;
 		} else
 			hint = 0;
-		if ((rc = vfs_dq_alloc_block(ip, xlen)))
+		if ((rc = dquot_alloc_block(ip, xlen)))
 			goto out;
 		if ((rc = dbAlloc(ip, hint, (s64) xlen, &xaddr))) {
-			vfs_dq_free_block(ip, xlen);
+			dquot_free_block(ip, xlen);
 			goto out;
 		}
 	}
@@ -617,7 +617,7 @@ int xtInsert(tid_t tid,		/* transaction id */
 			/* undo data extent allocation */
 			if (*xaddrp == 0) {
 				dbFree(ip, xaddr, (s64) xlen);
-				vfs_dq_free_block(ip, xlen);
+				dquot_free_block(ip, xlen);
 			}
 			return rc;
 		}
@@ -985,10 +985,9 @@ xtSplitPage(tid_t tid, struct inode *ip,
 	rbn = addressPXD(pxd);
 
 	/* Allocate blocks to quota. */
-	if (vfs_dq_alloc_block(ip, lengthPXD(pxd))) {
-		rc = -EDQUOT;
+	rc = dquot_alloc_block(ip, lengthPXD(pxd));
+	if (rc)
 		goto clean_up;
-	}
 
 	quota_allocation += lengthPXD(pxd);
 
@@ -1195,7 +1194,7 @@ xtSplitPage(tid_t tid, struct inode *ip,
 
 	/* Rollback quota allocation. */
 	if (quota_allocation)
-		vfs_dq_free_block(ip, quota_allocation);
+		dquot_free_block(ip, quota_allocation);
 
 	return (rc);
 }
@@ -1235,6 +1234,7 @@ xtSplitRoot(tid_t tid,
 	struct pxdlist *pxdlist;
 	struct tlock *tlck;
 	struct xtlock *xtlck;
+	int rc;
 
 	sp = &JFS_IP(ip)->i_xtroot;
 
@@ -1252,9 +1252,10 @@ xtSplitRoot(tid_t tid,
 		return -EIO;
 
 	/* Allocate blocks to quota. */
-	if (vfs_dq_alloc_block(ip, lengthPXD(pxd))) {
+	rc = dquot_alloc_block(ip, lengthPXD(pxd));
+	if (rc) {
 		release_metapage(rmp);
-		return -EDQUOT;
+		return rc;
 	}
 
 	jfs_info("xtSplitRoot: ip:0x%p rmp:0x%p", ip, rmp);
@@ -3680,7 +3681,7 @@ s64 xtTruncate(tid_t tid, struct inode *ip, s64 newsize, int flag)
 		ip->i_size = newsize;
 
 	/* update quota allocation to reflect freed blocks */
-	vfs_dq_free_block(ip, nfreed);
+	dquot_free_block(ip, nfreed);
 
 	/*
 	 * free tlock of invalidated pages
diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c
index fad364548bc9..1f594ab21895 100644
--- a/fs/jfs/xattr.c
+++ b/fs/jfs/xattr.c
@@ -260,14 +260,14 @@ static int ea_write(struct inode *ip, struct jfs_ea_list *ealist, int size,
 	nblocks = (size + (sb->s_blocksize - 1)) >> sb->s_blocksize_bits;
 
 	/* Allocate new blocks to quota. */
-	if (vfs_dq_alloc_block(ip, nblocks)) {
-		return -EDQUOT;
-	}
+	rc = dquot_alloc_block(ip, nblocks);
+	if (rc)
+		return rc;
 
 	rc = dbAlloc(ip, INOHINT(ip), nblocks, &blkno);
 	if (rc) {
 		/*Rollback quota allocation. */
-		vfs_dq_free_block(ip, nblocks);
+		dquot_free_block(ip, nblocks);
 		return rc;
 	}
 
@@ -332,7 +332,7 @@ static int ea_write(struct inode *ip, struct jfs_ea_list *ealist, int size,
 
       failed:
 	/* Rollback quota allocation. */
-	vfs_dq_free_block(ip, nblocks);
+	dquot_free_block(ip, nblocks);
 
 	dbFree(ip, blkno, nblocks);
 	return rc;
@@ -538,7 +538,8 @@ static int ea_get(struct inode *inode, struct ea_buffer *ea_buf, int min_size)
 
 	if (blocks_needed > current_blocks) {
 		/* Allocate new blocks to quota. */
-		if (vfs_dq_alloc_block(inode, blocks_needed))
+		rc = dquot_alloc_block(inode, blocks_needed);
+		if (rc)
 			return -EDQUOT;
 
 		quota_allocation = blocks_needed;
@@ -602,7 +603,7 @@ static int ea_get(struct inode *inode, struct ea_buffer *ea_buf, int min_size)
       clean_up:
 	/* Rollback quota allocation */
 	if (quota_allocation)
-		vfs_dq_free_block(inode, quota_allocation);
+		dquot_free_block(inode, quota_allocation);
 
 	return (rc);
 }
@@ -677,7 +678,7 @@ static int ea_put(tid_t tid, struct inode *inode, struct ea_buffer *ea_buf,
 
 	/* If old blocks exist, they must be removed from quota allocation. */
 	if (old_blocks)
-		vfs_dq_free_block(inode, old_blocks);
+		dquot_free_block(inode, old_blocks);
 
 	inode->i_ctime = CURRENT_TIME;
 
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index d17bdc718f74..20538dd832a4 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -5712,7 +5712,7 @@ int ocfs2_remove_btree_range(struct inode *inode,
 		goto out;
 	}
 
-	vfs_dq_free_space_nodirty(inode,
+	dquot_free_space_nodirty(inode,
 				  ocfs2_clusters_to_bytes(inode->i_sb, len));
 
 	ret = ocfs2_remove_extent(handle, et, cpos, len, meta_ac, dealloc);
@@ -6935,7 +6935,7 @@ static int ocfs2_do_truncate(struct ocfs2_super *osb,
 		goto bail;
 	}
 
-	vfs_dq_free_space_nodirty(inode,
+	dquot_free_space_nodirty(inode,
 			ocfs2_clusters_to_bytes(osb->sb, clusters_to_del));
 	spin_lock(&OCFS2_I(inode)->ip_lock);
 	OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters) -
@@ -7300,11 +7300,10 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
 		unsigned int page_end;
 		u64 phys;
 
-		if (vfs_dq_alloc_space_nodirty(inode,
-				       ocfs2_clusters_to_bytes(osb->sb, 1))) {
-			ret = -EDQUOT;
+		ret = dquot_alloc_space_nodirty(inode,
+				       ocfs2_clusters_to_bytes(osb->sb, 1));
+		if (ret)
 			goto out_commit;
-		}
 		did_quota = 1;
 
 		ret = ocfs2_claim_clusters(osb, handle, data_ac, 1, &bit_off,
@@ -7380,7 +7379,7 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
 
 out_commit:
 	if (ret < 0 && did_quota)
-		vfs_dq_free_space_nodirty(inode,
+		dquot_free_space_nodirty(inode,
 					  ocfs2_clusters_to_bytes(osb->sb, 1));
 
 	ocfs2_commit_trans(osb, handle);
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 7e9df11260f4..7d04c171567d 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -1763,10 +1763,11 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
 
 	wc->w_handle = handle;
 
-	if (clusters_to_alloc && vfs_dq_alloc_space_nodirty(inode,
-			ocfs2_clusters_to_bytes(osb->sb, clusters_to_alloc))) {
-		ret = -EDQUOT;
-		goto out_commit;
+	if (clusters_to_alloc) {
+		ret = dquot_alloc_space_nodirty(inode,
+			ocfs2_clusters_to_bytes(osb->sb, clusters_to_alloc));
+		if (ret)
+			goto out_commit;
 	}
 	/*
 	 * We don't want this to fail in ocfs2_write_end(), so do it
@@ -1809,7 +1810,7 @@ success:
 	return 0;
 out_quota:
 	if (clusters_to_alloc)
-		vfs_dq_free_space(inode,
+		dquot_free_space(inode,
 			  ocfs2_clusters_to_bytes(osb->sb, clusters_to_alloc));
 out_commit:
 	ocfs2_commit_trans(osb, handle);
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index 28c3ec238796..a63ea4d74e67 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -2964,12 +2964,10 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
 		goto out;
 	}
 
-	if (vfs_dq_alloc_space_nodirty(dir,
-				ocfs2_clusters_to_bytes(osb->sb,
-							alloc + dx_alloc))) {
-		ret = -EDQUOT;
+	ret = dquot_alloc_space_nodirty(dir,
+		ocfs2_clusters_to_bytes(osb->sb, alloc + dx_alloc));
+	if (ret)
 		goto out_commit;
-	}
 	did_quota = 1;
 
 	if (ocfs2_supports_indexed_dirs(osb) && !dx_inline) {
@@ -3178,7 +3176,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
 
 out_commit:
 	if (ret < 0 && did_quota)
-		vfs_dq_free_space_nodirty(dir, bytes_allocated);
+		dquot_free_space_nodirty(dir, bytes_allocated);
 
 	ocfs2_commit_trans(osb, handle);
 
@@ -3221,11 +3219,10 @@ static int ocfs2_do_extend_dir(struct super_block *sb,
 	if (extend) {
 		u32 offset = OCFS2_I(dir)->ip_clusters;
 
-		if (vfs_dq_alloc_space_nodirty(dir,
-					ocfs2_clusters_to_bytes(sb, 1))) {
-			status = -EDQUOT;
+		status = dquot_alloc_space_nodirty(dir,
+					ocfs2_clusters_to_bytes(sb, 1));
+		if (status)
 			goto bail;
-		}
 		did_quota = 1;
 
 		status = ocfs2_add_inode_data(OCFS2_SB(sb), dir, &offset,
@@ -3254,7 +3251,7 @@ static int ocfs2_do_extend_dir(struct super_block *sb,
 	status = 0;
 bail:
 	if (did_quota && status < 0)
-		vfs_dq_free_space_nodirty(dir, ocfs2_clusters_to_bytes(sb, 1));
+		dquot_free_space_nodirty(dir, ocfs2_clusters_to_bytes(sb, 1));
 	mlog_exit(status);
 	return status;
 }
@@ -3889,11 +3886,10 @@ static int ocfs2_dx_dir_rebalance(struct ocfs2_super *osb, struct inode *dir,
 		goto out;
 	}
 
-	if (vfs_dq_alloc_space_nodirty(dir,
-				       ocfs2_clusters_to_bytes(dir->i_sb, 1))) {
-		ret = -EDQUOT;
+	ret = dquot_alloc_space_nodirty(dir,
+				       ocfs2_clusters_to_bytes(dir->i_sb, 1));
+	if (ret)
 		goto out_commit;
-	}
 	did_quota = 1;
 
 	ret = ocfs2_journal_access_dl(handle, INODE_CACHE(dir), dx_leaf_bh,
@@ -3983,7 +3979,7 @@ static int ocfs2_dx_dir_rebalance(struct ocfs2_super *osb, struct inode *dir,
 
 out_commit:
 	if (ret < 0 && did_quota)
-		vfs_dq_free_space_nodirty(dir,
+		dquot_free_space_nodirty(dir,
 				ocfs2_clusters_to_bytes(dir->i_sb, 1));
 
 	ocfs2_commit_trans(osb, handle);
@@ -4165,11 +4161,10 @@ static int ocfs2_expand_inline_dx_root(struct inode *dir,
 		goto out;
 	}
 
-	if (vfs_dq_alloc_space_nodirty(dir,
-				       ocfs2_clusters_to_bytes(osb->sb, 1))) {
-		ret = -EDQUOT;
+	ret = dquot_alloc_space_nodirty(dir,
+				       ocfs2_clusters_to_bytes(osb->sb, 1));
+	if (ret)
 		goto out_commit;
-	}
 	did_quota = 1;
 
 	/*
@@ -4229,7 +4224,7 @@ static int ocfs2_expand_inline_dx_root(struct inode *dir,
 
 out_commit:
 	if (ret < 0 && did_quota)
-		vfs_dq_free_space_nodirty(dir,
+		dquot_free_space_nodirty(dir,
 					  ocfs2_clusters_to_bytes(dir->i_sb, 1));
 
 	ocfs2_commit_trans(osb, handle);
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 558ce0312421..6cf3d8d18369 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -629,11 +629,10 @@ restart_all:
 	}
 
 restarted_transaction:
-	if (vfs_dq_alloc_space_nodirty(inode, ocfs2_clusters_to_bytes(osb->sb,
-	    clusters_to_add))) {
-		status = -EDQUOT;
+	status = dquot_alloc_space_nodirty(inode,
+			ocfs2_clusters_to_bytes(osb->sb, clusters_to_add));
+	if (status)
 		goto leave;
-	}
 	did_quota = 1;
 
 	/* reserve a write to the file entry early on - that we if we
@@ -674,7 +673,7 @@ restarted_transaction:
 	clusters_to_add -= (OCFS2_I(inode)->ip_clusters - prev_clusters);
 	spin_unlock(&OCFS2_I(inode)->ip_lock);
 	/* Release unused quota reservation */
-	vfs_dq_free_space(inode,
+	dquot_free_space(inode,
 			ocfs2_clusters_to_bytes(osb->sb, clusters_to_add));
 	did_quota = 0;
 
@@ -710,7 +709,7 @@ restarted_transaction:
 
 leave:
 	if (status < 0 && did_quota)
-		vfs_dq_free_space(inode,
+		dquot_free_space(inode,
 			ocfs2_clusters_to_bytes(osb->sb, clusters_to_add));
 	if (handle) {
 		ocfs2_commit_trans(osb, handle);
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 50fb26a6a5f5..13adaa1f40cd 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -1716,11 +1716,10 @@ static int ocfs2_symlink(struct inode *dir,
 		u32 offset = 0;
 
 		inode->i_op = &ocfs2_symlink_inode_operations;
-		if (vfs_dq_alloc_space_nodirty(inode,
-		    ocfs2_clusters_to_bytes(osb->sb, 1))) {
-			status = -EDQUOT;
+		status = dquot_alloc_space_nodirty(inode,
+		    ocfs2_clusters_to_bytes(osb->sb, 1));
+		if (status)
 			goto bail;
-		}
 		did_quota = 1;
 		status = ocfs2_add_inode_data(osb, inode, &offset, 1, 0,
 					      new_fe_bh,
@@ -1788,7 +1787,7 @@ static int ocfs2_symlink(struct inode *dir,
 	d_instantiate(dentry, inode);
 bail:
 	if (status < 0 && did_quota)
-		vfs_dq_free_space_nodirty(inode,
+		dquot_free_space_nodirty(inode,
 					ocfs2_clusters_to_bytes(osb->sb, 1));
 	if (status < 0 && did_quota_inode)
 		vfs_dq_free_inode(inode);
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c
index b437dc0c4cad..aa66fb277225 100644
--- a/fs/ocfs2/quota_global.c
+++ b/fs/ocfs2/quota_global.c
@@ -853,9 +853,7 @@ static void ocfs2_destroy_dquot(struct dquot *dquot)
 const struct dquot_operations ocfs2_quota_operations = {
 	.initialize	= dquot_initialize,
 	.drop		= dquot_drop,
-	.alloc_space	= dquot_alloc_space,
 	.alloc_inode	= dquot_alloc_inode,
-	.free_space	= dquot_free_space,
 	.free_inode	= dquot_free_inode,
 	.transfer	= dquot_transfer,
 	.write_dquot	= ocfs2_write_dquot,
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 10d021dd37c1..baf202c012cc 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -1464,28 +1464,29 @@ static void inode_decr_space(struct inode *inode, qsize_t number, int reserve)
 }
 
 /*
- * Following four functions update i_blocks+i_bytes fields and
- * quota information (together with appropriate checks)
- * NOTE: We absolutely rely on the fact that caller dirties
- * the inode (usually macros in quotaops.h care about this) and
- * holds a handle for the current transaction so that dquot write and
- * inode write go into the same transaction.
+ * This functions updates i_blocks+i_bytes fields and quota information
+ * (together with appropriate checks).
+ *
+ * NOTE: We absolutely rely on the fact that caller dirties the inode
+ * (usually helpers in quotaops.h care about this) and holds a handle for
+ * the current transaction so that dquot write and inode write go into the
+ * same transaction.
  */
 
 /*
  * This operation can block, but only after everything is updated
  */
 int __dquot_alloc_space(struct inode *inode, qsize_t number,
-			int warn, int reserve)
+		int warn, int reserve)
 {
-	int cnt, ret = QUOTA_OK;
+	int cnt, ret = 0;
 	char warntype[MAXQUOTAS];
 
 	/*
 	 * First test before acquiring mutex - solves deadlocks when we
 	 * re-enter the quota code and are already holding the mutex
 	 */
-	if (IS_NOQUOTA(inode)) {
+	if (!sb_any_quota_active(inode->i_sb) || IS_NOQUOTA(inode)) {
 		inode_incr_space(inode, number, reserve);
 		goto out;
 	}
@@ -1498,9 +1499,9 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number,
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
 		if (!inode->i_dquot[cnt])
 			continue;
-		if (check_bdq(inode->i_dquot[cnt], number, warn, warntype+cnt)
-		    == NO_QUOTA) {
-			ret = NO_QUOTA;
+		if (check_bdq(inode->i_dquot[cnt], number, !warn, warntype+cnt)
+				== NO_QUOTA) {
+			ret = -EDQUOT;
 			spin_unlock(&dq_data_lock);
 			goto out_flush_warn;
 		}
@@ -1525,18 +1526,7 @@ out_flush_warn:
 out:
 	return ret;
 }
-
-int dquot_alloc_space(struct inode *inode, qsize_t number, int warn)
-{
-	return __dquot_alloc_space(inode, number, warn, 0);
-}
-EXPORT_SYMBOL(dquot_alloc_space);
-
-int dquot_reserve_space(struct inode *inode, qsize_t number, int warn)
-{
-	return __dquot_alloc_space(inode, number, warn, 1);
-}
-EXPORT_SYMBOL(dquot_reserve_space);
+EXPORT_SYMBOL(__dquot_alloc_space);
 
 /*
  * This operation can block, but only after everything is updated
@@ -1578,14 +1568,16 @@ warn_put_all:
 }
 EXPORT_SYMBOL(dquot_alloc_inode);
 
-int dquot_claim_space(struct inode *inode, qsize_t number)
+/*
+ * Convert in-memory reserved quotas to real consumed quotas
+ */
+int dquot_claim_space_nodirty(struct inode *inode, qsize_t number)
 {
 	int cnt;
-	int ret = QUOTA_OK;
 
-	if (IS_NOQUOTA(inode)) {
+	if (!sb_any_quota_active(inode->i_sb) || IS_NOQUOTA(inode)) {
 		inode_claim_rsv_space(inode, number);
-		goto out;
+		return 0;
 	}
 
 	down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
@@ -1601,24 +1593,23 @@ int dquot_claim_space(struct inode *inode, qsize_t number)
 	spin_unlock(&dq_data_lock);
 	mark_all_dquot_dirty(inode->i_dquot);
 	up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
-out:
-	return ret;
+	return 0;
 }
-EXPORT_SYMBOL(dquot_claim_space);
+EXPORT_SYMBOL(dquot_claim_space_nodirty);
 
 /*
  * This operation can block, but only after everything is updated
  */
-int __dquot_free_space(struct inode *inode, qsize_t number, int reserve)
+void __dquot_free_space(struct inode *inode, qsize_t number, int reserve)
 {
 	unsigned int cnt;
 	char warntype[MAXQUOTAS];
 
 	/* First test before acquiring mutex - solves deadlocks when we
          * re-enter the quota code and are already holding the mutex */
-	if (IS_NOQUOTA(inode)) {
+	if (!sb_any_quota_active(inode->i_sb) || IS_NOQUOTA(inode)) {
 		inode_decr_space(inode, number, reserve);
-		return QUOTA_OK;
+		return;
 	}
 
 	down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
@@ -1641,24 +1632,8 @@ int __dquot_free_space(struct inode *inode, qsize_t number, int reserve)
 out_unlock:
 	flush_warnings(inode->i_dquot, warntype);
 	up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
-	return QUOTA_OK;
-}
-
-int dquot_free_space(struct inode *inode, qsize_t number)
-{
-	return  __dquot_free_space(inode, number, 0);
-}
-EXPORT_SYMBOL(dquot_free_space);
-
-/*
- * Release reserved quota space
- */
-void dquot_release_reserved_space(struct inode *inode, qsize_t number)
-{
-	__dquot_free_space(inode, number, 1);
-
 }
-EXPORT_SYMBOL(dquot_release_reserved_space);
+EXPORT_SYMBOL(__dquot_free_space);
 
 /*
  * This operation can block, but only after everything is updated
@@ -1840,9 +1815,7 @@ EXPORT_SYMBOL(dquot_commit_info);
 const struct dquot_operations dquot_operations = {
 	.initialize	= dquot_initialize,
 	.drop		= dquot_drop,
-	.alloc_space	= dquot_alloc_space,
 	.alloc_inode	= dquot_alloc_inode,
-	.free_space	= dquot_free_space,
 	.free_inode	= dquot_free_inode,
 	.transfer	= dquot_transfer,
 	.write_dquot	= dquot_commit,
diff --git a/fs/reiserfs/bitmap.c b/fs/reiserfs/bitmap.c
index 65c872761177..dc014f7def05 100644
--- a/fs/reiserfs/bitmap.c
+++ b/fs/reiserfs/bitmap.c
@@ -425,7 +425,7 @@ static void _reiserfs_free_block(struct reiserfs_transaction_handle *th,
 
 	journal_mark_dirty(th, s, sbh);
 	if (for_unformatted)
-		vfs_dq_free_block_nodirty(inode, 1);
+		dquot_free_block_nodirty(inode, 1);
 }
 
 void reiserfs_free_block(struct reiserfs_transaction_handle *th,
@@ -1049,7 +1049,7 @@ static inline int blocknrs_and_prealloc_arrays_from_search_start
 			       amount_needed, hint->inode->i_uid);
 #endif
 		quota_ret =
-		    vfs_dq_alloc_block_nodirty(hint->inode, amount_needed);
+		    dquot_alloc_block_nodirty(hint->inode, amount_needed);
 		if (quota_ret)	/* Quota exceeded? */
 			return QUOTA_EXCEEDED;
 		if (hint->preallocate && hint->prealloc_size) {
@@ -1058,7 +1058,7 @@ static inline int blocknrs_and_prealloc_arrays_from_search_start
 				       "reiserquota: allocating (prealloc) %d blocks id=%u",
 				       hint->prealloc_size, hint->inode->i_uid);
 #endif
-			quota_ret = vfs_dq_prealloc_block_nodirty(hint->inode,
+			quota_ret = dquot_prealloc_block_nodirty(hint->inode,
 							 hint->prealloc_size);
 			if (quota_ret)
 				hint->preallocate = hint->prealloc_size = 0;
@@ -1092,7 +1092,7 @@ static inline int blocknrs_and_prealloc_arrays_from_search_start
 					       hint->inode->i_uid);
 #endif
 				/* Free not allocated blocks */
-				vfs_dq_free_block_nodirty(hint->inode,
+				dquot_free_block_nodirty(hint->inode,
 					amount_needed + hint->prealloc_size -
 					nr_allocated);
 			}
@@ -1125,7 +1125,7 @@ static inline int blocknrs_and_prealloc_arrays_from_search_start
 			       REISERFS_I(hint->inode)->i_prealloc_count,
 			       hint->inode->i_uid);
 #endif
-		vfs_dq_free_block_nodirty(hint->inode, amount_needed +
+		dquot_free_block_nodirty(hint->inode, amount_needed +
 					 hint->prealloc_size - nr_allocated -
 					 REISERFS_I(hint->inode)->
 					 i_prealloc_count);
diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c
index 5fa7118f04e1..313d39d639eb 100644
--- a/fs/reiserfs/stree.c
+++ b/fs/reiserfs/stree.c
@@ -1299,7 +1299,7 @@ int reiserfs_delete_item(struct reiserfs_transaction_handle *th,
 		       "reiserquota delete_item(): freeing %u, id=%u type=%c",
 		       quota_cut_bytes, inode->i_uid, head2type(&s_ih));
 #endif
-	vfs_dq_free_space_nodirty(inode, quota_cut_bytes);
+	dquot_free_space_nodirty(inode, quota_cut_bytes);
 
 	/* Return deleted body length */
 	return ret_value;
@@ -1383,7 +1383,7 @@ void reiserfs_delete_solid_item(struct reiserfs_transaction_handle *th,
 					       quota_cut_bytes, inode->i_uid,
 					       key2type(key));
 #endif
-				vfs_dq_free_space_nodirty(inode,
+				dquot_free_space_nodirty(inode,
 							 quota_cut_bytes);
 			}
 			break;
@@ -1733,7 +1733,7 @@ int reiserfs_cut_from_item(struct reiserfs_transaction_handle *th,
 		       "reiserquota cut_from_item(): freeing %u id=%u type=%c",
 		       quota_cut_bytes, inode->i_uid, '?');
 #endif
-	vfs_dq_free_space_nodirty(inode, quota_cut_bytes);
+	dquot_free_space_nodirty(inode, quota_cut_bytes);
 	return ret_value;
 }
 
@@ -1968,9 +1968,10 @@ int reiserfs_paste_into_item(struct reiserfs_transaction_handle *th, struct tree
 		       key2type(&(key->on_disk_key)));
 #endif
 
-	if (vfs_dq_alloc_space_nodirty(inode, pasted_size)) {
+	retval = dquot_alloc_space_nodirty(inode, pasted_size);
+	if (retval) {
 		pathrelse(search_path);
-		return -EDQUOT;
+		return retval;
 	}
 	init_tb_struct(th, &s_paste_balance, th->t_super, search_path,
 		       pasted_size);
@@ -2024,7 +2025,7 @@ int reiserfs_paste_into_item(struct reiserfs_transaction_handle *th, struct tree
 		       pasted_size, inode->i_uid,
 		       key2type(&(key->on_disk_key)));
 #endif
-	vfs_dq_free_space_nodirty(inode, pasted_size);
+	dquot_free_space_nodirty(inode, pasted_size);
 	return retval;
 }
 
@@ -2062,9 +2063,10 @@ int reiserfs_insert_item(struct reiserfs_transaction_handle *th,
 #endif
 		/* We can't dirty inode here. It would be immediately written but
 		 * appropriate stat item isn't inserted yet... */
-		if (vfs_dq_alloc_space_nodirty(inode, quota_bytes)) {
+		retval = dquot_alloc_space_nodirty(inode, quota_bytes);
+		if (retval) {
 			pathrelse(path);
-			return -EDQUOT;
+			return retval;
 		}
 	}
 	init_tb_struct(th, &s_ins_balance, th->t_super, path,
@@ -2113,6 +2115,6 @@ int reiserfs_insert_item(struct reiserfs_transaction_handle *th,
 		       quota_bytes, inode->i_uid, head2type(ih));
 #endif
 	if (inode)
-		vfs_dq_free_space_nodirty(inode, quota_bytes);
+		dquot_free_space_nodirty(inode, quota_bytes);
 	return retval;
 }
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index b4a7dd03bdb9..ea4a77e9d7f5 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -618,9 +618,7 @@ static int reiserfs_quota_on(struct super_block *, int, int, char *, int);
 static const struct dquot_operations reiserfs_quota_operations = {
 	.initialize = dquot_initialize,
 	.drop = dquot_drop,
-	.alloc_space = dquot_alloc_space,
 	.alloc_inode = dquot_alloc_inode,
-	.free_space = dquot_free_space,
 	.free_inode = dquot_free_inode,
 	.transfer = dquot_transfer,
 	.write_dquot = reiserfs_write_dquot,
diff --git a/fs/udf/balloc.c b/fs/udf/balloc.c
index 82372e332f08..e2ff180173a2 100644
--- a/fs/udf/balloc.c
+++ b/fs/udf/balloc.c
@@ -208,7 +208,7 @@ static void udf_bitmap_free_blocks(struct super_block *sb,
 					((char *)bh->b_data)[(bit + i) >> 3]);
 			} else {
 				if (inode)
-					vfs_dq_free_block(inode, 1);
+					dquot_free_block(inode, 1);
 				udf_add_free_space(sb, sbi->s_partition, 1);
 			}
 		}
@@ -260,11 +260,11 @@ static int udf_bitmap_prealloc_blocks(struct super_block *sb,
 		while (bit < (sb->s_blocksize << 3) && block_count > 0) {
 			if (!udf_test_bit(bit, bh->b_data))
 				goto out;
-			else if (vfs_dq_prealloc_block(inode, 1))
+			else if (dquot_prealloc_block(inode, 1))
 				goto out;
 			else if (!udf_clear_bit(bit, bh->b_data)) {
 				udf_debug("bit already cleared for block %d\n", bit);
-				vfs_dq_free_block(inode, 1);
+				dquot_free_block(inode, 1);
 				goto out;
 			}
 			block_count--;
@@ -390,10 +390,14 @@ got_block:
 	/*
 	 * Check quota for allocation of this block.
 	 */
-	if (inode && vfs_dq_alloc_block(inode, 1)) {
-		mutex_unlock(&sbi->s_alloc_mutex);
-		*err = -EDQUOT;
-		return 0;
+	if (inode) {
+		int ret = dquot_alloc_block(inode, 1);
+
+		if (ret) {
+			mutex_unlock(&sbi->s_alloc_mutex);
+			*err = ret;
+			return 0;
+		}
 	}
 
 	newblock = bit + (block_group << (sb->s_blocksize_bits + 3)) -
@@ -449,7 +453,7 @@ static void udf_table_free_blocks(struct super_block *sb,
 	/* We do this up front - There are some error conditions that
 	   could occure, but.. oh well */
 	if (inode)
-		vfs_dq_free_block(inode, count);
+		dquot_free_block(inode, count);
 	udf_add_free_space(sb, sbi->s_partition, count);
 
 	start = bloc->logicalBlockNum + offset;
@@ -694,7 +698,7 @@ static int udf_table_prealloc_blocks(struct super_block *sb,
 		epos.offset -= adsize;
 
 		alloc_count = (elen >> sb->s_blocksize_bits);
-		if (inode && vfs_dq_prealloc_block(inode,
+		if (inode && dquot_prealloc_block(inode,
 			alloc_count > block_count ? block_count : alloc_count))
 			alloc_count = 0;
 		else if (alloc_count > block_count) {
@@ -797,12 +801,13 @@ static int udf_table_new_block(struct super_block *sb,
 	newblock = goal_eloc.logicalBlockNum;
 	goal_eloc.logicalBlockNum++;
 	goal_elen -= sb->s_blocksize;
-
-	if (inode && vfs_dq_alloc_block(inode, 1)) {
-		brelse(goal_epos.bh);
-		mutex_unlock(&sbi->s_alloc_mutex);
-		*err = -EDQUOT;
-		return 0;
+	if (inode) {
+		*err = dquot_alloc_block(inode, 1);
+		if (*err) {
+			brelse(goal_epos.bh);
+			mutex_unlock(&sbi->s_alloc_mutex);
+			return 0;
+		}
 	}
 
 	if (goal_elen)
diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c
index 54c16ec95dff..5cfa4d85ccf2 100644
--- a/fs/ufs/balloc.c
+++ b/fs/ufs/balloc.c
@@ -85,7 +85,7 @@ void ufs_free_fragments(struct inode *inode, u64 fragment, unsigned count)
 				   "bit already cleared for fragment %u", i);
 	}
 	
-	vfs_dq_free_block(inode, count);
+	dquot_free_block(inode, count);
 
 	
 	fs32_add(sb, &ucg->cg_cs.cs_nffree, count);
@@ -195,7 +195,7 @@ do_more:
 		ubh_setblock(UCPI_UBH(ucpi), ucpi->c_freeoff, blkno);
 		if ((UFS_SB(sb)->s_flags & UFS_CG_MASK) == UFS_CG_44BSD)
 			ufs_clusteracct (sb, ucpi, blkno, 1);
-		vfs_dq_free_block(inode, uspi->s_fpb);
+		dquot_free_block(inode, uspi->s_fpb);
 
 		fs32_add(sb, &ucg->cg_cs.cs_nbfree, 1);
 		uspi->cs_total.cs_nbfree++;
@@ -511,6 +511,7 @@ static u64 ufs_add_fragments(struct inode *inode, u64 fragment,
 	struct ufs_cg_private_info * ucpi;
 	struct ufs_cylinder_group * ucg;
 	unsigned cgno, fragno, fragoff, count, fragsize, i;
+	int ret;
 	
 	UFSD("ENTER, fragment %llu, oldcount %u, newcount %u\n",
 	     (unsigned long long)fragment, oldcount, newcount);
@@ -556,8 +557,9 @@ static u64 ufs_add_fragments(struct inode *inode, u64 fragment,
 		fs32_add(sb, &ucg->cg_frsum[fragsize - count], 1);
 	for (i = oldcount; i < newcount; i++)
 		ubh_clrbit (UCPI_UBH(ucpi), ucpi->c_freeoff, fragno + i);
-	if (vfs_dq_alloc_block(inode, count)) {
-		*err = -EDQUOT;
+	ret = dquot_alloc_block(inode, count);
+	if (ret) {
+		*err = ret;
 		return 0;
 	}
 
@@ -596,6 +598,7 @@ static u64 ufs_alloc_fragments(struct inode *inode, unsigned cgno,
 	struct ufs_cylinder_group * ucg;
 	unsigned oldcg, i, j, k, allocsize;
 	u64 result;
+	int ret;
 	
 	UFSD("ENTER, ino %lu, cgno %u, goal %llu, count %u\n",
 	     inode->i_ino, cgno, (unsigned long long)goal, count);
@@ -664,7 +667,7 @@ cg_found:
 		for (i = count; i < uspi->s_fpb; i++)
 			ubh_setbit (UCPI_UBH(ucpi), ucpi->c_freeoff, goal + i);
 		i = uspi->s_fpb - count;
-		vfs_dq_free_block(inode, i);
+		dquot_free_block(inode, i);
 
 		fs32_add(sb, &ucg->cg_cs.cs_nffree, i);
 		uspi->cs_total.cs_nffree += i;
@@ -676,8 +679,9 @@ cg_found:
 	result = ufs_bitmap_search (sb, ucpi, goal, allocsize);
 	if (result == INVBLOCK)
 		return 0;
-	if (vfs_dq_alloc_block(inode, count)) {
-		*err = -EDQUOT;
+	ret = dquot_alloc_block(inode, count);
+	if (ret) {
+		*err = ret;
 		return 0;
 	}
 	for (i = 0; i < count; i++)
@@ -714,6 +718,7 @@ static u64 ufs_alloccg_block(struct inode *inode,
 	struct ufs_super_block_first * usb1;
 	struct ufs_cylinder_group * ucg;
 	u64 result, blkno;
+	int ret;
 
 	UFSD("ENTER, goal %llu\n", (unsigned long long)goal);
 
@@ -747,8 +752,9 @@ gotit:
 	ubh_clrblock (UCPI_UBH(ucpi), ucpi->c_freeoff, blkno);
 	if ((UFS_SB(sb)->s_flags & UFS_CG_MASK) == UFS_CG_44BSD)
 		ufs_clusteracct (sb, ucpi, blkno, -1);
-	if (vfs_dq_alloc_block(inode, uspi->s_fpb)) {
-		*err = -EDQUOT;
+	ret = dquot_alloc_block(inode, uspi->s_fpb);
+	if (ret) {
+		*err = ret;
 		return INVBLOCK;
 	}
 
diff --git a/include/linux/quota.h b/include/linux/quota.h
index edf34f2fe87d..1b14ad287fe3 100644
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -297,9 +297,7 @@ struct quota_format_ops {
 struct dquot_operations {
 	int (*initialize) (struct inode *, int);
 	int (*drop) (struct inode *);
-	int (*alloc_space) (struct inode *, qsize_t, int);
 	int (*alloc_inode) (const struct inode *, qsize_t);
-	int (*free_space) (struct inode *, qsize_t);
 	int (*free_inode) (const struct inode *, qsize_t);
 	int (*transfer) (struct inode *, qid_t *, unsigned long);
 	int (*write_dquot) (struct dquot *);		/* Ordinary dquot write */
@@ -309,12 +307,6 @@ struct dquot_operations {
 	int (*release_dquot) (struct dquot *);		/* Quota is going to be deleted from disk */
 	int (*mark_dirty) (struct dquot *);		/* Dquot is marked dirty */
 	int (*write_info) (struct super_block *, int);	/* Write of quota "superblock" */
-	/* reserve quota for delayed block allocation */
-	int (*reserve_space) (struct inode *, qsize_t, int);
-	/* claim reserved quota for delayed alloc */
-	int (*claim_space) (struct inode *, qsize_t);
-	/* release rsved quota for delayed alloc */
-	void (*release_rsv) (struct inode *, qsize_t);
 	/* get reserved quota for delayed alloc, value returned is managed by
 	 * quota code only */
 	qsize_t *(*get_reserved_space) (struct inode *);
diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h
index e1cae204b5d9..47e85682e118 100644
--- a/include/linux/quotaops.h
+++ b/include/linux/quotaops.h
@@ -33,14 +33,13 @@ int dquot_scan_active(struct super_block *sb,
 struct dquot *dquot_alloc(struct super_block *sb, int type);
 void dquot_destroy(struct dquot *dquot);
 
-int dquot_alloc_space(struct inode *inode, qsize_t number, int prealloc);
-int dquot_alloc_inode(const struct inode *inode, qsize_t number);
+int __dquot_alloc_space(struct inode *inode, qsize_t number,
+		int warn, int reserve);
+void __dquot_free_space(struct inode *inode, qsize_t number, int reserve);
 
-int dquot_reserve_space(struct inode *inode, qsize_t number, int prealloc);
-int dquot_claim_space(struct inode *inode, qsize_t number);
-void dquot_release_reserved_space(struct inode *inode, qsize_t number);
+int dquot_alloc_inode(const struct inode *inode, qsize_t number);
 
-int dquot_free_space(struct inode *inode, qsize_t number);
+int dquot_claim_space_nodirty(struct inode *inode, qsize_t number);
 int dquot_free_inode(const struct inode *inode, qsize_t number);
 
 int dquot_transfer(struct inode *inode, qid_t *chid, unsigned long mask);
@@ -149,60 +148,6 @@ static inline void vfs_dq_init(struct inode *inode)
 		inode->i_sb->dq_op->initialize(inode, -1);
 }
 
-/* The following allocation/freeing/transfer functions *must* be called inside
- * a transaction (deadlocks possible otherwise) */
-static inline int vfs_dq_prealloc_space_nodirty(struct inode *inode, qsize_t nr)
-{
-	if (sb_any_quota_active(inode->i_sb)) {
-		/* Used space is updated in alloc_space() */
-		if (inode->i_sb->dq_op->alloc_space(inode, nr, 1) == NO_QUOTA)
-			return 1;
-	}
-	else
-		inode_add_bytes(inode, nr);
-	return 0;
-}
-
-static inline int vfs_dq_prealloc_space(struct inode *inode, qsize_t nr)
-{
-	int ret;
-        if (!(ret =  vfs_dq_prealloc_space_nodirty(inode, nr)))
-		mark_inode_dirty(inode);
-	return ret;
-}
-
-static inline int vfs_dq_alloc_space_nodirty(struct inode *inode, qsize_t nr)
-{
-	if (sb_any_quota_active(inode->i_sb)) {
-		/* Used space is updated in alloc_space() */
-		if (inode->i_sb->dq_op->alloc_space(inode, nr, 0) == NO_QUOTA)
-			return 1;
-	}
-	else
-		inode_add_bytes(inode, nr);
-	return 0;
-}
-
-static inline int vfs_dq_alloc_space(struct inode *inode, qsize_t nr)
-{
-	int ret;
-	if (!(ret = vfs_dq_alloc_space_nodirty(inode, nr)))
-		mark_inode_dirty(inode);
-	return ret;
-}
-
-static inline int vfs_dq_reserve_space(struct inode *inode, qsize_t nr)
-{
-	if (sb_any_quota_active(inode->i_sb)) {
-		/* Used space is updated in alloc_space() */
-		if (inode->i_sb->dq_op->reserve_space(inode, nr, 0) == NO_QUOTA)
-			return 1;
-	}
-	else
-		inode_add_rsv_space(inode, nr);
-	return 0;
-}
-
 static inline int vfs_dq_alloc_inode(struct inode *inode)
 {
 	if (sb_any_quota_active(inode->i_sb)) {
@@ -213,47 +158,6 @@ static inline int vfs_dq_alloc_inode(struct inode *inode)
 	return 0;
 }
 
-/*
- * Convert in-memory reserved quotas to real consumed quotas
- */
-static inline int vfs_dq_claim_space(struct inode *inode, qsize_t nr)
-{
-	if (sb_any_quota_active(inode->i_sb)) {
-		if (inode->i_sb->dq_op->claim_space(inode, nr) == NO_QUOTA)
-			return 1;
-	} else
-		inode_claim_rsv_space(inode, nr);
-
-	mark_inode_dirty(inode);
-	return 0;
-}
-
-/*
- * Release reserved (in-memory) quotas
- */
-static inline
-void vfs_dq_release_reservation_space(struct inode *inode, qsize_t nr)
-{
-	if (sb_any_quota_active(inode->i_sb))
-		inode->i_sb->dq_op->release_rsv(inode, nr);
-	else
-		inode_sub_rsv_space(inode, nr);
-}
-
-static inline void vfs_dq_free_space_nodirty(struct inode *inode, qsize_t nr)
-{
-	if (sb_any_quota_active(inode->i_sb))
-		inode->i_sb->dq_op->free_space(inode, nr);
-	else
-		inode_sub_bytes(inode, nr);
-}
-
-static inline void vfs_dq_free_space(struct inode *inode, qsize_t nr)
-{
-	vfs_dq_free_space_nodirty(inode, nr);
-	mark_inode_dirty(inode);
-}
-
 static inline void vfs_dq_free_inode(struct inode *inode)
 {
 	if (sb_any_quota_active(inode->i_sb))
@@ -351,105 +255,109 @@ static inline int vfs_dq_transfer(struct inode *inode, struct iattr *iattr)
 	return 0;
 }
 
-static inline int vfs_dq_prealloc_space_nodirty(struct inode *inode, qsize_t nr)
+static inline int __dquot_alloc_space(struct inode *inode, qsize_t number,
+		int warn, int reserve)
 {
-	inode_add_bytes(inode, nr);
+	if (!reserve)
+		inode_add_bytes(inode, number);
 	return 0;
 }
 
-static inline int vfs_dq_prealloc_space(struct inode *inode, qsize_t nr)
+static inline void __dquot_free_space(struct inode *inode, qsize_t number,
+		int reserve)
 {
-	vfs_dq_prealloc_space_nodirty(inode, nr);
-	mark_inode_dirty(inode);
-	return 0;
+	if (!reserve)
+		inode_sub_bytes(inode, number);
 }
 
-static inline int vfs_dq_alloc_space_nodirty(struct inode *inode, qsize_t nr)
+static inline int dquot_claim_space_nodirty(struct inode *inode, qsize_t number)
 {
-	inode_add_bytes(inode, nr);
+	inode_add_bytes(inode, number);
 	return 0;
 }
 
-static inline int vfs_dq_alloc_space(struct inode *inode, qsize_t nr)
+#endif /* CONFIG_QUOTA */
+
+static inline int dquot_alloc_space_nodirty(struct inode *inode, qsize_t nr)
 {
-	vfs_dq_alloc_space_nodirty(inode, nr);
-	mark_inode_dirty(inode);
-	return 0;
+	return __dquot_alloc_space(inode, nr, 1, 0);
 }
 
-static inline int vfs_dq_reserve_space(struct inode *inode, qsize_t nr)
+static inline int dquot_alloc_space(struct inode *inode, qsize_t nr)
 {
-	return 0;
+	int ret;
+
+	ret = dquot_alloc_space_nodirty(inode, nr);
+	if (!ret)
+		mark_inode_dirty(inode);
+	return ret;
 }
 
-static inline int vfs_dq_claim_space(struct inode *inode, qsize_t nr)
+static inline int dquot_alloc_block_nodirty(struct inode *inode, qsize_t nr)
 {
-	return vfs_dq_alloc_space(inode, nr);
+	return dquot_alloc_space_nodirty(inode, nr << inode->i_blkbits);
 }
 
-static inline
-int vfs_dq_release_reservation_space(struct inode *inode, qsize_t nr)
+static inline int dquot_alloc_block(struct inode *inode, qsize_t nr)
 {
-	return 0;
+	return dquot_alloc_space(inode, nr << inode->i_blkbits);
 }
 
-static inline void vfs_dq_free_space_nodirty(struct inode *inode, qsize_t nr)
+static inline int dquot_prealloc_block_nodirty(struct inode *inode, qsize_t nr)
 {
-	inode_sub_bytes(inode, nr);
+	return __dquot_alloc_space(inode, nr << inode->i_blkbits, 0, 0);
 }
 
-static inline void vfs_dq_free_space(struct inode *inode, qsize_t nr)
+static inline int dquot_prealloc_block(struct inode *inode, qsize_t nr)
 {
-	vfs_dq_free_space_nodirty(inode, nr);
-	mark_inode_dirty(inode);
-}	
-
-#endif /* CONFIG_QUOTA */
+	int ret;
 
-static inline int vfs_dq_prealloc_block_nodirty(struct inode *inode, qsize_t nr)
-{
-	return vfs_dq_prealloc_space_nodirty(inode, nr << inode->i_blkbits);
+	ret = dquot_prealloc_block_nodirty(inode, nr);
+	if (!ret)
+		mark_inode_dirty(inode);
+	return ret;
 }
 
-static inline int vfs_dq_prealloc_block(struct inode *inode, qsize_t nr)
+static inline int dquot_reserve_block(struct inode *inode, qsize_t nr)
 {
-	return vfs_dq_prealloc_space(inode, nr << inode->i_blkbits);
+	return __dquot_alloc_space(inode, nr << inode->i_blkbits, 1, 1);
 }
 
-static inline int vfs_dq_alloc_block_nodirty(struct inode *inode, qsize_t nr)
+static inline int dquot_claim_block(struct inode *inode, qsize_t nr)
 {
-	return vfs_dq_alloc_space_nodirty(inode, nr << inode->i_blkbits);
-}
+	int ret;
 
-static inline int vfs_dq_alloc_block(struct inode *inode, qsize_t nr)
-{
-	return vfs_dq_alloc_space(inode, nr << inode->i_blkbits);
+	ret = dquot_claim_space_nodirty(inode, nr << inode->i_blkbits);
+	if (!ret)
+		mark_inode_dirty(inode);
+	return ret;
 }
 
-static inline int vfs_dq_reserve_block(struct inode *inode, qsize_t nr)
+static inline void dquot_free_space_nodirty(struct inode *inode, qsize_t nr)
 {
-	return vfs_dq_reserve_space(inode, nr << inode->i_blkbits);
+	__dquot_free_space(inode, nr, 0);
 }
 
-static inline int vfs_dq_claim_block(struct inode *inode, qsize_t nr)
+static inline void dquot_free_space(struct inode *inode, qsize_t nr)
 {
-	return vfs_dq_claim_space(inode, nr << inode->i_blkbits);
+	dquot_free_space_nodirty(inode, nr);
+	mark_inode_dirty(inode);
 }
 
-static inline
-void vfs_dq_release_reservation_block(struct inode *inode, qsize_t nr)
+static inline void dquot_free_block_nodirty(struct inode *inode, qsize_t nr)
 {
-	vfs_dq_release_reservation_space(inode, nr << inode->i_blkbits);
+	dquot_free_space_nodirty(inode, nr << inode->i_blkbits);
 }
 
-static inline void vfs_dq_free_block_nodirty(struct inode *inode, qsize_t nr)
+static inline void dquot_free_block(struct inode *inode, qsize_t nr)
 {
-	vfs_dq_free_space_nodirty(inode, nr << inode->i_blkbits);
+	dquot_free_space(inode, nr << inode->i_blkbits);
 }
 
-static inline void vfs_dq_free_block(struct inode *inode, qsize_t nr)
+static inline void dquot_release_reservation_block(struct inode *inode,
+		qsize_t nr)
 {
-	vfs_dq_free_space(inode, nr << inode->i_blkbits);
+	__dquot_free_space(inode, nr << inode->i_blkbits, 1);
 }
 
 #endif /* _LINUX_QUOTAOPS_ */
-- 
cgit v1.2.3


From 63936ddaa16b9486e2d426ed7b09f559a5c60f87 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Wed, 3 Mar 2010 09:05:01 -0500
Subject: dquot: cleanup inode allocation / freeing routines

Get rid of the alloc_inode and free_inode dquot operations - they are
always called from the filesystem and if a filesystem really needs
their own (which none currently does) it can just call into it's
own routine directly.

Also get rid of the vfs_dq_alloc/vfs_dq_free wrappers and always
call the lowlevel dquot_alloc_inode / dqout_free_inode routines
directly, which now lose the number argument which is always 1.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 Documentation/filesystems/Locking |  8 --------
 fs/ext2/ialloc.c                  | 10 +++++-----
 fs/ext3/ialloc.c                  | 10 +++++-----
 fs/ext3/super.c                   |  2 --
 fs/ext4/ialloc.c                  | 10 +++++-----
 fs/ext4/super.c                   |  2 --
 fs/jfs/inode.c                    |  2 +-
 fs/jfs/jfs_inode.c                |  6 +++---
 fs/ocfs2/inode.c                  |  2 +-
 fs/ocfs2/namei.c                  | 30 +++++++++---------------------
 fs/ocfs2/quota_global.c           |  2 --
 fs/quota/dquot.c                  | 29 +++++++++++++----------------
 fs/reiserfs/inode.c               | 10 +++++-----
 fs/reiserfs/super.c               |  2 --
 fs/udf/ialloc.c                   | 10 ++++++----
 fs/ufs/ialloc.c                   |  7 ++++---
 include/linux/quota.h             |  2 --
 include/linux/quotaops.h          | 24 ++++--------------------
 18 files changed, 61 insertions(+), 107 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index 1192fde11638..4428f55f2131 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -462,8 +462,6 @@ in sys_read() and friends.
 prototypes:
 	int (*initialize) (struct inode *, int);
 	int (*drop) (struct inode *);
-	int (*alloc_inode) (const struct inode *, unsigned long);
-	int (*free_inode) (const struct inode *, unsigned long);
 	int (*transfer) (struct inode *, struct iattr *);
 	int (*write_dquot) (struct dquot *);
 	int (*acquire_dquot) (struct dquot *);
@@ -479,8 +477,6 @@ What filesystem should expect from the generic quota functions:
 		FS recursion	Held locks when called
 initialize:	yes		maybe dqonoff_sem
 drop:		yes		-
-alloc_inode:	->mark_dirty()	-
-free_inode:	->mark_dirty()	-
 transfer:	yes		-
 write_dquot:	yes		dqonoff_sem or dqptr_sem
 acquire_dquot:	yes		dqonoff_sem or dqptr_sem
@@ -491,10 +487,6 @@ write_info:	yes		dqonoff_sem
 FS recursion means calling ->quota_read() and ->quota_write() from superblock
 operations.
 
-->alloc_inode(), ->free_inode() are called
-only directly by the filesystem and do not call any fs functions only
-the ->mark_dirty() operation.
-
 More details about quota locking can be found in fs/dquot.c.
 
 --------------------------- vm_operations_struct -----------------------------
diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c
index 15387c9c17d8..d12f9809559c 100644
--- a/fs/ext2/ialloc.c
+++ b/fs/ext2/ialloc.c
@@ -121,7 +121,7 @@ void ext2_free_inode (struct inode * inode)
 	if (!is_bad_inode(inode)) {
 		/* Quota is already initialized in iput() */
 		ext2_xattr_delete_inode(inode);
-		vfs_dq_free_inode(inode);
+		dquot_free_inode(inode);
 		vfs_dq_drop(inode);
 	}
 
@@ -586,10 +586,10 @@ got:
 		goto fail_drop;
 	}
 
-	if (vfs_dq_alloc_inode(inode)) {
-		err = -EDQUOT;
+	vfs_dq_init(inode);
+	err = dquot_alloc_inode(inode);
+	if (err)
 		goto fail_drop;
-	}
 
 	err = ext2_init_acl(inode, dir);
 	if (err)
@@ -605,7 +605,7 @@ got:
 	return inode;
 
 fail_free_drop:
-	vfs_dq_free_inode(inode);
+	dquot_free_inode(inode);
 
 fail_drop:
 	vfs_dq_drop(inode);
diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c
index b39991285136..8bf00e997c38 100644
--- a/fs/ext3/ialloc.c
+++ b/fs/ext3/ialloc.c
@@ -125,7 +125,7 @@ void ext3_free_inode (handle_t *handle, struct inode * inode)
 	 */
 	vfs_dq_init(inode);
 	ext3_xattr_delete_inode(handle, inode);
-	vfs_dq_free_inode(inode);
+	dquot_free_inode(inode);
 	vfs_dq_drop(inode);
 
 	is_directory = S_ISDIR(inode->i_mode);
@@ -588,10 +588,10 @@ got:
 		sizeof(struct ext3_inode) - EXT3_GOOD_OLD_INODE_SIZE : 0;
 
 	ret = inode;
-	if (vfs_dq_alloc_inode(inode)) {
-		err = -EDQUOT;
+	vfs_dq_init(inode);
+	err = dquot_alloc_inode(inode);
+	if (err)
 		goto fail_drop;
-	}
 
 	err = ext3_init_acl(handle, inode, dir);
 	if (err)
@@ -619,7 +619,7 @@ really_out:
 	return ret;
 
 fail_free_drop:
-	vfs_dq_free_inode(inode);
+	dquot_free_inode(inode);
 
 fail_drop:
 	vfs_dq_drop(inode);
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 8c13910a3782..8b8bc4f9cb14 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -752,8 +752,6 @@ static ssize_t ext3_quota_write(struct super_block *sb, int type,
 static const struct dquot_operations ext3_quota_operations = {
 	.initialize	= dquot_initialize,
 	.drop		= dquot_drop,
-	.alloc_inode	= dquot_alloc_inode,
-	.free_inode	= dquot_free_inode,
 	.transfer	= dquot_transfer,
 	.write_dquot	= ext3_write_dquot,
 	.acquire_dquot	= ext3_acquire_dquot,
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index f3624ead4f6c..b0d744cf8b95 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -219,7 +219,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
 	 */
 	vfs_dq_init(inode);
 	ext4_xattr_delete_inode(handle, inode);
-	vfs_dq_free_inode(inode);
+	dquot_free_inode(inode);
 	vfs_dq_drop(inode);
 
 	is_directory = S_ISDIR(inode->i_mode);
@@ -1034,10 +1034,10 @@ got:
 	ei->i_extra_isize = EXT4_SB(sb)->s_want_extra_isize;
 
 	ret = inode;
-	if (vfs_dq_alloc_inode(inode)) {
-		err = -EDQUOT;
+	vfs_dq_init(inode);
+	err = dquot_alloc_inode(inode);
+	if (err)
 		goto fail_drop;
-	}
 
 	err = ext4_init_acl(handle, inode, dir);
 	if (err)
@@ -1074,7 +1074,7 @@ really_out:
 	return ret;
 
 fail_free_drop:
-	vfs_dq_free_inode(inode);
+	dquot_free_inode(inode);
 
 fail_drop:
 	vfs_dq_drop(inode);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index fa8f4deda652..d231da8798e3 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1017,8 +1017,6 @@ static const struct dquot_operations ext4_quota_operations = {
 #ifdef CONFIG_QUOTA
 	.get_reserved_space = ext4_get_reserved_space,
 #endif
-	.alloc_inode	= dquot_alloc_inode,
-	.free_inode	= dquot_free_inode,
 	.transfer	= dquot_transfer,
 	.write_dquot	= ext4_write_dquot,
 	.acquire_dquot	= ext4_acquire_dquot,
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c
index b2ae190a77ba..2562d18988f7 100644
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -159,7 +159,7 @@ void jfs_delete_inode(struct inode *inode)
 		 * Free the inode from the quota allocation.
 		 */
 		vfs_dq_init(inode);
-		vfs_dq_free_inode(inode);
+		dquot_free_inode(inode);
 		vfs_dq_drop(inode);
 	}
 
diff --git a/fs/jfs/jfs_inode.c b/fs/jfs/jfs_inode.c
index dc0e02159ac9..7762f33e062b 100644
--- a/fs/jfs/jfs_inode.c
+++ b/fs/jfs/jfs_inode.c
@@ -116,10 +116,10 @@ struct inode *ialloc(struct inode *parent, umode_t mode)
 	/*
 	 * Allocate inode to quota.
 	 */
-	if (vfs_dq_alloc_inode(inode)) {
-		rc = -EDQUOT;
+	vfs_dq_init(inode);
+	rc = dquot_alloc_inode(inode);
+	if (rc)
 		goto fail_drop;
-	}
 
 	inode->i_mode = mode;
 	/* inherit flags from parent */
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 88459bdd1ff3..cb7f67d8441a 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -665,7 +665,7 @@ static int ocfs2_remove_inode(struct inode *inode,
 	}
 
 	ocfs2_remove_from_cache(INODE_CACHE(inode), di_bh);
-	vfs_dq_free_inode(inode);
+	dquot_free_inode(inode);
 
 	status = ocfs2_free_dinode(handle, inode_alloc_inode,
 				   inode_alloc_bh, di);
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 13adaa1f40cd..99766b6418eb 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -348,13 +348,9 @@ static int ocfs2_mknod(struct inode *dir,
 		goto leave;
 	}
 
-	/* We don't use standard VFS wrapper because we don't want vfs_dq_init
-	 * to be called. */
-	if (sb_any_quota_active(osb->sb) &&
-	    osb->sb->dq_op->alloc_inode(inode, 1) == NO_QUOTA) {
-		status = -EDQUOT;
+	status = dquot_alloc_inode(inode);
+	if (status)
 		goto leave;
-	}
 	did_quota_inode = 1;
 
 	mlog_entry("(0x%p, 0x%p, %d, %lu, '%.*s')\n", dir, dentry,
@@ -431,7 +427,7 @@ static int ocfs2_mknod(struct inode *dir,
 	status = 0;
 leave:
 	if (status < 0 && did_quota_inode)
-		vfs_dq_free_inode(inode);
+		dquot_free_inode(inode);
 	if (handle)
 		ocfs2_commit_trans(osb, handle);
 
@@ -1688,13 +1684,9 @@ static int ocfs2_symlink(struct inode *dir,
 		goto bail;
 	}
 
-	/* We don't use standard VFS wrapper because we don't want vfs_dq_init
-	 * to be called. */
-	if (sb_any_quota_active(osb->sb) &&
-	    osb->sb->dq_op->alloc_inode(inode, 1) == NO_QUOTA) {
-		status = -EDQUOT;
+	status = dquot_alloc_inode(inode);
+	if (status)
 		goto bail;
-	}
 	did_quota_inode = 1;
 
 	mlog_entry("(0x%p, 0x%p, %d, '%.*s')\n", dir, dentry,
@@ -1790,7 +1782,7 @@ bail:
 		dquot_free_space_nodirty(inode,
 					ocfs2_clusters_to_bytes(osb->sb, 1));
 	if (status < 0 && did_quota_inode)
-		vfs_dq_free_inode(inode);
+		dquot_free_inode(inode);
 	if (handle)
 		ocfs2_commit_trans(osb, handle);
 
@@ -2098,13 +2090,9 @@ int ocfs2_create_inode_in_orphan(struct inode *dir,
 		goto leave;
 	}
 
-	/* We don't use standard VFS wrapper because we don't want vfs_dq_init
-	 * to be called. */
-	if (sb_any_quota_active(osb->sb) &&
-	    osb->sb->dq_op->alloc_inode(inode, 1) == NO_QUOTA) {
-		status = -EDQUOT;
+	status = dquot_alloc_inode(inode);
+	if (status)
 		goto leave;
-	}
 	did_quota_inode = 1;
 
 	inode->i_nlink = 0;
@@ -2139,7 +2127,7 @@ int ocfs2_create_inode_in_orphan(struct inode *dir,
 	insert_inode_hash(inode);
 leave:
 	if (status < 0 && did_quota_inode)
-		vfs_dq_free_inode(inode);
+		dquot_free_inode(inode);
 	if (handle)
 		ocfs2_commit_trans(osb, handle);
 
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c
index aa66fb277225..ed96b3eeb13c 100644
--- a/fs/ocfs2/quota_global.c
+++ b/fs/ocfs2/quota_global.c
@@ -853,8 +853,6 @@ static void ocfs2_destroy_dquot(struct dquot *dquot)
 const struct dquot_operations ocfs2_quota_operations = {
 	.initialize	= dquot_initialize,
 	.drop		= dquot_drop,
-	.alloc_inode	= dquot_alloc_inode,
-	.free_inode	= dquot_free_inode,
 	.transfer	= dquot_transfer,
 	.write_dquot	= ocfs2_write_dquot,
 	.acquire_dquot	= ocfs2_acquire_dquot,
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index baf202c012cc..ed131318b849 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -1531,15 +1531,15 @@ EXPORT_SYMBOL(__dquot_alloc_space);
 /*
  * This operation can block, but only after everything is updated
  */
-int dquot_alloc_inode(const struct inode *inode, qsize_t number)
+int dquot_alloc_inode(const struct inode *inode)
 {
-	int cnt, ret = NO_QUOTA;
+	int cnt, ret = -EDQUOT;
 	char warntype[MAXQUOTAS];
 
 	/* First test before acquiring mutex - solves deadlocks when we
          * re-enter the quota code and are already holding the mutex */
-	if (IS_NOQUOTA(inode))
-		return QUOTA_OK;
+	if (!sb_any_quota_active(inode->i_sb) || IS_NOQUOTA(inode))
+		return 0;
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++)
 		warntype[cnt] = QUOTA_NL_NOWARN;
 	down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
@@ -1547,7 +1547,7 @@ int dquot_alloc_inode(const struct inode *inode, qsize_t number)
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
 		if (!inode->i_dquot[cnt])
 			continue;
-		if (check_idq(inode->i_dquot[cnt], number, warntype+cnt)
+		if (check_idq(inode->i_dquot[cnt], 1, warntype+cnt)
 		    == NO_QUOTA)
 			goto warn_put_all;
 	}
@@ -1555,12 +1555,12 @@ int dquot_alloc_inode(const struct inode *inode, qsize_t number)
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
 		if (!inode->i_dquot[cnt])
 			continue;
-		dquot_incr_inodes(inode->i_dquot[cnt], number);
+		dquot_incr_inodes(inode->i_dquot[cnt], 1);
 	}
-	ret = QUOTA_OK;
+	ret = 0;
 warn_put_all:
 	spin_unlock(&dq_data_lock);
-	if (ret == QUOTA_OK)
+	if (ret == 0)
 		mark_all_dquot_dirty(inode->i_dquot);
 	flush_warnings(inode->i_dquot, warntype);
 	up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
@@ -1638,29 +1638,28 @@ EXPORT_SYMBOL(__dquot_free_space);
 /*
  * This operation can block, but only after everything is updated
  */
-int dquot_free_inode(const struct inode *inode, qsize_t number)
+void dquot_free_inode(const struct inode *inode)
 {
 	unsigned int cnt;
 	char warntype[MAXQUOTAS];
 
 	/* First test before acquiring mutex - solves deadlocks when we
          * re-enter the quota code and are already holding the mutex */
-	if (IS_NOQUOTA(inode))
-		return QUOTA_OK;
+	if (!sb_any_quota_active(inode->i_sb) || IS_NOQUOTA(inode))
+		return;
 
 	down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
 	spin_lock(&dq_data_lock);
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
 		if (!inode->i_dquot[cnt])
 			continue;
-		warntype[cnt] = info_idq_free(inode->i_dquot[cnt], number);
-		dquot_decr_inodes(inode->i_dquot[cnt], number);
+		warntype[cnt] = info_idq_free(inode->i_dquot[cnt], 1);
+		dquot_decr_inodes(inode->i_dquot[cnt], 1);
 	}
 	spin_unlock(&dq_data_lock);
 	mark_all_dquot_dirty(inode->i_dquot);
 	flush_warnings(inode->i_dquot, warntype);
 	up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
-	return QUOTA_OK;
 }
 EXPORT_SYMBOL(dquot_free_inode);
 
@@ -1815,8 +1814,6 @@ EXPORT_SYMBOL(dquot_commit_info);
 const struct dquot_operations dquot_operations = {
 	.initialize	= dquot_initialize,
 	.drop		= dquot_drop,
-	.alloc_inode	= dquot_alloc_inode,
-	.free_inode	= dquot_free_inode,
 	.transfer	= dquot_transfer,
 	.write_dquot	= dquot_commit,
 	.acquire_dquot	= dquot_acquire,
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 2df0f5c7c60b..f56a3d2e6497 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -54,7 +54,7 @@ void reiserfs_delete_inode(struct inode *inode)
 		 * after delete_object so that quota updates go into the same transaction as
 		 * stat data deletion */
 		if (!err) 
-			vfs_dq_free_inode(inode);
+			dquot_free_inode(inode);
 
 		if (journal_end(&th, inode->i_sb, jbegin_count))
 			goto out;
@@ -1765,10 +1765,10 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
 
 	BUG_ON(!th->t_trans_id);
 
-	if (vfs_dq_alloc_inode(inode)) {
-		err = -EDQUOT;
+	vfs_dq_init(inode);
+	err = dquot_alloc_inode(inode);
+	if (err)
 		goto out_end_trans;
-	}
 	if (!dir->i_nlink) {
 		err = -EPERM;
 		goto out_bad_inode;
@@ -1959,7 +1959,7 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
 	INODE_PKEY(inode)->k_objectid = 0;
 
 	/* Quota change must be inside a transaction for journaling */
-	vfs_dq_free_inode(inode);
+	dquot_free_inode(inode);
 
       out_end_trans:
 	journal_end(th, th->t_super, th->t_blocks_allocated);
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index ea4a77e9d7f5..e942ceecf2b8 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -618,8 +618,6 @@ static int reiserfs_quota_on(struct super_block *, int, int, char *, int);
 static const struct dquot_operations reiserfs_quota_operations = {
 	.initialize = dquot_initialize,
 	.drop = dquot_drop,
-	.alloc_inode = dquot_alloc_inode,
-	.free_inode = dquot_free_inode,
 	.transfer = dquot_transfer,
 	.write_dquot = reiserfs_write_dquot,
 	.acquire_dquot = reiserfs_acquire_dquot,
diff --git a/fs/udf/ialloc.c b/fs/udf/ialloc.c
index c10fa39f97e2..e1856b89c9c8 100644
--- a/fs/udf/ialloc.c
+++ b/fs/udf/ialloc.c
@@ -36,7 +36,7 @@ void udf_free_inode(struct inode *inode)
 	 * Note: we must free any quota before locking the superblock,
 	 * as writing the quota to disk may need the lock as well.
 	 */
-	vfs_dq_free_inode(inode);
+	dquot_free_inode(inode);
 	vfs_dq_drop(inode);
 
 	clear_inode(inode);
@@ -61,7 +61,7 @@ struct inode *udf_new_inode(struct inode *dir, int mode, int *err)
 	struct super_block *sb = dir->i_sb;
 	struct udf_sb_info *sbi = UDF_SB(sb);
 	struct inode *inode;
-	int block;
+	int block, ret;
 	uint32_t start = UDF_I(dir)->i_location.logicalBlockNum;
 	struct udf_inode_info *iinfo;
 	struct udf_inode_info *dinfo = UDF_I(dir);
@@ -153,12 +153,14 @@ struct inode *udf_new_inode(struct inode *dir, int mode, int *err)
 	insert_inode_hash(inode);
 	mark_inode_dirty(inode);
 
-	if (vfs_dq_alloc_inode(inode)) {
+	vfs_dq_init(inode);
+	ret = dquot_alloc_inode(inode);
+	if (ret) {
 		vfs_dq_drop(inode);
 		inode->i_flags |= S_NOQUOTA;
 		inode->i_nlink = 0;
 		iput(inode);
-		*err = -EDQUOT;
+		*err = ret;
 		return NULL;
 	}
 
diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c
index 3527c00fef0d..02f77882c573 100644
--- a/fs/ufs/ialloc.c
+++ b/fs/ufs/ialloc.c
@@ -95,7 +95,7 @@ void ufs_free_inode (struct inode * inode)
 
 	is_directory = S_ISDIR(inode->i_mode);
 
-	vfs_dq_free_inode(inode);
+	dquot_free_inode(inode);
 	vfs_dq_drop(inode);
 
 	clear_inode (inode);
@@ -355,9 +355,10 @@ cg_found:
 
 	unlock_super (sb);
 
-	if (vfs_dq_alloc_inode(inode)) {
+	vfs_dq_init(inode);
+	err = dquot_alloc_inode(inode);
+	if (err) {
 		vfs_dq_drop(inode);
-		err = -EDQUOT;
 		goto fail_without_unlock;
 	}
 
diff --git a/include/linux/quota.h b/include/linux/quota.h
index 1b14ad287fe3..e3b07895d327 100644
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -297,8 +297,6 @@ struct quota_format_ops {
 struct dquot_operations {
 	int (*initialize) (struct inode *, int);
 	int (*drop) (struct inode *);
-	int (*alloc_inode) (const struct inode *, qsize_t);
-	int (*free_inode) (const struct inode *, qsize_t);
 	int (*transfer) (struct inode *, qid_t *, unsigned long);
 	int (*write_dquot) (struct dquot *);		/* Ordinary dquot write */
 	struct dquot *(*alloc_dquot)(struct super_block *, int);	/* Allocate memory for new dquot */
diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h
index 47e85682e118..9ce7f051a4ba 100644
--- a/include/linux/quotaops.h
+++ b/include/linux/quotaops.h
@@ -37,10 +37,10 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number,
 		int warn, int reserve);
 void __dquot_free_space(struct inode *inode, qsize_t number, int reserve);
 
-int dquot_alloc_inode(const struct inode *inode, qsize_t number);
+int dquot_alloc_inode(const struct inode *inode);
 
 int dquot_claim_space_nodirty(struct inode *inode, qsize_t number);
-int dquot_free_inode(const struct inode *inode, qsize_t number);
+void dquot_free_inode(const struct inode *inode);
 
 int dquot_transfer(struct inode *inode, qid_t *chid, unsigned long mask);
 int dquot_commit(struct dquot *dquot);
@@ -148,22 +148,6 @@ static inline void vfs_dq_init(struct inode *inode)
 		inode->i_sb->dq_op->initialize(inode, -1);
 }
 
-static inline int vfs_dq_alloc_inode(struct inode *inode)
-{
-	if (sb_any_quota_active(inode->i_sb)) {
-		vfs_dq_init(inode);
-		if (inode->i_sb->dq_op->alloc_inode(inode, 1) == NO_QUOTA)
-			return 1;
-	}
-	return 0;
-}
-
-static inline void vfs_dq_free_inode(struct inode *inode)
-{
-	if (sb_any_quota_active(inode->i_sb))
-		inode->i_sb->dq_op->free_inode(inode, 1);
-}
-
 /* Cannot be called inside a transaction */
 static inline int vfs_dq_off(struct super_block *sb, int remount)
 {
@@ -231,12 +215,12 @@ static inline void vfs_dq_drop(struct inode *inode)
 {
 }
 
-static inline int vfs_dq_alloc_inode(struct inode *inode)
+static inline int dquot_alloc_inode(const struct inode *inode)
 {
 	return 0;
 }
 
-static inline void vfs_dq_free_inode(struct inode *inode)
+static inline void dquot_free_inode(const struct inode *inode)
 {
 }
 
-- 
cgit v1.2.3


From b43fa8284d7790d9cca32c9c55e24f29be2fa33b Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Wed, 3 Mar 2010 09:05:03 -0500
Subject: dquot: cleanup dquot transfer routine

Get rid of the transfer dquot operation - it is now always called from
the filesystem and if a filesystem really needs it's own (which none
currently does) it can just call into it's own routine directly.

Rename the now static low-level dquot_transfer helper to __dquot_transfer
and vfs_dq_transfer to dquot_transfer to have a consistent namespace,
and make the new dquot_transfer return a normal negative errno value
which all callers expect.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 Documentation/filesystems/Locking |  2 --
 drivers/staging/pohmelfs/inode.c  |  2 +-
 fs/ext2/inode.c                   |  2 +-
 fs/ext3/inode.c                   |  2 +-
 fs/ext3/super.c                   |  1 -
 fs/ext4/inode.c                   |  2 +-
 fs/ext4/super.c                   |  1 -
 fs/jfs/file.c                     |  5 +++--
 fs/ocfs2/file.c                   |  4 ++--
 fs/ocfs2/quota_global.c           |  1 -
 fs/quota/dquot.c                  | 12 +++++-------
 fs/reiserfs/inode.c               |  3 +--
 fs/reiserfs/super.c               |  1 -
 fs/udf/file.c                     |  2 +-
 fs/ufs/truncate.c                 |  2 +-
 include/linux/quota.h             |  1 -
 include/linux/quotaops.h          |  5 ++---
 17 files changed, 19 insertions(+), 29 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index 4428f55f2131..4574e0272bdd 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -462,7 +462,6 @@ in sys_read() and friends.
 prototypes:
 	int (*initialize) (struct inode *, int);
 	int (*drop) (struct inode *);
-	int (*transfer) (struct inode *, struct iattr *);
 	int (*write_dquot) (struct dquot *);
 	int (*acquire_dquot) (struct dquot *);
 	int (*release_dquot) (struct dquot *);
@@ -477,7 +476,6 @@ What filesystem should expect from the generic quota functions:
 		FS recursion	Held locks when called
 initialize:	yes		maybe dqonoff_sem
 drop:		yes		-
-transfer:	yes		-
 write_dquot:	yes		dqonoff_sem or dqptr_sem
 acquire_dquot:	yes		dqonoff_sem or dqptr_sem
 release_dquot:	yes		dqonoff_sem or dqptr_sem
diff --git a/drivers/staging/pohmelfs/inode.c b/drivers/staging/pohmelfs/inode.c
index f69b7783027f..11fc4d5c43e1 100644
--- a/drivers/staging/pohmelfs/inode.c
+++ b/drivers/staging/pohmelfs/inode.c
@@ -969,7 +969,7 @@ int pohmelfs_setattr_raw(struct inode *inode, struct iattr *attr)
 
 	if ((attr->ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) ||
 	    (attr->ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) {
-		err = vfs_dq_transfer(inode, attr) ? -EDQUOT : 0;
+		err = dquot_transfer(inode, attr);
 		if (err)
 			goto err_out_exit;
 	}
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 71b032c65a02..3cfcfd9a131a 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -1459,7 +1459,7 @@ int ext2_setattr(struct dentry *dentry, struct iattr *iattr)
 		return error;
 	if ((iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) ||
 	    (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid)) {
-		error = vfs_dq_transfer(inode, iattr) ? -EDQUOT : 0;
+		error = dquot_transfer(inode, iattr);
 		if (error)
 			return error;
 	}
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 20f02d69365c..14d40a4dd6f0 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -3160,7 +3160,7 @@ int ext3_setattr(struct dentry *dentry, struct iattr *attr)
 			error = PTR_ERR(handle);
 			goto err_out;
 		}
-		error = vfs_dq_transfer(inode, attr) ? -EDQUOT : 0;
+		error = dquot_transfer(inode, attr);
 		if (error) {
 			ext3_journal_stop(handle);
 			return error;
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 8b8bc4f9cb14..f7d4a2c19dee 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -752,7 +752,6 @@ static ssize_t ext3_quota_write(struct super_block *sb, int type,
 static const struct dquot_operations ext3_quota_operations = {
 	.initialize	= dquot_initialize,
 	.drop		= dquot_drop,
-	.transfer	= dquot_transfer,
 	.write_dquot	= ext3_write_dquot,
 	.acquire_dquot	= ext3_acquire_dquot,
 	.release_dquot	= ext3_release_dquot,
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 9f607ea411c8..6a002a6d0624 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -5263,7 +5263,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
 			error = PTR_ERR(handle);
 			goto err_out;
 		}
-		error = vfs_dq_transfer(inode, attr) ? -EDQUOT : 0;
+		error = dquot_transfer(inode, attr);
 		if (error) {
 			ext4_journal_stop(handle);
 			return error;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index d231da8798e3..b4253fb7bab6 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1017,7 +1017,6 @@ static const struct dquot_operations ext4_quota_operations = {
 #ifdef CONFIG_QUOTA
 	.get_reserved_space = ext4_get_reserved_space,
 #endif
-	.transfer	= dquot_transfer,
 	.write_dquot	= ext4_write_dquot,
 	.acquire_dquot	= ext4_acquire_dquot,
 	.release_dquot	= ext4_release_dquot,
diff --git a/fs/jfs/file.c b/fs/jfs/file.c
index a4229e49330e..2c201783836f 100644
--- a/fs/jfs/file.c
+++ b/fs/jfs/file.c
@@ -100,8 +100,9 @@ int jfs_setattr(struct dentry *dentry, struct iattr *iattr)
 
 	if ((iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) ||
 	    (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid)) {
-		if (vfs_dq_transfer(inode, iattr))
-			return -EDQUOT;
+		rc = dquot_transfer(inode, iattr);
+		if (rc)
+			return rc;
 	}
 
 	rc = inode_setattr(inode, iattr);
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 6cf3d8d18369..472e8f8bc892 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1020,7 +1020,7 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
 		/*
 		 * Gather pointers to quota structures so that allocation /
 		 * freeing of quota structures happens here and not inside
-		 * vfs_dq_transfer() where we have problems with lock ordering
+		 * dquot_transfer() where we have problems with lock ordering
 		 */
 		if (attr->ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid
 		    && OCFS2_HAS_RO_COMPAT_FEATURE(sb,
@@ -1053,7 +1053,7 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
 			mlog_errno(status);
 			goto bail_unlock;
 		}
-		status = vfs_dq_transfer(inode, attr) ? -EDQUOT : 0;
+		status = dquot_transfer(inode, attr);
 		if (status < 0)
 			goto bail_commit;
 	} else {
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c
index ed96b3eeb13c..b654bd103b6f 100644
--- a/fs/ocfs2/quota_global.c
+++ b/fs/ocfs2/quota_global.c
@@ -853,7 +853,6 @@ static void ocfs2_destroy_dquot(struct dquot *dquot)
 const struct dquot_operations ocfs2_quota_operations = {
 	.initialize	= dquot_initialize,
 	.drop		= dquot_drop,
-	.transfer	= dquot_transfer,
 	.write_dquot	= ocfs2_write_dquot,
 	.acquire_dquot	= ocfs2_acquire_dquot,
 	.release_dquot	= ocfs2_release_dquot,
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index ed131318b849..78ce4c48ad77 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -1669,7 +1669,7 @@ EXPORT_SYMBOL(dquot_free_inode);
  * This operation can block, but only after everything is updated
  * A transaction must be started when entering this function.
  */
-int dquot_transfer(struct inode *inode, qid_t *chid, unsigned long mask)
+static int __dquot_transfer(struct inode *inode, qid_t *chid, unsigned long mask)
 {
 	qsize_t space, cur_space;
 	qsize_t rsv_space = 0;
@@ -1766,12 +1766,11 @@ over_quota:
 	ret = NO_QUOTA;
 	goto warn_put_all;
 }
-EXPORT_SYMBOL(dquot_transfer);
 
 /* Wrapper for transferring ownership of an inode for uid/gid only
  * Called from FSXXX_setattr()
  */
-int vfs_dq_transfer(struct inode *inode, struct iattr *iattr)
+int dquot_transfer(struct inode *inode, struct iattr *iattr)
 {
 	qid_t chid[MAXQUOTAS];
 	unsigned long mask = 0;
@@ -1786,12 +1785,12 @@ int vfs_dq_transfer(struct inode *inode, struct iattr *iattr)
 	}
 	if (sb_any_quota_active(inode->i_sb) && !IS_NOQUOTA(inode)) {
 		vfs_dq_init(inode);
-		if (inode->i_sb->dq_op->transfer(inode, chid, mask) == NO_QUOTA)
-			return 1;
+		if (__dquot_transfer(inode, chid, mask) == NO_QUOTA)
+			return -EDQUOT;
 	}
 	return 0;
 }
-EXPORT_SYMBOL(vfs_dq_transfer);
+EXPORT_SYMBOL(dquot_transfer);
 
 /*
  * Write info of quota file to disk
@@ -1814,7 +1813,6 @@ EXPORT_SYMBOL(dquot_commit_info);
 const struct dquot_operations dquot_operations = {
 	.initialize	= dquot_initialize,
 	.drop		= dquot_drop,
-	.transfer	= dquot_transfer,
 	.write_dquot	= dquot_commit,
 	.acquire_dquot	= dquot_acquire,
 	.release_dquot	= dquot_release,
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index f56a3d2e6497..99a5e5a8ab5a 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -3134,8 +3134,7 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
 						  jbegin_count);
 				if (error)
 					goto out;
-				error =
-				    vfs_dq_transfer(inode, attr) ? -EDQUOT : 0;
+				error = dquot_transfer(inode, attr);
 				if (error) {
 					journal_end(&th, inode->i_sb,
 						    jbegin_count);
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index e942ceecf2b8..97c3e8ed7db6 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -618,7 +618,6 @@ static int reiserfs_quota_on(struct super_block *, int, int, char *, int);
 static const struct dquot_operations reiserfs_quota_operations = {
 	.initialize = dquot_initialize,
 	.drop = dquot_drop,
-	.transfer = dquot_transfer,
 	.write_dquot = reiserfs_write_dquot,
 	.acquire_dquot = reiserfs_acquire_dquot,
 	.release_dquot = reiserfs_release_dquot,
diff --git a/fs/udf/file.c b/fs/udf/file.c
index 35ca47281faa..2df7fcb677b3 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -229,7 +229,7 @@ static int udf_setattr(struct dentry *dentry, struct iattr *iattr)
 
 	if ((iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) ||
             (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid)) {
-		error = vfs_dq_transfer(inode, iattr) ? -EDQUOT : 0;
+		error = dquot_transfer(inode, iattr);
 		if (error)
 			return error;
 	}
diff --git a/fs/ufs/truncate.c b/fs/ufs/truncate.c
index 56ab31f00bd0..87bbab685901 100644
--- a/fs/ufs/truncate.c
+++ b/fs/ufs/truncate.c
@@ -520,7 +520,7 @@ static int ufs_setattr(struct dentry *dentry, struct iattr *attr)
 
 	if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) ||
 	    (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) {
-		error = vfs_dq_transfer(inode, attr) ? -EDQUOT : 0;
+		error = dquot_transfer(inode, attr);
 		if (error)
 			return error;
 	}
diff --git a/include/linux/quota.h b/include/linux/quota.h
index e3b07895d327..422e6aa78edc 100644
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -297,7 +297,6 @@ struct quota_format_ops {
 struct dquot_operations {
 	int (*initialize) (struct inode *, int);
 	int (*drop) (struct inode *);
-	int (*transfer) (struct inode *, qid_t *, unsigned long);
 	int (*write_dquot) (struct dquot *);		/* Ordinary dquot write */
 	struct dquot *(*alloc_dquot)(struct super_block *, int);	/* Allocate memory for new dquot */
 	void (*destroy_dquot)(struct dquot *);		/* Free memory for dquot */
diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h
index 9ce7f051a4ba..fa27b7218c82 100644
--- a/include/linux/quotaops.h
+++ b/include/linux/quotaops.h
@@ -42,7 +42,6 @@ int dquot_alloc_inode(const struct inode *inode);
 int dquot_claim_space_nodirty(struct inode *inode, qsize_t number);
 void dquot_free_inode(const struct inode *inode);
 
-int dquot_transfer(struct inode *inode, qid_t *chid, unsigned long mask);
 int dquot_commit(struct dquot *dquot);
 int dquot_acquire(struct dquot *dquot);
 int dquot_release(struct dquot *dquot);
@@ -66,7 +65,7 @@ int vfs_get_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *d
 int vfs_set_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *di);
 
 void vfs_dq_drop(struct inode *inode);
-int vfs_dq_transfer(struct inode *inode, struct iattr *iattr);
+int dquot_transfer(struct inode *inode, struct iattr *iattr);
 int vfs_dq_quota_on_remount(struct super_block *sb);
 
 static inline struct mem_dqinfo *sb_dqinfo(struct super_block *sb, int type)
@@ -234,7 +233,7 @@ static inline int vfs_dq_quota_on_remount(struct super_block *sb)
 	return 0;
 }
 
-static inline int vfs_dq_transfer(struct inode *inode, struct iattr *iattr)
+static inline int dquot_transfer(struct inode *inode, struct iattr *iattr)
 {
 	return 0;
 }
-- 
cgit v1.2.3


From 9f7547580263d4a55efe06ce5cfd567f568be6e8 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Wed, 3 Mar 2010 09:05:05 -0500
Subject: dquot: cleanup dquot drop routine

Get rid of the drop dquot operation - it is now always called from
the filesystem and if a filesystem really needs it's own (which none
currently does) it can just call into it's own routine directly.

Rename the now static low-level dquot_drop helper to __dquot_drop
and vfs_dq_drop to dquot_drop to have a consistent namespace.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 Documentation/filesystems/Locking |  2 --
 fs/ext2/ialloc.c                  |  4 +--
 fs/ext2/super.c                   |  2 +-
 fs/ext3/ialloc.c                  |  4 +--
 fs/ext3/super.c                   |  3 +--
 fs/ext4/ialloc.c                  |  4 +--
 fs/ext4/super.c                   |  3 +--
 fs/jfs/inode.c                    |  2 +-
 fs/jfs/jfs_inode.c                |  2 +-
 fs/jfs/super.c                    |  2 +-
 fs/ocfs2/inode.c                  |  2 +-
 fs/ocfs2/quota_global.c           |  1 -
 fs/quota/dquot.c                  | 52 +++++++++++++++++++--------------------
 fs/reiserfs/inode.c               |  2 +-
 fs/reiserfs/namei.c               |  2 +-
 fs/reiserfs/super.c               |  3 +--
 fs/udf/ialloc.c                   |  4 +--
 fs/udf/inode.c                    |  2 +-
 fs/ufs/ialloc.c                   |  4 +--
 fs/ufs/super.c                    |  2 +-
 include/linux/quota.h             |  1 -
 include/linux/quotaops.h          |  5 ++--
 22 files changed, 49 insertions(+), 59 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index 4574e0272bdd..fa10e4bf8e5e 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -461,7 +461,6 @@ in sys_read() and friends.
 --------------------------- dquot_operations -------------------------------
 prototypes:
 	int (*initialize) (struct inode *, int);
-	int (*drop) (struct inode *);
 	int (*write_dquot) (struct dquot *);
 	int (*acquire_dquot) (struct dquot *);
 	int (*release_dquot) (struct dquot *);
@@ -475,7 +474,6 @@ What filesystem should expect from the generic quota functions:
 
 		FS recursion	Held locks when called
 initialize:	yes		maybe dqonoff_sem
-drop:		yes		-
 write_dquot:	yes		dqonoff_sem or dqptr_sem
 acquire_dquot:	yes		dqonoff_sem or dqptr_sem
 release_dquot:	yes		dqonoff_sem or dqptr_sem
diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c
index d12f9809559c..88b71972c626 100644
--- a/fs/ext2/ialloc.c
+++ b/fs/ext2/ialloc.c
@@ -122,7 +122,7 @@ void ext2_free_inode (struct inode * inode)
 		/* Quota is already initialized in iput() */
 		ext2_xattr_delete_inode(inode);
 		dquot_free_inode(inode);
-		vfs_dq_drop(inode);
+		dquot_drop(inode);
 	}
 
 	es = EXT2_SB(sb)->s_es;
@@ -608,7 +608,7 @@ fail_free_drop:
 	dquot_free_inode(inode);
 
 fail_drop:
-	vfs_dq_drop(inode);
+	dquot_drop(inode);
 	inode->i_flags |= S_NOQUOTA;
 	inode->i_nlink = 0;
 	unlock_new_inode(inode);
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 98815d2a5664..42e4a303b675 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -195,7 +195,7 @@ static void ext2_clear_inode(struct inode *inode)
 {
 	struct ext2_block_alloc_info *rsv = EXT2_I(inode)->i_block_alloc_info;
 
-	vfs_dq_drop(inode);
+	dquot_drop(inode);
 	ext2_discard_reservation(inode);
 	EXT2_I(inode)->i_block_alloc_info = NULL;
 	if (unlikely(rsv))
diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c
index 8bf00e997c38..7d7238f9f6f3 100644
--- a/fs/ext3/ialloc.c
+++ b/fs/ext3/ialloc.c
@@ -126,7 +126,7 @@ void ext3_free_inode (handle_t *handle, struct inode * inode)
 	vfs_dq_init(inode);
 	ext3_xattr_delete_inode(handle, inode);
 	dquot_free_inode(inode);
-	vfs_dq_drop(inode);
+	dquot_drop(inode);
 
 	is_directory = S_ISDIR(inode->i_mode);
 
@@ -622,7 +622,7 @@ fail_free_drop:
 	dquot_free_inode(inode);
 
 fail_drop:
-	vfs_dq_drop(inode);
+	dquot_drop(inode);
 	inode->i_flags |= S_NOQUOTA;
 	inode->i_nlink = 0;
 	unlock_new_inode(inode);
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 2277b1a98e62..0163d0dae124 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -529,7 +529,7 @@ static void ext3_clear_inode(struct inode *inode)
 {
 	struct ext3_block_alloc_info *rsv = EXT3_I(inode)->i_block_alloc_info;
 
-	vfs_dq_drop(inode);
+	dquot_drop(inode);
 	ext3_discard_reservation(inode);
 	EXT3_I(inode)->i_block_alloc_info = NULL;
 	if (unlikely(rsv))
@@ -753,7 +753,6 @@ static ssize_t ext3_quota_write(struct super_block *sb, int type,
 
 static const struct dquot_operations ext3_quota_operations = {
 	.initialize	= dquot_initialize,
-	.drop		= dquot_drop,
 	.write_dquot	= ext3_write_dquot,
 	.acquire_dquot	= ext3_acquire_dquot,
 	.release_dquot	= ext3_release_dquot,
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index b0d744cf8b95..ca8986e4b528 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -220,7 +220,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
 	vfs_dq_init(inode);
 	ext4_xattr_delete_inode(handle, inode);
 	dquot_free_inode(inode);
-	vfs_dq_drop(inode);
+	dquot_drop(inode);
 
 	is_directory = S_ISDIR(inode->i_mode);
 
@@ -1077,7 +1077,7 @@ fail_free_drop:
 	dquot_free_inode(inode);
 
 fail_drop:
-	vfs_dq_drop(inode);
+	dquot_drop(inode);
 	inode->i_flags |= S_NOQUOTA;
 	inode->i_nlink = 0;
 	unlock_new_inode(inode);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 56554c8850ec..035516c80df2 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -761,7 +761,7 @@ static void destroy_inodecache(void)
 
 static void ext4_clear_inode(struct inode *inode)
 {
-	vfs_dq_drop(inode);
+	dquot_drop(inode);
 	ext4_discard_preallocations(inode);
 	if (EXT4_JOURNAL(inode))
 		jbd2_journal_release_jbd_inode(EXT4_SB(inode->i_sb)->s_journal,
@@ -1014,7 +1014,6 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type,
 
 static const struct dquot_operations ext4_quota_operations = {
 	.initialize	= dquot_initialize,
-	.drop		= dquot_drop,
 #ifdef CONFIG_QUOTA
 	.get_reserved_space = ext4_get_reserved_space,
 #endif
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c
index 2562d18988f7..22fa412c5289 100644
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -160,7 +160,7 @@ void jfs_delete_inode(struct inode *inode)
 		 */
 		vfs_dq_init(inode);
 		dquot_free_inode(inode);
-		vfs_dq_drop(inode);
+		dquot_drop(inode);
 	}
 
 	clear_inode(inode);
diff --git a/fs/jfs/jfs_inode.c b/fs/jfs/jfs_inode.c
index 7762f33e062b..72b30895422c 100644
--- a/fs/jfs/jfs_inode.c
+++ b/fs/jfs/jfs_inode.c
@@ -162,7 +162,7 @@ struct inode *ialloc(struct inode *parent, umode_t mode)
 	return inode;
 
 fail_drop:
-	vfs_dq_drop(inode);
+	dquot_drop(inode);
 	inode->i_flags |= S_NOQUOTA;
 fail_unlock:
 	inode->i_nlink = 0;
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 4086fa593419..266699deb1c6 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -133,7 +133,7 @@ static void jfs_destroy_inode(struct inode *inode)
 
 static void jfs_clear_inode(struct inode *inode)
 {
-	vfs_dq_drop(inode);
+	dquot_drop(inode);
 }
 
 static int jfs_statfs(struct dentry *dentry, struct kstatfs *buf)
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 13eb5d467c40..00eb6a095e68 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -1087,7 +1087,7 @@ void ocfs2_clear_inode(struct inode *inode)
 	mlog_bug_on_msg(OCFS2_SB(inode->i_sb) == NULL,
 			"Inode=%lu\n", inode->i_ino);
 
-	vfs_dq_drop(inode);
+	dquot_drop(inode);
 
 	/* To preven remote deletes we hold open lock before, now it
 	 * is time to unlock PR and EX open locks. */
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c
index b654bd103b6f..4dca38f487cf 100644
--- a/fs/ocfs2/quota_global.c
+++ b/fs/ocfs2/quota_global.c
@@ -852,7 +852,6 @@ static void ocfs2_destroy_dquot(struct dquot *dquot)
 
 const struct dquot_operations ocfs2_quota_operations = {
 	.initialize	= dquot_initialize,
-	.drop		= dquot_drop,
 	.write_dquot	= ocfs2_write_dquot,
 	.acquire_dquot	= ocfs2_acquire_dquot,
 	.release_dquot	= ocfs2_release_dquot,
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 78ce4c48ad77..cd83c5b871ba 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -1358,7 +1358,7 @@ EXPORT_SYMBOL(dquot_initialize);
 /*
  * 	Release all quotas referenced by inode
  */
-int dquot_drop(struct inode *inode)
+static void __dquot_drop(struct inode *inode)
 {
 	int cnt;
 	struct dquot *put[MAXQUOTAS];
@@ -1370,32 +1370,31 @@ int dquot_drop(struct inode *inode)
 	}
 	up_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
 	dqput_all(put);
-	return 0;
 }
-EXPORT_SYMBOL(dquot_drop);
 
-/* Wrapper to remove references to quota structures from inode */
-void vfs_dq_drop(struct inode *inode)
-{
-	/* Here we can get arbitrary inode from clear_inode() so we have
-	 * to be careful. OTOH we don't need locking as quota operations
-	 * are allowed to change only at mount time */
-	if (!IS_NOQUOTA(inode) && inode->i_sb && inode->i_sb->dq_op
-	    && inode->i_sb->dq_op->drop) {
-		int cnt;
-		/* Test before calling to rule out calls from proc and such
-                 * where we are not allowed to block. Note that this is
-		 * actually reliable test even without the lock - the caller
-		 * must assure that nobody can come after the DQUOT_DROP and
-		 * add quota pointers back anyway */
-		for (cnt = 0; cnt < MAXQUOTAS; cnt++)
-			if (inode->i_dquot[cnt])
-				break;
-		if (cnt < MAXQUOTAS)
-			inode->i_sb->dq_op->drop(inode);
-	}
-}
-EXPORT_SYMBOL(vfs_dq_drop);
+void dquot_drop(struct inode *inode)
+{
+	int cnt;
+
+	if (IS_NOQUOTA(inode))
+		return;
+
+	/*
+	 * Test before calling to rule out calls from proc and such
+	 * where we are not allowed to block. Note that this is
+	 * actually reliable test even without the lock - the caller
+	 * must assure that nobody can come after the DQUOT_DROP and
+	 * add quota pointers back anyway.
+	 */
+	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
+		if (inode->i_dquot[cnt])
+			break;
+	}
+
+	if (cnt < MAXQUOTAS)
+		__dquot_drop(inode);
+}
+EXPORT_SYMBOL(dquot_drop);
 
 /*
  * inode_reserved_space is managed internally by quota, and protected by
@@ -1812,7 +1811,6 @@ EXPORT_SYMBOL(dquot_commit_info);
  */
 const struct dquot_operations dquot_operations = {
 	.initialize	= dquot_initialize,
-	.drop		= dquot_drop,
 	.write_dquot	= dquot_commit,
 	.acquire_dquot	= dquot_acquire,
 	.release_dquot	= dquot_release,
@@ -2029,7 +2027,7 @@ static int vfs_load_quota_inode(struct inode *inode, int type, int format_id,
 		 * When S_NOQUOTA is set, remove dquot references as no more
 		 * references can be added
 		 */
-		sb->dq_op->drop(inode);
+		__dquot_drop(inode);
 	}
 
 	error = -EIO;
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 99a5e5a8ab5a..f07c3b69247d 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -1964,7 +1964,7 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
       out_end_trans:
 	journal_end(th, th->t_super, th->t_blocks_allocated);
 	/* Drop can be outside and it needs more credits so it's better to have it outside */
-	vfs_dq_drop(inode);
+	dquot_drop(inode);
 	inode->i_flags |= S_NOQUOTA;
 	make_bad_inode(inode);
 
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index 9d4dcf0b07cb..9dea84e8a79a 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -546,7 +546,7 @@ static int reiserfs_add_entry(struct reiserfs_transaction_handle *th,
 */
 static int drop_new_inode(struct inode *inode)
 {
-	vfs_dq_drop(inode);
+	dquot_drop(inode);
 	make_bad_inode(inode);
 	inode->i_flags |= S_NOQUOTA;
 	iput(inode);
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 6b24e70e329b..34f7cd0cb02d 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -580,7 +580,7 @@ out:
 
 static void reiserfs_clear_inode(struct inode *inode)
 {
-	vfs_dq_drop(inode);
+	dquot_drop(inode);
 }
 
 #ifdef CONFIG_QUOTA
@@ -623,7 +623,6 @@ static int reiserfs_quota_on(struct super_block *, int, int, char *, int);
 
 static const struct dquot_operations reiserfs_quota_operations = {
 	.initialize = dquot_initialize,
-	.drop = dquot_drop,
 	.write_dquot = reiserfs_write_dquot,
 	.acquire_dquot = reiserfs_acquire_dquot,
 	.release_dquot = reiserfs_release_dquot,
diff --git a/fs/udf/ialloc.c b/fs/udf/ialloc.c
index e1856b89c9c8..15c6e992e587 100644
--- a/fs/udf/ialloc.c
+++ b/fs/udf/ialloc.c
@@ -37,7 +37,7 @@ void udf_free_inode(struct inode *inode)
 	 * as writing the quota to disk may need the lock as well.
 	 */
 	dquot_free_inode(inode);
-	vfs_dq_drop(inode);
+	dquot_drop(inode);
 
 	clear_inode(inode);
 
@@ -156,7 +156,7 @@ struct inode *udf_new_inode(struct inode *dir, int mode, int *err)
 	vfs_dq_init(inode);
 	ret = dquot_alloc_inode(inode);
 	if (ret) {
-		vfs_dq_drop(inode);
+		dquot_drop(inode);
 		inode->i_flags |= S_NOQUOTA;
 		inode->i_nlink = 0;
 		iput(inode);
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 859389a3832b..1199e8e21ee2 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -109,7 +109,7 @@ void udf_clear_inode(struct inode *inode)
 			(unsigned long long)iinfo->i_lenExtents);
 	}
 
-	vfs_dq_drop(inode);
+	dquot_drop(inode);
 	kfree(iinfo->i_ext.i_data);
 	iinfo->i_ext.i_data = NULL;
 }
diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c
index 02f77882c573..67b4bdb056fb 100644
--- a/fs/ufs/ialloc.c
+++ b/fs/ufs/ialloc.c
@@ -96,7 +96,7 @@ void ufs_free_inode (struct inode * inode)
 	is_directory = S_ISDIR(inode->i_mode);
 
 	dquot_free_inode(inode);
-	vfs_dq_drop(inode);
+	dquot_drop(inode);
 
 	clear_inode (inode);
 
@@ -358,7 +358,7 @@ cg_found:
 	vfs_dq_init(inode);
 	err = dquot_alloc_inode(inode);
 	if (err) {
-		vfs_dq_drop(inode);
+		dquot_drop(inode);
 		goto fail_without_unlock;
 	}
 
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 95d61cb3a5b8..66b63a751615 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -1434,7 +1434,7 @@ static void destroy_inodecache(void)
 
 static void ufs_clear_inode(struct inode *inode)
 {
-	vfs_dq_drop(inode);
+	dquot_drop(inode);
 }
 
 #ifdef CONFIG_QUOTA
diff --git a/include/linux/quota.h b/include/linux/quota.h
index 422e6aa78edc..aec2e9dac2d7 100644
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -296,7 +296,6 @@ struct quota_format_ops {
 /* Operations working with dquots */
 struct dquot_operations {
 	int (*initialize) (struct inode *, int);
-	int (*drop) (struct inode *);
 	int (*write_dquot) (struct dquot *);		/* Ordinary dquot write */
 	struct dquot *(*alloc_dquot)(struct super_block *, int);	/* Allocate memory for new dquot */
 	void (*destroy_dquot)(struct dquot *);		/* Free memory for dquot */
diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h
index fa27b7218c82..a5ebd1abccd8 100644
--- a/include/linux/quotaops.h
+++ b/include/linux/quotaops.h
@@ -24,7 +24,7 @@ void inode_claim_rsv_space(struct inode *inode, qsize_t number);
 void inode_sub_rsv_space(struct inode *inode, qsize_t number);
 
 int dquot_initialize(struct inode *inode, int type);
-int dquot_drop(struct inode *inode);
+void dquot_drop(struct inode *inode);
 struct dquot *dqget(struct super_block *sb, unsigned int id, int type);
 void dqput(struct dquot *dquot);
 int dquot_scan_active(struct super_block *sb,
@@ -64,7 +64,6 @@ int vfs_set_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii);
 int vfs_get_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *di);
 int vfs_set_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *di);
 
-void vfs_dq_drop(struct inode *inode);
 int dquot_transfer(struct inode *inode, struct iattr *iattr);
 int vfs_dq_quota_on_remount(struct super_block *sb);
 
@@ -210,7 +209,7 @@ static inline void vfs_dq_init(struct inode *inode)
 {
 }
 
-static inline void vfs_dq_drop(struct inode *inode)
+static inline void dquot_drop(struct inode *inode)
 {
 }
 
-- 
cgit v1.2.3


From 907f4554e2521cb28b0009d17167760650a9561c Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Wed, 3 Mar 2010 09:05:06 -0500
Subject: dquot: move dquot initialization responsibility into the filesystem

Currently various places in the VFS call vfs_dq_init directly.  This means
we tie the quota code into the VFS.  Get rid of that and make the
filesystem responsible for the initialization.   For most metadata operations
this is a straight forward move into the methods, but for truncate and
open it's a bit more complicated.

For truncate we currently only call vfs_dq_init for the sys_truncate case
because open already takes care of it for ftruncate and open(O_TRUNC) - the
new code causes an additional vfs_dq_init for those which is harmless.

For open the initialization is moved from do_filp_open into the open method,
which means it happens slightly earlier now, and only for regular files.
The latter is fine because we don't need to initialize it for operations
on special files, and we already do it as part of the namespace operations
for directories.

Add a dquot_file_open helper that filesystems that support generic quotas
can use to fill in ->open.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/ext2/file.c           |  4 ++--
 fs/ext2/inode.c          |  5 +++++
 fs/ext2/namei.c          | 51 ++++++++++++++++++++++++++++++++----------------
 fs/ext3/file.c           |  2 +-
 fs/ext3/inode.c          |  5 +++++
 fs/ext3/namei.c          | 18 +++++++++++++++++
 fs/ext4/file.c           |  2 +-
 fs/ext4/inode.c          |  5 +++++
 fs/ext4/namei.c          | 17 ++++++++++++++++
 fs/inode.c               |  3 ---
 fs/jfs/file.c            |  4 +++-
 fs/jfs/inode.c           |  3 +++
 fs/jfs/namei.c           | 15 ++++++++++++++
 fs/namei.c               | 16 ---------------
 fs/nfsd/vfs.c            |  4 ----
 fs/ocfs2/file.c          |  5 +++++
 fs/ocfs2/inode.c         |  2 ++
 fs/ocfs2/namei.c         | 11 +++++++++++
 fs/open.c                |  5 +----
 fs/quota/dquot.c         | 14 +++++++++++++
 fs/reiserfs/file.c       |  2 +-
 fs/reiserfs/inode.c      |  5 +++++
 fs/reiserfs/namei.c      | 17 ++++++++++++++++
 fs/reiserfs/xattr.c      |  4 ----
 fs/udf/file.c            |  5 ++++-
 fs/udf/inode.c           |  4 ++++
 fs/udf/namei.c           | 17 ++++++++++++++++
 fs/ufs/file.c            |  2 +-
 fs/ufs/inode.c           |  4 ++++
 fs/ufs/namei.c           | 18 +++++++++++++++++
 fs/ufs/truncate.c        |  3 +++
 include/linux/quotaops.h |  4 ++++
 32 files changed, 220 insertions(+), 56 deletions(-)

(limited to 'include/linux')

diff --git a/fs/ext2/file.c b/fs/ext2/file.c
index 586e3589d4c2..d11f6e484519 100644
--- a/fs/ext2/file.c
+++ b/fs/ext2/file.c
@@ -70,7 +70,7 @@ const struct file_operations ext2_file_operations = {
 	.compat_ioctl	= ext2_compat_ioctl,
 #endif
 	.mmap		= generic_file_mmap,
-	.open		= generic_file_open,
+	.open		= dquot_file_open,
 	.release	= ext2_release_file,
 	.fsync		= ext2_fsync,
 	.splice_read	= generic_file_splice_read,
@@ -87,7 +87,7 @@ const struct file_operations ext2_xip_file_operations = {
 	.compat_ioctl	= ext2_compat_ioctl,
 #endif
 	.mmap		= xip_file_mmap,
-	.open		= generic_file_open,
+	.open		= dquot_file_open,
 	.release	= ext2_release_file,
 	.fsync		= ext2_fsync,
 };
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 3cfcfd9a131a..c87840c33e17 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -58,6 +58,8 @@ static inline int ext2_inode_is_fast_symlink(struct inode *inode)
  */
 void ext2_delete_inode (struct inode * inode)
 {
+	if (!is_bad_inode(inode))
+		vfs_dq_init(inode);
 	truncate_inode_pages(&inode->i_data, 0);
 
 	if (is_bad_inode(inode))
@@ -1457,6 +1459,9 @@ int ext2_setattr(struct dentry *dentry, struct iattr *iattr)
 	error = inode_change_ok(inode, iattr);
 	if (error)
 		return error;
+
+	if (iattr->ia_valid & ATTR_SIZE)
+		vfs_dq_init(inode);
 	if ((iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) ||
 	    (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid)) {
 		error = dquot_transfer(inode, iattr);
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index dd7175ce5606..5923df7b22af 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -31,6 +31,7 @@
  */
 
 #include <linux/pagemap.h>
+#include <linux/quotaops.h>
 #include "ext2.h"
 #include "xattr.h"
 #include "acl.h"
@@ -99,24 +100,27 @@ struct dentry *ext2_get_parent(struct dentry *child)
  */
 static int ext2_create (struct inode * dir, struct dentry * dentry, int mode, struct nameidata *nd)
 {
-	struct inode * inode = ext2_new_inode (dir, mode);
-	int err = PTR_ERR(inode);
-	if (!IS_ERR(inode)) {
-		inode->i_op = &ext2_file_inode_operations;
-		if (ext2_use_xip(inode->i_sb)) {
-			inode->i_mapping->a_ops = &ext2_aops_xip;
-			inode->i_fop = &ext2_xip_file_operations;
-		} else if (test_opt(inode->i_sb, NOBH)) {
-			inode->i_mapping->a_ops = &ext2_nobh_aops;
-			inode->i_fop = &ext2_file_operations;
-		} else {
-			inode->i_mapping->a_ops = &ext2_aops;
-			inode->i_fop = &ext2_file_operations;
-		}
-		mark_inode_dirty(inode);
-		err = ext2_add_nondir(dentry, inode);
+	struct inode *inode;
+
+	vfs_dq_init(dir);
+
+	inode = ext2_new_inode(dir, mode);
+	if (IS_ERR(inode))
+		return PTR_ERR(inode);
+
+	inode->i_op = &ext2_file_inode_operations;
+	if (ext2_use_xip(inode->i_sb)) {
+		inode->i_mapping->a_ops = &ext2_aops_xip;
+		inode->i_fop = &ext2_xip_file_operations;
+	} else if (test_opt(inode->i_sb, NOBH)) {
+		inode->i_mapping->a_ops = &ext2_nobh_aops;
+		inode->i_fop = &ext2_file_operations;
+	} else {
+		inode->i_mapping->a_ops = &ext2_aops;
+		inode->i_fop = &ext2_file_operations;
 	}
-	return err;
+	mark_inode_dirty(inode);
+	return ext2_add_nondir(dentry, inode);
 }
 
 static int ext2_mknod (struct inode * dir, struct dentry *dentry, int mode, dev_t rdev)
@@ -127,6 +131,8 @@ static int ext2_mknod (struct inode * dir, struct dentry *dentry, int mode, dev_
 	if (!new_valid_dev(rdev))
 		return -EINVAL;
 
+	vfs_dq_init(dir);
+
 	inode = ext2_new_inode (dir, mode);
 	err = PTR_ERR(inode);
 	if (!IS_ERR(inode)) {
@@ -151,6 +157,8 @@ static int ext2_symlink (struct inode * dir, struct dentry * dentry,
 	if (l > sb->s_blocksize)
 		goto out;
 
+	vfs_dq_init(dir);
+
 	inode = ext2_new_inode (dir, S_IFLNK | S_IRWXUGO);
 	err = PTR_ERR(inode);
 	if (IS_ERR(inode))
@@ -194,6 +202,8 @@ static int ext2_link (struct dentry * old_dentry, struct inode * dir,
 	if (inode->i_nlink >= EXT2_LINK_MAX)
 		return -EMLINK;
 
+	vfs_dq_init(dir);
+
 	inode->i_ctime = CURRENT_TIME_SEC;
 	inode_inc_link_count(inode);
 	atomic_inc(&inode->i_count);
@@ -216,6 +226,8 @@ static int ext2_mkdir(struct inode * dir, struct dentry * dentry, int mode)
 	if (dir->i_nlink >= EXT2_LINK_MAX)
 		goto out;
 
+	vfs_dq_init(dir);
+
 	inode_inc_link_count(dir);
 
 	inode = ext2_new_inode (dir, S_IFDIR | mode);
@@ -262,6 +274,8 @@ static int ext2_unlink(struct inode * dir, struct dentry *dentry)
 	struct page * page;
 	int err = -ENOENT;
 
+	vfs_dq_init(dir);
+
 	de = ext2_find_entry (dir, &dentry->d_name, &page);
 	if (!de)
 		goto out;
@@ -304,6 +318,9 @@ static int ext2_rename (struct inode * old_dir, struct dentry * old_dentry,
 	struct ext2_dir_entry_2 * old_de;
 	int err = -ENOENT;
 
+	vfs_dq_init(old_dir);
+	vfs_dq_init(new_dir);
+
 	old_de = ext2_find_entry (old_dir, &old_dentry->d_name, &old_page);
 	if (!old_de)
 		goto out;
diff --git a/fs/ext3/file.c b/fs/ext3/file.c
index a86d3302cdc2..3c7fb11a3b29 100644
--- a/fs/ext3/file.c
+++ b/fs/ext3/file.c
@@ -62,7 +62,7 @@ const struct file_operations ext3_file_operations = {
 	.compat_ioctl	= ext3_compat_ioctl,
 #endif
 	.mmap		= generic_file_mmap,
-	.open		= generic_file_open,
+	.open		= dquot_file_open,
 	.release	= ext3_release_file,
 	.fsync		= ext3_sync_file,
 	.splice_read	= generic_file_splice_read,
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 14d40a4dd6f0..d7962b0c57b3 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -196,6 +196,9 @@ void ext3_delete_inode (struct inode * inode)
 {
 	handle_t *handle;
 
+	if (!is_bad_inode(inode))
+		vfs_dq_init(inode);
+
 	truncate_inode_pages(&inode->i_data, 0);
 
 	if (is_bad_inode(inode))
@@ -3148,6 +3151,8 @@ int ext3_setattr(struct dentry *dentry, struct iattr *attr)
 	if (error)
 		return error;
 
+	if (ia_valid & ATTR_SIZE)
+		vfs_dq_init(inode);
 	if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) ||
 		(ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) {
 		handle_t *handle;
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 7b0e44f7d66f..a492b371b134 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -1696,6 +1696,8 @@ static int ext3_create (struct inode * dir, struct dentry * dentry, int mode,
 	struct inode * inode;
 	int err, retries = 0;
 
+	vfs_dq_init(dir);
+
 retry:
 	handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
 					EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 +
@@ -1730,6 +1732,8 @@ static int ext3_mknod (struct inode * dir, struct dentry *dentry,
 	if (!new_valid_dev(rdev))
 		return -EINVAL;
 
+	vfs_dq_init(dir);
+
 retry:
 	handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
 					EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 +
@@ -1766,6 +1770,8 @@ static int ext3_mkdir(struct inode * dir, struct dentry * dentry, int mode)
 	if (dir->i_nlink >= EXT3_LINK_MAX)
 		return -EMLINK;
 
+	vfs_dq_init(dir);
+
 retry:
 	handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
 					EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 +
@@ -2060,7 +2066,9 @@ static int ext3_rmdir (struct inode * dir, struct dentry *dentry)
 
 	/* Initialize quotas before so that eventual writes go in
 	 * separate transaction */
+	vfs_dq_init(dir);
 	vfs_dq_init(dentry->d_inode);
+
 	handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS(dir->i_sb));
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
@@ -2119,7 +2127,9 @@ static int ext3_unlink(struct inode * dir, struct dentry *dentry)
 
 	/* Initialize quotas before so that eventual writes go
 	 * in separate transaction */
+	vfs_dq_init(dir);
 	vfs_dq_init(dentry->d_inode);
+
 	handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS(dir->i_sb));
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
@@ -2174,6 +2184,8 @@ static int ext3_symlink (struct inode * dir,
 	if (l > dir->i_sb->s_blocksize)
 		return -ENAMETOOLONG;
 
+	vfs_dq_init(dir);
+
 retry:
 	handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
 					EXT3_INDEX_EXTRA_TRANS_BLOCKS + 5 +
@@ -2228,6 +2240,9 @@ static int ext3_link (struct dentry * old_dentry,
 
 	if (inode->i_nlink >= EXT3_LINK_MAX)
 		return -EMLINK;
+
+	vfs_dq_init(dir);
+
 	/*
 	 * Return -ENOENT if we've raced with unlink and i_nlink is 0.  Doing
 	 * otherwise has the potential to corrupt the orphan inode list.
@@ -2278,6 +2293,9 @@ static int ext3_rename (struct inode * old_dir, struct dentry *old_dentry,
 	struct ext3_dir_entry_2 * old_de, * new_de;
 	int retval, flush_file = 0;
 
+	vfs_dq_init(old_dir);
+	vfs_dq_init(new_dir);
+
 	old_bh = new_bh = dir_bh = NULL;
 
 	/* Initialize quotas before so that eventual writes go
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 9630583cef28..85fa464a24ad 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -127,7 +127,7 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
 			sb->s_dirt = 1;
 		}
 	}
-	return generic_file_open(inode, filp);
+	return dquot_file_open(inode, filp);
 }
 
 const struct file_operations ext4_file_operations = {
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 6a002a6d0624..eaa22ae9f1f6 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -170,6 +170,9 @@ void ext4_delete_inode(struct inode *inode)
 	handle_t *handle;
 	int err;
 
+	if (!is_bad_inode(inode))
+		vfs_dq_init(inode);
+
 	if (ext4_should_order_data(inode))
 		ext4_begin_ordered_truncate(inode, 0);
 	truncate_inode_pages(&inode->i_data, 0);
@@ -5251,6 +5254,8 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
 	if (error)
 		return error;
 
+	if (ia_valid & ATTR_SIZE)
+		vfs_dq_init(inode);
 	if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) ||
 		(ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) {
 		handle_t *handle;
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 17a17e10dd60..20f55c2e7571 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1766,6 +1766,8 @@ static int ext4_create(struct inode *dir, struct dentry *dentry, int mode,
 	struct inode *inode;
 	int err, retries = 0;
 
+	vfs_dq_init(dir);
+
 retry:
 	handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
 					EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
@@ -1800,6 +1802,8 @@ static int ext4_mknod(struct inode *dir, struct dentry *dentry,
 	if (!new_valid_dev(rdev))
 		return -EINVAL;
 
+	vfs_dq_init(dir);
+
 retry:
 	handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
 					EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
@@ -1837,6 +1841,8 @@ static int ext4_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 	if (EXT4_DIR_LINK_MAX(dir))
 		return -EMLINK;
 
+	vfs_dq_init(dir);
+
 retry:
 	handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
 					EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
@@ -2136,7 +2142,9 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry)
 
 	/* Initialize quotas before so that eventual writes go in
 	 * separate transaction */
+	vfs_dq_init(dir);
 	vfs_dq_init(dentry->d_inode);
+
 	handle = ext4_journal_start(dir, EXT4_DELETE_TRANS_BLOCKS(dir->i_sb));
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
@@ -2195,7 +2203,9 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
 
 	/* Initialize quotas before so that eventual writes go
 	 * in separate transaction */
+	vfs_dq_init(dir);
 	vfs_dq_init(dentry->d_inode);
+
 	handle = ext4_journal_start(dir, EXT4_DELETE_TRANS_BLOCKS(dir->i_sb));
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
@@ -2250,6 +2260,8 @@ static int ext4_symlink(struct inode *dir,
 	if (l > dir->i_sb->s_blocksize)
 		return -ENAMETOOLONG;
 
+	vfs_dq_init(dir);
+
 retry:
 	handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
 					EXT4_INDEX_EXTRA_TRANS_BLOCKS + 5 +
@@ -2308,6 +2320,8 @@ static int ext4_link(struct dentry *old_dentry,
 	if (inode->i_nlink >= EXT4_LINK_MAX)
 		return -EMLINK;
 
+	vfs_dq_init(dir);
+
 	/*
 	 * Return -ENOENT if we've raced with unlink and i_nlink is 0.  Doing
 	 * otherwise has the potential to corrupt the orphan inode list.
@@ -2358,6 +2372,9 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
 	struct ext4_dir_entry_2 *old_de, *new_de;
 	int retval, force_da_alloc = 0;
 
+	vfs_dq_init(old_dir);
+	vfs_dq_init(new_dir);
+
 	old_bh = new_bh = dir_bh = NULL;
 
 	/* Initialize quotas before so that eventual writes go
diff --git a/fs/inode.c b/fs/inode.c
index f1aef3482b0e..407bf392e20a 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -8,7 +8,6 @@
 #include <linux/mm.h>
 #include <linux/dcache.h>
 #include <linux/init.h>
-#include <linux/quotaops.h>
 #include <linux/slab.h>
 #include <linux/writeback.h>
 #include <linux/module.h>
@@ -1210,8 +1209,6 @@ void generic_delete_inode(struct inode *inode)
 
 	if (op->delete_inode) {
 		void (*delete)(struct inode *) = op->delete_inode;
-		if (!is_bad_inode(inode))
-			vfs_dq_init(inode);
 		/* Filesystems implementing their own
 		 * s_op->delete_inode are required to call
 		 * truncate_inode_pages and clear_inode()
diff --git a/fs/jfs/file.c b/fs/jfs/file.c
index 2c201783836f..f19bb33eb1eb 100644
--- a/fs/jfs/file.c
+++ b/fs/jfs/file.c
@@ -48,7 +48,7 @@ static int jfs_open(struct inode *inode, struct file *file)
 {
 	int rc;
 
-	if ((rc = generic_file_open(inode, file)))
+	if ((rc = dquot_file_open(inode, file)))
 		return rc;
 
 	/*
@@ -98,6 +98,8 @@ int jfs_setattr(struct dentry *dentry, struct iattr *iattr)
 	if (rc)
 		return rc;
 
+	if (iattr->ia_valid & ATTR_SIZE)
+		vfs_dq_init(inode);
 	if ((iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) ||
 	    (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid)) {
 		rc = dquot_transfer(inode, iattr);
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c
index 22fa412c5289..1aa2dda16590 100644
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -146,6 +146,9 @@ void jfs_delete_inode(struct inode *inode)
 {
 	jfs_info("In jfs_delete_inode, inode = 0x%p", inode);
 
+	if (!is_bad_inode(inode))
+		vfs_dq_init(inode);
+
 	if (!is_bad_inode(inode) &&
 	    (JFS_IP(inode)->fileset == FILESYSTEM_I)) {
 		truncate_inode_pages(&inode->i_data, 0);
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index 1d1390afe55e..b7cc29da50b4 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -85,6 +85,8 @@ static int jfs_create(struct inode *dip, struct dentry *dentry, int mode,
 
 	jfs_info("jfs_create: dip:0x%p name:%s", dip, dentry->d_name.name);
 
+	vfs_dq_init(dip);
+
 	/*
 	 * search parent directory for entry/freespace
 	 * (dtSearch() returns parent directory page pinned)
@@ -215,6 +217,8 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, int mode)
 
 	jfs_info("jfs_mkdir: dip:0x%p name:%s", dip, dentry->d_name.name);
 
+	vfs_dq_init(dip);
+
 	/* link count overflow on parent directory ? */
 	if (dip->i_nlink == JFS_LINK_MAX) {
 		rc = -EMLINK;
@@ -356,6 +360,7 @@ static int jfs_rmdir(struct inode *dip, struct dentry *dentry)
 	jfs_info("jfs_rmdir: dip:0x%p name:%s", dip, dentry->d_name.name);
 
 	/* Init inode for quota operations. */
+	vfs_dq_init(dip);
 	vfs_dq_init(ip);
 
 	/* directory must be empty to be removed */
@@ -483,6 +488,7 @@ static int jfs_unlink(struct inode *dip, struct dentry *dentry)
 	jfs_info("jfs_unlink: dip:0x%p name:%s", dip, dentry->d_name.name);
 
 	/* Init inode for quota operations. */
+	vfs_dq_init(dip);
 	vfs_dq_init(ip);
 
 	if ((rc = get_UCSname(&dname, dentry)))
@@ -805,6 +811,8 @@ static int jfs_link(struct dentry *old_dentry,
 	if (ip->i_nlink == 0)
 		return -ENOENT;
 
+	vfs_dq_init(dir);
+
 	tid = txBegin(ip->i_sb, 0);
 
 	mutex_lock_nested(&JFS_IP(dir)->commit_mutex, COMMIT_MUTEX_PARENT);
@@ -896,6 +904,8 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry,
 
 	jfs_info("jfs_symlink: dip:0x%p name:%s", dip, name);
 
+	vfs_dq_init(dip);
+
 	ssize = strlen(name) + 1;
 
 	/*
@@ -1087,6 +1097,9 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	jfs_info("jfs_rename: %s %s", old_dentry->d_name.name,
 		 new_dentry->d_name.name);
 
+	vfs_dq_init(old_dir);
+	vfs_dq_init(new_dir);
+
 	old_ip = old_dentry->d_inode;
 	new_ip = new_dentry->d_inode;
 
@@ -1360,6 +1373,8 @@ static int jfs_mknod(struct inode *dir, struct dentry *dentry,
 
 	jfs_info("jfs_mknod: %s", dentry->d_name.name);
 
+	vfs_dq_init(dir);
+
 	if ((rc = get_UCSname(&dname, dentry)))
 		goto out;
 
diff --git a/fs/namei.c b/fs/namei.c
index a4855af776a8..06abd2bf473c 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -19,7 +19,6 @@
 #include <linux/slab.h>
 #include <linux/fs.h>
 #include <linux/namei.h>
-#include <linux/quotaops.h>
 #include <linux/pagemap.h>
 #include <linux/fsnotify.h>
 #include <linux/personality.h>
@@ -1461,7 +1460,6 @@ int vfs_create(struct inode *dir, struct dentry *dentry, int mode,
 	error = security_inode_create(dir, dentry, mode);
 	if (error)
 		return error;
-	vfs_dq_init(dir);
 	error = dir->i_op->create(dir, dentry, mode, nd);
 	if (!error)
 		fsnotify_create(dir, dentry);
@@ -1813,9 +1811,6 @@ ok:
 		}
 	}
 	if (!IS_ERR(filp)) {
-		if (acc_mode & MAY_WRITE)
-			vfs_dq_init(nd.path.dentry->d_inode);
-
 		if (will_truncate) {
 			error = handle_truncate(&nd.path);
 			if (error) {
@@ -1996,7 +1991,6 @@ int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
 	if (error)
 		return error;
 
-	vfs_dq_init(dir);
 	error = dir->i_op->mknod(dir, dentry, mode, dev);
 	if (!error)
 		fsnotify_create(dir, dentry);
@@ -2095,7 +2089,6 @@ int vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 	if (error)
 		return error;
 
-	vfs_dq_init(dir);
 	error = dir->i_op->mkdir(dir, dentry, mode);
 	if (!error)
 		fsnotify_mkdir(dir, dentry);
@@ -2181,8 +2174,6 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry)
 	if (!dir->i_op->rmdir)
 		return -EPERM;
 
-	vfs_dq_init(dir);
-
 	mutex_lock(&dentry->d_inode->i_mutex);
 	dentry_unhash(dentry);
 	if (d_mountpoint(dentry))
@@ -2268,8 +2259,6 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry)
 	if (!dir->i_op->unlink)
 		return -EPERM;
 
-	vfs_dq_init(dir);
-
 	mutex_lock(&dentry->d_inode->i_mutex);
 	if (d_mountpoint(dentry))
 		error = -EBUSY;
@@ -2379,7 +2368,6 @@ int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname)
 	if (error)
 		return error;
 
-	vfs_dq_init(dir);
 	error = dir->i_op->symlink(dir, dentry, oldname);
 	if (!error)
 		fsnotify_create(dir, dentry);
@@ -2463,7 +2451,6 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de
 		return error;
 
 	mutex_lock(&inode->i_mutex);
-	vfs_dq_init(dir);
 	error = dir->i_op->link(old_dentry, dir, new_dentry);
 	mutex_unlock(&inode->i_mutex);
 	if (!error)
@@ -2662,9 +2649,6 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	if (!old_dir->i_op->rename)
 		return -EPERM;
 
-	vfs_dq_init(old_dir);
-	vfs_dq_init(new_dir);
-
 	old_name = fsnotify_oldname_init(old_dentry->d_name.name);
 
 	if (is_dir)
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 8715d194561a..09e9fc043600 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -20,7 +20,6 @@
 #include <linux/fcntl.h>
 #include <linux/namei.h>
 #include <linux/delay.h>
-#include <linux/quotaops.h>
 #include <linux/fsnotify.h>
 #include <linux/posix_acl_xattr.h>
 #include <linux/xattr.h>
@@ -377,7 +376,6 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
 			put_write_access(inode);
 			goto out_nfserr;
 		}
-		vfs_dq_init(inode);
 	}
 
 	/* sanitize the mode change */
@@ -745,8 +743,6 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
 			flags = O_RDWR|O_LARGEFILE;
 		else
 			flags = O_WRONLY|O_LARGEFILE;
-
-		vfs_dq_init(inode);
 	}
 	*filp = dentry_open(dget(dentry), mntget(fhp->fh_export->ex_path.mnt),
 			    flags, current_cred());
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 472e8f8bc892..126198f5a67c 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -107,6 +107,9 @@ static int ocfs2_file_open(struct inode *inode, struct file *file)
 	mlog_entry("(0x%p, 0x%p, '%.*s')\n", inode, file,
 		   file->f_path.dentry->d_name.len, file->f_path.dentry->d_name.name);
 
+	if (file->f_mode & FMODE_WRITE)
+		vfs_dq_init(inode);
+
 	spin_lock(&oi->ip_lock);
 
 	/* Check that the inode hasn't been wiped from disk by another
@@ -977,6 +980,8 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
 
 	size_change = S_ISREG(inode->i_mode) && attr->ia_valid & ATTR_SIZE;
 	if (size_change) {
+		vfs_dq_init(inode);
+
 		status = ocfs2_rw_lock(inode, 1);
 		if (status < 0) {
 			mlog_errno(status);
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 00eb6a095e68..77681a690d16 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -971,6 +971,8 @@ void ocfs2_delete_inode(struct inode *inode)
 		goto bail;
 	}
 
+	vfs_dq_init(inode);
+
 	if (!ocfs2_inode_is_valid_to_delete(inode)) {
 		/* It's probably not necessary to truncate_inode_pages
 		 * here but we do it for safety anyway (it will most
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 99766b6418eb..8b5b142eb638 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -244,6 +244,8 @@ static int ocfs2_mknod(struct inode *dir,
 		   (unsigned long)dev, dentry->d_name.len,
 		   dentry->d_name.name);
 
+	vfs_dq_init(dir);
+
 	/* get our super block */
 	osb = OCFS2_SB(dir->i_sb);
 
@@ -632,6 +634,8 @@ static int ocfs2_link(struct dentry *old_dentry,
 	if (S_ISDIR(inode->i_mode))
 		return -EPERM;
 
+	vfs_dq_init(dir);
+
 	err = ocfs2_inode_lock_nested(dir, &parent_fe_bh, 1, OI_LS_PARENT);
 	if (err < 0) {
 		if (err != -ENOENT)
@@ -787,6 +791,8 @@ static int ocfs2_unlink(struct inode *dir,
 	mlog_entry("(0x%p, 0x%p, '%.*s')\n", dir, dentry,
 		   dentry->d_name.len, dentry->d_name.name);
 
+	vfs_dq_init(dir);
+
 	BUG_ON(dentry->d_parent->d_inode != dir);
 
 	mlog(0, "ino = %llu\n", (unsigned long long)OCFS2_I(inode)->ip_blkno);
@@ -1047,6 +1053,9 @@ static int ocfs2_rename(struct inode *old_dir,
 		   old_dentry->d_name.len, old_dentry->d_name.name,
 		   new_dentry->d_name.len, new_dentry->d_name.name);
 
+	vfs_dq_init(old_dir);
+	vfs_dq_init(new_dir);
+
 	osb = OCFS2_SB(old_dir->i_sb);
 
 	if (new_inode) {
@@ -1595,6 +1604,8 @@ static int ocfs2_symlink(struct inode *dir,
 	mlog_entry("(0x%p, 0x%p, symname='%s' actual='%.*s')\n", dir,
 		   dentry, symname, dentry->d_name.len, dentry->d_name.name);
 
+	vfs_dq_init(dir);
+
 	sb = dir->i_sb;
 	osb = OCFS2_SB(sb);
 
diff --git a/fs/open.c b/fs/open.c
index 040cef72bc00..b740c4244833 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -8,7 +8,6 @@
 #include <linux/mm.h>
 #include <linux/file.h>
 #include <linux/fdtable.h>
-#include <linux/quotaops.h>
 #include <linux/fsnotify.h>
 #include <linux/module.h>
 #include <linux/slab.h>
@@ -278,10 +277,8 @@ static long do_sys_truncate(const char __user *pathname, loff_t length)
 	error = locks_verify_truncate(inode, NULL, length);
 	if (!error)
 		error = security_path_truncate(&path, length, 0);
-	if (!error) {
-		vfs_dq_init(inode);
+	if (!error)
 		error = do_truncate(path.dentry, length, 0, NULL);
-	}
 
 put_write_and_out:
 	put_write_access(inode);
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index cd83c5b871ba..6244bca45c9d 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -1820,6 +1820,20 @@ const struct dquot_operations dquot_operations = {
 	.destroy_dquot	= dquot_destroy,
 };
 
+/*
+ * Generic helper for ->open on filesystems supporting disk quotas.
+ */
+int dquot_file_open(struct inode *inode, struct file *file)
+{
+	int error;
+
+	error = generic_file_open(inode, file);
+	if (!error && (file->f_mode & FMODE_WRITE))
+		vfs_dq_init(inode);
+	return error;
+}
+EXPORT_SYMBOL(dquot_file_open);
+
 /*
  * Turn quota off on a device. type == -1 ==> quotaoff for all types (umount)
  */
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index da2dba082e2d..1d9c12714c5c 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -289,7 +289,7 @@ const struct file_operations reiserfs_file_operations = {
 	.compat_ioctl = reiserfs_compat_ioctl,
 #endif
 	.mmap = reiserfs_file_mmap,
-	.open = generic_file_open,
+	.open = dquot_file_open,
 	.release = reiserfs_file_release,
 	.fsync = reiserfs_sync_file,
 	.aio_read = generic_file_aio_read,
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index f07c3b69247d..06995cb48e39 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -34,6 +34,9 @@ void reiserfs_delete_inode(struct inode *inode)
 	int depth;
 	int err;
 
+	if (!is_bad_inode(inode))
+		vfs_dq_init(inode);
+
 	truncate_inode_pages(&inode->i_data, 0);
 
 	depth = reiserfs_write_lock_once(inode->i_sb);
@@ -3073,6 +3076,8 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
 
 	depth = reiserfs_write_lock_once(inode->i_sb);
 	if (attr->ia_valid & ATTR_SIZE) {
+		vfs_dq_init(inode);
+
 		/* version 2 items will be caught by the s_maxbytes check
 		 ** done for us in vmtruncate
 		 */
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index 9dea84e8a79a..c55e1b9fee5f 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -594,6 +594,8 @@ static int reiserfs_create(struct inode *dir, struct dentry *dentry, int mode,
 	struct reiserfs_transaction_handle th;
 	struct reiserfs_security_handle security;
 
+	vfs_dq_init(dir);
+
 	if (!(inode = new_inode(dir->i_sb))) {
 		return -ENOMEM;
 	}
@@ -666,6 +668,8 @@ static int reiserfs_mknod(struct inode *dir, struct dentry *dentry, int mode,
 	if (!new_valid_dev(rdev))
 		return -EINVAL;
 
+	vfs_dq_init(dir);
+
 	if (!(inode = new_inode(dir->i_sb))) {
 		return -ENOMEM;
 	}
@@ -739,6 +743,8 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 	    2 * (REISERFS_QUOTA_INIT_BLOCKS(dir->i_sb) +
 		 REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb));
 
+	vfs_dq_init(dir);
+
 #ifdef DISPLACE_NEW_PACKING_LOCALITIES
 	/* set flag that new packing locality created and new blocks for the content     * of that directory are not displaced yet */
 	REISERFS_I(dir)->new_packing_locality = 1;
@@ -842,6 +848,8 @@ static int reiserfs_rmdir(struct inode *dir, struct dentry *dentry)
 	    JOURNAL_PER_BALANCE_CNT * 2 + 2 +
 	    4 * REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb);
 
+	vfs_dq_init(dir);
+
 	reiserfs_write_lock(dir->i_sb);
 	retval = journal_begin(&th, dir->i_sb, jbegin_count);
 	if (retval)
@@ -923,6 +931,8 @@ static int reiserfs_unlink(struct inode *dir, struct dentry *dentry)
 	unsigned long savelink;
 	int depth;
 
+	vfs_dq_init(dir);
+
 	inode = dentry->d_inode;
 
 	/* in this transaction we can be doing at max two balancings and update
@@ -1024,6 +1034,8 @@ static int reiserfs_symlink(struct inode *parent_dir,
 	    2 * (REISERFS_QUOTA_INIT_BLOCKS(parent_dir->i_sb) +
 		 REISERFS_QUOTA_TRANS_BLOCKS(parent_dir->i_sb));
 
+	vfs_dq_init(parent_dir);
+
 	if (!(inode = new_inode(parent_dir->i_sb))) {
 		return -ENOMEM;
 	}
@@ -1111,6 +1123,8 @@ static int reiserfs_link(struct dentry *old_dentry, struct inode *dir,
 	    JOURNAL_PER_BALANCE_CNT * 3 +
 	    2 * REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb);
 
+	vfs_dq_init(dir);
+
 	reiserfs_write_lock(dir->i_sb);
 	if (inode->i_nlink >= REISERFS_LINK_MAX) {
 		//FIXME: sd_nlink is 32 bit for new files
@@ -1235,6 +1249,9 @@ static int reiserfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	    JOURNAL_PER_BALANCE_CNT * 3 + 5 +
 	    4 * REISERFS_QUOTA_TRANS_BLOCKS(old_dir->i_sb);
 
+	vfs_dq_init(old_dir);
+	vfs_dq_init(new_dir);
+
 	old_inode = old_dentry->d_inode;
 	new_dentry_inode = new_dentry->d_inode;
 
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 81f09fab8ae4..37d034ca7d99 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -61,7 +61,6 @@
 static int xattr_create(struct inode *dir, struct dentry *dentry, int mode)
 {
 	BUG_ON(!mutex_is_locked(&dir->i_mutex));
-	vfs_dq_init(dir);
 	return dir->i_op->create(dir, dentry, mode, NULL);
 }
 #endif
@@ -69,7 +68,6 @@ static int xattr_create(struct inode *dir, struct dentry *dentry, int mode)
 static int xattr_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 {
 	BUG_ON(!mutex_is_locked(&dir->i_mutex));
-	vfs_dq_init(dir);
 	return dir->i_op->mkdir(dir, dentry, mode);
 }
 
@@ -81,7 +79,6 @@ static int xattr_unlink(struct inode *dir, struct dentry *dentry)
 {
 	int error;
 	BUG_ON(!mutex_is_locked(&dir->i_mutex));
-	vfs_dq_init(dir);
 
 	reiserfs_mutex_lock_nested_safe(&dentry->d_inode->i_mutex,
 					I_MUTEX_CHILD, dir->i_sb);
@@ -97,7 +94,6 @@ static int xattr_rmdir(struct inode *dir, struct dentry *dentry)
 {
 	int error;
 	BUG_ON(!mutex_is_locked(&dir->i_mutex));
-	vfs_dq_init(dir);
 
 	reiserfs_mutex_lock_nested_safe(&dentry->d_inode->i_mutex,
 					I_MUTEX_CHILD, dir->i_sb);
diff --git a/fs/udf/file.c b/fs/udf/file.c
index 2df7fcb677b3..013fa44d9a5e 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -208,7 +208,7 @@ const struct file_operations udf_file_operations = {
 	.read			= do_sync_read,
 	.aio_read		= generic_file_aio_read,
 	.ioctl			= udf_ioctl,
-	.open			= generic_file_open,
+	.open			= dquot_file_open,
 	.mmap			= generic_file_mmap,
 	.write			= do_sync_write,
 	.aio_write		= udf_file_aio_write,
@@ -227,6 +227,9 @@ static int udf_setattr(struct dentry *dentry, struct iattr *iattr)
 	if (error)
 		return error;
 
+	if (iattr->ia_valid & ATTR_SIZE)
+		vfs_dq_init(inode);
+
 	if ((iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) ||
             (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid)) {
 		error = dquot_transfer(inode, iattr);
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 1199e8e21ee2..f19520268404 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -36,6 +36,7 @@
 #include <linux/pagemap.h>
 #include <linux/buffer_head.h>
 #include <linux/writeback.h>
+#include <linux/quotaops.h>
 #include <linux/slab.h>
 #include <linux/crc-itu-t.h>
 
@@ -70,6 +71,9 @@ static int udf_get_block(struct inode *, sector_t, struct buffer_head *, int);
 
 void udf_delete_inode(struct inode *inode)
 {
+	if (!is_bad_inode(inode))
+		vfs_dq_init(inode);
+
 	truncate_inode_pages(&inode->i_data, 0);
 
 	if (is_bad_inode(inode))
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index cd2115060fdc..e360c3fc4ae4 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -563,6 +563,8 @@ static int udf_create(struct inode *dir, struct dentry *dentry, int mode,
 	int err;
 	struct udf_inode_info *iinfo;
 
+	vfs_dq_init(dir);
+
 	lock_kernel();
 	inode = udf_new_inode(dir, mode, &err);
 	if (!inode) {
@@ -616,6 +618,8 @@ static int udf_mknod(struct inode *dir, struct dentry *dentry, int mode,
 	if (!old_valid_dev(rdev))
 		return -EINVAL;
 
+	vfs_dq_init(dir);
+
 	lock_kernel();
 	err = -EIO;
 	inode = udf_new_inode(dir, mode, &err);
@@ -662,6 +666,8 @@ static int udf_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 	struct udf_inode_info *dinfo = UDF_I(dir);
 	struct udf_inode_info *iinfo;
 
+	vfs_dq_init(dir);
+
 	lock_kernel();
 	err = -EMLINK;
 	if (dir->i_nlink >= (256 << sizeof(dir->i_nlink)) - 1)
@@ -799,6 +805,8 @@ static int udf_rmdir(struct inode *dir, struct dentry *dentry)
 	struct fileIdentDesc *fi, cfi;
 	struct kernel_lb_addr tloc;
 
+	vfs_dq_init(dir);
+
 	retval = -ENOENT;
 	lock_kernel();
 	fi = udf_find_entry(dir, &dentry->d_name, &fibh, &cfi);
@@ -845,6 +853,8 @@ static int udf_unlink(struct inode *dir, struct dentry *dentry)
 	struct fileIdentDesc cfi;
 	struct kernel_lb_addr tloc;
 
+	vfs_dq_init(dir);
+
 	retval = -ENOENT;
 	lock_kernel();
 	fi = udf_find_entry(dir, &dentry->d_name, &fibh, &cfi);
@@ -899,6 +909,8 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry,
 	struct buffer_head *bh;
 	struct udf_inode_info *iinfo;
 
+	vfs_dq_init(dir);
+
 	lock_kernel();
 	inode = udf_new_inode(dir, S_IFLNK, &err);
 	if (!inode)
@@ -1069,6 +1081,8 @@ static int udf_link(struct dentry *old_dentry, struct inode *dir,
 	int err;
 	struct buffer_head *bh;
 
+	vfs_dq_init(dir);
+
 	lock_kernel();
 	if (inode->i_nlink >= (256 << sizeof(inode->i_nlink)) - 1) {
 		unlock_kernel();
@@ -1131,6 +1145,9 @@ static int udf_rename(struct inode *old_dir, struct dentry *old_dentry,
 	struct kernel_lb_addr tloc;
 	struct udf_inode_info *old_iinfo = UDF_I(old_inode);
 
+	vfs_dq_init(old_dir);
+	vfs_dq_init(new_dir);
+
 	lock_kernel();
 	ofi = udf_find_entry(old_dir, &old_dentry->d_name, &ofibh, &ocfi);
 	if (ofi) {
diff --git a/fs/ufs/file.c b/fs/ufs/file.c
index 73655c61240a..d84762f3028e 100644
--- a/fs/ufs/file.c
+++ b/fs/ufs/file.c
@@ -40,7 +40,7 @@ const struct file_operations ufs_file_operations = {
 	.write		= do_sync_write,
 	.aio_write	= generic_file_aio_write,
 	.mmap		= generic_file_mmap,
-	.open           = generic_file_open,
+	.open           = dquot_file_open,
 	.fsync		= simple_fsync,
 	.splice_read	= generic_file_splice_read,
 };
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index 7cf33379fd46..fff8edab382f 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -36,6 +36,7 @@
 #include <linux/mm.h>
 #include <linux/smp_lock.h>
 #include <linux/buffer_head.h>
+#include <linux/quotaops.h>
 
 #include "ufs_fs.h"
 #include "ufs.h"
@@ -908,6 +909,9 @@ void ufs_delete_inode (struct inode * inode)
 {
 	loff_t old_i_size;
 
+	if (!is_bad_inode(inode))
+		vfs_dq_init(inode);
+
 	truncate_inode_pages(&inode->i_data, 0);
 	if (is_bad_inode(inode))
 		goto no_delete;
diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c
index 4c26d9e8bc94..c33cb90c516d 100644
--- a/fs/ufs/namei.c
+++ b/fs/ufs/namei.c
@@ -30,6 +30,7 @@
 #include <linux/time.h>
 #include <linux/fs.h>
 #include <linux/smp_lock.h>
+#include <linux/quotaops.h>
 
 #include "ufs_fs.h"
 #include "ufs.h"
@@ -84,6 +85,9 @@ static int ufs_create (struct inode * dir, struct dentry * dentry, int mode,
 	int err;
 
 	UFSD("BEGIN\n");
+
+	vfs_dq_init(dir);
+
 	inode = ufs_new_inode(dir, mode);
 	err = PTR_ERR(inode);
 
@@ -107,6 +111,9 @@ static int ufs_mknod (struct inode * dir, struct dentry *dentry, int mode, dev_t
 
 	if (!old_valid_dev(rdev))
 		return -EINVAL;
+
+	vfs_dq_init(dir);
+
 	inode = ufs_new_inode(dir, mode);
 	err = PTR_ERR(inode);
 	if (!IS_ERR(inode)) {
@@ -131,6 +138,8 @@ static int ufs_symlink (struct inode * dir, struct dentry * dentry,
 	if (l > sb->s_blocksize)
 		goto out_notlocked;
 
+	vfs_dq_init(dir);
+
 	lock_kernel();
 	inode = ufs_new_inode(dir, S_IFLNK | S_IRWXUGO);
 	err = PTR_ERR(inode);
@@ -176,6 +185,8 @@ static int ufs_link (struct dentry * old_dentry, struct inode * dir,
 		return -EMLINK;
 	}
 
+	vfs_dq_init(dir);
+
 	inode->i_ctime = CURRENT_TIME_SEC;
 	inode_inc_link_count(inode);
 	atomic_inc(&inode->i_count);
@@ -193,6 +204,8 @@ static int ufs_mkdir(struct inode * dir, struct dentry * dentry, int mode)
 	if (dir->i_nlink >= UFS_LINK_MAX)
 		goto out;
 
+	vfs_dq_init(dir);
+
 	lock_kernel();
 	inode_inc_link_count(dir);
 
@@ -237,6 +250,8 @@ static int ufs_unlink(struct inode *dir, struct dentry *dentry)
 	struct page *page;
 	int err = -ENOENT;
 
+	vfs_dq_init(dir);
+
 	de = ufs_find_entry(dir, &dentry->d_name, &page);
 	if (!de)
 		goto out;
@@ -281,6 +296,9 @@ static int ufs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	struct ufs_dir_entry *old_de;
 	int err = -ENOENT;
 
+	vfs_dq_init(old_dir);
+	vfs_dq_init(new_dir);
+
 	old_de = ufs_find_entry(old_dir, &old_dentry->d_name, &old_page);
 	if (!old_de)
 		goto out;
diff --git a/fs/ufs/truncate.c b/fs/ufs/truncate.c
index 87bbab685901..e5ef8a3ec230 100644
--- a/fs/ufs/truncate.c
+++ b/fs/ufs/truncate.c
@@ -527,6 +527,9 @@ static int ufs_setattr(struct dentry *dentry, struct iattr *attr)
 	if (ia_valid & ATTR_SIZE &&
 	    attr->ia_size != i_size_read(inode)) {
 		loff_t old_i_size = inode->i_size;
+
+		vfs_dq_init(inode);
+
 		error = vmtruncate(inode, attr->ia_size);
 		if (error)
 			return error;
diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h
index a5ebd1abccd8..93ac788345e2 100644
--- a/include/linux/quotaops.h
+++ b/include/linux/quotaops.h
@@ -48,6 +48,8 @@ int dquot_release(struct dquot *dquot);
 int dquot_commit_info(struct super_block *sb, int type);
 int dquot_mark_dquot_dirty(struct dquot *dquot);
 
+int dquot_file_open(struct inode *inode, struct file *file);
+
 int vfs_quota_on(struct super_block *sb, int type, int format_id,
  	char *path, int remount);
 int vfs_quota_enable(struct inode *inode, int type, int format_id,
@@ -342,4 +344,6 @@ static inline void dquot_release_reservation_block(struct inode *inode,
 	__dquot_free_space(inode, nr << inode->i_blkbits, 1);
 }
 
+#define dquot_file_open		generic_file_open
+
 #endif /* _LINUX_QUOTAOPS_ */
-- 
cgit v1.2.3


From 871a293155a24554e153538d36e3a80fa169aefb Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Wed, 3 Mar 2010 09:05:07 -0500
Subject: dquot: cleanup dquot initialize routine

Get rid of the initialize dquot operation - it is now always called from
the filesystem and if a filesystem really needs it's own (which none
currently does) it can just call into it's own routine directly.

Rename the now static low-level dquot_initialize helper to __dquot_initialize
and vfs_dq_init to dquot_initialize to have a consistent namespace.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 Documentation/filesystems/Locking |  2 --
 fs/ext2/file.c                    |  1 +
 fs/ext2/ialloc.c                  |  2 +-
 fs/ext2/inode.c                   |  4 ++--
 fs/ext2/namei.c                   | 16 ++++++++--------
 fs/ext3/file.c                    |  1 +
 fs/ext3/ialloc.c                  |  4 ++--
 fs/ext3/inode.c                   |  6 +++---
 fs/ext3/namei.c                   | 24 ++++++++++++------------
 fs/ext3/super.c                   |  5 ++---
 fs/ext4/file.c                    |  1 +
 fs/ext4/ialloc.c                  |  4 ++--
 fs/ext4/inode.c                   |  4 ++--
 fs/ext4/namei.c                   | 24 ++++++++++++------------
 fs/ext4/super.c                   |  5 ++---
 fs/jfs/file.c                     |  2 +-
 fs/jfs/inode.c                    |  4 ++--
 fs/jfs/jfs_inode.c                |  2 +-
 fs/jfs/namei.c                    | 24 ++++++++++++------------
 fs/ocfs2/file.c                   |  4 ++--
 fs/ocfs2/inode.c                  |  2 +-
 fs/ocfs2/namei.c                  | 14 +++++++-------
 fs/ocfs2/quota_global.c           |  1 -
 fs/ocfs2/refcounttree.c           |  2 +-
 fs/quota/dquot.c                  | 32 ++++++++++++++++++++------------
 fs/reiserfs/inode.c               |  6 +++---
 fs/reiserfs/namei.c               | 22 +++++++++++-----------
 fs/reiserfs/super.c               |  3 +--
 fs/udf/file.c                     |  2 +-
 fs/udf/ialloc.c                   |  2 +-
 fs/udf/inode.c                    |  2 +-
 fs/udf/namei.c                    | 18 +++++++++---------
 fs/ufs/file.c                     |  1 +
 fs/ufs/ialloc.c                   |  2 +-
 fs/ufs/inode.c                    |  2 +-
 fs/ufs/namei.c                    | 16 ++++++++--------
 fs/ufs/truncate.c                 |  2 +-
 include/linux/quota.h             |  1 -
 include/linux/quotaops.h          | 17 ++++-------------
 39 files changed, 141 insertions(+), 145 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index fa10e4bf8e5e..06bbbed71206 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -460,7 +460,6 @@ in sys_read() and friends.
 
 --------------------------- dquot_operations -------------------------------
 prototypes:
-	int (*initialize) (struct inode *, int);
 	int (*write_dquot) (struct dquot *);
 	int (*acquire_dquot) (struct dquot *);
 	int (*release_dquot) (struct dquot *);
@@ -473,7 +472,6 @@ a proper locking wrt the filesystem and call the generic quota operations.
 What filesystem should expect from the generic quota functions:
 
 		FS recursion	Held locks when called
-initialize:	yes		maybe dqonoff_sem
 write_dquot:	yes		dqonoff_sem or dqptr_sem
 acquire_dquot:	yes		dqonoff_sem or dqptr_sem
 release_dquot:	yes		dqonoff_sem or dqptr_sem
diff --git a/fs/ext2/file.c b/fs/ext2/file.c
index d11f6e484519..5d198d0697fb 100644
--- a/fs/ext2/file.c
+++ b/fs/ext2/file.c
@@ -20,6 +20,7 @@
 
 #include <linux/time.h>
 #include <linux/pagemap.h>
+#include <linux/quotaops.h>
 #include "ext2.h"
 #include "xattr.h"
 #include "acl.h"
diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c
index 88b71972c626..ad7d572ee8dc 100644
--- a/fs/ext2/ialloc.c
+++ b/fs/ext2/ialloc.c
@@ -586,7 +586,7 @@ got:
 		goto fail_drop;
 	}
 
-	vfs_dq_init(inode);
+	dquot_initialize(inode);
 	err = dquot_alloc_inode(inode);
 	if (err)
 		goto fail_drop;
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index c87840c33e17..45ff49f0a4b5 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -59,7 +59,7 @@ static inline int ext2_inode_is_fast_symlink(struct inode *inode)
 void ext2_delete_inode (struct inode * inode)
 {
 	if (!is_bad_inode(inode))
-		vfs_dq_init(inode);
+		dquot_initialize(inode);
 	truncate_inode_pages(&inode->i_data, 0);
 
 	if (is_bad_inode(inode))
@@ -1461,7 +1461,7 @@ int ext2_setattr(struct dentry *dentry, struct iattr *iattr)
 		return error;
 
 	if (iattr->ia_valid & ATTR_SIZE)
-		vfs_dq_init(inode);
+		dquot_initialize(inode);
 	if ((iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) ||
 	    (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid)) {
 		error = dquot_transfer(inode, iattr);
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index 5923df7b22af..71efb0e9a3f2 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -102,7 +102,7 @@ static int ext2_create (struct inode * dir, struct dentry * dentry, int mode, st
 {
 	struct inode *inode;
 
-	vfs_dq_init(dir);
+	dquot_initialize(dir);
 
 	inode = ext2_new_inode(dir, mode);
 	if (IS_ERR(inode))
@@ -131,7 +131,7 @@ static int ext2_mknod (struct inode * dir, struct dentry *dentry, int mode, dev_
 	if (!new_valid_dev(rdev))
 		return -EINVAL;
 
-	vfs_dq_init(dir);
+	dquot_initialize(dir);
 
 	inode = ext2_new_inode (dir, mode);
 	err = PTR_ERR(inode);
@@ -157,7 +157,7 @@ static int ext2_symlink (struct inode * dir, struct dentry * dentry,
 	if (l > sb->s_blocksize)
 		goto out;
 
-	vfs_dq_init(dir);
+	dquot_initialize(dir);
 
 	inode = ext2_new_inode (dir, S_IFLNK | S_IRWXUGO);
 	err = PTR_ERR(inode);
@@ -202,7 +202,7 @@ static int ext2_link (struct dentry * old_dentry, struct inode * dir,
 	if (inode->i_nlink >= EXT2_LINK_MAX)
 		return -EMLINK;
 
-	vfs_dq_init(dir);
+	dquot_initialize(dir);
 
 	inode->i_ctime = CURRENT_TIME_SEC;
 	inode_inc_link_count(inode);
@@ -226,7 +226,7 @@ static int ext2_mkdir(struct inode * dir, struct dentry * dentry, int mode)
 	if (dir->i_nlink >= EXT2_LINK_MAX)
 		goto out;
 
-	vfs_dq_init(dir);
+	dquot_initialize(dir);
 
 	inode_inc_link_count(dir);
 
@@ -274,7 +274,7 @@ static int ext2_unlink(struct inode * dir, struct dentry *dentry)
 	struct page * page;
 	int err = -ENOENT;
 
-	vfs_dq_init(dir);
+	dquot_initialize(dir);
 
 	de = ext2_find_entry (dir, &dentry->d_name, &page);
 	if (!de)
@@ -318,8 +318,8 @@ static int ext2_rename (struct inode * old_dir, struct dentry * old_dentry,
 	struct ext2_dir_entry_2 * old_de;
 	int err = -ENOENT;
 
-	vfs_dq_init(old_dir);
-	vfs_dq_init(new_dir);
+	dquot_initialize(old_dir);
+	dquot_initialize(new_dir);
 
 	old_de = ext2_find_entry (old_dir, &old_dentry->d_name, &old_page);
 	if (!old_de)
diff --git a/fs/ext3/file.c b/fs/ext3/file.c
index 3c7fb11a3b29..f55df0e61cbd 100644
--- a/fs/ext3/file.c
+++ b/fs/ext3/file.c
@@ -21,6 +21,7 @@
 #include <linux/time.h>
 #include <linux/fs.h>
 #include <linux/jbd.h>
+#include <linux/quotaops.h>
 #include <linux/ext3_fs.h>
 #include <linux/ext3_jbd.h>
 #include "xattr.h"
diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c
index 7d7238f9f6f3..ef9008b885b5 100644
--- a/fs/ext3/ialloc.c
+++ b/fs/ext3/ialloc.c
@@ -123,7 +123,7 @@ void ext3_free_inode (handle_t *handle, struct inode * inode)
 	 * Note: we must free any quota before locking the superblock,
 	 * as writing the quota to disk may need the lock as well.
 	 */
-	vfs_dq_init(inode);
+	dquot_initialize(inode);
 	ext3_xattr_delete_inode(handle, inode);
 	dquot_free_inode(inode);
 	dquot_drop(inode);
@@ -588,7 +588,7 @@ got:
 		sizeof(struct ext3_inode) - EXT3_GOOD_OLD_INODE_SIZE : 0;
 
 	ret = inode;
-	vfs_dq_init(inode);
+	dquot_initialize(inode);
 	err = dquot_alloc_inode(inode);
 	if (err)
 		goto fail_drop;
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index d7962b0c57b3..ffbbc65e3f68 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -197,7 +197,7 @@ void ext3_delete_inode (struct inode * inode)
 	handle_t *handle;
 
 	if (!is_bad_inode(inode))
-		vfs_dq_init(inode);
+		dquot_initialize(inode);
 
 	truncate_inode_pages(&inode->i_data, 0);
 
@@ -3152,7 +3152,7 @@ int ext3_setattr(struct dentry *dentry, struct iattr *attr)
 		return error;
 
 	if (ia_valid & ATTR_SIZE)
-		vfs_dq_init(inode);
+		dquot_initialize(inode);
 	if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) ||
 		(ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) {
 		handle_t *handle;
@@ -3250,7 +3250,7 @@ static int ext3_writepage_trans_blocks(struct inode *inode)
 		ret = 2 * (bpp + indirects) + 2;
 
 #ifdef CONFIG_QUOTA
-	/* We know that structure was already allocated during vfs_dq_init so
+	/* We know that structure was already allocated during dquot_initialize so
 	 * we will be updating only the data blocks + inodes */
 	ret += EXT3_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb);
 #endif
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index a492b371b134..ee184084ca42 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -1696,7 +1696,7 @@ static int ext3_create (struct inode * dir, struct dentry * dentry, int mode,
 	struct inode * inode;
 	int err, retries = 0;
 
-	vfs_dq_init(dir);
+	dquot_initialize(dir);
 
 retry:
 	handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
@@ -1732,7 +1732,7 @@ static int ext3_mknod (struct inode * dir, struct dentry *dentry,
 	if (!new_valid_dev(rdev))
 		return -EINVAL;
 
-	vfs_dq_init(dir);
+	dquot_initialize(dir);
 
 retry:
 	handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
@@ -1770,7 +1770,7 @@ static int ext3_mkdir(struct inode * dir, struct dentry * dentry, int mode)
 	if (dir->i_nlink >= EXT3_LINK_MAX)
 		return -EMLINK;
 
-	vfs_dq_init(dir);
+	dquot_initialize(dir);
 
 retry:
 	handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
@@ -2066,8 +2066,8 @@ static int ext3_rmdir (struct inode * dir, struct dentry *dentry)
 
 	/* Initialize quotas before so that eventual writes go in
 	 * separate transaction */
-	vfs_dq_init(dir);
-	vfs_dq_init(dentry->d_inode);
+	dquot_initialize(dir);
+	dquot_initialize(dentry->d_inode);
 
 	handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS(dir->i_sb));
 	if (IS_ERR(handle))
@@ -2127,8 +2127,8 @@ static int ext3_unlink(struct inode * dir, struct dentry *dentry)
 
 	/* Initialize quotas before so that eventual writes go
 	 * in separate transaction */
-	vfs_dq_init(dir);
-	vfs_dq_init(dentry->d_inode);
+	dquot_initialize(dir);
+	dquot_initialize(dentry->d_inode);
 
 	handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS(dir->i_sb));
 	if (IS_ERR(handle))
@@ -2184,7 +2184,7 @@ static int ext3_symlink (struct inode * dir,
 	if (l > dir->i_sb->s_blocksize)
 		return -ENAMETOOLONG;
 
-	vfs_dq_init(dir);
+	dquot_initialize(dir);
 
 retry:
 	handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
@@ -2241,7 +2241,7 @@ static int ext3_link (struct dentry * old_dentry,
 	if (inode->i_nlink >= EXT3_LINK_MAX)
 		return -EMLINK;
 
-	vfs_dq_init(dir);
+	dquot_initialize(dir);
 
 	/*
 	 * Return -ENOENT if we've raced with unlink and i_nlink is 0.  Doing
@@ -2293,15 +2293,15 @@ static int ext3_rename (struct inode * old_dir, struct dentry *old_dentry,
 	struct ext3_dir_entry_2 * old_de, * new_de;
 	int retval, flush_file = 0;
 
-	vfs_dq_init(old_dir);
-	vfs_dq_init(new_dir);
+	dquot_initialize(old_dir);
+	dquot_initialize(new_dir);
 
 	old_bh = new_bh = dir_bh = NULL;
 
 	/* Initialize quotas before so that eventual writes go
 	 * in separate transaction */
 	if (new_dentry->d_inode)
-		vfs_dq_init(new_dentry->d_inode);
+		dquot_initialize(new_dentry->d_inode);
 	handle = ext3_journal_start(old_dir, 2 *
 					EXT3_DATA_TRANS_BLOCKS(old_dir->i_sb) +
 					EXT3_INDEX_EXTRA_TRANS_BLOCKS + 2);
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 0163d0dae124..e844accbf55d 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -752,7 +752,6 @@ static ssize_t ext3_quota_write(struct super_block *sb, int type,
 				const char *data, size_t len, loff_t off);
 
 static const struct dquot_operations ext3_quota_operations = {
-	.initialize	= dquot_initialize,
 	.write_dquot	= ext3_write_dquot,
 	.acquire_dquot	= ext3_acquire_dquot,
 	.release_dquot	= ext3_release_dquot,
@@ -1480,7 +1479,7 @@ static void ext3_orphan_cleanup (struct super_block * sb,
 		}
 
 		list_add(&EXT3_I(inode)->i_orphan, &EXT3_SB(sb)->s_orphan);
-		vfs_dq_init(inode);
+		dquot_initialize(inode);
 		if (inode->i_nlink) {
 			printk(KERN_DEBUG
 				"%s: truncating inode %lu to %Ld bytes\n",
@@ -2736,7 +2735,7 @@ static int ext3_statfs (struct dentry * dentry, struct kstatfs * buf)
  * Process 1                         Process 2
  * ext3_create()                     quota_sync()
  *   journal_start()                   write_dquot()
- *   vfs_dq_init()                       down(dqio_mutex)
+ *   dquot_initialize()                       down(dqio_mutex)
  *     down(dqio_mutex)                    journal_start()
  *
  */
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 85fa464a24ad..a08a12998c49 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -23,6 +23,7 @@
 #include <linux/jbd2.h>
 #include <linux/mount.h>
 #include <linux/path.h>
+#include <linux/quotaops.h>
 #include "ext4.h"
 #include "ext4_jbd2.h"
 #include "xattr.h"
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index ca8986e4b528..9bb2bb9f67ad 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -217,7 +217,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
 	 * Note: we must free any quota before locking the superblock,
 	 * as writing the quota to disk may need the lock as well.
 	 */
-	vfs_dq_init(inode);
+	dquot_initialize(inode);
 	ext4_xattr_delete_inode(handle, inode);
 	dquot_free_inode(inode);
 	dquot_drop(inode);
@@ -1034,7 +1034,7 @@ got:
 	ei->i_extra_isize = EXT4_SB(sb)->s_want_extra_isize;
 
 	ret = inode;
-	vfs_dq_init(inode);
+	dquot_initialize(inode);
 	err = dquot_alloc_inode(inode);
 	if (err)
 		goto fail_drop;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index eaa22ae9f1f6..bec222ca9ba4 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -171,7 +171,7 @@ void ext4_delete_inode(struct inode *inode)
 	int err;
 
 	if (!is_bad_inode(inode))
-		vfs_dq_init(inode);
+		dquot_initialize(inode);
 
 	if (ext4_should_order_data(inode))
 		ext4_begin_ordered_truncate(inode, 0);
@@ -5255,7 +5255,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
 		return error;
 
 	if (ia_valid & ATTR_SIZE)
-		vfs_dq_init(inode);
+		dquot_initialize(inode);
 	if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) ||
 		(ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) {
 		handle_t *handle;
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 20f55c2e7571..7f3d2d75a0dc 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1766,7 +1766,7 @@ static int ext4_create(struct inode *dir, struct dentry *dentry, int mode,
 	struct inode *inode;
 	int err, retries = 0;
 
-	vfs_dq_init(dir);
+	dquot_initialize(dir);
 
 retry:
 	handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
@@ -1802,7 +1802,7 @@ static int ext4_mknod(struct inode *dir, struct dentry *dentry,
 	if (!new_valid_dev(rdev))
 		return -EINVAL;
 
-	vfs_dq_init(dir);
+	dquot_initialize(dir);
 
 retry:
 	handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
@@ -1841,7 +1841,7 @@ static int ext4_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 	if (EXT4_DIR_LINK_MAX(dir))
 		return -EMLINK;
 
-	vfs_dq_init(dir);
+	dquot_initialize(dir);
 
 retry:
 	handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
@@ -2142,8 +2142,8 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry)
 
 	/* Initialize quotas before so that eventual writes go in
 	 * separate transaction */
-	vfs_dq_init(dir);
-	vfs_dq_init(dentry->d_inode);
+	dquot_initialize(dir);
+	dquot_initialize(dentry->d_inode);
 
 	handle = ext4_journal_start(dir, EXT4_DELETE_TRANS_BLOCKS(dir->i_sb));
 	if (IS_ERR(handle))
@@ -2203,8 +2203,8 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
 
 	/* Initialize quotas before so that eventual writes go
 	 * in separate transaction */
-	vfs_dq_init(dir);
-	vfs_dq_init(dentry->d_inode);
+	dquot_initialize(dir);
+	dquot_initialize(dentry->d_inode);
 
 	handle = ext4_journal_start(dir, EXT4_DELETE_TRANS_BLOCKS(dir->i_sb));
 	if (IS_ERR(handle))
@@ -2260,7 +2260,7 @@ static int ext4_symlink(struct inode *dir,
 	if (l > dir->i_sb->s_blocksize)
 		return -ENAMETOOLONG;
 
-	vfs_dq_init(dir);
+	dquot_initialize(dir);
 
 retry:
 	handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
@@ -2320,7 +2320,7 @@ static int ext4_link(struct dentry *old_dentry,
 	if (inode->i_nlink >= EXT4_LINK_MAX)
 		return -EMLINK;
 
-	vfs_dq_init(dir);
+	dquot_initialize(dir);
 
 	/*
 	 * Return -ENOENT if we've raced with unlink and i_nlink is 0.  Doing
@@ -2372,15 +2372,15 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
 	struct ext4_dir_entry_2 *old_de, *new_de;
 	int retval, force_da_alloc = 0;
 
-	vfs_dq_init(old_dir);
-	vfs_dq_init(new_dir);
+	dquot_initialize(old_dir);
+	dquot_initialize(new_dir);
 
 	old_bh = new_bh = dir_bh = NULL;
 
 	/* Initialize quotas before so that eventual writes go
 	 * in separate transaction */
 	if (new_dentry->d_inode)
-		vfs_dq_init(new_dentry->d_inode);
+		dquot_initialize(new_dentry->d_inode);
 	handle = ext4_journal_start(old_dir, 2 *
 					EXT4_DATA_TRANS_BLOCKS(old_dir->i_sb) +
 					EXT4_INDEX_EXTRA_TRANS_BLOCKS + 2);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 035516c80df2..edcf3b0239d1 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1013,7 +1013,6 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type,
 				const char *data, size_t len, loff_t off);
 
 static const struct dquot_operations ext4_quota_operations = {
-	.initialize	= dquot_initialize,
 #ifdef CONFIG_QUOTA
 	.get_reserved_space = ext4_get_reserved_space,
 #endif
@@ -1931,7 +1930,7 @@ static void ext4_orphan_cleanup(struct super_block *sb,
 		}
 
 		list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan);
-		vfs_dq_init(inode);
+		dquot_initialize(inode);
 		if (inode->i_nlink) {
 			ext4_msg(sb, KERN_DEBUG,
 				"%s: truncating inode %lu to %lld bytes",
@@ -3700,7 +3699,7 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
  * Process 1                         Process 2
  * ext4_create()                     quota_sync()
  *   jbd2_journal_start()                  write_dquot()
- *   vfs_dq_init()                         down(dqio_mutex)
+ *   dquot_initialize()                         down(dqio_mutex)
  *     down(dqio_mutex)                    jbd2_journal_start()
  *
  */
diff --git a/fs/jfs/file.c b/fs/jfs/file.c
index f19bb33eb1eb..14ba982b3f24 100644
--- a/fs/jfs/file.c
+++ b/fs/jfs/file.c
@@ -99,7 +99,7 @@ int jfs_setattr(struct dentry *dentry, struct iattr *iattr)
 		return rc;
 
 	if (iattr->ia_valid & ATTR_SIZE)
-		vfs_dq_init(inode);
+		dquot_initialize(inode);
 	if ((iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) ||
 	    (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid)) {
 		rc = dquot_transfer(inode, iattr);
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c
index 1aa2dda16590..c694a5f15380 100644
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -147,7 +147,7 @@ void jfs_delete_inode(struct inode *inode)
 	jfs_info("In jfs_delete_inode, inode = 0x%p", inode);
 
 	if (!is_bad_inode(inode))
-		vfs_dq_init(inode);
+		dquot_initialize(inode);
 
 	if (!is_bad_inode(inode) &&
 	    (JFS_IP(inode)->fileset == FILESYSTEM_I)) {
@@ -161,7 +161,7 @@ void jfs_delete_inode(struct inode *inode)
 		/*
 		 * Free the inode from the quota allocation.
 		 */
-		vfs_dq_init(inode);
+		dquot_initialize(inode);
 		dquot_free_inode(inode);
 		dquot_drop(inode);
 	}
diff --git a/fs/jfs/jfs_inode.c b/fs/jfs/jfs_inode.c
index 72b30895422c..829921b67765 100644
--- a/fs/jfs/jfs_inode.c
+++ b/fs/jfs/jfs_inode.c
@@ -116,7 +116,7 @@ struct inode *ialloc(struct inode *parent, umode_t mode)
 	/*
 	 * Allocate inode to quota.
 	 */
-	vfs_dq_init(inode);
+	dquot_initialize(inode);
 	rc = dquot_alloc_inode(inode);
 	if (rc)
 		goto fail_drop;
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index b7cc29da50b4..4a3e9f39c21d 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -85,7 +85,7 @@ static int jfs_create(struct inode *dip, struct dentry *dentry, int mode,
 
 	jfs_info("jfs_create: dip:0x%p name:%s", dip, dentry->d_name.name);
 
-	vfs_dq_init(dip);
+	dquot_initialize(dip);
 
 	/*
 	 * search parent directory for entry/freespace
@@ -217,7 +217,7 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, int mode)
 
 	jfs_info("jfs_mkdir: dip:0x%p name:%s", dip, dentry->d_name.name);
 
-	vfs_dq_init(dip);
+	dquot_initialize(dip);
 
 	/* link count overflow on parent directory ? */
 	if (dip->i_nlink == JFS_LINK_MAX) {
@@ -360,8 +360,8 @@ static int jfs_rmdir(struct inode *dip, struct dentry *dentry)
 	jfs_info("jfs_rmdir: dip:0x%p name:%s", dip, dentry->d_name.name);
 
 	/* Init inode for quota operations. */
-	vfs_dq_init(dip);
-	vfs_dq_init(ip);
+	dquot_initialize(dip);
+	dquot_initialize(ip);
 
 	/* directory must be empty to be removed */
 	if (!dtEmpty(ip)) {
@@ -488,8 +488,8 @@ static int jfs_unlink(struct inode *dip, struct dentry *dentry)
 	jfs_info("jfs_unlink: dip:0x%p name:%s", dip, dentry->d_name.name);
 
 	/* Init inode for quota operations. */
-	vfs_dq_init(dip);
-	vfs_dq_init(ip);
+	dquot_initialize(dip);
+	dquot_initialize(ip);
 
 	if ((rc = get_UCSname(&dname, dentry)))
 		goto out;
@@ -811,7 +811,7 @@ static int jfs_link(struct dentry *old_dentry,
 	if (ip->i_nlink == 0)
 		return -ENOENT;
 
-	vfs_dq_init(dir);
+	dquot_initialize(dir);
 
 	tid = txBegin(ip->i_sb, 0);
 
@@ -904,7 +904,7 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry,
 
 	jfs_info("jfs_symlink: dip:0x%p name:%s", dip, name);
 
-	vfs_dq_init(dip);
+	dquot_initialize(dip);
 
 	ssize = strlen(name) + 1;
 
@@ -1097,8 +1097,8 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	jfs_info("jfs_rename: %s %s", old_dentry->d_name.name,
 		 new_dentry->d_name.name);
 
-	vfs_dq_init(old_dir);
-	vfs_dq_init(new_dir);
+	dquot_initialize(old_dir);
+	dquot_initialize(new_dir);
 
 	old_ip = old_dentry->d_inode;
 	new_ip = new_dentry->d_inode;
@@ -1149,7 +1149,7 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	} else if (new_ip) {
 		IWRITE_LOCK(new_ip, RDWRLOCK_NORMAL);
 		/* Init inode for quota operations. */
-		vfs_dq_init(new_ip);
+		dquot_initialize(new_ip);
 	}
 
 	/*
@@ -1373,7 +1373,7 @@ static int jfs_mknod(struct inode *dir, struct dentry *dentry,
 
 	jfs_info("jfs_mknod: %s", dentry->d_name.name);
 
-	vfs_dq_init(dir);
+	dquot_initialize(dir);
 
 	if ((rc = get_UCSname(&dname, dentry)))
 		goto out;
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 126198f5a67c..364105291282 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -108,7 +108,7 @@ static int ocfs2_file_open(struct inode *inode, struct file *file)
 		   file->f_path.dentry->d_name.len, file->f_path.dentry->d_name.name);
 
 	if (file->f_mode & FMODE_WRITE)
-		vfs_dq_init(inode);
+		dquot_initialize(inode);
 
 	spin_lock(&oi->ip_lock);
 
@@ -980,7 +980,7 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
 
 	size_change = S_ISREG(inode->i_mode) && attr->ia_valid & ATTR_SIZE;
 	if (size_change) {
-		vfs_dq_init(inode);
+		dquot_initialize(inode);
 
 		status = ocfs2_rw_lock(inode, 1);
 		if (status < 0) {
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 77681a690d16..278a223aae14 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -971,7 +971,7 @@ void ocfs2_delete_inode(struct inode *inode)
 		goto bail;
 	}
 
-	vfs_dq_init(inode);
+	dquot_initialize(inode);
 
 	if (!ocfs2_inode_is_valid_to_delete(inode)) {
 		/* It's probably not necessary to truncate_inode_pages
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 8b5b142eb638..d9cd4e373a53 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -212,7 +212,7 @@ static struct inode *ocfs2_get_init_inode(struct inode *dir, int mode)
 	} else
 		inode->i_gid = current_fsgid();
 	inode->i_mode = mode;
-	vfs_dq_init(inode);
+	dquot_initialize(inode);
 	return inode;
 }
 
@@ -244,7 +244,7 @@ static int ocfs2_mknod(struct inode *dir,
 		   (unsigned long)dev, dentry->d_name.len,
 		   dentry->d_name.name);
 
-	vfs_dq_init(dir);
+	dquot_initialize(dir);
 
 	/* get our super block */
 	osb = OCFS2_SB(dir->i_sb);
@@ -634,7 +634,7 @@ static int ocfs2_link(struct dentry *old_dentry,
 	if (S_ISDIR(inode->i_mode))
 		return -EPERM;
 
-	vfs_dq_init(dir);
+	dquot_initialize(dir);
 
 	err = ocfs2_inode_lock_nested(dir, &parent_fe_bh, 1, OI_LS_PARENT);
 	if (err < 0) {
@@ -791,7 +791,7 @@ static int ocfs2_unlink(struct inode *dir,
 	mlog_entry("(0x%p, 0x%p, '%.*s')\n", dir, dentry,
 		   dentry->d_name.len, dentry->d_name.name);
 
-	vfs_dq_init(dir);
+	dquot_initialize(dir);
 
 	BUG_ON(dentry->d_parent->d_inode != dir);
 
@@ -1053,8 +1053,8 @@ static int ocfs2_rename(struct inode *old_dir,
 		   old_dentry->d_name.len, old_dentry->d_name.name,
 		   new_dentry->d_name.len, new_dentry->d_name.name);
 
-	vfs_dq_init(old_dir);
-	vfs_dq_init(new_dir);
+	dquot_initialize(old_dir);
+	dquot_initialize(new_dir);
 
 	osb = OCFS2_SB(old_dir->i_sb);
 
@@ -1604,7 +1604,7 @@ static int ocfs2_symlink(struct inode *dir,
 	mlog_entry("(0x%p, 0x%p, symname='%s' actual='%.*s')\n", dir,
 		   dentry, symname, dentry->d_name.len, dentry->d_name.name);
 
-	vfs_dq_init(dir);
+	dquot_initialize(dir);
 
 	sb = dir->i_sb;
 	osb = OCFS2_SB(sb);
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c
index 4dca38f487cf..355f41d1d520 100644
--- a/fs/ocfs2/quota_global.c
+++ b/fs/ocfs2/quota_global.c
@@ -851,7 +851,6 @@ static void ocfs2_destroy_dquot(struct dquot *dquot)
 }
 
 const struct dquot_operations ocfs2_quota_operations = {
-	.initialize	= dquot_initialize,
 	.write_dquot	= ocfs2_write_dquot,
 	.acquire_dquot	= ocfs2_acquire_dquot,
 	.release_dquot	= ocfs2_release_dquot,
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 8ae65c9c020c..f3ae10cde841 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -4390,7 +4390,7 @@ static int ocfs2_vfs_reflink(struct dentry *old_dentry, struct inode *dir,
 	}
 
 	mutex_lock(&inode->i_mutex);
-	vfs_dq_init(dir);
+	dquot_initialize(dir);
 	error = ocfs2_reflink(old_dentry, dir, new_dentry, preserve);
 	mutex_unlock(&inode->i_mutex);
 	if (!error)
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 6244bca45c9d..3c0a7e0dff78 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -230,6 +230,7 @@ struct dqstats dqstats;
 EXPORT_SYMBOL(dqstats);
 
 static qsize_t inode_get_rsv_space(struct inode *inode);
+static void __dquot_initialize(struct inode *inode, int type);
 
 static inline unsigned int
 hashfn(const struct super_block *sb, unsigned int id, int type)
@@ -890,7 +891,7 @@ static void add_dquot_ref(struct super_block *sb, int type)
 		spin_unlock(&inode_lock);
 
 		iput(old_inode);
-		sb->dq_op->initialize(inode, type);
+		__dquot_initialize(inode, type);
 		/* We hold a reference to 'inode' so it couldn't have been
 		 * removed from s_inodes list while we dropped the inode_lock.
 		 * We cannot iput the inode now as we can be holding the last
@@ -1293,22 +1294,26 @@ static int info_bdq_free(struct dquot *dquot, qsize_t space)
 }
 
 /*
- *	Initialize quota pointers in inode
- *	We do things in a bit complicated way but by that we avoid calling
- *	dqget() and thus filesystem callbacks under dqptr_sem.
+ * Initialize quota pointers in inode
+ *
+ * We do things in a bit complicated way but by that we avoid calling
+ * dqget() and thus filesystem callbacks under dqptr_sem.
+ *
+ * It is better to call this function outside of any transaction as it
+ * might need a lot of space in journal for dquot structure allocation.
  */
-int dquot_initialize(struct inode *inode, int type)
+static void __dquot_initialize(struct inode *inode, int type)
 {
 	unsigned int id = 0;
-	int cnt, ret = 0;
+	int cnt;
 	struct dquot *got[MAXQUOTAS];
 	struct super_block *sb = inode->i_sb;
 	qsize_t rsv;
 
 	/* First test before acquiring mutex - solves deadlocks when we
          * re-enter the quota code and are already holding the mutex */
-	if (IS_NOQUOTA(inode))
-		return 0;
+	if (!sb_any_quota_active(inode->i_sb) || IS_NOQUOTA(inode))
+		return;
 
 	/* First get references to structures we might need. */
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
@@ -1351,7 +1356,11 @@ out_err:
 	up_write(&sb_dqopt(sb)->dqptr_sem);
 	/* Drop unused references */
 	dqput_all(got);
-	return ret;
+}
+
+void dquot_initialize(struct inode *inode)
+{
+	__dquot_initialize(inode, -1);
 }
 EXPORT_SYMBOL(dquot_initialize);
 
@@ -1783,7 +1792,7 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr)
 		chid[GRPQUOTA] = iattr->ia_gid;
 	}
 	if (sb_any_quota_active(inode->i_sb) && !IS_NOQUOTA(inode)) {
-		vfs_dq_init(inode);
+		dquot_initialize(inode);
 		if (__dquot_transfer(inode, chid, mask) == NO_QUOTA)
 			return -EDQUOT;
 	}
@@ -1810,7 +1819,6 @@ EXPORT_SYMBOL(dquot_commit_info);
  * Definitions of diskquota operations.
  */
 const struct dquot_operations dquot_operations = {
-	.initialize	= dquot_initialize,
 	.write_dquot	= dquot_commit,
 	.acquire_dquot	= dquot_acquire,
 	.release_dquot	= dquot_release,
@@ -1829,7 +1837,7 @@ int dquot_file_open(struct inode *inode, struct file *file)
 
 	error = generic_file_open(inode, file);
 	if (!error && (file->f_mode & FMODE_WRITE))
-		vfs_dq_init(inode);
+		dquot_initialize(inode);
 	return error;
 }
 EXPORT_SYMBOL(dquot_file_open);
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 06995cb48e39..b8671a54e8ed 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -35,7 +35,7 @@ void reiserfs_delete_inode(struct inode *inode)
 	int err;
 
 	if (!is_bad_inode(inode))
-		vfs_dq_init(inode);
+		dquot_initialize(inode);
 
 	truncate_inode_pages(&inode->i_data, 0);
 
@@ -1768,7 +1768,7 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
 
 	BUG_ON(!th->t_trans_id);
 
-	vfs_dq_init(inode);
+	dquot_initialize(inode);
 	err = dquot_alloc_inode(inode);
 	if (err)
 		goto out_end_trans;
@@ -3076,7 +3076,7 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
 
 	depth = reiserfs_write_lock_once(inode->i_sb);
 	if (attr->ia_valid & ATTR_SIZE) {
-		vfs_dq_init(inode);
+		dquot_initialize(inode);
 
 		/* version 2 items will be caught by the s_maxbytes check
 		 ** done for us in vmtruncate
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index c55e1b9fee5f..96e4cbbfaa18 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -554,7 +554,7 @@ static int drop_new_inode(struct inode *inode)
 }
 
 /* utility function that does setup for reiserfs_new_inode.
-** vfs_dq_init needs lots of credits so it's better to have it
+** dquot_initialize needs lots of credits so it's better to have it
 ** outside of a transaction, so we had to pull some bits of
 ** reiserfs_new_inode out into this func.
 */
@@ -577,7 +577,7 @@ static int new_inode_init(struct inode *inode, struct inode *dir, int mode)
 	} else {
 		inode->i_gid = current_fsgid();
 	}
-	vfs_dq_init(inode);
+	dquot_initialize(inode);
 	return 0;
 }
 
@@ -594,7 +594,7 @@ static int reiserfs_create(struct inode *dir, struct dentry *dentry, int mode,
 	struct reiserfs_transaction_handle th;
 	struct reiserfs_security_handle security;
 
-	vfs_dq_init(dir);
+	dquot_initialize(dir);
 
 	if (!(inode = new_inode(dir->i_sb))) {
 		return -ENOMEM;
@@ -668,7 +668,7 @@ static int reiserfs_mknod(struct inode *dir, struct dentry *dentry, int mode,
 	if (!new_valid_dev(rdev))
 		return -EINVAL;
 
-	vfs_dq_init(dir);
+	dquot_initialize(dir);
 
 	if (!(inode = new_inode(dir->i_sb))) {
 		return -ENOMEM;
@@ -743,7 +743,7 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 	    2 * (REISERFS_QUOTA_INIT_BLOCKS(dir->i_sb) +
 		 REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb));
 
-	vfs_dq_init(dir);
+	dquot_initialize(dir);
 
 #ifdef DISPLACE_NEW_PACKING_LOCALITIES
 	/* set flag that new packing locality created and new blocks for the content     * of that directory are not displaced yet */
@@ -848,7 +848,7 @@ static int reiserfs_rmdir(struct inode *dir, struct dentry *dentry)
 	    JOURNAL_PER_BALANCE_CNT * 2 + 2 +
 	    4 * REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb);
 
-	vfs_dq_init(dir);
+	dquot_initialize(dir);
 
 	reiserfs_write_lock(dir->i_sb);
 	retval = journal_begin(&th, dir->i_sb, jbegin_count);
@@ -931,7 +931,7 @@ static int reiserfs_unlink(struct inode *dir, struct dentry *dentry)
 	unsigned long savelink;
 	int depth;
 
-	vfs_dq_init(dir);
+	dquot_initialize(dir);
 
 	inode = dentry->d_inode;
 
@@ -1034,7 +1034,7 @@ static int reiserfs_symlink(struct inode *parent_dir,
 	    2 * (REISERFS_QUOTA_INIT_BLOCKS(parent_dir->i_sb) +
 		 REISERFS_QUOTA_TRANS_BLOCKS(parent_dir->i_sb));
 
-	vfs_dq_init(parent_dir);
+	dquot_initialize(parent_dir);
 
 	if (!(inode = new_inode(parent_dir->i_sb))) {
 		return -ENOMEM;
@@ -1123,7 +1123,7 @@ static int reiserfs_link(struct dentry *old_dentry, struct inode *dir,
 	    JOURNAL_PER_BALANCE_CNT * 3 +
 	    2 * REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb);
 
-	vfs_dq_init(dir);
+	dquot_initialize(dir);
 
 	reiserfs_write_lock(dir->i_sb);
 	if (inode->i_nlink >= REISERFS_LINK_MAX) {
@@ -1249,8 +1249,8 @@ static int reiserfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	    JOURNAL_PER_BALANCE_CNT * 3 + 5 +
 	    4 * REISERFS_QUOTA_TRANS_BLOCKS(old_dir->i_sb);
 
-	vfs_dq_init(old_dir);
-	vfs_dq_init(new_dir);
+	dquot_initialize(old_dir);
+	dquot_initialize(new_dir);
 
 	old_inode = old_dentry->d_inode;
 	new_dentry_inode = new_dentry->d_inode;
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 34f7cd0cb02d..04bf5d791bda 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -246,7 +246,7 @@ static int finish_unfinished(struct super_block *s)
 			retval = remove_save_link_only(s, &save_link_key, 0);
 			continue;
 		}
-		vfs_dq_init(inode);
+		dquot_initialize(inode);
 
 		if (truncate && S_ISDIR(inode->i_mode)) {
 			/* We got a truncate request for a dir which is impossible.
@@ -622,7 +622,6 @@ static int reiserfs_write_info(struct super_block *, int);
 static int reiserfs_quota_on(struct super_block *, int, int, char *, int);
 
 static const struct dquot_operations reiserfs_quota_operations = {
-	.initialize = dquot_initialize,
 	.write_dquot = reiserfs_write_dquot,
 	.acquire_dquot = reiserfs_acquire_dquot,
 	.release_dquot = reiserfs_release_dquot,
diff --git a/fs/udf/file.c b/fs/udf/file.c
index 013fa44d9a5e..1eb06774ed90 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -228,7 +228,7 @@ static int udf_setattr(struct dentry *dentry, struct iattr *iattr)
 		return error;
 
 	if (iattr->ia_valid & ATTR_SIZE)
-		vfs_dq_init(inode);
+		dquot_initialize(inode);
 
 	if ((iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) ||
             (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid)) {
diff --git a/fs/udf/ialloc.c b/fs/udf/ialloc.c
index 15c6e992e587..fb68c9cd0c3e 100644
--- a/fs/udf/ialloc.c
+++ b/fs/udf/ialloc.c
@@ -153,7 +153,7 @@ struct inode *udf_new_inode(struct inode *dir, int mode, int *err)
 	insert_inode_hash(inode);
 	mark_inode_dirty(inode);
 
-	vfs_dq_init(inode);
+	dquot_initialize(inode);
 	ret = dquot_alloc_inode(inode);
 	if (ret) {
 		dquot_drop(inode);
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index f19520268404..c7da1a32b364 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -72,7 +72,7 @@ static int udf_get_block(struct inode *, sector_t, struct buffer_head *, int);
 void udf_delete_inode(struct inode *inode)
 {
 	if (!is_bad_inode(inode))
-		vfs_dq_init(inode);
+		dquot_initialize(inode);
 
 	truncate_inode_pages(&inode->i_data, 0);
 
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index e360c3fc4ae4..96757e3e3e04 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -563,7 +563,7 @@ static int udf_create(struct inode *dir, struct dentry *dentry, int mode,
 	int err;
 	struct udf_inode_info *iinfo;
 
-	vfs_dq_init(dir);
+	dquot_initialize(dir);
 
 	lock_kernel();
 	inode = udf_new_inode(dir, mode, &err);
@@ -618,7 +618,7 @@ static int udf_mknod(struct inode *dir, struct dentry *dentry, int mode,
 	if (!old_valid_dev(rdev))
 		return -EINVAL;
 
-	vfs_dq_init(dir);
+	dquot_initialize(dir);
 
 	lock_kernel();
 	err = -EIO;
@@ -666,7 +666,7 @@ static int udf_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 	struct udf_inode_info *dinfo = UDF_I(dir);
 	struct udf_inode_info *iinfo;
 
-	vfs_dq_init(dir);
+	dquot_initialize(dir);
 
 	lock_kernel();
 	err = -EMLINK;
@@ -805,7 +805,7 @@ static int udf_rmdir(struct inode *dir, struct dentry *dentry)
 	struct fileIdentDesc *fi, cfi;
 	struct kernel_lb_addr tloc;
 
-	vfs_dq_init(dir);
+	dquot_initialize(dir);
 
 	retval = -ENOENT;
 	lock_kernel();
@@ -853,7 +853,7 @@ static int udf_unlink(struct inode *dir, struct dentry *dentry)
 	struct fileIdentDesc cfi;
 	struct kernel_lb_addr tloc;
 
-	vfs_dq_init(dir);
+	dquot_initialize(dir);
 
 	retval = -ENOENT;
 	lock_kernel();
@@ -909,7 +909,7 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry,
 	struct buffer_head *bh;
 	struct udf_inode_info *iinfo;
 
-	vfs_dq_init(dir);
+	dquot_initialize(dir);
 
 	lock_kernel();
 	inode = udf_new_inode(dir, S_IFLNK, &err);
@@ -1081,7 +1081,7 @@ static int udf_link(struct dentry *old_dentry, struct inode *dir,
 	int err;
 	struct buffer_head *bh;
 
-	vfs_dq_init(dir);
+	dquot_initialize(dir);
 
 	lock_kernel();
 	if (inode->i_nlink >= (256 << sizeof(inode->i_nlink)) - 1) {
@@ -1145,8 +1145,8 @@ static int udf_rename(struct inode *old_dir, struct dentry *old_dentry,
 	struct kernel_lb_addr tloc;
 	struct udf_inode_info *old_iinfo = UDF_I(old_inode);
 
-	vfs_dq_init(old_dir);
-	vfs_dq_init(new_dir);
+	dquot_initialize(old_dir);
+	dquot_initialize(new_dir);
 
 	lock_kernel();
 	ofi = udf_find_entry(old_dir, &old_dentry->d_name, &ofibh, &ocfi);
diff --git a/fs/ufs/file.c b/fs/ufs/file.c
index d84762f3028e..a8962cecde5b 100644
--- a/fs/ufs/file.c
+++ b/fs/ufs/file.c
@@ -24,6 +24,7 @@
  */
 
 #include <linux/fs.h>
+#include <linux/quotaops.h>
 
 #include "ufs_fs.h"
 #include "ufs.h"
diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c
index 67b4bdb056fb..230ecf608026 100644
--- a/fs/ufs/ialloc.c
+++ b/fs/ufs/ialloc.c
@@ -355,7 +355,7 @@ cg_found:
 
 	unlock_super (sb);
 
-	vfs_dq_init(inode);
+	dquot_initialize(inode);
 	err = dquot_alloc_inode(inode);
 	if (err) {
 		dquot_drop(inode);
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index fff8edab382f..09aef49beedb 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -910,7 +910,7 @@ void ufs_delete_inode (struct inode * inode)
 	loff_t old_i_size;
 
 	if (!is_bad_inode(inode))
-		vfs_dq_init(inode);
+		dquot_initialize(inode);
 
 	truncate_inode_pages(&inode->i_data, 0);
 	if (is_bad_inode(inode))
diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c
index c33cb90c516d..118556243e7a 100644
--- a/fs/ufs/namei.c
+++ b/fs/ufs/namei.c
@@ -86,7 +86,7 @@ static int ufs_create (struct inode * dir, struct dentry * dentry, int mode,
 
 	UFSD("BEGIN\n");
 
-	vfs_dq_init(dir);
+	dquot_initialize(dir);
 
 	inode = ufs_new_inode(dir, mode);
 	err = PTR_ERR(inode);
@@ -112,7 +112,7 @@ static int ufs_mknod (struct inode * dir, struct dentry *dentry, int mode, dev_t
 	if (!old_valid_dev(rdev))
 		return -EINVAL;
 
-	vfs_dq_init(dir);
+	dquot_initialize(dir);
 
 	inode = ufs_new_inode(dir, mode);
 	err = PTR_ERR(inode);
@@ -138,7 +138,7 @@ static int ufs_symlink (struct inode * dir, struct dentry * dentry,
 	if (l > sb->s_blocksize)
 		goto out_notlocked;
 
-	vfs_dq_init(dir);
+	dquot_initialize(dir);
 
 	lock_kernel();
 	inode = ufs_new_inode(dir, S_IFLNK | S_IRWXUGO);
@@ -185,7 +185,7 @@ static int ufs_link (struct dentry * old_dentry, struct inode * dir,
 		return -EMLINK;
 	}
 
-	vfs_dq_init(dir);
+	dquot_initialize(dir);
 
 	inode->i_ctime = CURRENT_TIME_SEC;
 	inode_inc_link_count(inode);
@@ -204,7 +204,7 @@ static int ufs_mkdir(struct inode * dir, struct dentry * dentry, int mode)
 	if (dir->i_nlink >= UFS_LINK_MAX)
 		goto out;
 
-	vfs_dq_init(dir);
+	dquot_initialize(dir);
 
 	lock_kernel();
 	inode_inc_link_count(dir);
@@ -250,7 +250,7 @@ static int ufs_unlink(struct inode *dir, struct dentry *dentry)
 	struct page *page;
 	int err = -ENOENT;
 
-	vfs_dq_init(dir);
+	dquot_initialize(dir);
 
 	de = ufs_find_entry(dir, &dentry->d_name, &page);
 	if (!de)
@@ -296,8 +296,8 @@ static int ufs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	struct ufs_dir_entry *old_de;
 	int err = -ENOENT;
 
-	vfs_dq_init(old_dir);
-	vfs_dq_init(new_dir);
+	dquot_initialize(old_dir);
+	dquot_initialize(new_dir);
 
 	old_de = ufs_find_entry(old_dir, &old_dentry->d_name, &old_page);
 	if (!old_de)
diff --git a/fs/ufs/truncate.c b/fs/ufs/truncate.c
index e5ef8a3ec230..d3b6270cb377 100644
--- a/fs/ufs/truncate.c
+++ b/fs/ufs/truncate.c
@@ -528,7 +528,7 @@ static int ufs_setattr(struct dentry *dentry, struct iattr *attr)
 	    attr->ia_size != i_size_read(inode)) {
 		loff_t old_i_size = inode->i_size;
 
-		vfs_dq_init(inode);
+		dquot_initialize(inode);
 
 		error = vmtruncate(inode, attr->ia_size);
 		if (error)
diff --git a/include/linux/quota.h b/include/linux/quota.h
index aec2e9dac2d7..4aa93554f0eb 100644
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -295,7 +295,6 @@ struct quota_format_ops {
 
 /* Operations working with dquots */
 struct dquot_operations {
-	int (*initialize) (struct inode *, int);
 	int (*write_dquot) (struct dquot *);		/* Ordinary dquot write */
 	struct dquot *(*alloc_dquot)(struct super_block *, int);	/* Allocate memory for new dquot */
 	void (*destroy_dquot)(struct dquot *);		/* Free memory for dquot */
diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h
index 93ac788345e2..e6fa7acce290 100644
--- a/include/linux/quotaops.h
+++ b/include/linux/quotaops.h
@@ -23,7 +23,7 @@ void inode_add_rsv_space(struct inode *inode, qsize_t number);
 void inode_claim_rsv_space(struct inode *inode, qsize_t number);
 void inode_sub_rsv_space(struct inode *inode, qsize_t number);
 
-int dquot_initialize(struct inode *inode, int type);
+void dquot_initialize(struct inode *inode);
 void dquot_drop(struct inode *inode);
 struct dquot *dqget(struct super_block *sb, unsigned int id, int type);
 void dqput(struct dquot *dquot);
@@ -139,15 +139,6 @@ extern const struct quotactl_ops vfs_quotactl_ops;
 #define sb_dquot_ops (&dquot_operations)
 #define sb_quotactl_ops (&vfs_quotactl_ops)
 
-/* It is better to call this function outside of any transaction as it might
- * need a lot of space in journal for dquot structure allocation. */
-static inline void vfs_dq_init(struct inode *inode)
-{
-	BUG_ON(!inode->i_sb);
-	if (sb_any_quota_active(inode->i_sb) && !IS_NOQUOTA(inode))
-		inode->i_sb->dq_op->initialize(inode, -1);
-}
-
 /* Cannot be called inside a transaction */
 static inline int vfs_dq_off(struct super_block *sb, int remount)
 {
@@ -207,7 +198,7 @@ static inline int sb_any_quota_active(struct super_block *sb)
 #define sb_dquot_ops				(NULL)
 #define sb_quotactl_ops				(NULL)
 
-static inline void vfs_dq_init(struct inode *inode)
+static inline void dquot_initialize(struct inode *inode)
 {
 }
 
@@ -260,6 +251,8 @@ static inline int dquot_claim_space_nodirty(struct inode *inode, qsize_t number)
 	return 0;
 }
 
+#define dquot_file_open		generic_file_open
+
 #endif /* CONFIG_QUOTA */
 
 static inline int dquot_alloc_space_nodirty(struct inode *inode, qsize_t nr)
@@ -344,6 +337,4 @@ static inline void dquot_release_reservation_block(struct inode *inode,
 	__dquot_free_space(inode, nr << inode->i_blkbits, 1);
 }
 
-#define dquot_file_open		generic_file_open
-
 #endif /* _LINUX_QUOTAOPS_ */
-- 
cgit v1.2.3


From efd8f0e6f6c1faa041f228d7113bd3a9db802d49 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Wed, 3 Mar 2010 09:05:08 -0500
Subject: quota: stop using QUOTA_OK / NO_QUOTA

Just use 0 / -EDQUOT directly - that's what it translates to anyway.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/quota/dquot.c      | 48 ++++++++++++++++++++++++------------------------
 include/linux/quota.h |  3 ---
 2 files changed, 24 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 3c0a7e0dff78..e0b870f4749f 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -1181,13 +1181,13 @@ static int check_idq(struct dquot *dquot, qsize_t inodes, char *warntype)
 	*warntype = QUOTA_NL_NOWARN;
 	if (!sb_has_quota_limits_enabled(dquot->dq_sb, dquot->dq_type) ||
 	    test_bit(DQ_FAKE_B, &dquot->dq_flags))
-		return QUOTA_OK;
+		return 0;
 
 	if (dquot->dq_dqb.dqb_ihardlimit &&
 	    newinodes > dquot->dq_dqb.dqb_ihardlimit &&
             !ignore_hardlimit(dquot)) {
 		*warntype = QUOTA_NL_IHARDWARN;
-		return NO_QUOTA;
+		return -EDQUOT;
 	}
 
 	if (dquot->dq_dqb.dqb_isoftlimit &&
@@ -1196,7 +1196,7 @@ static int check_idq(struct dquot *dquot, qsize_t inodes, char *warntype)
 	    get_seconds() >= dquot->dq_dqb.dqb_itime &&
             !ignore_hardlimit(dquot)) {
 		*warntype = QUOTA_NL_ISOFTLONGWARN;
-		return NO_QUOTA;
+		return -EDQUOT;
 	}
 
 	if (dquot->dq_dqb.dqb_isoftlimit &&
@@ -1207,7 +1207,7 @@ static int check_idq(struct dquot *dquot, qsize_t inodes, char *warntype)
 		    sb_dqopt(dquot->dq_sb)->info[dquot->dq_type].dqi_igrace;
 	}
 
-	return QUOTA_OK;
+	return 0;
 }
 
 /* needs dq_data_lock */
@@ -1219,7 +1219,7 @@ static int check_bdq(struct dquot *dquot, qsize_t space, int prealloc, char *war
 	*warntype = QUOTA_NL_NOWARN;
 	if (!sb_has_quota_limits_enabled(sb, dquot->dq_type) ||
 	    test_bit(DQ_FAKE_B, &dquot->dq_flags))
-		return QUOTA_OK;
+		return 0;
 
 	tspace = dquot->dq_dqb.dqb_curspace + dquot->dq_dqb.dqb_rsvspace
 		+ space;
@@ -1229,7 +1229,7 @@ static int check_bdq(struct dquot *dquot, qsize_t space, int prealloc, char *war
             !ignore_hardlimit(dquot)) {
 		if (!prealloc)
 			*warntype = QUOTA_NL_BHARDWARN;
-		return NO_QUOTA;
+		return -EDQUOT;
 	}
 
 	if (dquot->dq_dqb.dqb_bsoftlimit &&
@@ -1239,7 +1239,7 @@ static int check_bdq(struct dquot *dquot, qsize_t space, int prealloc, char *war
             !ignore_hardlimit(dquot)) {
 		if (!prealloc)
 			*warntype = QUOTA_NL_BSOFTLONGWARN;
-		return NO_QUOTA;
+		return -EDQUOT;
 	}
 
 	if (dquot->dq_dqb.dqb_bsoftlimit &&
@@ -1255,10 +1255,10 @@ static int check_bdq(struct dquot *dquot, qsize_t space, int prealloc, char *war
 			 * We don't allow preallocation to exceed softlimit so exceeding will
 			 * be always printed
 			 */
-			return NO_QUOTA;
+			return -EDQUOT;
 	}
 
-	return QUOTA_OK;
+	return 0;
 }
 
 static int info_idq_free(struct dquot *dquot, qsize_t inodes)
@@ -1507,9 +1507,9 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number,
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
 		if (!inode->i_dquot[cnt])
 			continue;
-		if (check_bdq(inode->i_dquot[cnt], number, !warn, warntype+cnt)
-				== NO_QUOTA) {
-			ret = -EDQUOT;
+		ret = check_bdq(inode->i_dquot[cnt], number, !warn,
+				warntype+cnt);
+		if (ret) {
 			spin_unlock(&dq_data_lock);
 			goto out_flush_warn;
 		}
@@ -1541,7 +1541,7 @@ EXPORT_SYMBOL(__dquot_alloc_space);
  */
 int dquot_alloc_inode(const struct inode *inode)
 {
-	int cnt, ret = -EDQUOT;
+	int cnt, ret = 0;
 	char warntype[MAXQUOTAS];
 
 	/* First test before acquiring mutex - solves deadlocks when we
@@ -1555,8 +1555,8 @@ int dquot_alloc_inode(const struct inode *inode)
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
 		if (!inode->i_dquot[cnt])
 			continue;
-		if (check_idq(inode->i_dquot[cnt], 1, warntype+cnt)
-		    == NO_QUOTA)
+		ret = check_idq(inode->i_dquot[cnt], 1, warntype + cnt);
+		if (ret)
 			goto warn_put_all;
 	}
 
@@ -1565,7 +1565,7 @@ int dquot_alloc_inode(const struct inode *inode)
 			continue;
 		dquot_incr_inodes(inode->i_dquot[cnt], 1);
 	}
-	ret = 0;
+
 warn_put_all:
 	spin_unlock(&dq_data_lock);
 	if (ret == 0)
@@ -1683,14 +1683,14 @@ static int __dquot_transfer(struct inode *inode, qid_t *chid, unsigned long mask
 	qsize_t rsv_space = 0;
 	struct dquot *transfer_from[MAXQUOTAS];
 	struct dquot *transfer_to[MAXQUOTAS];
-	int cnt, ret = QUOTA_OK;
+	int cnt, ret = 0;
 	char warntype_to[MAXQUOTAS];
 	char warntype_from_inodes[MAXQUOTAS], warntype_from_space[MAXQUOTAS];
 
 	/* First test before acquiring mutex - solves deadlocks when we
          * re-enter the quota code and are already holding the mutex */
 	if (IS_NOQUOTA(inode))
-		return QUOTA_OK;
+		return 0;
 	/* Initialize the arrays */
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
 		transfer_from[cnt] = NULL;
@@ -1715,9 +1715,11 @@ static int __dquot_transfer(struct inode *inode, qid_t *chid, unsigned long mask
 		if (!transfer_to[cnt])
 			continue;
 		transfer_from[cnt] = inode->i_dquot[cnt];
-		if (check_idq(transfer_to[cnt], 1, warntype_to + cnt) ==
-		    NO_QUOTA || check_bdq(transfer_to[cnt], space, 0,
-		    warntype_to + cnt) == NO_QUOTA)
+		ret = check_idq(transfer_to[cnt], 1, warntype_to + cnt);
+		if (ret)
+			goto over_quota;
+		ret = check_bdq(transfer_to[cnt], space, 0, warntype_to + cnt);
+		if (ret)
 			goto over_quota;
 	}
 
@@ -1771,7 +1773,6 @@ over_quota:
 	/* Clear dquot pointers we don't want to dqput() */
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++)
 		transfer_from[cnt] = NULL;
-	ret = NO_QUOTA;
 	goto warn_put_all;
 }
 
@@ -1793,8 +1794,7 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr)
 	}
 	if (sb_any_quota_active(inode->i_sb) && !IS_NOQUOTA(inode)) {
 		dquot_initialize(inode);
-		if (__dquot_transfer(inode, chid, mask) == NO_QUOTA)
-			return -EDQUOT;
+		return __dquot_transfer(inode, chid, mask);
 	}
 	return 0;
 }
diff --git a/include/linux/quota.h b/include/linux/quota.h
index 4aa93554f0eb..b462916b2a0a 100644
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -279,9 +279,6 @@ struct dquot {
 	struct mem_dqblk dq_dqb;	/* Diskquota usage */
 };
 
-#define QUOTA_OK          0
-#define NO_QUOTA          1
-
 /* Operations which must be implemented by each quota format */
 struct quota_format_ops {
 	int (*check_quota_file)(struct super_block *sb, int type);	/* Detect whether file is in our format */
-- 
cgit v1.2.3


From 2b9ddcb8b2ce6a44f0f969000f16b016caa64294 Mon Sep 17 00:00:00 2001
From: Daniel Mack <daniel@caiaq.de>
Date: Thu, 4 Mar 2010 19:46:18 +0100
Subject: ALSA: usb/audio.h: Fix field order

Signed-off-by: Daniel Mack <daniel@caiaq.de>
Cc: Clemens Ladisch <clemens@ladisch.de>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/linux/usb/audio.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/usb/audio.h b/include/linux/usb/audio.h
index 6bb293684eb8..4d3e450e2b03 100644
--- a/include/linux/usb/audio.h
+++ b/include/linux/usb/audio.h
@@ -269,8 +269,8 @@ struct uac_format_type_i_ext_descriptor {
 	__u8 bLength;
 	__u8 bDescriptorType;
 	__u8 bDescriptorSubtype;
-	__u8 bSubslotSize;
 	__u8 bFormatType;
+	__u8 bSubslotSize;
 	__u8 bBitResolution;
 	__u8 bHeaderLength;
 	__u8 bControlSize;
-- 
cgit v1.2.3


From a9185b41a4f84971b930c519f0c63bd450c4810d Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 5 Mar 2010 09:21:37 +0100
Subject: pass writeback_control to ->write_inode

This gives the filesystem more information about the writeback that
is happening.  Trond requested this for the NFS unstable write handling,
and other filesystems might benefit from this too by beeing able to
distinguish between the different callers in more detail.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/adfs/adfs.h               |  2 +-
 fs/adfs/inode.c              |  5 +++--
 fs/affs/affs.h               |  3 ++-
 fs/affs/inode.c              |  2 +-
 fs/bfs/inode.c               |  5 +++--
 fs/btrfs/ctree.h             |  2 +-
 fs/btrfs/inode.c             |  4 ++--
 fs/exofs/exofs.h             |  2 +-
 fs/exofs/inode.c             |  4 ++--
 fs/ext2/ext2.h               |  2 +-
 fs/ext2/inode.c              | 11 +++++++++--
 fs/ext3/inode.c              |  4 ++--
 fs/ext4/ext4.h               |  2 +-
 fs/ext4/inode.c              |  6 +++---
 fs/fat/inode.c               |  9 +++++++--
 fs/fs-writeback.c            | 11 +++++------
 fs/gfs2/super.c              |  5 +++--
 fs/hfs/hfs_fs.h              |  2 +-
 fs/hfs/inode.c               |  2 +-
 fs/hfsplus/super.c           |  3 ++-
 fs/jfs/inode.c               |  5 ++++-
 fs/jfs/jfs_inode.h           |  2 +-
 fs/minix/inode.c             |  8 +++++---
 fs/nfs/inode.c               |  5 +++--
 fs/nfs/internal.h            |  2 +-
 fs/ntfs/dir.c                |  2 +-
 fs/ntfs/file.c               |  2 +-
 fs/ntfs/inode.c              |  2 +-
 fs/ntfs/inode.h              |  4 ++--
 fs/ntfs/super.c              |  8 ++++++++
 fs/omfs/inode.c              | 10 ++++++++--
 fs/reiserfs/inode.c          |  4 ++--
 fs/sysv/inode.c              | 10 ++++++++--
 fs/sysv/sysv.h               |  2 +-
 fs/ubifs/dir.c               |  2 +-
 fs/ubifs/file.c              |  8 ++++----
 fs/ubifs/super.c             |  2 +-
 fs/udf/inode.c               |  4 ++--
 fs/udf/udfdecl.h             |  2 +-
 fs/ufs/inode.c               |  5 +++--
 fs/ufs/ufs.h                 |  2 +-
 fs/xfs/linux-2.6/xfs_super.c |  4 ++--
 include/linux/ext3_fs.h      |  2 +-
 include/linux/fs.h           |  2 +-
 include/linux/reiserfs_fs.h  |  2 +-
 45 files changed, 115 insertions(+), 72 deletions(-)

(limited to 'include/linux')

diff --git a/fs/adfs/adfs.h b/fs/adfs/adfs.h
index 9cc18775b832..2ff622f6f547 100644
--- a/fs/adfs/adfs.h
+++ b/fs/adfs/adfs.h
@@ -121,7 +121,7 @@ struct adfs_discmap {
 
 /* Inode stuff */
 struct inode *adfs_iget(struct super_block *sb, struct object_info *obj);
-int adfs_write_inode(struct inode *inode,int unused);
+int adfs_write_inode(struct inode *inode, struct writeback_control *wbc);
 int adfs_notify_change(struct dentry *dentry, struct iattr *attr);
 
 /* map.c */
diff --git a/fs/adfs/inode.c b/fs/adfs/inode.c
index 3f57ce4bee5d..0f5e30978135 100644
--- a/fs/adfs/inode.c
+++ b/fs/adfs/inode.c
@@ -9,6 +9,7 @@
  */
 #include <linux/smp_lock.h>
 #include <linux/buffer_head.h>
+#include <linux/writeback.h>
 #include "adfs.h"
 
 /*
@@ -360,7 +361,7 @@ out:
  * The adfs-specific inode data has already been updated by
  * adfs_notify_change()
  */
-int adfs_write_inode(struct inode *inode, int wait)
+int adfs_write_inode(struct inode *inode, struct writeback_control *wbc)
 {
 	struct super_block *sb = inode->i_sb;
 	struct object_info obj;
@@ -375,7 +376,7 @@ int adfs_write_inode(struct inode *inode, int wait)
 	obj.attr	= ADFS_I(inode)->attr;
 	obj.size	= inode->i_size;
 
-	ret = adfs_dir_update(sb, &obj, wait);
+	ret = adfs_dir_update(sb, &obj, wbc->sync_mode == WB_SYNC_ALL);
 	unlock_kernel();
 	return ret;
 }
diff --git a/fs/affs/affs.h b/fs/affs/affs.h
index 0e40caaba456..861dae68ac12 100644
--- a/fs/affs/affs.h
+++ b/fs/affs/affs.h
@@ -175,7 +175,8 @@ extern void			 affs_delete_inode(struct inode *inode);
 extern void			 affs_clear_inode(struct inode *inode);
 extern struct inode		*affs_iget(struct super_block *sb,
 					unsigned long ino);
-extern int			 affs_write_inode(struct inode *inode, int);
+extern int			 affs_write_inode(struct inode *inode,
+					struct writeback_control *wbc);
 extern int			 affs_add_entry(struct inode *dir, struct inode *inode, struct dentry *dentry, s32 type);
 
 /* file.c */
diff --git a/fs/affs/inode.c b/fs/affs/inode.c
index 3c4ec7d864c4..c9744d771d98 100644
--- a/fs/affs/inode.c
+++ b/fs/affs/inode.c
@@ -166,7 +166,7 @@ bad_inode:
 }
 
 int
-affs_write_inode(struct inode *inode, int unused)
+affs_write_inode(struct inode *inode, struct writeback_control *wbc)
 {
 	struct super_block	*sb = inode->i_sb;
 	struct buffer_head	*bh;
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index 8f3d9fd89604..f22a7d3dc362 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -15,6 +15,7 @@
 #include <linux/smp_lock.h>
 #include <linux/buffer_head.h>
 #include <linux/vfs.h>
+#include <linux/writeback.h>
 #include <asm/uaccess.h>
 #include "bfs.h"
 
@@ -98,7 +99,7 @@ error:
 	return ERR_PTR(-EIO);
 }
 
-static int bfs_write_inode(struct inode *inode, int wait)
+static int bfs_write_inode(struct inode *inode, struct writeback_control *wbc)
 {
 	struct bfs_sb_info *info = BFS_SB(inode->i_sb);
 	unsigned int ino = (u16)inode->i_ino;
@@ -147,7 +148,7 @@ static int bfs_write_inode(struct inode *inode, int wait)
 	di->i_eoffset = cpu_to_le32(i_sblock * BFS_BSIZE + inode->i_size - 1);
 
 	mark_buffer_dirty(bh);
-	if (wait) {
+	if (wbc->sync_mode == WB_SYNC_ALL) {
 		sync_dirty_buffer(bh);
 		if (buffer_req(bh) && !buffer_uptodate(bh))
 			err = -EIO;
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 2aa8ec6a0981..8b5cfdd4bfc1 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -2326,7 +2326,7 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
 int btrfs_readpage(struct file *file, struct page *page);
 void btrfs_delete_inode(struct inode *inode);
 void btrfs_put_inode(struct inode *inode);
-int btrfs_write_inode(struct inode *inode, int wait);
+int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc);
 void btrfs_dirty_inode(struct inode *inode);
 struct inode *btrfs_alloc_inode(struct super_block *sb);
 void btrfs_destroy_inode(struct inode *inode);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 4deb280f8969..c41db6d45ab6 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3968,7 +3968,7 @@ err:
 	return ret;
 }
 
-int btrfs_write_inode(struct inode *inode, int wait)
+int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc)
 {
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	struct btrfs_trans_handle *trans;
@@ -3977,7 +3977,7 @@ int btrfs_write_inode(struct inode *inode, int wait)
 	if (root->fs_info->btree_inode == inode)
 		return 0;
 
-	if (wait) {
+	if (wbc->sync_mode == WB_SYNC_ALL) {
 		trans = btrfs_join_transaction(root, 1);
 		btrfs_set_trans_block_group(trans, inode);
 		ret = btrfs_commit_transaction(trans, root);
diff --git a/fs/exofs/exofs.h b/fs/exofs/exofs.h
index 59b8bf2825c7..8442e353309f 100644
--- a/fs/exofs/exofs.h
+++ b/fs/exofs/exofs.h
@@ -261,7 +261,7 @@ int exofs_write_begin(struct file *file, struct address_space *mapping,
 		struct page **pagep, void **fsdata);
 extern struct inode *exofs_iget(struct super_block *, unsigned long);
 struct inode *exofs_new_inode(struct inode *, int);
-extern int exofs_write_inode(struct inode *, int);
+extern int exofs_write_inode(struct inode *, struct writeback_control *wbc);
 extern void exofs_delete_inode(struct inode *);
 
 /* dir.c:                */
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index 5514f3c2c2f4..a17e4b733e35 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -1280,9 +1280,9 @@ out:
 	return ret;
 }
 
-int exofs_write_inode(struct inode *inode, int wait)
+int exofs_write_inode(struct inode *inode, struct writeback_control *wbc)
 {
-	return exofs_update_inode(inode, wait);
+	return exofs_update_inode(inode, wbc->sync_mode == WB_SYNC_ALL);
 }
 
 /*
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index 061914add3cf..0b038e47ad2f 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -118,7 +118,7 @@ extern unsigned long ext2_count_free (struct buffer_head *, unsigned);
 
 /* inode.c */
 extern struct inode *ext2_iget (struct super_block *, unsigned long);
-extern int ext2_write_inode (struct inode *, int);
+extern int ext2_write_inode (struct inode *, struct writeback_control *);
 extern void ext2_delete_inode (struct inode *);
 extern int ext2_sync_inode (struct inode *);
 extern int ext2_get_block(struct inode *, sector_t, struct buffer_head *, int);
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 71b032c65a02..36ae1cac767c 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -41,6 +41,8 @@ MODULE_AUTHOR("Remy Card and others");
 MODULE_DESCRIPTION("Second Extended Filesystem");
 MODULE_LICENSE("GPL");
 
+static int __ext2_write_inode(struct inode *inode, int do_sync);
+
 /*
  * Test whether an inode is a fast symlink.
  */
@@ -64,7 +66,7 @@ void ext2_delete_inode (struct inode * inode)
 		goto no_delete;
 	EXT2_I(inode)->i_dtime	= get_seconds();
 	mark_inode_dirty(inode);
-	ext2_write_inode(inode, inode_needs_sync(inode));
+	__ext2_write_inode(inode, inode_needs_sync(inode));
 
 	inode->i_size = 0;
 	if (inode->i_blocks)
@@ -1335,7 +1337,7 @@ bad_inode:
 	return ERR_PTR(ret);
 }
 
-int ext2_write_inode(struct inode *inode, int do_sync)
+static int __ext2_write_inode(struct inode *inode, int do_sync)
 {
 	struct ext2_inode_info *ei = EXT2_I(inode);
 	struct super_block *sb = inode->i_sb;
@@ -1440,6 +1442,11 @@ int ext2_write_inode(struct inode *inode, int do_sync)
 	return err;
 }
 
+int ext2_write_inode(struct inode *inode, struct writeback_control *wbc)
+{
+	return __ext2_write_inode(inode, wbc->sync_mode == WB_SYNC_ALL);
+}
+
 int ext2_sync_inode(struct inode *inode)
 {
 	struct writeback_control wbc = {
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 455e6e6e5cb9..7aca55fcc976 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -3096,7 +3096,7 @@ out_brelse:
  * `stuff()' is running, and the new i_size will be lost.  Plus the inode
  * will no longer be on the superblock's dirty inode list.
  */
-int ext3_write_inode(struct inode *inode, int wait)
+int ext3_write_inode(struct inode *inode, struct writeback_control *wbc)
 {
 	if (current->flags & PF_MEMALLOC)
 		return 0;
@@ -3107,7 +3107,7 @@ int ext3_write_inode(struct inode *inode, int wait)
 		return -EIO;
 	}
 
-	if (!wait)
+	if (wbc->sync_mode != WB_SYNC_ALL)
 		return 0;
 
 	return ext3_force_commit(inode->i_sb);
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 4cedc91ec59d..50af1a2c65e7 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1416,7 +1416,7 @@ int ext4_get_block(struct inode *inode, sector_t iblock,
 				struct buffer_head *bh_result, int create);
 
 extern struct inode *ext4_iget(struct super_block *, unsigned long);
-extern int  ext4_write_inode(struct inode *, int);
+extern int  ext4_write_inode(struct inode *, struct writeback_control *);
 extern int  ext4_setattr(struct dentry *, struct iattr *);
 extern int  ext4_getattr(struct vfsmount *mnt, struct dentry *dentry,
 				struct kstat *stat);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index e11952404e02..d01a6cdbf854 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -5177,7 +5177,7 @@ out_brelse:
  * `stuff()' is running, and the new i_size will be lost.  Plus the inode
  * will no longer be on the superblock's dirty inode list.
  */
-int ext4_write_inode(struct inode *inode, int wait)
+int ext4_write_inode(struct inode *inode, struct writeback_control *wbc)
 {
 	int err;
 
@@ -5191,7 +5191,7 @@ int ext4_write_inode(struct inode *inode, int wait)
 			return -EIO;
 		}
 
-		if (!wait)
+		if (wbc->sync_mode != WB_SYNC_ALL)
 			return 0;
 
 		err = ext4_force_commit(inode->i_sb);
@@ -5201,7 +5201,7 @@ int ext4_write_inode(struct inode *inode, int wait)
 		err = ext4_get_inode_loc(inode, &iloc);
 		if (err)
 			return err;
-		if (wait)
+		if (wbc->sync_mode == WB_SYNC_ALL)
 			sync_dirty_buffer(iloc.bh);
 		if (buffer_req(iloc.bh) && !buffer_uptodate(iloc.bh)) {
 			ext4_error(inode->i_sb, __func__,
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 14da530b05ca..fbeecdc194dc 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -577,7 +577,7 @@ static inline loff_t fat_i_pos_read(struct msdos_sb_info *sbi,
 	return i_pos;
 }
 
-static int fat_write_inode(struct inode *inode, int wait)
+static int __fat_write_inode(struct inode *inode, int wait)
 {
 	struct super_block *sb = inode->i_sb;
 	struct msdos_sb_info *sbi = MSDOS_SB(sb);
@@ -634,9 +634,14 @@ retry:
 	return err;
 }
 
+static int fat_write_inode(struct inode *inode, struct writeback_control *wbc)
+{
+	return __fat_write_inode(inode, wbc->sync_mode == WB_SYNC_ALL);
+}
+
 int fat_sync_inode(struct inode *inode)
 {
-	return fat_write_inode(inode, 1);
+	return __fat_write_inode(inode, 1);
 }
 
 EXPORT_SYMBOL_GPL(fat_sync_inode);
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 5f2721b1e4be..76fc4d594acb 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -381,10 +381,10 @@ static void queue_io(struct bdi_writeback *wb, unsigned long *older_than_this)
 	move_expired_inodes(&wb->b_dirty, &wb->b_io, older_than_this);
 }
 
-static int write_inode(struct inode *inode, int sync)
+static int write_inode(struct inode *inode, struct writeback_control *wbc)
 {
 	if (inode->i_sb->s_op->write_inode && !is_bad_inode(inode))
-		return inode->i_sb->s_op->write_inode(inode, sync);
+		return inode->i_sb->s_op->write_inode(inode, wbc);
 	return 0;
 }
 
@@ -421,7 +421,6 @@ static int
 writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
 {
 	struct address_space *mapping = inode->i_mapping;
-	int wait = wbc->sync_mode == WB_SYNC_ALL;
 	unsigned dirty;
 	int ret;
 
@@ -439,7 +438,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
 		 * We'll have another go at writing back this inode when we
 		 * completed a full scan of b_io.
 		 */
-		if (!wait) {
+		if (wbc->sync_mode != WB_SYNC_ALL) {
 			requeue_io(inode);
 			return 0;
 		}
@@ -466,7 +465,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
 	 * This is important for filesystems that modify metadata on data
 	 * I/O completion.
 	 */
-	if (wait) {
+	if (wbc->sync_mode == WB_SYNC_ALL) {
 		int err = filemap_fdatawait(mapping);
 		if (ret == 0)
 			ret = err;
@@ -474,7 +473,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
 
 	/* Don't write the inode if only I_DIRTY_PAGES was set */
 	if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) {
-		int err = write_inode(inode, wait);
+		int err = write_inode(inode, wbc);
 		if (ret == 0)
 			ret = err;
 	}
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index e5e22629da67..ca87598ead7f 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -22,6 +22,7 @@
 #include <linux/crc32.h>
 #include <linux/time.h>
 #include <linux/wait.h>
+#include <linux/writeback.h>
 
 #include "gfs2.h"
 #include "incore.h"
@@ -711,7 +712,7 @@ void gfs2_unfreeze_fs(struct gfs2_sbd *sdp)
  * Returns: errno
  */
 
-static int gfs2_write_inode(struct inode *inode, int sync)
+static int gfs2_write_inode(struct inode *inode, struct writeback_control *wbc)
 {
 	struct gfs2_inode *ip = GFS2_I(inode);
 	struct gfs2_sbd *sdp = GFS2_SB(inode);
@@ -745,7 +746,7 @@ static int gfs2_write_inode(struct inode *inode, int sync)
 do_unlock:
 	gfs2_glock_dq_uninit(&gh);
 do_flush:
-	if (sync != 0)
+	if (wbc->sync_mode == WB_SYNC_ALL)
 		gfs2_log_flush(GFS2_SB(inode), ip->i_gl);
 	return ret;
 }
diff --git a/fs/hfs/hfs_fs.h b/fs/hfs/hfs_fs.h
index 052387e11671..fe35e3b626c4 100644
--- a/fs/hfs/hfs_fs.h
+++ b/fs/hfs/hfs_fs.h
@@ -188,7 +188,7 @@ extern const struct address_space_operations hfs_btree_aops;
 
 extern struct inode *hfs_new_inode(struct inode *, struct qstr *, int);
 extern void hfs_inode_write_fork(struct inode *, struct hfs_extent *, __be32 *, __be32 *);
-extern int hfs_write_inode(struct inode *, int);
+extern int hfs_write_inode(struct inode *, struct writeback_control *);
 extern int hfs_inode_setattr(struct dentry *, struct iattr *);
 extern void hfs_inode_read_fork(struct inode *inode, struct hfs_extent *ext,
 			__be32 log_size, __be32 phys_size, u32 clump_size);
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index a1cbff2b4d99..14f5cb1b9fdc 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -381,7 +381,7 @@ void hfs_inode_write_fork(struct inode *inode, struct hfs_extent *ext,
 					 HFS_SB(inode->i_sb)->alloc_blksz);
 }
 
-int hfs_write_inode(struct inode *inode, int unused)
+int hfs_write_inode(struct inode *inode, struct writeback_control *wbc)
 {
 	struct inode *main_inode = inode;
 	struct hfs_find_data fd;
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index 43022f3d5148..74b473a8ef92 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -87,7 +87,8 @@ bad_inode:
 	return ERR_PTR(err);
 }
 
-static int hfsplus_write_inode(struct inode *inode, int unused)
+static int hfsplus_write_inode(struct inode *inode,
+		struct writeback_control *wbc)
 {
 	struct hfsplus_vh *vhdr;
 	int ret = 0;
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c
index b2ae190a77ba..182b78cc3e62 100644
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -22,6 +22,7 @@
 #include <linux/buffer_head.h>
 #include <linux/pagemap.h>
 #include <linux/quotaops.h>
+#include <linux/writeback.h>
 #include "jfs_incore.h"
 #include "jfs_inode.h"
 #include "jfs_filsys.h"
@@ -120,8 +121,10 @@ int jfs_commit_inode(struct inode *inode, int wait)
 	return rc;
 }
 
-int jfs_write_inode(struct inode *inode, int wait)
+int jfs_write_inode(struct inode *inode, struct writeback_control *wbc)
 {
+	int wait = wbc->sync_mode == WB_SYNC_ALL;
+
 	if (test_cflag(COMMIT_Nolink, inode))
 		return 0;
 	/*
diff --git a/fs/jfs/jfs_inode.h b/fs/jfs/jfs_inode.h
index 1eff7db34d63..15902b03c2a7 100644
--- a/fs/jfs/jfs_inode.h
+++ b/fs/jfs/jfs_inode.h
@@ -26,7 +26,7 @@ extern long jfs_ioctl(struct file *, unsigned int, unsigned long);
 extern long jfs_compat_ioctl(struct file *, unsigned int, unsigned long);
 extern struct inode *jfs_iget(struct super_block *, unsigned long);
 extern int jfs_commit_inode(struct inode *, int);
-extern int jfs_write_inode(struct inode*, int);
+extern int jfs_write_inode(struct inode *, struct writeback_control *);
 extern void jfs_delete_inode(struct inode *);
 extern void jfs_dirty_inode(struct inode *);
 extern void jfs_truncate(struct inode *);
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index 74ea82d72164..756f8c93780c 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -17,8 +17,10 @@
 #include <linux/init.h>
 #include <linux/highuid.h>
 #include <linux/vfs.h>
+#include <linux/writeback.h>
 
-static int minix_write_inode(struct inode * inode, int wait);
+static int minix_write_inode(struct inode *inode,
+		struct writeback_control *wbc);
 static int minix_statfs(struct dentry *dentry, struct kstatfs *buf);
 static int minix_remount (struct super_block * sb, int * flags, char * data);
 
@@ -552,7 +554,7 @@ static struct buffer_head * V2_minix_update_inode(struct inode * inode)
 	return bh;
 }
 
-static int minix_write_inode(struct inode *inode, int wait)
+static int minix_write_inode(struct inode *inode, struct writeback_control *wbc)
 {
 	int err = 0;
 	struct buffer_head *bh;
@@ -563,7 +565,7 @@ static int minix_write_inode(struct inode *inode, int wait)
 		bh = V2_minix_update_inode(inode);
 	if (!bh)
 		return -EIO;
-	if (wait && buffer_dirty(bh)) {
+	if (wbc->sync_mode == WB_SYNC_ALL && buffer_dirty(bh)) {
 		sync_dirty_buffer(bh);
 		if (buffer_req(bh) && !buffer_uptodate(bh)) {
 			printk("IO error syncing minix inode [%s:%08lx]\n",
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 5ecd952cae1d..7f9ecc46f3fb 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -97,11 +97,12 @@ u64 nfs_compat_user_ino64(u64 fileid)
 	return ino;
 }
 
-int nfs_write_inode(struct inode *inode, int sync)
+int nfs_write_inode(struct inode *inode, struct writeback_control *wbc)
 {
 	int ret;
 
-	ret = nfs_commit_inode(inode, sync ? FLUSH_SYNC : 0);
+	ret = nfs_commit_inode(inode,
+			wbc->sync_mode == WB_SYNC_ALL ? FLUSH_SYNC : 0);
 	if (ret >= 0)
 		return 0;
 	__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 29e464d23b32..11f82f03c5de 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -211,7 +211,7 @@ extern int nfs_access_cache_shrinker(int nr_to_scan, gfp_t gfp_mask);
 extern struct workqueue_struct *nfsiod_workqueue;
 extern struct inode *nfs_alloc_inode(struct super_block *sb);
 extern void nfs_destroy_inode(struct inode *);
-extern int nfs_write_inode(struct inode *,int);
+extern int nfs_write_inode(struct inode *, struct writeback_control *);
 extern void nfs_clear_inode(struct inode *);
 #ifdef CONFIG_NFS_V4
 extern void nfs4_clear_inode(struct inode *);
diff --git a/fs/ntfs/dir.c b/fs/ntfs/dir.c
index 5a9e34475e37..9173e82a45d1 100644
--- a/fs/ntfs/dir.c
+++ b/fs/ntfs/dir.c
@@ -1545,7 +1545,7 @@ static int ntfs_dir_fsync(struct file *filp, struct dentry *dentry,
  		write_inode_now(bmp_vi, !datasync);
 		iput(bmp_vi);
 	}
-	ret = ntfs_write_inode(vi, 1);
+	ret = __ntfs_write_inode(vi, 1);
 	write_inode_now(vi, !datasync);
 	err = sync_blockdev(vi->i_sb->s_bdev);
 	if (unlikely(err && !ret))
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index 43179ddd336f..b681c71d7069 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -2182,7 +2182,7 @@ static int ntfs_file_fsync(struct file *filp, struct dentry *dentry,
 	ntfs_debug("Entering for inode 0x%lx.", vi->i_ino);
 	BUG_ON(S_ISDIR(vi->i_mode));
 	if (!datasync || !NInoNonResident(NTFS_I(vi)))
-		ret = ntfs_write_inode(vi, 1);
+		ret = __ntfs_write_inode(vi, 1);
 	write_inode_now(vi, !datasync);
 	/*
 	 * NOTE: If we were to use mapping->private_list (see ext2 and
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index dc2505abb6d7..4b57fb1eac2a 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -2957,7 +2957,7 @@ out:
  *
  * Return 0 on success and -errno on error.
  */
-int ntfs_write_inode(struct inode *vi, int sync)
+int __ntfs_write_inode(struct inode *vi, int sync)
 {
 	sle64 nt;
 	ntfs_inode *ni = NTFS_I(vi);
diff --git a/fs/ntfs/inode.h b/fs/ntfs/inode.h
index 117eaf8032a3..9a113544605d 100644
--- a/fs/ntfs/inode.h
+++ b/fs/ntfs/inode.h
@@ -307,12 +307,12 @@ extern void ntfs_truncate_vfs(struct inode *vi);
 
 extern int ntfs_setattr(struct dentry *dentry, struct iattr *attr);
 
-extern int ntfs_write_inode(struct inode *vi, int sync);
+extern int __ntfs_write_inode(struct inode *vi, int sync);
 
 static inline void ntfs_commit_inode(struct inode *vi)
 {
 	if (!is_bad_inode(vi))
-		ntfs_write_inode(vi, 1);
+		__ntfs_write_inode(vi, 1);
 	return;
 }
 
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index 80b04770e8e9..1cf39dfaee7a 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -39,6 +39,7 @@
 #include "dir.h"
 #include "debug.h"
 #include "index.h"
+#include "inode.h"
 #include "aops.h"
 #include "layout.h"
 #include "malloc.h"
@@ -2662,6 +2663,13 @@ static int ntfs_statfs(struct dentry *dentry, struct kstatfs *sfs)
 	return 0;
 }
 
+#ifdef NTFS_RW
+static int ntfs_write_inode(struct inode *vi, struct writeback_control *wbc)
+{
+	return __ntfs_write_inode(vi, wbc->sync_mode == WB_SYNC_ALL);
+}
+#endif
+
 /**
  * The complete super operations.
  */
diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c
index f3b7c1541f3a..75d9b5ba1d45 100644
--- a/fs/omfs/inode.c
+++ b/fs/omfs/inode.c
@@ -11,6 +11,7 @@
 #include <linux/parser.h>
 #include <linux/buffer_head.h>
 #include <linux/vmalloc.h>
+#include <linux/writeback.h>
 #include <linux/crc-itu-t.h>
 #include "omfs.h"
 
@@ -89,7 +90,7 @@ static void omfs_update_checksums(struct omfs_inode *oi)
 	oi->i_head.h_check_xor = xor;
 }
 
-static int omfs_write_inode(struct inode *inode, int wait)
+static int __omfs_write_inode(struct inode *inode, int wait)
 {
 	struct omfs_inode *oi;
 	struct omfs_sb_info *sbi = OMFS_SB(inode->i_sb);
@@ -162,9 +163,14 @@ out:
 	return ret;
 }
 
+static int omfs_write_inode(struct inode *inode, struct writeback_control *wbc)
+{
+	return __omfs_write_inode(inode, wbc->sync_mode == WB_SYNC_ALL);
+}
+
 int omfs_sync_inode(struct inode *inode)
 {
-	return omfs_write_inode(inode, 1);
+	return __omfs_write_inode(inode, 1);
 }
 
 /*
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 2df0f5c7c60b..0d651f980a8d 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -1615,7 +1615,7 @@ int reiserfs_encode_fh(struct dentry *dentry, __u32 * data, int *lenp,
 ** to properly mark inodes for datasync and such, but only actually
 ** does something when called for a synchronous update.
 */
-int reiserfs_write_inode(struct inode *inode, int do_sync)
+int reiserfs_write_inode(struct inode *inode, struct writeback_control *wbc)
 {
 	struct reiserfs_transaction_handle th;
 	int jbegin_count = 1;
@@ -1627,7 +1627,7 @@ int reiserfs_write_inode(struct inode *inode, int do_sync)
 	 ** inode needs to reach disk for safety, and they can safely be
 	 ** ignored because the altered inode has already been logged.
 	 */
-	if (do_sync && !(current->flags & PF_MEMALLOC)) {
+	if (wbc->sync_mode == WB_SYNC_ALL && !(current->flags & PF_MEMALLOC)) {
 		reiserfs_write_lock(inode->i_sb);
 		if (!journal_begin(&th, inode->i_sb, jbegin_count)) {
 			reiserfs_update_sd(&th, inode);
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c
index 9824743832a7..4573734d723d 100644
--- a/fs/sysv/inode.c
+++ b/fs/sysv/inode.c
@@ -26,6 +26,7 @@
 #include <linux/init.h>
 #include <linux/buffer_head.h>
 #include <linux/vfs.h>
+#include <linux/writeback.h>
 #include <linux/namei.h>
 #include <asm/byteorder.h>
 #include "sysv.h"
@@ -246,7 +247,7 @@ bad_inode:
 	return ERR_PTR(-EIO);
 }
 
-int sysv_write_inode(struct inode *inode, int wait)
+static int __sysv_write_inode(struct inode *inode, int wait)
 {
 	struct super_block * sb = inode->i_sb;
 	struct sysv_sb_info * sbi = SYSV_SB(sb);
@@ -296,9 +297,14 @@ int sysv_write_inode(struct inode *inode, int wait)
 	return 0;
 }
 
+int sysv_write_inode(struct inode *inode, struct writeback_control *wbc)
+{
+	return __sysv_write_inode(inode, wbc->sync_mode == WB_SYNC_ALL);
+}
+
 int sysv_sync_inode(struct inode *inode)
 {
-	return sysv_write_inode(inode, 1);
+	return __sysv_write_inode(inode, 1);
 }
 
 static void sysv_delete_inode(struct inode *inode)
diff --git a/fs/sysv/sysv.h b/fs/sysv/sysv.h
index 53786eb5cf60..94cb9b4d76c2 100644
--- a/fs/sysv/sysv.h
+++ b/fs/sysv/sysv.h
@@ -142,7 +142,7 @@ extern int __sysv_write_begin(struct file *file, struct address_space *mapping,
 
 /* inode.c */
 extern struct inode *sysv_iget(struct super_block *, unsigned int);
-extern int sysv_write_inode(struct inode *, int);
+extern int sysv_write_inode(struct inode *, struct writeback_control *wbc);
 extern int sysv_sync_inode(struct inode *);
 extern void sysv_set_inode(struct inode *, dev_t);
 extern int sysv_getattr(struct vfsmount *, struct dentry *, struct kstat *);
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index 552fb0111fff..401e503d44a1 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -1120,7 +1120,7 @@ static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	if (release)
 		ubifs_release_budget(c, &ino_req);
 	if (IS_SYNC(old_inode))
-		err = old_inode->i_sb->s_op->write_inode(old_inode, 1);
+		err = old_inode->i_sb->s_op->write_inode(old_inode, NULL);
 	return err;
 
 out_cancel:
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 16a6444330ec..e26c02ab6cd5 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -1011,7 +1011,7 @@ static int ubifs_writepage(struct page *page, struct writeback_control *wbc)
 	/* Is the page fully inside @i_size? */
 	if (page->index < end_index) {
 		if (page->index >= synced_i_size >> PAGE_CACHE_SHIFT) {
-			err = inode->i_sb->s_op->write_inode(inode, 1);
+			err = inode->i_sb->s_op->write_inode(inode, NULL);
 			if (err)
 				goto out_unlock;
 			/*
@@ -1039,7 +1039,7 @@ static int ubifs_writepage(struct page *page, struct writeback_control *wbc)
 	kunmap_atomic(kaddr, KM_USER0);
 
 	if (i_size > synced_i_size) {
-		err = inode->i_sb->s_op->write_inode(inode, 1);
+		err = inode->i_sb->s_op->write_inode(inode, NULL);
 		if (err)
 			goto out_unlock;
 	}
@@ -1242,7 +1242,7 @@ static int do_setattr(struct ubifs_info *c, struct inode *inode,
 	if (release)
 		ubifs_release_budget(c, &req);
 	if (IS_SYNC(inode))
-		err = inode->i_sb->s_op->write_inode(inode, 1);
+		err = inode->i_sb->s_op->write_inode(inode, NULL);
 	return err;
 
 out:
@@ -1316,7 +1316,7 @@ int ubifs_fsync(struct file *file, struct dentry *dentry, int datasync)
 	 * the inode unless this is a 'datasync()' call.
 	 */
 	if (!datasync || (inode->i_state & I_DIRTY_DATASYNC)) {
-		err = inode->i_sb->s_op->write_inode(inode, 1);
+		err = inode->i_sb->s_op->write_inode(inode, NULL);
 		if (err)
 			return err;
 	}
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 43f9d19a6f33..4d2f2157dd3f 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -283,7 +283,7 @@ static void ubifs_destroy_inode(struct inode *inode)
 /*
  * Note, Linux write-back code calls this without 'i_mutex'.
  */
-static int ubifs_write_inode(struct inode *inode, int wait)
+static int ubifs_write_inode(struct inode *inode, struct writeback_control *wbc)
 {
 	int err = 0;
 	struct ubifs_info *c = inode->i_sb->s_fs_info;
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 378a7592257c..b02089247296 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -1373,12 +1373,12 @@ static mode_t udf_convert_permissions(struct fileEntry *fe)
 	return mode;
 }
 
-int udf_write_inode(struct inode *inode, int sync)
+int udf_write_inode(struct inode *inode, struct writeback_control *wbc)
 {
 	int ret;
 
 	lock_kernel();
-	ret = udf_update_inode(inode, sync);
+	ret = udf_update_inode(inode, wbc->sync_mode == WB_SYNC_ALL);
 	unlock_kernel();
 
 	return ret;
diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h
index 8d46f4294ee7..4223ac855da9 100644
--- a/fs/udf/udfdecl.h
+++ b/fs/udf/udfdecl.h
@@ -142,7 +142,7 @@ extern void udf_truncate(struct inode *);
 extern void udf_read_inode(struct inode *);
 extern void udf_delete_inode(struct inode *);
 extern void udf_clear_inode(struct inode *);
-extern int udf_write_inode(struct inode *, int);
+extern int udf_write_inode(struct inode *, struct writeback_control *wbc);
 extern long udf_block_map(struct inode *, sector_t);
 extern int udf_extend_file(struct inode *, struct extent_position *,
 			   struct kernel_long_ad *, sector_t);
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index 7cf33379fd46..0a627e08610b 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -36,6 +36,7 @@
 #include <linux/mm.h>
 #include <linux/smp_lock.h>
 #include <linux/buffer_head.h>
+#include <linux/writeback.h>
 
 #include "ufs_fs.h"
 #include "ufs.h"
@@ -890,11 +891,11 @@ static int ufs_update_inode(struct inode * inode, int do_sync)
 	return 0;
 }
 
-int ufs_write_inode (struct inode * inode, int wait)
+int ufs_write_inode(struct inode *inode, struct writeback_control *wbc)
 {
 	int ret;
 	lock_kernel();
-	ret = ufs_update_inode (inode, wait);
+	ret = ufs_update_inode(inode, wbc->sync_mode == WB_SYNC_ALL);
 	unlock_kernel();
 	return ret;
 }
diff --git a/fs/ufs/ufs.h b/fs/ufs/ufs.h
index 01d0e2a3b230..43f9f5d5670e 100644
--- a/fs/ufs/ufs.h
+++ b/fs/ufs/ufs.h
@@ -106,7 +106,7 @@ extern struct inode * ufs_new_inode (struct inode *, int);
 
 /* inode.c */
 extern struct inode *ufs_iget(struct super_block *, unsigned long);
-extern int ufs_write_inode (struct inode *, int);
+extern int ufs_write_inode (struct inode *, struct writeback_control *);
 extern int ufs_sync_inode (struct inode *);
 extern void ufs_delete_inode (struct inode *);
 extern struct buffer_head * ufs_bread (struct inode *, unsigned, int, int *);
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 8f117db6070e..71345a370d9f 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -1063,7 +1063,7 @@ xfs_log_inode(
 STATIC int
 xfs_fs_write_inode(
 	struct inode		*inode,
-	int			sync)
+	struct writeback_control *wbc)
 {
 	struct xfs_inode	*ip = XFS_I(inode);
 	struct xfs_mount	*mp = ip->i_mount;
@@ -1074,7 +1074,7 @@ xfs_fs_write_inode(
 	if (XFS_FORCED_SHUTDOWN(mp))
 		return XFS_ERROR(EIO);
 
-	if (sync) {
+	if (wbc->sync_mode == WB_SYNC_ALL) {
 		/*
 		 * Make sure the inode has hit stable storage.  By using the
 		 * log and the fsync transactions we reduce the IOs we have
diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h
index 6b049030fbe6..deac2566450e 100644
--- a/include/linux/ext3_fs.h
+++ b/include/linux/ext3_fs.h
@@ -877,7 +877,7 @@ int ext3_get_blocks_handle(handle_t *handle, struct inode *inode,
 	int create);
 
 extern struct inode *ext3_iget(struct super_block *, unsigned long);
-extern int  ext3_write_inode (struct inode *, int);
+extern int  ext3_write_inode (struct inode *, struct writeback_control *);
 extern int  ext3_setattr (struct dentry *, struct iattr *);
 extern void ext3_delete_inode (struct inode *);
 extern int  ext3_sync_inode (handle_t *, struct inode *);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 5b3182c7eb5f..45689621a851 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1557,7 +1557,7 @@ struct super_operations {
 	void (*destroy_inode)(struct inode *);
 
    	void (*dirty_inode) (struct inode *);
-	int (*write_inode) (struct inode *, int);
+	int (*write_inode) (struct inode *, struct writeback_control *wbc);
 	void (*drop_inode) (struct inode *);
 	void (*delete_inode) (struct inode *);
 	void (*put_super) (struct super_block *);
diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h
index 1ba3cf6edfbb..3b603f474186 100644
--- a/include/linux/reiserfs_fs.h
+++ b/include/linux/reiserfs_fs.h
@@ -2034,7 +2034,7 @@ void reiserfs_read_locked_inode(struct inode *inode,
 int reiserfs_find_actor(struct inode *inode, void *p);
 int reiserfs_init_locked_inode(struct inode *inode, void *p);
 void reiserfs_delete_inode(struct inode *inode);
-int reiserfs_write_inode(struct inode *inode, int);
+int reiserfs_write_inode(struct inode *inode, struct writeback_control *wbc);
 int reiserfs_get_block(struct inode *inode, sector_t block,
 		       struct buffer_head *bh_result, int create);
 struct dentry *reiserfs_fh_to_dentry(struct super_block *sb, struct fid *fid,
-- 
cgit v1.2.3


From 8fc795f703c5138e1a8bfb88c69f52632031aa6a Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Fri, 19 Feb 2010 16:46:56 -0800
Subject: NFS: Cleanup - move nfs_write_inode() into fs/nfs/write.c

The sole purpose of nfs_write_inode is to commit unstable writes, so
move it into fs/nfs/write.c, and make nfs_commit_inode static.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/inode.c         | 12 ------------
 fs/nfs/write.c         | 24 +++++++++++++++++++++++-
 include/linux/nfs_fs.h |  7 -------
 3 files changed, 23 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 7f9ecc46f3fb..89e98312599d 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -97,18 +97,6 @@ u64 nfs_compat_user_ino64(u64 fileid)
 	return ino;
 }
 
-int nfs_write_inode(struct inode *inode, struct writeback_control *wbc)
-{
-	int ret;
-
-	ret = nfs_commit_inode(inode,
-			wbc->sync_mode == WB_SYNC_ALL ? FLUSH_SYNC : 0);
-	if (ret >= 0)
-		return 0;
-	__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
-	return ret;
-}
-
 void nfs_clear_inode(struct inode *inode)
 {
 	/*
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index d63d964a0392..09e97097baaa 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1391,7 +1391,7 @@ static const struct rpc_call_ops nfs_commit_ops = {
 	.rpc_release = nfs_commit_release,
 };
 
-int nfs_commit_inode(struct inode *inode, int how)
+static int nfs_commit_inode(struct inode *inode, int how)
 {
 	LIST_HEAD(head);
 	int res;
@@ -1406,13 +1406,35 @@ int nfs_commit_inode(struct inode *inode, int how)
 	}
 	return res;
 }
+
+static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_control *wbc)
+{
+	int ret;
+
+	ret = nfs_commit_inode(inode,
+			wbc->sync_mode == WB_SYNC_ALL ? FLUSH_SYNC : 0);
+	if (ret >= 0)
+		return 0;
+	__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
+	return ret;
+}
 #else
 static inline int nfs_commit_list(struct inode *inode, struct list_head *head, int how)
 {
 	return 0;
 }
+
+static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_control *wbc)
+{
+	return 0;
+}
 #endif
 
+int nfs_write_inode(struct inode *inode, struct writeback_control *wbc)
+{
+	return nfs_commit_unstable_pages(inode, wbc);
+}
+
 long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_control *wbc, int how)
 {
 	struct inode *inode = mapping->host;
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index d09db1bc9083..384ea3ef2863 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -483,15 +483,8 @@ extern int nfs_wb_nocommit(struct inode *inode);
 extern int nfs_wb_page(struct inode *inode, struct page* page);
 extern int nfs_wb_page_cancel(struct inode *inode, struct page* page);
 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
-extern int  nfs_commit_inode(struct inode *, int);
 extern struct nfs_write_data *nfs_commitdata_alloc(void);
 extern void nfs_commit_free(struct nfs_write_data *wdata);
-#else
-static inline int
-nfs_commit_inode(struct inode *inode, int how)
-{
-	return 0;
-}
 #endif
 
 static inline int
-- 
cgit v1.2.3


From ff778d02bf867e1733a09b34ad6dbb723b024814 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Fri, 19 Feb 2010 16:53:39 -0800
Subject: NFS: Add a count of the number of unstable writes carried by an inode

In order to know when we should do opportunistic commits of the unstable
writes, when the VM is doing a background flush, we add a field to count
the number of unstable writes.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/inode.c         |  1 +
 fs/nfs/write.c         | 14 ++++++++++----
 include/linux/nfs_fs.h |  1 +
 3 files changed, 12 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 89e98312599d..aa5a831001ab 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1404,6 +1404,7 @@ static void init_once(void *foo)
 	INIT_LIST_HEAD(&nfsi->access_cache_inode_lru);
 	INIT_RADIX_TREE(&nfsi->nfs_page_tree, GFP_ATOMIC);
 	nfsi->npages = 0;
+	nfsi->ncommit = 0;
 	atomic_set(&nfsi->silly_count, 1);
 	INIT_HLIST_HEAD(&nfsi->silly_list);
 	init_waitqueue_head(&nfsi->waitqueue);
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 09e97097baaa..dc08a6fbde67 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -438,6 +438,7 @@ nfs_mark_request_commit(struct nfs_page *req)
 	radix_tree_tag_set(&nfsi->nfs_page_tree,
 			req->wb_index,
 			NFS_PAGE_TAG_COMMIT);
+	nfsi->ncommit++;
 	spin_unlock(&inode->i_lock);
 	inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
 	inc_bdi_stat(req->wb_page->mapping->backing_dev_info, BDI_RECLAIMABLE);
@@ -573,11 +574,15 @@ static int
 nfs_scan_commit(struct inode *inode, struct list_head *dst, pgoff_t idx_start, unsigned int npages)
 {
 	struct nfs_inode *nfsi = NFS_I(inode);
+	int ret;
 
 	if (!nfs_need_commit(nfsi))
 		return 0;
 
-	return nfs_scan_list(nfsi, dst, idx_start, npages, NFS_PAGE_TAG_COMMIT);
+	ret = nfs_scan_list(nfsi, dst, idx_start, npages, NFS_PAGE_TAG_COMMIT);
+	if (ret > 0)
+		nfsi->ncommit -= ret;
+	return ret;
 }
 #else
 static inline int nfs_need_commit(struct nfs_inode *nfsi)
@@ -642,9 +647,10 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode,
 		spin_lock(&inode->i_lock);
 	}
 
-	if (nfs_clear_request_commit(req))
-		radix_tree_tag_clear(&NFS_I(inode)->nfs_page_tree,
-				req->wb_index, NFS_PAGE_TAG_COMMIT);
+	if (nfs_clear_request_commit(req) &&
+			radix_tree_tag_clear(&NFS_I(inode)->nfs_page_tree,
+				req->wb_index, NFS_PAGE_TAG_COMMIT) != NULL)
+		NFS_I(inode)->ncommit--;
 
 	/* Okay, the request matches. Update the region */
 	if (offset < req->wb_offset) {
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 384ea3ef2863..309217f46e28 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -166,6 +166,7 @@ struct nfs_inode {
 	struct radix_tree_root	nfs_page_tree;
 
 	unsigned long		npages;
+	unsigned long		ncommit;
 
 	/* Open contexts for shared mmap writes */
 	struct list_head	open_files;
-- 
cgit v1.2.3


From c988950eb6dd6f8e6d98503ca094622729e9aa13 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Fri, 19 Feb 2010 17:03:21 -0800
Subject: NFS: Simplify nfs_wb_page_cancel()

In all cases we should be able to just remove the request and call
cancel_dirty_page().

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/write.c         | 39 +--------------------------------------
 include/linux/nfs_fs.h |  2 --
 2 files changed, 1 insertion(+), 40 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index e40e949598fd..dc7f5e9a23b4 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -540,19 +540,6 @@ static int nfs_wait_on_requests_locked(struct inode *inode, pgoff_t idx_start, u
 	return res;
 }
 
-static void nfs_cancel_commit_list(struct list_head *head)
-{
-	struct nfs_page *req;
-
-	while(!list_empty(head)) {
-		req = nfs_list_entry(head->next);
-		nfs_list_remove_request(req);
-		nfs_clear_request_commit(req);
-		nfs_inode_remove_request(req);
-		nfs_unlock_request(req);
-	}
-}
-
 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
 static int
 nfs_need_commit(struct nfs_inode *nfsi)
@@ -1495,13 +1482,6 @@ long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_contr
 		pages = nfs_scan_commit(inode, &head, idx_start, npages);
 		if (pages == 0)
 			break;
-		if (how & FLUSH_INVALIDATE) {
-			spin_unlock(&inode->i_lock);
-			nfs_cancel_commit_list(&head);
-			ret = pages;
-			spin_lock(&inode->i_lock);
-			continue;
-		}
 		pages += nfs_scan_commit(inode, &head, 0, 0);
 		spin_unlock(&inode->i_lock);
 		ret = nfs_commit_list(inode, &head, how);
@@ -1558,26 +1538,13 @@ int nfs_wb_nocommit(struct inode *inode)
 int nfs_wb_page_cancel(struct inode *inode, struct page *page)
 {
 	struct nfs_page *req;
-	loff_t range_start = page_offset(page);
-	loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1);
-	struct writeback_control wbc = {
-		.bdi = page->mapping->backing_dev_info,
-		.sync_mode = WB_SYNC_ALL,
-		.nr_to_write = LONG_MAX,
-		.range_start = range_start,
-		.range_end = range_end,
-	};
 	int ret = 0;
 
 	BUG_ON(!PageLocked(page));
 	for (;;) {
 		req = nfs_page_find_request(page);
 		if (req == NULL)
-			goto out;
-		if (test_bit(PG_CLEAN, &req->wb_flags)) {
-			nfs_release_request(req);
 			break;
-		}
 		if (nfs_lock_request_dontget(req)) {
 			nfs_inode_remove_request(req);
 			/*
@@ -1591,12 +1558,8 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page)
 		ret = nfs_wait_on_request(req);
 		nfs_release_request(req);
 		if (ret < 0)
-			goto out;
+			break;
 	}
-	if (!PagePrivate(page))
-		return 0;
-	ret = nfs_sync_mapping_wait(page->mapping, &wbc, FLUSH_INVALIDATE);
-out:
 	return ret;
 }
 
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 309217f46e28..1083134c02ff 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -34,8 +34,6 @@
 #define FLUSH_LOWPRI		8	/* low priority background flush */
 #define FLUSH_HIGHPRI		16	/* high priority memory reclaim flush */
 #define FLUSH_NOCOMMIT		32	/* Don't send the NFSv3/v4 COMMIT */
-#define FLUSH_INVALIDATE	64	/* Invalidate the page cache */
-#define FLUSH_NOWRITEPAGE	128	/* Don't call writepage() */
 
 #ifdef __KERNEL__
 
-- 
cgit v1.2.3


From acdc53b2146c7ee67feb1f02f7bc3020126514b8 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Fri, 19 Feb 2010 17:03:26 -0800
Subject: NFS: Replace __nfs_write_mapping with sync_inode()

Now that we have correct COMMIT semantics in writeback_single_inode, we can
reduce and simplify nfs_wb_all(). Also replace nfs_wb_nocommit() with a
call to filemap_write_and_wait(), which doesn't need to hold the
inode->i_mutex.

With that done, we can eliminate nfs_write_mapping() altogether.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/inode.c         | 15 +++++----------
 fs/nfs/write.c         | 42 +++++-------------------------------------
 include/linux/nfs_fs.h |  2 --
 3 files changed, 10 insertions(+), 49 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index aa5a831001ab..443772df9b17 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -495,17 +495,11 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
 	int need_atime = NFS_I(inode)->cache_validity & NFS_INO_INVALID_ATIME;
 	int err;
 
-	/*
-	 * Flush out writes to the server in order to update c/mtime.
-	 *
-	 * Hold the i_mutex to suspend application writes temporarily;
-	 * this prevents long-running writing applications from blocking
-	 * nfs_wb_nocommit.
-	 */
+	/* Flush out writes to the server in order to update c/mtime.  */
 	if (S_ISREG(inode->i_mode)) {
-		mutex_lock(&inode->i_mutex);
-		nfs_wb_nocommit(inode);
-		mutex_unlock(&inode->i_mutex);
+		err = filemap_write_and_wait(inode->i_mapping);
+		if (err)
+			goto out;
 	}
 
 	/*
@@ -529,6 +523,7 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
 		generic_fillattr(inode, stat);
 		stat->ino = nfs_compat_user_ino64(NFS_FILEID(inode));
 	}
+out:
 	return err;
 }
 
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index dc7f5e9a23b4..0b323091b481 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1454,7 +1454,6 @@ long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_contr
 	pgoff_t idx_start, idx_end;
 	unsigned int npages = 0;
 	LIST_HEAD(head);
-	int nocommit = how & FLUSH_NOCOMMIT;
 	long pages, ret;
 
 	/* FIXME */
@@ -1471,14 +1470,11 @@ long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_contr
 				npages = 0;
 		}
 	}
-	how &= ~FLUSH_NOCOMMIT;
 	spin_lock(&inode->i_lock);
 	do {
 		ret = nfs_wait_on_requests_locked(inode, idx_start, npages);
 		if (ret != 0)
 			continue;
-		if (nocommit)
-			break;
 		pages = nfs_scan_commit(inode, &head, idx_start, npages);
 		if (pages == 0)
 			break;
@@ -1492,47 +1488,19 @@ long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_contr
 	return ret;
 }
 
-static int __nfs_write_mapping(struct address_space *mapping, struct writeback_control *wbc, int how)
-{
-	int ret;
-
-	ret = nfs_writepages(mapping, wbc);
-	if (ret < 0)
-		goto out;
-	ret = nfs_sync_mapping_wait(mapping, wbc, how);
-	if (ret < 0)
-		goto out;
-	return 0;
-out:
-	__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
-	return ret;
-}
-
-/* Two pass sync: first using WB_SYNC_NONE, then WB_SYNC_ALL */
-static int nfs_write_mapping(struct address_space *mapping, int how)
+/*
+ * flush the inode to disk.
+ */
+int nfs_wb_all(struct inode *inode)
 {
 	struct writeback_control wbc = {
-		.bdi = mapping->backing_dev_info,
 		.sync_mode = WB_SYNC_ALL,
 		.nr_to_write = LONG_MAX,
 		.range_start = 0,
 		.range_end = LLONG_MAX,
 	};
 
-	return __nfs_write_mapping(mapping, &wbc, how);
-}
-
-/*
- * flush the inode to disk.
- */
-int nfs_wb_all(struct inode *inode)
-{
-	return nfs_write_mapping(inode->i_mapping, 0);
-}
-
-int nfs_wb_nocommit(struct inode *inode)
-{
-	return nfs_write_mapping(inode->i_mapping, FLUSH_NOCOMMIT);
+	return sync_inode(inode, &wbc);
 }
 
 int nfs_wb_page_cancel(struct inode *inode, struct page *page)
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 1083134c02ff..93f439e7c5bf 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -33,7 +33,6 @@
 #define FLUSH_STABLE		4	/* commit to stable storage */
 #define FLUSH_LOWPRI		8	/* low priority background flush */
 #define FLUSH_HIGHPRI		16	/* high priority memory reclaim flush */
-#define FLUSH_NOCOMMIT		32	/* Don't send the NFSv3/v4 COMMIT */
 
 #ifdef __KERNEL__
 
@@ -478,7 +477,6 @@ extern int nfs_writeback_done(struct rpc_task *, struct nfs_write_data *);
  */
 extern long nfs_sync_mapping_wait(struct address_space *, struct writeback_control *, int);
 extern int nfs_wb_all(struct inode *inode);
-extern int nfs_wb_nocommit(struct inode *inode);
 extern int nfs_wb_page(struct inode *inode, struct page* page);
 extern int nfs_wb_page_cancel(struct inode *inode, struct page* page);
 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
-- 
cgit v1.2.3


From 7f2f12d963e7c33a93bfb0b22f0178eb1e6a4196 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Fri, 19 Feb 2010 17:03:28 -0800
Subject: NFS: Simplify nfs_wb_page()

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/write.c         | 120 ++++++++++---------------------------------------
 include/linux/nfs_fs.h |   1 -
 2 files changed, 23 insertions(+), 98 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 0b323091b481..53ff70e23993 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -502,44 +502,6 @@ int nfs_reschedule_unstable_write(struct nfs_page *req)
 }
 #endif
 
-/*
- * Wait for a request to complete.
- *
- * Interruptible by fatal signals only.
- */
-static int nfs_wait_on_requests_locked(struct inode *inode, pgoff_t idx_start, unsigned int npages)
-{
-	struct nfs_inode *nfsi = NFS_I(inode);
-	struct nfs_page *req;
-	pgoff_t idx_end, next;
-	unsigned int		res = 0;
-	int			error;
-
-	if (npages == 0)
-		idx_end = ~0;
-	else
-		idx_end = idx_start + npages - 1;
-
-	next = idx_start;
-	while (radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree, (void **)&req, next, 1, NFS_PAGE_TAG_LOCKED)) {
-		if (req->wb_index > idx_end)
-			break;
-
-		next = req->wb_index + 1;
-		BUG_ON(!NFS_WBACK_BUSY(req));
-
-		kref_get(&req->wb_kref);
-		spin_unlock(&inode->i_lock);
-		error = nfs_wait_on_request(req);
-		nfs_release_request(req);
-		spin_lock(&inode->i_lock);
-		if (error < 0)
-			return error;
-		res++;
-	}
-	return res;
-}
-
 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
 static int
 nfs_need_commit(struct nfs_inode *nfsi)
@@ -1432,7 +1394,7 @@ out_mark_dirty:
 	return ret;
 }
 #else
-static inline int nfs_commit_list(struct inode *inode, struct list_head *head, int how)
+static int nfs_commit_inode(struct inode *inode, int how)
 {
 	return 0;
 }
@@ -1448,46 +1410,6 @@ int nfs_write_inode(struct inode *inode, struct writeback_control *wbc)
 	return nfs_commit_unstable_pages(inode, wbc);
 }
 
-long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_control *wbc, int how)
-{
-	struct inode *inode = mapping->host;
-	pgoff_t idx_start, idx_end;
-	unsigned int npages = 0;
-	LIST_HEAD(head);
-	long pages, ret;
-
-	/* FIXME */
-	if (wbc->range_cyclic)
-		idx_start = 0;
-	else {
-		idx_start = wbc->range_start >> PAGE_CACHE_SHIFT;
-		idx_end = wbc->range_end >> PAGE_CACHE_SHIFT;
-		if (idx_end > idx_start) {
-			pgoff_t l_npages = 1 + idx_end - idx_start;
-			npages = l_npages;
-			if (sizeof(npages) != sizeof(l_npages) &&
-					(pgoff_t)npages != l_npages)
-				npages = 0;
-		}
-	}
-	spin_lock(&inode->i_lock);
-	do {
-		ret = nfs_wait_on_requests_locked(inode, idx_start, npages);
-		if (ret != 0)
-			continue;
-		pages = nfs_scan_commit(inode, &head, idx_start, npages);
-		if (pages == 0)
-			break;
-		pages += nfs_scan_commit(inode, &head, 0, 0);
-		spin_unlock(&inode->i_lock);
-		ret = nfs_commit_list(inode, &head, how);
-		spin_lock(&inode->i_lock);
-
-	} while (ret >= 0);
-	spin_unlock(&inode->i_lock);
-	return ret;
-}
-
 /*
  * flush the inode to disk.
  */
@@ -1531,45 +1453,49 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page)
 	return ret;
 }
 
-static int nfs_wb_page_priority(struct inode *inode, struct page *page,
-				int how)
+/*
+ * Write back all requests on one page - we do this before reading it.
+ */
+int nfs_wb_page(struct inode *inode, struct page *page)
 {
 	loff_t range_start = page_offset(page);
 	loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1);
 	struct writeback_control wbc = {
-		.bdi = page->mapping->backing_dev_info,
 		.sync_mode = WB_SYNC_ALL,
-		.nr_to_write = LONG_MAX,
+		.nr_to_write = 0,
 		.range_start = range_start,
 		.range_end = range_end,
 	};
+	struct nfs_page *req;
+	int need_commit;
 	int ret;
 
-	do {
+	while(PagePrivate(page)) {
 		if (clear_page_dirty_for_io(page)) {
 			ret = nfs_writepage_locked(page, &wbc);
 			if (ret < 0)
 				goto out_error;
-		} else if (!PagePrivate(page))
+		}
+		req = nfs_find_and_lock_request(page);
+		if (!req)
 			break;
-		ret = nfs_sync_mapping_wait(page->mapping, &wbc, how);
-		if (ret < 0)
+		if (IS_ERR(req)) {
+			ret = PTR_ERR(req);
 			goto out_error;
-	} while (PagePrivate(page));
+		}
+		need_commit = test_bit(PG_CLEAN, &req->wb_flags);
+		nfs_clear_page_tag_locked(req);
+		if (need_commit) {
+			ret = nfs_commit_inode(inode, FLUSH_SYNC);
+			if (ret < 0)
+				goto out_error;
+		}
+	}
 	return 0;
 out_error:
-	__mark_inode_dirty(inode, I_DIRTY_PAGES);
 	return ret;
 }
 
-/*
- * Write back all requests on one page - we do this before reading it.
- */
-int nfs_wb_page(struct inode *inode, struct page* page)
-{
-	return nfs_wb_page_priority(inode, page, FLUSH_STABLE);
-}
-
 #ifdef CONFIG_MIGRATION
 int nfs_migrate_page(struct address_space *mapping, struct page *newpage,
 		struct page *page)
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 93f439e7c5bf..b789d85bff82 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -475,7 +475,6 @@ extern int nfs_writeback_done(struct rpc_task *, struct nfs_write_data *);
  * Try to write back everything synchronously (but check the
  * return value!)
  */
-extern long nfs_sync_mapping_wait(struct address_space *, struct writeback_control *, int);
 extern int nfs_wb_all(struct inode *inode);
 extern int nfs_wb_page(struct inode *inode, struct page* page);
 extern int nfs_wb_page_cancel(struct inode *inode, struct page* page);
-- 
cgit v1.2.3


From 1cda707d52e51a6cafac0aef12d2bd7052d572e6 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Fri, 19 Feb 2010 17:03:30 -0800
Subject: NFS: Remove requirement for inode->i_mutex from
 nfs_invalidate_mapping

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/dir.c           |  2 +-
 fs/nfs/inode.c         | 41 +----------------------------------------
 fs/nfs/symlink.c       |  2 +-
 include/linux/nfs_fs.h |  1 -
 4 files changed, 3 insertions(+), 43 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 3c7f03b669fb..a1f6b4438fb1 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -560,7 +560,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 	desc->entry = &my_entry;
 
 	nfs_block_sillyrename(dentry);
-	res = nfs_revalidate_mapping_nolock(inode, filp->f_mapping);
+	res = nfs_revalidate_mapping(inode, filp->f_mapping);
 	if (res < 0)
 		goto out;
 
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index e8b41170d295..dbaaf7d2a188 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -754,7 +754,7 @@ int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
 	return __nfs_revalidate_inode(server, inode);
 }
 
-static int nfs_invalidate_mapping_nolock(struct inode *inode, struct address_space *mapping)
+static int nfs_invalidate_mapping(struct inode *inode, struct address_space *mapping)
 {
 	struct nfs_inode *nfsi = NFS_I(inode);
 	
@@ -775,49 +775,10 @@ static int nfs_invalidate_mapping_nolock(struct inode *inode, struct address_spa
 	return 0;
 }
 
-static int nfs_invalidate_mapping(struct inode *inode, struct address_space *mapping)
-{
-	int ret = 0;
-
-	mutex_lock(&inode->i_mutex);
-	if (NFS_I(inode)->cache_validity & NFS_INO_INVALID_DATA) {
-		ret = nfs_sync_mapping(mapping);
-		if (ret == 0)
-			ret = nfs_invalidate_mapping_nolock(inode, mapping);
-	}
-	mutex_unlock(&inode->i_mutex);
-	return ret;
-}
-
-/**
- * nfs_revalidate_mapping_nolock - Revalidate the pagecache
- * @inode - pointer to host inode
- * @mapping - pointer to mapping
- */
-int nfs_revalidate_mapping_nolock(struct inode *inode, struct address_space *mapping)
-{
-	struct nfs_inode *nfsi = NFS_I(inode);
-	int ret = 0;
-
-	if ((nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE)
-			|| nfs_attribute_timeout(inode) || NFS_STALE(inode)) {
-		ret = __nfs_revalidate_inode(NFS_SERVER(inode), inode);
-		if (ret < 0)
-			goto out;
-	}
-	if (nfsi->cache_validity & NFS_INO_INVALID_DATA)
-		ret = nfs_invalidate_mapping_nolock(inode, mapping);
-out:
-	return ret;
-}
-
 /**
  * nfs_revalidate_mapping - Revalidate the pagecache
  * @inode - pointer to host inode
  * @mapping - pointer to mapping
- *
- * This version of the function will take the inode->i_mutex and attempt to
- * flush out all dirty data if it needs to invalidate the page cache.
  */
 int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping)
 {
diff --git a/fs/nfs/symlink.c b/fs/nfs/symlink.c
index 412738dbfbc7..2ea9e5c27e55 100644
--- a/fs/nfs/symlink.c
+++ b/fs/nfs/symlink.c
@@ -50,7 +50,7 @@ static void *nfs_follow_link(struct dentry *dentry, struct nameidata *nd)
 	struct page *page;
 	void *err;
 
-	err = ERR_PTR(nfs_revalidate_mapping_nolock(inode, inode->i_mapping));
+	err = ERR_PTR(nfs_revalidate_mapping(inode, inode->i_mapping));
 	if (err)
 		goto read_failed;
 	page = read_cache_page(&inode->i_data, 0,
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index b789d85bff82..1a0b85aa151e 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -347,7 +347,6 @@ extern int nfs_attribute_timeout(struct inode *inode);
 extern int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode);
 extern int __nfs_revalidate_inode(struct nfs_server *, struct inode *);
 extern int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping);
-extern int nfs_revalidate_mapping_nolock(struct inode *inode, struct address_space *mapping);
 extern int nfs_setattr(struct dentry *, struct iattr *);
 extern void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr);
 extern struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx);
-- 
cgit v1.2.3


From f75580c4afb72c156746b3fc1ec977b1a85d3dee Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Mon, 15 Feb 2010 17:27:00 +0000
Subject: net/9p: Add multi channel support.

This is needed for supporting multiple mount points.

We can find out the device names to be used with mount by checking

/sys/devices/virtio-pci/virtio*/device file

if the device file have value 9 then the specific virtio device can
be used for mounting.

ex:
 #cat /sys/devices/virtio-pci/virtio1/device
 9

now we can mount using
# mount -t 9p -o trans=virtio virtio1  /mnt/

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 include/linux/virtio_9p.h |  2 +-
 net/9p/trans_virtio.c     | 14 ++++++++------
 2 files changed, 9 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/virtio_9p.h b/include/linux/virtio_9p.h
index 095e10d148b4..7a615c3f5e3d 100644
--- a/include/linux/virtio_9p.h
+++ b/include/linux/virtio_9p.h
@@ -6,6 +6,6 @@
 #include <linux/virtio_config.h>
 
 /* Maximum number of virtio channels per partition (1 for now) */
-#define MAX_9P_CHAN	1
+#define MAX_9P_CHAN	10
 
 #endif /* _LINUX_VIRTIO_9P_H */
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index cb50f4ae5eef..df924e5657d3 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -296,13 +296,15 @@ p9_virtio_create(struct p9_client *client, const char *devname, char *args)
 
 	mutex_lock(&virtio_9p_lock);
 	while (index < MAX_9P_CHAN) {
-		if (chan->initialized && !chan->inuse) {
-			chan->inuse = true;
-			break;
-		} else {
-			index++;
-			chan = &channels[index];
+		if (chan->initialized &&
+			!strcmp(devname, dev_name(&chan->vdev->dev))) {
+			if (!chan->inuse) {
+				chan->inuse = true;
+				break;
+			}
 		}
+		index++;
+		chan = &channels[index];
 	}
 	mutex_unlock(&virtio_9p_lock);
 
-- 
cgit v1.2.3


From 37c1209d413242d9560e343c040777049a8dd869 Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Mon, 15 Feb 2010 17:27:01 +0000
Subject: net/9p: Remove MAX_9P_CHAN limit

Use a list to track the channel instead of statically
allocated array

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 include/linux/virtio_9p.h |  3 --
 net/9p/trans_virtio.c     | 70 ++++++++++++++++++-----------------------------
 2 files changed, 27 insertions(+), 46 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/virtio_9p.h b/include/linux/virtio_9p.h
index 7a615c3f5e3d..332275080083 100644
--- a/include/linux/virtio_9p.h
+++ b/include/linux/virtio_9p.h
@@ -5,7 +5,4 @@
 #include <linux/virtio_ids.h>
 #include <linux/virtio_config.h>
 
-/* Maximum number of virtio channels per partition (1 for now) */
-#define MAX_9P_CHAN	10
-
 #endif /* _LINUX_VIRTIO_9P_H */
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index df924e5657d3..05918d3cb40d 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -49,8 +49,6 @@
 
 /* a single mutex to manage channel initialization and attachment */
 static DEFINE_MUTEX(virtio_9p_lock);
-/* global which tracks highest initialized channel */
-static int chan_index;
 
 /**
  * struct virtio_chan - per-instance transport information
@@ -68,8 +66,7 @@ static int chan_index;
  *
  */
 
-static struct virtio_chan {
-	bool initialized;
+struct virtio_chan {
 	bool inuse;
 
 	spinlock_t lock;
@@ -80,7 +77,11 @@ static struct virtio_chan {
 
 	/* Scatterlist: can be too big for stack. */
 	struct scatterlist sg[VIRTQUEUE_NUM];
-} channels[MAX_9P_CHAN];
+
+	struct list_head chan_list;
+};
+
+static struct list_head virtio_chan_list;
 
 /* How many bytes left in this page. */
 static unsigned int rest_of_page(void *data)
@@ -217,9 +218,7 @@ p9_virtio_request(struct p9_client *client, struct p9_req_t *req)
  * p9_virtio_probe - probe for existence of 9P virtio channels
  * @vdev: virtio device to probe
  *
- * This probes for existing virtio channels.  At present only
- * a single channel is in use, so in the future more work may need
- * to be done here.
+ * This probes for existing virtio channels.
  *
  */
 
@@ -227,16 +226,10 @@ static int p9_virtio_probe(struct virtio_device *vdev)
 {
 	int err;
 	struct virtio_chan *chan;
-	int index;
 
-	mutex_lock(&virtio_9p_lock);
-	index = chan_index++;
-	chan = &channels[index];
-	mutex_unlock(&virtio_9p_lock);
-
-	if (chan_index > MAX_9P_CHAN) {
-		printk(KERN_ERR "9p: virtio: Maximum channels exceeded\n");
-		BUG();
+	chan = kmalloc(sizeof(struct virtio_chan), GFP_KERNEL);
+	if (!chan) {
+		printk(KERN_ERR "9p: Failed to allocate virtio 9P channel\n");
 		err = -ENOMEM;
 		goto fail;
 	}
@@ -255,15 +248,15 @@ static int p9_virtio_probe(struct virtio_device *vdev)
 	sg_init_table(chan->sg, VIRTQUEUE_NUM);
 
 	chan->inuse = false;
-	chan->initialized = true;
+	mutex_lock(&virtio_9p_lock);
+	list_add_tail(&chan->chan_list, &virtio_chan_list);
+	mutex_unlock(&virtio_9p_lock);
 	return 0;
 
 out_free_vq:
 	vdev->config->del_vqs(vdev);
+	kfree(chan);
 fail:
-	mutex_lock(&virtio_9p_lock);
-	chan_index--;
-	mutex_unlock(&virtio_9p_lock);
 	return err;
 }
 
@@ -280,35 +273,27 @@ fail:
  * We use a simple reference count mechanism to ensure that only a single
  * mount has a channel open at a time.
  *
- * Bugs: doesn't allow identification of a specific channel
- * to allocate, channels are allocated sequentially. This was
- * a pragmatic decision to get things rolling, but ideally some
- * way of identifying the channel to attach to would be nice
- * if we are going to support multiple channels.
- *
  */
 
 static int
 p9_virtio_create(struct p9_client *client, const char *devname, char *args)
 {
-	struct virtio_chan *chan = channels;
-	int index = 0;
+	struct virtio_chan *chan;
+	int found = 0;
 
 	mutex_lock(&virtio_9p_lock);
-	while (index < MAX_9P_CHAN) {
-		if (chan->initialized &&
-			!strcmp(devname, dev_name(&chan->vdev->dev))) {
+	list_for_each_entry(chan, &virtio_chan_list, chan_list) {
+		if (!strcmp(devname, dev_name(&chan->vdev->dev))) {
 			if (!chan->inuse) {
 				chan->inuse = true;
+				found = 1;
 				break;
 			}
 		}
-		index++;
-		chan = &channels[index];
 	}
 	mutex_unlock(&virtio_9p_lock);
 
-	if (index >= MAX_9P_CHAN) {
+	if (!found) {
 		printk(KERN_ERR "9p: no channels available\n");
 		return -ENODEV;
 	}
@@ -331,11 +316,13 @@ static void p9_virtio_remove(struct virtio_device *vdev)
 	struct virtio_chan *chan = vdev->priv;
 
 	BUG_ON(chan->inuse);
+	vdev->config->del_vqs(vdev);
+
+	mutex_lock(&virtio_9p_lock);
+	list_del(&chan->chan_list);
+	mutex_unlock(&virtio_9p_lock);
+	kfree(chan);
 
-	if (chan->initialized) {
-		vdev->config->del_vqs(vdev);
-		chan->initialized = false;
-	}
 }
 
 static struct virtio_device_id id_table[] = {
@@ -366,10 +353,7 @@ static struct p9_trans_module p9_virtio_trans = {
 /* The standard init function */
 static int __init p9_virtio_init(void)
 {
-	int count;
-
-	for (count = 0; count < MAX_9P_CHAN; count++)
-		channels[count].initialized = false;
+	INIT_LIST_HEAD(&virtio_chan_list);
 
 	v9fs_register_trans(&p9_virtio_trans);
 	return register_virtio_driver(&p9_virtio_drv);
-- 
cgit v1.2.3


From 723b2f57ad83ee7087acf9a95e8e289414b1f521 Mon Sep 17 00:00:00 2001
From: Jeff Garzik <jgarzik@redhat.com>
Date: Wed, 3 Mar 2010 22:51:50 +0000
Subject: ethtool: Add direct access to ops->get_sset_count

This patch is an alternative approach for accessing string
counts, vs. the drvinfo indirect approach.  This way the drvinfo
space doesn't run out, and we don't break ABI later.

Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
Signed-off-by: Peter P Waskiewicz Jr <peter.p.waskiewicz.jr@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ethtool.h | 17 +++++++++---
 net/core/ethtool.c      | 72 +++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 86 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index cca1c3de140d..f6f961fefbe5 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -253,6 +253,17 @@ struct ethtool_gstrings {
 	__u8	data[0];
 };
 
+struct ethtool_sset_info {
+	__u32	cmd;		/* ETHTOOL_GSSET_INFO */
+	__u32	reserved;
+	__u64	sset_mask;	/* input: each bit selects an sset to query */
+				/* output: each bit a returned sset */
+	__u32	data[0];	/* ETH_SS_xxx count, in order, based on bits
+				   in sset_mask.  One bit implies one
+				   __u32, two bits implies two
+				   __u32's, etc. */
+};
+
 enum ethtool_test_flags {
 	ETH_TEST_FL_OFFLINE	= (1 << 0),	/* online / offline */
 	ETH_TEST_FL_FAILED	= (1 << 1),	/* test passed / failed */
@@ -606,9 +617,9 @@ struct ethtool_ops {
 #define	ETHTOOL_SRXCLSRLINS	0x00000032 /* Insert RX classification rule */
 #define	ETHTOOL_FLASHDEV	0x00000033 /* Flash firmware to device */
 #define	ETHTOOL_RESET		0x00000034 /* Reset hardware */
-
-#define ETHTOOL_SRXNTUPLE	0x00000035 /* Add an n-tuple filter to device */
-#define ETHTOOL_GRXNTUPLE	0x00000036 /* Get n-tuple filters from device */
+#define	ETHTOOL_SRXNTUPLE	0x00000035 /* Add an n-tuple filter to device */
+#define	ETHTOOL_GRXNTUPLE	0x00000036 /* Get n-tuple filters from device */
+#define	ETHTOOL_GSSET_INFO	0x00000037 /* Get string set info */
 
 /* compatibility with older code */
 #define SPARC_ETH_GSET		ETHTOOL_GSET
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 0f2f82185ec4..70075c47ada8 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -214,6 +214,10 @@ static noinline int ethtool_get_drvinfo(struct net_device *dev, void __user *use
 	info.cmd = ETHTOOL_GDRVINFO;
 	ops->get_drvinfo(dev, &info);
 
+	/*
+	 * this method of obtaining string set info is deprecated;
+	 * consider using ETHTOOL_GSSET_INFO instead
+	 */
 	if (ops->get_sset_count) {
 		int rc;
 
@@ -237,6 +241,71 @@ static noinline int ethtool_get_drvinfo(struct net_device *dev, void __user *use
 	return 0;
 }
 
+/*
+ * noinline attribute so that gcc doesnt use too much stack in dev_ethtool()
+ */
+static noinline int ethtool_get_sset_info(struct net_device *dev,
+                                          void __user *useraddr)
+{
+	struct ethtool_sset_info info;
+	const struct ethtool_ops *ops = dev->ethtool_ops;
+	u64 sset_mask;
+	int i, idx = 0, n_bits = 0, ret, rc;
+	u32 *info_buf = NULL;
+
+	if (!ops->get_sset_count)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&info, useraddr, sizeof(info)))
+		return -EFAULT;
+
+	/* store copy of mask, because we zero struct later on */
+	sset_mask = info.sset_mask;
+	if (!sset_mask)
+		return 0;
+
+	/* calculate size of return buffer */
+	for (i = 0; i < 64; i++)
+		if (sset_mask & (1ULL << i))
+			n_bits++;
+
+	memset(&info, 0, sizeof(info));
+	info.cmd = ETHTOOL_GSSET_INFO;
+
+	info_buf = kzalloc(n_bits * sizeof(u32), GFP_USER);
+	if (!info_buf)
+		return -ENOMEM;
+
+	/*
+	 * fill return buffer based on input bitmask and successful
+	 * get_sset_count return
+	 */
+	for (i = 0; i < 64; i++) {
+		if (!(sset_mask & (1ULL << i)))
+			continue;
+
+		rc = ops->get_sset_count(dev, i);
+		if (rc >= 0) {
+			info.sset_mask |= (1ULL << i);
+			info_buf[idx++] = rc;
+		}
+	}
+
+	ret = -EFAULT;
+	if (copy_to_user(useraddr, &info, sizeof(info)))
+		goto out;
+
+	useraddr += offsetof(struct ethtool_sset_info, data);
+	if (copy_to_user(useraddr, info_buf, idx * sizeof(u32)))
+		goto out;
+
+	ret = 0;
+
+out:
+	kfree(info_buf);
+	return ret;
+}
+
 /*
  * noinline attribute so that gcc doesnt use too much stack in dev_ethtool()
  */
@@ -1471,6 +1540,9 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
 	case ETHTOOL_GRXNTUPLE:
 		rc = ethtool_get_rx_ntuple(dev, useraddr);
 		break;
+	case ETHTOOL_GSSET_INFO:
+		rc = ethtool_get_sset_info(dev, useraddr);
+		break;
 	default:
 		rc = -EOPNOTSUPP;
 	}
-- 
cgit v1.2.3


From d17792ebdf90289c9fd1bce888076d3d60ecd53b Mon Sep 17 00:00:00 2001
From: Jeff Garzik <jeff@garzik.org>
Date: Thu, 4 Mar 2010 08:21:53 +0000
Subject: ethtool: Add direct access to ops->get_sset_count

On 03/04/2010 09:26 AM, Ben Hutchings wrote:
> On Thu, 2010-03-04 at 00:51 -0800, Jeff Kirsher wrote:
>> From: Jeff Garzik<jgarzik@redhat.com>
>>
>> This patch is an alternative approach for accessing string
>> counts, vs. the drvinfo indirect approach.  This way the drvinfo
>> space doesn't run out, and we don't break ABI later.
> [...]
>> --- a/net/core/ethtool.c
>> +++ b/net/core/ethtool.c
>> @@ -214,6 +214,10 @@ static noinline int ethtool_get_drvinfo(struct net_device *dev, void __user *use
>>   	info.cmd = ETHTOOL_GDRVINFO;
>>   	ops->get_drvinfo(dev,&info);
>>
>> +	/*
>> +	 * this method of obtaining string set info is deprecated;
>> +	 * consider using ETHTOOL_GSSET_INFO instead
>> +	 */
>
> This comment belongs on the interface (ethtool.h) not the
> implementation.

Debatable -- the current comment is located at the callsite of
ops->get_sset_count(), which is where an implementor might think to add
a new call.  Not all the numeric fields in ethtool_drvinfo are obtained
from ->get_sset_count().

Hence the "some" in the attached patch to include/linux/ethtool.h,
addressing your comment.

> [...]
>> +static noinline int ethtool_get_sset_info(struct net_device *dev,
>> +                                          void __user *useraddr)
>> +{
> [...]
>> +	/* calculate size of return buffer */
>> +	for (i = 0; i<  64; i++)
>> +		if (sset_mask&  (1ULL<<  i))
>> +			n_bits++;
> [...]
>
> We have a function for this:
>
> 	n_bits = hweight64(sset_mask);

Agreed.

I've attached a follow-up patch, which should enable my/Jeff's kernel
patch to be applied, followed by this one.

Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ethtool.h | 7 +++++++
 net/core/ethtool.c      | 7 +++----
 2 files changed, 10 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index f6f961fefbe5..b33f316bb92e 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -61,6 +61,13 @@ struct ethtool_drvinfo {
 				/* For PCI devices, use pci_name(pci_dev). */
 	char	reserved1[32];
 	char	reserved2[12];
+				/*
+				 * Some struct members below are filled in
+				 * using ops->get_sset_count().  Obtaining
+				 * this info from ethtool_drvinfo is now
+				 * deprecated; Use ETHTOOL_GSSET_INFO
+				 * instead.
+				 */
 	__u32	n_priv_flags;	/* number of flags valid in ETHTOOL_GPFLAGS */
 	__u32	n_stats;	/* number of u64's from ETHTOOL_GSTATS */
 	__u32	testinfo_len;
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 70075c47ada8..33d2ded50f84 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -17,6 +17,7 @@
 #include <linux/errno.h>
 #include <linux/ethtool.h>
 #include <linux/netdevice.h>
+#include <linux/bitops.h>
 #include <asm/uaccess.h>
 
 /*
@@ -216,7 +217,7 @@ static noinline int ethtool_get_drvinfo(struct net_device *dev, void __user *use
 
 	/*
 	 * this method of obtaining string set info is deprecated;
-	 * consider using ETHTOOL_GSSET_INFO instead
+	 * Use ETHTOOL_GSSET_INFO instead.
 	 */
 	if (ops->get_sset_count) {
 		int rc;
@@ -265,9 +266,7 @@ static noinline int ethtool_get_sset_info(struct net_device *dev,
 		return 0;
 
 	/* calculate size of return buffer */
-	for (i = 0; i < 64; i++)
-		if (sset_mask & (1ULL << i))
-			n_bits++;
+	n_bits = hweight64(sset_mask);
 
 	memset(&info, 0, sizeof(info));
 	info.cmd = ETHTOOL_GSSET_INFO;
-- 
cgit v1.2.3


From 8215d6ec5fee1e76545decea2cd73717efb5cb42 Mon Sep 17 00:00:00 2001
From: Nikanth Karthikesan <knikanth@novell.com>
Date: Sat, 6 Mar 2010 02:32:27 +0000
Subject: dm table: remove unused dm_get_device range parameters

Remove unused parameters(start and len) of dm_get_device()
and fix the callers.

Signed-off-by: Nikanth Karthikesan <knikanth@suse.de>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
---
 drivers/md/dm-crypt.c         |  3 +--
 drivers/md/dm-delay.c         |  8 ++++----
 drivers/md/dm-linear.c        |  3 +--
 drivers/md/dm-log.c           |  3 +--
 drivers/md/dm-mpath.c         |  7 +++----
 drivers/md/dm-raid1.c         |  3 +--
 drivers/md/dm-snap.c          |  8 +++-----
 drivers/md/dm-stripe.c        |  3 +--
 drivers/md/dm-table.c         | 10 ++++------
 include/linux/device-mapper.h |  5 ++---
 10 files changed, 21 insertions(+), 32 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index a93637223c8d..3bdbb6115702 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -1160,8 +1160,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	}
 	cc->start = tmpll;
 
-	if (dm_get_device(ti, argv[3], cc->start, ti->len,
-			  dm_table_get_mode(ti->table), &cc->dev)) {
+	if (dm_get_device(ti, argv[3], dm_table_get_mode(ti->table), &cc->dev)) {
 		ti->error = "Device lookup failed";
 		goto bad_device;
 	}
diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c
index ebe7381f47c8..852052880d7a 100644
--- a/drivers/md/dm-delay.c
+++ b/drivers/md/dm-delay.c
@@ -156,8 +156,8 @@ static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 		goto bad;
 	}
 
-	if (dm_get_device(ti, argv[0], dc->start_read, ti->len,
-			  dm_table_get_mode(ti->table), &dc->dev_read)) {
+	if (dm_get_device(ti, argv[0], dm_table_get_mode(ti->table),
+			  &dc->dev_read)) {
 		ti->error = "Device lookup failed";
 		goto bad;
 	}
@@ -177,8 +177,8 @@ static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 		goto bad_dev_read;
 	}
 
-	if (dm_get_device(ti, argv[3], dc->start_write, ti->len,
-			  dm_table_get_mode(ti->table), &dc->dev_write)) {
+	if (dm_get_device(ti, argv[3], dm_table_get_mode(ti->table),
+			  &dc->dev_write)) {
 		ti->error = "Write device lookup failed";
 		goto bad_dev_read;
 	}
diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c
index 82f7d6e6b1ea..9200dbf2391a 100644
--- a/drivers/md/dm-linear.c
+++ b/drivers/md/dm-linear.c
@@ -47,8 +47,7 @@ static int linear_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	}
 	lc->start = tmp;
 
-	if (dm_get_device(ti, argv[0], lc->start, ti->len,
-			  dm_table_get_mode(ti->table), &lc->dev)) {
+	if (dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &lc->dev)) {
 		ti->error = "dm-linear: Device lookup failed";
 		goto bad;
 	}
diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c
index 7035582786fb..5a08be0222db 100644
--- a/drivers/md/dm-log.c
+++ b/drivers/md/dm-log.c
@@ -543,8 +543,7 @@ static int disk_ctr(struct dm_dirty_log *log, struct dm_target *ti,
 		return -EINVAL;
 	}
 
-	r = dm_get_device(ti, argv[0], 0, 0 /* FIXME */,
-			  FMODE_READ | FMODE_WRITE, &dev);
+	r = dm_get_device(ti, argv[0], FMODE_READ | FMODE_WRITE, &dev);
 	if (r)
 		return r;
 
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index c1335487cc72..826bce7343b3 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -607,8 +607,8 @@ static struct pgpath *parse_path(struct arg_set *as, struct path_selector *ps,
 	if (!p)
 		return ERR_PTR(-ENOMEM);
 
-	r = dm_get_device(ti, shift(as), ti->begin, ti->len,
-			  dm_table_get_mode(ti->table), &p->path.dev);
+	r = dm_get_device(ti, shift(as), dm_table_get_mode(ti->table),
+			  &p->path.dev);
 	if (r) {
 		ti->error = "error getting device";
 		goto bad;
@@ -1505,8 +1505,7 @@ static int multipath_message(struct dm_target *ti, unsigned argc, char **argv)
 		goto out;
 	}
 
-	r = dm_get_device(ti, argv[1], ti->begin, ti->len,
-			  dm_table_get_mode(ti->table), &dev);
+	r = dm_get_device(ti, argv[1], dm_table_get_mode(ti->table), &dev);
 	if (r) {
 		DMWARN("message: error getting device %s",
 		       argv[1]);
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index de26fde4098f..6d66ddf39071 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -920,8 +920,7 @@ static int get_mirror(struct mirror_set *ms, struct dm_target *ti,
 		return -EINVAL;
 	}
 
-	if (dm_get_device(ti, argv[0], offset, ti->len,
-			  dm_table_get_mode(ti->table),
+	if (dm_get_device(ti, argv[0], dm_table_get_mode(ti->table),
 			  &ms->mirror[mirror].dev)) {
 		ti->error = "Device lookup failure";
 		return -ENXIO;
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index ee8eb283650d..0789c22ff0d4 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -1081,8 +1081,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	argv++;
 	argc--;
 
-	r = dm_get_device(ti, cow_path, 0, 0,
-			  FMODE_READ | FMODE_WRITE, &s->cow);
+	r = dm_get_device(ti, cow_path, FMODE_READ | FMODE_WRITE, &s->cow);
 	if (r) {
 		ti->error = "Cannot get COW device";
 		goto bad_cow;
@@ -1098,7 +1097,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	argv += args_used;
 	argc -= args_used;
 
-	r = dm_get_device(ti, origin_path, 0, ti->len, origin_mode, &s->origin);
+	r = dm_get_device(ti, origin_path, origin_mode, &s->origin);
 	if (r) {
 		ti->error = "Cannot get origin device";
 		goto bad_origin;
@@ -2100,8 +2099,7 @@ static int origin_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 		return -EINVAL;
 	}
 
-	r = dm_get_device(ti, argv[0], 0, ti->len,
-			  dm_table_get_mode(ti->table), &dev);
+	r = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &dev);
 	if (r) {
 		ti->error = "Cannot get target device";
 		return r;
diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c
index bd58703ee8f6..e610725db766 100644
--- a/drivers/md/dm-stripe.c
+++ b/drivers/md/dm-stripe.c
@@ -80,8 +80,7 @@ static int get_stripe(struct dm_target *ti, struct stripe_c *sc,
 	if (sscanf(argv[1], "%llu", &start) != 1)
 		return -EINVAL;
 
-	if (dm_get_device(ti, argv[0], start, sc->stripe_width,
-			  dm_table_get_mode(ti->table),
+	if (dm_get_device(ti, argv[0], dm_table_get_mode(ti->table),
 			  &sc->stripe[stripe].dev))
 		return -ENXIO;
 
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 7d70cca585ac..9924ea23032d 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -429,8 +429,7 @@ static int upgrade_mode(struct dm_dev_internal *dd, fmode_t new_mode,
  * it's already present.
  */
 static int __table_get_device(struct dm_table *t, struct dm_target *ti,
-			      const char *path, sector_t start, sector_t len,
-			      fmode_t mode, struct dm_dev **result)
+		      const char *path, fmode_t mode, struct dm_dev **result)
 {
 	int r;
 	dev_t uninitialized_var(dev);
@@ -527,11 +526,10 @@ int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev,
 }
 EXPORT_SYMBOL_GPL(dm_set_device_limits);
 
-int dm_get_device(struct dm_target *ti, const char *path, sector_t start,
-		  sector_t len, fmode_t mode, struct dm_dev **result)
+int dm_get_device(struct dm_target *ti, const char *path, fmode_t mode,
+		  struct dm_dev **result)
 {
-	return __table_get_device(ti->table, ti, path,
-				  start, len, mode, result);
+	return __table_get_device(ti->table, ti, path, mode, result);
 }
 
 
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index d4c9c0b88adc..1381cd97b4ed 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -118,10 +118,9 @@ struct dm_dev {
 /*
  * Constructors should call these functions to ensure destination devices
  * are opened/closed correctly.
- * FIXME: too many arguments.
  */
-int dm_get_device(struct dm_target *ti, const char *path, sector_t start,
-		  sector_t len, fmode_t mode, struct dm_dev **result);
+int dm_get_device(struct dm_target *ti, const char *path, fmode_t mode,
+						 struct dm_dev **result);
 void dm_put_device(struct dm_target *ti, struct dm_dev *d);
 
 /*
-- 
cgit v1.2.3


From 3abf85b5b5851b5f28d3d8a920ebb844edd08352 Mon Sep 17 00:00:00 2001
From: Peter Rajnoha <prajnoha@redhat.com>
Date: Sat, 6 Mar 2010 02:32:31 +0000
Subject: dm ioctl: introduce flag indicating uevent was generated

Set a new DM_UEVENT_GENERATED_FLAG when returning from ioctls to
indicate that a uevent was actually generated.  This tells the userspace
caller that it may need to wait for the event to be processed.

Signed-off-by: Peter Rajnoha <prajnoha@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
---
 drivers/md/dm-ioctl.c    | 19 ++++++++++++-------
 drivers/md/dm.c          |  7 ++++---
 drivers/md/dm.h          |  4 ++--
 include/linux/dm-ioctl.h |  9 +++++++--
 4 files changed, 25 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c
index e3cf5686d0aa..d7500e1c26f2 100644
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -285,7 +285,8 @@ retry:
 	up_write(&_hash_lock);
 }
 
-static int dm_hash_rename(uint32_t cookie, const char *old, const char *new)
+static int dm_hash_rename(uint32_t cookie, uint32_t *flags, const char *old,
+			  const char *new)
 {
 	char *new_name, *old_name;
 	struct hash_cell *hc;
@@ -344,7 +345,8 @@ static int dm_hash_rename(uint32_t cookie, const char *old, const char *new)
 		dm_table_put(table);
 	}
 
-	dm_kobject_uevent(hc->md, KOBJ_CHANGE, cookie);
+	if (!dm_kobject_uevent(hc->md, KOBJ_CHANGE, cookie))
+		*flags |= DM_UEVENT_GENERATED_FLAG;
 
 	dm_put(hc->md);
 	up_write(&_hash_lock);
@@ -736,10 +738,10 @@ static int dev_remove(struct dm_ioctl *param, size_t param_size)
 	__hash_remove(hc);
 	up_write(&_hash_lock);
 
-	dm_kobject_uevent(md, KOBJ_REMOVE, param->event_nr);
+	if (!dm_kobject_uevent(md, KOBJ_REMOVE, param->event_nr))
+		param->flags |= DM_UEVENT_GENERATED_FLAG;
 
 	dm_put(md);
-	param->data_size = 0;
 	return 0;
 }
 
@@ -773,7 +775,9 @@ static int dev_rename(struct dm_ioctl *param, size_t param_size)
 		return r;
 
 	param->data_size = 0;
-	return dm_hash_rename(param->event_nr, param->name, new_name);
+
+	return dm_hash_rename(param->event_nr, &param->flags, param->name,
+			      new_name);
 }
 
 static int dev_set_geometry(struct dm_ioctl *param, size_t param_size)
@@ -899,8 +903,8 @@ static int do_resume(struct dm_ioctl *param)
 
 	if (dm_suspended_md(md)) {
 		r = dm_resume(md);
-		if (!r)
-			dm_kobject_uevent(md, KOBJ_CHANGE, param->event_nr);
+		if (!r && !dm_kobject_uevent(md, KOBJ_CHANGE, param->event_nr))
+			param->flags |= DM_UEVENT_GENERATED_FLAG;
 	}
 
 	if (old_map)
@@ -1477,6 +1481,7 @@ static int validate_params(uint cmd, struct dm_ioctl *param)
 {
 	/* Always clear this flag */
 	param->flags &= ~DM_BUFFER_FULL_FLAG;
+	param->flags &= ~DM_UEVENT_GENERATED_FLAG;
 
 	/* Ignores parameters */
 	if (cmd == DM_REMOVE_ALL_CMD ||
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 7199846364e9..d21e1284604f 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -2618,18 +2618,19 @@ out:
 /*-----------------------------------------------------------------
  * Event notification.
  *---------------------------------------------------------------*/
-void dm_kobject_uevent(struct mapped_device *md, enum kobject_action action,
+int dm_kobject_uevent(struct mapped_device *md, enum kobject_action action,
 		       unsigned cookie)
 {
 	char udev_cookie[DM_COOKIE_LENGTH];
 	char *envp[] = { udev_cookie, NULL };
 
 	if (!cookie)
-		kobject_uevent(&disk_to_dev(md->disk)->kobj, action);
+		return kobject_uevent(&disk_to_dev(md->disk)->kobj, action);
 	else {
 		snprintf(udev_cookie, DM_COOKIE_LENGTH, "%s=%u",
 			 DM_COOKIE_ENV_VAR_NAME, cookie);
-		kobject_uevent_env(&disk_to_dev(md->disk)->kobj, action, envp);
+		return kobject_uevent_env(&disk_to_dev(md->disk)->kobj,
+					  action, envp);
 	}
 }
 
diff --git a/drivers/md/dm.h b/drivers/md/dm.h
index 8dadaa5bc396..bad1724d4869 100644
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -125,8 +125,8 @@ void dm_stripe_exit(void);
 int dm_open_count(struct mapped_device *md);
 int dm_lock_for_deletion(struct mapped_device *md);
 
-void dm_kobject_uevent(struct mapped_device *md, enum kobject_action action,
-		       unsigned cookie);
+int dm_kobject_uevent(struct mapped_device *md, enum kobject_action action,
+		      unsigned cookie);
 
 int dm_io_init(void);
 void dm_io_exit(void);
diff --git a/include/linux/dm-ioctl.h b/include/linux/dm-ioctl.h
index aa95508d2f95..2c445e113790 100644
--- a/include/linux/dm-ioctl.h
+++ b/include/linux/dm-ioctl.h
@@ -266,9 +266,9 @@ enum {
 #define DM_DEV_SET_GEOMETRY	_IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl)
 
 #define DM_VERSION_MAJOR	4
-#define DM_VERSION_MINOR	16
+#define DM_VERSION_MINOR	17
 #define DM_VERSION_PATCHLEVEL	0
-#define DM_VERSION_EXTRA	"-ioctl (2009-11-05)"
+#define DM_VERSION_EXTRA	"-ioctl (2010-03-05)"
 
 /* Status bits */
 #define DM_READONLY_FLAG	(1 << 0) /* In/Out */
@@ -316,4 +316,9 @@ enum {
  */
 #define DM_QUERY_INACTIVE_TABLE_FLAG	(1 << 12) /* In */
 
+/*
+ * If set, a uevent was generated for which the caller may need to wait.
+ */
+#define DM_UEVENT_GENERATED_FLAG	(1 << 13) /* Out */
+
 #endif				/* _LINUX_DM_IOCTL_H */
-- 
cgit v1.2.3


From 924e600d417ead9ef67043988055ba236f114718 Mon Sep 17 00:00:00 2001
From: Mike Snitzer <snitzer@redhat.com>
Date: Sat, 6 Mar 2010 02:32:33 +0000
Subject: dm: eliminate some holes data structures

Eliminate a 4-byte hole in 'struct dm_io_memory' by moving 'offset' above the
'ptr' to which it applies (size reduced from 24 to 16 bytes).  And by
association, 1-4 byte hole is eliminated in 'struct dm_io_request' (size
reduced from 56 to 48 bytes).

Eliminate all 6 4-byte holes and 1 cache-line in 'struct dm_snapshot' (size
reduced from 392 to 368 bytes).

Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
---
 drivers/md/dm-snap.c  | 26 +++++++++++++-------------
 include/linux/dm-io.h |  4 ++--
 2 files changed, 15 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 0789c22ff0d4..54853773510c 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -83,10 +83,10 @@ struct dm_snapshot {
 	/* Whether or not owning mapped_device is suspended */
 	int suspended;
 
-	mempool_t *pending_pool;
-
 	atomic_t pending_exceptions_count;
 
+	mempool_t *pending_pool;
+
 	struct dm_exception_table pending;
 	struct dm_exception_table complete;
 
@@ -96,6 +96,11 @@ struct dm_snapshot {
 	 */
 	spinlock_t pe_lock;
 
+	/* Chunks with outstanding reads */
+	spinlock_t tracked_chunk_lock;
+	mempool_t *tracked_chunk_pool;
+	struct hlist_head tracked_chunk_hash[DM_TRACKED_CHUNK_HASH_SIZE];
+
 	/* The on disk metadata handler */
 	struct dm_exception_store *store;
 
@@ -105,10 +110,12 @@ struct dm_snapshot {
 	struct bio_list queued_bios;
 	struct work_struct queued_bios_work;
 
-	/* Chunks with outstanding reads */
-	mempool_t *tracked_chunk_pool;
-	spinlock_t tracked_chunk_lock;
-	struct hlist_head tracked_chunk_hash[DM_TRACKED_CHUNK_HASH_SIZE];
+	/* Wait for events based on state_bits */
+	unsigned long state_bits;
+
+	/* Range of chunks currently being merged. */
+	chunk_t first_merging_chunk;
+	int num_merging_chunks;
 
 	/*
 	 * The merge operation failed if this flag is set.
@@ -125,13 +132,6 @@ struct dm_snapshot {
 	 */
 	int merge_failed;
 
-	/* Wait for events based on state_bits */
-	unsigned long state_bits;
-
-	/* Range of chunks currently being merged. */
-	chunk_t first_merging_chunk;
-	int num_merging_chunks;
-
 	/*
 	 * Incoming bios that overlap with chunks being merged must wait
 	 * for them to be committed.
diff --git a/include/linux/dm-io.h b/include/linux/dm-io.h
index b6bf17ee2f61..5c9186b93fff 100644
--- a/include/linux/dm-io.h
+++ b/include/linux/dm-io.h
@@ -37,14 +37,14 @@ enum dm_io_mem_type {
 struct dm_io_memory {
 	enum dm_io_mem_type type;
 
+	unsigned offset;
+
 	union {
 		struct page_list *pl;
 		struct bio_vec *bvec;
 		void *vma;
 		void *addr;
 	} ptr;
-
-	unsigned offset;
 };
 
 struct dm_io_notify {
-- 
cgit v1.2.3


From 984b3f5746ed2cde3d184651dabf26980f2b66e5 Mon Sep 17 00:00:00 2001
From: Akinobu Mita <akinobu.mita@gmail.com>
Date: Fri, 5 Mar 2010 13:41:37 -0800
Subject: bitops: rename for_each_bit() to for_each_set_bit()

Rename for_each_bit to for_each_set_bit in the kernel source tree.  To
permit for_each_clear_bit(), should that ever be added.

The patch includes a macro to map the old for_each_bit() onto the new
for_each_set_bit().  This is a (very) temporary thing to ease the migration.

[akpm@linux-foundation.org: add temporary for_each_bit()]
Suggested-by: Alexey Dobriyan <adobriyan@gmail.com>
Suggested-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Akinobu Mita <akinobu.mita@gmail.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Russell King <rmk@arm.linux.org.uk>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Artem Bityutskiy <dedekind@infradead.org>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/kernel/cpu/perf_event.c            |  2 +-
 arch/x86/kernel/cpu/perf_event_intel.c      |  2 +-
 drivers/dma/ioat/dma.c                      |  2 +-
 drivers/gpio/pl061.c                        |  2 +-
 drivers/gpio/timbgpio.c                     |  2 +-
 drivers/i2c/busses/i2c-designware.c         |  4 ++--
 drivers/mfd/htc-egpio.c                     |  2 +-
 drivers/misc/sgi-xp/xpnet.c                 |  2 +-
 drivers/net/gianfar.c                       | 12 ++++++------
 drivers/net/ixgbe/ixgbe_main.c              |  2 +-
 drivers/net/ixgbevf/ixgbevf_main.c          |  2 +-
 drivers/net/wireless/ath/ar9170/main.c      |  2 +-
 drivers/net/wireless/iwmc3200wifi/debugfs.c |  2 +-
 drivers/net/wireless/iwmc3200wifi/rx.c      |  2 +-
 fs/ocfs2/quota_local.c                      |  2 +-
 include/linux/bitops.h                      |  4 +++-
 kernel/sched_cpupri.c                       |  2 +-
 sound/soc/codecs/uda1380.c                  |  2 +-
 18 files changed, 26 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 641ccb9dddbc..b1fbdeecf6c9 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -676,7 +676,7 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 			if (c->weight != w)
 				continue;
 
-			for_each_bit(j, c->idxmsk, X86_PMC_IDX_MAX) {
+			for_each_set_bit(j, c->idxmsk, X86_PMC_IDX_MAX) {
 				if (!test_bit(j, used_mask))
 					break;
 			}
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index cf6590cf4a5f..977e7544738c 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -757,7 +757,7 @@ again:
 
 	inc_irq_stat(apic_perf_irqs);
 	ack = status;
-	for_each_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
+	for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
 		struct perf_event *event = cpuc->events[bit];
 
 		clear_bit(bit, (unsigned long *) &status);
diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c
index 5d0e42b263df..af14c9a5b8d4 100644
--- a/drivers/dma/ioat/dma.c
+++ b/drivers/dma/ioat/dma.c
@@ -71,7 +71,7 @@ static irqreturn_t ioat_dma_do_interrupt(int irq, void *data)
 	}
 
 	attnstatus = readl(instance->reg_base + IOAT_ATTNSTATUS_OFFSET);
-	for_each_bit(bit, &attnstatus, BITS_PER_LONG) {
+	for_each_set_bit(bit, &attnstatus, BITS_PER_LONG) {
 		chan = ioat_chan_by_index(instance, bit);
 		tasklet_schedule(&chan->cleanup_task);
 	}
diff --git a/drivers/gpio/pl061.c b/drivers/gpio/pl061.c
index 4ee4c8367a3f..3ad1eeb49609 100644
--- a/drivers/gpio/pl061.c
+++ b/drivers/gpio/pl061.c
@@ -219,7 +219,7 @@ static void pl061_irq_handler(unsigned irq, struct irq_desc *desc)
 		if (pending == 0)
 			continue;
 
-		for_each_bit(offset, &pending, PL061_GPIO_NR)
+		for_each_set_bit(offset, &pending, PL061_GPIO_NR)
 			generic_handle_irq(pl061_to_irq(&chip->gc, offset));
 	}
 	desc->chip->unmask(irq);
diff --git a/drivers/gpio/timbgpio.c b/drivers/gpio/timbgpio.c
index d941f45fe557..4ecba6e5a32d 100644
--- a/drivers/gpio/timbgpio.c
+++ b/drivers/gpio/timbgpio.c
@@ -175,7 +175,7 @@ static void timbgpio_irq(unsigned int irq, struct irq_desc *desc)
 	ipr = ioread32(tgpio->membase + TGPIO_IPR);
 	iowrite32(ipr, tgpio->membase + TGPIO_ICR);
 
-	for_each_bit(offset, &ipr, tgpio->gpio.ngpio)
+	for_each_set_bit(offset, &ipr, tgpio->gpio.ngpio)
 		generic_handle_irq(timbgpio_to_irq(&tgpio->gpio, offset));
 }
 
diff --git a/drivers/i2c/busses/i2c-designware.c b/drivers/i2c/busses/i2c-designware.c
index 9e18ef97f156..3e72b69aa7f8 100644
--- a/drivers/i2c/busses/i2c-designware.c
+++ b/drivers/i2c/busses/i2c-designware.c
@@ -497,13 +497,13 @@ static int i2c_dw_handle_tx_abort(struct dw_i2c_dev *dev)
 	int i;
 
 	if (abort_source & DW_IC_TX_ABRT_NOACK) {
-		for_each_bit(i, &abort_source, ARRAY_SIZE(abort_sources))
+		for_each_set_bit(i, &abort_source, ARRAY_SIZE(abort_sources))
 			dev_dbg(dev->dev,
 				"%s: %s\n", __func__, abort_sources[i]);
 		return -EREMOTEIO;
 	}
 
-	for_each_bit(i, &abort_source, ARRAY_SIZE(abort_sources))
+	for_each_set_bit(i, &abort_source, ARRAY_SIZE(abort_sources))
 		dev_err(dev->dev, "%s: %s\n", __func__, abort_sources[i]);
 
 	if (abort_source & DW_IC_TX_ARB_LOST)
diff --git a/drivers/mfd/htc-egpio.c b/drivers/mfd/htc-egpio.c
index aa266e1f69b2..addb846c1e34 100644
--- a/drivers/mfd/htc-egpio.c
+++ b/drivers/mfd/htc-egpio.c
@@ -108,7 +108,7 @@ static void egpio_handler(unsigned int irq, struct irq_desc *desc)
 	ack_irqs(ei);
 	/* Process all set pins. */
 	readval &= ei->irqs_enabled;
-	for_each_bit(irqpin, &readval, ei->nirqs) {
+	for_each_set_bit(irqpin, &readval, ei->nirqs) {
 		/* Run irq handler */
 		pr_debug("got IRQ %d\n", irqpin);
 		irq = ei->irq_start + irqpin;
diff --git a/drivers/misc/sgi-xp/xpnet.c b/drivers/misc/sgi-xp/xpnet.c
index 16f0abda1423..57b152f8d1b9 100644
--- a/drivers/misc/sgi-xp/xpnet.c
+++ b/drivers/misc/sgi-xp/xpnet.c
@@ -475,7 +475,7 @@ xpnet_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	if (skb->data[0] == 0xff) {
 		/* we are being asked to broadcast to all partitions */
-		for_each_bit(dest_partid, xpnet_broadcast_partitions,
+		for_each_set_bit(dest_partid, xpnet_broadcast_partitions,
 			     xp_max_npartitions) {
 
 			xpnet_send(skb, queued_msg, start_addr, end_addr,
diff --git a/drivers/net/gianfar.c b/drivers/net/gianfar.c
index 6aa526ee9096..61a7b4351e78 100644
--- a/drivers/net/gianfar.c
+++ b/drivers/net/gianfar.c
@@ -998,7 +998,7 @@ static int gfar_probe(struct of_device *ofdev,
 	}
 
 	/* Need to reverse the bit maps as  bit_map's MSB is q0
-	 * but, for_each_bit parses from right to left, which
+	 * but, for_each_set_bit parses from right to left, which
 	 * basically reverses the queue numbers */
 	for (i = 0; i< priv->num_grps; i++) {
 		priv->gfargrp[i].tx_bit_map = reverse_bitmap(
@@ -1011,7 +1011,7 @@ static int gfar_probe(struct of_device *ofdev,
 	 * also assign queues to groups */
 	for (grp_idx = 0; grp_idx < priv->num_grps; grp_idx++) {
 		priv->gfargrp[grp_idx].num_rx_queues = 0x0;
-		for_each_bit(i, &priv->gfargrp[grp_idx].rx_bit_map,
+		for_each_set_bit(i, &priv->gfargrp[grp_idx].rx_bit_map,
 				priv->num_rx_queues) {
 			priv->gfargrp[grp_idx].num_rx_queues++;
 			priv->rx_queue[i]->grp = &priv->gfargrp[grp_idx];
@@ -1019,7 +1019,7 @@ static int gfar_probe(struct of_device *ofdev,
 			rqueue = rqueue | ((RQUEUE_EN0 | RQUEUE_EX0) >> i);
 		}
 		priv->gfargrp[grp_idx].num_tx_queues = 0x0;
-		for_each_bit (i, &priv->gfargrp[grp_idx].tx_bit_map,
+		for_each_set_bit(i, &priv->gfargrp[grp_idx].tx_bit_map,
 				priv->num_tx_queues) {
 			priv->gfargrp[grp_idx].num_tx_queues++;
 			priv->tx_queue[i]->grp = &priv->gfargrp[grp_idx];
@@ -1709,7 +1709,7 @@ void gfar_configure_coalescing(struct gfar_private *priv,
 
 	if (priv->mode == MQ_MG_MODE) {
 		baddr = &regs->txic0;
-		for_each_bit (i, &tx_mask, priv->num_tx_queues) {
+		for_each_set_bit(i, &tx_mask, priv->num_tx_queues) {
 			if (likely(priv->tx_queue[i]->txcoalescing)) {
 				gfar_write(baddr + i, 0);
 				gfar_write(baddr + i, priv->tx_queue[i]->txic);
@@ -1717,7 +1717,7 @@ void gfar_configure_coalescing(struct gfar_private *priv,
 		}
 
 		baddr = &regs->rxic0;
-		for_each_bit (i, &rx_mask, priv->num_rx_queues) {
+		for_each_set_bit(i, &rx_mask, priv->num_rx_queues) {
 			if (likely(priv->rx_queue[i]->rxcoalescing)) {
 				gfar_write(baddr + i, 0);
 				gfar_write(baddr + i, priv->rx_queue[i]->rxic);
@@ -2607,7 +2607,7 @@ static int gfar_poll(struct napi_struct *napi, int budget)
 		budget_per_queue = left_over_budget/num_queues;
 		left_over_budget = 0;
 
-		for_each_bit(i, &gfargrp->rx_bit_map, priv->num_rx_queues) {
+		for_each_set_bit(i, &gfargrp->rx_bit_map, priv->num_rx_queues) {
 			if (test_bit(i, &serviced_queues))
 				continue;
 			rx_queue = priv->rx_queue[i];
diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c
index 45e3532b166f..684af371462d 100644
--- a/drivers/net/ixgbe/ixgbe_main.c
+++ b/drivers/net/ixgbe/ixgbe_main.c
@@ -1050,7 +1050,7 @@ static void ixgbe_configure_msix(struct ixgbe_adapter *adapter)
 	 */
 	for (v_idx = 0; v_idx < q_vectors; v_idx++) {
 		q_vector = adapter->q_vector[v_idx];
-		/* XXX for_each_bit(...) */
+		/* XXX for_each_set_bit(...) */
 		r_idx = find_first_bit(q_vector->rxr_idx,
 		                       adapter->num_rx_queues);
 
diff --git a/drivers/net/ixgbevf/ixgbevf_main.c b/drivers/net/ixgbevf/ixgbevf_main.c
index 235b5fd4b8d4..ca653c49b765 100644
--- a/drivers/net/ixgbevf/ixgbevf_main.c
+++ b/drivers/net/ixgbevf/ixgbevf_main.c
@@ -751,7 +751,7 @@ static void ixgbevf_configure_msix(struct ixgbevf_adapter *adapter)
 	 */
 	for (v_idx = 0; v_idx < q_vectors; v_idx++) {
 		q_vector = adapter->q_vector[v_idx];
-		/* XXX for_each_bit(...) */
+		/* XXX for_each_set_bit(...) */
 		r_idx = find_first_bit(q_vector->rxr_idx,
 				       adapter->num_rx_queues);
 
diff --git a/drivers/net/wireless/ath/ar9170/main.c b/drivers/net/wireless/ath/ar9170/main.c
index 8a964f130367..a6452af9c6c5 100644
--- a/drivers/net/wireless/ath/ar9170/main.c
+++ b/drivers/net/wireless/ath/ar9170/main.c
@@ -394,7 +394,7 @@ static void ar9170_tx_fake_ampdu_status(struct ar9170 *ar)
 		ieee80211_tx_status_irqsafe(ar->hw, skb);
 	}
 
-	for_each_bit(i, &queue_bitmap, BITS_PER_BYTE) {
+	for_each_set_bit(i, &queue_bitmap, BITS_PER_BYTE) {
 #ifdef AR9170_QUEUE_STOP_DEBUG
 		printk(KERN_DEBUG "%s: wake queue %d\n",
 		       wiphy_name(ar->hw->wiphy), i);
diff --git a/drivers/net/wireless/iwmc3200wifi/debugfs.c b/drivers/net/wireless/iwmc3200wifi/debugfs.c
index be992ca41cf1..c29c994de0e2 100644
--- a/drivers/net/wireless/iwmc3200wifi/debugfs.c
+++ b/drivers/net/wireless/iwmc3200wifi/debugfs.c
@@ -89,7 +89,7 @@ static int iwm_debugfs_dbg_modules_write(void *data, u64 val)
 	for (i = 0; i < __IWM_DM_NR; i++)
 		iwm->dbg.dbg_module[i] = 0;
 
-	for_each_bit(bit, &iwm->dbg.dbg_modules, __IWM_DM_NR)
+	for_each_set_bit(bit, &iwm->dbg.dbg_modules, __IWM_DM_NR)
 		iwm->dbg.dbg_module[bit] = iwm->dbg.dbg_level;
 
 	return 0;
diff --git a/drivers/net/wireless/iwmc3200wifi/rx.c b/drivers/net/wireless/iwmc3200wifi/rx.c
index ad8f7eabb5aa..8456b4dbd146 100644
--- a/drivers/net/wireless/iwmc3200wifi/rx.c
+++ b/drivers/net/wireless/iwmc3200wifi/rx.c
@@ -1116,7 +1116,7 @@ static int iwm_ntf_stop_resume_tx(struct iwm_priv *iwm, u8 *buf,
 		return -EINVAL;
 	}
 
-	for_each_bit(bit, (unsigned long *)&tid_msk, IWM_UMAC_TID_NR) {
+	for_each_set_bit(bit, (unsigned long *)&tid_msk, IWM_UMAC_TID_NR) {
 		tid_info = &sta_info->tid_info[bit];
 
 		mutex_lock(&tid_info->mutex);
diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c
index 21f9e71223ca..a6467f3d262e 100644
--- a/fs/ocfs2/quota_local.c
+++ b/fs/ocfs2/quota_local.c
@@ -457,7 +457,7 @@ static int ocfs2_recover_local_quota_file(struct inode *lqinode,
 			break;
 		}
 		dchunk = (struct ocfs2_local_disk_chunk *)hbh->b_data;
-		for_each_bit(bit, rchunk->rc_bitmap, ol_chunk_entries(sb)) {
+		for_each_set_bit(bit, rchunk->rc_bitmap, ol_chunk_entries(sb)) {
 			qbh = NULL;
 			status = ocfs2_read_quota_block(lqinode,
 						ol_dqblk_block(sb, chunk, bit),
diff --git a/include/linux/bitops.h b/include/linux/bitops.h
index 25b8b2f33ae9..b79389879238 100644
--- a/include/linux/bitops.h
+++ b/include/linux/bitops.h
@@ -16,11 +16,13 @@
  */
 #include <asm/bitops.h>
 
-#define for_each_bit(bit, addr, size) \
+#define for_each_set_bit(bit, addr, size) \
 	for ((bit) = find_first_bit((addr), (size)); \
 	     (bit) < (size); \
 	     (bit) = find_next_bit((addr), (size), (bit) + 1))
 
+/* Temporary */
+#define for_each_bit(bit, addr, size) for_each_set_bit(bit, addr, size)
 
 static __inline__ int get_bitmask_order(unsigned int count)
 {
diff --git a/kernel/sched_cpupri.c b/kernel/sched_cpupri.c
index eeb3506c4834..82095bf2099f 100644
--- a/kernel/sched_cpupri.c
+++ b/kernel/sched_cpupri.c
@@ -47,7 +47,7 @@ static int convert_prio(int prio)
 }
 
 #define for_each_cpupri_active(array, idx)                    \
-	for_each_bit(idx, array, CPUPRI_NR_PRIORITIES)
+	for_each_set_bit(idx, array, CPUPRI_NR_PRIORITIES)
 
 /**
  * cpupri_find - find the best (lowest-pri) CPU in the system
diff --git a/sound/soc/codecs/uda1380.c b/sound/soc/codecs/uda1380.c
index a2763c2e7348..9cd0a66b7663 100644
--- a/sound/soc/codecs/uda1380.c
+++ b/sound/soc/codecs/uda1380.c
@@ -137,7 +137,7 @@ static void uda1380_flush_work(struct work_struct *work)
 {
 	int bit, reg;
 
-	for_each_bit(bit, &uda1380_cache_dirty, UDA1380_CACHEREGNUM - 0x10) {
+	for_each_set_bit(bit, &uda1380_cache_dirty, UDA1380_CACHEREGNUM - 0x10) {
 		reg = 0x10 + bit;
 		pr_debug("uda1380: flush reg %x val %x:\n", reg,
 				uda1380_read_reg_cache(uda1380_codec, reg));
-- 
cgit v1.2.3


From d559db086ff5be9bcc259e5aa50bf3d881eaf1d1 Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Fri, 5 Mar 2010 13:41:39 -0800
Subject: mm: clean up mm_counter

Presently, per-mm statistics counter is defined by macro in sched.h

This patch modifies it to
  - defined in mm.h as inlinf functions
  - use array instead of macro's name creation.

This patch is for reducing patch size in future patch to modify
implementation of per-mm counter.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Reviewed-by: Minchan Kim <minchan.kim@gmail.com>
Cc: Christoph Lameter <cl@linux-foundation.org>
Cc: Lee Schermerhorn <lee.schermerhorn@hp.com>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/task_mmu.c       |   4 +-
 include/linux/mm.h       | 104 +++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/mm_types.h |  33 ++++++++++-----
 include/linux/sched.h    |  54 ------------------------
 kernel/fork.c            |   3 +-
 kernel/tsacct.c          |   1 +
 mm/filemap_xip.c         |   2 +-
 mm/fremap.c              |   2 +-
 mm/memory.c              |  56 +++++++++++++++----------
 mm/oom_kill.c            |   4 +-
 mm/rmap.c                |  10 ++---
 mm/swapfile.c            |   2 +-
 12 files changed, 174 insertions(+), 101 deletions(-)

(limited to 'include/linux')

diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index f277c4a111cb..375581276011 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -65,11 +65,11 @@ unsigned long task_vsize(struct mm_struct *mm)
 int task_statm(struct mm_struct *mm, int *shared, int *text,
 	       int *data, int *resident)
 {
-	*shared = get_mm_counter(mm, file_rss);
+	*shared = get_mm_counter(mm, MM_FILEPAGES);
 	*text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK))
 								>> PAGE_SHIFT;
 	*data = mm->total_vm - mm->shared_vm;
-	*resident = *shared + get_mm_counter(mm, anon_rss);
+	*resident = *shared + get_mm_counter(mm, MM_ANONPAGES);
 	return mm->total_vm;
 }
 
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 90957f14195c..2124cdb2d1d0 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -870,6 +870,110 @@ extern int mprotect_fixup(struct vm_area_struct *vma,
  */
 int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
 			  struct page **pages);
+/*
+ * per-process(per-mm_struct) statistics.
+ */
+#if USE_SPLIT_PTLOCKS
+/*
+ * The mm counters are not protected by its page_table_lock,
+ * so must be incremented atomically.
+ */
+static inline void set_mm_counter(struct mm_struct *mm, int member, long value)
+{
+	atomic_long_set(&mm->rss_stat.count[member], value);
+}
+
+static inline unsigned long get_mm_counter(struct mm_struct *mm, int member)
+{
+	return (unsigned long)atomic_long_read(&mm->rss_stat.count[member]);
+}
+
+static inline void add_mm_counter(struct mm_struct *mm, int member, long value)
+{
+	atomic_long_add(value, &mm->rss_stat.count[member]);
+}
+
+static inline void inc_mm_counter(struct mm_struct *mm, int member)
+{
+	atomic_long_inc(&mm->rss_stat.count[member]);
+}
+
+static inline void dec_mm_counter(struct mm_struct *mm, int member)
+{
+	atomic_long_dec(&mm->rss_stat.count[member]);
+}
+
+#else  /* !USE_SPLIT_PTLOCKS */
+/*
+ * The mm counters are protected by its page_table_lock,
+ * so can be incremented directly.
+ */
+static inline void set_mm_counter(struct mm_struct *mm, int member, long value)
+{
+	mm->rss_stat.count[member] = value;
+}
+
+static inline unsigned long get_mm_counter(struct mm_struct *mm, int member)
+{
+	return mm->rss_stat.count[member];
+}
+
+static inline void add_mm_counter(struct mm_struct *mm, int member, long value)
+{
+	mm->rss_stat.count[member] += value;
+}
+
+static inline void inc_mm_counter(struct mm_struct *mm, int member)
+{
+	mm->rss_stat.count[member]++;
+}
+
+static inline void dec_mm_counter(struct mm_struct *mm, int member)
+{
+	mm->rss_stat.count[member]--;
+}
+
+#endif /* !USE_SPLIT_PTLOCKS */
+
+static inline unsigned long get_mm_rss(struct mm_struct *mm)
+{
+	return get_mm_counter(mm, MM_FILEPAGES) +
+		get_mm_counter(mm, MM_ANONPAGES);
+}
+
+static inline unsigned long get_mm_hiwater_rss(struct mm_struct *mm)
+{
+	return max(mm->hiwater_rss, get_mm_rss(mm));
+}
+
+static inline unsigned long get_mm_hiwater_vm(struct mm_struct *mm)
+{
+	return max(mm->hiwater_vm, mm->total_vm);
+}
+
+static inline void update_hiwater_rss(struct mm_struct *mm)
+{
+	unsigned long _rss = get_mm_rss(mm);
+
+	if ((mm)->hiwater_rss < _rss)
+		(mm)->hiwater_rss = _rss;
+}
+
+static inline void update_hiwater_vm(struct mm_struct *mm)
+{
+	if (mm->hiwater_vm < mm->total_vm)
+		mm->hiwater_vm = mm->total_vm;
+}
+
+static inline void setmax_mm_hiwater_rss(unsigned long *maxrss,
+					 struct mm_struct *mm)
+{
+	unsigned long hiwater_rss = get_mm_hiwater_rss(mm);
+
+	if (*maxrss < hiwater_rss)
+		*maxrss = hiwater_rss;
+}
+
 
 /*
  * A callback you can register to apply pressure to ageable caches.
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 36f96271306c..e1ca64be6678 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -24,12 +24,6 @@ struct address_space;
 
 #define USE_SPLIT_PTLOCKS	(NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS)
 
-#if USE_SPLIT_PTLOCKS
-typedef atomic_long_t mm_counter_t;
-#else  /* !USE_SPLIT_PTLOCKS */
-typedef unsigned long mm_counter_t;
-#endif /* !USE_SPLIT_PTLOCKS */
-
 /*
  * Each physical page in the system has a struct page associated with
  * it to keep track of whatever it is we are using the page for at the
@@ -201,6 +195,22 @@ struct core_state {
 	struct completion startup;
 };
 
+enum {
+	MM_FILEPAGES,
+	MM_ANONPAGES,
+	NR_MM_COUNTERS
+};
+
+#if USE_SPLIT_PTLOCKS
+struct mm_rss_stat {
+	atomic_long_t count[NR_MM_COUNTERS];
+};
+#else  /* !USE_SPLIT_PTLOCKS */
+struct mm_rss_stat {
+	unsigned long count[NR_MM_COUNTERS];
+};
+#endif /* !USE_SPLIT_PTLOCKS */
+
 struct mm_struct {
 	struct vm_area_struct * mmap;		/* list of VMAs */
 	struct rb_root mm_rb;
@@ -227,11 +237,6 @@ struct mm_struct {
 						 * by mmlist_lock
 						 */
 
-	/* Special counters, in some configurations protected by the
-	 * page_table_lock, in other configurations by being atomic.
-	 */
-	mm_counter_t _file_rss;
-	mm_counter_t _anon_rss;
 
 	unsigned long hiwater_rss;	/* High-watermark of RSS usage */
 	unsigned long hiwater_vm;	/* High-water virtual memory usage */
@@ -244,6 +249,12 @@ struct mm_struct {
 
 	unsigned long saved_auxv[AT_VECTOR_SIZE]; /* for /proc/PID/auxv */
 
+	/*
+	 * Special counters, in some configurations protected by the
+	 * page_table_lock, in other configurations by being atomic.
+	 */
+	struct mm_rss_stat rss_stat;
+
 	struct linux_binfmt *binfmt;
 
 	cpumask_t cpu_vm_mask;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 4b1753f7e48e..cbeafa49a53b 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -396,60 +396,6 @@ extern void arch_unmap_area_topdown(struct mm_struct *, unsigned long);
 static inline void arch_pick_mmap_layout(struct mm_struct *mm) {}
 #endif
 
-#if USE_SPLIT_PTLOCKS
-/*
- * The mm counters are not protected by its page_table_lock,
- * so must be incremented atomically.
- */
-#define set_mm_counter(mm, member, value) atomic_long_set(&(mm)->_##member, value)
-#define get_mm_counter(mm, member) ((unsigned long)atomic_long_read(&(mm)->_##member))
-#define add_mm_counter(mm, member, value) atomic_long_add(value, &(mm)->_##member)
-#define inc_mm_counter(mm, member) atomic_long_inc(&(mm)->_##member)
-#define dec_mm_counter(mm, member) atomic_long_dec(&(mm)->_##member)
-
-#else  /* !USE_SPLIT_PTLOCKS */
-/*
- * The mm counters are protected by its page_table_lock,
- * so can be incremented directly.
- */
-#define set_mm_counter(mm, member, value) (mm)->_##member = (value)
-#define get_mm_counter(mm, member) ((mm)->_##member)
-#define add_mm_counter(mm, member, value) (mm)->_##member += (value)
-#define inc_mm_counter(mm, member) (mm)->_##member++
-#define dec_mm_counter(mm, member) (mm)->_##member--
-
-#endif /* !USE_SPLIT_PTLOCKS */
-
-#define get_mm_rss(mm)					\
-	(get_mm_counter(mm, file_rss) + get_mm_counter(mm, anon_rss))
-#define update_hiwater_rss(mm)	do {			\
-	unsigned long _rss = get_mm_rss(mm);		\
-	if ((mm)->hiwater_rss < _rss)			\
-		(mm)->hiwater_rss = _rss;		\
-} while (0)
-#define update_hiwater_vm(mm)	do {			\
-	if ((mm)->hiwater_vm < (mm)->total_vm)		\
-		(mm)->hiwater_vm = (mm)->total_vm;	\
-} while (0)
-
-static inline unsigned long get_mm_hiwater_rss(struct mm_struct *mm)
-{
-	return max(mm->hiwater_rss, get_mm_rss(mm));
-}
-
-static inline void setmax_mm_hiwater_rss(unsigned long *maxrss,
-					 struct mm_struct *mm)
-{
-	unsigned long hiwater_rss = get_mm_hiwater_rss(mm);
-
-	if (*maxrss < hiwater_rss)
-		*maxrss = hiwater_rss;
-}
-
-static inline unsigned long get_mm_hiwater_vm(struct mm_struct *mm)
-{
-	return max(mm->hiwater_vm, mm->total_vm);
-}
 
 extern void set_dumpable(struct mm_struct *mm, int value);
 extern int get_dumpable(struct mm_struct *mm);
diff --git a/kernel/fork.c b/kernel/fork.c
index 17bbf093356d..7616bcf107b9 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -455,8 +455,7 @@ static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p)
 		(current->mm->flags & MMF_INIT_MASK) : default_dump_filter;
 	mm->core_state = NULL;
 	mm->nr_ptes = 0;
-	set_mm_counter(mm, file_rss, 0);
-	set_mm_counter(mm, anon_rss, 0);
+	memset(&mm->rss_stat, 0, sizeof(mm->rss_stat));
 	spin_lock_init(&mm->page_table_lock);
 	mm->free_area_cache = TASK_UNMAPPED_BASE;
 	mm->cached_hole_size = ~0UL;
diff --git a/kernel/tsacct.c b/kernel/tsacct.c
index 00d59d048edf..0a67e041edf8 100644
--- a/kernel/tsacct.c
+++ b/kernel/tsacct.c
@@ -21,6 +21,7 @@
 #include <linux/tsacct_kern.h>
 #include <linux/acct.h>
 #include <linux/jiffies.h>
+#include <linux/mm.h>
 
 /*
  * fill in basic accounting fields
diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c
index 1888b2d71bb8..78b94f0b6d5d 100644
--- a/mm/filemap_xip.c
+++ b/mm/filemap_xip.c
@@ -194,7 +194,7 @@ retry:
 			flush_cache_page(vma, address, pte_pfn(*pte));
 			pteval = ptep_clear_flush_notify(vma, address, pte);
 			page_remove_rmap(page);
-			dec_mm_counter(mm, file_rss);
+			dec_mm_counter(mm, MM_FILEPAGES);
 			BUG_ON(pte_dirty(pteval));
 			pte_unmap_unlock(pte, ptl);
 			page_cache_release(page);
diff --git a/mm/fremap.c b/mm/fremap.c
index b6ec85abbb39..46f5dacf90a2 100644
--- a/mm/fremap.c
+++ b/mm/fremap.c
@@ -40,7 +40,7 @@ static void zap_pte(struct mm_struct *mm, struct vm_area_struct *vma,
 			page_remove_rmap(page);
 			page_cache_release(page);
 			update_hiwater_rss(mm);
-			dec_mm_counter(mm, file_rss);
+			dec_mm_counter(mm, MM_FILEPAGES);
 		}
 	} else {
 		if (!pte_file(pte))
diff --git a/mm/memory.c b/mm/memory.c
index 72fb5f39bccc..c57678478801 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -121,6 +121,7 @@ static int __init init_zero_pfn(void)
 }
 core_initcall(init_zero_pfn);
 
+
 /*
  * If a p?d_bad entry is found while walking page tables, report
  * the error, before resetting entry to p?d_none.  Usually (but
@@ -376,12 +377,18 @@ int __pte_alloc_kernel(pmd_t *pmd, unsigned long address)
 	return 0;
 }
 
-static inline void add_mm_rss(struct mm_struct *mm, int file_rss, int anon_rss)
+static inline void init_rss_vec(int *rss)
 {
-	if (file_rss)
-		add_mm_counter(mm, file_rss, file_rss);
-	if (anon_rss)
-		add_mm_counter(mm, anon_rss, anon_rss);
+	memset(rss, 0, sizeof(int) * NR_MM_COUNTERS);
+}
+
+static inline void add_mm_rss_vec(struct mm_struct *mm, int *rss)
+{
+	int i;
+
+	for (i = 0; i < NR_MM_COUNTERS; i++)
+		if (rss[i])
+			add_mm_counter(mm, i, rss[i]);
 }
 
 /*
@@ -632,7 +639,10 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 	if (page) {
 		get_page(page);
 		page_dup_rmap(page);
-		rss[PageAnon(page)]++;
+		if (PageAnon(page))
+			rss[MM_ANONPAGES]++;
+		else
+			rss[MM_FILEPAGES]++;
 	}
 
 out_set_pte:
@@ -648,11 +658,12 @@ static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 	pte_t *src_pte, *dst_pte;
 	spinlock_t *src_ptl, *dst_ptl;
 	int progress = 0;
-	int rss[2];
+	int rss[NR_MM_COUNTERS];
 	swp_entry_t entry = (swp_entry_t){0};
 
 again:
-	rss[1] = rss[0] = 0;
+	init_rss_vec(rss);
+
 	dst_pte = pte_alloc_map_lock(dst_mm, dst_pmd, addr, &dst_ptl);
 	if (!dst_pte)
 		return -ENOMEM;
@@ -688,7 +699,7 @@ again:
 	arch_leave_lazy_mmu_mode();
 	spin_unlock(src_ptl);
 	pte_unmap_nested(orig_src_pte);
-	add_mm_rss(dst_mm, rss[0], rss[1]);
+	add_mm_rss_vec(dst_mm, rss);
 	pte_unmap_unlock(orig_dst_pte, dst_ptl);
 	cond_resched();
 
@@ -816,8 +827,9 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
 	struct mm_struct *mm = tlb->mm;
 	pte_t *pte;
 	spinlock_t *ptl;
-	int file_rss = 0;
-	int anon_rss = 0;
+	int rss[NR_MM_COUNTERS];
+
+	init_rss_vec(rss);
 
 	pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
 	arch_enter_lazy_mmu_mode();
@@ -863,14 +875,14 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
 				set_pte_at(mm, addr, pte,
 					   pgoff_to_pte(page->index));
 			if (PageAnon(page))
-				anon_rss--;
+				rss[MM_ANONPAGES]--;
 			else {
 				if (pte_dirty(ptent))
 					set_page_dirty(page);
 				if (pte_young(ptent) &&
 				    likely(!VM_SequentialReadHint(vma)))
 					mark_page_accessed(page);
-				file_rss--;
+				rss[MM_FILEPAGES]--;
 			}
 			page_remove_rmap(page);
 			if (unlikely(page_mapcount(page) < 0))
@@ -893,7 +905,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
 		pte_clear_not_present_full(mm, addr, pte, tlb->fullmm);
 	} while (pte++, addr += PAGE_SIZE, (addr != end && *zap_work > 0));
 
-	add_mm_rss(mm, file_rss, anon_rss);
+	add_mm_rss_vec(mm, rss);
 	arch_leave_lazy_mmu_mode();
 	pte_unmap_unlock(pte - 1, ptl);
 
@@ -1527,7 +1539,7 @@ static int insert_page(struct vm_area_struct *vma, unsigned long addr,
 
 	/* Ok, finally just insert the thing.. */
 	get_page(page);
-	inc_mm_counter(mm, file_rss);
+	inc_mm_counter(mm, MM_FILEPAGES);
 	page_add_file_rmap(page);
 	set_pte_at(mm, addr, pte, mk_pte(page, prot));
 
@@ -2163,11 +2175,11 @@ gotten:
 	if (likely(pte_same(*page_table, orig_pte))) {
 		if (old_page) {
 			if (!PageAnon(old_page)) {
-				dec_mm_counter(mm, file_rss);
-				inc_mm_counter(mm, anon_rss);
+				dec_mm_counter(mm, MM_FILEPAGES);
+				inc_mm_counter(mm, MM_ANONPAGES);
 			}
 		} else
-			inc_mm_counter(mm, anon_rss);
+			inc_mm_counter(mm, MM_ANONPAGES);
 		flush_cache_page(vma, address, pte_pfn(orig_pte));
 		entry = mk_pte(new_page, vma->vm_page_prot);
 		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
@@ -2604,7 +2616,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	 * discarded at swap_free().
 	 */
 
-	inc_mm_counter(mm, anon_rss);
+	inc_mm_counter(mm, MM_ANONPAGES);
 	pte = mk_pte(page, vma->vm_page_prot);
 	if ((flags & FAULT_FLAG_WRITE) && reuse_swap_page(page)) {
 		pte = maybe_mkwrite(pte_mkdirty(pte), vma);
@@ -2688,7 +2700,7 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	if (!pte_none(*page_table))
 		goto release;
 
-	inc_mm_counter(mm, anon_rss);
+	inc_mm_counter(mm, MM_ANONPAGES);
 	page_add_new_anon_rmap(page, vma, address);
 setpte:
 	set_pte_at(mm, address, page_table, entry);
@@ -2842,10 +2854,10 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 		if (flags & FAULT_FLAG_WRITE)
 			entry = maybe_mkwrite(pte_mkdirty(entry), vma);
 		if (anon) {
-			inc_mm_counter(mm, anon_rss);
+			inc_mm_counter(mm, MM_ANONPAGES);
 			page_add_new_anon_rmap(page, vma, address);
 		} else {
-			inc_mm_counter(mm, file_rss);
+			inc_mm_counter(mm, MM_FILEPAGES);
 			page_add_file_rmap(page);
 			if (flags & FAULT_FLAG_WRITE) {
 				dirty_page = page;
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 237050478f28..35755a4156d6 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -401,8 +401,8 @@ static void __oom_kill_task(struct task_struct *p, int verbose)
 		       "vsz:%lukB, anon-rss:%lukB, file-rss:%lukB\n",
 		       task_pid_nr(p), p->comm,
 		       K(p->mm->total_vm),
-		       K(get_mm_counter(p->mm, anon_rss)),
-		       K(get_mm_counter(p->mm, file_rss)));
+		       K(get_mm_counter(p->mm, MM_ANONPAGES)),
+		       K(get_mm_counter(p->mm, MM_FILEPAGES)));
 	task_unlock(p);
 
 	/*
diff --git a/mm/rmap.c b/mm/rmap.c
index 278cd277bdec..73d0472884c2 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -815,9 +815,9 @@ int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 
 	if (PageHWPoison(page) && !(flags & TTU_IGNORE_HWPOISON)) {
 		if (PageAnon(page))
-			dec_mm_counter(mm, anon_rss);
+			dec_mm_counter(mm, MM_ANONPAGES);
 		else
-			dec_mm_counter(mm, file_rss);
+			dec_mm_counter(mm, MM_FILEPAGES);
 		set_pte_at(mm, address, pte,
 				swp_entry_to_pte(make_hwpoison_entry(page)));
 	} else if (PageAnon(page)) {
@@ -839,7 +839,7 @@ int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 					list_add(&mm->mmlist, &init_mm.mmlist);
 				spin_unlock(&mmlist_lock);
 			}
-			dec_mm_counter(mm, anon_rss);
+			dec_mm_counter(mm, MM_ANONPAGES);
 		} else if (PAGE_MIGRATION) {
 			/*
 			 * Store the pfn of the page in a special migration
@@ -857,7 +857,7 @@ int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 		entry = make_migration_entry(page, pte_write(pteval));
 		set_pte_at(mm, address, pte, swp_entry_to_pte(entry));
 	} else
-		dec_mm_counter(mm, file_rss);
+		dec_mm_counter(mm, MM_FILEPAGES);
 
 	page_remove_rmap(page);
 	page_cache_release(page);
@@ -996,7 +996,7 @@ static int try_to_unmap_cluster(unsigned long cursor, unsigned int *mapcount,
 
 		page_remove_rmap(page);
 		page_cache_release(page);
-		dec_mm_counter(mm, file_rss);
+		dec_mm_counter(mm, MM_FILEPAGES);
 		(*mapcount)--;
 	}
 	pte_unmap_unlock(pte - 1, ptl);
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 6c0585b16418..893984946a2c 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -840,7 +840,7 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
 		goto out;
 	}
 
-	inc_mm_counter(vma->vm_mm, anon_rss);
+	inc_mm_counter(vma->vm_mm, MM_ANONPAGES);
 	get_page(page);
 	set_pte_at(vma->vm_mm, addr, pte,
 		   pte_mkold(mk_pte(page, vma->vm_page_prot)));
-- 
cgit v1.2.3


From 34e55232e59f7b19050267a05ff1226e5cd122a5 Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Fri, 5 Mar 2010 13:41:40 -0800
Subject: mm: avoid false sharing of mm_counter

Considering the nature of per mm stats, it's the shared object among
threads and can be a cache-miss point in the page fault path.

This patch adds per-thread cache for mm_counter.  RSS value will be
counted into a struct in task_struct and synchronized with mm's one at
events.

Now, in this patch, the event is the number of calls to handle_mm_fault.
Per-thread value is added to mm at each 64 calls.

 rough estimation with small benchmark on parallel thread (2threads) shows
 [before]
     4.5 cache-miss/faults
 [after]
     4.0 cache-miss/faults
 Anyway, the most contended object is mmap_sem if the number of threads grows.

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: Christoph Lameter <cl@linux-foundation.org>
Cc: Lee Schermerhorn <lee.schermerhorn@hp.com>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/filesystems/proc.txt |  6 +++
 fs/exec.c                          |  1 +
 include/linux/mm.h                 |  8 ++--
 include/linux/mm_types.h           |  6 +++
 include/linux/sched.h              |  4 +-
 kernel/exit.c                      |  3 +-
 mm/memory.c                        | 94 ++++++++++++++++++++++++++++++++++----
 7 files changed, 107 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index 0d07513a67a6..e418f3d8f427 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -188,6 +188,12 @@ memory usage. Its seven fields are explained in Table 1-3.  The stat file
 contains details information about the process itself.  Its fields are
 explained in Table 1-4.
 
+(for SMP CONFIG users)
+For making accounting scalable, RSS related information are handled in
+asynchronous manner and the vaule may not be very precise. To see a precise
+snapshot of a moment, you can see /proc/<pid>/smaps file and scan page table.
+It's slow but very precise.
+
 Table 1-2: Contents of the statm files (as of 2.6.30-rc7)
 ..............................................................................
  Field                       Content
diff --git a/fs/exec.c b/fs/exec.c
index cce6bbdbdbb1..ea7861727efd 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -718,6 +718,7 @@ static int exec_mmap(struct mm_struct *mm)
 	/* Notify parent that we're no longer interested in the old VM */
 	tsk = current;
 	old_mm = current->mm;
+	sync_mm_rss(tsk, old_mm);
 	mm_release(tsk, old_mm);
 
 	if (old_mm) {
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 2124cdb2d1d0..8e580c07d171 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -873,7 +873,7 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
 /*
  * per-process(per-mm_struct) statistics.
  */
-#if USE_SPLIT_PTLOCKS
+#if defined(SPLIT_RSS_COUNTING)
 /*
  * The mm counters are not protected by its page_table_lock,
  * so must be incremented atomically.
@@ -883,10 +883,7 @@ static inline void set_mm_counter(struct mm_struct *mm, int member, long value)
 	atomic_long_set(&mm->rss_stat.count[member], value);
 }
 
-static inline unsigned long get_mm_counter(struct mm_struct *mm, int member)
-{
-	return (unsigned long)atomic_long_read(&mm->rss_stat.count[member]);
-}
+unsigned long get_mm_counter(struct mm_struct *mm, int member);
 
 static inline void add_mm_counter(struct mm_struct *mm, int member, long value)
 {
@@ -974,6 +971,7 @@ static inline void setmax_mm_hiwater_rss(unsigned long *maxrss,
 		*maxrss = hiwater_rss;
 }
 
+void sync_mm_rss(struct task_struct *task, struct mm_struct *mm);
 
 /*
  * A callback you can register to apply pressure to ageable caches.
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index e1ca64be6678..21861239ab0c 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -202,9 +202,15 @@ enum {
 };
 
 #if USE_SPLIT_PTLOCKS
+#define SPLIT_RSS_COUNTING
 struct mm_rss_stat {
 	atomic_long_t count[NR_MM_COUNTERS];
 };
+/* per-thread cached information, */
+struct task_rss_stat {
+	int events;	/* for synchronization threshold */
+	int count[NR_MM_COUNTERS];
+};
 #else  /* !USE_SPLIT_PTLOCKS */
 struct mm_rss_stat {
 	unsigned long count[NR_MM_COUNTERS];
diff --git a/include/linux/sched.h b/include/linux/sched.h
index cbeafa49a53b..46c6f8d5dc06 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1220,7 +1220,9 @@ struct task_struct {
 	struct plist_node pushable_tasks;
 
 	struct mm_struct *mm, *active_mm;
-
+#if defined(SPLIT_RSS_COUNTING)
+	struct task_rss_stat	rss_stat;
+#endif
 /* task state */
 	int exit_state;
 	int exit_code, exit_signal;
diff --git a/kernel/exit.c b/kernel/exit.c
index 45ed043b8bf5..10d3c5d5ae44 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -952,7 +952,8 @@ NORET_TYPE void do_exit(long code)
 				preempt_count());
 
 	acct_update_integrals(tsk);
-
+	/* sync mm's RSS info before statistics gathering */
+	sync_mm_rss(tsk, tsk->mm);
 	group_dead = atomic_dec_and_test(&tsk->signal->live);
 	if (group_dead) {
 		hrtimer_cancel(&tsk->signal->real_timer);
diff --git a/mm/memory.c b/mm/memory.c
index c57678478801..a4597614f18d 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -122,6 +122,79 @@ static int __init init_zero_pfn(void)
 core_initcall(init_zero_pfn);
 
 
+#if defined(SPLIT_RSS_COUNTING)
+
+void __sync_task_rss_stat(struct task_struct *task, struct mm_struct *mm)
+{
+	int i;
+
+	for (i = 0; i < NR_MM_COUNTERS; i++) {
+		if (task->rss_stat.count[i]) {
+			add_mm_counter(mm, i, task->rss_stat.count[i]);
+			task->rss_stat.count[i] = 0;
+		}
+	}
+	task->rss_stat.events = 0;
+}
+
+static void add_mm_counter_fast(struct mm_struct *mm, int member, int val)
+{
+	struct task_struct *task = current;
+
+	if (likely(task->mm == mm))
+		task->rss_stat.count[member] += val;
+	else
+		add_mm_counter(mm, member, val);
+}
+#define inc_mm_counter_fast(mm, member) add_mm_counter_fast(mm, member, 1)
+#define dec_mm_counter_fast(mm, member) add_mm_counter_fast(mm, member, -1)
+
+/* sync counter once per 64 page faults */
+#define TASK_RSS_EVENTS_THRESH	(64)
+static void check_sync_rss_stat(struct task_struct *task)
+{
+	if (unlikely(task != current))
+		return;
+	if (unlikely(task->rss_stat.events++ > TASK_RSS_EVENTS_THRESH))
+		__sync_task_rss_stat(task, task->mm);
+}
+
+unsigned long get_mm_counter(struct mm_struct *mm, int member)
+{
+	long val = 0;
+
+	/*
+	 * Don't use task->mm here...for avoiding to use task_get_mm()..
+	 * The caller must guarantee task->mm is not invalid.
+	 */
+	val = atomic_long_read(&mm->rss_stat.count[member]);
+	/*
+	 * counter is updated in asynchronous manner and may go to minus.
+	 * But it's never be expected number for users.
+	 */
+	if (val < 0)
+		return 0;
+	return (unsigned long)val;
+}
+
+void sync_mm_rss(struct task_struct *task, struct mm_struct *mm)
+{
+	__sync_task_rss_stat(task, mm);
+}
+#else
+
+#define inc_mm_counter_fast(mm, member) inc_mm_counter(mm, member)
+#define dec_mm_counter_fast(mm, member) dec_mm_counter(mm, member)
+
+static void check_sync_rss_stat(struct task_struct *task)
+{
+}
+
+void sync_mm_rss(struct task_struct *task, struct mm_struct *mm)
+{
+}
+#endif
+
 /*
  * If a p?d_bad entry is found while walking page tables, report
  * the error, before resetting entry to p?d_none.  Usually (but
@@ -386,6 +459,8 @@ static inline void add_mm_rss_vec(struct mm_struct *mm, int *rss)
 {
 	int i;
 
+	if (current->mm == mm)
+		sync_mm_rss(current, mm);
 	for (i = 0; i < NR_MM_COUNTERS; i++)
 		if (rss[i])
 			add_mm_counter(mm, i, rss[i]);
@@ -1539,7 +1614,7 @@ static int insert_page(struct vm_area_struct *vma, unsigned long addr,
 
 	/* Ok, finally just insert the thing.. */
 	get_page(page);
-	inc_mm_counter(mm, MM_FILEPAGES);
+	inc_mm_counter_fast(mm, MM_FILEPAGES);
 	page_add_file_rmap(page);
 	set_pte_at(mm, addr, pte, mk_pte(page, prot));
 
@@ -2175,11 +2250,11 @@ gotten:
 	if (likely(pte_same(*page_table, orig_pte))) {
 		if (old_page) {
 			if (!PageAnon(old_page)) {
-				dec_mm_counter(mm, MM_FILEPAGES);
-				inc_mm_counter(mm, MM_ANONPAGES);
+				dec_mm_counter_fast(mm, MM_FILEPAGES);
+				inc_mm_counter_fast(mm, MM_ANONPAGES);
 			}
 		} else
-			inc_mm_counter(mm, MM_ANONPAGES);
+			inc_mm_counter_fast(mm, MM_ANONPAGES);
 		flush_cache_page(vma, address, pte_pfn(orig_pte));
 		entry = mk_pte(new_page, vma->vm_page_prot);
 		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
@@ -2616,7 +2691,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	 * discarded at swap_free().
 	 */
 
-	inc_mm_counter(mm, MM_ANONPAGES);
+	inc_mm_counter_fast(mm, MM_ANONPAGES);
 	pte = mk_pte(page, vma->vm_page_prot);
 	if ((flags & FAULT_FLAG_WRITE) && reuse_swap_page(page)) {
 		pte = maybe_mkwrite(pte_mkdirty(pte), vma);
@@ -2700,7 +2775,7 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	if (!pte_none(*page_table))
 		goto release;
 
-	inc_mm_counter(mm, MM_ANONPAGES);
+	inc_mm_counter_fast(mm, MM_ANONPAGES);
 	page_add_new_anon_rmap(page, vma, address);
 setpte:
 	set_pte_at(mm, address, page_table, entry);
@@ -2854,10 +2929,10 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 		if (flags & FAULT_FLAG_WRITE)
 			entry = maybe_mkwrite(pte_mkdirty(entry), vma);
 		if (anon) {
-			inc_mm_counter(mm, MM_ANONPAGES);
+			inc_mm_counter_fast(mm, MM_ANONPAGES);
 			page_add_new_anon_rmap(page, vma, address);
 		} else {
-			inc_mm_counter(mm, MM_FILEPAGES);
+			inc_mm_counter_fast(mm, MM_FILEPAGES);
 			page_add_file_rmap(page);
 			if (flags & FAULT_FLAG_WRITE) {
 				dirty_page = page;
@@ -3035,6 +3110,9 @@ int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 
 	count_vm_event(PGFAULT);
 
+	/* do counter updates before entering really critical section. */
+	check_sync_rss_stat(current);
+
 	if (unlikely(is_vm_hugetlb_page(vma)))
 		return hugetlb_fault(mm, vma, address, flags);
 
-- 
cgit v1.2.3


From b084d4353ff99d824d3bc5a5c2c22c70b1fba722 Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Fri, 5 Mar 2010 13:41:42 -0800
Subject: mm: count swap usage

A frequent questions from users about memory management is what numbers of
swap ents are user for processes.  And this information will give some
hints to oom-killer.

Besides we can count the number of swapents per a process by scanning
/proc/<pid>/smaps, this is very slow and not good for usual process
information handler which works like 'ps' or 'top'.  (ps or top is now
enough slow..)

This patch adds a counter of swapents to mm_counter and update is at each
swap events.  Information is exported via /proc/<pid>/status file as

[kamezawa@bluextal memory]$ cat /proc/self/status
Name:   cat
State:  R (running)
Tgid:   2910
Pid:    2910
PPid:   2823
TracerPid:      0
Uid:    500     500     500     500
Gid:    500     500     500     500
FDSize: 256
Groups: 500
VmPeak:    82696 kB
VmSize:    82696 kB
VmLck:         0 kB
VmHWM:       432 kB
VmRSS:       432 kB
VmData:      172 kB
VmStk:        84 kB
VmExe:        48 kB
VmLib:      1568 kB
VmPTE:        40 kB
VmSwap:        0 kB <=============== this.

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Reviewed-by: Minchan Kim <minchan.kim@gmail.com>
Reviewed-by: Christoph Lameter <cl@linux-foundation.org>
Cc: Lee Schermerhorn <lee.schermerhorn@hp.com>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/filesystems/proc.txt |  2 ++
 fs/proc/task_mmu.c                 |  9 ++++++---
 include/linux/mm_types.h           |  1 +
 mm/memory.c                        | 16 ++++++++++++----
 mm/rmap.c                          |  1 +
 mm/swapfile.c                      |  1 +
 6 files changed, 23 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index e418f3d8f427..b5c5fc657a88 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -164,6 +164,7 @@ read the file /proc/PID/status:
   VmExe:        68 kB
   VmLib:      1412 kB
   VmPTE:        20 kb
+  VmSwap:        0 kB
   Threads:        1
   SigQ:   0/28578
   SigPnd: 0000000000000000
@@ -219,6 +220,7 @@ Table 1-2: Contents of the statm files (as of 2.6.30-rc7)
  VmExe                       size of text segment
  VmLib                       size of shared library code
  VmPTE                       size of page table entries
+ VmSwap                      size of swap usage (the number of referred swapents)
  Threads                     number of threads
  SigQ                        number of signals queued/max. number for queue
  SigPnd                      bitmap of pending signals for the thread
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 375581276011..183f8ff5f400 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -16,7 +16,7 @@
 
 void task_mem(struct seq_file *m, struct mm_struct *mm)
 {
-	unsigned long data, text, lib;
+	unsigned long data, text, lib, swap;
 	unsigned long hiwater_vm, total_vm, hiwater_rss, total_rss;
 
 	/*
@@ -36,6 +36,7 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
 	data = mm->total_vm - mm->shared_vm - mm->stack_vm;
 	text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK)) >> 10;
 	lib = (mm->exec_vm << (PAGE_SHIFT-10)) - text;
+	swap = get_mm_counter(mm, MM_SWAPENTS);
 	seq_printf(m,
 		"VmPeak:\t%8lu kB\n"
 		"VmSize:\t%8lu kB\n"
@@ -46,7 +47,8 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
 		"VmStk:\t%8lu kB\n"
 		"VmExe:\t%8lu kB\n"
 		"VmLib:\t%8lu kB\n"
-		"VmPTE:\t%8lu kB\n",
+		"VmPTE:\t%8lu kB\n"
+		"VmSwap:\t%8lu kB\n",
 		hiwater_vm << (PAGE_SHIFT-10),
 		(total_vm - mm->reserved_vm) << (PAGE_SHIFT-10),
 		mm->locked_vm << (PAGE_SHIFT-10),
@@ -54,7 +56,8 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
 		total_rss << (PAGE_SHIFT-10),
 		data << (PAGE_SHIFT-10),
 		mm->stack_vm << (PAGE_SHIFT-10), text, lib,
-		(PTRS_PER_PTE*sizeof(pte_t)*mm->nr_ptes) >> 10);
+		(PTRS_PER_PTE*sizeof(pte_t)*mm->nr_ptes) >> 10,
+		swap << (PAGE_SHIFT-10));
 }
 
 unsigned long task_vsize(struct mm_struct *mm)
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 21861239ab0c..19549d7275ab 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -198,6 +198,7 @@ struct core_state {
 enum {
 	MM_FILEPAGES,
 	MM_ANONPAGES,
+	MM_SWAPENTS,
 	NR_MM_COUNTERS
 };
 
diff --git a/mm/memory.c b/mm/memory.c
index a4597614f18d..77d9f840936b 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -679,7 +679,9 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 						 &src_mm->mmlist);
 				spin_unlock(&mmlist_lock);
 			}
-			if (is_write_migration_entry(entry) &&
+			if (likely(!non_swap_entry(entry)))
+				rss[MM_SWAPENTS]++;
+			else if (is_write_migration_entry(entry) &&
 					is_cow_mapping(vm_flags)) {
 				/*
 				 * COW mappings require pages in both parent
@@ -974,9 +976,14 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
 		if (pte_file(ptent)) {
 			if (unlikely(!(vma->vm_flags & VM_NONLINEAR)))
 				print_bad_pte(vma, addr, ptent, NULL);
-		} else if
-		  (unlikely(!free_swap_and_cache(pte_to_swp_entry(ptent))))
-			print_bad_pte(vma, addr, ptent, NULL);
+		} else {
+			swp_entry_t entry = pte_to_swp_entry(ptent);
+
+			if (!non_swap_entry(entry))
+				rss[MM_SWAPENTS]--;
+			if (unlikely(!free_swap_and_cache(entry)))
+				print_bad_pte(vma, addr, ptent, NULL);
+		}
 		pte_clear_not_present_full(mm, addr, pte, tlb->fullmm);
 	} while (pte++, addr += PAGE_SIZE, (addr != end && *zap_work > 0));
 
@@ -2692,6 +2699,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	 */
 
 	inc_mm_counter_fast(mm, MM_ANONPAGES);
+	dec_mm_counter_fast(mm, MM_SWAPENTS);
 	pte = mk_pte(page, vma->vm_page_prot);
 	if ((flags & FAULT_FLAG_WRITE) && reuse_swap_page(page)) {
 		pte = maybe_mkwrite(pte_mkdirty(pte), vma);
diff --git a/mm/rmap.c b/mm/rmap.c
index 73d0472884c2..5cb47111f79e 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -840,6 +840,7 @@ int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 				spin_unlock(&mmlist_lock);
 			}
 			dec_mm_counter(mm, MM_ANONPAGES);
+			inc_mm_counter(mm, MM_SWAPENTS);
 		} else if (PAGE_MIGRATION) {
 			/*
 			 * Store the pfn of the page in a special migration
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 893984946a2c..187a21f8b7bd 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -840,6 +840,7 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
 		goto out;
 	}
 
+	dec_mm_counter(vma->vm_mm, MM_SWAPENTS);
 	inc_mm_counter(vma->vm_mm, MM_ANONPAGES);
 	get_page(page);
 	set_pte_at(vma->vm_mm, addr, pte,
-- 
cgit v1.2.3


From fc91668eaf9e7ba61e867fc2218b7e9fb67faa4f Mon Sep 17 00:00:00 2001
From: Li Hong <lihong.hi@gmail.com>
Date: Fri, 5 Mar 2010 13:41:54 -0800
Subject: mm: remove free_hot_page()

free_hot_page() is just a wrapper around free_hot_cold_page() with
parameter 'cold = 0'.  After adding a clear comment for
free_hot_cold_page(), it is reasonable to remove a level of call.

[akpm@linux-foundation.org: fix build]
Signed-off-by: Li Hong <lihong.hi@gmail.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Rik van Riel <riel@redhat.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Larry Woodman <lwoodman@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Li Ming Chun <macli@brc.ubc.ca>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Americo Wang <xiyou.wangcong@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/gfp.h |  2 +-
 mm/page_alloc.c     | 10 +++-------
 mm/swap.c           |  2 +-
 3 files changed, 5 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 557bdad320b6..e5567e6762f3 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -325,7 +325,7 @@ void free_pages_exact(void *virt, size_t size);
 
 extern void __free_pages(struct page *page, unsigned int order);
 extern void free_pages(unsigned long addr, unsigned int order);
-extern void free_hot_page(struct page *page);
+extern void free_hot_cold_page(struct page *page, int cold);
 
 #define __free_page(page) __free_pages((page), 0)
 #define free_page(addr) free_pages((addr),0)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index caa7df60a4a1..80bcee0c5034 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1074,8 +1074,9 @@ void mark_free_pages(struct zone *zone)
 
 /*
  * Free a 0-order page
+ * cold == 1 ? free a cold page : free a hot page
  */
-static void free_hot_cold_page(struct page *page, int cold)
+void free_hot_cold_page(struct page *page, int cold)
 {
 	struct zone *zone = page_zone(page);
 	struct per_cpu_pages *pcp;
@@ -1135,11 +1136,6 @@ out:
 	local_irq_restore(flags);
 }
 
-void free_hot_page(struct page *page)
-{
-	free_hot_cold_page(page, 0);
-}
-	
 /*
  * split_page takes a non-compound higher-order page, and splits it into
  * n (1<<order) sub-pages: page[0..n]
@@ -2010,7 +2006,7 @@ void __free_pages(struct page *page, unsigned int order)
 {
 	if (put_page_testzero(page)) {
 		if (order == 0)
-			free_hot_page(page);
+			free_hot_cold_page(page, 0);
 		else
 			__free_pages_ok(page, order);
 	}
diff --git a/mm/swap.c b/mm/swap.c
index 308e57d8d7ed..9036b89813ac 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -55,7 +55,7 @@ static void __page_cache_release(struct page *page)
 		del_page_from_lru(zone, page);
 		spin_unlock_irqrestore(&zone->lru_lock, flags);
 	}
-	free_hot_page(page);
+	free_hot_cold_page(page, 0);
 }
 
 static void put_compound_page(struct page *page)
-- 
cgit v1.2.3


From 93e4a89a8c987189b168a530a331ef6d0fcf07a7 Mon Sep 17 00:00:00 2001
From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Date: Fri, 5 Mar 2010 13:41:55 -0800
Subject: mm: restore zone->all_unreclaimable to independence word

commit e815af95 ("change all_unreclaimable zone member to flags") changed
all_unreclaimable member to bit flag.  But it had an undesireble side
effect.  free_one_page() is one of most hot path in linux kernel and
increasing atomic ops in it can reduce kernel performance a bit.

Thus, this patch revert such commit partially. at least
all_unreclaimable shouldn't share memory word with other zone flags.

[akpm@linux-foundation.org: fix patch interaction]
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: Huang Shijie <shijie8@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmzone.h |  7 +------
 mm/page_alloc.c        |  6 +++---
 mm/vmscan.c            | 22 +++++++++-------------
 mm/vmstat.c            |  2 +-
 4 files changed, 14 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index a01a103341bd..bc209d8b7b5c 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -306,6 +306,7 @@ struct zone {
 	 * free areas of different sizes
 	 */
 	spinlock_t		lock;
+	int                     all_unreclaimable; /* All pages pinned */
 #ifdef CONFIG_MEMORY_HOTPLUG
 	/* see spanned/present_pages for more description */
 	seqlock_t		span_seqlock;
@@ -417,7 +418,6 @@ struct zone {
 } ____cacheline_internodealigned_in_smp;
 
 typedef enum {
-	ZONE_ALL_UNRECLAIMABLE,		/* all pages pinned */
 	ZONE_RECLAIM_LOCKED,		/* prevents concurrent reclaim */
 	ZONE_OOM_LOCKED,		/* zone is in OOM killer zonelist */
 } zone_flags_t;
@@ -437,11 +437,6 @@ static inline void zone_clear_flag(struct zone *zone, zone_flags_t flag)
 	clear_bit(flag, &zone->flags);
 }
 
-static inline int zone_is_all_unreclaimable(const struct zone *zone)
-{
-	return test_bit(ZONE_ALL_UNRECLAIMABLE, &zone->flags);
-}
-
 static inline int zone_is_reclaim_locked(const struct zone *zone)
 {
 	return test_bit(ZONE_RECLAIM_LOCKED, &zone->flags);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 80bcee0c5034..0734bedabd9c 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -530,7 +530,7 @@ static void free_pcppages_bulk(struct zone *zone, int count,
 	int batch_free = 0;
 
 	spin_lock(&zone->lock);
-	zone_clear_flag(zone, ZONE_ALL_UNRECLAIMABLE);
+	zone->all_unreclaimable = 0;
 	zone->pages_scanned = 0;
 
 	__mod_zone_page_state(zone, NR_FREE_PAGES, count);
@@ -568,7 +568,7 @@ static void free_one_page(struct zone *zone, struct page *page, int order,
 				int migratetype)
 {
 	spin_lock(&zone->lock);
-	zone_clear_flag(zone, ZONE_ALL_UNRECLAIMABLE);
+	zone->all_unreclaimable = 0;
 	zone->pages_scanned = 0;
 
 	__mod_zone_page_state(zone, NR_FREE_PAGES, 1 << order);
@@ -2262,7 +2262,7 @@ void show_free_areas(void)
 			K(zone_page_state(zone, NR_BOUNCE)),
 			K(zone_page_state(zone, NR_WRITEBACK_TEMP)),
 			zone->pages_scanned,
-			(zone_is_all_unreclaimable(zone) ? "yes" : "no")
+			(zone->all_unreclaimable ? "yes" : "no")
 			);
 		printk("lowmem_reserve[]:");
 		for (i = 0; i < MAX_NR_ZONES; i++)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index bc0f8db8340f..5cbf64dd79c1 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1699,8 +1699,7 @@ static void shrink_zones(int priority, struct zonelist *zonelist,
 				continue;
 			note_zone_scanning_priority(zone, priority);
 
-			if (zone_is_all_unreclaimable(zone) &&
-						priority != DEF_PRIORITY)
+			if (zone->all_unreclaimable && priority != DEF_PRIORITY)
 				continue;	/* Let kswapd poll it */
 			sc->all_unreclaimable = 0;
 		} else {
@@ -1927,7 +1926,7 @@ static int sleeping_prematurely(pg_data_t *pgdat, int order, long remaining)
 		if (!populated_zone(zone))
 			continue;
 
-		if (zone_is_all_unreclaimable(zone))
+		if (zone->all_unreclaimable)
 			continue;
 
 		if (!zone_watermark_ok(zone, order, high_wmark_pages(zone),
@@ -2017,8 +2016,7 @@ loop_again:
 			if (!populated_zone(zone))
 				continue;
 
-			if (zone_is_all_unreclaimable(zone) &&
-			    priority != DEF_PRIORITY)
+			if (zone->all_unreclaimable && priority != DEF_PRIORITY)
 				continue;
 
 			/*
@@ -2061,8 +2059,7 @@ loop_again:
 			if (!populated_zone(zone))
 				continue;
 
-			if (zone_is_all_unreclaimable(zone) &&
-					priority != DEF_PRIORITY)
+			if (zone->all_unreclaimable && priority != DEF_PRIORITY)
 				continue;
 
 			temp_priority[i] = priority;
@@ -2089,12 +2086,11 @@ loop_again:
 						lru_pages);
 			sc.nr_reclaimed += reclaim_state->reclaimed_slab;
 			total_scanned += sc.nr_scanned;
-			if (zone_is_all_unreclaimable(zone))
+			if (zone->all_unreclaimable)
 				continue;
-			if (nr_slab == 0 && zone->pages_scanned >=
-					(zone_reclaimable_pages(zone) * 6))
-					zone_set_flag(zone,
-						      ZONE_ALL_UNRECLAIMABLE);
+			if (nr_slab == 0 &&
+			    zone->pages_scanned >= (zone_reclaimable_pages(zone) * 6))
+				zone->all_unreclaimable = 1;
 			/*
 			 * If we've done a decent amount of scanning and
 			 * the reclaim ratio is low, start doing writepage
@@ -2624,7 +2620,7 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
 	    zone_page_state(zone, NR_SLAB_RECLAIMABLE) <= zone->min_slab_pages)
 		return ZONE_RECLAIM_FULL;
 
-	if (zone_is_all_unreclaimable(zone))
+	if (zone->all_unreclaimable)
 		return ZONE_RECLAIM_FULL;
 
 	/*
diff --git a/mm/vmstat.c b/mm/vmstat.c
index fc5aa183bc45..7f760cbc73f3 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -763,7 +763,7 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
 		   "\n  prev_priority:     %i"
 		   "\n  start_pfn:         %lu"
 		   "\n  inactive_ratio:    %u",
-			   zone_is_all_unreclaimable(zone),
+		   zone->all_unreclaimable,
 		   zone->prev_priority,
 		   zone->zone_start_pfn,
 		   zone->inactive_ratio);
-- 
cgit v1.2.3


From d96ae5309165d9ed7c008a178238977b73595cd9 Mon Sep 17 00:00:00 2001
From: "akpm@linux-foundation.org" <akpm@linux-foundation.org>
Date: Fri, 5 Mar 2010 13:41:58 -0800
Subject: memory-hotplug: create /sys/firmware/memmap entry for new memory

A memmap is a directory in sysfs which includes 3 text files: start, end
and type.  For example:

start: 	0x100000
end:	0x7e7b1cff
type:	System RAM

Interface firmware_map_add was not called explicitly.  Remove it and add
function firmware_map_add_hotplug as hotplug interface of memmap.

Each memory entry has a memmap in sysfs, When we hot-add new memory, sysfs
does not export memmap entry for it.  We add a call in function add_memory
to function firmware_map_add_hotplug.

Add a new function add_sysfs_fw_map_entry() to create memmap entry, it
will be called when initialize memmap and hot-add memory.

[akpm@linux-foundation.org: un-kernedoc a no longer kerneldoc comment]
Signed-off-by: Shaohui Zheng <shaohui.zheng@intel.com>
Acked-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Yasunori Goto <y-goto@jp.fujitsu.com>
Reviewed-by: Wu Fengguang <fengguang.wu@intel.com>
Cc: Dave Hansen <haveblue@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/firmware/memmap.c    | 57 ++++++++++++++++++++++++++++----------------
 include/linux/firmware-map.h |  6 ++---
 mm/memory_hotplug.c          |  4 ++++
 3 files changed, 43 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/firmware/memmap.c b/drivers/firmware/memmap.c
index 56f9234781fa..20f645743ead 100644
--- a/drivers/firmware/memmap.c
+++ b/drivers/firmware/memmap.c
@@ -122,29 +122,53 @@ static int firmware_map_add_entry(u64 start, u64 end,
 	return 0;
 }
 
+/*
+ * Add memmap entry on sysfs
+ */
+static int add_sysfs_fw_map_entry(struct firmware_map_entry *entry)
+{
+	static int map_entries_nr;
+	static struct kset *mmap_kset;
+
+	if (!mmap_kset) {
+		mmap_kset = kset_create_and_add("memmap", NULL, firmware_kobj);
+		if (!mmap_kset)
+			return -ENOMEM;
+	}
+
+	entry->kobj.kset = mmap_kset;
+	if (kobject_add(&entry->kobj, NULL, "%d", map_entries_nr++))
+		kobject_put(&entry->kobj);
+
+	return 0;
+}
+
 /**
- * firmware_map_add() - Adds a firmware mapping entry.
+ * firmware_map_add_hotplug() - Adds a firmware mapping entry when we do
+ * memory hotplug.
  * @start: Start of the memory range.
  * @end:   End of the memory range (inclusive).
  * @type:  Type of the memory range.
  *
- * This function uses kmalloc() for memory
- * allocation. Use firmware_map_add_early() if you want to use the bootmem
- * allocator.
- *
- * That function must be called before late_initcall.
+ * Adds a firmware mapping entry. This function is for memory hotplug, it is
+ * similar to function firmware_map_add_early(). The only difference is that
+ * it will create the syfs entry dynamically.
  *
  * Returns 0 on success, or -ENOMEM if no memory could be allocated.
  **/
-int firmware_map_add(u64 start, u64 end, const char *type)
+int __meminit firmware_map_add_hotplug(u64 start, u64 end, const char *type)
 {
 	struct firmware_map_entry *entry;
 
-	entry = kmalloc(sizeof(struct firmware_map_entry), GFP_ATOMIC);
+	entry = kzalloc(sizeof(struct firmware_map_entry), GFP_ATOMIC);
 	if (!entry)
 		return -ENOMEM;
 
-	return firmware_map_add_entry(start, end, type, entry);
+	firmware_map_add_entry(start, end, type, entry);
+	/* create the memmap entry */
+	add_sysfs_fw_map_entry(entry);
+
+	return 0;
 }
 
 /**
@@ -154,7 +178,7 @@ int firmware_map_add(u64 start, u64 end, const char *type)
  * @type:  Type of the memory range.
  *
  * Adds a firmware mapping entry. This function uses the bootmem allocator
- * for memory allocation. Use firmware_map_add() if you want to use kmalloc().
+ * for memory allocation.
  *
  * That function must be called before late_initcall.
  *
@@ -214,19 +238,10 @@ static ssize_t memmap_attr_show(struct kobject *kobj,
  */
 static int __init memmap_init(void)
 {
-	int i = 0;
 	struct firmware_map_entry *entry;
-	struct kset *memmap_kset;
-
-	memmap_kset = kset_create_and_add("memmap", NULL, firmware_kobj);
-	if (WARN_ON(!memmap_kset))
-		return -ENOMEM;
 
-	list_for_each_entry(entry, &map_entries, list) {
-		entry->kobj.kset = memmap_kset;
-		if (kobject_add(&entry->kobj, NULL, "%d", i++))
-			kobject_put(&entry->kobj);
-	}
+	list_for_each_entry(entry, &map_entries, list)
+		add_sysfs_fw_map_entry(entry);
 
 	return 0;
 }
diff --git a/include/linux/firmware-map.h b/include/linux/firmware-map.h
index 875451f1373a..c6dcc1dfe781 100644
--- a/include/linux/firmware-map.h
+++ b/include/linux/firmware-map.h
@@ -24,17 +24,17 @@
  */
 #ifdef CONFIG_FIRMWARE_MEMMAP
 
-int firmware_map_add(u64 start, u64 end, const char *type);
 int firmware_map_add_early(u64 start, u64 end, const char *type);
+int firmware_map_add_hotplug(u64 start, u64 end, const char *type);
 
 #else /* CONFIG_FIRMWARE_MEMMAP */
 
-static inline int firmware_map_add(u64 start, u64 end, const char *type)
+static inline int firmware_map_add_early(u64 start, u64 end, const char *type)
 {
 	return 0;
 }
 
-static inline int firmware_map_add_early(u64 start, u64 end, const char *type)
+static inline int firmware_map_add_hotplug(u64 start, u64 end, const char *type)
 {
 	return 0;
 }
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 030ce8a5bb0e..78e34e63c7b8 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -28,6 +28,7 @@
 #include <linux/pfn.h>
 #include <linux/suspend.h>
 #include <linux/mm_inline.h>
+#include <linux/firmware-map.h>
 
 #include <asm/tlbflush.h>
 
@@ -523,6 +524,9 @@ int __ref add_memory(int nid, u64 start, u64 size)
 		BUG_ON(ret);
 	}
 
+	/* create new memmap entry */
+	firmware_map_add_hotplug(start, start + size, "System RAM");
+
 	goto out;
 
 error:
-- 
cgit v1.2.3


From 0141450f66c3c12a3aaa869748caa64241885cdf Mon Sep 17 00:00:00 2001
From: Wu Fengguang <fengguang.wu@intel.com>
Date: Fri, 5 Mar 2010 13:42:03 -0800
Subject: readahead: introduce FMODE_RANDOM for POSIX_FADV_RANDOM

This fixes inefficient page-by-page reads on POSIX_FADV_RANDOM.

POSIX_FADV_RANDOM used to set ra_pages=0, which leads to poor performance:
a 16K read will be carried out in 4 _sync_ 1-page reads.

In other places, ra_pages==0 means
- it's ramfs/tmpfs/hugetlbfs/sysfs/configfs
- some IO error happened
where multi-page read IO won't help or should be avoided.

POSIX_FADV_RANDOM actually want a different semantics: to disable the
*heuristic* readahead algorithm, and to use a dumb one which faithfully
submit read IO for whatever application requests.

So introduce a flag FMODE_RANDOM for POSIX_FADV_RANDOM.

Note that the random hint is not likely to help random reads performance
noticeably.  And it may be too permissive on huge request size (its IO
size is not limited by read_ahead_kb).

In Quentin's report (http://lkml.org/lkml/2009/12/24/145), the overall
(NFS read) performance of the application increased by 313%!

Tested-by: Quentin Barnes <qbarnes+nfs@yahoo-inc.com>
Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
Cc: Nick Piggin <npiggin@suse.de>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Steven Whitehouse <swhiteho@redhat.com>
Cc: David Howells <dhowells@redhat.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Trond Myklebust <Trond.Myklebust@netapp.com>
Cc: Chuck Lever <chuck.lever@oracle.com>
Cc: <stable@kernel.org>			[2.6.33.x]
Cc: <qbarnes+nfs@yahoo-inc.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/fs.h |  3 +++
 mm/fadvise.c       | 10 +++++++++-
 mm/readahead.c     |  6 ++++++
 3 files changed, 18 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 45689621a851..be87edcaba06 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -87,6 +87,9 @@ struct inodes_stat_t {
  */
 #define FMODE_NOCMTIME		((__force fmode_t)2048)
 
+/* Expect random access pattern */
+#define FMODE_RANDOM		((__force fmode_t)4096)
+
 /*
  * The below are the various read and write types that we support. Some of
  * them include behavioral modifiers that send information down to the
diff --git a/mm/fadvise.c b/mm/fadvise.c
index e43359214f6f..8d723c9e8b75 100644
--- a/mm/fadvise.c
+++ b/mm/fadvise.c
@@ -77,12 +77,20 @@ SYSCALL_DEFINE(fadvise64_64)(int fd, loff_t offset, loff_t len, int advice)
 	switch (advice) {
 	case POSIX_FADV_NORMAL:
 		file->f_ra.ra_pages = bdi->ra_pages;
+		spin_lock(&file->f_lock);
+		file->f_mode &= ~FMODE_RANDOM;
+		spin_unlock(&file->f_lock);
 		break;
 	case POSIX_FADV_RANDOM:
-		file->f_ra.ra_pages = 0;
+		spin_lock(&file->f_lock);
+		file->f_mode |= FMODE_RANDOM;
+		spin_unlock(&file->f_lock);
 		break;
 	case POSIX_FADV_SEQUENTIAL:
 		file->f_ra.ra_pages = bdi->ra_pages * 2;
+		spin_lock(&file->f_lock);
+		file->f_mode &= ~FMODE_RANDOM;
+		spin_unlock(&file->f_lock);
 		break;
 	case POSIX_FADV_WILLNEED:
 		if (!mapping->a_ops->readpage) {
diff --git a/mm/readahead.c b/mm/readahead.c
index 033bc135a41f..337b20e946f6 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -501,6 +501,12 @@ void page_cache_sync_readahead(struct address_space *mapping,
 	if (!ra->ra_pages)
 		return;
 
+	/* be dumb */
+	if (filp->f_mode & FMODE_RANDOM) {
+		force_page_cache_readahead(mapping, filp, offset, req_size);
+		return;
+	}
+
 	/* do read-ahead */
 	ondemand_readahead(mapping, ra, filp, false, offset, req_size);
 }
-- 
cgit v1.2.3


From 19adf9c5d5793657118f2002237c0ee49c3b6185 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Fri, 5 Mar 2010 13:42:03 -0800
Subject: include/linux/fs.h: convert FMODE_* constants to hex

It was tolerable until Eric went and added 8388608.

Cc: Eric Paris <eparis@redhat.com>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/fs.h | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index be87edcaba06..10b8dedcd18b 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -60,24 +60,24 @@ struct inodes_stat_t {
  */
 
 /* file is open for reading */
-#define FMODE_READ		((__force fmode_t)1)
+#define FMODE_READ		((__force fmode_t)0x1)
 /* file is open for writing */
-#define FMODE_WRITE		((__force fmode_t)2)
+#define FMODE_WRITE		((__force fmode_t)0x2)
 /* file is seekable */
-#define FMODE_LSEEK		((__force fmode_t)4)
+#define FMODE_LSEEK		((__force fmode_t)0x4)
 /* file can be accessed using pread */
-#define FMODE_PREAD		((__force fmode_t)8)
+#define FMODE_PREAD		((__force fmode_t)0x8)
 /* file can be accessed using pwrite */
-#define FMODE_PWRITE		((__force fmode_t)16)
+#define FMODE_PWRITE		((__force fmode_t)0x10)
 /* File is opened for execution with sys_execve / sys_uselib */
-#define FMODE_EXEC		((__force fmode_t)32)
+#define FMODE_EXEC		((__force fmode_t)0x20)
 /* File is opened with O_NDELAY (only set for block devices) */
-#define FMODE_NDELAY		((__force fmode_t)64)
+#define FMODE_NDELAY		((__force fmode_t)0x40)
 /* File is opened with O_EXCL (only set for block devices) */
-#define FMODE_EXCL		((__force fmode_t)128)
+#define FMODE_EXCL		((__force fmode_t)0x80)
 /* File is opened using open(.., 3, ..) and is writeable only for ioctls
    (specialy hack for floppy.c) */
-#define FMODE_WRITE_IOCTL	((__force fmode_t)256)
+#define FMODE_WRITE_IOCTL	((__force fmode_t)0x100)
 
 /*
  * Don't update ctime and mtime.
@@ -85,10 +85,10 @@ struct inodes_stat_t {
  * Currently a special hack for the XFS open_by_handle ioctl, but we'll
  * hopefully graduate it to a proper O_CMTIME flag supported by open(2) soon.
  */
-#define FMODE_NOCMTIME		((__force fmode_t)2048)
+#define FMODE_NOCMTIME		((__force fmode_t)0x800)
 
 /* Expect random access pattern */
-#define FMODE_RANDOM		((__force fmode_t)4096)
+#define FMODE_RANDOM		((__force fmode_t)0x1000)
 
 /*
  * The below are the various read and write types that we support. Some of
-- 
cgit v1.2.3


From 5beb49305251e5669852ed541e8e2f2f7696c53e Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@redhat.com>
Date: Fri, 5 Mar 2010 13:42:07 -0800
Subject: mm: change anon_vma linking to fix multi-process server scalability
 issue

The old anon_vma code can lead to scalability issues with heavily forking
workloads.  Specifically, each anon_vma will be shared between the parent
process and all its child processes.

In a workload with 1000 child processes and a VMA with 1000 anonymous
pages per process that get COWed, this leads to a system with a million
anonymous pages in the same anon_vma, each of which is mapped in just one
of the 1000 processes.  However, the current rmap code needs to walk them
all, leading to O(N) scanning complexity for each page.

This can result in systems where one CPU is walking the page tables of
1000 processes in page_referenced_one, while all other CPUs are stuck on
the anon_vma lock.  This leads to catastrophic failure for a benchmark
like AIM7, where the total number of processes can reach in the tens of
thousands.  Real workloads are still a factor 10 less process intensive
than AIM7, but they are catching up.

This patch changes the way anon_vmas and VMAs are linked, which allows us
to associate multiple anon_vmas with a VMA.  At fork time, each child
process gets its own anon_vmas, in which its COWed pages will be
instantiated.  The parents' anon_vma is also linked to the VMA, because
non-COWed pages could be present in any of the children.

This reduces rmap scanning complexity to O(1) for the pages of the 1000
child processes, with O(N) complexity for at most 1/N pages in the system.
 This reduces the average scanning cost in heavily forking workloads from
O(N) to 2.

The only real complexity in this patch stems from the fact that linking a
VMA to anon_vmas now involves memory allocations.  This means vma_adjust
can fail, if it needs to attach a VMA to anon_vma structures.  This in
turn means error handling needs to be added to the calling functions.

A second source of complexity is that, because there can be multiple
anon_vmas, the anon_vma linking in vma_adjust can no longer be done under
"the" anon_vma lock.  To prevent the rmap code from walking up an
incomplete VMA, this patch introduces the VM_LOCK_RMAP VMA flag.  This bit
flag uses the same slot as the NOMMU VM_MAPPED_COPY, with an ifdef in mm.h
to make sure it is impossible to compile a kernel that needs both symbolic
values for the same bitflag.

Some test results:

Without the anon_vma changes, when AIM7 hits around 9.7k users (on a test
box with 16GB RAM and not quite enough IO), the system ends up running
>99% in system time, with every CPU on the same anon_vma lock in the
pageout code.

With these changes, AIM7 hits the cross-over point around 29.7k users.
This happens with ~99% IO wait time, there never seems to be any spike in
system time.  The anon_vma lock contention appears to be resolved.

[akpm@linux-foundation.org: cleanups]
Signed-off-by: Rik van Riel <riel@redhat.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Larry Woodman <lwoodman@redhat.com>
Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/ia64/kernel/perfmon.c |   1 +
 arch/ia64/mm/init.c        |   2 +
 fs/exec.c                  |   6 +-
 include/linux/mm.h         |   6 +-
 include/linux/mm_types.h   |   3 +-
 include/linux/rmap.h       |  35 ++++++++--
 kernel/fork.c              |   6 +-
 mm/ksm.c                   |  12 +++-
 mm/memory-failure.c        |   5 +-
 mm/memory.c                |   4 +-
 mm/mmap.c                  | 138 +++++++++++++++++++++++++++------------
 mm/mremap.c                |   7 +-
 mm/nommu.c                 |   2 +-
 mm/rmap.c                  | 156 +++++++++++++++++++++++++++++++++++++--------
 14 files changed, 298 insertions(+), 85 deletions(-)

(limited to 'include/linux')

diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index b81e46b1629b..703062c44fb9 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -2315,6 +2315,7 @@ pfm_smpl_buffer_alloc(struct task_struct *task, struct file *filp, pfm_context_t
 		DPRINT(("Cannot allocate vma\n"));
 		goto error_kmem;
 	}
+	INIT_LIST_HEAD(&vma->anon_vma_chain);
 
 	/*
 	 * partially initialize the vma for the sampling buffer
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index ca3335ea56cc..ed41759efcac 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -117,6 +117,7 @@ ia64_init_addr_space (void)
 	 */
 	vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
 	if (vma) {
+		INIT_LIST_HEAD(&vma->anon_vma_chain);
 		vma->vm_mm = current->mm;
 		vma->vm_start = current->thread.rbs_bot & PAGE_MASK;
 		vma->vm_end = vma->vm_start + PAGE_SIZE;
@@ -135,6 +136,7 @@ ia64_init_addr_space (void)
 	if (!(current->personality & MMAP_PAGE_ZERO)) {
 		vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
 		if (vma) {
+			INIT_LIST_HEAD(&vma->anon_vma_chain);
 			vma->vm_mm = current->mm;
 			vma->vm_end = PAGE_SIZE;
 			vma->vm_page_prot = __pgprot(pgprot_val(PAGE_READONLY) | _PAGE_MA_NAT);
diff --git a/fs/exec.c b/fs/exec.c
index ea7861727efd..591030735591 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -246,6 +246,7 @@ static int __bprm_mm_init(struct linux_binprm *bprm)
 	vma->vm_start = vma->vm_end - PAGE_SIZE;
 	vma->vm_flags = VM_STACK_FLAGS;
 	vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
+	INIT_LIST_HEAD(&vma->anon_vma_chain);
 	err = insert_vm_struct(mm, vma);
 	if (err)
 		goto err;
@@ -516,7 +517,8 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
 	/*
 	 * cover the whole range: [new_start, old_end)
 	 */
-	vma_adjust(vma, new_start, old_end, vma->vm_pgoff, NULL);
+	if (vma_adjust(vma, new_start, old_end, vma->vm_pgoff, NULL))
+		return -ENOMEM;
 
 	/*
 	 * move the page tables downwards, on failure we rely on
@@ -547,7 +549,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
 	tlb_finish_mmu(tlb, new_end, old_end);
 
 	/*
-	 * shrink the vma to just the new range.
+	 * Shrink the vma to just the new range.  Always succeeds.
 	 */
 	vma_adjust(vma, new_start, new_end, vma->vm_pgoff, NULL);
 
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 8e580c07d171..8e2841a2f441 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -97,7 +97,11 @@ extern unsigned int kobjsize(const void *objp);
 #define VM_NORESERVE	0x00200000	/* should the VM suppress accounting */
 #define VM_HUGETLB	0x00400000	/* Huge TLB Page VM */
 #define VM_NONLINEAR	0x00800000	/* Is non-linear (remap_file_pages) */
+#ifdef CONFIG_MMU
+#define VM_LOCK_RMAP	0x01000000	/* Do not follow this rmap (mmu mmap) */
+#else
 #define VM_MAPPED_COPY	0x01000000	/* T if mapped copy of data (nommu mmap) */
+#endif
 #define VM_INSERTPAGE	0x02000000	/* The vma has had "vm_insert_page()" done on it */
 #define VM_ALWAYSDUMP	0x04000000	/* Always include in core dumps */
 
@@ -1216,7 +1220,7 @@ static inline void vma_nonlinear_insert(struct vm_area_struct *vma,
 
 /* mmap.c */
 extern int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin);
-extern void vma_adjust(struct vm_area_struct *vma, unsigned long start,
+extern int vma_adjust(struct vm_area_struct *vma, unsigned long start,
 	unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert);
 extern struct vm_area_struct *vma_merge(struct mm_struct *,
 	struct vm_area_struct *prev, unsigned long addr, unsigned long end,
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 19549d7275ab..048b46270aa5 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -163,7 +163,8 @@ struct vm_area_struct {
 	 * can only be in the i_mmap tree.  An anonymous MAP_PRIVATE, stack
 	 * or brk vma (with NULL file) can only be in an anon_vma list.
 	 */
-	struct list_head anon_vma_node;	/* Serialized by anon_vma->lock */
+	struct list_head anon_vma_chain; /* Serialized by mmap_sem &
+					  * page_table_lock */
 	struct anon_vma *anon_vma;	/* Serialized by page_table_lock */
 
 	/* Function pointers to deal with this struct. */
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index b019ae64e2ab..62da2001d55c 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -37,7 +37,27 @@ struct anon_vma {
 	 * is serialized by a system wide lock only visible to
 	 * mm_take_all_locks() (mm_all_locks_mutex).
 	 */
-	struct list_head head;	/* List of private "related" vmas */
+	struct list_head head;	/* Chain of private "related" vmas */
+};
+
+/*
+ * The copy-on-write semantics of fork mean that an anon_vma
+ * can become associated with multiple processes. Furthermore,
+ * each child process will have its own anon_vma, where new
+ * pages for that process are instantiated.
+ *
+ * This structure allows us to find the anon_vmas associated
+ * with a VMA, or the VMAs associated with an anon_vma.
+ * The "same_vma" list contains the anon_vma_chains linking
+ * all the anon_vmas associated with this VMA.
+ * The "same_anon_vma" list contains the anon_vma_chains
+ * which link all the VMAs associated with this anon_vma.
+ */
+struct anon_vma_chain {
+	struct vm_area_struct *vma;
+	struct anon_vma *anon_vma;
+	struct list_head same_vma;   /* locked by mmap_sem & page_table_lock */
+	struct list_head same_anon_vma;	/* locked by anon_vma->lock */
 };
 
 #ifdef CONFIG_MMU
@@ -89,12 +109,19 @@ static inline void anon_vma_unlock(struct vm_area_struct *vma)
  */
 void anon_vma_init(void);	/* create anon_vma_cachep */
 int  anon_vma_prepare(struct vm_area_struct *);
-void __anon_vma_merge(struct vm_area_struct *, struct vm_area_struct *);
-void anon_vma_unlink(struct vm_area_struct *);
-void anon_vma_link(struct vm_area_struct *);
+void unlink_anon_vmas(struct vm_area_struct *);
+int anon_vma_clone(struct vm_area_struct *, struct vm_area_struct *);
+int anon_vma_fork(struct vm_area_struct *, struct vm_area_struct *);
 void __anon_vma_link(struct vm_area_struct *);
 void anon_vma_free(struct anon_vma *);
 
+static inline void anon_vma_merge(struct vm_area_struct *vma,
+				  struct vm_area_struct *next)
+{
+	VM_BUG_ON(vma->anon_vma != next->anon_vma);
+	unlink_anon_vmas(next);
+}
+
 /*
  * rmap interfaces called when adding or removing pte of page
  */
diff --git a/kernel/fork.c b/kernel/fork.c
index 7616bcf107b9..bab7b254ad39 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -329,15 +329,17 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
 		if (!tmp)
 			goto fail_nomem;
 		*tmp = *mpnt;
+		INIT_LIST_HEAD(&tmp->anon_vma_chain);
 		pol = mpol_dup(vma_policy(mpnt));
 		retval = PTR_ERR(pol);
 		if (IS_ERR(pol))
 			goto fail_nomem_policy;
 		vma_set_policy(tmp, pol);
+		if (anon_vma_fork(tmp, mpnt))
+			goto fail_nomem_anon_vma_fork;
 		tmp->vm_flags &= ~VM_LOCKED;
 		tmp->vm_mm = mm;
 		tmp->vm_next = NULL;
-		anon_vma_link(tmp);
 		file = tmp->vm_file;
 		if (file) {
 			struct inode *inode = file->f_path.dentry->d_inode;
@@ -392,6 +394,8 @@ out:
 	flush_tlb_mm(oldmm);
 	up_write(&oldmm->mmap_sem);
 	return retval;
+fail_nomem_anon_vma_fork:
+	mpol_put(pol);
 fail_nomem_policy:
 	kmem_cache_free(vm_area_cachep, tmp);
 fail_nomem:
diff --git a/mm/ksm.c b/mm/ksm.c
index 56a0da1f9979..a93f1b7f508c 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -1563,10 +1563,12 @@ int page_referenced_ksm(struct page *page, struct mem_cgroup *memcg,
 again:
 	hlist_for_each_entry(rmap_item, hlist, &stable_node->hlist, hlist) {
 		struct anon_vma *anon_vma = rmap_item->anon_vma;
+		struct anon_vma_chain *vmac;
 		struct vm_area_struct *vma;
 
 		spin_lock(&anon_vma->lock);
-		list_for_each_entry(vma, &anon_vma->head, anon_vma_node) {
+		list_for_each_entry(vmac, &anon_vma->head, same_anon_vma) {
+			vma = vmac->vma;
 			if (rmap_item->address < vma->vm_start ||
 			    rmap_item->address >= vma->vm_end)
 				continue;
@@ -1614,10 +1616,12 @@ int try_to_unmap_ksm(struct page *page, enum ttu_flags flags)
 again:
 	hlist_for_each_entry(rmap_item, hlist, &stable_node->hlist, hlist) {
 		struct anon_vma *anon_vma = rmap_item->anon_vma;
+		struct anon_vma_chain *vmac;
 		struct vm_area_struct *vma;
 
 		spin_lock(&anon_vma->lock);
-		list_for_each_entry(vma, &anon_vma->head, anon_vma_node) {
+		list_for_each_entry(vmac, &anon_vma->head, same_anon_vma) {
+			vma = vmac->vma;
 			if (rmap_item->address < vma->vm_start ||
 			    rmap_item->address >= vma->vm_end)
 				continue;
@@ -1664,10 +1668,12 @@ int rmap_walk_ksm(struct page *page, int (*rmap_one)(struct page *,
 again:
 	hlist_for_each_entry(rmap_item, hlist, &stable_node->hlist, hlist) {
 		struct anon_vma *anon_vma = rmap_item->anon_vma;
+		struct anon_vma_chain *vmac;
 		struct vm_area_struct *vma;
 
 		spin_lock(&anon_vma->lock);
-		list_for_each_entry(vma, &anon_vma->head, anon_vma_node) {
+		list_for_each_entry(vmac, &anon_vma->head, same_anon_vma) {
+			vma = vmac->vma;
 			if (rmap_item->address < vma->vm_start ||
 			    rmap_item->address >= vma->vm_end)
 				continue;
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 17299fd4577c..d1f335162976 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -383,9 +383,12 @@ static void collect_procs_anon(struct page *page, struct list_head *to_kill,
 	if (av == NULL)	/* Not actually mapped anymore */
 		goto out;
 	for_each_process (tsk) {
+		struct anon_vma_chain *vmac;
+
 		if (!task_early_kill(tsk))
 			continue;
-		list_for_each_entry (vma, &av->head, anon_vma_node) {
+		list_for_each_entry(vmac, &av->head, same_anon_vma) {
+			vma = vmac->vma;
 			if (!page_mapped_in_vma(page, vma))
 				continue;
 			if (vma->vm_mm == tsk->mm)
diff --git a/mm/memory.c b/mm/memory.c
index 77d9f840936b..dc785b438d70 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -374,7 +374,7 @@ void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma,
 		 * Hide vma from rmap and truncate_pagecache before freeing
 		 * pgtables
 		 */
-		anon_vma_unlink(vma);
+		unlink_anon_vmas(vma);
 		unlink_file_vma(vma);
 
 		if (is_vm_hugetlb_page(vma)) {
@@ -388,7 +388,7 @@ void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma,
 			       && !is_vm_hugetlb_page(next)) {
 				vma = next;
 				next = vma->vm_next;
-				anon_vma_unlink(vma);
+				unlink_anon_vmas(vma);
 				unlink_file_vma(vma);
 			}
 			free_pgd_range(tlb, addr, vma->vm_end,
diff --git a/mm/mmap.c b/mm/mmap.c
index 31656147128e..6a0c15db7f60 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -437,7 +437,6 @@ __vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
 {
 	__vma_link_list(mm, vma, prev, rb_parent);
 	__vma_link_rb(mm, vma, rb_link, rb_parent);
-	__anon_vma_link(vma);
 }
 
 static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
@@ -499,7 +498,7 @@ __vma_unlink(struct mm_struct *mm, struct vm_area_struct *vma,
  * are necessary.  The "insert" vma (if any) is to be inserted
  * before we drop the necessary locks.
  */
-void vma_adjust(struct vm_area_struct *vma, unsigned long start,
+int vma_adjust(struct vm_area_struct *vma, unsigned long start,
 	unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert)
 {
 	struct mm_struct *mm = vma->vm_mm;
@@ -542,6 +541,28 @@ again:			remove_next = 1 + (end > next->vm_end);
 		}
 	}
 
+	/*
+	 * When changing only vma->vm_end, we don't really need anon_vma lock.
+	 */
+	if (vma->anon_vma && (insert || importer || start != vma->vm_start))
+		anon_vma = vma->anon_vma;
+	if (anon_vma) {
+		/*
+		 * Easily overlooked: when mprotect shifts the boundary,
+		 * make sure the expanding vma has anon_vma set if the
+		 * shrinking vma had, to cover any anon pages imported.
+		 */
+		if (importer && !importer->anon_vma) {
+			/* Block reverse map lookups until things are set up. */
+			importer->vm_flags |= VM_LOCK_RMAP;
+			if (anon_vma_clone(importer, vma)) {
+				importer->vm_flags &= ~VM_LOCK_RMAP;
+				return -ENOMEM;
+			}
+			importer->anon_vma = anon_vma;
+		}
+	}
+
 	if (file) {
 		mapping = file->f_mapping;
 		if (!(vma->vm_flags & VM_NONLINEAR))
@@ -567,25 +588,6 @@ again:			remove_next = 1 + (end > next->vm_end);
 		}
 	}
 
-	/*
-	 * When changing only vma->vm_end, we don't really need
-	 * anon_vma lock.
-	 */
-	if (vma->anon_vma && (insert || importer || start != vma->vm_start))
-		anon_vma = vma->anon_vma;
-	if (anon_vma) {
-		spin_lock(&anon_vma->lock);
-		/*
-		 * Easily overlooked: when mprotect shifts the boundary,
-		 * make sure the expanding vma has anon_vma set if the
-		 * shrinking vma had, to cover any anon pages imported.
-		 */
-		if (importer && !importer->anon_vma) {
-			importer->anon_vma = anon_vma;
-			__anon_vma_link(importer);
-		}
-	}
-
 	if (root) {
 		flush_dcache_mmap_lock(mapping);
 		vma_prio_tree_remove(vma, root);
@@ -616,8 +618,11 @@ again:			remove_next = 1 + (end > next->vm_end);
 		__vma_unlink(mm, next, vma);
 		if (file)
 			__remove_shared_vm_struct(next, file, mapping);
-		if (next->anon_vma)
-			__anon_vma_merge(vma, next);
+		/*
+		 * This VMA is now dead, no need for rmap to follow it.
+		 * Call anon_vma_merge below, outside of i_mmap_lock.
+		 */
+		next->vm_flags |= VM_LOCK_RMAP;
 	} else if (insert) {
 		/*
 		 * split_vma has split insert from vma, and needs
@@ -627,17 +632,25 @@ again:			remove_next = 1 + (end > next->vm_end);
 		__insert_vm_struct(mm, insert);
 	}
 
-	if (anon_vma)
-		spin_unlock(&anon_vma->lock);
 	if (mapping)
 		spin_unlock(&mapping->i_mmap_lock);
 
+	/*
+	 * The current VMA has been set up. It is now safe for the
+	 * rmap code to get from the pages to the ptes.
+	 */
+	if (anon_vma && importer)
+		importer->vm_flags &= ~VM_LOCK_RMAP;
+
 	if (remove_next) {
 		if (file) {
 			fput(file);
 			if (next->vm_flags & VM_EXECUTABLE)
 				removed_exe_file_vma(mm);
 		}
+		/* Protected by mmap_sem and VM_LOCK_RMAP. */
+		if (next->anon_vma)
+			anon_vma_merge(vma, next);
 		mm->map_count--;
 		mpol_put(vma_policy(next));
 		kmem_cache_free(vm_area_cachep, next);
@@ -653,6 +666,8 @@ again:			remove_next = 1 + (end > next->vm_end);
 	}
 
 	validate_mm(mm);
+
+	return 0;
 }
 
 /*
@@ -759,6 +774,7 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
 {
 	pgoff_t pglen = (end - addr) >> PAGE_SHIFT;
 	struct vm_area_struct *area, *next;
+	int err;
 
 	/*
 	 * We later require that vma->vm_flags == vm_flags,
@@ -792,11 +808,13 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
 				is_mergeable_anon_vma(prev->anon_vma,
 						      next->anon_vma)) {
 							/* cases 1, 6 */
-			vma_adjust(prev, prev->vm_start,
+			err = vma_adjust(prev, prev->vm_start,
 				next->vm_end, prev->vm_pgoff, NULL);
 		} else					/* cases 2, 5, 7 */
-			vma_adjust(prev, prev->vm_start,
+			err = vma_adjust(prev, prev->vm_start,
 				end, prev->vm_pgoff, NULL);
+		if (err)
+			return NULL;
 		return prev;
 	}
 
@@ -808,11 +826,13 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
 			can_vma_merge_before(next, vm_flags,
 					anon_vma, file, pgoff+pglen)) {
 		if (prev && addr < prev->vm_end)	/* case 4 */
-			vma_adjust(prev, prev->vm_start,
+			err = vma_adjust(prev, prev->vm_start,
 				addr, prev->vm_pgoff, NULL);
 		else					/* cases 3, 8 */
-			vma_adjust(area, addr, next->vm_end,
+			err = vma_adjust(area, addr, next->vm_end,
 				next->vm_pgoff - pglen, NULL);
+		if (err)
+			return NULL;
 		return area;
 	}
 
@@ -1205,6 +1225,7 @@ munmap_back:
 	vma->vm_flags = vm_flags;
 	vma->vm_page_prot = vm_get_page_prot(vm_flags);
 	vma->vm_pgoff = pgoff;
+	INIT_LIST_HEAD(&vma->anon_vma_chain);
 
 	if (file) {
 		error = -EINVAL;
@@ -1865,6 +1886,7 @@ static int __split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
 {
 	struct mempolicy *pol;
 	struct vm_area_struct *new;
+	int err = -ENOMEM;
 
 	if (is_vm_hugetlb_page(vma) && (addr &
 					~(huge_page_mask(hstate_vma(vma)))))
@@ -1872,11 +1894,13 @@ static int __split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
 
 	new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
 	if (!new)
-		return -ENOMEM;
+		goto out_err;
 
 	/* most fields are the same, copy all, and then fixup */
 	*new = *vma;
 
+	INIT_LIST_HEAD(&new->anon_vma_chain);
+
 	if (new_below)
 		new->vm_end = addr;
 	else {
@@ -1886,11 +1910,14 @@ static int __split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
 
 	pol = mpol_dup(vma_policy(vma));
 	if (IS_ERR(pol)) {
-		kmem_cache_free(vm_area_cachep, new);
-		return PTR_ERR(pol);
+		err = PTR_ERR(pol);
+		goto out_free_vma;
 	}
 	vma_set_policy(new, pol);
 
+	if (anon_vma_clone(new, vma))
+		goto out_free_mpol;
+
 	if (new->vm_file) {
 		get_file(new->vm_file);
 		if (vma->vm_flags & VM_EXECUTABLE)
@@ -1901,12 +1928,28 @@ static int __split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
 		new->vm_ops->open(new);
 
 	if (new_below)
-		vma_adjust(vma, addr, vma->vm_end, vma->vm_pgoff +
+		err = vma_adjust(vma, addr, vma->vm_end, vma->vm_pgoff +
 			((addr - new->vm_start) >> PAGE_SHIFT), new);
 	else
-		vma_adjust(vma, vma->vm_start, addr, vma->vm_pgoff, new);
+		err = vma_adjust(vma, vma->vm_start, addr, vma->vm_pgoff, new);
 
-	return 0;
+	/* Success. */
+	if (!err)
+		return 0;
+
+	/* Clean everything up if vma_adjust failed. */
+	new->vm_ops->close(new);
+	if (new->vm_file) {
+		if (vma->vm_flags & VM_EXECUTABLE)
+			removed_exe_file_vma(mm);
+		fput(new->vm_file);
+	}
+ out_free_mpol:
+	mpol_put(pol);
+ out_free_vma:
+	kmem_cache_free(vm_area_cachep, new);
+ out_err:
+	return err;
 }
 
 /*
@@ -2116,6 +2159,7 @@ unsigned long do_brk(unsigned long addr, unsigned long len)
 		return -ENOMEM;
 	}
 
+	INIT_LIST_HEAD(&vma->anon_vma_chain);
 	vma->vm_mm = mm;
 	vma->vm_start = addr;
 	vma->vm_end = addr + len;
@@ -2252,10 +2296,11 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
 		if (new_vma) {
 			*new_vma = *vma;
 			pol = mpol_dup(vma_policy(vma));
-			if (IS_ERR(pol)) {
-				kmem_cache_free(vm_area_cachep, new_vma);
-				return NULL;
-			}
+			if (IS_ERR(pol))
+				goto out_free_vma;
+			INIT_LIST_HEAD(&new_vma->anon_vma_chain);
+			if (anon_vma_clone(new_vma, vma))
+				goto out_free_mempol;
 			vma_set_policy(new_vma, pol);
 			new_vma->vm_start = addr;
 			new_vma->vm_end = addr + len;
@@ -2271,6 +2316,12 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
 		}
 	}
 	return new_vma;
+
+ out_free_mempol:
+	mpol_put(pol);
+ out_free_vma:
+	kmem_cache_free(vm_area_cachep, new_vma);
+	return NULL;
 }
 
 /*
@@ -2348,6 +2399,7 @@ int install_special_mapping(struct mm_struct *mm,
 	if (unlikely(vma == NULL))
 		return -ENOMEM;
 
+	INIT_LIST_HEAD(&vma->anon_vma_chain);
 	vma->vm_mm = mm;
 	vma->vm_start = addr;
 	vma->vm_end = addr + len;
@@ -2448,6 +2500,7 @@ static void vm_lock_mapping(struct mm_struct *mm, struct address_space *mapping)
 int mm_take_all_locks(struct mm_struct *mm)
 {
 	struct vm_area_struct *vma;
+	struct anon_vma_chain *avc;
 	int ret = -EINTR;
 
 	BUG_ON(down_read_trylock(&mm->mmap_sem));
@@ -2465,7 +2518,8 @@ int mm_take_all_locks(struct mm_struct *mm)
 		if (signal_pending(current))
 			goto out_unlock;
 		if (vma->anon_vma)
-			vm_lock_anon_vma(mm, vma->anon_vma);
+			list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
+				vm_lock_anon_vma(mm, avc->anon_vma);
 	}
 
 	ret = 0;
@@ -2520,13 +2574,15 @@ static void vm_unlock_mapping(struct address_space *mapping)
 void mm_drop_all_locks(struct mm_struct *mm)
 {
 	struct vm_area_struct *vma;
+	struct anon_vma_chain *avc;
 
 	BUG_ON(down_read_trylock(&mm->mmap_sem));
 	BUG_ON(!mutex_is_locked(&mm_all_locks_mutex));
 
 	for (vma = mm->mmap; vma; vma = vma->vm_next) {
 		if (vma->anon_vma)
-			vm_unlock_anon_vma(vma->anon_vma);
+			list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
+				vm_unlock_anon_vma(avc->anon_vma);
 		if (vma->vm_file && vma->vm_file->f_mapping)
 			vm_unlock_mapping(vma->vm_file->f_mapping);
 	}
diff --git a/mm/mremap.c b/mm/mremap.c
index 4c4c803453f3..e9c75efce609 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -460,8 +460,11 @@ unsigned long do_mremap(unsigned long addr,
 		if (vma_expandable(vma, new_len - old_len)) {
 			int pages = (new_len - old_len) >> PAGE_SHIFT;
 
-			vma_adjust(vma, vma->vm_start,
-				addr + new_len, vma->vm_pgoff, NULL);
+			if (vma_adjust(vma, vma->vm_start, addr + new_len,
+				       vma->vm_pgoff, NULL)) {
+				ret = -ENOMEM;
+				goto out;
+			}
 
 			mm->total_vm += pages;
 			vm_stat_account(mm, vma->vm_flags, vma->vm_file, pages);
diff --git a/mm/nommu.c b/mm/nommu.c
index 48a2ecfaf059..55727a74af98 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1209,7 +1209,7 @@ unsigned long do_mmap_pgoff(struct file *file,
 	region->vm_flags = vm_flags;
 	region->vm_pgoff = pgoff;
 
-	INIT_LIST_HEAD(&vma->anon_vma_node);
+	INIT_LIST_HEAD(&vma->anon_vma_chain);
 	vma->vm_flags = vm_flags;
 	vma->vm_pgoff = pgoff;
 
diff --git a/mm/rmap.c b/mm/rmap.c
index 5cb47111f79e..be34094e4595 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -62,6 +62,7 @@
 #include "internal.h"
 
 static struct kmem_cache *anon_vma_cachep;
+static struct kmem_cache *anon_vma_chain_cachep;
 
 static inline struct anon_vma *anon_vma_alloc(void)
 {
@@ -73,6 +74,16 @@ void anon_vma_free(struct anon_vma *anon_vma)
 	kmem_cache_free(anon_vma_cachep, anon_vma);
 }
 
+static inline struct anon_vma_chain *anon_vma_chain_alloc(void)
+{
+	return kmem_cache_alloc(anon_vma_chain_cachep, GFP_KERNEL);
+}
+
+void anon_vma_chain_free(struct anon_vma_chain *anon_vma_chain)
+{
+	kmem_cache_free(anon_vma_chain_cachep, anon_vma_chain);
+}
+
 /**
  * anon_vma_prepare - attach an anon_vma to a memory region
  * @vma: the memory region in question
@@ -103,18 +114,23 @@ void anon_vma_free(struct anon_vma *anon_vma)
 int anon_vma_prepare(struct vm_area_struct *vma)
 {
 	struct anon_vma *anon_vma = vma->anon_vma;
+	struct anon_vma_chain *avc;
 
 	might_sleep();
 	if (unlikely(!anon_vma)) {
 		struct mm_struct *mm = vma->vm_mm;
 		struct anon_vma *allocated;
 
+		avc = anon_vma_chain_alloc();
+		if (!avc)
+			goto out_enomem;
+
 		anon_vma = find_mergeable_anon_vma(vma);
 		allocated = NULL;
 		if (!anon_vma) {
 			anon_vma = anon_vma_alloc();
 			if (unlikely(!anon_vma))
-				return -ENOMEM;
+				goto out_enomem_free_avc;
 			allocated = anon_vma;
 		}
 		spin_lock(&anon_vma->lock);
@@ -123,53 +139,113 @@ int anon_vma_prepare(struct vm_area_struct *vma)
 		spin_lock(&mm->page_table_lock);
 		if (likely(!vma->anon_vma)) {
 			vma->anon_vma = anon_vma;
-			list_add_tail(&vma->anon_vma_node, &anon_vma->head);
+			avc->anon_vma = anon_vma;
+			avc->vma = vma;
+			list_add(&avc->same_vma, &vma->anon_vma_chain);
+			list_add(&avc->same_anon_vma, &anon_vma->head);
 			allocated = NULL;
 		}
 		spin_unlock(&mm->page_table_lock);
 
 		spin_unlock(&anon_vma->lock);
-		if (unlikely(allocated))
+		if (unlikely(allocated)) {
 			anon_vma_free(allocated);
+			anon_vma_chain_free(avc);
+		}
 	}
 	return 0;
+
+ out_enomem_free_avc:
+	anon_vma_chain_free(avc);
+ out_enomem:
+	return -ENOMEM;
 }
 
-void __anon_vma_merge(struct vm_area_struct *vma, struct vm_area_struct *next)
+static void anon_vma_chain_link(struct vm_area_struct *vma,
+				struct anon_vma_chain *avc,
+				struct anon_vma *anon_vma)
 {
-	BUG_ON(vma->anon_vma != next->anon_vma);
-	list_del(&next->anon_vma_node);
+	avc->vma = vma;
+	avc->anon_vma = anon_vma;
+	list_add(&avc->same_vma, &vma->anon_vma_chain);
+
+	spin_lock(&anon_vma->lock);
+	list_add_tail(&avc->same_anon_vma, &anon_vma->head);
+	spin_unlock(&anon_vma->lock);
 }
 
-void __anon_vma_link(struct vm_area_struct *vma)
+/*
+ * Attach the anon_vmas from src to dst.
+ * Returns 0 on success, -ENOMEM on failure.
+ */
+int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
 {
-	struct anon_vma *anon_vma = vma->anon_vma;
+	struct anon_vma_chain *avc, *pavc;
+
+	list_for_each_entry(pavc, &src->anon_vma_chain, same_vma) {
+		avc = anon_vma_chain_alloc();
+		if (!avc)
+			goto enomem_failure;
+		anon_vma_chain_link(dst, avc, pavc->anon_vma);
+	}
+	return 0;
 
-	if (anon_vma)
-		list_add_tail(&vma->anon_vma_node, &anon_vma->head);
+ enomem_failure:
+	unlink_anon_vmas(dst);
+	return -ENOMEM;
 }
 
-void anon_vma_link(struct vm_area_struct *vma)
+/*
+ * Attach vma to its own anon_vma, as well as to the anon_vmas that
+ * the corresponding VMA in the parent process is attached to.
+ * Returns 0 on success, non-zero on failure.
+ */
+int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma)
 {
-	struct anon_vma *anon_vma = vma->anon_vma;
+	struct anon_vma_chain *avc;
+	struct anon_vma *anon_vma;
 
-	if (anon_vma) {
-		spin_lock(&anon_vma->lock);
-		list_add_tail(&vma->anon_vma_node, &anon_vma->head);
-		spin_unlock(&anon_vma->lock);
-	}
+	/* Don't bother if the parent process has no anon_vma here. */
+	if (!pvma->anon_vma)
+		return 0;
+
+	/*
+	 * First, attach the new VMA to the parent VMA's anon_vmas,
+	 * so rmap can find non-COWed pages in child processes.
+	 */
+	if (anon_vma_clone(vma, pvma))
+		return -ENOMEM;
+
+	/* Then add our own anon_vma. */
+	anon_vma = anon_vma_alloc();
+	if (!anon_vma)
+		goto out_error;
+	avc = anon_vma_chain_alloc();
+	if (!avc)
+		goto out_error_free_anon_vma;
+	anon_vma_chain_link(vma, avc, anon_vma);
+	/* Mark this anon_vma as the one where our new (COWed) pages go. */
+	vma->anon_vma = anon_vma;
+
+	return 0;
+
+ out_error_free_anon_vma:
+	anon_vma_free(anon_vma);
+ out_error:
+	return -ENOMEM;
 }
 
-void anon_vma_unlink(struct vm_area_struct *vma)
+static void anon_vma_unlink(struct anon_vma_chain *anon_vma_chain)
 {
-	struct anon_vma *anon_vma = vma->anon_vma;
+	struct anon_vma *anon_vma = anon_vma_chain->anon_vma;
 	int empty;
 
+	/* If anon_vma_fork fails, we can get an empty anon_vma_chain. */
 	if (!anon_vma)
 		return;
 
 	spin_lock(&anon_vma->lock);
-	list_del(&vma->anon_vma_node);
+	list_del(&anon_vma_chain->same_anon_vma);
 
 	/* We must garbage collect the anon_vma if it's empty */
 	empty = list_empty(&anon_vma->head) && !ksm_refcount(anon_vma);
@@ -179,6 +255,18 @@ void anon_vma_unlink(struct vm_area_struct *vma)
 		anon_vma_free(anon_vma);
 }
 
+void unlink_anon_vmas(struct vm_area_struct *vma)
+{
+	struct anon_vma_chain *avc, *next;
+
+	/* Unlink each anon_vma chained to the VMA. */
+	list_for_each_entry_safe(avc, next, &vma->anon_vma_chain, same_vma) {
+		anon_vma_unlink(avc);
+		list_del(&avc->same_vma);
+		anon_vma_chain_free(avc);
+	}
+}
+
 static void anon_vma_ctor(void *data)
 {
 	struct anon_vma *anon_vma = data;
@@ -192,6 +280,7 @@ void __init anon_vma_init(void)
 {
 	anon_vma_cachep = kmem_cache_create("anon_vma", sizeof(struct anon_vma),
 			0, SLAB_DESTROY_BY_RCU|SLAB_PANIC, anon_vma_ctor);
+	anon_vma_chain_cachep = KMEM_CACHE(anon_vma_chain, SLAB_PANIC);
 }
 
 /*
@@ -240,6 +329,18 @@ vma_address(struct page *page, struct vm_area_struct *vma)
 		/* page should be within @vma mapping range */
 		return -EFAULT;
 	}
+	if (unlikely(vma->vm_flags & VM_LOCK_RMAP)) {
+		/*
+		 * This VMA is being unlinked or is not yet linked into the
+		 * VMA tree.  Do not try to follow this rmap.  This race
+		 * condition can result in page_referenced() ignoring a
+		 * reference or in try_to_unmap() failing to unmap a page.
+		 * The VMA cannot be freed under us because we hold the
+		 * anon_vma->lock, which the munmap code takes while
+		 * unlinking the anon_vmas from the VMA.
+		 */
+		return -EFAULT;
+	}
 	return address;
 }
 
@@ -396,7 +497,7 @@ static int page_referenced_anon(struct page *page,
 {
 	unsigned int mapcount;
 	struct anon_vma *anon_vma;
-	struct vm_area_struct *vma;
+	struct anon_vma_chain *avc;
 	int referenced = 0;
 
 	anon_vma = page_lock_anon_vma(page);
@@ -404,7 +505,8 @@ static int page_referenced_anon(struct page *page,
 		return referenced;
 
 	mapcount = page_mapcount(page);
-	list_for_each_entry(vma, &anon_vma->head, anon_vma_node) {
+	list_for_each_entry(avc, &anon_vma->head, same_anon_vma) {
+		struct vm_area_struct *vma = avc->vma;
 		unsigned long address = vma_address(page, vma);
 		if (address == -EFAULT)
 			continue;
@@ -1025,14 +1127,15 @@ static int try_to_unmap_cluster(unsigned long cursor, unsigned int *mapcount,
 static int try_to_unmap_anon(struct page *page, enum ttu_flags flags)
 {
 	struct anon_vma *anon_vma;
-	struct vm_area_struct *vma;
+	struct anon_vma_chain *avc;
 	int ret = SWAP_AGAIN;
 
 	anon_vma = page_lock_anon_vma(page);
 	if (!anon_vma)
 		return ret;
 
-	list_for_each_entry(vma, &anon_vma->head, anon_vma_node) {
+	list_for_each_entry(avc, &anon_vma->head, same_anon_vma) {
+		struct vm_area_struct *vma = avc->vma;
 		unsigned long address = vma_address(page, vma);
 		if (address == -EFAULT)
 			continue;
@@ -1223,7 +1326,7 @@ static int rmap_walk_anon(struct page *page, int (*rmap_one)(struct page *,
 		struct vm_area_struct *, unsigned long, void *), void *arg)
 {
 	struct anon_vma *anon_vma;
-	struct vm_area_struct *vma;
+	struct anon_vma_chain *avc;
 	int ret = SWAP_AGAIN;
 
 	/*
@@ -1238,7 +1341,8 @@ static int rmap_walk_anon(struct page *page, int (*rmap_one)(struct page *,
 	if (!anon_vma)
 		return ret;
 	spin_lock(&anon_vma->lock);
-	list_for_each_entry(vma, &anon_vma->head, anon_vma_node) {
+	list_for_each_entry(avc, &anon_vma->head, same_anon_vma) {
+		struct vm_area_struct *vma = avc->vma;
 		unsigned long address = vma_address(page, vma);
 		if (address == -EFAULT)
 			continue;
-- 
cgit v1.2.3


From c44b674323f4a2480dbeb65d4b487fa5f06f49e0 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@redhat.com>
Date: Fri, 5 Mar 2010 13:42:09 -0800
Subject: rmap: move exclusively owned pages to own anon_vma in do_wp_page()

When the parent process breaks the COW on a page, both the original which
is mapped at child and the new page which is mapped parent end up in that
same anon_vma.  Generally this won't be a problem, but for some workloads
it could preserve the O(N) rmap scanning complexity.

A simple fix is to ensure that, when a page which is mapped child gets
reused in do_wp_page, because we already are the exclusive owner, the page
gets moved to our own exclusive child's anon_vma.

Signed-off-by: Rik van Riel <riel@redhat.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Larry Woodman <lwoodman@redhat.com>
Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
Reviewed-by: Minchan Kim <minchan.kim@gmail.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/rmap.h |  1 +
 mm/memory.c          |  7 +++++++
 mm/rmap.c            | 24 ++++++++++++++++++++++++
 3 files changed, 32 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 62da2001d55c..72be23b1480a 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -125,6 +125,7 @@ static inline void anon_vma_merge(struct vm_area_struct *vma,
 /*
  * rmap interfaces called when adding or removing pte of page
  */
+void page_move_anon_rmap(struct page *, struct vm_area_struct *, unsigned long);
 void page_add_anon_rmap(struct page *, struct vm_area_struct *, unsigned long);
 void page_add_new_anon_rmap(struct page *, struct vm_area_struct *, unsigned long);
 void page_add_file_rmap(struct page *);
diff --git a/mm/memory.c b/mm/memory.c
index dc785b438d70..d1153e37e9ba 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2138,6 +2138,13 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 			page_cache_release(old_page);
 		}
 		reuse = reuse_swap_page(old_page);
+		if (reuse)
+			/*
+			 * The page is all ours.  Move it to our anon_vma so
+			 * the rmap code will not search our parent or siblings.
+			 * Protected against the rmap code by the page lock.
+			 */
+			page_move_anon_rmap(old_page, vma, address);
 		unlock_page(old_page);
 	} else if (unlikely((vma->vm_flags & (VM_WRITE|VM_SHARED)) ==
 					(VM_WRITE|VM_SHARED))) {
diff --git a/mm/rmap.c b/mm/rmap.c
index 23ecd0a892df..28bcdc433d88 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -715,6 +715,30 @@ int page_mkclean(struct page *page)
 }
 EXPORT_SYMBOL_GPL(page_mkclean);
 
+/**
+ * page_move_anon_rmap - move a page to our anon_vma
+ * @page:	the page to move to our anon_vma
+ * @vma:	the vma the page belongs to
+ * @address:	the user virtual address mapped
+ *
+ * When a page belongs exclusively to one process after a COW event,
+ * that page can be moved into the anon_vma that belongs to just that
+ * process, so the rmap code will not search the parent or sibling
+ * processes.
+ */
+void page_move_anon_rmap(struct page *page,
+	struct vm_area_struct *vma, unsigned long address)
+{
+	struct anon_vma *anon_vma = vma->anon_vma;
+
+	VM_BUG_ON(!PageLocked(page));
+	VM_BUG_ON(!anon_vma);
+	VM_BUG_ON(page->index != linear_page_index(vma, address));
+
+	anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
+	page->mapping = (struct address_space *) anon_vma;
+}
+
 /**
  * __page_set_anon_rmap - setup new anonymous rmap
  * @page:	the page to add the mapping to
-- 
cgit v1.2.3


From fc148a5f7e0532750c312385c7ee9fa3e9311f34 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@redhat.com>
Date: Fri, 5 Mar 2010 13:42:10 -0800
Subject: mm: remove VM_LOCK_RMAP code

When a VMA is in an inconsistent state during setup or teardown, the worst
that can happen is that the rmap code will not be able to find the page.

The mapping is in the process of being torn down (PTEs just got
invalidated by munmap), or set up (no PTEs have been instantiated yet).

It is also impossible for the rmap code to follow a pointer to an already
freed VMA, because the rmap code holds the anon_vma->lock, which the VMA
teardown code needs to take before the VMA is removed from the anon_vma
chain.

Hence, we should not need the VM_LOCK_RMAP locking at all.

Signed-off-by: Rik van Riel <riel@redhat.com>
Cc: Nick Piggin <npiggin@suse.de>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Larry Woodman <lwoodman@redhat.com>
Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h |  4 ----
 mm/mmap.c          | 15 ---------------
 mm/rmap.c          | 12 ------------
 3 files changed, 31 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 8e2841a2f441..3899395a03de 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -97,11 +97,7 @@ extern unsigned int kobjsize(const void *objp);
 #define VM_NORESERVE	0x00200000	/* should the VM suppress accounting */
 #define VM_HUGETLB	0x00400000	/* Huge TLB Page VM */
 #define VM_NONLINEAR	0x00800000	/* Is non-linear (remap_file_pages) */
-#ifdef CONFIG_MMU
-#define VM_LOCK_RMAP	0x01000000	/* Do not follow this rmap (mmu mmap) */
-#else
 #define VM_MAPPED_COPY	0x01000000	/* T if mapped copy of data (nommu mmap) */
-#endif
 #define VM_INSERTPAGE	0x02000000	/* The vma has had "vm_insert_page()" done on it */
 #define VM_ALWAYSDUMP	0x04000000	/* Always include in core dumps */
 
diff --git a/mm/mmap.c b/mm/mmap.c
index 6a0c15db7f60..f1b4448626bf 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -554,9 +554,7 @@ again:			remove_next = 1 + (end > next->vm_end);
 		 */
 		if (importer && !importer->anon_vma) {
 			/* Block reverse map lookups until things are set up. */
-			importer->vm_flags |= VM_LOCK_RMAP;
 			if (anon_vma_clone(importer, vma)) {
-				importer->vm_flags &= ~VM_LOCK_RMAP;
 				return -ENOMEM;
 			}
 			importer->anon_vma = anon_vma;
@@ -618,11 +616,6 @@ again:			remove_next = 1 + (end > next->vm_end);
 		__vma_unlink(mm, next, vma);
 		if (file)
 			__remove_shared_vm_struct(next, file, mapping);
-		/*
-		 * This VMA is now dead, no need for rmap to follow it.
-		 * Call anon_vma_merge below, outside of i_mmap_lock.
-		 */
-		next->vm_flags |= VM_LOCK_RMAP;
 	} else if (insert) {
 		/*
 		 * split_vma has split insert from vma, and needs
@@ -635,20 +628,12 @@ again:			remove_next = 1 + (end > next->vm_end);
 	if (mapping)
 		spin_unlock(&mapping->i_mmap_lock);
 
-	/*
-	 * The current VMA has been set up. It is now safe for the
-	 * rmap code to get from the pages to the ptes.
-	 */
-	if (anon_vma && importer)
-		importer->vm_flags &= ~VM_LOCK_RMAP;
-
 	if (remove_next) {
 		if (file) {
 			fput(file);
 			if (next->vm_flags & VM_EXECUTABLE)
 				removed_exe_file_vma(mm);
 		}
-		/* Protected by mmap_sem and VM_LOCK_RMAP. */
 		if (next->anon_vma)
 			anon_vma_merge(vma, next);
 		mm->map_count--;
diff --git a/mm/rmap.c b/mm/rmap.c
index 28bcdc433d88..4d2fb93851ca 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -329,18 +329,6 @@ vma_address(struct page *page, struct vm_area_struct *vma)
 		/* page should be within @vma mapping range */
 		return -EFAULT;
 	}
-	if (unlikely(vma->vm_flags & VM_LOCK_RMAP)) {
-		/*
-		 * This VMA is being unlinked or is not yet linked into the
-		 * VMA tree.  Do not try to follow this rmap.  This race
-		 * condition can result in page_referenced() ignoring a
-		 * reference or in try_to_unmap() failing to unmap a page.
-		 * The VMA cannot be freed under us because we hold the
-		 * anon_vma->lock, which the munmap code takes while
-		 * unlinking the anon_vmas from the VMA.
-		 */
-		return -EFAULT;
-	}
 	return address;
 }
 
-- 
cgit v1.2.3


From 452aa6999e6703ffbddd7f6ea124d3968915f3e3 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Fri, 5 Mar 2010 13:42:13 -0800
Subject: mm/pm: force GFP_NOIO during suspend/hibernation and resume

There are quite a few GFP_KERNEL memory allocations made during
suspend/hibernation and resume that may cause the system to hang, because
the I/O operations they depend on cannot be completed due to the
underlying devices being suspended.

Avoid this problem by clearing the __GFP_IO and __GFP_FS bits in
gfp_allowed_mask before suspend/hibernation and restoring the original
values of these bits in gfp_allowed_mask durig the subsequent resume.

[akpm@linux-foundation.org: fix CONFIG_PM=n linkage]
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Reported-by: Maxim Levitsky <maximlevitsky@gmail.com>
Cc: Sebastian Ott <sebott@linux.vnet.ibm.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/gfp.h      |  7 +++----
 init/main.c              |  2 +-
 kernel/power/hibernate.c |  9 +++++++++
 kernel/power/suspend.c   |  3 +++
 mm/page_alloc.c          | 25 +++++++++++++++++++++++++
 5 files changed, 41 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index e5567e6762f3..2e1b32c0484d 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -83,6 +83,7 @@ struct vm_area_struct;
 #define GFP_HIGHUSER_MOVABLE	(__GFP_WAIT | __GFP_IO | __GFP_FS | \
 				 __GFP_HARDWALL | __GFP_HIGHMEM | \
 				 __GFP_MOVABLE)
+#define GFP_IOFS	(__GFP_IO | __GFP_FS)
 
 #ifdef CONFIG_NUMA
 #define GFP_THISNODE	(__GFP_THISNODE | __GFP_NOWARN | __GFP_NORETRY)
@@ -337,9 +338,7 @@ void drain_local_pages(void *dummy);
 
 extern gfp_t gfp_allowed_mask;
 
-static inline void set_gfp_allowed_mask(gfp_t mask)
-{
-	gfp_allowed_mask = mask;
-}
+extern void set_gfp_allowed_mask(gfp_t mask);
+extern gfp_t clear_gfp_allowed_mask(gfp_t mask);
 
 #endif /* __LINUX_GFP_H */
diff --git a/init/main.c b/init/main.c
index 40aaa020cd68..41d0f10dbbc7 100644
--- a/init/main.c
+++ b/init/main.c
@@ -618,7 +618,7 @@ asmlinkage void __init start_kernel(void)
 	local_irq_enable();
 
 	/* Interrupts are enabled now so all GFP allocations are safe. */
-	set_gfp_allowed_mask(__GFP_BITS_MASK);
+	gfp_allowed_mask = __GFP_BITS_MASK;
 
 	kmem_cache_init_late();
 
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index bbfe472d7524..da5288ec2392 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -323,6 +323,7 @@ static int create_image(int platform_mode)
 int hibernation_snapshot(int platform_mode)
 {
 	int error;
+	gfp_t saved_mask;
 
 	error = platform_begin(platform_mode);
 	if (error)
@@ -334,6 +335,7 @@ int hibernation_snapshot(int platform_mode)
 		goto Close;
 
 	suspend_console();
+	saved_mask = clear_gfp_allowed_mask(GFP_IOFS);
 	error = dpm_suspend_start(PMSG_FREEZE);
 	if (error)
 		goto Recover_platform;
@@ -351,6 +353,7 @@ int hibernation_snapshot(int platform_mode)
 
 	dpm_resume_end(in_suspend ?
 		(error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE);
+	set_gfp_allowed_mask(saved_mask);
 	resume_console();
  Close:
 	platform_end(platform_mode);
@@ -445,14 +448,17 @@ static int resume_target_kernel(bool platform_mode)
 int hibernation_restore(int platform_mode)
 {
 	int error;
+	gfp_t saved_mask;
 
 	pm_prepare_console();
 	suspend_console();
+	saved_mask = clear_gfp_allowed_mask(GFP_IOFS);
 	error = dpm_suspend_start(PMSG_QUIESCE);
 	if (!error) {
 		error = resume_target_kernel(platform_mode);
 		dpm_resume_end(PMSG_RECOVER);
 	}
+	set_gfp_allowed_mask(saved_mask);
 	resume_console();
 	pm_restore_console();
 	return error;
@@ -466,6 +472,7 @@ int hibernation_restore(int platform_mode)
 int hibernation_platform_enter(void)
 {
 	int error;
+	gfp_t saved_mask;
 
 	if (!hibernation_ops)
 		return -ENOSYS;
@@ -481,6 +488,7 @@ int hibernation_platform_enter(void)
 
 	entering_platform_hibernation = true;
 	suspend_console();
+	saved_mask = clear_gfp_allowed_mask(GFP_IOFS);
 	error = dpm_suspend_start(PMSG_HIBERNATE);
 	if (error) {
 		if (hibernation_ops->recover)
@@ -518,6 +526,7 @@ int hibernation_platform_enter(void)
  Resume_devices:
 	entering_platform_hibernation = false;
 	dpm_resume_end(PMSG_RESTORE);
+	set_gfp_allowed_mask(saved_mask);
 	resume_console();
 
  Close:
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index 6f10dfc2d3e9..44cce10b582d 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -189,6 +189,7 @@ static int suspend_enter(suspend_state_t state)
 int suspend_devices_and_enter(suspend_state_t state)
 {
 	int error;
+	gfp_t saved_mask;
 
 	if (!suspend_ops)
 		return -ENOSYS;
@@ -199,6 +200,7 @@ int suspend_devices_and_enter(suspend_state_t state)
 			goto Close;
 	}
 	suspend_console();
+	saved_mask = clear_gfp_allowed_mask(GFP_IOFS);
 	suspend_test_start();
 	error = dpm_suspend_start(PMSG_SUSPEND);
 	if (error) {
@@ -215,6 +217,7 @@ int suspend_devices_and_enter(suspend_state_t state)
 	suspend_test_start();
 	dpm_resume_end(PMSG_RESUME);
 	suspend_test_finish("resume devices");
+	set_gfp_allowed_mask(saved_mask);
 	resume_console();
  Close:
 	if (suspend_ops->end)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 0734bedabd9c..298f307c63a1 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -76,6 +76,31 @@ unsigned long totalreserve_pages __read_mostly;
 int percpu_pagelist_fraction;
 gfp_t gfp_allowed_mask __read_mostly = GFP_BOOT_MASK;
 
+#ifdef CONFIG_PM_SLEEP
+/*
+ * The following functions are used by the suspend/hibernate code to temporarily
+ * change gfp_allowed_mask in order to avoid using I/O during memory allocations
+ * while devices are suspended.  To avoid races with the suspend/hibernate code,
+ * they should always be called with pm_mutex held (gfp_allowed_mask also should
+ * only be modified with pm_mutex held, unless the suspend/hibernate code is
+ * guaranteed not to run in parallel with that modification).
+ */
+void set_gfp_allowed_mask(gfp_t mask)
+{
+	WARN_ON(!mutex_is_locked(&pm_mutex));
+	gfp_allowed_mask = mask;
+}
+
+gfp_t clear_gfp_allowed_mask(gfp_t mask)
+{
+	gfp_t ret = gfp_allowed_mask;
+
+	WARN_ON(!mutex_is_locked(&pm_mutex));
+	gfp_allowed_mask &= ~mask;
+	return ret;
+}
+#endif /* CONFIG_PM_SLEEP */
+
 #ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE
 int pageblock_order __read_mostly;
 #endif
-- 
cgit v1.2.3


From 645747462435d84c6c6a64269ed49cc3015f753d Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Fri, 5 Mar 2010 13:42:22 -0800
Subject: vmscan: detect mapped file pages used only once

The VM currently assumes that an inactive, mapped and referenced file page
is in use and promotes it to the active list.

However, every mapped file page starts out like this and thus a problem
arises when workloads create a stream of such pages that are used only for
a short time.  By flooding the active list with those pages, the VM
quickly gets into trouble finding eligible reclaim canditates.  The result
is long allocation latencies and eviction of the wrong pages.

This patch reuses the PG_referenced page flag (used for unmapped file
pages) to implement a usage detection that scales with the speed of LRU
list cycling (i.e.  memory pressure).

If the scanner encounters those pages, the flag is set and the page cycled
again on the inactive list.  Only if it returns with another page table
reference it is activated.  Otherwise it is reclaimed as 'not recently
used cache'.

This effectively changes the minimum lifetime of a used-once mapped file
page from a full memory cycle to an inactive list cycle, which allows it
to occur in linear streams without affecting the stable working set of the
system.

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: OSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Lee Schermerhorn <lee.schermerhorn@hp.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/rmap.h |  2 +-
 mm/rmap.c            |  3 ---
 mm/vmscan.c          | 45 +++++++++++++++++++++++++++++++++++----------
 3 files changed, 36 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 72be23b1480a..d25bd224d370 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -209,7 +209,7 @@ static inline int page_referenced(struct page *page, int is_locked,
 				  unsigned long *vm_flags)
 {
 	*vm_flags = 0;
-	return TestClearPageReferenced(page);
+	return 0;
 }
 
 #define try_to_unmap(page, refs) SWAP_FAIL
diff --git a/mm/rmap.c b/mm/rmap.c
index 4d2fb93851ca..fcd593c9c997 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -601,9 +601,6 @@ int page_referenced(struct page *page,
 	int referenced = 0;
 	int we_locked = 0;
 
-	if (TestClearPageReferenced(page))
-		referenced++;
-
 	*vm_flags = 0;
 	if (page_mapped(page) && page_rmapping(page)) {
 		if (!is_locked && (!PageAnon(page) || PageKsm(page))) {
diff --git a/mm/vmscan.c b/mm/vmscan.c
index d9a0e0d3aac7..79c809895fba 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -561,18 +561,18 @@ redo:
 enum page_references {
 	PAGEREF_RECLAIM,
 	PAGEREF_RECLAIM_CLEAN,
+	PAGEREF_KEEP,
 	PAGEREF_ACTIVATE,
 };
 
 static enum page_references page_check_references(struct page *page,
 						  struct scan_control *sc)
 {
+	int referenced_ptes, referenced_page;
 	unsigned long vm_flags;
-	int referenced;
 
-	referenced = page_referenced(page, 1, sc->mem_cgroup, &vm_flags);
-	if (!referenced)
-		return PAGEREF_RECLAIM;
+	referenced_ptes = page_referenced(page, 1, sc->mem_cgroup, &vm_flags);
+	referenced_page = TestClearPageReferenced(page);
 
 	/* Lumpy reclaim - ignore references */
 	if (sc->order > PAGE_ALLOC_COSTLY_ORDER)
@@ -585,11 +585,36 @@ static enum page_references page_check_references(struct page *page,
 	if (vm_flags & VM_LOCKED)
 		return PAGEREF_RECLAIM;
 
-	if (page_mapped(page))
-		return PAGEREF_ACTIVATE;
+	if (referenced_ptes) {
+		if (PageAnon(page))
+			return PAGEREF_ACTIVATE;
+		/*
+		 * All mapped pages start out with page table
+		 * references from the instantiating fault, so we need
+		 * to look twice if a mapped file page is used more
+		 * than once.
+		 *
+		 * Mark it and spare it for another trip around the
+		 * inactive list.  Another page table reference will
+		 * lead to its activation.
+		 *
+		 * Note: the mark is set for activated pages as well
+		 * so that recently deactivated but used pages are
+		 * quickly recovered.
+		 */
+		SetPageReferenced(page);
+
+		if (referenced_page)
+			return PAGEREF_ACTIVATE;
+
+		return PAGEREF_KEEP;
+	}
 
 	/* Reclaim if clean, defer dirty pages to writeback */
-	return PAGEREF_RECLAIM_CLEAN;
+	if (referenced_page)
+		return PAGEREF_RECLAIM_CLEAN;
+
+	return PAGEREF_RECLAIM;
 }
 
 /*
@@ -657,6 +682,8 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 		switch (references) {
 		case PAGEREF_ACTIVATE:
 			goto activate_locked;
+		case PAGEREF_KEEP:
+			goto keep_locked;
 		case PAGEREF_RECLAIM:
 		case PAGEREF_RECLAIM_CLEAN:
 			; /* try to reclaim the page below */
@@ -1359,9 +1386,7 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 			continue;
 		}
 
-		/* page_referenced clears PageReferenced */
-		if (page_mapped(page) &&
-		    page_referenced(page, 0, sc->mem_cgroup, &vm_flags)) {
+		if (page_referenced(page, 0, sc->mem_cgroup, &vm_flags)) {
 			nr_rotated++;
 			/*
 			 * Identify referenced, file-backed active pages and
-- 
cgit v1.2.3


From 478352e789f507105193d3d0177c3b4f26da0399 Mon Sep 17 00:00:00 2001
From: David Rientjes <rientjes@google.com>
Date: Fri, 5 Mar 2010 13:42:23 -0800
Subject: mm: add comment about deprecation of __GFP_NOFAIL

__GFP_NOFAIL was deprecated in dab48dab, so add a comment that no new
users should be added.

Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/gfp.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 2e1b32c0484d..4c6d41333f98 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -30,7 +30,8 @@ struct vm_area_struct;
  * _might_ fail.  This depends upon the particular VM implementation.
  *
  * __GFP_NOFAIL: The VM implementation _must_ retry infinitely: the caller
- * cannot handle allocation failures.
+ * cannot handle allocation failures.  This modifier is deprecated and no new
+ * users should be added.
  *
  * __GFP_NORETRY: The VM implementation must not retry indefinitely.
  *
-- 
cgit v1.2.3


From 221e3ebf6d5f2625373573155924e39f196c5d3d Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Fri, 5 Mar 2010 13:42:41 -0800
Subject: cpumask: let num_*_cpus() function always return unsigned values

Dependent on CONFIG_SMP the num_*_cpus() functions return unsigned or
signed values.  Let them always return unsigned values to avoid strange
casts.

Fixes at least one warning:

 kernel/kprobes.c: In function 'register_kretprobe':
 kernel/kprobes.c:1038: warning: comparison of distinct pointer types lacks a cast

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Cc: Masami Hiramatsu <mhiramat@redhat.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/cpumask.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index dbcee7647d9a..bae6fe24d1f9 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -90,10 +90,10 @@ extern const struct cpumask *const cpu_active_mask;
 #define cpu_present(cpu)	cpumask_test_cpu((cpu), cpu_present_mask)
 #define cpu_active(cpu)		cpumask_test_cpu((cpu), cpu_active_mask)
 #else
-#define num_online_cpus()	1
-#define num_possible_cpus()	1
-#define num_present_cpus()	1
-#define num_active_cpus()	1
+#define num_online_cpus()	1U
+#define num_possible_cpus()	1U
+#define num_present_cpus()	1U
+#define num_active_cpus()	1U
 #define cpu_online(cpu)		((cpu) == 0)
 #define cpu_possible(cpu)	((cpu) == 0)
 #define cpu_present(cpu)	((cpu) == 0)
-- 
cgit v1.2.3


From 72c3368856c543ace033f6a5b9a3edf1f4043236 Mon Sep 17 00:00:00 2001
From: H Hartley Sweeten <hartleys@visionengravers.com>
Date: Fri, 5 Mar 2010 13:42:43 -0800
Subject: nodemask.h: remove macro any_online_node

The macro any_online_node() is prone to producing sparse warnings due to
the local symbol 'node'.  Since all the in-tree users are really
requesting the first online node (the mask argument is either
NODE_MASK_ALL or node_online_map) just use the first_online_node macro and
remove the any_online_node macro since there are no users.

Signed-off-by: H Hartley Sweeten <hsweeten@visionengravers.com>
Acked-by: David Rientjes <rientjes@google.com>
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Lee Schermerhorn <lee.schermerhorn@hp.com>
Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Dave Hansen <dave@linux.vnet.ibm.com>
Cc: Milton Miller <miltonm@bga.com>
Cc: Nathan Fontenot <nfont@austin.ibm.com>
Cc: Geoff Levand <geoffrey.levand@am.sony.com>
Cc: Grant Likely <grant.likely@secretlab.ca>
Cc: J. Bruce Fields <bfields@fieldses.org>
Cc: Neil Brown <neilb@suse.de>
Cc: Trond Myklebust <Trond.Myklebust@netapp.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Benny Halevy <bhalevy@panasas.com>
Cc: Chuck Lever <chuck.lever@oracle.com>
Cc: Ricardo Labiaga <Ricardo.Labiaga@netapp.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/powerpc/mm/numa.c   |  6 +++---
 include/linux/nodemask.h | 11 -----------
 net/sunrpc/svc.c         |  2 +-
 3 files changed, 4 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index b037d95eeadc..64c00227b997 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -451,7 +451,7 @@ static int __cpuinit numa_setup_cpu(unsigned long lcpu)
 	nid = of_node_to_nid_single(cpu);
 
 	if (nid < 0 || !node_online(nid))
-		nid = any_online_node(NODE_MASK_ALL);
+		nid = first_online_node;
 out:
 	map_cpu_to_node(lcpu, nid);
 
@@ -1114,7 +1114,7 @@ int hot_add_scn_to_nid(unsigned long scn_addr)
 	int nid, found = 0;
 
 	if (!numa_enabled || (min_common_depth < 0))
-		return any_online_node(NODE_MASK_ALL);
+		return first_online_node;
 
 	memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
 	if (memory) {
@@ -1125,7 +1125,7 @@ int hot_add_scn_to_nid(unsigned long scn_addr)
 	}
 
 	if (nid < 0 || !node_online(nid))
-		nid = any_online_node(NODE_MASK_ALL);
+		nid = first_online_node;
 
 	if (NODE_DATA(nid)->node_spanned_pages)
 		return nid;
diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h
index 454997cccbd8..c4fa64b585ff 100644
--- a/include/linux/nodemask.h
+++ b/include/linux/nodemask.h
@@ -69,8 +69,6 @@
  * int node_online(node)		Is some node online?
  * int node_possible(node)		Is some node possible?
  *
- * int any_online_node(mask)		First online node in mask
- *
  * node_set_online(node)		set bit 'node' in node_online_map
  * node_set_offline(node)		clear bit 'node' in node_online_map
  *
@@ -467,15 +465,6 @@ static inline int num_node_state(enum node_states state)
 #define node_online_map 	node_states[N_ONLINE]
 #define node_possible_map 	node_states[N_POSSIBLE]
 
-#define any_online_node(mask)			\
-({						\
-	int node;				\
-	for_each_node_mask(node, (mask))	\
-		if (node_online(node))		\
-			break;			\
-	node;					\
-})
-
 #define num_online_nodes()	num_node_state(N_ONLINE)
 #define num_possible_nodes()	num_node_state(N_POSSIBLE)
 #define node_online(node)	node_state((node), N_ONLINE)
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 6dcf8c9c784c..8420a4205b76 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -133,7 +133,7 @@ svc_pool_map_choose_mode(void)
 		return SVC_POOL_PERNODE;
 	}
 
-	node = any_online_node(node_online_map);
+	node = first_online_node;
 	if (nr_cpus_node(node) > 2) {
 		/*
 		 * Non-trivial SMP, or CONFIG_NUMA on
-- 
cgit v1.2.3


From cfd8d6c0ed89ba387609419e3d8d4c6b92a5d446 Mon Sep 17 00:00:00 2001
From: Rakib Mullick <rakib.mullick@gmail.com>
Date: Fri, 5 Mar 2010 13:42:45 -0800
Subject: smp: fix documentation in include/linux/smp.h

smp: Fix documentation.

Fix documentation in include/linux/smp.h: smp_processor_id()

Signed-off-by: Rakib Mullick <rakib.mullick@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/smp.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/smp.h b/include/linux/smp.h
index 7a0570e6a596..cfa2d20e35f1 100644
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -154,7 +154,7 @@ smp_call_function_any(const struct cpumask *mask, void (*func)(void *info),
 /*
  * smp_processor_id(): get the current CPU ID.
  *
- * if DEBUG_PREEMPT is enabled the we check whether it is
+ * if DEBUG_PREEMPT is enabled then we check whether it is
  * used in a preemption-safe way. (smp_processor_id() is safe
  * if it's used in a preemption-off critical section, or in
  * a thread that is bound to the current CPU.)
-- 
cgit v1.2.3


From 9a86e2bad0b9fbf3290ae496da6dab9536dd6bf7 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <ben@decadent.org.uk>
Date: Fri, 5 Mar 2010 13:43:17 -0800
Subject: lib: fix first line of kernel-doc for a few functions

The function name must be followed by a space, hypen, space, and a short
description.

Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/list.h | 6 +++---
 lib/bitmap.c         | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/list.h b/include/linux/list.h
index 5d9c6558e8ab..8392884a2977 100644
--- a/include/linux/list.h
+++ b/include/linux/list.h
@@ -498,7 +498,7 @@ static inline void list_splice_tail_init(struct list_head *list,
 	     pos = n, n = list_entry(n->member.next, typeof(*n), member))
 
 /**
- * list_for_each_entry_safe_continue
+ * list_for_each_entry_safe_continue - continue list iteration safe against removal
  * @pos:	the type * to use as a loop cursor.
  * @n:		another type * to use as temporary storage
  * @head:	the head for your list.
@@ -514,7 +514,7 @@ static inline void list_splice_tail_init(struct list_head *list,
 	     pos = n, n = list_entry(n->member.next, typeof(*n), member))
 
 /**
- * list_for_each_entry_safe_from
+ * list_for_each_entry_safe_from - iterate over list from current point safe against removal
  * @pos:	the type * to use as a loop cursor.
  * @n:		another type * to use as temporary storage
  * @head:	the head for your list.
@@ -529,7 +529,7 @@ static inline void list_splice_tail_init(struct list_head *list,
 	     pos = n, n = list_entry(n->member.next, typeof(*n), member))
 
 /**
- * list_for_each_entry_safe_reverse
+ * list_for_each_entry_safe_reverse - iterate backwards over list safe against removal
  * @pos:	the type * to use as a loop cursor.
  * @n:		another type * to use as temporary storage
  * @head:	the head for your list.
diff --git a/lib/bitmap.c b/lib/bitmap.c
index 11bf49750583..61998c5924fe 100644
--- a/lib/bitmap.c
+++ b/lib/bitmap.c
@@ -487,7 +487,7 @@ int __bitmap_parse(const char *buf, unsigned int buflen,
 EXPORT_SYMBOL(__bitmap_parse);
 
 /**
- * bitmap_parse_user()
+ * bitmap_parse_user - convert an ASCII hex string in a user buffer into a bitmap
  *
  * @ubuf: pointer to user buffer containing string.
  * @ulen: buffer size in bytes.  If string is smaller than this
@@ -619,7 +619,7 @@ int bitmap_parselist(const char *bp, unsigned long *maskp, int nmaskbits)
 EXPORT_SYMBOL(bitmap_parselist);
 
 /**
- * bitmap_pos_to_ord(buf, pos, bits)
+ * bitmap_pos_to_ord - find ordinal of set bit at given position in bitmap
  *	@buf: pointer to a bitmap
  *	@pos: a bit position in @buf (0 <= @pos < @bits)
  *	@bits: number of valid bit positions in @buf
@@ -655,7 +655,7 @@ static int bitmap_pos_to_ord(const unsigned long *buf, int pos, int bits)
 }
 
 /**
- * bitmap_ord_to_pos(buf, ord, bits)
+ * bitmap_ord_to_pos - find position of n-th set bit in bitmap
  *	@buf: pointer to bitmap
  *	@ord: ordinal bit position (n-th set bit, n >= 0)
  *	@bits: number of valid bit positions in @buf
-- 
cgit v1.2.3


From 3fb7fb4a01d09f81d1daaf65e52d929734bd691f Mon Sep 17 00:00:00 2001
From: Bing Zhao <bzhao@marvell.com>
Date: Fri, 5 Mar 2010 13:43:25 -0800
Subject: sdio: add quirk to clamp byte mode transfer

Some SDIO cards expect byte transfers not to exceed the configured block
transfer size.  Add a quirk to that effect.

Patches to make use of this quirk will be sent separately.

Signed-off-by: Bing Zhao <bzhao@marvell.com>
Signed-off-by: Nicolas Pitre <nico@marvell.com>
Cc: <linux-mmc@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/mmc/core/sdio_io.c | 7 ++++++-
 include/linux/mmc/card.h   | 7 +++++++
 2 files changed, 13 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/core/sdio_io.c b/drivers/mmc/core/sdio_io.c
index f9aa8a7deffa..87c618904ee2 100644
--- a/drivers/mmc/core/sdio_io.c
+++ b/drivers/mmc/core/sdio_io.c
@@ -189,7 +189,12 @@ static inline unsigned int sdio_max_byte_size(struct sdio_func *func)
 {
 	unsigned mval =	min(func->card->host->max_seg_size,
 			    func->card->host->max_blk_size);
-	mval = min(mval, func->max_blksize);
+
+	if (mmc_blksz_for_byte_mode(func->card))
+		mval = min(mval, func->cur_blksize);
+	else
+		mval = min(mval, func->max_blksize);
+
 	return min(mval, 512u); /* maximum size for byte mode */
 }
 
diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h
index 2ee22e8af110..d02d2c6e0cfe 100644
--- a/include/linux/mmc/card.h
+++ b/include/linux/mmc/card.h
@@ -99,6 +99,8 @@ struct mmc_card {
 #define MMC_STATE_BLOCKADDR	(1<<3)		/* card uses block-addressing */
 	unsigned int		quirks; 	/* card quirks */
 #define MMC_QUIRK_LENIENT_FN0	(1<<0)		/* allow SDIO FN0 writes outside of the VS CCCR range */
+#define MMC_QUIRK_BLKSZ_FOR_BYTE_MODE (1<<1)	/* use func->cur_blksize */
+						/* for byte mode */
 
 	u32			raw_cid[4];	/* raw card CID */
 	u32			raw_csd[4];	/* raw card CSD */
@@ -139,6 +141,11 @@ static inline int mmc_card_lenient_fn0(const struct mmc_card *c)
 	return c->quirks & MMC_QUIRK_LENIENT_FN0;
 }
 
+static inline int mmc_blksz_for_byte_mode(const struct mmc_card *c)
+{
+	return c->quirks & MMC_QUIRK_BLKSZ_FOR_BYTE_MODE;
+}
+
 #define mmc_card_name(c)	((c)->cid.prod_name)
 #define mmc_card_id(c)		(dev_name(&(c)->dev))
 
-- 
cgit v1.2.3


From da68c4eb258cd9f3f0b8aeb7e46b8118bb6358b6 Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nico@fluxnic.net>
Date: Fri, 5 Mar 2010 13:43:31 -0800
Subject: sdio: introduce API for special power management features

This patch series provides the core changes needed to allow SDIO cards to
remain powered and active while the host system is suspended, and let them
wake up the host system when needed.  This is used to implement
wake-on-lan with SDIO wireless cards at the moment.  Patches to add that
support to the libertas driver will be posted separately.

This patch:

Some SDIO cards have the ability to keep on running autonomously when the
host system is suspended, and wake it up when needed.  This however
requires that the host controller preserve power to the card, and
configure itself appropriately for wake-up.

There is however 4 layers of abstractions involved: the host controller
driver, the MMC core code, the SDIO card management code, and the actual
SDIO function driver.  To make things simple and manageable, host drivers
must advertise their PM capabilities with a feature bitmask, then function
drivers can query and set those features from their suspend method.  Then
each layer in the suspend call chain is expected to act upon those bits
accordingly.

[akpm@linux-foundation.org: fix typo in comment]
Signed-off-by: Nicolas Pitre <nico@marvell.com>
Cc: <linux-mmc@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/mmc/core/core.c       | 12 ++++++++---
 drivers/mmc/core/sdio_io.c    | 49 +++++++++++++++++++++++++++++++++++++++++++
 include/linux/mmc/host.h      |  5 +++++
 include/linux/mmc/pm.h        | 30 ++++++++++++++++++++++++++
 include/linux/mmc/sdio_func.h |  5 +++++
 5 files changed, 98 insertions(+), 3 deletions(-)
 create mode 100644 include/linux/mmc/pm.h

(limited to 'include/linux')

diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index 30acd5265821..f4b97d3c3d0f 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -1151,6 +1151,9 @@ void mmc_stop_host(struct mmc_host *host)
 	cancel_delayed_work(&host->detect);
 	mmc_flush_scheduled_work();
 
+	/* clear pm flags now and let card drivers set them as needed */
+	host->pm_flags = 0;
+
 	mmc_bus_get(host);
 	if (host->bus_ops && !host->bus_dead) {
 		if (host->bus_ops->remove)
@@ -1273,12 +1276,13 @@ int mmc_suspend_host(struct mmc_host *host, pm_message_t state)
 			mmc_claim_host(host);
 			mmc_detach_bus(host);
 			mmc_release_host(host);
+			host->pm_flags = 0;
 			err = 0;
 		}
 	}
 	mmc_bus_put(host);
 
-	if (!err)
+	if (!err && !(host->pm_flags & MMC_PM_KEEP_POWER))
 		mmc_power_off(host);
 
 	return err;
@@ -1296,8 +1300,10 @@ int mmc_resume_host(struct mmc_host *host)
 
 	mmc_bus_get(host);
 	if (host->bus_ops && !host->bus_dead) {
-		mmc_power_up(host);
-		mmc_select_voltage(host, host->ocr);
+		if (!(host->pm_flags & MMC_PM_KEEP_POWER)) {
+			mmc_power_up(host);
+			mmc_select_voltage(host, host->ocr);
+		}
 		BUG_ON(!host->bus_ops->resume);
 		err = host->bus_ops->resume(host);
 		if (err) {
diff --git a/drivers/mmc/core/sdio_io.c b/drivers/mmc/core/sdio_io.c
index 87c618904ee2..ff27c8c71355 100644
--- a/drivers/mmc/core/sdio_io.c
+++ b/drivers/mmc/core/sdio_io.c
@@ -640,3 +640,52 @@ void sdio_f0_writeb(struct sdio_func *func, unsigned char b, unsigned int addr,
 		*err_ret = ret;
 }
 EXPORT_SYMBOL_GPL(sdio_f0_writeb);
+
+/**
+ *	sdio_get_host_pm_caps - get host power management capabilities
+ *	@func: SDIO function attached to host
+ *
+ *	Returns a capability bitmask corresponding to power management
+ *	features supported by the host controller that the card function
+ *	might rely upon during a system suspend.  The host doesn't need
+ *	to be claimed, nor the function active, for this information to be
+ *	obtained.
+ */
+mmc_pm_flag_t sdio_get_host_pm_caps(struct sdio_func *func)
+{
+	BUG_ON(!func);
+	BUG_ON(!func->card);
+
+	return func->card->host->pm_caps;
+}
+EXPORT_SYMBOL_GPL(sdio_get_host_pm_caps);
+
+/**
+ *	sdio_set_host_pm_flags - set wanted host power management capabilities
+ *	@func: SDIO function attached to host
+ *
+ *	Set a capability bitmask corresponding to wanted host controller
+ *	power management features for the upcoming suspend state.
+ *	This must be called, if needed, each time the suspend method of
+ *	the function driver is called, and must contain only bits that
+ *	were returned by sdio_get_host_pm_caps().
+ *	The host doesn't need to be claimed, nor the function active,
+ *	for this information to be set.
+ */
+int sdio_set_host_pm_flags(struct sdio_func *func, mmc_pm_flag_t flags)
+{
+	struct mmc_host *host;
+
+	BUG_ON(!func);
+	BUG_ON(!func->card);
+
+	host = func->card->host;
+
+	if (flags & ~host->pm_caps)
+		return -EINVAL;
+
+	/* function suspend methods are serialized, hence no lock needed */
+	host->pm_flags |= flags;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(sdio_set_host_pm_flags);
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index eaf36364b7d4..43eaf5ca5848 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -14,6 +14,7 @@
 #include <linux/sched.h>
 
 #include <linux/mmc/core.h>
+#include <linux/mmc/pm.h>
 
 struct mmc_ios {
 	unsigned int	clock;			/* clock rate */
@@ -152,6 +153,8 @@ struct mmc_host {
 #define MMC_CAP_NONREMOVABLE	(1 << 8)	/* Nonremovable e.g. eMMC */
 #define MMC_CAP_WAIT_WHILE_BUSY	(1 << 9)	/* Waits while card is busy */
 
+	mmc_pm_flag_t		pm_caps;	/* supported pm features */
+
 	/* host specific block data */
 	unsigned int		max_seg_size;	/* see blk_queue_max_segment_size */
 	unsigned short		max_hw_segs;	/* see blk_queue_max_hw_segments */
@@ -197,6 +200,8 @@ struct mmc_host {
 	struct task_struct	*sdio_irq_thread;
 	atomic_t		sdio_irq_thread_abort;
 
+	mmc_pm_flag_t		pm_flags;	/* requested pm features */
+
 #ifdef CONFIG_LEDS_TRIGGERS
 	struct led_trigger	*led;		/* activity led */
 #endif
diff --git a/include/linux/mmc/pm.h b/include/linux/mmc/pm.h
new file mode 100644
index 000000000000..d37aac49cf9a
--- /dev/null
+++ b/include/linux/mmc/pm.h
@@ -0,0 +1,30 @@
+/*
+ * linux/include/linux/mmc/pm.h
+ *
+ * Author:	Nicolas Pitre
+ * Copyright:	(C) 2009 Marvell Technology Group Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef LINUX_MMC_PM_H
+#define LINUX_MMC_PM_H
+
+/*
+ * These flags are used to describe power management features that
+ * some cards (typically SDIO cards) might wish to benefit from when
+ * the host system is being suspended.  There are several layers of
+ * abstractions involved, from the host controller driver, to the MMC core
+ * code, to the SDIO core code, to finally get to the actual SDIO function
+ * driver.  This file is therefore used for common definitions shared across
+ * all those layers.
+ */
+
+typedef unsigned int mmc_pm_flag_t;
+
+#define MMC_PM_KEEP_POWER	(1 << 0)	/* preserve card power during suspend */
+#define MMC_PM_WAKE_SDIO_IRQ	(1 << 1)	/* wake up host system on SDIO IRQ assertion */
+
+#endif
diff --git a/include/linux/mmc/sdio_func.h b/include/linux/mmc/sdio_func.h
index ac3ab683fec6..c6c0cceba5fe 100644
--- a/include/linux/mmc/sdio_func.h
+++ b/include/linux/mmc/sdio_func.h
@@ -15,6 +15,8 @@
 #include <linux/device.h>
 #include <linux/mod_devicetable.h>
 
+#include <linux/mmc/pm.h>
+
 struct mmc_card;
 struct sdio_func;
 
@@ -153,5 +155,8 @@ extern unsigned char sdio_f0_readb(struct sdio_func *func,
 extern void sdio_f0_writeb(struct sdio_func *func, unsigned char b,
 	unsigned int addr, int *err_ret);
 
+extern mmc_pm_flag_t sdio_get_host_pm_caps(struct sdio_func *func);
+extern int sdio_set_host_pm_flags(struct sdio_func *func, mmc_pm_flag_t flags);
+
 #endif
 
-- 
cgit v1.2.3


From 6b5eda369ac3772dad416ef96d86064204d74770 Mon Sep 17 00:00:00 2001
From: Daniel Drake <dsd@laptop.org>
Date: Fri, 5 Mar 2010 13:43:34 -0800
Subject: sdio: put active devices into 1-bit mode during suspend

And bring them back to 4-bit mode during resume.

Signed-off-by: Daniel Drake <dsd@laptop.org>
Signed-off-by: Nicolas Pitre <nico@marvell.com>
Cc: <linux-mmc@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/mmc/core/sdio.c  | 43 +++++++++++++++++++++++++++++++++++++++++++
 include/linux/mmc/sdio.h |  2 ++
 2 files changed, 45 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c
index 5840de106b69..2dd4cfe7ca17 100644
--- a/drivers/mmc/core/sdio.c
+++ b/drivers/mmc/core/sdio.c
@@ -187,6 +187,40 @@ static int sdio_disable_cd(struct mmc_card *card)
 	return mmc_io_rw_direct(card, 1, 0, SDIO_CCCR_IF, ctrl, NULL);
 }
 
+/*
+ * Devices that remain active during a system suspend are
+ * put back into 1-bit mode.
+ */
+static int sdio_disable_wide(struct mmc_card *card)
+{
+	int ret;
+	u8 ctrl;
+
+	if (!(card->host->caps & MMC_CAP_4_BIT_DATA))
+		return 0;
+
+	if (card->cccr.low_speed && !card->cccr.wide_bus)
+		return 0;
+
+	ret = mmc_io_rw_direct(card, 0, 0, SDIO_CCCR_IF, 0, &ctrl);
+	if (ret)
+		return ret;
+
+	if (!(ctrl & SDIO_BUS_WIDTH_4BIT))
+		return 0;
+
+	ctrl &= ~SDIO_BUS_WIDTH_4BIT;
+	ctrl |= SDIO_BUS_ASYNC_INT;
+
+	ret = mmc_io_rw_direct(card, 1, 0, SDIO_CCCR_IF, ctrl, NULL);
+	if (ret)
+		return ret;
+
+	mmc_set_bus_width(card->host, MMC_BUS_WIDTH_1);
+
+	return 0;
+}
+
 /*
  * Test if the card supports high-speed mode and, if so, switch to it.
  */
@@ -427,6 +461,12 @@ static int mmc_sdio_suspend(struct mmc_host *host)
 		}
 	}
 
+	if (!err && host->pm_flags & MMC_PM_KEEP_POWER) {
+		mmc_claim_host(host);
+		sdio_disable_wide(host->card);
+		mmc_release_host(host);
+	}
+
 	return err;
 }
 
@@ -441,6 +481,9 @@ static int mmc_sdio_resume(struct mmc_host *host)
 	mmc_claim_host(host);
 	err = mmc_sdio_init_card(host, host->ocr, host->card,
 				 (host->pm_flags & MMC_PM_KEEP_POWER));
+	if (!err)
+		/* We may have switched to 1-bit mode during suspend. */
+		err = sdio_enable_wide(host->card);
 	if (!err && host->sdio_irqs)
 		mmc_signal_sdio_irq(host);
 	mmc_release_host(host);
diff --git a/include/linux/mmc/sdio.h b/include/linux/mmc/sdio.h
index 47ba464f5170..0ebaef577ff5 100644
--- a/include/linux/mmc/sdio.h
+++ b/include/linux/mmc/sdio.h
@@ -95,6 +95,8 @@
 #define  SDIO_BUS_WIDTH_1BIT	0x00
 #define  SDIO_BUS_WIDTH_4BIT	0x02
 
+#define  SDIO_BUS_ASYNC_INT	0x20
+
 #define  SDIO_BUS_CD_DISABLE     0x80	/* disable pull-up on DAT3 (pin 1) */
 
 #define SDIO_CCCR_CAPS		0x08
-- 
cgit v1.2.3


From 088e7af73a962fcc8883b7a6392544d8342553d6 Mon Sep 17 00:00:00 2001
From: Daisuke HATAYAMA <d.hatayama@jp.fujitsu.com>
Date: Fri, 5 Mar 2010 13:44:06 -0800
Subject: coredump: move dump_write() and dump_seek() into a header file

My next patch will replace ELF_CORE_EXTRA_* macros by functions, putting
them into other newly created *.c files.  Then, each files will contain
dump_write(), where each pair of binfmt_*.c and elfcore.c should be the
same.  So, this patch moves them into a header file with dump_seek().
Also, the patch deletes confusing DUMP_WRITE macros in each files.

Signed-off-by: Daisuke HATAYAMA <d.hatayama@jp.fujitsu.com>
Cc: "Luck, Tony" <tony.luck@intel.com>
Cc: Jeff Dike <jdike@addtoit.com>
Cc: David Howells <dhowells@redhat.com>
Cc: Greg Ungerer <gerg@snapgear.com>
Cc: Roland McGrath <roland@redhat.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Cc: <linux-arch@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/binfmt_aout.c         | 49 ++++++++-----------------------------------
 fs/binfmt_elf.c          | 52 +++++++++++++---------------------------------
 fs/binfmt_elf_fdpic.c    | 54 ++++++++++++++----------------------------------
 include/linux/coredump.h | 41 ++++++++++++++++++++++++++++++++++++
 4 files changed, 79 insertions(+), 117 deletions(-)
 create mode 100644 include/linux/coredump.h

(limited to 'include/linux')

diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
index d2f8872dd767..15d80bb35d6f 100644
--- a/fs/binfmt_aout.c
+++ b/fs/binfmt_aout.c
@@ -24,6 +24,7 @@
 #include <linux/binfmts.h>
 #include <linux/personality.h>
 #include <linux/init.h>
+#include <linux/coredump.h>
 
 #include <asm/system.h>
 #include <asm/uaccess.h>
@@ -59,42 +60,6 @@ static int set_brk(unsigned long start, unsigned long end)
 	return 0;
 }
 
-/*
- * These are the only things you should do on a core-file: use only these
- * macros to write out all the necessary info.
- */
-
-static int dump_write(struct file *file, const void *addr, int nr)
-{
-	return file->f_op->write(file, addr, nr, &file->f_pos) == nr;
-}
-
-static int dump_seek(struct file *file, loff_t off)
-{
-	if (file->f_op->llseek && file->f_op->llseek != no_llseek) {
-		if (file->f_op->llseek(file, off, SEEK_CUR) < 0)
-			return 0;
-	} else {
-		char *buf = (char *)get_zeroed_page(GFP_KERNEL);
-		if (!buf)
-			return 0;
-		while (off > 0) {
-			unsigned long n = off;
-			if (n > PAGE_SIZE)
-				n = PAGE_SIZE;
-			if (!dump_write(file, buf, n))
-				return 0;
-			off -= n;
-		}
-		free_page((unsigned long)buf);
-	}
-	return 1;
-}
-
-#define DUMP_WRITE(addr, nr)	\
-	if (!dump_write(file, (void *)(addr), (nr))) \
-		goto end_coredump;
-
 /*
  * Routine writes a core dump image in the current directory.
  * Currently only a stub-function.
@@ -146,7 +111,8 @@ static int aout_core_dump(struct coredump_params *cprm)
 
 	set_fs(KERNEL_DS);
 /* struct user */
-	DUMP_WRITE(&dump,sizeof(dump));
+	if (!dump_write(file, &dump, sizeof(dump)))
+		goto end_coredump;
 /* Now dump all of the user data.  Include malloced stuff as well */
 	if (!dump_seek(cprm->file, PAGE_SIZE - sizeof(dump)))
 		goto end_coredump;
@@ -156,17 +122,20 @@ static int aout_core_dump(struct coredump_params *cprm)
 	if (dump.u_dsize != 0) {
 		dump_start = START_DATA(dump);
 		dump_size = dump.u_dsize << PAGE_SHIFT;
-		DUMP_WRITE(dump_start,dump_size);
+		if (!dump_write(file, dump_start, dump_size))
+			goto end_coredump;
 	}
 /* Now prepare to dump the stack area */
 	if (dump.u_ssize != 0) {
 		dump_start = START_STACK(dump);
 		dump_size = dump.u_ssize << PAGE_SHIFT;
-		DUMP_WRITE(dump_start,dump_size);
+		if (!dump_write(file, dump_start, dump_size))
+			goto end_coredump;
 	}
 /* Finally dump the task struct.  Not be used by gdb, but could be useful */
 	set_fs(KERNEL_DS);
-	DUMP_WRITE(current,sizeof(*current));
+	if (!dump_write(file, current, sizeof(*current)))
+		goto end_coredump;
 end_coredump:
 	set_fs(fs);
 	return has_dumped;
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index fd5b2ea5d299..0bcfbb05c32d 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -31,6 +31,7 @@
 #include <linux/random.h>
 #include <linux/elf.h>
 #include <linux/utsname.h>
+#include <linux/coredump.h>
 #include <asm/uaccess.h>
 #include <asm/param.h>
 #include <asm/page.h>
@@ -1085,36 +1086,6 @@ out:
  * Modelled on fs/exec.c:aout_core_dump()
  * Jeremy Fitzhardinge <jeremy@sw.oz.au>
  */
-/*
- * These are the only things you should do on a core-file: use only these
- * functions to write out all the necessary info.
- */
-static int dump_write(struct file *file, const void *addr, int nr)
-{
-	return file->f_op->write(file, addr, nr, &file->f_pos) == nr;
-}
-
-static int dump_seek(struct file *file, loff_t off)
-{
-	if (file->f_op->llseek && file->f_op->llseek != no_llseek) {
-		if (file->f_op->llseek(file, off, SEEK_CUR) < 0)
-			return 0;
-	} else {
-		char *buf = (char *)get_zeroed_page(GFP_KERNEL);
-		if (!buf)
-			return 0;
-		while (off > 0) {
-			unsigned long n = off;
-			if (n > PAGE_SIZE)
-				n = PAGE_SIZE;
-			if (!dump_write(file, buf, n))
-				return 0;
-			off -= n;
-		}
-		free_page((unsigned long)buf);
-	}
-	return 1;
-}
 
 /*
  * Decide what to dump of a segment, part, all or none.
@@ -1249,11 +1220,6 @@ static int writenote(struct memelfnote *men, struct file *file,
 }
 #undef DUMP_WRITE
 
-#define DUMP_WRITE(addr, nr)				\
-	if ((size += (nr)) > cprm->limit ||		\
-	    !dump_write(cprm->file, (addr), (nr)))	\
-		goto end_coredump;
-
 static void fill_elf_header(struct elfhdr *elf, int segs,
 			    u16 machine, u32 flags, u8 osabi)
 {
@@ -1934,7 +1900,10 @@ static int elf_core_dump(struct coredump_params *cprm)
 	fs = get_fs();
 	set_fs(KERNEL_DS);
 
-	DUMP_WRITE(elf, sizeof(*elf));
+	size += sizeof(*elf);
+	if (size > cprm->limit || !dump_write(cprm->file, elf, sizeof(*elf)))
+		goto end_coredump;
+
 	offset += sizeof(*elf);				/* Elf header */
 	offset += (segs + 1) * sizeof(struct elf_phdr); /* Program headers */
 	foffset = offset;
@@ -1948,7 +1917,11 @@ static int elf_core_dump(struct coredump_params *cprm)
 
 		fill_elf_note_phdr(&phdr, sz, offset);
 		offset += sz;
-		DUMP_WRITE(&phdr, sizeof(phdr));
+
+		size += sizeof(phdr);
+		if (size > cprm->limit
+		    || !dump_write(cprm->file, &phdr, sizeof(phdr)))
+			goto end_coredump;
 	}
 
 	dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
@@ -1979,7 +1952,10 @@ static int elf_core_dump(struct coredump_params *cprm)
 			phdr.p_flags |= PF_X;
 		phdr.p_align = ELF_EXEC_PAGESIZE;
 
-		DUMP_WRITE(&phdr, sizeof(phdr));
+		size += sizeof(phdr);
+		if (size > cprm->limit
+		    || !dump_write(cprm->file, &phdr, sizeof(phdr)))
+			goto end_coredump;
 	}
 
 #ifdef ELF_CORE_WRITE_EXTRA_PHDRS
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 32d9b44c3cb9..63edf40b569b 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -34,6 +34,7 @@
 #include <linux/elf.h>
 #include <linux/elf-fdpic.h>
 #include <linux/elfcore.h>
+#include <linux/coredump.h>
 
 #include <asm/uaccess.h>
 #include <asm/param.h>
@@ -1215,37 +1216,6 @@ static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *params,
  */
 #ifdef CONFIG_ELF_CORE
 
-/*
- * These are the only things you should do on a core-file: use only these
- * functions to write out all the necessary info.
- */
-static int dump_write(struct file *file, const void *addr, int nr)
-{
-	return file->f_op->write(file, addr, nr, &file->f_pos) == nr;
-}
-
-static int dump_seek(struct file *file, loff_t off)
-{
-	if (file->f_op->llseek && file->f_op->llseek != no_llseek) {
-		if (file->f_op->llseek(file, off, SEEK_CUR) < 0)
-			return 0;
-	} else {
-		char *buf = (char *)get_zeroed_page(GFP_KERNEL);
-		if (!buf)
-			return 0;
-		while (off > 0) {
-			unsigned long n = off;
-			if (n > PAGE_SIZE)
-				n = PAGE_SIZE;
-			if (!dump_write(file, buf, n))
-				return 0;
-			off -= n;
-		}
-		free_page((unsigned long)buf);
-	}
-	return 1;
-}
-
 /*
  * Decide whether a segment is worth dumping; default is yes to be
  * sure (missing info is worse than too much; etc).
@@ -1354,11 +1324,6 @@ static int writenote(struct memelfnote *men, struct file *file,
 }
 #undef DUMP_WRITE
 
-#define DUMP_WRITE(addr, nr)				\
-	if ((size += (nr)) > cprm->limit ||		\
-	    !dump_write(cprm->file, (addr), (nr)))	\
-		goto end_coredump;
-
 static inline void fill_elf_fdpic_header(struct elfhdr *elf, int segs)
 {
 	memcpy(elf->e_ident, ELFMAG, SELFMAG);
@@ -1743,7 +1708,11 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
 	fs = get_fs();
 	set_fs(KERNEL_DS);
 
-	DUMP_WRITE(elf, sizeof(*elf));
+	size += sizeof(*elf);
+	if (size > cprm->limit
+	    || !dump_write(cprm->file, elf, sizeof(*elf)))
+		goto end_coredump;
+
 	offset += sizeof(*elf);				/* Elf header */
 	offset += (segs+1) * sizeof(struct elf_phdr);	/* Program headers */
 	foffset = offset;
@@ -1760,7 +1729,11 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
 
 		fill_elf_note_phdr(&phdr, sz, offset);
 		offset += sz;
-		DUMP_WRITE(&phdr, sizeof(phdr));
+
+		size += sizeof(phdr);
+		if (size > cprm->limit
+		    || !dump_write(cprm->file, &phdr, sizeof(phdr)))
+			goto end_coredump;
 	}
 
 	/* Page-align dumped data */
@@ -1794,7 +1767,10 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
 			phdr.p_flags |= PF_X;
 		phdr.p_align = ELF_EXEC_PAGESIZE;
 
-		DUMP_WRITE(&phdr, sizeof(phdr));
+		size += sizeof(phdr);
+		if (size > cprm->limit
+		    || !dump_write(cprm->file, &phdr, sizeof(phdr)))
+			goto end_coredump;
 	}
 
 #ifdef ELF_CORE_WRITE_EXTRA_PHDRS
diff --git a/include/linux/coredump.h b/include/linux/coredump.h
new file mode 100644
index 000000000000..b3c91d7cede4
--- /dev/null
+++ b/include/linux/coredump.h
@@ -0,0 +1,41 @@
+#ifndef _LINUX_COREDUMP_H
+#define _LINUX_COREDUMP_H
+
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/fs.h>
+
+/*
+ * These are the only things you should do on a core-file: use only these
+ * functions to write out all the necessary info.
+ */
+static inline int dump_write(struct file *file, const void *addr, int nr)
+{
+	return file->f_op->write(file, addr, nr, &file->f_pos) == nr;
+}
+
+static inline int dump_seek(struct file *file, loff_t off)
+{
+	if (file->f_op->llseek && file->f_op->llseek != no_llseek) {
+		if (file->f_op->llseek(file, off, SEEK_CUR) < 0)
+			return 0;
+	} else {
+		char *buf = (char *)get_zeroed_page(GFP_KERNEL);
+
+		if (!buf)
+			return 0;
+		while (off > 0) {
+			unsigned long n = off;
+
+			if (n > PAGE_SIZE)
+				n = PAGE_SIZE;
+			if (!dump_write(file, buf, n))
+				return 0;
+			off -= n;
+		}
+		free_page((unsigned long)buf);
+	}
+	return 1;
+}
+
+#endif /* _LINUX_COREDUMP_H */
-- 
cgit v1.2.3


From 1fcccbac89f5bbc5e41aa72086960059fce372da Mon Sep 17 00:00:00 2001
From: Daisuke HATAYAMA <d.hatayama@jp.fujitsu.com>
Date: Fri, 5 Mar 2010 13:44:07 -0800
Subject: elf coredump: replace ELF_CORE_EXTRA_* macros by functions

elf_core_dump() and elf_fdpic_core_dump() use #ifdef and the corresponding
macro for hiding _multiline_ logics in functions.  This patch removes
#ifdef and replaces ELF_CORE_EXTRA_* by corresponding functions.  For
architectures not implemeonting ELF_CORE_EXTRA_*, we use weak functions in
order to reduce a range of modification.

This cleanup is for my next patches, but I think this cleanup itself is
worth doing regardless of my firnal purpose.

Signed-off-by: Daisuke HATAYAMA <d.hatayama@jp.fujitsu.com>
Cc: "Luck, Tony" <tony.luck@intel.com>
Cc: Jeff Dike <jdike@addtoit.com>
Cc: David Howells <dhowells@redhat.com>
Cc: Greg Ungerer <gerg@snapgear.com>
Cc: Roland McGrath <roland@redhat.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Cc: <linux-arch@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/ia64/include/asm/elf.h | 48 --------------------------------
 arch/ia64/kernel/Makefile   |  2 ++
 arch/ia64/kernel/elfcore.c  | 64 +++++++++++++++++++++++++++++++++++++++++++
 arch/um/sys-i386/Makefile   |  2 ++
 arch/um/sys-i386/asm/elf.h  | 43 -----------------------------
 arch/um/sys-i386/elfcore.c  | 67 +++++++++++++++++++++++++++++++++++++++++++++
 fs/binfmt_elf.c             | 14 ++++------
 fs/binfmt_elf_fdpic.c       | 14 ++++------
 fs/compat_binfmt_elf.c      |  2 ++
 include/linux/elf.h         |  2 ++
 include/linux/elfcore.h     | 16 +++++++++++
 kernel/Makefile             |  3 ++
 kernel/elfcore.c            | 23 ++++++++++++++++
 13 files changed, 191 insertions(+), 109 deletions(-)
 create mode 100644 arch/ia64/kernel/elfcore.c
 create mode 100644 arch/um/sys-i386/elfcore.c
 create mode 100644 kernel/elfcore.c

(limited to 'include/linux')

diff --git a/arch/ia64/include/asm/elf.h b/arch/ia64/include/asm/elf.h
index 4c41656ede87..b5298eb09adb 100644
--- a/arch/ia64/include/asm/elf.h
+++ b/arch/ia64/include/asm/elf.h
@@ -219,54 +219,6 @@ do {										\
 	NEW_AUX_ENT(AT_SYSINFO_EHDR, (unsigned long) GATE_EHDR);		\
 } while (0)
 
-
-/*
- * These macros parameterize elf_core_dump in fs/binfmt_elf.c to write out
- * extra segments containing the gate DSO contents.  Dumping its
- * contents makes post-mortem fully interpretable later without matching up
- * the same kernel and hardware config to see what PC values meant.
- * Dumping its extra ELF program headers includes all the other information
- * a debugger needs to easily find how the gate DSO was being used.
- */
-#define ELF_CORE_EXTRA_PHDRS		(GATE_EHDR->e_phnum)
-#define ELF_CORE_WRITE_EXTRA_PHDRS						\
-do {										\
-	const struct elf_phdr *const gate_phdrs =			      \
-		(const struct elf_phdr *) (GATE_ADDR + GATE_EHDR->e_phoff);   \
-	int i;									\
-	Elf64_Off ofs = 0;						      \
-	for (i = 0; i < GATE_EHDR->e_phnum; ++i) {				\
-		struct elf_phdr phdr = gate_phdrs[i];			      \
-		if (phdr.p_type == PT_LOAD) {					\
-			phdr.p_memsz = PAGE_ALIGN(phdr.p_memsz);	      \
-			phdr.p_filesz = phdr.p_memsz;			      \
-			if (ofs == 0) {					      \
-				ofs = phdr.p_offset = offset;		      \
-			offset += phdr.p_filesz;				\
-		}							      \
-		else							      \
-				phdr.p_offset = ofs;			      \
-		}							      \
-		else							      \
-			phdr.p_offset += ofs;					\
-		phdr.p_paddr = 0; /* match other core phdrs */			\
-		DUMP_WRITE(&phdr, sizeof(phdr));				\
-	}									\
-} while (0)
-#define ELF_CORE_WRITE_EXTRA_DATA					\
-do {									\
-	const struct elf_phdr *const gate_phdrs =			      \
-		(const struct elf_phdr *) (GATE_ADDR + GATE_EHDR->e_phoff);   \
-	int i;								\
-	for (i = 0; i < GATE_EHDR->e_phnum; ++i) {			\
-		if (gate_phdrs[i].p_type == PT_LOAD) {			      \
-			DUMP_WRITE((void *) gate_phdrs[i].p_vaddr,	      \
-				   PAGE_ALIGN(gate_phdrs[i].p_memsz));	      \
-			break;						      \
-		}							      \
-	}								\
-} while (0)
-
 /*
  * format for entries in the Global Offset Table
  */
diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile
index 4138282aefa8..db10b1e378b0 100644
--- a/arch/ia64/kernel/Makefile
+++ b/arch/ia64/kernel/Makefile
@@ -45,6 +45,8 @@ endif
 obj-$(CONFIG_DMAR)		+= pci-dma.o
 obj-$(CONFIG_SWIOTLB)		+= pci-swiotlb.o
 
+obj-$(CONFIG_BINFMT_ELF)	+= elfcore.o
+
 # fp_emulate() expects f2-f5,f16-f31 to contain the user-level state.
 CFLAGS_traps.o  += -mfixed-range=f2-f5,f16-f31
 
diff --git a/arch/ia64/kernel/elfcore.c b/arch/ia64/kernel/elfcore.c
new file mode 100644
index 000000000000..57a2298a8581
--- /dev/null
+++ b/arch/ia64/kernel/elfcore.c
@@ -0,0 +1,64 @@
+#include <linux/elf.h>
+#include <linux/coredump.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+
+#include <asm/elf.h>
+
+
+Elf64_Half elf_core_extra_phdrs(void)
+{
+	return GATE_EHDR->e_phnum;
+}
+
+int elf_core_write_extra_phdrs(struct file *file, loff_t offset, size_t *size,
+			       unsigned long limit)
+{
+	const struct elf_phdr *const gate_phdrs =
+		(const struct elf_phdr *) (GATE_ADDR + GATE_EHDR->e_phoff);
+	int i;
+	Elf64_Off ofs = 0;
+
+	for (i = 0; i < GATE_EHDR->e_phnum; ++i) {
+		struct elf_phdr phdr = gate_phdrs[i];
+
+		if (phdr.p_type == PT_LOAD) {
+			phdr.p_memsz = PAGE_ALIGN(phdr.p_memsz);
+			phdr.p_filesz = phdr.p_memsz;
+			if (ofs == 0) {
+				ofs = phdr.p_offset = offset;
+				offset += phdr.p_filesz;
+			} else {
+				phdr.p_offset = ofs;
+			}
+		} else {
+			phdr.p_offset += ofs;
+		}
+		phdr.p_paddr = 0; /* match other core phdrs */
+		*size += sizeof(phdr);
+		if (*size > limit || !dump_write(file, &phdr, sizeof(phdr)))
+			return 0;
+	}
+	return 1;
+}
+
+int elf_core_write_extra_data(struct file *file, size_t *size,
+			      unsigned long limit)
+{
+	const struct elf_phdr *const gate_phdrs =
+		(const struct elf_phdr *) (GATE_ADDR + GATE_EHDR->e_phoff);
+	int i;
+
+	for (i = 0; i < GATE_EHDR->e_phnum; ++i) {
+		if (gate_phdrs[i].p_type == PT_LOAD) {
+			void *addr = (void *)gate_phdrs[i].p_vaddr;
+			size_t memsz = PAGE_ALIGN(gate_phdrs[i].p_memsz);
+
+			*size += memsz;
+			if (*size > limit || !dump_write(file, addr, memsz))
+				return 0;
+			break;
+		}
+	}
+	return 1;
+}
diff --git a/arch/um/sys-i386/Makefile b/arch/um/sys-i386/Makefile
index 1b549bca4645..804b28dd0328 100644
--- a/arch/um/sys-i386/Makefile
+++ b/arch/um/sys-i386/Makefile
@@ -6,6 +6,8 @@ obj-y = bug.o bugs.o checksum.o delay.o fault.o ksyms.o ldt.o ptrace.o \
 	ptrace_user.o setjmp.o signal.o stub.o stub_segv.o syscalls.o sysrq.o \
 	sys_call_table.o tls.o
 
+obj-$(CONFIG_BINFMT_ELF) += elfcore.o
+
 subarch-obj-y = lib/semaphore_32.o lib/string_32.o
 subarch-obj-$(CONFIG_HIGHMEM) += mm/highmem_32.o
 subarch-obj-$(CONFIG_MODULES) += kernel/module.o
diff --git a/arch/um/sys-i386/asm/elf.h b/arch/um/sys-i386/asm/elf.h
index 770885472ed4..e64cd41d7bab 100644
--- a/arch/um/sys-i386/asm/elf.h
+++ b/arch/um/sys-i386/asm/elf.h
@@ -116,47 +116,4 @@ do {								\
 	}							\
 } while (0)
 
-/*
- * These macros parameterize elf_core_dump in fs/binfmt_elf.c to write out
- * extra segments containing the vsyscall DSO contents.  Dumping its
- * contents makes post-mortem fully interpretable later without matching up
- * the same kernel and hardware config to see what PC values meant.
- * Dumping its extra ELF program headers includes all the other information
- * a debugger needs to easily find how the vsyscall DSO was being used.
- */
-#define ELF_CORE_EXTRA_PHDRS						      \
-	(vsyscall_ehdr ? (((struct elfhdr *)vsyscall_ehdr)->e_phnum) : 0 )
-
-#define ELF_CORE_WRITE_EXTRA_PHDRS					      \
-if ( vsyscall_ehdr ) {							      \
-	const struct elfhdr *const ehdrp = (struct elfhdr *)vsyscall_ehdr;    \
-	const struct elf_phdr *const phdrp =				      \
-		(const struct elf_phdr *) (vsyscall_ehdr + ehdrp->e_phoff);   \
-	int i;								      \
-	Elf32_Off ofs = 0;						      \
-	for (i = 0; i < ehdrp->e_phnum; ++i) {				      \
-		struct elf_phdr phdr = phdrp[i];			      \
-		if (phdr.p_type == PT_LOAD) {				      \
-			ofs = phdr.p_offset = offset;			      \
-			offset += phdr.p_filesz;			      \
-		}							      \
-		else							      \
-			phdr.p_offset += ofs;				      \
-		phdr.p_paddr = 0; /* match other core phdrs */		      \
-		DUMP_WRITE(&phdr, sizeof(phdr));			      \
-	}								      \
-}
-#define ELF_CORE_WRITE_EXTRA_DATA					      \
-if ( vsyscall_ehdr ) {							      \
-	const struct elfhdr *const ehdrp = (struct elfhdr *)vsyscall_ehdr;    \
-	const struct elf_phdr *const phdrp =				      \
-		(const struct elf_phdr *) (vsyscall_ehdr + ehdrp->e_phoff);   \
-	int i;								      \
-	for (i = 0; i < ehdrp->e_phnum; ++i) {				      \
-		if (phdrp[i].p_type == PT_LOAD)				      \
-			DUMP_WRITE((void *) phdrp[i].p_vaddr,		      \
-				   phdrp[i].p_filesz);			      \
-	}								      \
-}
-
 #endif
diff --git a/arch/um/sys-i386/elfcore.c b/arch/um/sys-i386/elfcore.c
new file mode 100644
index 000000000000..30cac52a04b4
--- /dev/null
+++ b/arch/um/sys-i386/elfcore.c
@@ -0,0 +1,67 @@
+#include <linux/elf.h>
+#include <linux/coredump.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+
+#include <asm/elf.h>
+
+
+Elf32_Half elf_core_extra_phdrs(void)
+{
+	return vsyscall_ehdr ? (((struct elfhdr *)vsyscall_ehdr)->e_phnum) : 0;
+}
+
+int elf_core_write_extra_phdrs(struct file *file, loff_t offset, size_t *size,
+			       unsigned long limit)
+{
+	if ( vsyscall_ehdr ) {
+		const struct elfhdr *const ehdrp =
+			(struct elfhdr *) vsyscall_ehdr;
+		const struct elf_phdr *const phdrp =
+			(const struct elf_phdr *) (vsyscall_ehdr + ehdrp->e_phoff);
+		int i;
+		Elf32_Off ofs = 0;
+
+		for (i = 0; i < ehdrp->e_phnum; ++i) {
+			struct elf_phdr phdr = phdrp[i];
+
+			if (phdr.p_type == PT_LOAD) {
+				ofs = phdr.p_offset = offset;
+				offset += phdr.p_filesz;
+			} else {
+				phdr.p_offset += ofs;
+			}
+			phdr.p_paddr = 0; /* match other core phdrs */
+			*size += sizeof(phdr);
+			if (*size > limit
+			    || !dump_write(file, &phdr, sizeof(phdr)))
+				return 0;
+		}
+	}
+	return 1;
+}
+
+int elf_core_write_extra_data(struct file *file, size_t *size,
+			      unsigned long limit)
+{
+	if ( vsyscall_ehdr ) {
+		const struct elfhdr *const ehdrp =
+			(struct elfhdr *) vsyscall_ehdr;
+		const struct elf_phdr *const phdrp =
+			(const struct elf_phdr *) (vsyscall_ehdr + ehdrp->e_phoff);
+		int i;
+
+		for (i = 0; i < ehdrp->e_phnum; ++i) {
+			if (phdrp[i].p_type == PT_LOAD) {
+				void *addr = (void *) phdrp[i].p_vaddr;
+				size_t filesz = phdrp[i].p_filesz;
+
+				*size += filesz;
+				if (*size > limit
+				    || !dump_write(file, addr, filesz))
+					return 0;
+			}
+		}
+	}
+	return 1;
+}
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 0bcfbb05c32d..c1a499599b7d 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1878,9 +1878,7 @@ static int elf_core_dump(struct coredump_params *cprm)
 	 * Please check DEFAULT_MAX_MAP_COUNT definition when you modify here.
 	 */
 	segs = current->mm->map_count;
-#ifdef ELF_CORE_EXTRA_PHDRS
-	segs += ELF_CORE_EXTRA_PHDRS;
-#endif
+	segs += elf_core_extra_phdrs();
 
 	gate_vma = get_gate_vma(current);
 	if (gate_vma != NULL)
@@ -1958,9 +1956,8 @@ static int elf_core_dump(struct coredump_params *cprm)
 			goto end_coredump;
 	}
 
-#ifdef ELF_CORE_WRITE_EXTRA_PHDRS
-	ELF_CORE_WRITE_EXTRA_PHDRS;
-#endif
+	if (!elf_core_write_extra_phdrs(cprm->file, offset, &size, cprm->limit))
+		goto end_coredump;
 
  	/* write out the notes section */
 	if (!write_note_info(&info, cprm->file, &foffset))
@@ -1999,9 +1996,8 @@ static int elf_core_dump(struct coredump_params *cprm)
 		}
 	}
 
-#ifdef ELF_CORE_WRITE_EXTRA_DATA
-	ELF_CORE_WRITE_EXTRA_DATA;
-#endif
+	if (!elf_core_write_extra_data(cprm->file, &size, cprm->limit))
+		goto end_coredump;
 
 end_coredump:
 	set_fs(fs);
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 63edf40b569b..952699a86ec3 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -1664,9 +1664,7 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
 	elf_core_copy_regs(&prstatus->pr_reg, cprm->regs);
 
 	segs = current->mm->map_count;
-#ifdef ELF_CORE_EXTRA_PHDRS
-	segs += ELF_CORE_EXTRA_PHDRS;
-#endif
+	segs += elf_core_extra_phdrs();
 
 	/* Set up header */
 	fill_elf_fdpic_header(elf, segs + 1);	/* including notes section */
@@ -1773,9 +1771,8 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
 			goto end_coredump;
 	}
 
-#ifdef ELF_CORE_WRITE_EXTRA_PHDRS
-	ELF_CORE_WRITE_EXTRA_PHDRS;
-#endif
+	if (!elf_core_write_extra_phdrs(cprm->file, offset, &size, cprm->limit))
+		goto end_coredump;
 
  	/* write out the notes section */
 	for (i = 0; i < numnote; i++)
@@ -1799,9 +1796,8 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
 				    mm_flags) < 0)
 		goto end_coredump;
 
-#ifdef ELF_CORE_WRITE_EXTRA_DATA
-	ELF_CORE_WRITE_EXTRA_DATA;
-#endif
+	if (!elf_core_write_extra_data(cprm->file, &size, cprm->limit))
+		goto end_coredump;
 
 	if (cprm->file->f_pos != offset) {
 		/* Sanity check */
diff --git a/fs/compat_binfmt_elf.c b/fs/compat_binfmt_elf.c
index 0adced2f296f..112e45a17e99 100644
--- a/fs/compat_binfmt_elf.c
+++ b/fs/compat_binfmt_elf.c
@@ -28,10 +28,12 @@
 
 #undef	elfhdr
 #undef	elf_phdr
+#undef	elf_shdr
 #undef	elf_note
 #undef	elf_addr_t
 #define elfhdr		elf32_hdr
 #define elf_phdr	elf32_phdr
+#define elf_shdr	elf32_shdr
 #define elf_note	elf32_note
 #define elf_addr_t	Elf32_Addr
 
diff --git a/include/linux/elf.h b/include/linux/elf.h
index ad990c5f63f6..ccde3fd45f36 100644
--- a/include/linux/elf.h
+++ b/include/linux/elf.h
@@ -396,6 +396,7 @@ extern Elf32_Dyn _DYNAMIC [];
 #define elf_phdr	elf32_phdr
 #define elf_note	elf32_note
 #define elf_addr_t	Elf32_Off
+#define Elf_Half	Elf32_Half
 
 #else
 
@@ -404,6 +405,7 @@ extern Elf64_Dyn _DYNAMIC [];
 #define elf_phdr	elf64_phdr
 #define elf_note	elf64_note
 #define elf_addr_t	Elf64_Off
+#define Elf_Half	Elf64_Half
 
 #endif
 
diff --git a/include/linux/elfcore.h b/include/linux/elfcore.h
index 00d6a68d0421..cfda74f521b5 100644
--- a/include/linux/elfcore.h
+++ b/include/linux/elfcore.h
@@ -8,6 +8,8 @@
 #include <linux/user.h>
 #endif
 #include <linux/ptrace.h>
+#include <linux/elf.h>
+#include <linux/fs.h>
 
 struct elf_siginfo
 {
@@ -150,5 +152,19 @@ static inline int elf_core_copy_task_xfpregs(struct task_struct *t, elf_fpxregse
 
 #endif /* __KERNEL__ */
 
+/*
+ * These functions parameterize elf_core_dump in fs/binfmt_elf.c to write out
+ * extra segments containing the gate DSO contents.  Dumping its
+ * contents makes post-mortem fully interpretable later without matching up
+ * the same kernel and hardware config to see what PC values meant.
+ * Dumping its extra ELF program headers includes all the other information
+ * a debugger needs to easily find how the gate DSO was being used.
+ */
+extern Elf_Half elf_core_extra_phdrs(void);
+extern int
+elf_core_write_extra_phdrs(struct file *file, loff_t offset, size_t *size,
+			   unsigned long limit);
+extern int
+elf_core_write_extra_data(struct file *file, size_t *size, unsigned long limit);
 
 #endif /* _LINUX_ELFCORE_H */
diff --git a/kernel/Makefile b/kernel/Makefile
index 7b974699f8c2..a987aa1676b5 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -91,6 +91,9 @@ obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o
 obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o
 obj-$(CONFIG_TRACEPOINTS) += tracepoint.o
 obj-$(CONFIG_LATENCYTOP) += latencytop.o
+obj-$(CONFIG_BINFMT_ELF) += elfcore.o
+obj-$(CONFIG_COMPAT_BINFMT_ELF) += elfcore.o
+obj-$(CONFIG_BINFMT_ELF_FDPIC) += elfcore.o
 obj-$(CONFIG_FUNCTION_TRACER) += trace/
 obj-$(CONFIG_TRACING) += trace/
 obj-$(CONFIG_X86_DS) += trace/
diff --git a/kernel/elfcore.c b/kernel/elfcore.c
new file mode 100644
index 000000000000..5445741f4b4c
--- /dev/null
+++ b/kernel/elfcore.c
@@ -0,0 +1,23 @@
+#include <linux/elf.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+
+#include <asm/elf.h>
+
+
+Elf_Half __weak elf_core_extra_phdrs(void)
+{
+	return 0;
+}
+
+int __weak elf_core_write_extra_phdrs(struct file *file, loff_t offset, size_t *size,
+				      unsigned long limit)
+{
+	return 1;
+}
+
+int __weak elf_core_write_extra_data(struct file *file, size_t *size,
+				     unsigned long limit)
+{
+	return 1;
+}
-- 
cgit v1.2.3


From 8d9032bbe4671dc481261ccd4e161cd96e54b118 Mon Sep 17 00:00:00 2001
From: Daisuke HATAYAMA <d.hatayama@jp.fujitsu.com>
Date: Fri, 5 Mar 2010 13:44:10 -0800
Subject: elf coredump: add extended numbering support

The current ELF dumper implementation can produce broken corefiles if
program headers exceed 65535.  This number is determined by the number of
vmas which the process have.  In particular, some extreme programs may use
more than 65535 vmas.  (If you google max_map_count, you can find some
users facing this problem.) This kind of program never be able to generate
correct coredumps.

This patch implements ``extended numbering'' that uses sh_info field of
the first section header instead of e_phnum field in order to represent
upto 4294967295 vmas.

This is supported by
AMD64-ABI(http://www.x86-64.org/documentation.html) and
Solaris(http://docs.sun.com/app/docs/doc/817-1984/).
Of course, we are preparing patches for gdb and binutils.

Signed-off-by: Daisuke HATAYAMA <d.hatayama@jp.fujitsu.com>
Cc: "Luck, Tony" <tony.luck@intel.com>
Cc: Jeff Dike <jdike@addtoit.com>
Cc: David Howells <dhowells@redhat.com>
Cc: Greg Ungerer <gerg@snapgear.com>
Cc: Roland McGrath <roland@redhat.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Cc: <linux-arch@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/ia64/kernel/elfcore.c | 16 +++++++++++
 arch/um/sys-i386/elfcore.c | 16 +++++++++++
 fs/binfmt_elf.c            | 66 +++++++++++++++++++++++++++++++++++++++++++---
 fs/binfmt_elf_fdpic.c      | 63 +++++++++++++++++++++++++++++++++++++++++--
 include/linux/elf.h        | 26 +++++++++++++++++-
 include/linux/elfcore.h    |  1 +
 kernel/elfcore.c           |  5 ++++
 7 files changed, 187 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/arch/ia64/kernel/elfcore.c b/arch/ia64/kernel/elfcore.c
index 57a2298a8581..bac1639bc320 100644
--- a/arch/ia64/kernel/elfcore.c
+++ b/arch/ia64/kernel/elfcore.c
@@ -62,3 +62,19 @@ int elf_core_write_extra_data(struct file *file, size_t *size,
 	}
 	return 1;
 }
+
+size_t elf_core_extra_data_size(void)
+{
+	const struct elf_phdr *const gate_phdrs =
+		(const struct elf_phdr *) (GATE_ADDR + GATE_EHDR->e_phoff);
+	int i;
+	size_t size = 0;
+
+	for (i = 0; i < GATE_EHDR->e_phnum; ++i) {
+		if (gate_phdrs[i].p_type == PT_LOAD) {
+			size += PAGE_ALIGN(gate_phdrs[i].p_memsz);
+			break;
+		}
+	}
+	return size;
+}
diff --git a/arch/um/sys-i386/elfcore.c b/arch/um/sys-i386/elfcore.c
index 30cac52a04b4..6bb49b687c97 100644
--- a/arch/um/sys-i386/elfcore.c
+++ b/arch/um/sys-i386/elfcore.c
@@ -65,3 +65,19 @@ int elf_core_write_extra_data(struct file *file, size_t *size,
 	}
 	return 1;
 }
+
+size_t elf_core_extra_data_size(void)
+{
+	if ( vsyscall_ehdr ) {
+		const struct elfhdr *const ehdrp =
+			(struct elfhdr *)vsyscall_ehdr;
+		const struct elf_phdr *const phdrp =
+			(const struct elf_phdr *) (vsyscall_ehdr + ehdrp->e_phoff);
+		int i;
+
+		for (i = 0; i < ehdrp->e_phnum; ++i)
+			if (phdrp[i].p_type == PT_LOAD)
+				return (size_t) phdrp[i].p_filesz;
+	}
+	return 0;
+}
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 6fc49b6ed936..78de530cfb02 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1838,6 +1838,34 @@ static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
 	return gate_vma;
 }
 
+static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
+			     elf_addr_t e_shoff, int segs)
+{
+	elf->e_shoff = e_shoff;
+	elf->e_shentsize = sizeof(*shdr4extnum);
+	elf->e_shnum = 1;
+	elf->e_shstrndx = SHN_UNDEF;
+
+	memset(shdr4extnum, 0, sizeof(*shdr4extnum));
+
+	shdr4extnum->sh_type = SHT_NULL;
+	shdr4extnum->sh_size = elf->e_shnum;
+	shdr4extnum->sh_link = elf->e_shstrndx;
+	shdr4extnum->sh_info = segs;
+}
+
+static size_t elf_core_vma_data_size(struct vm_area_struct *gate_vma,
+				     unsigned long mm_flags)
+{
+	struct vm_area_struct *vma;
+	size_t size = 0;
+
+	for (vma = first_vma(current, gate_vma); vma != NULL;
+	     vma = next_vma(vma, gate_vma))
+		size += vma_dump_size(vma, mm_flags);
+	return size;
+}
+
 /*
  * Actual dumper
  *
@@ -1857,6 +1885,9 @@ static int elf_core_dump(struct coredump_params *cprm)
 	unsigned long mm_flags;
 	struct elf_note_info info;
 	struct elf_phdr *phdr4note = NULL;
+	struct elf_shdr *shdr4extnum = NULL;
+	Elf_Half e_phnum;
+	elf_addr_t e_shoff;
 
 	/*
 	 * We no longer stop all VM operations.
@@ -1885,12 +1916,19 @@ static int elf_core_dump(struct coredump_params *cprm)
 	if (gate_vma != NULL)
 		segs++;
 
+	/* for notes section */
+	segs++;
+
+	/* If segs > PN_XNUM(0xffff), then e_phnum overflows. To avoid
+	 * this, kernel supports extended numbering. Have a look at
+	 * include/linux/elf.h for further information. */
+	e_phnum = segs > PN_XNUM ? PN_XNUM : segs;
+
 	/*
 	 * Collect all the non-memory information about the process for the
 	 * notes.  This also sets up the file header.
 	 */
-	if (!fill_note_info(elf, segs + 1, /* including notes section */
-			    &info, cprm->signr, cprm->regs))
+	if (!fill_note_info(elf, e_phnum, &info, cprm->signr, cprm->regs))
 		goto cleanup;
 
 	has_dumped = 1;
@@ -1900,7 +1938,7 @@ static int elf_core_dump(struct coredump_params *cprm)
 	set_fs(KERNEL_DS);
 
 	offset += sizeof(*elf);				/* Elf header */
-	offset += (segs + 1) * sizeof(struct elf_phdr); /* Program headers */
+	offset += segs * sizeof(struct elf_phdr);	/* Program headers */
 	foffset = offset;
 
 	/* Write notes phdr entry */
@@ -1926,6 +1964,19 @@ static int elf_core_dump(struct coredump_params *cprm)
 	 */
 	mm_flags = current->mm->flags;
 
+	offset += elf_core_vma_data_size(gate_vma, mm_flags);
+	offset += elf_core_extra_data_size();
+	e_shoff = offset;
+
+	if (e_phnum == PN_XNUM) {
+		shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
+		if (!shdr4extnum)
+			goto end_coredump;
+		fill_extnum_info(elf, shdr4extnum, e_shoff, segs);
+	}
+
+	offset = dataoff;
+
 	size += sizeof(*elf);
 	if (size > cprm->limit || !dump_write(cprm->file, elf, sizeof(*elf)))
 		goto end_coredump;
@@ -2003,11 +2054,20 @@ static int elf_core_dump(struct coredump_params *cprm)
 	if (!elf_core_write_extra_data(cprm->file, &size, cprm->limit))
 		goto end_coredump;
 
+	if (e_phnum == PN_XNUM) {
+		size += sizeof(*shdr4extnum);
+		if (size > cprm->limit
+		    || !dump_write(cprm->file, shdr4extnum,
+				   sizeof(*shdr4extnum)))
+			goto end_coredump;
+	}
+
 end_coredump:
 	set_fs(fs);
 
 cleanup:
 	free_note_info(&info);
+	kfree(shdr4extnum);
 	kfree(phdr4note);
 	kfree(elf);
 out:
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 112da491d75d..e49d9c06a4b6 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -1505,6 +1505,22 @@ static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
 	return sz;
 }
 
+static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
+			     elf_addr_t e_shoff, int segs)
+{
+	elf->e_shoff = e_shoff;
+	elf->e_shentsize = sizeof(*shdr4extnum);
+	elf->e_shnum = 1;
+	elf->e_shstrndx = SHN_UNDEF;
+
+	memset(shdr4extnum, 0, sizeof(*shdr4extnum));
+
+	shdr4extnum->sh_type = SHT_NULL;
+	shdr4extnum->sh_size = elf->e_shnum;
+	shdr4extnum->sh_link = elf->e_shstrndx;
+	shdr4extnum->sh_info = segs;
+}
+
 /*
  * dump the segments for an MMU process
  */
@@ -1569,6 +1585,17 @@ static int elf_fdpic_dump_segments(struct file *file, size_t *size,
 }
 #endif
 
+static size_t elf_core_vma_data_size(unsigned long mm_flags)
+{
+	struct vm_area_struct *vma;
+	size_t size = 0;
+
+	for (vma = current->mm->mmap; vma; vma->vm_next)
+		if (maydump(vma, mm_flags))
+			size += vma->vm_end - vma->vm_start;
+	return size;
+}
+
 /*
  * Actual dumper
  *
@@ -1601,6 +1628,9 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
 	elf_addr_t *auxv;
 	unsigned long mm_flags;
 	struct elf_phdr *phdr4note = NULL;
+	struct elf_shdr *shdr4extnum = NULL;
+	Elf_Half e_phnum;
+	elf_addr_t e_shoff;
 
 	/*
 	 * We no longer stop all VM operations.
@@ -1667,8 +1697,16 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
 	segs = current->mm->map_count;
 	segs += elf_core_extra_phdrs();
 
+	/* for notes section */
+	segs++;
+
+	/* If segs > PN_XNUM(0xffff), then e_phnum overflows. To avoid
+	 * this, kernel supports extended numbering. Have a look at
+	 * include/linux/elf.h for further information. */
+	e_phnum = segs > PN_XNUM ? PN_XNUM : segs;
+
 	/* Set up header */
-	fill_elf_fdpic_header(elf, segs + 1);	/* including notes section */
+	fill_elf_fdpic_header(elf, e_phnum);
 
 	has_dumped = 1;
 	current->flags |= PF_DUMPCORE;
@@ -1708,7 +1746,7 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
 	set_fs(KERNEL_DS);
 
 	offset += sizeof(*elf);				/* Elf header */
-	offset += (segs+1) * sizeof(struct elf_phdr);	/* Program headers */
+	offset += segs * sizeof(struct elf_phdr);	/* Program headers */
 	foffset = offset;
 
 	/* Write notes phdr entry */
@@ -1738,6 +1776,19 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
 	 */
 	mm_flags = current->mm->flags;
 
+	offset += elf_core_vma_data_size(mm_flags);
+	offset += elf_core_extra_data_size();
+	e_shoff = offset;
+
+	if (e_phnum == PN_XNUM) {
+		shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
+		if (!shdr4extnum)
+			goto end_coredump;
+		fill_extnum_info(elf, shdr4extnum, e_shoff, segs);
+	}
+
+	offset = dataoff;
+
 	size += sizeof(*elf);
 	if (size > cprm->limit || !dump_write(cprm->file, elf, sizeof(*elf)))
 		goto end_coredump;
@@ -1802,6 +1853,14 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
 	if (!elf_core_write_extra_data(cprm->file, &size, cprm->limit))
 		goto end_coredump;
 
+	if (e_phnum == PN_XNUM) {
+		size += sizeof(*shdr4extnum);
+		if (size > cprm->limit
+		    || !dump_write(cprm->file, shdr4extnum,
+				   sizeof(*shdr4extnum)))
+			goto end_coredump;
+	}
+
 	if (cprm->file->f_pos != offset) {
 		/* Sanity check */
 		printk(KERN_WARNING
diff --git a/include/linux/elf.h b/include/linux/elf.h
index ccde3fd45f36..597858418051 100644
--- a/include/linux/elf.h
+++ b/include/linux/elf.h
@@ -50,6 +50,28 @@ typedef __s64	Elf64_Sxword;
 
 #define PT_GNU_STACK	(PT_LOOS + 0x474e551)
 
+/*
+ * Extended Numbering
+ *
+ * If the real number of program header table entries is larger than
+ * or equal to PN_XNUM(0xffff), it is set to sh_info field of the
+ * section header at index 0, and PN_XNUM is set to e_phnum
+ * field. Otherwise, the section header at index 0 is zero
+ * initialized, if it exists.
+ *
+ * Specifications are available in:
+ *
+ * - Sun microsystems: Linker and Libraries.
+ *   Part No: 817-1984-17, September 2008.
+ *   URL: http://docs.sun.com/app/docs/doc/817-1984
+ *
+ * - System V ABI AMD64 Architecture Processor Supplement
+ *   Draft Version 0.99.,
+ *   May 11, 2009.
+ *   URL: http://www.x86-64.org/
+ */
+#define PN_XNUM 0xffff
+
 /* These constants define the different elf file types */
 #define ET_NONE   0
 #define ET_REL    1
@@ -286,7 +308,7 @@ typedef struct elf64_phdr {
 #define SHN_COMMON	0xfff2
 #define SHN_HIRESERVE	0xffff
  
-typedef struct {
+typedef struct elf32_shdr {
   Elf32_Word	sh_name;
   Elf32_Word	sh_type;
   Elf32_Word	sh_flags;
@@ -394,6 +416,7 @@ typedef struct elf64_note {
 extern Elf32_Dyn _DYNAMIC [];
 #define elfhdr		elf32_hdr
 #define elf_phdr	elf32_phdr
+#define elf_shdr	elf32_shdr
 #define elf_note	elf32_note
 #define elf_addr_t	Elf32_Off
 #define Elf_Half	Elf32_Half
@@ -403,6 +426,7 @@ extern Elf32_Dyn _DYNAMIC [];
 extern Elf64_Dyn _DYNAMIC [];
 #define elfhdr		elf64_hdr
 #define elf_phdr	elf64_phdr
+#define elf_shdr	elf64_shdr
 #define elf_note	elf64_note
 #define elf_addr_t	Elf64_Off
 #define Elf_Half	Elf64_Half
diff --git a/include/linux/elfcore.h b/include/linux/elfcore.h
index cfda74f521b5..e687bc3ba4da 100644
--- a/include/linux/elfcore.h
+++ b/include/linux/elfcore.h
@@ -166,5 +166,6 @@ elf_core_write_extra_phdrs(struct file *file, loff_t offset, size_t *size,
 			   unsigned long limit);
 extern int
 elf_core_write_extra_data(struct file *file, size_t *size, unsigned long limit);
+extern size_t elf_core_extra_data_size(void);
 
 #endif /* _LINUX_ELFCORE_H */
diff --git a/kernel/elfcore.c b/kernel/elfcore.c
index 5445741f4b4c..ff915efef66d 100644
--- a/kernel/elfcore.c
+++ b/kernel/elfcore.c
@@ -21,3 +21,8 @@ int __weak elf_core_write_extra_data(struct file *file, size_t *size,
 {
 	return 1;
 }
+
+size_t __weak elf_core_extra_data_size(void)
+{
+	return 0;
+}
-- 
cgit v1.2.3


From 30736a4d43f4af7f1a7836d6a266be17082195c4 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@redhat.com>
Date: Fri, 5 Mar 2010 13:44:12 -0800
Subject: coredump: pass mm->flags as a coredump parameter for consistency

Pass mm->flags as a coredump parameter for consistency.

 ---
1787         if (mm->core_state || !get_dumpable(mm)) {  <- (1)
1788                 up_write(&mm->mmap_sem);
1789                 put_cred(cred);
1790                 goto fail;
1791         }
1792
[...]
1798         if (get_dumpable(mm) == 2) {    /* Setuid core dump mode */ <-(2)
1799                 flag = O_EXCL;          /* Stop rewrite attacks */
1800                 cred->fsuid = 0;        /* Dump root private */
1801         }
 ---

Since dumpable bits are not protected by lock, there is a chance to change
these bits between (1) and (2).

To solve this issue, this patch copies mm->flags to
coredump_params.mm_flags at the beginning of do_coredump() and uses it
instead of get_dumpable() while dumping core.

This copy is also passed to binfmt->core_dump, since elf*_core_dump() uses
dump_filter bits in mm->flags.

[akpm@linux-foundation.org: fix merge]
Signed-off-by: Masami Hiramatsu <mhiramat@redhat.com>
Acked-by: Roland McGrath <roland@redhat.com>
Cc: Hidehiro Kawai <hidehiro.kawai.ez@hitachi.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Ingo Molnar <mingo@elte.hu>
Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/binfmt_elf.c         | 14 +++-----------
 fs/binfmt_elf_fdpic.c   | 14 +++-----------
 fs/exec.c               | 20 ++++++++++++++++----
 include/linux/binfmts.h |  1 +
 4 files changed, 23 insertions(+), 26 deletions(-)

(limited to 'include/linux')

diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 78de530cfb02..535e763ab1a6 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1882,7 +1882,6 @@ static int elf_core_dump(struct coredump_params *cprm)
 	struct vm_area_struct *vma, *gate_vma;
 	struct elfhdr *elf = NULL;
 	loff_t offset = 0, dataoff, foffset;
-	unsigned long mm_flags;
 	struct elf_note_info info;
 	struct elf_phdr *phdr4note = NULL;
 	struct elf_shdr *shdr4extnum = NULL;
@@ -1957,14 +1956,7 @@ static int elf_core_dump(struct coredump_params *cprm)
 
 	dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
 
-	/*
-	 * We must use the same mm->flags while dumping core to avoid
-	 * inconsistency between the program headers and bodies, otherwise an
-	 * unusable core file can be generated.
-	 */
-	mm_flags = current->mm->flags;
-
-	offset += elf_core_vma_data_size(gate_vma, mm_flags);
+	offset += elf_core_vma_data_size(gate_vma, cprm->mm_flags);
 	offset += elf_core_extra_data_size();
 	e_shoff = offset;
 
@@ -1995,7 +1987,7 @@ static int elf_core_dump(struct coredump_params *cprm)
 		phdr.p_offset = offset;
 		phdr.p_vaddr = vma->vm_start;
 		phdr.p_paddr = 0;
-		phdr.p_filesz = vma_dump_size(vma, mm_flags);
+		phdr.p_filesz = vma_dump_size(vma, cprm->mm_flags);
 		phdr.p_memsz = vma->vm_end - vma->vm_start;
 		offset += phdr.p_filesz;
 		phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
@@ -2030,7 +2022,7 @@ static int elf_core_dump(struct coredump_params *cprm)
 		unsigned long addr;
 		unsigned long end;
 
-		end = vma->vm_start + vma_dump_size(vma, mm_flags);
+		end = vma->vm_start + vma_dump_size(vma, cprm->mm_flags);
 
 		for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
 			struct page *page;
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index e49d9c06a4b6..6d6a16c5e9bb 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -1626,7 +1626,6 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
 #endif
 	int thread_status_size = 0;
 	elf_addr_t *auxv;
-	unsigned long mm_flags;
 	struct elf_phdr *phdr4note = NULL;
 	struct elf_shdr *shdr4extnum = NULL;
 	Elf_Half e_phnum;
@@ -1769,14 +1768,7 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
 	/* Page-align dumped data */
 	dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
 
-	/*
-	 * We must use the same mm->flags while dumping core to avoid
-	 * inconsistency between the program headers and bodies, otherwise an
-	 * unusable core file can be generated.
-	 */
-	mm_flags = current->mm->flags;
-
-	offset += elf_core_vma_data_size(mm_flags);
+	offset += elf_core_vma_data_size(cprm->mm_flags);
 	offset += elf_core_extra_data_size();
 	e_shoff = offset;
 
@@ -1809,7 +1801,7 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
 		phdr.p_offset = offset;
 		phdr.p_vaddr = vma->vm_start;
 		phdr.p_paddr = 0;
-		phdr.p_filesz = maydump(vma, mm_flags) ? sz : 0;
+		phdr.p_filesz = maydump(vma, cprm->mm_flags) ? sz : 0;
 		phdr.p_memsz = sz;
 		offset += phdr.p_filesz;
 		phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
@@ -1847,7 +1839,7 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
 		goto end_coredump;
 
 	if (elf_fdpic_dump_segments(cprm->file, &size, &cprm->limit,
-				    mm_flags) < 0)
+				    cprm->mm_flags) < 0)
 		goto end_coredump;
 
 	if (!elf_core_write_extra_data(cprm->file, &size, cprm->limit))
diff --git a/fs/exec.c b/fs/exec.c
index da2b31dc4e1c..89d4080c1435 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1748,14 +1748,19 @@ void set_dumpable(struct mm_struct *mm, int value)
 	}
 }
 
-int get_dumpable(struct mm_struct *mm)
+static int __get_dumpable(unsigned long mm_flags)
 {
 	int ret;
 
-	ret = mm->flags & 0x3;
+	ret = mm_flags & MMF_DUMPABLE_MASK;
 	return (ret >= 2) ? 2 : ret;
 }
 
+int get_dumpable(struct mm_struct *mm)
+{
+	return __get_dumpable(mm->flags);
+}
+
 static void wait_for_dump_helpers(struct file *file)
 {
 	struct pipe_inode_info *pipe;
@@ -1799,6 +1804,12 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
 		.signr = signr,
 		.regs = regs,
 		.limit = rlimit(RLIMIT_CORE),
+		/*
+		 * We must use the same mm->flags while dumping core to avoid
+		 * inconsistency of bit flags, since this flag is not protected
+		 * by any locks.
+		 */
+		.mm_flags = mm->flags,
 	};
 
 	audit_core_dumps(signr);
@@ -1817,7 +1828,7 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
 	/*
 	 * If another thread got here first, or we are not dumpable, bail out.
 	 */
-	if (mm->core_state || !get_dumpable(mm)) {
+	if (mm->core_state || !__get_dumpable(cprm.mm_flags)) {
 		up_write(&mm->mmap_sem);
 		put_cred(cred);
 		goto fail;
@@ -1828,7 +1839,8 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
 	 *	process nor do we know its entire history. We only know it
 	 *	was tainted so we dump it as root in mode 2.
 	 */
-	if (get_dumpable(mm) == 2) {	/* Setuid core dump mode */
+	if (__get_dumpable(cprm.mm_flags) == 2) {
+		/* Setuid core dump mode */
 		flag = O_EXCL;		/* Stop rewrite attacks */
 		cred->fsuid = 0;	/* Dump root private */
 	}
diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index 89c6249fc561..c809e286d213 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -74,6 +74,7 @@ struct coredump_params {
 	struct pt_regs *regs;
 	struct file *file;
 	unsigned long limit;
+	unsigned long mm_flags;
 };
 
 /*
-- 
cgit v1.2.3


From 57205026da070b59e9546df352fe465f1aeacf99 Mon Sep 17 00:00:00 2001
From: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Date: Fri, 5 Mar 2010 13:44:25 -0800
Subject: mc13783: rename mc13783_{{un,}mask,ack_irq} to have a mc13783_irq
 prefix
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In the source file group these functions together.

The mc13783 header file provides fallback implementations for the old
names to prevent build failures.  When all users of the old names are
fixed to use the new names these can go away.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Cc: Alessandro Zummo <a.zummo@towertech.it>
Cc: Paul Gortmaker <p_gortmaker@yahoo.com>
Cc: Valentin Longchamp <valentin.longchamp@epfl.ch>
Cc: Sascha Hauer <s.hauer@pengutronix.de>
Cc: Samuel Ortiz <sameo@linux.intel.com>
Cc: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Cc: Luotao Fu <l.fu@pengutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/mfd/mc13783-core.c  | 38 +++++++++++++++++++-------------------
 include/linux/mfd/mc13783.h | 24 +++++++++++++++++++++---
 2 files changed, 40 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/mc13783-core.c b/drivers/mfd/mc13783-core.c
index 735c8a4d164f..a2bd44552042 100644
--- a/drivers/mfd/mc13783-core.c
+++ b/drivers/mfd/mc13783-core.c
@@ -225,7 +225,7 @@ int mc13783_reg_rmw(struct mc13783 *mc13783, unsigned int offset,
 }
 EXPORT_SYMBOL(mc13783_reg_rmw);
 
-int mc13783_mask(struct mc13783 *mc13783, int irq)
+int mc13783_irq_mask(struct mc13783 *mc13783, int irq)
 {
 	int ret;
 	unsigned int offmask = irq < 24 ? MC13783_IRQMASK0 : MC13783_IRQMASK1;
@@ -245,9 +245,9 @@ int mc13783_mask(struct mc13783 *mc13783, int irq)
 
 	return mc13783_reg_write(mc13783, offmask, mask | irqbit);
 }
-EXPORT_SYMBOL(mc13783_mask);
+EXPORT_SYMBOL(mc13783_irq_mask);
 
-int mc13783_unmask(struct mc13783 *mc13783, int irq)
+int mc13783_irq_unmask(struct mc13783 *mc13783, int irq)
 {
 	int ret;
 	unsigned int offmask = irq < 24 ? MC13783_IRQMASK0 : MC13783_IRQMASK1;
@@ -267,7 +267,18 @@ int mc13783_unmask(struct mc13783 *mc13783, int irq)
 
 	return mc13783_reg_write(mc13783, offmask, mask & ~irqbit);
 }
-EXPORT_SYMBOL(mc13783_unmask);
+EXPORT_SYMBOL(mc13783_irq_unmask);
+
+int mc13783_irq_ack(struct mc13783 *mc13783, int irq)
+{
+	unsigned int offstat = irq < 24 ? MC13783_IRQSTAT0 : MC13783_IRQSTAT1;
+	unsigned int val = 1 << (irq < 24 ? irq : irq - 24);
+
+	BUG_ON(irq < 0 || irq >= MC13783_NUM_IRQ);
+
+	return mc13783_reg_write(mc13783, offstat, val);
+}
+EXPORT_SYMBOL(mc13783_irq_ack);
 
 int mc13783_irq_request_nounmask(struct mc13783 *mc13783, int irq,
 		irq_handler_t handler, const char *name, void *dev)
@@ -297,7 +308,7 @@ int mc13783_irq_request(struct mc13783 *mc13783, int irq,
 	if (ret)
 		return ret;
 
-	ret = mc13783_unmask(mc13783, irq);
+	ret = mc13783_irq_unmask(mc13783, irq);
 	if (ret) {
 		mc13783->irqhandler[irq] = NULL;
 		mc13783->irqdata[irq] = NULL;
@@ -317,7 +328,7 @@ int mc13783_irq_free(struct mc13783 *mc13783, int irq, void *dev)
 			mc13783->irqdata[irq] != dev)
 		return -EINVAL;
 
-	ret = mc13783_mask(mc13783, irq);
+	ret = mc13783_irq_mask(mc13783, irq);
 	if (ret)
 		return ret;
 
@@ -333,17 +344,6 @@ static inline irqreturn_t mc13783_irqhandler(struct mc13783 *mc13783, int irq)
 	return mc13783->irqhandler[irq](irq, mc13783->irqdata[irq]);
 }
 
-int mc13783_ackirq(struct mc13783 *mc13783, int irq)
-{
-	unsigned int offstat = irq < 24 ? MC13783_IRQSTAT0 : MC13783_IRQSTAT1;
-	unsigned int val = 1 << (irq < 24 ? irq : irq - 24);
-
-	BUG_ON(irq < 0 || irq >= MC13783_NUM_IRQ);
-
-	return mc13783_reg_write(mc13783, offstat, val);
-}
-EXPORT_SYMBOL(mc13783_ackirq);
-
 /*
  * returns: number of handled irqs or negative error
  * locking: holds mc13783->lock
@@ -422,7 +422,7 @@ static irqreturn_t mc13783_handler_adcdone(int irq, void *data)
 {
 	struct mc13783_adcdone_data *adcdone_data = data;
 
-	mc13783_ackirq(adcdone_data->mc13783, irq);
+	mc13783_irq_ack(adcdone_data->mc13783, irq);
 
 	complete_all(&adcdone_data->done);
 
@@ -486,7 +486,7 @@ int mc13783_adc_do_conversion(struct mc13783 *mc13783, unsigned int mode,
 	dev_dbg(&mc13783->spidev->dev, "%s: request irq\n", __func__);
 	mc13783_irq_request(mc13783, MC13783_IRQ_ADCDONE,
 			mc13783_handler_adcdone, __func__, &adcdone_data);
-	mc13783_ackirq(mc13783, MC13783_IRQ_ADCDONE);
+	mc13783_irq_ack(mc13783, MC13783_IRQ_ADCDONE);
 
 	mc13783_reg_write(mc13783, MC13783_REG_ADC_0, adc0);
 	mc13783_reg_write(mc13783, MC13783_REG_ADC_1, adc1);
diff --git a/include/linux/mfd/mc13783.h b/include/linux/mfd/mc13783.h
index 94cb51a64037..b8b9f3b4f3e2 100644
--- a/include/linux/mfd/mc13783.h
+++ b/include/linux/mfd/mc13783.h
@@ -26,10 +26,28 @@ int mc13783_irq_request(struct mc13783 *mc13783, int irq,
 int mc13783_irq_request_nounmask(struct mc13783 *mc13783, int irq,
 		irq_handler_t handler, const char *name, void *dev);
 int mc13783_irq_free(struct mc13783 *mc13783, int irq, void *dev);
-int mc13783_ackirq(struct mc13783 *mc13783, int irq);
 
-int mc13783_mask(struct mc13783 *mc13783, int irq);
-int mc13783_unmask(struct mc13783 *mc13783, int irq);
+int mc13783_irq_mask(struct mc13783 *mc13783, int irq);
+int mc13783_irq_unmask(struct mc13783 *mc13783, int irq);
+int mc13783_irq_ack(struct mc13783 *mc13783, int irq);
+
+static inline int mc13783_mask(struct mc13783 *mc13783, int irq) __deprecated;
+static inline int mc13783_mask(struct mc13783 *mc13783, int irq)
+{
+	return mc13783_irq_mask(mc13783, irq);
+}
+
+static inline int mc13783_unmask(struct mc13783 *mc13783, int irq) __deprecated;
+static inline int mc13783_unmask(struct mc13783 *mc13783, int irq)
+{
+	return mc13783_irq_unmask(mc13783, irq);
+}
+
+static inline int mc13783_ackirq(struct mc13783 *mc13783, int irq) __deprecated;
+static inline int mc13783_ackirq(struct mc13783 *mc13783, int irq)
+{
+	return mc13783_irq_ack(mc13783, irq);
+}
 
 #define MC13783_ADC0		43
 #define MC13783_ADC0_ADREFEN		(1 << 10)
-- 
cgit v1.2.3


From 86c3400810a7a33e176bf33b6b074d881e829374 Mon Sep 17 00:00:00 2001
From: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Date: Fri, 5 Mar 2010 13:44:29 -0800
Subject: mfd/mc13783: new function reading irq mask and status register
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The driver for the mc13783 rtc needs to know if the TODA irq is pending.

Instead of tracking in the rtc driver if the irq is enabled provide that
information, too.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Cc: Alessandro Zummo <a.zummo@towertech.it>
Cc: Paul Gortmaker <p_gortmaker@yahoo.com>
Cc: Valentin Longchamp <valentin.longchamp@epfl.ch>
Cc: Sascha Hauer <s.hauer@pengutronix.de>
Cc: Samuel Ortiz <sameo@linux.intel.com>
Cc: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Cc: Luotao Fu <l.fu@pengutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/mfd/mc13783-core.c  | 35 +++++++++++++++++++++++++++++++++++
 include/linux/mfd/mc13783.h |  2 ++
 2 files changed, 37 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/mfd/mc13783-core.c b/drivers/mfd/mc13783-core.c
index a2bd44552042..62a847e4c2d8 100644
--- a/drivers/mfd/mc13783-core.c
+++ b/drivers/mfd/mc13783-core.c
@@ -269,6 +269,41 @@ int mc13783_irq_unmask(struct mc13783 *mc13783, int irq)
 }
 EXPORT_SYMBOL(mc13783_irq_unmask);
 
+int mc13783_irq_status(struct mc13783 *mc13783, int irq,
+		int *enabled, int *pending)
+{
+	int ret;
+	unsigned int offmask = irq < 24 ? MC13783_IRQMASK0 : MC13783_IRQMASK1;
+	unsigned int offstat = irq < 24 ? MC13783_IRQSTAT0 : MC13783_IRQSTAT1;
+	u32 irqbit = 1 << (irq < 24 ? irq : irq - 24);
+
+	if (irq < 0 || irq >= MC13783_NUM_IRQ)
+		return -EINVAL;
+
+	if (enabled) {
+		u32 mask;
+
+		ret = mc13783_reg_read(mc13783, offmask, &mask);
+		if (ret)
+			return ret;
+
+		*enabled = mask & irqbit;
+	}
+
+	if (pending) {
+		u32 stat;
+
+		ret = mc13783_reg_read(mc13783, offstat, &stat);
+		if (ret)
+			return ret;
+
+		*pending = stat & irqbit;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(mc13783_irq_status);
+
 int mc13783_irq_ack(struct mc13783 *mc13783, int irq)
 {
 	unsigned int offstat = irq < 24 ? MC13783_IRQSTAT0 : MC13783_IRQSTAT1;
diff --git a/include/linux/mfd/mc13783.h b/include/linux/mfd/mc13783.h
index b8b9f3b4f3e2..8895d9d8879c 100644
--- a/include/linux/mfd/mc13783.h
+++ b/include/linux/mfd/mc13783.h
@@ -29,6 +29,8 @@ int mc13783_irq_free(struct mc13783 *mc13783, int irq, void *dev);
 
 int mc13783_irq_mask(struct mc13783 *mc13783, int irq);
 int mc13783_irq_unmask(struct mc13783 *mc13783, int irq);
+int mc13783_irq_status(struct mc13783 *mc13783, int irq,
+		int *enabled, int *pending);
 int mc13783_irq_ack(struct mc13783 *mc13783, int irq);
 
 static inline int mc13783_mask(struct mc13783 *mc13783, int irq) __deprecated;
-- 
cgit v1.2.3


From e952805d2d2e706aed182723e5ab3ec0b1f91de3 Mon Sep 17 00:00:00 2001
From: Wolfram Sang <w.sang@pengutronix.de>
Date: Fri, 5 Mar 2010 13:44:33 -0800
Subject: gpio: add driver for MAX7300 I2C GPIO extender

Add the MAX7300-I2C variant of the MAX7301-SPI version.  Both chips share
the same core logic, so the generic part of the in-kernel SPI-driver is
refactored into a generic part.  The I2C and SPI specific funtions are
then wrapped into seperate drivers picking up the generic part.

Signed-off-by: Wolfram Sang <w.sang@pengutronix.de>
Cc: Juergen Beisert <j.beisert@pengutronix.de>
Cc: David Brownell <dbrownell@users.sourceforge.net>
Cc: Jean Delvare <khali@linux-fr.org>
Cc: Anton Vorontsov <avorontsov@ru.mvista.com>
Cc: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/gpio/Kconfig        |  13 +-
 drivers/gpio/Makefile       |   2 +
 drivers/gpio/max7300.c      |  94 ++++++++++++++
 drivers/gpio/max7301.c      | 293 +++++---------------------------------------
 drivers/gpio/max730x.c      | 244 ++++++++++++++++++++++++++++++++++++
 include/linux/spi/max7301.h |  18 +++
 6 files changed, 404 insertions(+), 260 deletions(-)
 create mode 100644 drivers/gpio/max7300.c
 create mode 100644 drivers/gpio/max730x.c

(limited to 'include/linux')

diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig
index 1f1d88ae68d6..f3549b8779d8 100644
--- a/drivers/gpio/Kconfig
+++ b/drivers/gpio/Kconfig
@@ -65,6 +65,9 @@ config GPIO_SYSFS
 
 # put expanders in the right section, in alphabetical order
 
+config GPIO_MAX730X
+	tristate
+
 comment "Memory mapped GPIO expanders:"
 
 config GPIO_PL061
@@ -87,6 +90,13 @@ config GPIO_VR41XX
 
 comment "I2C GPIO expanders:"
 
+config GPIO_MAX7300
+	tristate "Maxim MAX7300 GPIO expander"
+	depends on I2C
+	select GPIO_MAX730X
+	help
+	  GPIO driver for Maxim MAX7301 I2C-based GPIO expander.
+
 config GPIO_MAX732X
 	tristate "MAX7319, MAX7320-7327 I2C Port Expanders"
 	depends on I2C
@@ -226,8 +236,9 @@ comment "SPI GPIO expanders:"
 config GPIO_MAX7301
 	tristate "Maxim MAX7301 GPIO expander"
 	depends on SPI_MASTER
+	select GPIO_MAX730X
 	help
-	  gpio driver for Maxim MAX7301 SPI GPIO expander.
+	  GPIO driver for Maxim MAX7301 SPI-based GPIO expander.
 
 config GPIO_MCP23S08
 	tristate "Microchip MCP23S08 I/O expander"
diff --git a/drivers/gpio/Makefile b/drivers/gpio/Makefile
index 48687238edb1..508a1b202cdb 100644
--- a/drivers/gpio/Makefile
+++ b/drivers/gpio/Makefile
@@ -7,6 +7,8 @@ obj-$(CONFIG_GPIOLIB)		+= gpiolib.o
 obj-$(CONFIG_GPIO_ADP5520)	+= adp5520-gpio.o
 obj-$(CONFIG_GPIO_ADP5588)	+= adp5588-gpio.o
 obj-$(CONFIG_GPIO_LANGWELL)	+= langwell_gpio.o
+obj-$(CONFIG_GPIO_MAX730X)	+= max730x.o
+obj-$(CONFIG_GPIO_MAX7300)	+= max7300.o
 obj-$(CONFIG_GPIO_MAX7301)	+= max7301.o
 obj-$(CONFIG_GPIO_MAX732X)	+= max732x.o
 obj-$(CONFIG_GPIO_MC33880)	+= mc33880.o
diff --git a/drivers/gpio/max7300.c b/drivers/gpio/max7300.c
new file mode 100644
index 000000000000..9d74eef1157a
--- /dev/null
+++ b/drivers/gpio/max7300.c
@@ -0,0 +1,94 @@
+/*
+ * drivers/gpio/max7300.c
+ *
+ * Copyright (C) 2009 Wolfram Sang, Pengutronix
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Check max730x.c for further details.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/platform_device.h>
+#include <linux/mutex.h>
+#include <linux/i2c.h>
+#include <linux/spi/max7301.h>
+
+static int max7300_i2c_write(struct device *dev, unsigned int reg,
+				unsigned int val)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+
+	return i2c_smbus_write_byte_data(client, reg, val);
+}
+
+static int max7300_i2c_read(struct device *dev, unsigned int reg)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+
+	return i2c_smbus_read_byte_data(client, reg);
+}
+
+static int __devinit max7300_probe(struct i2c_client *client,
+			 const struct i2c_device_id *id)
+{
+	struct max7301 *ts;
+	int ret;
+
+	if (!i2c_check_functionality(client->adapter,
+			I2C_FUNC_SMBUS_BYTE_DATA))
+		return -EIO;
+
+	ts = kzalloc(sizeof(struct max7301), GFP_KERNEL);
+	if (!ts)
+		return -ENOMEM;
+
+	ts->read = max7300_i2c_read;
+	ts->write = max7300_i2c_write;
+	ts->dev = &client->dev;
+
+	ret = __max730x_probe(ts);
+	if (ret)
+		kfree(ts);
+	return ret;
+}
+
+static int __devexit max7300_remove(struct i2c_client *client)
+{
+	return __max730x_remove(&client->dev);
+}
+
+static const struct i2c_device_id max7300_id[] = {
+	{ "max7300", 0 },
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, max7300_id);
+
+static struct i2c_driver max7300_driver = {
+	.driver = {
+		.name = "max7300",
+		.owner = THIS_MODULE,
+	},
+	.probe = max7300_probe,
+	.remove = __devexit_p(max7300_remove),
+	.id_table = max7300_id,
+};
+
+static int __init max7300_init(void)
+{
+	return i2c_add_driver(&max7300_driver);
+}
+subsys_initcall(max7300_init);
+
+static void __exit max7300_exit(void)
+{
+	i2c_del_driver(&max7300_driver);
+}
+module_exit(max7300_exit);
+
+MODULE_AUTHOR("Wolfram Sang");
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("MAX7300 GPIO-Expander");
diff --git a/drivers/gpio/max7301.c b/drivers/gpio/max7301.c
index 480956f1ca50..965d9b1ea13e 100644
--- a/drivers/gpio/max7301.c
+++ b/drivers/gpio/max7301.c
@@ -1,98 +1,41 @@
-/**
+/*
  * drivers/gpio/max7301.c
  *
  * Copyright (C) 2006 Juergen Beisert, Pengutronix
  * Copyright (C) 2008 Guennadi Liakhovetski, Pengutronix
+ * Copyright (C) 2009 Wolfram Sang, Pengutronix
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
  *
- * The Maxim's MAX7301 device is an SPI driven GPIO expander. There are
- * 28 GPIOs. 8 of them can trigger an interrupt. See datasheet for more
- * details
- * Note:
- * - DIN must be stable at the rising edge of clock.
- * - when writing:
- *   - always clock in 16 clocks at once
- *   - at DIN: D15 first, D0 last
- *   - D0..D7 = databyte, D8..D14 = commandbyte
- *   - D15 = low -> write command
- * - when reading
- *   - always clock in 16 clocks at once
- *   - at DIN: D15 first, D0 last
- *   - D0..D7 = dummy, D8..D14 = register address
- *   - D15 = high -> read command
- *   - raise CS and assert it again
- *   - always clock in 16 clocks at once
- *   - at DOUT: D15 first, D0 last
- *   - D0..D7 contains the data from the first cycle
- *
- * The driver exports a standard gpiochip interface
+ * Check max730x.c for further details.
  */
 
+#include <linux/module.h>
 #include <linux/init.h>
 #include <linux/platform_device.h>
 #include <linux/mutex.h>
 #include <linux/spi/spi.h>
 #include <linux/spi/max7301.h>
-#include <linux/gpio.h>
-
-#define DRIVER_NAME "max7301"
-
-/*
- * Pin configurations, see MAX7301 datasheet page 6
- */
-#define PIN_CONFIG_MASK 0x03
-#define PIN_CONFIG_IN_PULLUP 0x03
-#define PIN_CONFIG_IN_WO_PULLUP 0x02
-#define PIN_CONFIG_OUT 0x01
-
-#define PIN_NUMBER 28
-
-
-/*
- * Some registers must be read back to modify.
- * To save time we cache them here in memory
- */
-struct max7301 {
-	struct mutex	lock;
-	u8		port_config[8];	/* field 0 is unused */
-	u32		out_level;	/* cached output levels */
-	struct gpio_chip chip;
-	struct spi_device *spi;
-};
 
-/**
- * max7301_write - Write a new register content
- * @spi: The SPI device
- * @reg: Register offset
- * @val: Value to write
- *
- * A write to the MAX7301 means one message with one transfer
- *
- * Returns 0 if successful or a negative value on error
- */
-static int max7301_write(struct spi_device *spi, unsigned int reg, unsigned int val)
+/* A write to the MAX7301 means one message with one transfer */
+static int max7301_spi_write(struct device *dev, unsigned int reg,
+				unsigned int val)
 {
+	struct spi_device *spi = to_spi_device(dev);
 	u16 word = ((reg & 0x7F) << 8) | (val & 0xFF);
+
 	return spi_write(spi, (const u8 *)&word, sizeof(word));
 }
 
-/**
- * max7301_read - Read back register content
- * @spi: The SPI device
- * @reg: Register offset
- *
- * A read from the MAX7301 means two transfers; here, one message each
- *
- * Returns positive 8 bit value from device if successful or a
- * negative value on error
- */
-static int max7301_read(struct spi_device *spi, unsigned int reg)
+/* A read from the MAX7301 means two transfers; here, one message each */
+
+static int max7301_spi_read(struct device *dev, unsigned int reg)
 {
 	int ret;
 	u16 word;
+	struct spi_device *spi = to_spi_device(dev);
 
 	word = 0x8000 | (reg << 8);
 	ret = spi_write(spi, (const u8 *)&word, sizeof(word));
@@ -108,125 +51,13 @@ static int max7301_read(struct spi_device *spi, unsigned int reg)
 	return word & 0xff;
 }
 
-static int max7301_direction_input(struct gpio_chip *chip, unsigned offset)
-{
-	struct max7301 *ts = container_of(chip, struct max7301, chip);
-	u8 *config;
-	int ret;
-
-	/* First 4 pins are unused in the controller */
-	offset += 4;
-
-	config = &ts->port_config[offset >> 2];
-
-	mutex_lock(&ts->lock);
-
-	/* Standard GPIO API doesn't support pull-ups, has to be extended.
-	 * Hard-coding no pollup for now. */
-	*config = (*config & ~(3 << (offset & 3))) | (1 << (offset & 3));
-
-	ret = max7301_write(ts->spi, 0x08 + (offset >> 2), *config);
-
-	mutex_unlock(&ts->lock);
-
-	return ret;
-}
-
-static int __max7301_set(struct max7301 *ts, unsigned offset, int value)
-{
-	if (value) {
-		ts->out_level |= 1 << offset;
-		return max7301_write(ts->spi, 0x20 + offset, 0x01);
-	} else {
-		ts->out_level &= ~(1 << offset);
-		return max7301_write(ts->spi, 0x20 + offset, 0x00);
-	}
-}
-
-static int max7301_direction_output(struct gpio_chip *chip, unsigned offset,
-				    int value)
-{
-	struct max7301 *ts = container_of(chip, struct max7301, chip);
-	u8 *config;
-	int ret;
-
-	/* First 4 pins are unused in the controller */
-	offset += 4;
-
-	config = &ts->port_config[offset >> 2];
-
-	mutex_lock(&ts->lock);
-
-	*config = (*config & ~(3 << (offset & 3))) | (1 << (offset & 3));
-
-	ret = __max7301_set(ts, offset, value);
-
-	if (!ret)
-		ret = max7301_write(ts->spi, 0x08 + (offset >> 2), *config);
-
-	mutex_unlock(&ts->lock);
-
-	return ret;
-}
-
-static int max7301_get(struct gpio_chip *chip, unsigned offset)
-{
-	struct max7301 *ts = container_of(chip, struct max7301, chip);
-	int config, level = -EINVAL;
-
-	/* First 4 pins are unused in the controller */
-	offset += 4;
-
-	mutex_lock(&ts->lock);
-
-	config = (ts->port_config[offset >> 2] >> ((offset & 3) * 2)) & 3;
-
-	switch (config) {
-	case 1:
-		/* Output: return cached level */
-		level =  !!(ts->out_level & (1 << offset));
-		break;
-	case 2:
-	case 3:
-		/* Input: read out */
-		level = max7301_read(ts->spi, 0x20 + offset) & 0x01;
-	}
-	mutex_unlock(&ts->lock);
-
-	return level;
-}
-
-static void max7301_set(struct gpio_chip *chip, unsigned offset, int value)
-{
-	struct max7301 *ts = container_of(chip, struct max7301, chip);
-
-	/* First 4 pins are unused in the controller */
-	offset += 4;
-
-	mutex_lock(&ts->lock);
-
-	__max7301_set(ts, offset, value);
-
-	mutex_unlock(&ts->lock);
-}
-
 static int __devinit max7301_probe(struct spi_device *spi)
 {
 	struct max7301 *ts;
-	struct max7301_platform_data *pdata;
-	int i, ret;
-
-	pdata = spi->dev.platform_data;
-	if (!pdata || !pdata->base) {
-		dev_dbg(&spi->dev, "incorrect or missing platform data\n");
-		return -EINVAL;
-	}
+	int ret;
 
-	/*
-	 * bits_per_word cannot be configured in platform data
-	 */
+	/* bits_per_word cannot be configured in platform data */
 	spi->bits_per_word = 16;
-
 	ret = spi_setup(spi);
 	if (ret < 0)
 		return ret;
@@ -235,90 +66,35 @@ static int __devinit max7301_probe(struct spi_device *spi)
 	if (!ts)
 		return -ENOMEM;
 
-	mutex_init(&ts->lock);
-
-	dev_set_drvdata(&spi->dev, ts);
+	ts->read = max7301_spi_read;
+	ts->write = max7301_spi_write;
+	ts->dev = &spi->dev;
 
-	/* Power up the chip and disable IRQ output */
-	max7301_write(spi, 0x04, 0x01);
-
-	ts->spi = spi;
-
-	ts->chip.label = DRIVER_NAME,
-
-	ts->chip.direction_input = max7301_direction_input;
-	ts->chip.get = max7301_get;
-	ts->chip.direction_output = max7301_direction_output;
-	ts->chip.set = max7301_set;
-
-	ts->chip.base = pdata->base;
-	ts->chip.ngpio = PIN_NUMBER;
-	ts->chip.can_sleep = 1;
-	ts->chip.dev = &spi->dev;
-	ts->chip.owner = THIS_MODULE;
-
-	/*
-	 * tristate all pins in hardware and cache the
-	 * register values for later use.
-	 */
-	for (i = 1; i < 8; i++) {
-		int j;
-		/* 0xAA means input with internal pullup disabled */
-		max7301_write(spi, 0x08 + i, 0xAA);
-		ts->port_config[i] = 0xAA;
-		for (j = 0; j < 4; j++) {
-			int offset = (i - 1) * 4 + j;
-			ret = max7301_direction_input(&ts->chip, offset);
-			if (ret)
-				goto exit_destroy;
-		}
-	}
-
-	ret = gpiochip_add(&ts->chip);
+	ret = __max730x_probe(ts);
 	if (ret)
-		goto exit_destroy;
-
-	return ret;
-
-exit_destroy:
-	dev_set_drvdata(&spi->dev, NULL);
-	mutex_destroy(&ts->lock);
-	kfree(ts);
+		kfree(ts);
 	return ret;
 }
 
 static int __devexit max7301_remove(struct spi_device *spi)
 {
-	struct max7301 *ts;
-	int ret;
-
-	ts = dev_get_drvdata(&spi->dev);
-	if (ts == NULL)
-		return -ENODEV;
-
-	dev_set_drvdata(&spi->dev, NULL);
-
-	/* Power down the chip and disable IRQ output */
-	max7301_write(spi, 0x04, 0x00);
-
-	ret = gpiochip_remove(&ts->chip);
-	if (!ret) {
-		mutex_destroy(&ts->lock);
-		kfree(ts);
-	} else
-		dev_err(&spi->dev, "Failed to remove the GPIO controller: %d\n",
-			ret);
-
-	return ret;
+	return __max730x_remove(&spi->dev);
 }
 
+static const struct spi_device_id max7301_id[] = {
+	{ "max7301", 0 },
+	{ }
+};
+MODULE_DEVICE_TABLE(spi, max7301_id);
+
 static struct spi_driver max7301_driver = {
 	.driver = {
-		.name		= DRIVER_NAME,
-		.owner		= THIS_MODULE,
+		.name = "max7301",
+		.owner = THIS_MODULE,
 	},
-	.probe		= max7301_probe,
-	.remove		= __devexit_p(max7301_remove),
+	.probe = max7301_probe,
+	.remove = __devexit_p(max7301_remove),
+	.id_table = max7301_id,
 };
 
 static int __init max7301_init(void)
@@ -336,7 +112,6 @@ static void __exit max7301_exit(void)
 }
 module_exit(max7301_exit);
 
-MODULE_AUTHOR("Juergen Beisert");
+MODULE_AUTHOR("Juergen Beisert, Wolfram Sang");
 MODULE_LICENSE("GPL v2");
-MODULE_DESCRIPTION("MAX7301 SPI based GPIO-Expander");
-MODULE_ALIAS("spi:" DRIVER_NAME);
+MODULE_DESCRIPTION("MAX7301 GPIO-Expander");
diff --git a/drivers/gpio/max730x.c b/drivers/gpio/max730x.c
new file mode 100644
index 000000000000..c9bced55f82b
--- /dev/null
+++ b/drivers/gpio/max730x.c
@@ -0,0 +1,244 @@
+/**
+ * drivers/gpio/max7301.c
+ *
+ * Copyright (C) 2006 Juergen Beisert, Pengutronix
+ * Copyright (C) 2008 Guennadi Liakhovetski, Pengutronix
+ * Copyright (C) 2009 Wolfram Sang, Pengutronix
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * The Maxim MAX7300/1 device is an I2C/SPI driven GPIO expander. There are
+ * 28 GPIOs. 8 of them can trigger an interrupt. See datasheet for more
+ * details
+ * Note:
+ * - DIN must be stable at the rising edge of clock.
+ * - when writing:
+ *   - always clock in 16 clocks at once
+ *   - at DIN: D15 first, D0 last
+ *   - D0..D7 = databyte, D8..D14 = commandbyte
+ *   - D15 = low -> write command
+ * - when reading
+ *   - always clock in 16 clocks at once
+ *   - at DIN: D15 first, D0 last
+ *   - D0..D7 = dummy, D8..D14 = register address
+ *   - D15 = high -> read command
+ *   - raise CS and assert it again
+ *   - always clock in 16 clocks at once
+ *   - at DOUT: D15 first, D0 last
+ *   - D0..D7 contains the data from the first cycle
+ *
+ * The driver exports a standard gpiochip interface
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/platform_device.h>
+#include <linux/mutex.h>
+#include <linux/spi/max7301.h>
+#include <linux/gpio.h>
+
+/*
+ * Pin configurations, see MAX7301 datasheet page 6
+ */
+#define PIN_CONFIG_MASK 0x03
+#define PIN_CONFIG_IN_PULLUP 0x03
+#define PIN_CONFIG_IN_WO_PULLUP 0x02
+#define PIN_CONFIG_OUT 0x01
+
+#define PIN_NUMBER 28
+
+static int max7301_direction_input(struct gpio_chip *chip, unsigned offset)
+{
+	struct max7301 *ts = container_of(chip, struct max7301, chip);
+	u8 *config;
+	u8 offset_bits;
+	int ret;
+
+	/* First 4 pins are unused in the controller */
+	offset += 4;
+	offset_bits = (offset & 3) << 1;
+
+	config = &ts->port_config[offset >> 2];
+
+	mutex_lock(&ts->lock);
+
+	/* Standard GPIO API doesn't support pull-ups, has to be extended.
+	 * Hard-coding no pollup for now. */
+	*config = (*config & ~(PIN_CONFIG_MASK << offset_bits))
+			   | (PIN_CONFIG_IN_WO_PULLUP << offset_bits);
+
+	ret = ts->write(ts->dev, 0x08 + (offset >> 2), *config);
+
+	mutex_unlock(&ts->lock);
+
+	return ret;
+}
+
+static int __max7301_set(struct max7301 *ts, unsigned offset, int value)
+{
+	if (value) {
+		ts->out_level |= 1 << offset;
+		return ts->write(ts->dev, 0x20 + offset, 0x01);
+	} else {
+		ts->out_level &= ~(1 << offset);
+		return ts->write(ts->dev, 0x20 + offset, 0x00);
+	}
+}
+
+static int max7301_direction_output(struct gpio_chip *chip, unsigned offset,
+				    int value)
+{
+	struct max7301 *ts = container_of(chip, struct max7301, chip);
+	u8 *config;
+	u8 offset_bits;
+	int ret;
+
+	/* First 4 pins are unused in the controller */
+	offset += 4;
+	offset_bits = (offset & 3) << 1;
+
+	config = &ts->port_config[offset >> 2];
+
+	mutex_lock(&ts->lock);
+
+	*config = (*config & ~(PIN_CONFIG_MASK << offset_bits))
+			   | (PIN_CONFIG_OUT << offset_bits);
+
+	ret = __max7301_set(ts, offset, value);
+
+	if (!ret)
+		ret = ts->write(ts->dev, 0x08 + (offset >> 2), *config);
+
+	mutex_unlock(&ts->lock);
+
+	return ret;
+}
+
+static int max7301_get(struct gpio_chip *chip, unsigned offset)
+{
+	struct max7301 *ts = container_of(chip, struct max7301, chip);
+	int config, level = -EINVAL;
+
+	/* First 4 pins are unused in the controller */
+	offset += 4;
+
+	mutex_lock(&ts->lock);
+
+	config = (ts->port_config[offset >> 2] >> ((offset & 3) << 1))
+			& PIN_CONFIG_MASK;
+
+	switch (config) {
+	case PIN_CONFIG_OUT:
+		/* Output: return cached level */
+		level =  !!(ts->out_level & (1 << offset));
+		break;
+	case PIN_CONFIG_IN_WO_PULLUP:
+	case PIN_CONFIG_IN_PULLUP:
+		/* Input: read out */
+		level = ts->read(ts->dev, 0x20 + offset) & 0x01;
+	}
+	mutex_unlock(&ts->lock);
+
+	return level;
+}
+
+static void max7301_set(struct gpio_chip *chip, unsigned offset, int value)
+{
+	struct max7301 *ts = container_of(chip, struct max7301, chip);
+
+	/* First 4 pins are unused in the controller */
+	offset += 4;
+
+	mutex_lock(&ts->lock);
+
+	__max7301_set(ts, offset, value);
+
+	mutex_unlock(&ts->lock);
+}
+
+int __devinit __max730x_probe(struct max7301 *ts)
+{
+	struct device *dev = ts->dev;
+	struct max7301_platform_data *pdata;
+	int i, ret;
+
+	pdata = dev->platform_data;
+	if (!pdata || !pdata->base) {
+		dev_err(dev, "incorrect or missing platform data\n");
+		return -EINVAL;
+	}
+
+	mutex_init(&ts->lock);
+	dev_set_drvdata(dev, ts);
+
+	/* Power up the chip and disable IRQ output */
+	ts->write(dev, 0x04, 0x01);
+
+	ts->chip.label = dev->driver->name;
+
+	ts->chip.direction_input = max7301_direction_input;
+	ts->chip.get = max7301_get;
+	ts->chip.direction_output = max7301_direction_output;
+	ts->chip.set = max7301_set;
+
+	ts->chip.base = pdata->base;
+	ts->chip.ngpio = PIN_NUMBER;
+	ts->chip.can_sleep = 1;
+	ts->chip.dev = dev;
+	ts->chip.owner = THIS_MODULE;
+
+	/*
+	 * tristate all pins in hardware and cache the
+	 * register values for later use.
+	 */
+	for (i = 1; i < 8; i++) {
+		int j;
+		/* 0xAA means input with internal pullup disabled */
+		ts->write(dev, 0x08 + i, 0xAA);
+		ts->port_config[i] = 0xAA;
+		for (j = 0; j < 4; j++) {
+			int offset = (i - 1) * 4 + j;
+			ret = max7301_direction_input(&ts->chip, offset);
+			if (ret)
+				goto exit_destroy;
+		}
+	}
+
+	ret = gpiochip_add(&ts->chip);
+	if (ret)
+		goto exit_destroy;
+
+	return ret;
+
+exit_destroy:
+	dev_set_drvdata(dev, NULL);
+	mutex_destroy(&ts->lock);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(__max730x_probe);
+
+int __devexit __max730x_remove(struct device *dev)
+{
+	struct max7301 *ts = dev_get_drvdata(dev);
+	int ret;
+
+	if (ts == NULL)
+		return -ENODEV;
+
+	dev_set_drvdata(dev, NULL);
+
+	/* Power down the chip and disable IRQ output */
+	ts->write(dev, 0x04, 0x00);
+
+	ret = gpiochip_remove(&ts->chip);
+	if (!ret) {
+		mutex_destroy(&ts->lock);
+		kfree(ts);
+	} else
+		dev_err(dev, "Failed to remove GPIO controller: %d\n", ret);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(__max730x_remove);
diff --git a/include/linux/spi/max7301.h b/include/linux/spi/max7301.h
index 6dfd83f19b4b..34af0a3477bf 100644
--- a/include/linux/spi/max7301.h
+++ b/include/linux/spi/max7301.h
@@ -1,9 +1,27 @@
 #ifndef LINUX_SPI_MAX7301_H
 #define LINUX_SPI_MAX7301_H
 
+#include <linux/gpio.h>
+
+/*
+ * Some registers must be read back to modify.
+ * To save time we cache them here in memory
+ */
+struct max7301 {
+	struct mutex	lock;
+	u8		port_config[8];	/* field 0 is unused */
+	u32		out_level;	/* cached output levels */
+	struct gpio_chip chip;
+	struct device *dev;
+	int (*write)(struct device *dev, unsigned int reg, unsigned int val);
+	int (*read)(struct device *dev, unsigned int reg);
+};
+
 struct max7301_platform_data {
 	/* number assigned to the first GPIO */
 	unsigned	base;
 };
 
+extern int __max730x_remove(struct device *dev);
+extern int __max730x_probe(struct max7301 *ts);
 #endif
-- 
cgit v1.2.3


From 62fecb70cfaa9b4c6aa1981acd53b18f4ad925f0 Mon Sep 17 00:00:00 2001
From: Olof Johansson <olof@lixom.net>
Date: Fri, 5 Mar 2010 13:44:34 -0800
Subject: pca953x: minor include cleanup

linux/i2c/pca953x.h is a very bare include file.  Fix check for multiple
includes of linux/i2c/pca953x.h, and add dependent includes into the
header file.

Signed-off-by: Olof Johansson <olof@lixom.net>
Acked-by: Wolfram Sang <w.sang@pengutronix.de>
Acked-by: Jean Delvare <khali@linux-fr.org>
Cc: David Brownell <david-b@pacbell.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/i2c/pca953x.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/i2c/pca953x.h b/include/linux/i2c/pca953x.h
index 81736d6a8db7..29699f8dc5a4 100644
--- a/include/linux/i2c/pca953x.h
+++ b/include/linux/i2c/pca953x.h
@@ -1,3 +1,9 @@
+#ifndef _LINUX_PCA953X_H
+#define _LINUX_PCA953X_H
+
+#include <linux/types.h>
+#include <linux/i2c.h>
+
 /* platform data for the PCA9539 16-bit I/O expander driver */
 
 struct pca953x_platform_data {
@@ -17,3 +23,5 @@ struct pca953x_platform_data {
 				void *context);
 	char		**names;
 };
+
+#endif /* _LINUX_PCA953X_H */
-- 
cgit v1.2.3


From 89ea8bbe9c3eb2ea0cb57a4ecf283cab7326f0b0 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@misterjones.org>
Date: Fri, 5 Mar 2010 13:44:36 -0800
Subject: gpio: pca953x.c: add interrupt handling capability

Most of the GPIO expanders controlled by the pca953x driver are able to
report changes on the input pins through an *INT pin.

This patch implements the irq_chip functionality (edge detection only).

The driver has been tested on an Arcom Zeus.

[akpm@linux-foundation.org: the compiler does inlining for us nowadays]
Signed-off-by: Marc Zyngier <maz@misterjones.org>
Cc: Eric Miao <eric.y.miao@gmail.com>
Cc: Haojian Zhuang <haojian.zhuang@gmail.com>
Cc: David Brownell <david-b@pacbell.net>
Cc: Nate Case <ncase@xes-inc.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/gpio/Kconfig        |   7 ++
 drivers/gpio/pca953x.c      | 249 +++++++++++++++++++++++++++++++++++++++++---
 include/linux/i2c/pca953x.h |   3 +
 3 files changed, 247 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig
index f3549b8779d8..c5cc7d9d88e3 100644
--- a/drivers/gpio/Kconfig
+++ b/drivers/gpio/Kconfig
@@ -134,6 +134,13 @@ config GPIO_PCA953X
 	  This driver can also be built as a module.  If so, the module
 	  will be called pca953x.
 
+config GPIO_PCA953X_IRQ
+	bool "Interrupt controller support for PCA953x"
+	depends on GPIO_PCA953X=y
+	help
+	  Say yes here to enable the pca953x to be used as an interrupt
+	  controller. It requires the driver to be built in the kernel.
+
 config GPIO_PCF857X
 	tristate "PCF857x, PCA{85,96}7x, and MAX732[89] I2C GPIO expanders"
 	depends on I2C
diff --git a/drivers/gpio/pca953x.c b/drivers/gpio/pca953x.c
index 6a2fb3fbb3d9..ab5daab14bc2 100644
--- a/drivers/gpio/pca953x.c
+++ b/drivers/gpio/pca953x.c
@@ -14,6 +14,8 @@
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/gpio.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
 #include <linux/i2c.h>
 #include <linux/i2c/pca953x.h>
 #ifdef CONFIG_OF_GPIO
@@ -26,23 +28,28 @@
 #define PCA953X_INVERT         2
 #define PCA953X_DIRECTION      3
 
+#define PCA953X_GPIOS	       0x00FF
+#define PCA953X_INT	       0x0100
+
 static const struct i2c_device_id pca953x_id[] = {
-	{ "pca9534", 8, },
-	{ "pca9535", 16, },
+	{ "pca9534", 8  | PCA953X_INT, },
+	{ "pca9535", 16 | PCA953X_INT, },
 	{ "pca9536", 4, },
-	{ "pca9537", 4, },
-	{ "pca9538", 8, },
-	{ "pca9539", 16, },
-	{ "pca9554", 8, },
-	{ "pca9555", 16, },
+	{ "pca9537", 4  | PCA953X_INT, },
+	{ "pca9538", 8  | PCA953X_INT, },
+	{ "pca9539", 16 | PCA953X_INT, },
+	{ "pca9554", 8  | PCA953X_INT, },
+	{ "pca9555", 16 | PCA953X_INT, },
 	{ "pca9556", 8, },
 	{ "pca9557", 8, },
 
 	{ "max7310", 8, },
-	{ "max7315", 8, },
-	{ "pca6107", 8, },
-	{ "tca6408", 8, },
-	{ "tca6416", 16, },
+	{ "max7312", 16 | PCA953X_INT, },
+	{ "max7313", 16 | PCA953X_INT, },
+	{ "max7315", 8  | PCA953X_INT, },
+	{ "pca6107", 8  | PCA953X_INT, },
+	{ "tca6408", 8  | PCA953X_INT, },
+	{ "tca6416", 16 | PCA953X_INT, },
 	/* NYET:  { "tca6424", 24, }, */
 	{ }
 };
@@ -53,6 +60,15 @@ struct pca953x_chip {
 	uint16_t reg_output;
 	uint16_t reg_direction;
 
+#ifdef CONFIG_GPIO_PCA953X_IRQ
+	struct mutex irq_lock;
+	uint16_t irq_mask;
+	uint16_t irq_stat;
+	uint16_t irq_trig_raise;
+	uint16_t irq_trig_fall;
+	int	 irq_base;
+#endif
+
 	struct i2c_client *client;
 	struct pca953x_platform_data *dyn_pdata;
 	struct gpio_chip gpio_chip;
@@ -202,6 +218,210 @@ static void pca953x_setup_gpio(struct pca953x_chip *chip, int gpios)
 	gc->names = chip->names;
 }
 
+#ifdef CONFIG_GPIO_PCA953X_IRQ
+static int pca953x_gpio_to_irq(struct gpio_chip *gc, unsigned off)
+{
+	struct pca953x_chip *chip;
+
+	chip = container_of(gc, struct pca953x_chip, gpio_chip);
+	return chip->irq_base + off;
+}
+
+static void pca953x_irq_mask(unsigned int irq)
+{
+	struct pca953x_chip *chip = get_irq_chip_data(irq);
+
+	chip->irq_mask &= ~(1 << (irq - chip->irq_base));
+}
+
+static void pca953x_irq_unmask(unsigned int irq)
+{
+	struct pca953x_chip *chip = get_irq_chip_data(irq);
+
+	chip->irq_mask |= 1 << (irq - chip->irq_base);
+}
+
+static void pca953x_irq_bus_lock(unsigned int irq)
+{
+	struct pca953x_chip *chip = get_irq_chip_data(irq);
+
+	mutex_lock(&chip->irq_lock);
+}
+
+static void pca953x_irq_bus_sync_unlock(unsigned int irq)
+{
+	struct pca953x_chip *chip = get_irq_chip_data(irq);
+
+	mutex_unlock(&chip->irq_lock);
+}
+
+static int pca953x_irq_set_type(unsigned int irq, unsigned int type)
+{
+	struct pca953x_chip *chip = get_irq_chip_data(irq);
+	uint16_t level = irq - chip->irq_base;
+	uint16_t mask = 1 << level;
+
+	if (!(type & IRQ_TYPE_EDGE_BOTH)) {
+		dev_err(&chip->client->dev, "irq %d: unsupported type %d\n",
+			irq, type);
+		return -EINVAL;
+	}
+
+	if (type & IRQ_TYPE_EDGE_FALLING)
+		chip->irq_trig_fall |= mask;
+	else
+		chip->irq_trig_fall &= ~mask;
+
+	if (type & IRQ_TYPE_EDGE_RISING)
+		chip->irq_trig_raise |= mask;
+	else
+		chip->irq_trig_raise &= ~mask;
+
+	return pca953x_gpio_direction_input(&chip->gpio_chip, level);
+}
+
+static struct irq_chip pca953x_irq_chip = {
+	.name			= "pca953x",
+	.mask			= pca953x_irq_mask,
+	.unmask			= pca953x_irq_unmask,
+	.bus_lock		= pca953x_irq_bus_lock,
+	.bus_sync_unlock	= pca953x_irq_bus_sync_unlock,
+	.set_type		= pca953x_irq_set_type,
+};
+
+static uint16_t pca953x_irq_pending(struct pca953x_chip *chip)
+{
+	uint16_t cur_stat;
+	uint16_t old_stat;
+	uint16_t pending;
+	uint16_t trigger;
+	int ret;
+
+	ret = pca953x_read_reg(chip, PCA953X_INPUT, &cur_stat);
+	if (ret)
+		return 0;
+
+	/* Remove output pins from the equation */
+	cur_stat &= chip->reg_direction;
+
+	old_stat = chip->irq_stat;
+	trigger = (cur_stat ^ old_stat) & chip->irq_mask;
+
+	if (!trigger)
+		return 0;
+
+	chip->irq_stat = cur_stat;
+
+	pending = (old_stat & chip->irq_trig_fall) |
+		  (cur_stat & chip->irq_trig_raise);
+	pending &= trigger;
+
+	return pending;
+}
+
+static irqreturn_t pca953x_irq_handler(int irq, void *devid)
+{
+	struct pca953x_chip *chip = devid;
+	uint16_t pending;
+	uint16_t level;
+
+	pending = pca953x_irq_pending(chip);
+
+	if (!pending)
+		return IRQ_HANDLED;
+
+	do {
+		level = __ffs(pending);
+		handle_nested_irq(level + chip->irq_base);
+
+		pending &= ~(1 << level);
+	} while (pending);
+
+	return IRQ_HANDLED;
+}
+
+static int pca953x_irq_setup(struct pca953x_chip *chip,
+			     const struct i2c_device_id *id)
+{
+	struct i2c_client *client = chip->client;
+	struct pca953x_platform_data *pdata = client->dev.platform_data;
+	int ret;
+
+	if (pdata->irq_base && (id->driver_data & PCA953X_INT)) {
+		int lvl;
+
+		ret = pca953x_read_reg(chip, PCA953X_INPUT,
+				       &chip->irq_stat);
+		if (ret)
+			goto out_failed;
+
+		/*
+		 * There is no way to know which GPIO line generated the
+		 * interrupt.  We have to rely on the previous read for
+		 * this purpose.
+		 */
+		chip->irq_stat &= chip->reg_direction;
+		chip->irq_base = pdata->irq_base;
+		mutex_init(&chip->irq_lock);
+
+		for (lvl = 0; lvl < chip->gpio_chip.ngpio; lvl++) {
+			int irq = lvl + chip->irq_base;
+
+			set_irq_chip_data(irq, chip);
+			set_irq_chip_and_handler(irq, &pca953x_irq_chip,
+						 handle_edge_irq);
+			set_irq_nested_thread(irq, 1);
+#ifdef CONFIG_ARM
+			set_irq_flags(irq, IRQF_VALID);
+#else
+			set_irq_noprobe(irq);
+#endif
+		}
+
+		ret = request_threaded_irq(client->irq,
+					   NULL,
+					   pca953x_irq_handler,
+					   IRQF_TRIGGER_FALLING | IRQF_ONESHOT,
+					   dev_name(&client->dev), chip);
+		if (ret) {
+			dev_err(&client->dev, "failed to request irq %d\n",
+				client->irq);
+			goto out_failed;
+		}
+
+		chip->gpio_chip.to_irq = pca953x_gpio_to_irq;
+	}
+
+	return 0;
+
+out_failed:
+	chip->irq_base = 0;
+	return ret;
+}
+
+static void pca953x_irq_teardown(struct pca953x_chip *chip)
+{
+	if (chip->irq_base)
+		free_irq(chip->client->irq, chip);
+}
+#else /* CONFIG_GPIO_PCA953X_IRQ */
+static int pca953x_irq_setup(struct pca953x_chip *chip,
+			     const struct i2c_device_id *id)
+{
+	struct i2c_client *client = chip->client;
+	struct pca953x_platform_data *pdata = client->dev.platform_data;
+
+	if (pdata->irq_base && (id->driver_data & PCA953X_INT))
+		dev_warn(&client->dev, "interrupt support not compiled in\n");
+
+	return 0;
+}
+
+static void pca953x_irq_teardown(struct pca953x_chip *chip)
+{
+}
+#endif
+
 /*
  * Handlers for alternative sources of platform_data
  */
@@ -286,7 +506,7 @@ static int __devinit pca953x_probe(struct i2c_client *client,
 	/* initialize cached registers from their original values.
 	 * we can't share this chip with another i2c master.
 	 */
-	pca953x_setup_gpio(chip, id->driver_data);
+	pca953x_setup_gpio(chip, id->driver_data & PCA953X_GPIOS);
 
 	ret = pca953x_read_reg(chip, PCA953X_OUTPUT, &chip->reg_output);
 	if (ret)
@@ -301,6 +521,9 @@ static int __devinit pca953x_probe(struct i2c_client *client,
 	if (ret)
 		goto out_failed;
 
+	ret = pca953x_irq_setup(chip, id);
+	if (ret)
+		goto out_failed;
 
 	ret = gpiochip_add(&chip->gpio_chip);
 	if (ret)
@@ -317,6 +540,7 @@ static int __devinit pca953x_probe(struct i2c_client *client,
 	return 0;
 
 out_failed:
+	pca953x_irq_teardown(chip);
 	kfree(chip->dyn_pdata);
 	kfree(chip);
 	return ret;
@@ -345,6 +569,7 @@ static int pca953x_remove(struct i2c_client *client)
 		return ret;
 	}
 
+	pca953x_irq_teardown(chip);
 	kfree(chip->dyn_pdata);
 	kfree(chip);
 	return 0;
diff --git a/include/linux/i2c/pca953x.h b/include/linux/i2c/pca953x.h
index 29699f8dc5a4..d5c5a60c8a0b 100644
--- a/include/linux/i2c/pca953x.h
+++ b/include/linux/i2c/pca953x.h
@@ -13,6 +13,9 @@ struct pca953x_platform_data {
 	/* initial polarity inversion setting */
 	uint16_t	invert;
 
+	/* interrupt base */
+	int		irq_base;
+
 	void		*context;	/* param to setup/teardown */
 
 	int		(*setup)(struct i2c_client *client,
-- 
cgit v1.2.3


From d690b2cd222afc75320b9b8e9da7df02e9e630ca Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Sat, 6 Mar 2010 21:28:37 +0100
Subject: PM: Provide generic subsystem-level callbacks

There are subsystems whose power management callbacks only need to
invoke the callbacks provided by device drivers.  Still, their system
sleep PM callbacks should play well with the runtime PM callbacks,
so that devices suspended at run time can be left in that state for
a system sleep transition.

Provide a set of generic PM callbacks for such subsystems and
define convenience macros for populating dev_pm_ops structures.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
---
 Documentation/power/runtime_pm.txt |  65 +++++++++++
 drivers/base/power/Makefile        |   1 +
 drivers/base/power/generic_ops.c   | 233 +++++++++++++++++++++++++++++++++++++
 include/linux/pm.h                 |  51 +++++++-
 include/linux/pm_runtime.h         |   6 +
 5 files changed, 350 insertions(+), 6 deletions(-)
 create mode 100644 drivers/base/power/generic_ops.c

(limited to 'include/linux')

diff --git a/Documentation/power/runtime_pm.txt b/Documentation/power/runtime_pm.txt
index f19370641684..ab00eeddecaf 100644
--- a/Documentation/power/runtime_pm.txt
+++ b/Documentation/power/runtime_pm.txt
@@ -335,6 +335,10 @@ drivers/base/power/runtime.c and include/linux/pm_runtime.h:
       'power.runtime_error' is set or 'power.disable_depth' is greater than
       zero)
 
+  bool pm_runtime_suspended(struct device *dev);
+    - return true if the device's runtime PM status is 'suspended', or false
+      otherwise
+
   void pm_runtime_allow(struct device *dev);
     - set the power.runtime_auto flag for the device and decrease its usage
       counter (used by the /sys/devices/.../power/control interface to
@@ -459,3 +463,64 @@ The PM core always increments the run-time usage counter before calling the
 ->prepare() callback and decrements it after calling the ->complete() callback.
 Hence disabling run-time PM temporarily like this will not cause any run-time
 suspend callbacks to be lost.
+
+7. Generic subsystem callbacks
+
+Subsystems may wish to conserve code space by using the set of generic power
+management callbacks provided by the PM core, defined in
+driver/base/power/generic_ops.c:
+
+  int pm_generic_runtime_idle(struct device *dev);
+    - invoke the ->runtime_idle() callback provided by the driver of this
+      device, if defined, and call pm_runtime_suspend() for this device if the
+      return value is 0 or the callback is not defined
+
+  int pm_generic_runtime_suspend(struct device *dev);
+    - invoke the ->runtime_suspend() callback provided by the driver of this
+      device and return its result, or return -EINVAL if not defined
+
+  int pm_generic_runtime_resume(struct device *dev);
+    - invoke the ->runtime_resume() callback provided by the driver of this
+      device and return its result, or return -EINVAL if not defined
+
+  int pm_generic_suspend(struct device *dev);
+    - if the device has not been suspended at run time, invoke the ->suspend()
+      callback provided by its driver and return its result, or return 0 if not
+      defined
+
+  int pm_generic_resume(struct device *dev);
+    - invoke the ->resume() callback provided by the driver of this device and,
+      if successful, change the device's runtime PM status to 'active'
+
+  int pm_generic_freeze(struct device *dev);
+    - if the device has not been suspended at run time, invoke the ->freeze()
+      callback provided by its driver and return its result, or return 0 if not
+      defined
+
+  int pm_generic_thaw(struct device *dev);
+    - if the device has not been suspended at run time, invoke the ->thaw()
+      callback provided by its driver and return its result, or return 0 if not
+      defined
+
+  int pm_generic_poweroff(struct device *dev);
+    - if the device has not been suspended at run time, invoke the ->poweroff()
+      callback provided by its driver and return its result, or return 0 if not
+      defined
+
+  int pm_generic_restore(struct device *dev);
+    - invoke the ->restore() callback provided by the driver of this device and,
+      if successful, change the device's runtime PM status to 'active'
+
+These functions can be assigned to the ->runtime_idle(), ->runtime_suspend(),
+->runtime_resume(), ->suspend(), ->resume(), ->freeze(), ->thaw(), ->poweroff(),
+or ->restore() callback pointers in the subsystem-level dev_pm_ops structures.
+
+If a subsystem wishes to use all of them at the same time, it can simply assign
+the GENERIC_SUBSYS_PM_OPS macro, defined in include/linux/pm.h, to its
+dev_pm_ops structure pointer.
+
+Device drivers that wish to use the same function as a system suspend, freeze,
+poweroff and run-time suspend callback, and similarly for system resume, thaw,
+restore, and run-time resume, can achieve this with the help of the
+UNIVERSAL_DEV_PM_OPS macro defined in include/linux/pm.h (possibly setting its
+last argument to NULL).
diff --git a/drivers/base/power/Makefile b/drivers/base/power/Makefile
index 3ce3519e8f30..89de75325cea 100644
--- a/drivers/base/power/Makefile
+++ b/drivers/base/power/Makefile
@@ -1,6 +1,7 @@
 obj-$(CONFIG_PM)	+= sysfs.o
 obj-$(CONFIG_PM_SLEEP)	+= main.o
 obj-$(CONFIG_PM_RUNTIME)	+= runtime.o
+obj-$(CONFIG_PM_OPS)	+= generic_ops.o
 obj-$(CONFIG_PM_TRACE_RTC)	+= trace.o
 
 ccflags-$(CONFIG_DEBUG_DRIVER) := -DDEBUG
diff --git a/drivers/base/power/generic_ops.c b/drivers/base/power/generic_ops.c
new file mode 100644
index 000000000000..4b29d4981253
--- /dev/null
+++ b/drivers/base/power/generic_ops.c
@@ -0,0 +1,233 @@
+/*
+ * drivers/base/power/generic_ops.c - Generic PM callbacks for subsystems
+ *
+ * Copyright (c) 2010 Rafael J. Wysocki <rjw@sisk.pl>, Novell Inc.
+ *
+ * This file is released under the GPLv2.
+ */
+
+#include <linux/pm.h>
+#include <linux/pm_runtime.h>
+
+#ifdef CONFIG_PM_RUNTIME
+/**
+ * pm_generic_runtime_idle - Generic runtime idle callback for subsystems.
+ * @dev: Device to handle.
+ *
+ * If PM operations are defined for the @dev's driver and they include
+ * ->runtime_idle(), execute it and return its error code, if nonzero.
+ * Otherwise, execute pm_runtime_suspend() for the device and return 0.
+ */
+int pm_generic_runtime_idle(struct device *dev)
+{
+	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
+
+	if (pm && pm->runtime_idle) {
+		int ret = pm->runtime_idle(dev);
+		if (ret)
+			return ret;
+	}
+
+	pm_runtime_suspend(dev);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(pm_generic_runtime_idle);
+
+/**
+ * pm_generic_runtime_suspend - Generic runtime suspend callback for subsystems.
+ * @dev: Device to suspend.
+ *
+ * If PM operations are defined for the @dev's driver and they include
+ * ->runtime_suspend(), execute it and return its error code.  Otherwise,
+ * return -EINVAL.
+ */
+int pm_generic_runtime_suspend(struct device *dev)
+{
+	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
+	int ret;
+
+	ret = pm && pm->runtime_suspend ? pm->runtime_suspend(dev) : -EINVAL;
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(pm_generic_runtime_suspend);
+
+/**
+ * pm_generic_runtime_resume - Generic runtime resume callback for subsystems.
+ * @dev: Device to resume.
+ *
+ * If PM operations are defined for the @dev's driver and they include
+ * ->runtime_resume(), execute it and return its error code.  Otherwise,
+ * return -EINVAL.
+ */
+int pm_generic_runtime_resume(struct device *dev)
+{
+	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
+	int ret;
+
+	ret = pm && pm->runtime_resume ? pm->runtime_resume(dev) : -EINVAL;
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(pm_generic_runtime_resume);
+#endif /* CONFIG_PM_RUNTIME */
+
+#ifdef CONFIG_PM_SLEEP
+/**
+ * __pm_generic_call - Generic suspend/freeze/poweroff/thaw subsystem callback.
+ * @dev: Device to handle.
+ * @event: PM transition of the system under way.
+ *
+ * If the device has not been suspended at run time, execute the
+ * suspend/freeze/poweroff/thaw callback provided by its driver, if defined, and
+ * return its error code.  Otherwise, return zero.
+ */
+static int __pm_generic_call(struct device *dev, int event)
+{
+	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
+	int (*callback)(struct device *);
+
+	if (!pm || pm_runtime_suspended(dev))
+		return 0;
+
+	switch (event) {
+	case PM_EVENT_SUSPEND:
+		callback = pm->suspend;
+		break;
+	case PM_EVENT_FREEZE:
+		callback = pm->freeze;
+		break;
+	case PM_EVENT_HIBERNATE:
+		callback = pm->poweroff;
+		break;
+	case PM_EVENT_THAW:
+		callback = pm->thaw;
+		break;
+	default:
+		callback = NULL;
+		break;
+	}
+
+	return callback ? callback(dev) : 0;
+}
+
+/**
+ * pm_generic_suspend - Generic suspend callback for subsystems.
+ * @dev: Device to suspend.
+ */
+int pm_generic_suspend(struct device *dev)
+{
+	return __pm_generic_call(dev, PM_EVENT_SUSPEND);
+}
+EXPORT_SYMBOL_GPL(pm_generic_suspend);
+
+/**
+ * pm_generic_freeze - Generic freeze callback for subsystems.
+ * @dev: Device to freeze.
+ */
+int pm_generic_freeze(struct device *dev)
+{
+	return __pm_generic_call(dev, PM_EVENT_FREEZE);
+}
+EXPORT_SYMBOL_GPL(pm_generic_freeze);
+
+/**
+ * pm_generic_poweroff - Generic poweroff callback for subsystems.
+ * @dev: Device to handle.
+ */
+int pm_generic_poweroff(struct device *dev)
+{
+	return __pm_generic_call(dev, PM_EVENT_HIBERNATE);
+}
+EXPORT_SYMBOL_GPL(pm_generic_poweroff);
+
+/**
+ * pm_generic_thaw - Generic thaw callback for subsystems.
+ * @dev: Device to thaw.
+ */
+int pm_generic_thaw(struct device *dev)
+{
+	return __pm_generic_call(dev, PM_EVENT_THAW);
+}
+EXPORT_SYMBOL_GPL(pm_generic_thaw);
+
+/**
+ * __pm_generic_resume - Generic resume/restore callback for subsystems.
+ * @dev: Device to handle.
+ * @event: PM transition of the system under way.
+ *
+ * Execute the resume/resotre callback provided by the @dev's driver, if
+ * defined.  If it returns 0, change the device's runtime PM status to 'active'.
+ * Return the callback's error code.
+ */
+static int __pm_generic_resume(struct device *dev, int event)
+{
+	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
+	int (*callback)(struct device *);
+	int ret;
+
+	if (!pm)
+		return 0;
+
+	switch (event) {
+	case PM_EVENT_RESUME:
+		callback = pm->resume;
+		break;
+	case PM_EVENT_RESTORE:
+		callback = pm->restore;
+		break;
+	default:
+		callback = NULL;
+		break;
+	}
+
+	if (!callback)
+		return 0;
+
+	ret = callback(dev);
+	if (!ret) {
+		pm_runtime_disable(dev);
+		pm_runtime_set_active(dev);
+		pm_runtime_enable(dev);
+	}
+
+	return ret;
+}
+
+/**
+ * pm_generic_resume - Generic resume callback for subsystems.
+ * @dev: Device to resume.
+ */
+int pm_generic_resume(struct device *dev)
+{
+	return __pm_generic_resume(dev, PM_EVENT_RESUME);
+}
+EXPORT_SYMBOL_GPL(pm_generic_resume);
+
+/**
+ * pm_generic_restore - Generic restore callback for subsystems.
+ * @dev: Device to restore.
+ */
+int pm_generic_restore(struct device *dev)
+{
+	return __pm_generic_resume(dev, PM_EVENT_RESTORE);
+}
+EXPORT_SYMBOL_GPL(pm_generic_restore);
+#endif /* CONFIG_PM_SLEEP */
+
+struct dev_pm_ops generic_subsys_pm_ops = {
+#ifdef CONFIG_PM_SLEEP
+	.suspend = pm_generic_suspend,
+	.resume = pm_generic_resume,
+	.freeze = pm_generic_freeze,
+	.thaw = pm_generic_thaw,
+	.poweroff = pm_generic_poweroff,
+	.restore = pm_generic_restore,
+#endif
+#ifdef CONFIG_PM_RUNTIME
+	.runtime_suspend = pm_generic_runtime_suspend,
+	.runtime_resume = pm_generic_runtime_resume,
+	.runtime_idle = pm_generic_runtime_idle,
+#endif
+};
+EXPORT_SYMBOL_GPL(generic_subsys_pm_ops);
diff --git a/include/linux/pm.h b/include/linux/pm.h
index e80df06ad22a..8e258c727971 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -215,20 +215,59 @@ struct dev_pm_ops {
 	int (*runtime_idle)(struct device *dev);
 };
 
+#ifdef CONFIG_PM_SLEEP
+#define SET_SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \
+	.suspend = suspend_fn, \
+	.resume = resume_fn, \
+	.freeze = suspend_fn, \
+	.thaw = resume_fn, \
+	.poweroff = suspend_fn, \
+	.restore = resume_fn,
+#else
+#define SET_SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn)
+#endif
+
+#ifdef CONFIG_PM_RUNTIME
+#define SET_RUNTIME_PM_OPS(suspend_fn, resume_fn, idle_fn) \
+	.runtime_suspend = suspend_fn, \
+	.runtime_resume = resume_fn, \
+	.runtime_idle = idle_fn,
+#else
+#define SET_RUNTIME_PM_OPS(suspend_fn, resume_fn, idle_fn)
+#endif
+
 /*
  * Use this if you want to use the same suspend and resume callbacks for suspend
  * to RAM and hibernation.
  */
 #define SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn) \
 const struct dev_pm_ops name = { \
-	.suspend = suspend_fn, \
-	.resume = resume_fn, \
-	.freeze = suspend_fn, \
-	.thaw = resume_fn, \
-	.poweroff = suspend_fn, \
-	.restore = resume_fn, \
+	SET_SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \
+}
+
+/*
+ * Use this for defining a set of PM operations to be used in all situations
+ * (sustem suspend, hibernation or runtime PM).
+ */
+#define UNIVERSAL_DEV_PM_OPS(name, suspend_fn, resume_fn, idle_fn) \
+const struct dev_pm_ops name = { \
+	SET_SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \
+	SET_RUNTIME_PM_OPS(suspend_fn, resume_fn, idle_fn) \
 }
 
+/*
+ * Use this for subsystems (bus types, device types, device classes) that don't
+ * need any special suspend/resume handling in addition to invoking the PM
+ * callbacks provided by device drivers supporting both the system sleep PM and
+ * runtime PM, make the pm member point to generic_subsys_pm_ops.
+ */
+#ifdef CONFIG_PM_OPS
+extern struct dev_pm_ops generic_subsys_pm_ops;
+#define GENERIC_SUBSYS_PM_OPS	(&generic_subsys_pm_ops)
+#else
+#define GENERIC_SUBSYS_PM_OPS	NULL
+#endif
+
 /**
  * PM_EVENT_ messages
  *
diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h
index 7d773aac5314..b776db737244 100644
--- a/include/linux/pm_runtime.h
+++ b/include/linux/pm_runtime.h
@@ -62,6 +62,11 @@ static inline void device_set_run_wake(struct device *dev, bool enable)
 	dev->power.run_wake = enable;
 }
 
+static inline bool pm_runtime_suspended(struct device *dev)
+{
+	return dev->power.runtime_status == RPM_SUSPENDED;
+}
+
 #else /* !CONFIG_PM_RUNTIME */
 
 static inline int pm_runtime_idle(struct device *dev) { return -ENOSYS; }
@@ -89,6 +94,7 @@ static inline void pm_runtime_get_noresume(struct device *dev) {}
 static inline void pm_runtime_put_noidle(struct device *dev) {}
 static inline bool device_run_wake(struct device *dev) { return false; }
 static inline void device_set_run_wake(struct device *dev, bool enable) {}
+static inline bool pm_runtime_suspended(struct device *dev) { return false; }
 
 #endif /* !CONFIG_PM_RUNTIME */
 
-- 
cgit v1.2.3


From f99344fc69c3df46786a39ea4283a4175ea40b3f Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Tue, 5 Jan 2010 13:59:07 +0000
Subject: mfd: Add a data argument to the WM8350 IRQ free function

To better match genirq.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/wm8350-irq.c             |  2 +-
 drivers/power/wm8350_power.c         | 24 ++++++++++++------------
 drivers/regulator/wm8350-regulator.c |  2 +-
 drivers/rtc/rtc-wm8350.c             |  4 ++--
 include/linux/mfd/wm8350/core.h      |  3 ++-
 sound/soc/codecs/wm8350.c            |  4 ++--
 6 files changed, 20 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/wm8350-irq.c b/drivers/mfd/wm8350-irq.c
index 9025f29e2707..655836bc69a0 100644
--- a/drivers/mfd/wm8350-irq.c
+++ b/drivers/mfd/wm8350-irq.c
@@ -451,7 +451,7 @@ int wm8350_register_irq(struct wm8350 *wm8350, int irq,
 }
 EXPORT_SYMBOL_GPL(wm8350_register_irq);
 
-int wm8350_free_irq(struct wm8350 *wm8350, int irq)
+int wm8350_free_irq(struct wm8350 *wm8350, int irq, void *data)
 {
 	if (irq < 0 || irq >= WM8350_NUM_IRQ)
 		return -EINVAL;
diff --git a/drivers/power/wm8350_power.c b/drivers/power/wm8350_power.c
index ad4f071e1287..3839a5e1c4a7 100644
--- a/drivers/power/wm8350_power.c
+++ b/drivers/power/wm8350_power.c
@@ -428,18 +428,18 @@ static void wm8350_init_charger(struct wm8350 *wm8350)
 
 static void free_charger_irq(struct wm8350 *wm8350)
 {
-	wm8350_free_irq(wm8350, WM8350_IRQ_CHG_BAT_HOT);
-	wm8350_free_irq(wm8350, WM8350_IRQ_CHG_BAT_COLD);
-	wm8350_free_irq(wm8350, WM8350_IRQ_CHG_BAT_FAIL);
-	wm8350_free_irq(wm8350, WM8350_IRQ_CHG_TO);
-	wm8350_free_irq(wm8350, WM8350_IRQ_CHG_END);
-	wm8350_free_irq(wm8350, WM8350_IRQ_CHG_START);
-	wm8350_free_irq(wm8350, WM8350_IRQ_CHG_VBATT_LT_3P9);
-	wm8350_free_irq(wm8350, WM8350_IRQ_CHG_VBATT_LT_3P1);
-	wm8350_free_irq(wm8350, WM8350_IRQ_CHG_VBATT_LT_2P85);
-	wm8350_free_irq(wm8350, WM8350_IRQ_EXT_USB_FB);
-	wm8350_free_irq(wm8350, WM8350_IRQ_EXT_WALL_FB);
-	wm8350_free_irq(wm8350, WM8350_IRQ_EXT_BAT_FB);
+	wm8350_free_irq(wm8350, WM8350_IRQ_CHG_BAT_HOT, wm8350);
+	wm8350_free_irq(wm8350, WM8350_IRQ_CHG_BAT_COLD, wm8350);
+	wm8350_free_irq(wm8350, WM8350_IRQ_CHG_BAT_FAIL, wm8350);
+	wm8350_free_irq(wm8350, WM8350_IRQ_CHG_TO, wm8350);
+	wm8350_free_irq(wm8350, WM8350_IRQ_CHG_END, wm8350);
+	wm8350_free_irq(wm8350, WM8350_IRQ_CHG_START, wm8350);
+	wm8350_free_irq(wm8350, WM8350_IRQ_CHG_VBATT_LT_3P9, wm8350);
+	wm8350_free_irq(wm8350, WM8350_IRQ_CHG_VBATT_LT_3P1, wm8350);
+	wm8350_free_irq(wm8350, WM8350_IRQ_CHG_VBATT_LT_2P85, wm8350);
+	wm8350_free_irq(wm8350, WM8350_IRQ_EXT_USB_FB, wm8350);
+	wm8350_free_irq(wm8350, WM8350_IRQ_EXT_WALL_FB, wm8350);
+	wm8350_free_irq(wm8350, WM8350_IRQ_EXT_BAT_FB, wm8350);
 }
 
 static __devinit int wm8350_power_probe(struct platform_device *pdev)
diff --git a/drivers/regulator/wm8350-regulator.c b/drivers/regulator/wm8350-regulator.c
index 94227dd6ba7b..723cd1fb4867 100644
--- a/drivers/regulator/wm8350-regulator.c
+++ b/drivers/regulator/wm8350-regulator.c
@@ -1453,7 +1453,7 @@ static int wm8350_regulator_remove(struct platform_device *pdev)
 	struct regulator_dev *rdev = platform_get_drvdata(pdev);
 	struct wm8350 *wm8350 = rdev_get_drvdata(rdev);
 
-	wm8350_free_irq(wm8350, wm8350_reg[pdev->id].irq);
+	wm8350_free_irq(wm8350, wm8350_reg[pdev->id].irq, rdev);
 
 	regulator_unregister(rdev);
 
diff --git a/drivers/rtc/rtc-wm8350.c b/drivers/rtc/rtc-wm8350.c
index f1e440521c54..a5512f515998 100644
--- a/drivers/rtc/rtc-wm8350.c
+++ b/drivers/rtc/rtc-wm8350.c
@@ -478,8 +478,8 @@ static int __devexit wm8350_rtc_remove(struct platform_device *pdev)
 	struct wm8350 *wm8350 = platform_get_drvdata(pdev);
 	struct wm8350_rtc *wm_rtc = &wm8350->rtc;
 
-	wm8350_free_irq(wm8350, WM8350_IRQ_RTC_SEC);
-	wm8350_free_irq(wm8350, WM8350_IRQ_RTC_ALM);
+	wm8350_free_irq(wm8350, WM8350_IRQ_RTC_SEC, wm8350);
+	wm8350_free_irq(wm8350, WM8350_IRQ_RTC_ALM, wm8350);
 
 	rtc_device_unregister(wm_rtc->rtc);
 
diff --git a/include/linux/mfd/wm8350/core.h b/include/linux/mfd/wm8350/core.h
index 43868899bf49..8883125ddea1 100644
--- a/include/linux/mfd/wm8350/core.h
+++ b/include/linux/mfd/wm8350/core.h
@@ -680,7 +680,8 @@ int wm8350_block_write(struct wm8350 *wm8350, int reg, int size, u16 *src);
 int wm8350_register_irq(struct wm8350 *wm8350, int irq,
 			irq_handler_t handler, unsigned long flags,
 			const char *name, void *data);
-int wm8350_free_irq(struct wm8350 *wm8350, int irq);
+int wm8350_free_irq(struct wm8350 *wm8350, int irq, void *data);
+
 int wm8350_mask_irq(struct wm8350 *wm8350, int irq);
 int wm8350_unmask_irq(struct wm8350 *wm8350, int irq);
 int wm8350_irq_init(struct wm8350 *wm8350, int irq,
diff --git a/sound/soc/codecs/wm8350.c b/sound/soc/codecs/wm8350.c
index 718ef912e758..079bf745bf05 100644
--- a/sound/soc/codecs/wm8350.c
+++ b/sound/soc/codecs/wm8350.c
@@ -1521,8 +1521,8 @@ static int wm8350_remove(struct platform_device *pdev)
 			  WM8350_JDL_ENA | WM8350_JDR_ENA);
 	wm8350_clear_bits(wm8350, WM8350_POWER_MGMT_4, WM8350_TOCLK_ENA);
 
-	wm8350_free_irq(wm8350, WM8350_IRQ_CODEC_JCK_DET_L);
-	wm8350_free_irq(wm8350, WM8350_IRQ_CODEC_JCK_DET_R);
+	wm8350_free_irq(wm8350, WM8350_IRQ_CODEC_JCK_DET_L, priv);
+	wm8350_free_irq(wm8350, WM8350_IRQ_CODEC_JCK_DET_R, priv);
 
 	priv->hpl.jack = NULL;
 	priv->hpr.jack = NULL;
-- 
cgit v1.2.3


From 29c71b138c83c8191f1f7e46fcc28b9d6bc8a5dd Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Tue, 5 Jan 2010 13:59:08 +0000
Subject: rtc: Suppress duplicate enable/disable of WM8350 update interrupt

Unlike the wm8350-custom code genirq nests enable and disable calls
so we can't just unconditionally mask or unmask the interrupt,
we need to remember the state we set and only mask or unmask when
there is a real change.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Acked-by: Alessandro Zummo <a.zummo@towertech.it>
Cc: rtc-linux@googlegroups.com
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/rtc/rtc-wm8350.c       | 7 +++++++
 include/linux/mfd/wm8350/rtc.h | 1 +
 2 files changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/rtc/rtc-wm8350.c b/drivers/rtc/rtc-wm8350.c
index a5512f515998..3d0dc76b38af 100644
--- a/drivers/rtc/rtc-wm8350.c
+++ b/drivers/rtc/rtc-wm8350.c
@@ -307,11 +307,18 @@ static int wm8350_rtc_update_irq_enable(struct device *dev,
 {
 	struct wm8350 *wm8350 = dev_get_drvdata(dev);
 
+	/* Suppress duplicate changes since genirq nests enable and
+	 * disable calls. */
+	if (enabled == wm8350->rtc.update_enabled)
+		return 0;
+
 	if (enabled)
 		wm8350_unmask_irq(wm8350, WM8350_IRQ_RTC_SEC);
 	else
 		wm8350_mask_irq(wm8350, WM8350_IRQ_RTC_SEC);
 
+	wm8350->rtc.update_enabled = enabled;
+
 	return 0;
 }
 
diff --git a/include/linux/mfd/wm8350/rtc.h b/include/linux/mfd/wm8350/rtc.h
index 24add2bef6c9..ebd72ffc62d1 100644
--- a/include/linux/mfd/wm8350/rtc.h
+++ b/include/linux/mfd/wm8350/rtc.h
@@ -263,6 +263,7 @@ struct wm8350_rtc {
 	struct platform_device *pdev;
 	struct rtc_device *rtc;
 	int alarm_enabled;      /* used over suspend/resume */
+	int update_enabled;
 };
 
 #endif
-- 
cgit v1.2.3


From 760e4518788df6762700e6bb9dd8692379f11168 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Tue, 5 Jan 2010 13:59:09 +0000
Subject: mfd: Convert WM8350 to genirq

This gives us use of the diagnostic facilities genirq provides and
will allow implementation of interrupt support for the WM8350 GPIOs.
Stub functions are provided to ease the transition of the individual
drivers, probably after additional work to pass the IRQ numbers via
the struct devices.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/wm8350-irq.c        | 155 ++++++++++++++++++++++------------------
 include/linux/mfd/wm8350/core.h |  44 ++++++++----
 2 files changed, 118 insertions(+), 81 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/wm8350-irq.c b/drivers/mfd/wm8350-irq.c
index 655836bc69a0..f56c9adf9493 100644
--- a/drivers/mfd/wm8350-irq.c
+++ b/drivers/mfd/wm8350-irq.c
@@ -18,7 +18,7 @@
 #include <linux/bug.h>
 #include <linux/device.h>
 #include <linux/interrupt.h>
-#include <linux/workqueue.h>
+#include <linux/irq.h>
 
 #include <linux/mfd/wm8350/core.h>
 #include <linux/mfd/wm8350/audio.h>
@@ -29,8 +29,6 @@
 #include <linux/mfd/wm8350/supply.h>
 #include <linux/mfd/wm8350/wdt.h>
 
-#define WM8350_NUM_IRQ_REGS 7
-
 #define WM8350_INT_OFFSET_1                     0
 #define WM8350_INT_OFFSET_2                     1
 #define WM8350_POWER_UP_INT_OFFSET              2
@@ -366,19 +364,10 @@ static struct wm8350_irq_data wm8350_irqs[] = {
 	},
 };
 
-static void wm8350_irq_call_handler(struct wm8350 *wm8350, int irq)
+static inline struct wm8350_irq_data *irq_to_wm8350_irq(struct wm8350 *wm8350,
+							int irq)
 {
-	mutex_lock(&wm8350->irq_mutex);
-
-	if (wm8350->irq[irq].handler)
-		wm8350->irq[irq].handler(irq, wm8350->irq[irq].data);
-	else {
-		dev_err(wm8350->dev, "irq %d nobody cared. now masked.\n",
-			irq);
-		wm8350_mask_irq(wm8350, irq);
-	}
-
-	mutex_unlock(&wm8350->irq_mutex);
+	return &wm8350_irqs[irq - wm8350->irq_base];
 }
 
 /*
@@ -386,7 +375,9 @@ static void wm8350_irq_call_handler(struct wm8350 *wm8350, int irq)
  * interrupts are clear on read the IRQ line will be reasserted and
  * the physical IRQ will be handled again if another interrupt is
  * asserted while we run - in the normal course of events this is a
- * rare occurrence so we save I2C/SPI reads.
+ * rare occurrence so we save I2C/SPI reads.  We're also assuming that
+ * it's rare to get lots of interrupts firing simultaneously so try to
+ * minimise I/O.
  */
 static irqreturn_t wm8350_irq(int irq, void *irq_data)
 {
@@ -397,7 +388,6 @@ static irqreturn_t wm8350_irq(int irq, void *irq_data)
 	struct wm8350_irq_data *data;
 	int i;
 
-	/* TODO: Use block reads to improve performance? */
 	level_one = wm8350_reg_read(wm8350, WM8350_SYSTEM_INTERRUPTS)
 		& ~wm8350_reg_read(wm8350, WM8350_SYSTEM_INTERRUPTS_MASK);
 
@@ -416,93 +406,101 @@ static irqreturn_t wm8350_irq(int irq, void *irq_data)
 			sub_reg[data->reg] =
 				wm8350_reg_read(wm8350, WM8350_INT_STATUS_1 +
 						data->reg);
-			sub_reg[data->reg] &=
-				~wm8350_reg_read(wm8350,
-						 WM8350_INT_STATUS_1_MASK +
-						 data->reg);
+			sub_reg[data->reg] &= ~wm8350->irq_masks[data->reg];
 			read_done[data->reg] = 1;
 		}
 
 		if (sub_reg[data->reg] & data->mask)
-			wm8350_irq_call_handler(wm8350, i);
+			handle_nested_irq(wm8350->irq_base + i);
 	}
 
 	return IRQ_HANDLED;
 }
 
-int wm8350_register_irq(struct wm8350 *wm8350, int irq,
-			irq_handler_t handler, unsigned long flags,
-			const char *name, void *data)
+static void wm8350_irq_lock(unsigned int irq)
 {
-	if (irq < 0 || irq >= WM8350_NUM_IRQ || !handler)
-		return -EINVAL;
-
-	if (wm8350->irq[irq].handler)
-		return -EBUSY;
-
-	mutex_lock(&wm8350->irq_mutex);
-	wm8350->irq[irq].handler = handler;
-	wm8350->irq[irq].data = data;
-	mutex_unlock(&wm8350->irq_mutex);
-
-	wm8350_unmask_irq(wm8350, irq);
+	struct wm8350 *wm8350 = get_irq_chip_data(irq);
 
-	return 0;
+	mutex_lock(&wm8350->irq_lock);
 }
-EXPORT_SYMBOL_GPL(wm8350_register_irq);
 
-int wm8350_free_irq(struct wm8350 *wm8350, int irq, void *data)
+static void wm8350_irq_sync_unlock(unsigned int irq)
 {
-	if (irq < 0 || irq >= WM8350_NUM_IRQ)
-		return -EINVAL;
+	struct wm8350 *wm8350 = get_irq_chip_data(irq);
+	int i;
 
-	wm8350_mask_irq(wm8350, irq);
+	for (i = 0; i < ARRAY_SIZE(wm8350->irq_masks); i++) {
+		/* If there's been a change in the mask write it back
+		 * to the hardware. */
+		if (wm8350->irq_masks[i] !=
+		    wm8350->reg_cache[WM8350_INT_STATUS_1_MASK + i])
+			WARN_ON(wm8350_reg_write(wm8350,
+					 WM8350_INT_STATUS_1_MASK + i,
+						 wm8350->irq_masks[i]));
+	}
 
-	mutex_lock(&wm8350->irq_mutex);
-	wm8350->irq[irq].handler = NULL;
-	mutex_unlock(&wm8350->irq_mutex);
-	return 0;
+	mutex_unlock(&wm8350->irq_lock);
 }
-EXPORT_SYMBOL_GPL(wm8350_free_irq);
 
-int wm8350_mask_irq(struct wm8350 *wm8350, int irq)
+static void wm8350_irq_enable(unsigned int irq)
 {
-	return wm8350_set_bits(wm8350, WM8350_INT_STATUS_1_MASK +
-			       wm8350_irqs[irq].reg,
-			       wm8350_irqs[irq].mask);
+	struct wm8350 *wm8350 = get_irq_chip_data(irq);
+	struct wm8350_irq_data *irq_data = irq_to_wm8350_irq(wm8350, irq);
+
+	wm8350->irq_masks[irq_data->reg] &= ~irq_data->mask;
 }
-EXPORT_SYMBOL_GPL(wm8350_mask_irq);
 
-int wm8350_unmask_irq(struct wm8350 *wm8350, int irq)
+static void wm8350_irq_disable(unsigned int irq)
 {
-	return wm8350_clear_bits(wm8350, WM8350_INT_STATUS_1_MASK +
-				 wm8350_irqs[irq].reg,
-				 wm8350_irqs[irq].mask);
+	struct wm8350 *wm8350 = get_irq_chip_data(irq);
+	struct wm8350_irq_data *irq_data = irq_to_wm8350_irq(wm8350, irq);
+
+	wm8350->irq_masks[irq_data->reg] |= irq_data->mask;
 }
-EXPORT_SYMBOL_GPL(wm8350_unmask_irq);
+
+static struct irq_chip wm8350_irq_chip = {
+	.name = "wm8350",
+	.bus_lock = wm8350_irq_lock,
+	.bus_sync_unlock = wm8350_irq_sync_unlock,
+	.disable = wm8350_irq_disable,
+	.enable = wm8350_irq_enable,
+};
 
 int wm8350_irq_init(struct wm8350 *wm8350, int irq,
 		    struct wm8350_platform_data *pdata)
 {
-	int ret;
+	int ret, cur_irq, i;
 	int flags = IRQF_ONESHOT;
 
 	if (!irq) {
-		dev_err(wm8350->dev, "No IRQ configured\n");
-		return -EINVAL;
+		dev_warn(wm8350->dev, "No interrupt support, no core IRQ\n");
+		return 0;
+	}
+
+	if (!pdata || !pdata->irq_base) {
+		dev_warn(wm8350->dev, "No interrupt support, no IRQ base\n");
+		return 0;
 	}
 
+	/* Mask top level interrupts */
 	wm8350_reg_write(wm8350, WM8350_SYSTEM_INTERRUPTS_MASK, 0xFFFF);
-	wm8350_reg_write(wm8350, WM8350_INT_STATUS_1_MASK, 0xFFFF);
-	wm8350_reg_write(wm8350, WM8350_INT_STATUS_2_MASK, 0xFFFF);
-	wm8350_reg_write(wm8350, WM8350_UNDER_VOLTAGE_INT_STATUS_MASK, 0xFFFF);
-	wm8350_reg_write(wm8350, WM8350_GPIO_INT_STATUS_MASK, 0xFFFF);
-	wm8350_reg_write(wm8350, WM8350_COMPARATOR_INT_STATUS_MASK, 0xFFFF);
 
-	mutex_init(&wm8350->irq_mutex);
+	/* Mask all individual interrupts by default and cache the
+	 * masks.  We read the masks back since there are unwritable
+	 * bits in the mask registers. */
+	for (i = 0; i < ARRAY_SIZE(wm8350->irq_masks); i++) {
+		wm8350_reg_write(wm8350, WM8350_INT_STATUS_1_MASK + i,
+				 0xFFFF);
+		wm8350->irq_masks[i] =
+			wm8350_reg_read(wm8350,
+					WM8350_INT_STATUS_1_MASK + i);
+	}
+
+	mutex_init(&wm8350->irq_lock);
 	wm8350->chip_irq = irq;
+	wm8350->irq_base = pdata->irq_base;
 
-	if (pdata && pdata->irq_high) {
+	if (pdata->irq_high) {
 		flags |= IRQF_TRIGGER_HIGH;
 
 		wm8350_set_bits(wm8350, WM8350_SYSTEM_CONTROL_1,
@@ -514,11 +512,32 @@ int wm8350_irq_init(struct wm8350 *wm8350, int irq,
 				  WM8350_IRQ_POL);
 	}
 
+	/* Register with genirq */
+	for (cur_irq = wm8350->irq_base;
+	     cur_irq < ARRAY_SIZE(wm8350_irqs) + wm8350->irq_base;
+	     cur_irq++) {
+		set_irq_chip_data(cur_irq, wm8350);
+		set_irq_chip_and_handler(cur_irq, &wm8350_irq_chip,
+					 handle_edge_irq);
+		set_irq_nested_thread(cur_irq, 1);
+
+		/* ARM needs us to explicitly flag the IRQ as valid
+		 * and will set them noprobe when we do so. */
+#ifdef CONFIG_ARM
+		set_irq_flags(cur_irq, IRQF_VALID);
+#else
+		set_irq_noprobe(cur_irq);
+#endif
+	}
+
 	ret = request_threaded_irq(irq, NULL, wm8350_irq, flags,
 				   "wm8350", wm8350);
 	if (ret != 0)
 		dev_err(wm8350->dev, "Failed to request IRQ: %d\n", ret);
 
+	/* Allow interrupts to fire */
+	wm8350_reg_write(wm8350, WM8350_SYSTEM_INTERRUPTS_MASK, 0);
+
 	return ret;
 }
 
diff --git a/include/linux/mfd/wm8350/core.h b/include/linux/mfd/wm8350/core.h
index 8883125ddea1..04217a71f173 100644
--- a/include/linux/mfd/wm8350/core.h
+++ b/include/linux/mfd/wm8350/core.h
@@ -579,6 +579,8 @@
 
 #define WM8350_NUM_IRQ				63
 
+#define WM8350_NUM_IRQ_REGS 7
+
 struct wm8350_reg_access {
 	u16 readable;		/* Mask of readable bits */
 	u16 writable;		/* Mask of writable bits */
@@ -600,11 +602,6 @@ extern const u16 wm8352_mode3_defaults[];
 
 struct wm8350;
 
-struct wm8350_irq {
-	irq_handler_t handler;
-	void *data;
-};
-
 struct wm8350_hwmon {
 	struct platform_device *pdev;
 	struct device *classdev;
@@ -626,9 +623,10 @@ struct wm8350 {
 	struct mutex auxadc_mutex;
 
 	/* Interrupt handling */
-	struct mutex irq_mutex; /* IRQ table mutex */
-	struct wm8350_irq irq[WM8350_NUM_IRQ];
+	struct mutex irq_lock;
 	int chip_irq;
+	int irq_base;
+	u16 irq_masks[WM8350_NUM_IRQ_REGS];
 
 	/* Client devices */
 	struct wm8350_codec codec;
@@ -677,13 +675,33 @@ int wm8350_block_write(struct wm8350 *wm8350, int reg, int size, u16 *src);
 /*
  * WM8350 internal interrupts
  */
-int wm8350_register_irq(struct wm8350 *wm8350, int irq,
-			irq_handler_t handler, unsigned long flags,
-			const char *name, void *data);
-int wm8350_free_irq(struct wm8350 *wm8350, int irq, void *data);
+static inline int wm8350_register_irq(struct wm8350 *wm8350, int irq,
+				      irq_handler_t handler,
+				      unsigned long flags,
+				      const char *name, void *data)
+{
+	if (!wm8350->irq_base)
+		return -ENODEV;
+
+	return request_threaded_irq(irq + wm8350->irq_base, NULL,
+				    handler, flags, name, data);
+}
+
+static inline void wm8350_free_irq(struct wm8350 *wm8350, int irq, void *data)
+{
+	free_irq(irq + wm8350->irq_base, data);
+}
+
+static inline void wm8350_mask_irq(struct wm8350 *wm8350, int irq)
+{
+	disable_irq(irq + wm8350->irq_base);
+}
+
+static inline void wm8350_unmask_irq(struct wm8350 *wm8350, int irq)
+{
+	enable_irq(irq + wm8350->irq_base);
+}
 
-int wm8350_mask_irq(struct wm8350 *wm8350, int irq);
-int wm8350_unmask_irq(struct wm8350 *wm8350, int irq);
 int wm8350_irq_init(struct wm8350 *wm8350, int irq,
 		    struct wm8350_platform_data *pdata);
 int wm8350_irq_exit(struct wm8350 *wm8350);
-- 
cgit v1.2.3


From 38f6ce45f0bca04ac653c57cacd375c469995321 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Thu, 7 Jan 2010 16:10:08 +0000
Subject: gpiolib: Add support for WM8350 GPIO controller

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/gpio/Kconfig            |   7 ++
 drivers/gpio/Makefile           |   1 +
 drivers/gpio/wm8350-gpiolib.c   | 181 ++++++++++++++++++++++++++++++++++++++++
 include/linux/mfd/wm8350/core.h |   2 +
 include/linux/mfd/wm8350/gpio.h |   1 +
 5 files changed, 192 insertions(+)
 create mode 100644 drivers/gpio/wm8350-gpiolib.c

(limited to 'include/linux')

diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig
index a4cdbd51b1c6..acac9f60db1d 100644
--- a/drivers/gpio/Kconfig
+++ b/drivers/gpio/Kconfig
@@ -185,6 +185,13 @@ config GPIO_WM831X
 	  Say yes here to access the GPIO signals of WM831x power management
 	  chips from Wolfson Microelectronics.
 
+config GPIO_WM8350
+	tristate "WM8350 GPIOs"
+	depends on MFD_WM8350
+	help
+	  Say yes here to access the GPIO signals of WM8350 power management
+	  chips from Wolfson Microelectronics.
+
 config GPIO_ADP5520
 	tristate "GPIO Support for ADP5520 PMIC"
 	depends on PMIC_ADP5520
diff --git a/drivers/gpio/Makefile b/drivers/gpio/Makefile
index 128abf8a98da..90b0880923de 100644
--- a/drivers/gpio/Makefile
+++ b/drivers/gpio/Makefile
@@ -25,3 +25,4 @@ obj-$(CONFIG_GPIO_BT8XX)	+= bt8xxgpio.o
 obj-$(CONFIG_GPIO_IT8761E)	+= it8761e_gpio.o
 obj-$(CONFIG_GPIO_VR41XX)	+= vr41xx_giu.o
 obj-$(CONFIG_GPIO_WM831X)	+= wm831x-gpio.o
+obj-$(CONFIG_GPIO_WM8350)	+= wm8350-gpiolib.o
diff --git a/drivers/gpio/wm8350-gpiolib.c b/drivers/gpio/wm8350-gpiolib.c
new file mode 100644
index 000000000000..511840d1c7ba
--- /dev/null
+++ b/drivers/gpio/wm8350-gpiolib.c
@@ -0,0 +1,181 @@
+/*
+ * wm835x-gpiolib.c  --  gpiolib support for Wolfson WM835x PMICs
+ *
+ * Copyright 2009 Wolfson Microelectronics PLC.
+ *
+ * Author: Mark Brown <broonie@opensource.wolfsonmicro.com>
+ *
+ *  This program is free software; you can redistribute  it and/or modify it
+ *  under  the terms of  the GNU General  Public License as published by the
+ *  Free Software Foundation;  either version 2 of the  License, or (at your
+ *  option) any later version.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/gpio.h>
+#include <linux/mfd/core.h>
+#include <linux/platform_device.h>
+#include <linux/seq_file.h>
+
+#include <linux/mfd/wm8350/core.h>
+#include <linux/mfd/wm8350/gpio.h>
+
+struct wm8350_gpio_data {
+	struct wm8350 *wm8350;
+	struct gpio_chip gpio_chip;
+};
+
+static inline struct wm8350_gpio_data *to_wm8350_gpio(struct gpio_chip *chip)
+{
+	return container_of(chip, struct wm8350_gpio_data, gpio_chip);
+}
+
+static int wm8350_gpio_direction_in(struct gpio_chip *chip, unsigned offset)
+{
+	struct wm8350_gpio_data *wm8350_gpio = to_wm8350_gpio(chip);
+	struct wm8350 *wm8350 = wm8350_gpio->wm8350;
+
+	return wm8350_set_bits(wm8350, WM8350_GPIO_CONFIGURATION_I_O,
+			       1 << offset);
+}
+
+static int wm8350_gpio_get(struct gpio_chip *chip, unsigned offset)
+{
+	struct wm8350_gpio_data *wm8350_gpio = to_wm8350_gpio(chip);
+	struct wm8350 *wm8350 = wm8350_gpio->wm8350;
+	int ret;
+
+	ret = wm8350_reg_read(wm8350, WM8350_GPIO_LEVEL);
+	if (ret < 0)
+		return ret;
+
+	if (ret & (1 << offset))
+		return 1;
+	else
+		return 0;
+}
+
+static void wm8350_gpio_set(struct gpio_chip *chip, unsigned offset, int value)
+{
+	struct wm8350_gpio_data *wm8350_gpio = to_wm8350_gpio(chip);
+	struct wm8350 *wm8350 = wm8350_gpio->wm8350;
+
+	if (value)
+		wm8350_set_bits(wm8350, WM8350_GPIO_LEVEL, 1 << offset);
+	else
+		wm8350_clear_bits(wm8350, WM8350_GPIO_LEVEL, 1 << offset);
+}
+
+static int wm8350_gpio_direction_out(struct gpio_chip *chip,
+				     unsigned offset, int value)
+{
+	struct wm8350_gpio_data *wm8350_gpio = to_wm8350_gpio(chip);
+	struct wm8350 *wm8350 = wm8350_gpio->wm8350;
+	int ret;
+
+	ret = wm8350_clear_bits(wm8350, WM8350_GPIO_CONFIGURATION_I_O,
+				1 << offset);
+	if (ret < 0)
+		return ret;
+
+	/* Don't have an atomic direction/value setup */
+	wm8350_gpio_set(chip, offset, value);
+
+	return 0;
+}
+
+static int wm8350_gpio_to_irq(struct gpio_chip *chip, unsigned offset)
+{
+	struct wm8350_gpio_data *wm8350_gpio = to_wm8350_gpio(chip);
+	struct wm8350 *wm8350 = wm8350_gpio->wm8350;
+
+	if (!wm8350->irq_base)
+		return -EINVAL;
+
+	return wm8350->irq_base + WM8350_IRQ_GPIO(offset);
+}
+
+static struct gpio_chip template_chip = {
+	.label			= "wm8350",
+	.owner			= THIS_MODULE,
+	.direction_input	= wm8350_gpio_direction_in,
+	.get			= wm8350_gpio_get,
+	.direction_output	= wm8350_gpio_direction_out,
+	.set			= wm8350_gpio_set,
+	.to_irq			= wm8350_gpio_to_irq,
+	.can_sleep		= 1,
+};
+
+static int __devinit wm8350_gpio_probe(struct platform_device *pdev)
+{
+	struct wm8350 *wm8350 = dev_get_drvdata(pdev->dev.parent);
+	struct wm8350_platform_data *pdata = wm8350->dev->platform_data;
+	struct wm8350_gpio_data *wm8350_gpio;
+	int ret;
+
+	wm8350_gpio = kzalloc(sizeof(*wm8350_gpio), GFP_KERNEL);
+	if (wm8350_gpio == NULL)
+		return -ENOMEM;
+
+	wm8350_gpio->wm8350 = wm8350;
+	wm8350_gpio->gpio_chip = template_chip;
+	wm8350_gpio->gpio_chip.ngpio = 13;
+	wm8350_gpio->gpio_chip.dev = &pdev->dev;
+	if (pdata && pdata->gpio_base)
+		wm8350_gpio->gpio_chip.base = pdata->gpio_base;
+	else
+		wm8350_gpio->gpio_chip.base = -1;
+
+	ret = gpiochip_add(&wm8350_gpio->gpio_chip);
+	if (ret < 0) {
+		dev_err(&pdev->dev, "Could not register gpiochip, %d\n",
+			ret);
+		goto err;
+	}
+
+	platform_set_drvdata(pdev, wm8350_gpio);
+
+	return ret;
+
+err:
+	kfree(wm8350_gpio);
+	return ret;
+}
+
+static int __devexit wm8350_gpio_remove(struct platform_device *pdev)
+{
+	struct wm8350_gpio_data *wm8350_gpio = platform_get_drvdata(pdev);
+	int ret;
+
+	ret = gpiochip_remove(&wm8350_gpio->gpio_chip);
+	if (ret == 0)
+		kfree(wm8350_gpio);
+
+	return ret;
+}
+
+static struct platform_driver wm8350_gpio_driver = {
+	.driver.name	= "wm8350-gpio",
+	.driver.owner	= THIS_MODULE,
+	.probe		= wm8350_gpio_probe,
+	.remove		= __devexit_p(wm8350_gpio_remove),
+};
+
+static int __init wm8350_gpio_init(void)
+{
+	return platform_driver_register(&wm8350_gpio_driver);
+}
+subsys_initcall(wm8350_gpio_init);
+
+static void __exit wm8350_gpio_exit(void)
+{
+	platform_driver_unregister(&wm8350_gpio_driver);
+}
+module_exit(wm8350_gpio_exit);
+
+MODULE_AUTHOR("Mark Brown <broonie@opensource.wolfsonmicro.com>");
+MODULE_DESCRIPTION("GPIO interface for WM8350 PMICs");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:wm8350-gpio");
diff --git a/include/linux/mfd/wm8350/core.h b/include/linux/mfd/wm8350/core.h
index 04217a71f173..fae08aa65413 100644
--- a/include/linux/mfd/wm8350/core.h
+++ b/include/linux/mfd/wm8350/core.h
@@ -645,11 +645,13 @@ struct wm8350 {
  *        used by the platform to configure GPIO functions and similar.
  * @irq_high: Set if WM8350 IRQ is active high.
  * @irq_base: Base IRQ for genirq (not currently used).
+ * @gpio_base: Base for gpiolib.
  */
 struct wm8350_platform_data {
 	int (*init)(struct wm8350 *wm8350);
 	int irq_high;
 	int irq_base;
+	int gpio_base;
 };
 
 
diff --git a/include/linux/mfd/wm8350/gpio.h b/include/linux/mfd/wm8350/gpio.h
index 71af3d6ebe9d..d657bcd6d955 100644
--- a/include/linux/mfd/wm8350/gpio.h
+++ b/include/linux/mfd/wm8350/gpio.h
@@ -29,6 +29,7 @@
 #define WM8350_GPIO_FUNCTION_SELECT_2           0x8D
 #define WM8350_GPIO_FUNCTION_SELECT_3           0x8E
 #define WM8350_GPIO_FUNCTION_SELECT_4           0x8F
+#define WM8350_GPIO_LEVEL			0xE6
 
 /*
  * GPIO Functions
-- 
cgit v1.2.3


From 0df883df8e8aea79b501f6262b595e66dec175dc Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@stericsson.com>
Date: Fri, 8 Jan 2010 10:44:16 +0100
Subject: mfd: Convert AB3100 driver to threaded IRQ

This converts the AB3100 core MFD driver to use a threaded
interrupt handler instead of the explicit top/bottom-half
construction with a workqueue. This saves some code and make it
more similar to other modern MFD drivers.

Signed-off-by: Linus Walleij <linus.walleij@stericsson.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/ab3100-core.c  | 43 +++++++++++++------------------------------
 include/linux/mfd/ab3100.h |  3 ---
 2 files changed, 13 insertions(+), 33 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/ab3100-core.c b/drivers/mfd/ab3100-core.c
index fd42a80e7bf9..aa3824a1b4f2 100644
--- a/drivers/mfd/ab3100-core.c
+++ b/drivers/mfd/ab3100-core.c
@@ -365,10 +365,13 @@ int ab3100_event_registers_startup_state_get(struct ab3100 *ab3100,
 }
 EXPORT_SYMBOL(ab3100_event_registers_startup_state_get);
 
-/* Interrupt handling worker */
-static void ab3100_work(struct work_struct *work)
+/*
+ * This is a threaded interrupt handler so we can make some
+ * I2C calls etc.
+ */
+static irqreturn_t ab3100_irq_handler(int irq, void *data)
 {
-	struct ab3100 *ab3100 = container_of(work, struct ab3100, work);
+	struct ab3100 *ab3100 = data;
 	u8 event_regs[3];
 	u32 fatevent;
 	int err;
@@ -376,7 +379,7 @@ static void ab3100_work(struct work_struct *work)
 	err = ab3100_get_register_page_interruptible(ab3100, AB3100_EVENTA1,
 				       event_regs, 3);
 	if (err)
-		goto err_event_wq;
+		goto err_event;
 
 	fatevent = (event_regs[0] << 16) |
 		(event_regs[1] << 8) |
@@ -398,29 +401,11 @@ static void ab3100_work(struct work_struct *work)
 	dev_dbg(ab3100->dev,
 		"IRQ Event: 0x%08x\n", fatevent);
 
-	/* By now the IRQ should be acked and deasserted so enable it again */
-	enable_irq(ab3100->i2c_client->irq);
-	return;
+	return IRQ_HANDLED;
 
- err_event_wq:
+ err_event:
 	dev_dbg(ab3100->dev,
-		"error in event workqueue\n");
-	/* Enable the IRQ anyway, what choice do we have? */
-	enable_irq(ab3100->i2c_client->irq);
-	return;
-}
-
-static irqreturn_t ab3100_irq_handler(int irq, void *data)
-{
-	struct ab3100 *ab3100 = data;
-	/*
-	 * Disable the IRQ and dispatch a worker to handle the
-	 * event. Since the chip resides on I2C this is slow
-	 * stuff and we will re-enable the interrupts once th
-	 * worker has finished.
-	 */
-	disable_irq_nosync(irq);
-	schedule_work(&ab3100->work);
+		"error reading event status\n");
 	return IRQ_HANDLED;
 }
 
@@ -904,12 +889,10 @@ static int __init ab3100_probe(struct i2c_client *client,
 	if (err)
 		goto exit_no_setup;
 
-	INIT_WORK(&ab3100->work, ab3100_work);
-
 	/* This real unpredictable IRQ is of course sampled for entropy */
-	err = request_irq(client->irq, ab3100_irq_handler,
-			  IRQF_DISABLED | IRQF_SAMPLE_RANDOM,
-			  "AB3100 IRQ", ab3100);
+	err = request_threaded_irq(client->irq, NULL, ab3100_irq_handler,
+			  IRQF_ONESHOT,
+			  "ab3100-core", ab3100);
 	if (err)
 		goto exit_no_irq;
 
diff --git a/include/linux/mfd/ab3100.h b/include/linux/mfd/ab3100.h
index e9aa4c9d749d..9a881c305a50 100644
--- a/include/linux/mfd/ab3100.h
+++ b/include/linux/mfd/ab3100.h
@@ -6,7 +6,6 @@
  */
 
 #include <linux/device.h>
-#include <linux/workqueue.h>
 #include <linux/regulator/machine.h>
 
 #ifndef MFD_AB3100_H
@@ -74,7 +73,6 @@
  * @testreg_client: secondary client for test registers
  * @chip_name: name of this chip variant
  * @chip_id: 8 bit chip ID for this chip variant
- * @work: an event handling worker
  * @event_subscribers: event subscribers are listed here
  * @startup_events: a copy of the first reading of the event registers
  * @startup_events_read: whether the first events have been read
@@ -90,7 +88,6 @@ struct ab3100 {
 	struct i2c_client *testreg_client;
 	char chip_name[32];
 	u8 chip_id;
-	struct work_struct work;
 	struct blocking_notifier_head event_subscribers;
 	u32 startup_events;
 	bool startup_events_read;
-- 
cgit v1.2.3


From bbd51b1ff1bf57b9ed7f062486a415509968d4d9 Mon Sep 17 00:00:00 2001
From: Haojian Zhuang <haojian.zhuang@marvell.com>
Date: Wed, 6 Jan 2010 17:04:18 -0500
Subject: mfd: Split 88pm8607 driver

Create 88pm8607-i2c driver to support all I2C operation of 88PM8607.

Signed-off-by: Haojian Zhuang <haojian.zhuang@marvell.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/88pm8607.c       | 302 -------------------------------------------
 drivers/mfd/88pm860x-core.c  | 134 +++++++++++++++++++
 drivers/mfd/88pm860x-i2c.c   | 202 +++++++++++++++++++++++++++++
 drivers/mfd/Makefile         |   2 +-
 include/linux/mfd/88pm8607.h |  12 +-
 5 files changed, 346 insertions(+), 306 deletions(-)
 delete mode 100644 drivers/mfd/88pm8607.c
 create mode 100644 drivers/mfd/88pm860x-core.c
 create mode 100644 drivers/mfd/88pm860x-i2c.c

(limited to 'include/linux')

diff --git a/drivers/mfd/88pm8607.c b/drivers/mfd/88pm8607.c
deleted file mode 100644
index 7e3f65907993..000000000000
--- a/drivers/mfd/88pm8607.c
+++ /dev/null
@@ -1,302 +0,0 @@
-/*
- * Base driver for Marvell 88PM8607
- *
- * Copyright (C) 2009 Marvell International Ltd.
- * 	Haojian Zhuang <haojian.zhuang@marvell.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/interrupt.h>
-#include <linux/platform_device.h>
-#include <linux/i2c.h>
-#include <linux/mfd/core.h>
-#include <linux/mfd/88pm8607.h>
-
-
-#define PM8607_REG_RESOURCE(_start, _end)		\
-{							\
-	.start	= PM8607_##_start,			\
-	.end	= PM8607_##_end,			\
-	.flags	= IORESOURCE_IO,			\
-}
-
-static struct resource pm8607_regulator_resources[] = {
-	PM8607_REG_RESOURCE(BUCK1, BUCK1),
-	PM8607_REG_RESOURCE(BUCK2, BUCK2),
-	PM8607_REG_RESOURCE(BUCK3, BUCK3),
-	PM8607_REG_RESOURCE(LDO1,  LDO1),
-	PM8607_REG_RESOURCE(LDO2,  LDO2),
-	PM8607_REG_RESOURCE(LDO3,  LDO3),
-	PM8607_REG_RESOURCE(LDO4,  LDO4),
-	PM8607_REG_RESOURCE(LDO5,  LDO5),
-	PM8607_REG_RESOURCE(LDO6,  LDO6),
-	PM8607_REG_RESOURCE(LDO7,  LDO7),
-	PM8607_REG_RESOURCE(LDO8,  LDO8),
-	PM8607_REG_RESOURCE(LDO9,  LDO9),
-	PM8607_REG_RESOURCE(LDO10, LDO10),
-	PM8607_REG_RESOURCE(LDO12, LDO12),
-	PM8607_REG_RESOURCE(LDO14, LDO14),
-};
-
-#define PM8607_REG_DEVS(_name, _id)					\
-{									\
-	.name		= "88pm8607-" #_name,				\
-	.num_resources	= 1,						\
-	.resources	= &pm8607_regulator_resources[PM8607_ID_##_id],	\
-}
-
-static struct mfd_cell pm8607_devs[] = {
-	PM8607_REG_DEVS(buck1, BUCK1),
-	PM8607_REG_DEVS(buck2, BUCK2),
-	PM8607_REG_DEVS(buck3, BUCK3),
-	PM8607_REG_DEVS(ldo1,  LDO1),
-	PM8607_REG_DEVS(ldo2,  LDO2),
-	PM8607_REG_DEVS(ldo3,  LDO3),
-	PM8607_REG_DEVS(ldo4,  LDO4),
-	PM8607_REG_DEVS(ldo5,  LDO5),
-	PM8607_REG_DEVS(ldo6,  LDO6),
-	PM8607_REG_DEVS(ldo7,  LDO7),
-	PM8607_REG_DEVS(ldo8,  LDO8),
-	PM8607_REG_DEVS(ldo9,  LDO9),
-	PM8607_REG_DEVS(ldo10, LDO10),
-	PM8607_REG_DEVS(ldo12, LDO12),
-	PM8607_REG_DEVS(ldo14, LDO14),
-};
-
-static inline int pm8607_read_device(struct pm8607_chip *chip,
-				     int reg, int bytes, void *dest)
-{
-	struct i2c_client *i2c = chip->client;
-	unsigned char data;
-	int ret;
-
-	data = (unsigned char)reg;
-	ret = i2c_master_send(i2c, &data, 1);
-	if (ret < 0)
-		return ret;
-
-	ret = i2c_master_recv(i2c, dest, bytes);
-	if (ret < 0)
-		return ret;
-	return 0;
-}
-
-static inline int pm8607_write_device(struct pm8607_chip *chip,
-				      int reg, int bytes, void *src)
-{
-	struct i2c_client *i2c = chip->client;
-	unsigned char buf[bytes + 1];
-	int ret;
-
-	buf[0] = (unsigned char)reg;
-	memcpy(&buf[1], src, bytes);
-
-	ret = i2c_master_send(i2c, buf, bytes + 1);
-	if (ret < 0)
-		return ret;
-	return 0;
-}
-
-int pm8607_reg_read(struct pm8607_chip *chip, int reg)
-{
-	unsigned char data;
-	int ret;
-
-	mutex_lock(&chip->io_lock);
-	ret = chip->read(chip, reg, 1, &data);
-	mutex_unlock(&chip->io_lock);
-
-	if (ret < 0)
-		return ret;
-	else
-		return (int)data;
-}
-EXPORT_SYMBOL(pm8607_reg_read);
-
-int pm8607_reg_write(struct pm8607_chip *chip, int reg,
-		     unsigned char data)
-{
-	int ret;
-
-	mutex_lock(&chip->io_lock);
-	ret = chip->write(chip, reg, 1, &data);
-	mutex_unlock(&chip->io_lock);
-
-	return ret;
-}
-EXPORT_SYMBOL(pm8607_reg_write);
-
-int pm8607_bulk_read(struct pm8607_chip *chip, int reg,
-		     int count, unsigned char *buf)
-{
-	int ret;
-
-	mutex_lock(&chip->io_lock);
-	ret = chip->read(chip, reg, count, buf);
-	mutex_unlock(&chip->io_lock);
-
-	return ret;
-}
-EXPORT_SYMBOL(pm8607_bulk_read);
-
-int pm8607_bulk_write(struct pm8607_chip *chip, int reg,
-		      int count, unsigned char *buf)
-{
-	int ret;
-
-	mutex_lock(&chip->io_lock);
-	ret = chip->write(chip, reg, count, buf);
-	mutex_unlock(&chip->io_lock);
-
-	return ret;
-}
-EXPORT_SYMBOL(pm8607_bulk_write);
-
-int pm8607_set_bits(struct pm8607_chip *chip, int reg,
-		    unsigned char mask, unsigned char data)
-{
-	unsigned char value;
-	int ret;
-
-	mutex_lock(&chip->io_lock);
-	ret = chip->read(chip, reg, 1, &value);
-	if (ret < 0)
-		goto out;
-	value &= ~mask;
-	value |= data;
-	ret = chip->write(chip, reg, 1, &value);
-out:
-	mutex_unlock(&chip->io_lock);
-	return ret;
-}
-EXPORT_SYMBOL(pm8607_set_bits);
-
-
-static const struct i2c_device_id pm8607_id_table[] = {
-	{ "88PM8607", 0 },
-	{}
-};
-MODULE_DEVICE_TABLE(i2c, pm8607_id_table);
-
-
-static int __devinit pm8607_probe(struct i2c_client *client,
-				  const struct i2c_device_id *id)
-{
-	struct pm8607_platform_data *pdata = client->dev.platform_data;
-	struct pm8607_chip *chip;
-	int i, count;
-	int ret;
-
-	chip = kzalloc(sizeof(struct pm8607_chip), GFP_KERNEL);
-	if (chip == NULL)
-		return -ENOMEM;
-
-	chip->client = client;
-	chip->dev = &client->dev;
-	chip->read = pm8607_read_device;
-	chip->write = pm8607_write_device;
-	i2c_set_clientdata(client, chip);
-
-	mutex_init(&chip->io_lock);
-	dev_set_drvdata(chip->dev, chip);
-
-	ret = pm8607_reg_read(chip, PM8607_CHIP_ID);
-	if (ret < 0) {
-		dev_err(chip->dev, "Failed to read CHIP ID: %d\n", ret);
-		goto out;
-	}
-	if ((ret & CHIP_ID_MASK) == CHIP_ID)
-		dev_info(chip->dev, "Marvell 88PM8607 (ID: %02x) detected\n",
-			 ret);
-	else {
-		dev_err(chip->dev, "Failed to detect Marvell 88PM8607. "
-			"Chip ID: %02x\n", ret);
-		goto out;
-	}
-	chip->chip_id = ret;
-
-	ret = pm8607_reg_read(chip, PM8607_BUCK3);
-	if (ret < 0) {
-		dev_err(chip->dev, "Failed to read BUCK3 register: %d\n", ret);
-		goto out;
-	}
-	if (ret & PM8607_BUCK3_DOUBLE)
-		chip->buck3_double = 1;
-
-	ret = pm8607_reg_read(chip, PM8607_MISC1);
-	if (ret < 0) {
-		dev_err(chip->dev, "Failed to read MISC1 register: %d\n", ret);
-		goto out;
-	}
-	if (pdata->i2c_port == PI2C_PORT)
-		ret |= PM8607_MISC1_PI2C;
-	else
-		ret &= ~PM8607_MISC1_PI2C;
-	ret = pm8607_reg_write(chip, PM8607_MISC1, ret);
-	if (ret < 0) {
-		dev_err(chip->dev, "Failed to write MISC1 register: %d\n", ret);
-		goto out;
-	}
-
-
-	count = ARRAY_SIZE(pm8607_devs);
-	for (i = 0; i < count; i++) {
-		ret = mfd_add_devices(chip->dev, i, &pm8607_devs[i],
-				      1, NULL, 0);
-		if (ret != 0) {
-			dev_err(chip->dev, "Failed to add subdevs\n");
-			goto out;
-		}
-	}
-
-	return 0;
-
-out:
-	i2c_set_clientdata(client, NULL);
-	kfree(chip);
-	return ret;
-}
-
-static int __devexit pm8607_remove(struct i2c_client *client)
-{
-	struct pm8607_chip *chip = i2c_get_clientdata(client);
-
-	mfd_remove_devices(chip->dev);
-	kfree(chip);
-	return 0;
-}
-
-static struct i2c_driver pm8607_driver = {
-	.driver	= {
-		.name	= "88PM8607",
-		.owner	= THIS_MODULE,
-	},
-	.probe		= pm8607_probe,
-	.remove		= __devexit_p(pm8607_remove),
-	.id_table	= pm8607_id_table,
-};
-
-static int __init pm8607_init(void)
-{
-	int ret;
-	ret = i2c_add_driver(&pm8607_driver);
-	if (ret != 0)
-		pr_err("Failed to register 88PM8607 I2C driver: %d\n", ret);
-	return ret;
-}
-subsys_initcall(pm8607_init);
-
-static void __exit pm8607_exit(void)
-{
-	i2c_del_driver(&pm8607_driver);
-}
-module_exit(pm8607_exit);
-
-MODULE_DESCRIPTION("PMIC Driver for Marvell 88PM8607");
-MODULE_AUTHOR("Haojian Zhuang <haojian.zhuang@marvell.com>");
-MODULE_LICENSE("GPL");
diff --git a/drivers/mfd/88pm860x-core.c b/drivers/mfd/88pm860x-core.c
new file mode 100644
index 000000000000..d1464e54e656
--- /dev/null
+++ b/drivers/mfd/88pm860x-core.c
@@ -0,0 +1,134 @@
+/*
+ * Base driver for Marvell 88PM8607
+ *
+ * Copyright (C) 2009 Marvell International Ltd.
+ * 	Haojian Zhuang <haojian.zhuang@marvell.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/platform_device.h>
+#include <linux/mfd/core.h>
+#include <linux/mfd/88pm8607.h>
+
+
+#define PM8607_REG_RESOURCE(_start, _end)		\
+{							\
+	.start	= PM8607_##_start,			\
+	.end	= PM8607_##_end,			\
+	.flags	= IORESOURCE_IO,			\
+}
+
+static struct resource pm8607_regulator_resources[] = {
+	PM8607_REG_RESOURCE(BUCK1, BUCK1),
+	PM8607_REG_RESOURCE(BUCK2, BUCK2),
+	PM8607_REG_RESOURCE(BUCK3, BUCK3),
+	PM8607_REG_RESOURCE(LDO1,  LDO1),
+	PM8607_REG_RESOURCE(LDO2,  LDO2),
+	PM8607_REG_RESOURCE(LDO3,  LDO3),
+	PM8607_REG_RESOURCE(LDO4,  LDO4),
+	PM8607_REG_RESOURCE(LDO5,  LDO5),
+	PM8607_REG_RESOURCE(LDO6,  LDO6),
+	PM8607_REG_RESOURCE(LDO7,  LDO7),
+	PM8607_REG_RESOURCE(LDO8,  LDO8),
+	PM8607_REG_RESOURCE(LDO9,  LDO9),
+	PM8607_REG_RESOURCE(LDO10, LDO10),
+	PM8607_REG_RESOURCE(LDO12, LDO12),
+	PM8607_REG_RESOURCE(LDO14, LDO14),
+};
+
+#define PM8607_REG_DEVS(_name, _id)					\
+{									\
+	.name		= "88pm8607-" #_name,				\
+	.num_resources	= 1,						\
+	.resources	= &pm8607_regulator_resources[PM8607_ID_##_id],	\
+}
+
+static struct mfd_cell pm8607_devs[] = {
+	PM8607_REG_DEVS(buck1, BUCK1),
+	PM8607_REG_DEVS(buck2, BUCK2),
+	PM8607_REG_DEVS(buck3, BUCK3),
+	PM8607_REG_DEVS(ldo1,  LDO1),
+	PM8607_REG_DEVS(ldo2,  LDO2),
+	PM8607_REG_DEVS(ldo3,  LDO3),
+	PM8607_REG_DEVS(ldo4,  LDO4),
+	PM8607_REG_DEVS(ldo5,  LDO5),
+	PM8607_REG_DEVS(ldo6,  LDO6),
+	PM8607_REG_DEVS(ldo7,  LDO7),
+	PM8607_REG_DEVS(ldo8,  LDO8),
+	PM8607_REG_DEVS(ldo9,  LDO9),
+	PM8607_REG_DEVS(ldo10, LDO10),
+	PM8607_REG_DEVS(ldo12, LDO12),
+	PM8607_REG_DEVS(ldo14, LDO14),
+};
+
+int pm860x_device_init(struct pm8607_chip *chip,
+		       struct pm8607_platform_data *pdata)
+{
+	int i, count;
+	int ret;
+
+	ret = pm8607_reg_read(chip, PM8607_CHIP_ID);
+	if (ret < 0) {
+		dev_err(chip->dev, "Failed to read CHIP ID: %d\n", ret);
+		goto out;
+	}
+	if ((ret & PM8607_ID_MASK) == PM8607_ID)
+		dev_info(chip->dev, "Marvell 88PM8607 (ID: %02x) detected\n",
+			 ret);
+	else {
+		dev_err(chip->dev, "Failed to detect Marvell 88PM8607. "
+			"Chip ID: %02x\n", ret);
+		goto out;
+	}
+	chip->chip_id = ret;
+
+	ret = pm8607_reg_read(chip, PM8607_BUCK3);
+	if (ret < 0) {
+		dev_err(chip->dev, "Failed to read BUCK3 register: %d\n", ret);
+		goto out;
+	}
+	if (ret & PM8607_BUCK3_DOUBLE)
+		chip->buck3_double = 1;
+
+	ret = pm8607_reg_read(chip, PM8607_MISC1);
+	if (ret < 0) {
+		dev_err(chip->dev, "Failed to read MISC1 register: %d\n", ret);
+		goto out;
+	}
+	if (pdata->i2c_port == PI2C_PORT)
+		ret |= PM8607_MISC1_PI2C;
+	else
+		ret &= ~PM8607_MISC1_PI2C;
+	ret = pm8607_reg_write(chip, PM8607_MISC1, ret);
+	if (ret < 0) {
+		dev_err(chip->dev, "Failed to write MISC1 register: %d\n", ret);
+		goto out;
+	}
+
+	count = ARRAY_SIZE(pm8607_devs);
+	for (i = 0; i < count; i++) {
+		ret = mfd_add_devices(chip->dev, i, &pm8607_devs[i],
+				      1, NULL, 0);
+		if (ret != 0) {
+			dev_err(chip->dev, "Failed to add subdevs\n");
+			goto out;
+		}
+	}
+out:
+	return ret;
+}
+
+void pm8607_device_exit(struct pm8607_chip *chip)
+{
+	mfd_remove_devices(chip->dev);
+}
+
+MODULE_DESCRIPTION("PMIC Driver for Marvell 88PM8607");
+MODULE_AUTHOR("Haojian Zhuang <haojian.zhuang@marvell.com>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/mfd/88pm860x-i2c.c b/drivers/mfd/88pm860x-i2c.c
new file mode 100644
index 000000000000..dda23cbfe415
--- /dev/null
+++ b/drivers/mfd/88pm860x-i2c.c
@@ -0,0 +1,202 @@
+/*
+ * I2C driver for Marvell 88PM8607
+ *
+ * Copyright (C) 2009 Marvell International Ltd.
+ * 	Haojian Zhuang <haojian.zhuang@marvell.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/i2c.h>
+#include <linux/mfd/88pm8607.h>
+
+static inline int pm8607_read_device(struct pm8607_chip *chip,
+				     int reg, int bytes, void *dest)
+{
+	struct i2c_client *i2c = chip->client;
+	unsigned char data;
+	int ret;
+
+	data = (unsigned char)reg;
+	ret = i2c_master_send(i2c, &data, 1);
+	if (ret < 0)
+		return ret;
+
+	ret = i2c_master_recv(i2c, dest, bytes);
+	if (ret < 0)
+		return ret;
+	return 0;
+}
+
+static inline int pm8607_write_device(struct pm8607_chip *chip,
+				      int reg, int bytes, void *src)
+{
+	struct i2c_client *i2c = chip->client;
+	unsigned char buf[bytes + 1];
+	int ret;
+
+	buf[0] = (unsigned char)reg;
+	memcpy(&buf[1], src, bytes);
+
+	ret = i2c_master_send(i2c, buf, bytes + 1);
+	if (ret < 0)
+		return ret;
+	return 0;
+}
+
+int pm8607_reg_read(struct pm8607_chip *chip, int reg)
+{
+	unsigned char data;
+	int ret;
+
+	mutex_lock(&chip->io_lock);
+	ret = chip->read(chip, reg, 1, &data);
+	mutex_unlock(&chip->io_lock);
+
+	if (ret < 0)
+		return ret;
+	else
+		return (int)data;
+}
+EXPORT_SYMBOL(pm8607_reg_read);
+
+int pm8607_reg_write(struct pm8607_chip *chip, int reg,
+		     unsigned char data)
+{
+	int ret;
+
+	mutex_lock(&chip->io_lock);
+	ret = chip->write(chip, reg, 1, &data);
+	mutex_unlock(&chip->io_lock);
+
+	return ret;
+}
+EXPORT_SYMBOL(pm8607_reg_write);
+
+int pm8607_bulk_read(struct pm8607_chip *chip, int reg,
+		     int count, unsigned char *buf)
+{
+	int ret;
+
+	mutex_lock(&chip->io_lock);
+	ret = chip->read(chip, reg, count, buf);
+	mutex_unlock(&chip->io_lock);
+
+	return ret;
+}
+EXPORT_SYMBOL(pm8607_bulk_read);
+
+int pm8607_bulk_write(struct pm8607_chip *chip, int reg,
+		      int count, unsigned char *buf)
+{
+	int ret;
+
+	mutex_lock(&chip->io_lock);
+	ret = chip->write(chip, reg, count, buf);
+	mutex_unlock(&chip->io_lock);
+
+	return ret;
+}
+EXPORT_SYMBOL(pm8607_bulk_write);
+
+int pm8607_set_bits(struct pm8607_chip *chip, int reg,
+		    unsigned char mask, unsigned char data)
+{
+	unsigned char value;
+	int ret;
+
+	mutex_lock(&chip->io_lock);
+	ret = chip->read(chip, reg, 1, &value);
+	if (ret < 0)
+		goto out;
+	value &= ~mask;
+	value |= data;
+	ret = chip->write(chip, reg, 1, &value);
+out:
+	mutex_unlock(&chip->io_lock);
+	return ret;
+}
+EXPORT_SYMBOL(pm8607_set_bits);
+
+
+static const struct i2c_device_id pm860x_id_table[] = {
+	{ "88PM8607", 0 },
+	{}
+};
+MODULE_DEVICE_TABLE(i2c, pm860x_id_table);
+
+static int __devinit pm860x_probe(struct i2c_client *client,
+				  const struct i2c_device_id *id)
+{
+	struct pm8607_platform_data *pdata = client->dev.platform_data;
+	struct pm8607_chip *chip;
+	int ret;
+
+	chip = kzalloc(sizeof(struct pm8607_chip), GFP_KERNEL);
+	if (chip == NULL)
+		return -ENOMEM;
+
+	chip->client = client;
+	chip->dev = &client->dev;
+	chip->read = pm8607_read_device;
+	chip->write = pm8607_write_device;
+	memcpy(&chip->id, id, sizeof(struct i2c_device_id));
+	i2c_set_clientdata(client, chip);
+
+	mutex_init(&chip->io_lock);
+	dev_set_drvdata(chip->dev, chip);
+
+	ret = pm860x_device_init(chip, pdata);
+	if (ret < 0)
+		goto out;
+
+
+	return 0;
+
+out:
+	i2c_set_clientdata(client, NULL);
+	kfree(chip);
+	return ret;
+}
+
+static int __devexit pm860x_remove(struct i2c_client *client)
+{
+	struct pm8607_chip *chip = i2c_get_clientdata(client);
+
+	kfree(chip);
+	return 0;
+}
+
+static struct i2c_driver pm860x_driver = {
+	.driver	= {
+		.name	= "88PM860x",
+		.owner	= THIS_MODULE,
+	},
+	.probe		= pm860x_probe,
+	.remove		= __devexit_p(pm860x_remove),
+	.id_table	= pm860x_id_table,
+};
+
+static int __init pm860x_i2c_init(void)
+{
+	int ret;
+	ret = i2c_add_driver(&pm860x_driver);
+	if (ret != 0)
+		pr_err("Failed to register 88PM860x I2C driver: %d\n", ret);
+	return ret;
+}
+subsys_initcall(pm860x_i2c_init);
+
+static void __exit pm860x_i2c_exit(void)
+{
+	i2c_del_driver(&pm860x_driver);
+}
+module_exit(pm860x_i2c_exit);
+
+MODULE_DESCRIPTION("I2C Driver for Marvell 88PM860x");
+MODULE_AUTHOR("Haojian Zhuang <haojian.zhuang@marvell.com>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
index 78295d6a75f7..88fa200188cf 100644
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile
@@ -2,6 +2,7 @@
 # Makefile for multifunction miscellaneous devices
 #
 
+obj-$(CONFIG_MFD_88PM8607)	+= 88pm860x-core.o 88pm860x-i2c.o
 obj-$(CONFIG_MFD_SM501)		+= sm501.o
 obj-$(CONFIG_MFD_ASIC3)		+= asic3.o tmio_core.o
 obj-$(CONFIG_MFD_SH_MOBILE_SDHI)		+= sh_mobile_sdhi.o
@@ -55,5 +56,4 @@ obj-$(CONFIG_AB3100_CORE)	+= ab3100-core.o
 obj-$(CONFIG_AB3100_OTP)	+= ab3100-otp.o
 obj-$(CONFIG_AB4500_CORE)	+= ab4500-core.o
 obj-$(CONFIG_MFD_TIMBERDALE)    += timberdale.o
-obj-$(CONFIG_MFD_88PM8607)	+= 88pm8607.o
 obj-$(CONFIG_PMIC_ADP5520)	+= adp5520.o
diff --git a/include/linux/mfd/88pm8607.h b/include/linux/mfd/88pm8607.h
index f41b428d2cec..6e4dcdca02a8 100644
--- a/include/linux/mfd/88pm8607.h
+++ b/include/linux/mfd/88pm8607.h
@@ -33,8 +33,8 @@ enum {
 	PM8607_ID_RG_MAX,
 };
 
-#define CHIP_ID				(0x40)
-#define CHIP_ID_MASK			(0xF8)
+#define PM8607_ID			(0x40)	/* 8607 chip ID */
+#define PM8607_ID_MASK			(0xF8)	/* 8607 chip ID mask */
 
 /* Interrupt Registers */
 #define PM8607_STATUS_1			(0x01)
@@ -185,6 +185,7 @@ struct pm8607_chip {
 	struct device		*dev;
 	struct mutex		io_lock;
 	struct i2c_client	*client;
+	struct i2c_device_id	id;
 
 	int (*read)(struct pm8607_chip *chip, int reg, int bytes, void *dest);
 	int (*write)(struct pm8607_chip *chip, int reg, int bytes, void *src);
@@ -214,4 +215,9 @@ extern int pm8607_bulk_write(struct pm8607_chip *, int, int,
 			     unsigned char *);
 extern int pm8607_set_bits(struct pm8607_chip *, int, unsigned char,
 			   unsigned char);
-#endif /* __LINUX_MFD_88PM8607_H */
+
+extern int pm860x_device_init(struct pm8607_chip *chip,
+			      struct pm8607_platform_data *pdata);
+extern void pm860x_device_exit(struct pm8607_chip *chip);
+
+#endif /* __LINUX_MFD_88PM860X_H */
-- 
cgit v1.2.3


From 53dbab7af9ca13fa95605e9a5c31bb803dcba363 Mon Sep 17 00:00:00 2001
From: Haojian Zhuang <haojian.zhuang@marvell.com>
Date: Fri, 8 Jan 2010 06:01:24 -0500
Subject: mfd: Support 88pm8606 in 860x driver

88PM8606 and 88PM8607 are two discrete chips used for power management.
Hardware designer can use them together or only one of them according to
requirement.

There's some logic tightly linked between these two chips. For example, USB
charger driver needs to access both chips by I2C interface.

Now share one driver to these two devices. Only one I2C client is identified
in platform init data. If another chip is also used, user should mark it in
companion_addr field of platform init data. Then driver could create another
I2C client for the companion chip.

All I2C operations are accessed by 860x-i2c driver. In order to support both
I2C client address, the read/write API is changed in below.

reg_read(client, offset)
reg_write(client, offset, data)

The benefit is that client drivers only need one kind of read/write API. I2C
and MFD driver can be shared in both 8606 and 8607.

Since API is changed, update API in 8607 regulator driver.

Signed-off-by: Haojian Zhuang <haojian.zhuang@marvell.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/88pm860x-core.c  |  61 ++++++++----
 drivers/mfd/88pm860x-i2c.c   | 172 ++++++++++++++++++++++----------
 drivers/regulator/88pm8607.c |  32 +++---
 include/linux/mfd/88pm8607.h | 223 ------------------------------------------
 include/linux/mfd/88pm860x.h | 227 +++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 402 insertions(+), 313 deletions(-)
 delete mode 100644 include/linux/mfd/88pm8607.h
 create mode 100644 include/linux/mfd/88pm860x.h

(limited to 'include/linux')

diff --git a/drivers/mfd/88pm860x-core.c b/drivers/mfd/88pm860x-core.c
index d1464e54e656..72b00304dc3a 100644
--- a/drivers/mfd/88pm860x-core.c
+++ b/drivers/mfd/88pm860x-core.c
@@ -14,7 +14,7 @@
 #include <linux/interrupt.h>
 #include <linux/platform_device.h>
 #include <linux/mfd/core.h>
-#include <linux/mfd/88pm8607.h>
+#include <linux/mfd/88pm860x.h>
 
 
 #define PM8607_REG_RESOURCE(_start, _end)		\
@@ -67,18 +67,23 @@ static struct mfd_cell pm8607_devs[] = {
 	PM8607_REG_DEVS(ldo14, LDO14),
 };
 
-int pm860x_device_init(struct pm8607_chip *chip,
-		       struct pm8607_platform_data *pdata)
+static void device_8606_init(struct pm860x_chip *chip, struct i2c_client *i2c,
+			     struct pm860x_platform_data *pdata)
+{
+}
+
+static void device_8607_init(struct pm860x_chip *chip, struct i2c_client *i2c,
+			     struct pm860x_platform_data *pdata)
 {
 	int i, count;
 	int ret;
 
-	ret = pm8607_reg_read(chip, PM8607_CHIP_ID);
+	ret = pm860x_reg_read(i2c, PM8607_CHIP_ID);
 	if (ret < 0) {
 		dev_err(chip->dev, "Failed to read CHIP ID: %d\n", ret);
 		goto out;
 	}
-	if ((ret & PM8607_ID_MASK) == PM8607_ID)
+	if ((ret & PM8607_VERSION_MASK) == PM8607_VERSION)
 		dev_info(chip->dev, "Marvell 88PM8607 (ID: %02x) detected\n",
 			 ret);
 	else {
@@ -86,9 +91,9 @@ int pm860x_device_init(struct pm8607_chip *chip,
 			"Chip ID: %02x\n", ret);
 		goto out;
 	}
-	chip->chip_id = ret;
+	chip->chip_version = ret;
 
-	ret = pm8607_reg_read(chip, PM8607_BUCK3);
+	ret = pm860x_reg_read(i2c, PM8607_BUCK3);
 	if (ret < 0) {
 		dev_err(chip->dev, "Failed to read BUCK3 register: %d\n", ret);
 		goto out;
@@ -96,20 +101,11 @@ int pm860x_device_init(struct pm8607_chip *chip,
 	if (ret & PM8607_BUCK3_DOUBLE)
 		chip->buck3_double = 1;
 
-	ret = pm8607_reg_read(chip, PM8607_MISC1);
+	ret = pm860x_reg_read(i2c, PM8607_MISC1);
 	if (ret < 0) {
 		dev_err(chip->dev, "Failed to read MISC1 register: %d\n", ret);
 		goto out;
 	}
-	if (pdata->i2c_port == PI2C_PORT)
-		ret |= PM8607_MISC1_PI2C;
-	else
-		ret &= ~PM8607_MISC1_PI2C;
-	ret = pm8607_reg_write(chip, PM8607_MISC1, ret);
-	if (ret < 0) {
-		dev_err(chip->dev, "Failed to write MISC1 register: %d\n", ret);
-		goto out;
-	}
 
 	count = ARRAY_SIZE(pm8607_devs);
 	for (i = 0; i < count; i++) {
@@ -121,14 +117,39 @@ int pm860x_device_init(struct pm8607_chip *chip,
 		}
 	}
 out:
-	return ret;
+	return;
+}
+
+int pm860x_device_init(struct pm860x_chip *chip,
+		       struct pm860x_platform_data *pdata)
+{
+	switch (chip->id) {
+	case CHIP_PM8606:
+		device_8606_init(chip, chip->client, pdata);
+		break;
+	case CHIP_PM8607:
+		device_8607_init(chip, chip->client, pdata);
+		break;
+	}
+
+	if (chip->companion) {
+		switch (chip->id) {
+		case CHIP_PM8607:
+			device_8606_init(chip, chip->companion, pdata);
+			break;
+		case CHIP_PM8606:
+			device_8607_init(chip, chip->companion, pdata);
+			break;
+		}
+	}
+	return 0;
 }
 
-void pm8607_device_exit(struct pm8607_chip *chip)
+void pm860x_device_exit(struct pm860x_chip *chip)
 {
 	mfd_remove_devices(chip->dev);
 }
 
-MODULE_DESCRIPTION("PMIC Driver for Marvell 88PM8607");
+MODULE_DESCRIPTION("PMIC Driver for Marvell 88PM860x");
 MODULE_AUTHOR("Haojian Zhuang <haojian.zhuang@marvell.com>");
 MODULE_LICENSE("GPL");
diff --git a/drivers/mfd/88pm860x-i2c.c b/drivers/mfd/88pm860x-i2c.c
index dda23cbfe415..6d7dba2bce8a 100644
--- a/drivers/mfd/88pm860x-i2c.c
+++ b/drivers/mfd/88pm860x-i2c.c
@@ -1,5 +1,5 @@
 /*
- * I2C driver for Marvell 88PM8607
+ * I2C driver for Marvell 88PM860x
  *
  * Copyright (C) 2009 Marvell International Ltd.
  * 	Haojian Zhuang <haojian.zhuang@marvell.com>
@@ -12,12 +12,11 @@
 #include <linux/module.h>
 #include <linux/platform_device.h>
 #include <linux/i2c.h>
-#include <linux/mfd/88pm8607.h>
+#include <linux/mfd/88pm860x.h>
 
-static inline int pm8607_read_device(struct pm8607_chip *chip,
+static inline int pm860x_read_device(struct i2c_client *i2c,
 				     int reg, int bytes, void *dest)
 {
-	struct i2c_client *i2c = chip->client;
 	unsigned char data;
 	int ret;
 
@@ -32,10 +31,9 @@ static inline int pm8607_read_device(struct pm8607_chip *chip,
 	return 0;
 }
 
-static inline int pm8607_write_device(struct pm8607_chip *chip,
+static inline int pm860x_write_device(struct i2c_client *i2c,
 				      int reg, int bytes, void *src)
 {
-	struct i2c_client *i2c = chip->client;
 	unsigned char buf[bytes + 1];
 	int ret;
 
@@ -48,13 +46,14 @@ static inline int pm8607_write_device(struct pm8607_chip *chip,
 	return 0;
 }
 
-int pm8607_reg_read(struct pm8607_chip *chip, int reg)
+int pm860x_reg_read(struct i2c_client *i2c, int reg)
 {
+	struct pm860x_chip *chip = i2c_get_clientdata(i2c);
 	unsigned char data;
 	int ret;
 
 	mutex_lock(&chip->io_lock);
-	ret = chip->read(chip, reg, 1, &data);
+	ret = pm860x_read_device(i2c, reg, 1, &data);
 	mutex_unlock(&chip->io_lock);
 
 	if (ret < 0)
@@ -62,111 +61,178 @@ int pm8607_reg_read(struct pm8607_chip *chip, int reg)
 	else
 		return (int)data;
 }
-EXPORT_SYMBOL(pm8607_reg_read);
+EXPORT_SYMBOL(pm860x_reg_read);
 
-int pm8607_reg_write(struct pm8607_chip *chip, int reg,
+int pm860x_reg_write(struct i2c_client *i2c, int reg,
 		     unsigned char data)
 {
+	struct pm860x_chip *chip = i2c_get_clientdata(i2c);
 	int ret;
 
 	mutex_lock(&chip->io_lock);
-	ret = chip->write(chip, reg, 1, &data);
+	ret = pm860x_write_device(i2c, reg, 1, &data);
 	mutex_unlock(&chip->io_lock);
 
 	return ret;
 }
-EXPORT_SYMBOL(pm8607_reg_write);
+EXPORT_SYMBOL(pm860x_reg_write);
 
-int pm8607_bulk_read(struct pm8607_chip *chip, int reg,
+int pm860x_bulk_read(struct i2c_client *i2c, int reg,
 		     int count, unsigned char *buf)
 {
+	struct pm860x_chip *chip = i2c_get_clientdata(i2c);
 	int ret;
 
 	mutex_lock(&chip->io_lock);
-	ret = chip->read(chip, reg, count, buf);
+	ret = pm860x_read_device(i2c, reg, count, buf);
 	mutex_unlock(&chip->io_lock);
 
 	return ret;
 }
-EXPORT_SYMBOL(pm8607_bulk_read);
+EXPORT_SYMBOL(pm860x_bulk_read);
 
-int pm8607_bulk_write(struct pm8607_chip *chip, int reg,
+int pm860x_bulk_write(struct i2c_client *i2c, int reg,
 		      int count, unsigned char *buf)
 {
+	struct pm860x_chip *chip = i2c_get_clientdata(i2c);
 	int ret;
 
 	mutex_lock(&chip->io_lock);
-	ret = chip->write(chip, reg, count, buf);
+	ret = pm860x_write_device(i2c, reg, count, buf);
 	mutex_unlock(&chip->io_lock);
 
 	return ret;
 }
-EXPORT_SYMBOL(pm8607_bulk_write);
+EXPORT_SYMBOL(pm860x_bulk_write);
 
-int pm8607_set_bits(struct pm8607_chip *chip, int reg,
+int pm860x_set_bits(struct i2c_client *i2c, int reg,
 		    unsigned char mask, unsigned char data)
 {
+	struct pm860x_chip *chip = i2c_get_clientdata(i2c);
 	unsigned char value;
 	int ret;
 
 	mutex_lock(&chip->io_lock);
-	ret = chip->read(chip, reg, 1, &value);
+	ret = pm860x_read_device(i2c, reg, 1, &value);
 	if (ret < 0)
 		goto out;
 	value &= ~mask;
 	value |= data;
-	ret = chip->write(chip, reg, 1, &value);
+	ret = pm860x_write_device(i2c, reg, 1, &value);
 out:
 	mutex_unlock(&chip->io_lock);
 	return ret;
 }
-EXPORT_SYMBOL(pm8607_set_bits);
+EXPORT_SYMBOL(pm860x_set_bits);
 
 
 static const struct i2c_device_id pm860x_id_table[] = {
-	{ "88PM8607", 0 },
+	{ "88PM860x", 0 },
 	{}
 };
 MODULE_DEVICE_TABLE(i2c, pm860x_id_table);
 
+static int verify_addr(struct i2c_client *i2c)
+{
+	unsigned short addr_8607[] = {0x30, 0x34};
+	unsigned short addr_8606[] = {0x10, 0x11};
+	int size, i;
+
+	if (i2c == NULL)
+		return 0;
+	size = ARRAY_SIZE(addr_8606);
+	for (i = 0; i < size; i++) {
+		if (i2c->addr == *(addr_8606 + i))
+			return CHIP_PM8606;
+	}
+	size = ARRAY_SIZE(addr_8607);
+	for (i = 0; i < size; i++) {
+		if (i2c->addr == *(addr_8607 + i))
+			return CHIP_PM8607;
+	}
+	return 0;
+}
+
 static int __devinit pm860x_probe(struct i2c_client *client,
 				  const struct i2c_device_id *id)
 {
-	struct pm8607_platform_data *pdata = client->dev.platform_data;
-	struct pm8607_chip *chip;
-	int ret;
-
-	chip = kzalloc(sizeof(struct pm8607_chip), GFP_KERNEL);
-	if (chip == NULL)
-		return -ENOMEM;
-
-	chip->client = client;
-	chip->dev = &client->dev;
-	chip->read = pm8607_read_device;
-	chip->write = pm8607_write_device;
-	memcpy(&chip->id, id, sizeof(struct i2c_device_id));
-	i2c_set_clientdata(client, chip);
-
-	mutex_init(&chip->io_lock);
-	dev_set_drvdata(chip->dev, chip);
-
-	ret = pm860x_device_init(chip, pdata);
-	if (ret < 0)
-		goto out;
-
-
+	struct pm860x_platform_data *pdata = client->dev.platform_data;
+	static struct pm860x_chip *chip;
+	struct i2c_board_info i2c_info = {
+		.type		= "88PM860x",
+		.platform_data	= client->dev.platform_data,
+	};
+	int addr_c, found_companion = 0;
+
+	if (pdata == NULL) {
+		pr_info("No platform data in %s!\n", __func__);
+		return -EINVAL;
+	}
+
+	/*
+	 * Both client and companion client shares same platform driver.
+	 * Driver distinguishes them by pdata->companion_addr.
+	 * pdata->companion_addr is only assigned if companion chip exists.
+	 * At the same time, the companion_addr shouldn't equal to client
+	 * address.
+	 */
+	addr_c = pdata->companion_addr;
+	if (addr_c && (addr_c != client->addr)) {
+		i2c_info.addr = addr_c;
+		found_companion = 1;
+	}
+
+	if (found_companion || (addr_c == 0)) {
+		chip = kzalloc(sizeof(struct pm860x_chip), GFP_KERNEL);
+		if (chip == NULL)
+			return -ENOMEM;
+
+		chip->id = verify_addr(client);
+		chip->companion_addr = addr_c;
+		chip->client = client;
+		i2c_set_clientdata(client, chip);
+		chip->dev = &client->dev;
+		mutex_init(&chip->io_lock);
+		dev_set_drvdata(chip->dev, chip);
+
+		if (found_companion) {
+			/*
+			 * If this driver is built in, probe function is
+			 * recursive.
+			 * If this driver is built as module, the next probe
+			 * function is called after the first one finished.
+			 */
+			chip->companion = i2c_new_device(client->adapter,
+							 &i2c_info);
+		}
+	}
+
+	/*
+	 * If companion chip existes, it's called by companion probe.
+	 * If there's no companion chip, it's called by client probe.
+	 */
+	if ((addr_c == 0) || (addr_c == client->addr)) {
+		chip->companion = client;
+		i2c_set_clientdata(chip->companion, chip);
+		pm860x_device_init(chip, pdata);
+	}
 	return 0;
-
-out:
-	i2c_set_clientdata(client, NULL);
-	kfree(chip);
-	return ret;
 }
 
 static int __devexit pm860x_remove(struct i2c_client *client)
 {
-	struct pm8607_chip *chip = i2c_get_clientdata(client);
-
+	struct pm860x_chip *chip = i2c_get_clientdata(client);
+
+	/*
+	 * If companion existes, companion client is removed first.
+	 * Because companion client is registered last and removed first.
+	 */
+	if (chip->companion_addr == client->addr)
+		return 0;
+	pm860x_device_exit(chip);
+	i2c_unregister_device(chip->companion);
+	i2c_set_clientdata(chip->companion, NULL);
+	i2c_set_clientdata(chip->client, NULL);
 	kfree(chip);
 	return 0;
 }
diff --git a/drivers/regulator/88pm8607.c b/drivers/regulator/88pm8607.c
index 04719551381b..97897a6bf4f3 100644
--- a/drivers/regulator/88pm8607.c
+++ b/drivers/regulator/88pm8607.c
@@ -11,15 +11,17 @@
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/err.h>
+#include <linux/i2c.h>
 #include <linux/platform_device.h>
 #include <linux/regulator/driver.h>
 #include <linux/regulator/machine.h>
-#include <linux/mfd/88pm8607.h>
+#include <linux/mfd/88pm860x.h>
 
 struct pm8607_regulator_info {
 	struct regulator_desc	desc;
-	struct pm8607_chip	*chip;
+	struct pm860x_chip	*chip;
 	struct regulator_dev	*regulator;
+	struct i2c_client	*i2c;
 
 	int	min_uV;
 	int	max_uV;
@@ -46,7 +48,7 @@ static inline int check_range(struct pm8607_regulator_info *info,
 static int pm8607_list_voltage(struct regulator_dev *rdev, unsigned index)
 {
 	struct pm8607_regulator_info *info = rdev_get_drvdata(rdev);
-	uint8_t chip_id = info->chip->chip_id;
+	uint8_t chip_id = info->chip->chip_version;
 	int ret = -EINVAL;
 
 	switch (info->desc.id) {
@@ -169,7 +171,7 @@ static int pm8607_list_voltage(struct regulator_dev *rdev, unsigned index)
 static int choose_voltage(struct regulator_dev *rdev, int min_uV, int max_uV)
 {
 	struct pm8607_regulator_info *info = rdev_get_drvdata(rdev);
-	uint8_t chip_id = info->chip->chip_id;
+	uint8_t chip_id = info->chip->chip_version;
 	int val = -ENOENT;
 	int ret;
 
@@ -428,7 +430,6 @@ static int pm8607_set_voltage(struct regulator_dev *rdev,
 			      int min_uV, int max_uV)
 {
 	struct pm8607_regulator_info *info = rdev_get_drvdata(rdev);
-	struct pm8607_chip *chip = info->chip;
 	uint8_t val, mask;
 	int ret;
 
@@ -443,13 +444,13 @@ static int pm8607_set_voltage(struct regulator_dev *rdev,
 	val = (uint8_t)(ret << info->vol_shift);
 	mask = ((1 << info->vol_nbits) - 1)  << info->vol_shift;
 
-	ret = pm8607_set_bits(chip, info->vol_reg, mask, val);
+	ret = pm860x_set_bits(info->i2c, info->vol_reg, mask, val);
 	if (ret)
 		return ret;
 	switch (info->desc.id) {
 	case PM8607_ID_BUCK1:
 	case PM8607_ID_BUCK3:
-		ret = pm8607_set_bits(chip, info->update_reg,
+		ret = pm860x_set_bits(info->i2c, info->update_reg,
 				      1 << info->update_bit,
 				      1 << info->update_bit);
 		break;
@@ -460,11 +461,10 @@ static int pm8607_set_voltage(struct regulator_dev *rdev,
 static int pm8607_get_voltage(struct regulator_dev *rdev)
 {
 	struct pm8607_regulator_info *info = rdev_get_drvdata(rdev);
-	struct pm8607_chip *chip = info->chip;
 	uint8_t val, mask;
 	int ret;
 
-	ret = pm8607_reg_read(chip, info->vol_reg);
+	ret = pm860x_reg_read(info->i2c, info->vol_reg);
 	if (ret < 0)
 		return ret;
 
@@ -477,9 +477,8 @@ static int pm8607_get_voltage(struct regulator_dev *rdev)
 static int pm8607_enable(struct regulator_dev *rdev)
 {
 	struct pm8607_regulator_info *info = rdev_get_drvdata(rdev);
-	struct pm8607_chip *chip = info->chip;
 
-	return pm8607_set_bits(chip, info->enable_reg,
+	return pm860x_set_bits(info->i2c, info->enable_reg,
 			       1 << info->enable_bit,
 			       1 << info->enable_bit);
 }
@@ -487,19 +486,17 @@ static int pm8607_enable(struct regulator_dev *rdev)
 static int pm8607_disable(struct regulator_dev *rdev)
 {
 	struct pm8607_regulator_info *info = rdev_get_drvdata(rdev);
-	struct pm8607_chip *chip = info->chip;
 
-	return pm8607_set_bits(chip, info->enable_reg,
+	return pm860x_set_bits(info->i2c, info->enable_reg,
 			       1 << info->enable_bit, 0);
 }
 
 static int pm8607_is_enabled(struct regulator_dev *rdev)
 {
 	struct pm8607_regulator_info *info = rdev_get_drvdata(rdev);
-	struct pm8607_chip *chip = info->chip;
 	int ret;
 
-	ret = pm8607_reg_read(chip, info->enable_reg);
+	ret = pm860x_reg_read(info->i2c, info->enable_reg);
 	if (ret < 0)
 		return ret;
 
@@ -589,8 +586,8 @@ static inline struct pm8607_regulator_info *find_regulator_info(int id)
 
 static int __devinit pm8607_regulator_probe(struct platform_device *pdev)
 {
-	struct pm8607_chip *chip = dev_get_drvdata(pdev->dev.parent);
-	struct pm8607_platform_data *pdata = chip->dev->platform_data;
+	struct pm860x_chip *chip = dev_get_drvdata(pdev->dev.parent);
+	struct pm860x_platform_data *pdata = chip->dev->platform_data;
 	struct pm8607_regulator_info *info = NULL;
 
 	info = find_regulator_info(pdev->id);
@@ -599,6 +596,7 @@ static int __devinit pm8607_regulator_probe(struct platform_device *pdev)
 		return -EINVAL;
 	}
 
+	info->i2c = (chip->id == CHIP_PM8607) ? chip->client : chip->companion;
 	info->chip = chip;
 
 	info->regulator = regulator_register(&info->desc, &pdev->dev,
diff --git a/include/linux/mfd/88pm8607.h b/include/linux/mfd/88pm8607.h
deleted file mode 100644
index 6e4dcdca02a8..000000000000
--- a/include/linux/mfd/88pm8607.h
+++ /dev/null
@@ -1,223 +0,0 @@
-/*
- * Marvell 88PM8607 Interface
- *
- * Copyright (C) 2009 Marvell International Ltd.
- * 	Haojian Zhuang <haojian.zhuang@marvell.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#ifndef __LINUX_MFD_88PM8607_H
-#define __LINUX_MFD_88PM8607_H
-
-enum {
-	PM8607_ID_BUCK1 = 0,
-	PM8607_ID_BUCK2,
-	PM8607_ID_BUCK3,
-
-	PM8607_ID_LDO1,
-	PM8607_ID_LDO2,
-	PM8607_ID_LDO3,
-	PM8607_ID_LDO4,
-	PM8607_ID_LDO5,
-	PM8607_ID_LDO6,
-	PM8607_ID_LDO7,
-	PM8607_ID_LDO8,
-	PM8607_ID_LDO9,
-	PM8607_ID_LDO10,
-	PM8607_ID_LDO12,
-	PM8607_ID_LDO14,
-
-	PM8607_ID_RG_MAX,
-};
-
-#define PM8607_ID			(0x40)	/* 8607 chip ID */
-#define PM8607_ID_MASK			(0xF8)	/* 8607 chip ID mask */
-
-/* Interrupt Registers */
-#define PM8607_STATUS_1			(0x01)
-#define PM8607_STATUS_2			(0x02)
-#define PM8607_INT_STATUS1		(0x03)
-#define PM8607_INT_STATUS2		(0x04)
-#define PM8607_INT_STATUS3		(0x05)
-#define PM8607_INT_MASK_1		(0x06)
-#define PM8607_INT_MASK_2		(0x07)
-#define PM8607_INT_MASK_3		(0x08)
-
-/* Regulator Control Registers */
-#define PM8607_LDO1			(0x10)
-#define PM8607_LDO2			(0x11)
-#define PM8607_LDO3			(0x12)
-#define PM8607_LDO4			(0x13)
-#define PM8607_LDO5			(0x14)
-#define PM8607_LDO6			(0x15)
-#define PM8607_LDO7			(0x16)
-#define PM8607_LDO8			(0x17)
-#define PM8607_LDO9			(0x18)
-#define PM8607_LDO10			(0x19)
-#define PM8607_LDO12			(0x1A)
-#define PM8607_LDO14			(0x1B)
-#define PM8607_SLEEP_MODE1		(0x1C)
-#define PM8607_SLEEP_MODE2		(0x1D)
-#define PM8607_SLEEP_MODE3		(0x1E)
-#define PM8607_SLEEP_MODE4		(0x1F)
-#define PM8607_GO			(0x20)
-#define PM8607_SLEEP_BUCK1		(0x21)
-#define PM8607_SLEEP_BUCK2		(0x22)
-#define PM8607_SLEEP_BUCK3		(0x23)
-#define PM8607_BUCK1			(0x24)
-#define PM8607_BUCK2			(0x25)
-#define PM8607_BUCK3			(0x26)
-#define PM8607_BUCK_CONTROLS		(0x27)
-#define PM8607_SUPPLIES_EN11		(0x2B)
-#define PM8607_SUPPLIES_EN12		(0x2C)
-#define PM8607_GROUP1			(0x2D)
-#define PM8607_GROUP2			(0x2E)
-#define PM8607_GROUP3			(0x2F)
-#define PM8607_GROUP4			(0x30)
-#define PM8607_GROUP5			(0x31)
-#define PM8607_GROUP6			(0x32)
-#define PM8607_SUPPLIES_EN21		(0x33)
-#define PM8607_SUPPLIES_EN22		(0x34)
-
-/* RTC Control Registers */
-#define PM8607_RTC1			(0xA0)
-#define PM8607_RTC_COUNTER1		(0xA1)
-#define PM8607_RTC_COUNTER2		(0xA2)
-#define PM8607_RTC_COUNTER3		(0xA3)
-#define PM8607_RTC_COUNTER4		(0xA4)
-#define PM8607_RTC_EXPIRE1		(0xA5)
-#define PM8607_RTC_EXPIRE2		(0xA6)
-#define PM8607_RTC_EXPIRE3		(0xA7)
-#define PM8607_RTC_EXPIRE4		(0xA8)
-#define PM8607_RTC_TRIM1		(0xA9)
-#define PM8607_RTC_TRIM2		(0xAA)
-#define PM8607_RTC_TRIM3		(0xAB)
-#define PM8607_RTC_TRIM4		(0xAC)
-#define PM8607_RTC_MISC1		(0xAD)
-#define PM8607_RTC_MISC2		(0xAE)
-#define PM8607_RTC_MISC3		(0xAF)
-
-/* Misc Registers */
-#define PM8607_CHIP_ID			(0x00)
-#define PM8607_LDO1			(0x10)
-#define PM8607_DVC3			(0x26)
-#define PM8607_MISC1			(0x40)
-
-/* bit definitions for PM8607 events */
-#define PM8607_EVENT_ONKEY		(1 << 0)
-#define PM8607_EVENT_EXTON		(1 << 1)
-#define PM8607_EVENT_CHG		(1 << 2)
-#define PM8607_EVENT_BAT		(1 << 3)
-#define PM8607_EVENT_RTC		(1 << 4)
-#define PM8607_EVENT_CC			(1 << 5)
-#define PM8607_EVENT_VBAT		(1 << 8)
-#define PM8607_EVENT_VCHG		(1 << 9)
-#define PM8607_EVENT_VSYS		(1 << 10)
-#define PM8607_EVENT_TINT		(1 << 11)
-#define PM8607_EVENT_GPADC0		(1 << 12)
-#define PM8607_EVENT_GPADC1		(1 << 13)
-#define PM8607_EVENT_GPADC2		(1 << 14)
-#define PM8607_EVENT_GPADC3		(1 << 15)
-#define PM8607_EVENT_AUDIO_SHORT	(1 << 16)
-#define PM8607_EVENT_PEN		(1 << 17)
-#define PM8607_EVENT_HEADSET		(1 << 18)
-#define PM8607_EVENT_HOOK		(1 << 19)
-#define PM8607_EVENT_MICIN		(1 << 20)
-#define PM8607_EVENT_CHG_TIMEOUT	(1 << 21)
-#define PM8607_EVENT_CHG_DONE		(1 << 22)
-#define PM8607_EVENT_CHG_FAULT		(1 << 23)
-
-/* bit definitions of Status Query Interface */
-#define PM8607_STATUS_CC		(1 << 3)
-#define PM8607_STATUS_PEN		(1 << 4)
-#define PM8607_STATUS_HEADSET		(1 << 5)
-#define PM8607_STATUS_HOOK		(1 << 6)
-#define PM8607_STATUS_MICIN		(1 << 7)
-#define PM8607_STATUS_ONKEY		(1 << 8)
-#define PM8607_STATUS_EXTON		(1 << 9)
-#define PM8607_STATUS_CHG		(1 << 10)
-#define PM8607_STATUS_BAT		(1 << 11)
-#define PM8607_STATUS_VBUS		(1 << 12)
-#define PM8607_STATUS_OV		(1 << 13)
-
-/* bit definitions of BUCK3 */
-#define PM8607_BUCK3_DOUBLE		(1 << 6)
-
-/* bit definitions of Misc1 */
-#define PM8607_MISC1_PI2C		(1 << 0)
-
-/* Interrupt Number in 88PM8607 */
-enum {
-	PM8607_IRQ_ONKEY = 0,
-	PM8607_IRQ_EXTON,
-	PM8607_IRQ_CHG,
-	PM8607_IRQ_BAT,
-	PM8607_IRQ_RTC,
-	PM8607_IRQ_VBAT = 8,
-	PM8607_IRQ_VCHG,
-	PM8607_IRQ_VSYS,
-	PM8607_IRQ_TINT,
-	PM8607_IRQ_GPADC0,
-	PM8607_IRQ_GPADC1,
-	PM8607_IRQ_GPADC2,
-	PM8607_IRQ_GPADC3,
-	PM8607_IRQ_AUDIO_SHORT = 16,
-	PM8607_IRQ_PEN,
-	PM8607_IRQ_HEADSET,
-	PM8607_IRQ_HOOK,
-	PM8607_IRQ_MICIN,
-	PM8607_IRQ_CHG_FAIL,
-	PM8607_IRQ_CHG_DONE,
-	PM8607_IRQ_CHG_FAULT,
-};
-
-enum {
-	PM8607_CHIP_A0 = 0x40,
-	PM8607_CHIP_A1 = 0x41,
-	PM8607_CHIP_B0 = 0x48,
-};
-
-
-struct pm8607_chip {
-	struct device		*dev;
-	struct mutex		io_lock;
-	struct i2c_client	*client;
-	struct i2c_device_id	id;
-
-	int (*read)(struct pm8607_chip *chip, int reg, int bytes, void *dest);
-	int (*write)(struct pm8607_chip *chip, int reg, int bytes, void *src);
-
-	int			buck3_double;	/* DVC ramp slope double */
-	unsigned char		chip_id;
-
-};
-
-#define PM8607_MAX_REGULATOR	15	/* 3 Bucks, 12 LDOs */
-
-enum {
-	GI2C_PORT = 0,
-	PI2C_PORT,
-};
-
-struct pm8607_platform_data {
-	int	i2c_port;	/* Controlled by GI2C or PI2C */
-	struct regulator_init_data *regulator[PM8607_MAX_REGULATOR];
-};
-
-extern int pm8607_reg_read(struct pm8607_chip *, int);
-extern int pm8607_reg_write(struct pm8607_chip *, int, unsigned char);
-extern int pm8607_bulk_read(struct pm8607_chip *, int, int,
-			    unsigned char *);
-extern int pm8607_bulk_write(struct pm8607_chip *, int, int,
-			     unsigned char *);
-extern int pm8607_set_bits(struct pm8607_chip *, int, unsigned char,
-			   unsigned char);
-
-extern int pm860x_device_init(struct pm8607_chip *chip,
-			      struct pm8607_platform_data *pdata);
-extern void pm860x_device_exit(struct pm8607_chip *chip);
-
-#endif /* __LINUX_MFD_88PM860X_H */
diff --git a/include/linux/mfd/88pm860x.h b/include/linux/mfd/88pm860x.h
new file mode 100644
index 000000000000..5845ae47df30
--- /dev/null
+++ b/include/linux/mfd/88pm860x.h
@@ -0,0 +1,227 @@
+/*
+ * Marvell 88PM860x Interface
+ *
+ * Copyright (C) 2009 Marvell International Ltd.
+ * 	Haojian Zhuang <haojian.zhuang@marvell.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __LINUX_MFD_88PM860X_H
+#define __LINUX_MFD_88PM860X_H
+
+enum {
+	CHIP_INVALID = 0,
+	CHIP_PM8606,
+	CHIP_PM8607,
+	CHIP_MAX,
+};
+
+enum {
+	PM8607_ID_BUCK1 = 0,
+	PM8607_ID_BUCK2,
+	PM8607_ID_BUCK3,
+
+	PM8607_ID_LDO1,
+	PM8607_ID_LDO2,
+	PM8607_ID_LDO3,
+	PM8607_ID_LDO4,
+	PM8607_ID_LDO5,
+	PM8607_ID_LDO6,
+	PM8607_ID_LDO7,
+	PM8607_ID_LDO8,
+	PM8607_ID_LDO9,
+	PM8607_ID_LDO10,
+	PM8607_ID_LDO12,
+	PM8607_ID_LDO14,
+
+	PM8607_ID_RG_MAX,
+};
+
+#define PM8607_VERSION			(0x40)	/* 8607 chip ID */
+#define PM8607_VERSION_MASK		(0xF0)	/* 8607 chip ID mask */
+
+/* Interrupt Registers */
+#define PM8607_STATUS_1			(0x01)
+#define PM8607_STATUS_2			(0x02)
+#define PM8607_INT_STATUS1		(0x03)
+#define PM8607_INT_STATUS2		(0x04)
+#define PM8607_INT_STATUS3		(0x05)
+#define PM8607_INT_MASK_1		(0x06)
+#define PM8607_INT_MASK_2		(0x07)
+#define PM8607_INT_MASK_3		(0x08)
+
+/* Regulator Control Registers */
+#define PM8607_LDO1			(0x10)
+#define PM8607_LDO2			(0x11)
+#define PM8607_LDO3			(0x12)
+#define PM8607_LDO4			(0x13)
+#define PM8607_LDO5			(0x14)
+#define PM8607_LDO6			(0x15)
+#define PM8607_LDO7			(0x16)
+#define PM8607_LDO8			(0x17)
+#define PM8607_LDO9			(0x18)
+#define PM8607_LDO10			(0x19)
+#define PM8607_LDO12			(0x1A)
+#define PM8607_LDO14			(0x1B)
+#define PM8607_SLEEP_MODE1		(0x1C)
+#define PM8607_SLEEP_MODE2		(0x1D)
+#define PM8607_SLEEP_MODE3		(0x1E)
+#define PM8607_SLEEP_MODE4		(0x1F)
+#define PM8607_GO			(0x20)
+#define PM8607_SLEEP_BUCK1		(0x21)
+#define PM8607_SLEEP_BUCK2		(0x22)
+#define PM8607_SLEEP_BUCK3		(0x23)
+#define PM8607_BUCK1			(0x24)
+#define PM8607_BUCK2			(0x25)
+#define PM8607_BUCK3			(0x26)
+#define PM8607_BUCK_CONTROLS		(0x27)
+#define PM8607_SUPPLIES_EN11		(0x2B)
+#define PM8607_SUPPLIES_EN12		(0x2C)
+#define PM8607_GROUP1			(0x2D)
+#define PM8607_GROUP2			(0x2E)
+#define PM8607_GROUP3			(0x2F)
+#define PM8607_GROUP4			(0x30)
+#define PM8607_GROUP5			(0x31)
+#define PM8607_GROUP6			(0x32)
+#define PM8607_SUPPLIES_EN21		(0x33)
+#define PM8607_SUPPLIES_EN22		(0x34)
+
+/* RTC Control Registers */
+#define PM8607_RTC1			(0xA0)
+#define PM8607_RTC_COUNTER1		(0xA1)
+#define PM8607_RTC_COUNTER2		(0xA2)
+#define PM8607_RTC_COUNTER3		(0xA3)
+#define PM8607_RTC_COUNTER4		(0xA4)
+#define PM8607_RTC_EXPIRE1		(0xA5)
+#define PM8607_RTC_EXPIRE2		(0xA6)
+#define PM8607_RTC_EXPIRE3		(0xA7)
+#define PM8607_RTC_EXPIRE4		(0xA8)
+#define PM8607_RTC_TRIM1		(0xA9)
+#define PM8607_RTC_TRIM2		(0xAA)
+#define PM8607_RTC_TRIM3		(0xAB)
+#define PM8607_RTC_TRIM4		(0xAC)
+#define PM8607_RTC_MISC1		(0xAD)
+#define PM8607_RTC_MISC2		(0xAE)
+#define PM8607_RTC_MISC3		(0xAF)
+
+/* Misc Registers */
+#define PM8607_CHIP_ID			(0x00)
+#define PM8607_LDO1			(0x10)
+#define PM8607_DVC3			(0x26)
+#define PM8607_MISC1			(0x40)
+
+/* bit definitions for PM8607 events */
+#define PM8607_EVENT_ONKEY		(1 << 0)
+#define PM8607_EVENT_EXTON		(1 << 1)
+#define PM8607_EVENT_CHG		(1 << 2)
+#define PM8607_EVENT_BAT		(1 << 3)
+#define PM8607_EVENT_RTC		(1 << 4)
+#define PM8607_EVENT_CC			(1 << 5)
+#define PM8607_EVENT_VBAT		(1 << 8)
+#define PM8607_EVENT_VCHG		(1 << 9)
+#define PM8607_EVENT_VSYS		(1 << 10)
+#define PM8607_EVENT_TINT		(1 << 11)
+#define PM8607_EVENT_GPADC0		(1 << 12)
+#define PM8607_EVENT_GPADC1		(1 << 13)
+#define PM8607_EVENT_GPADC2		(1 << 14)
+#define PM8607_EVENT_GPADC3		(1 << 15)
+#define PM8607_EVENT_AUDIO_SHORT	(1 << 16)
+#define PM8607_EVENT_PEN		(1 << 17)
+#define PM8607_EVENT_HEADSET		(1 << 18)
+#define PM8607_EVENT_HOOK		(1 << 19)
+#define PM8607_EVENT_MICIN		(1 << 20)
+#define PM8607_EVENT_CHG_TIMEOUT	(1 << 21)
+#define PM8607_EVENT_CHG_DONE		(1 << 22)
+#define PM8607_EVENT_CHG_FAULT		(1 << 23)
+
+/* bit definitions of Status Query Interface */
+#define PM8607_STATUS_CC		(1 << 3)
+#define PM8607_STATUS_PEN		(1 << 4)
+#define PM8607_STATUS_HEADSET		(1 << 5)
+#define PM8607_STATUS_HOOK		(1 << 6)
+#define PM8607_STATUS_MICIN		(1 << 7)
+#define PM8607_STATUS_ONKEY		(1 << 8)
+#define PM8607_STATUS_EXTON		(1 << 9)
+#define PM8607_STATUS_CHG		(1 << 10)
+#define PM8607_STATUS_BAT		(1 << 11)
+#define PM8607_STATUS_VBUS		(1 << 12)
+#define PM8607_STATUS_OV		(1 << 13)
+
+/* bit definitions of BUCK3 */
+#define PM8607_BUCK3_DOUBLE		(1 << 6)
+
+/* bit definitions of Misc1 */
+#define PM8607_MISC1_PI2C		(1 << 0)
+
+/* Interrupt Number in 88PM8607 */
+enum {
+	PM8607_IRQ_ONKEY = 0,
+	PM8607_IRQ_EXTON,
+	PM8607_IRQ_CHG,
+	PM8607_IRQ_BAT,
+	PM8607_IRQ_RTC,
+	PM8607_IRQ_VBAT = 8,
+	PM8607_IRQ_VCHG,
+	PM8607_IRQ_VSYS,
+	PM8607_IRQ_TINT,
+	PM8607_IRQ_GPADC0,
+	PM8607_IRQ_GPADC1,
+	PM8607_IRQ_GPADC2,
+	PM8607_IRQ_GPADC3,
+	PM8607_IRQ_AUDIO_SHORT = 16,
+	PM8607_IRQ_PEN,
+	PM8607_IRQ_HEADSET,
+	PM8607_IRQ_HOOK,
+	PM8607_IRQ_MICIN,
+	PM8607_IRQ_CHG_FAIL,
+	PM8607_IRQ_CHG_DONE,
+	PM8607_IRQ_CHG_FAULT,
+};
+
+enum {
+	PM8607_CHIP_A0 = 0x40,
+	PM8607_CHIP_A1 = 0x41,
+	PM8607_CHIP_B0 = 0x48,
+};
+
+struct pm860x_chip {
+	struct device		*dev;
+	struct mutex		io_lock;
+	struct i2c_client	*client;
+	struct i2c_client	*companion;	/* companion chip client */
+
+	int			buck3_double;	/* DVC ramp slope double */
+	unsigned short		companion_addr;
+	int			id;
+	unsigned char		chip_version;
+
+};
+
+#define PM8607_MAX_REGULATOR	15	/* 3 Bucks, 12 LDOs */
+
+enum {
+	GI2C_PORT = 0,
+	PI2C_PORT,
+};
+
+struct pm860x_platform_data {
+	unsigned short	companion_addr;	/* I2C address of companion chip */
+	int		i2c_port;	/* Controlled by GI2C or PI2C */
+	struct regulator_init_data *regulator[PM8607_MAX_REGULATOR];
+};
+
+extern int pm860x_reg_read(struct i2c_client *, int);
+extern int pm860x_reg_write(struct i2c_client *, int, unsigned char);
+extern int pm860x_bulk_read(struct i2c_client *, int, int, unsigned char *);
+extern int pm860x_bulk_write(struct i2c_client *, int, int, unsigned char *);
+extern int pm860x_set_bits(struct i2c_client *, int, unsigned char,
+			   unsigned char);
+
+extern int pm860x_device_init(struct pm860x_chip *chip,
+			      struct pm860x_platform_data *pdata);
+extern void pm860x_device_exit(struct pm860x_chip *chip);
+
+#endif /* __LINUX_MFD_88PM860X_H */
-- 
cgit v1.2.3


From 5c42e8c4a9c86ea26ed4ecb732a842dea0dfb6b6 Mon Sep 17 00:00:00 2001
From: Haojian Zhuang <haojian.zhuang@marvell.com>
Date: Tue, 15 Dec 2009 16:01:47 -0500
Subject: mfd: Add irq support in 88pm860x

88PM860x is a complex PMIC device. It contains touch, charger, sound, rtc,
backlight, led, and so on.

Host communicates to 88PM860x by I2C bus. Use thread irq to support this
usage case.

Signed-off-by: Haojian Zhuang <haojian.zhuang@marvell.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/88pm860x-core.c  | 226 +++++++++++++++++++++++++++++++++++++++++--
 include/linux/mfd/88pm860x.h |  54 ++++++-----
 2 files changed, 247 insertions(+), 33 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/88pm860x-core.c b/drivers/mfd/88pm860x-core.c
index 72b00304dc3a..9185f0d945f4 100644
--- a/drivers/mfd/88pm860x-core.c
+++ b/drivers/mfd/88pm860x-core.c
@@ -11,6 +11,7 @@
 
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/i2c.h>
 #include <linux/interrupt.h>
 #include <linux/platform_device.h>
 #include <linux/mfd/core.h>
@@ -67,15 +68,209 @@ static struct mfd_cell pm8607_devs[] = {
 	PM8607_REG_DEVS(ldo14, LDO14),
 };
 
-static void device_8606_init(struct pm860x_chip *chip, struct i2c_client *i2c,
-			     struct pm860x_platform_data *pdata)
+#define CHECK_IRQ(irq)					\
+do {							\
+	if ((irq < 0) || (irq >= PM860X_NUM_IRQ))	\
+		return -EINVAL;				\
+} while (0)
+
+/* IRQs only occur on 88PM8607 */
+int pm860x_mask_irq(struct pm860x_chip *chip, int irq)
+{
+	struct i2c_client *i2c = (chip->id == CHIP_PM8607) ? chip->client \
+				: chip->companion;
+	int offset, data, ret;
+
+	CHECK_IRQ(irq);
+
+	offset = (irq >> 3) + PM8607_INT_MASK_1;
+	data = 1 << (irq % 8);
+	ret = pm860x_set_bits(i2c, offset, data, 0);
+
+	return ret;
+}
+EXPORT_SYMBOL(pm860x_mask_irq);
+
+int pm860x_unmask_irq(struct pm860x_chip *chip, int irq)
+{
+	struct i2c_client *i2c = (chip->id == CHIP_PM8607) ? chip->client \
+				: chip->companion;
+	int offset, data, ret;
+
+	CHECK_IRQ(irq);
+
+	offset = (irq >> 3) + PM8607_INT_MASK_1;
+	data = 1 << (irq % 8);
+	ret = pm860x_set_bits(i2c, offset, data, data);
+
+	return ret;
+}
+EXPORT_SYMBOL(pm860x_unmask_irq);
+
+#define INT_STATUS_NUM		(3)
+
+static irqreturn_t pm8607_irq_thread(int irq, void *data)
+{
+	DECLARE_BITMAP(irq_status, PM860X_NUM_IRQ);
+	struct pm860x_chip *chip = data;
+	struct i2c_client *i2c = (chip->id == CHIP_PM8607) ? chip->client \
+				: chip->companion;
+	unsigned char status_buf[INT_STATUS_NUM << 1];
+	unsigned long value;
+	int i, ret;
+
+	irq_status[0] = 0;
+
+	/* read out status register */
+	ret = pm860x_bulk_read(i2c, PM8607_INT_STATUS1,
+				INT_STATUS_NUM << 1, status_buf);
+	if (ret < 0)
+		goto out;
+	if (chip->irq_mode) {
+		/* 0, clear by read. 1, clear by write */
+		ret = pm860x_bulk_write(i2c, PM8607_INT_STATUS1,
+					INT_STATUS_NUM, status_buf);
+		if (ret < 0)
+			goto out;
+	}
+
+	/* clear masked interrupt status */
+	for (i = 0, value = 0; i < INT_STATUS_NUM; i++) {
+		status_buf[i] &= status_buf[i + INT_STATUS_NUM];
+		irq_status[0] |= status_buf[i] << (i * 8);
+	}
+
+	while (!bitmap_empty(irq_status, PM860X_NUM_IRQ)) {
+		irq = find_first_bit(irq_status, PM860X_NUM_IRQ);
+		clear_bit(irq, irq_status);
+		dev_dbg(chip->dev, "Servicing IRQ #%d\n", irq);
+
+		mutex_lock(&chip->irq_lock);
+		if (chip->irq[irq].handler)
+			chip->irq[irq].handler(irq, chip->irq[irq].data);
+		else {
+			pm860x_mask_irq(chip, irq);
+			dev_err(chip->dev, "Nobody cares IRQ %d. "
+				"Now mask it.\n", irq);
+			for (i = 0; i < (INT_STATUS_NUM << 1); i++) {
+				dev_err(chip->dev, "status[%d]:%x\n", i,
+					status_buf[i]);
+			}
+		}
+		mutex_unlock(&chip->irq_lock);
+	}
+out:
+	return IRQ_HANDLED;
+}
+
+int pm860x_request_irq(struct pm860x_chip *chip, int irq,
+		       irq_handler_t handler, void *data)
 {
+	CHECK_IRQ(irq);
+	if (!handler)
+		return -EINVAL;
+
+	mutex_lock(&chip->irq_lock);
+	chip->irq[irq].handler = handler;
+	chip->irq[irq].data = data;
+	mutex_unlock(&chip->irq_lock);
+
+	return 0;
 }
+EXPORT_SYMBOL(pm860x_request_irq);
 
-static void device_8607_init(struct pm860x_chip *chip, struct i2c_client *i2c,
-			     struct pm860x_platform_data *pdata)
+int pm860x_free_irq(struct pm860x_chip *chip, int irq)
 {
-	int i, count;
+	CHECK_IRQ(irq);
+
+	mutex_lock(&chip->irq_lock);
+	chip->irq[irq].handler = NULL;
+	chip->irq[irq].data = NULL;
+	mutex_unlock(&chip->irq_lock);
+
+	return 0;
+}
+EXPORT_SYMBOL(pm860x_free_irq);
+
+static int __devinit device_irq_init(struct pm860x_chip *chip,
+				     struct pm860x_platform_data *pdata)
+{
+	struct i2c_client *i2c = (chip->id == CHIP_PM8607) ? chip->client \
+				: chip->companion;
+	unsigned char status_buf[INT_STATUS_NUM];
+	int data, mask, ret = -EINVAL;
+
+	mutex_init(&chip->irq_lock);
+
+	mask = PM8607_B0_MISC1_INV_INT | PM8607_B0_MISC1_INT_CLEAR
+		| PM8607_B0_MISC1_INT_MASK;
+	data = 0;
+	chip->irq_mode = 0;
+	if (pdata && pdata->irq_mode) {
+		/*
+		 * irq_mode defines the way of clearing interrupt. If it's 1,
+		 * clear IRQ by write. Otherwise, clear it by read.
+		 * This control bit is valid from 88PM8607 B0 steping.
+		 */
+		data |= PM8607_B0_MISC1_INT_CLEAR;
+		chip->irq_mode = 1;
+	}
+	ret = pm860x_set_bits(i2c, PM8607_B0_MISC1, mask, data);
+	if (ret < 0)
+		goto out;
+
+	/* mask all IRQs */
+	memset(status_buf, 0, INT_STATUS_NUM);
+	ret = pm860x_bulk_write(i2c, PM8607_INT_MASK_1,
+				INT_STATUS_NUM, status_buf);
+	if (ret < 0)
+		goto out;
+
+	if (chip->irq_mode) {
+		/* clear interrupt status by write */
+		memset(status_buf, 0xFF, INT_STATUS_NUM);
+		ret = pm860x_bulk_write(i2c, PM8607_INT_STATUS1,
+					INT_STATUS_NUM, status_buf);
+	} else {
+		/* clear interrupt status by read */
+		ret = pm860x_bulk_read(i2c, PM8607_INT_STATUS1,
+					INT_STATUS_NUM, status_buf);
+	}
+	if (ret < 0)
+		goto out;
+
+	memset(chip->irq, 0, sizeof(struct pm860x_irq) * PM860X_NUM_IRQ);
+
+	ret = request_threaded_irq(i2c->irq, NULL, pm8607_irq_thread,
+				IRQF_ONESHOT | IRQF_TRIGGER_LOW,
+				"88PM8607", chip);
+	if (ret < 0) {
+		dev_err(chip->dev, "Failed to request IRQ #%d.\n", i2c->irq);
+		goto out;
+	}
+	chip->chip_irq = i2c->irq;
+	return 0;
+out:
+	return ret;
+}
+
+static void __devexit device_irq_exit(struct pm860x_chip *chip)
+{
+	if (chip->chip_irq >= 0)
+		free_irq(chip->chip_irq, chip);
+}
+
+static void __devinit device_8606_init(struct pm860x_chip *chip,
+				       struct i2c_client *i2c,
+				       struct pm860x_platform_data *pdata)
+{
+}
+
+static void __devinit device_8607_init(struct pm860x_chip *chip,
+				       struct i2c_client *i2c,
+				       struct pm860x_platform_data *pdata)
+{
+	int i, count, data;
 	int ret;
 
 	ret = pm860x_reg_read(i2c, PM8607_CHIP_ID);
@@ -91,7 +286,6 @@ static void device_8607_init(struct pm860x_chip *chip, struct i2c_client *i2c,
 			"Chip ID: %02x\n", ret);
 		goto out;
 	}
-	chip->chip_version = ret;
 
 	ret = pm860x_reg_read(i2c, PM8607_BUCK3);
 	if (ret < 0) {
@@ -101,12 +295,26 @@ static void device_8607_init(struct pm860x_chip *chip, struct i2c_client *i2c,
 	if (ret & PM8607_BUCK3_DOUBLE)
 		chip->buck3_double = 1;
 
-	ret = pm860x_reg_read(i2c, PM8607_MISC1);
+	ret = pm860x_reg_read(i2c, PM8607_B0_MISC1);
 	if (ret < 0) {
 		dev_err(chip->dev, "Failed to read MISC1 register: %d\n", ret);
 		goto out;
 	}
 
+	if (pdata && (pdata->i2c_port == PI2C_PORT))
+		data = PM8607_B0_MISC1_PI2C;
+	else
+		data = 0;
+	ret = pm860x_set_bits(i2c, PM8607_B0_MISC1, PM8607_B0_MISC1_PI2C, data);
+	if (ret < 0) {
+		dev_err(chip->dev, "Failed to access MISC1:%d\n", ret);
+		goto out;
+	}
+
+	ret = device_irq_init(chip, pdata);
+	if (ret < 0)
+		goto out;
+
 	count = ARRAY_SIZE(pm8607_devs);
 	for (i = 0; i < count; i++) {
 		ret = mfd_add_devices(chip->dev, i, &pm8607_devs[i],
@@ -123,6 +331,8 @@ out:
 int pm860x_device_init(struct pm860x_chip *chip,
 		       struct pm860x_platform_data *pdata)
 {
+	chip->chip_irq = -EINVAL;
+
 	switch (chip->id) {
 	case CHIP_PM8606:
 		device_8606_init(chip, chip->client, pdata);
@@ -142,11 +352,13 @@ int pm860x_device_init(struct pm860x_chip *chip,
 			break;
 		}
 	}
+
 	return 0;
 }
 
 void pm860x_device_exit(struct pm860x_chip *chip)
 {
+	device_irq_exit(chip);
 	mfd_remove_devices(chip->dev);
 }
 
diff --git a/include/linux/mfd/88pm860x.h b/include/linux/mfd/88pm860x.h
index 5845ae47df30..b4d6018ba0d6 100644
--- a/include/linux/mfd/88pm860x.h
+++ b/include/linux/mfd/88pm860x.h
@@ -12,6 +12,8 @@
 #ifndef __LINUX_MFD_88PM860X_H
 #define __LINUX_MFD_88PM860X_H
 
+#include <linux/interrupt.h>
+
 enum {
 	CHIP_INVALID = 0,
 	CHIP_PM8606,
@@ -109,33 +111,10 @@ enum {
 
 /* Misc Registers */
 #define PM8607_CHIP_ID			(0x00)
+#define PM8607_B0_MISC1			(0x0C)
 #define PM8607_LDO1			(0x10)
 #define PM8607_DVC3			(0x26)
-#define PM8607_MISC1			(0x40)
-
-/* bit definitions for PM8607 events */
-#define PM8607_EVENT_ONKEY		(1 << 0)
-#define PM8607_EVENT_EXTON		(1 << 1)
-#define PM8607_EVENT_CHG		(1 << 2)
-#define PM8607_EVENT_BAT		(1 << 3)
-#define PM8607_EVENT_RTC		(1 << 4)
-#define PM8607_EVENT_CC			(1 << 5)
-#define PM8607_EVENT_VBAT		(1 << 8)
-#define PM8607_EVENT_VCHG		(1 << 9)
-#define PM8607_EVENT_VSYS		(1 << 10)
-#define PM8607_EVENT_TINT		(1 << 11)
-#define PM8607_EVENT_GPADC0		(1 << 12)
-#define PM8607_EVENT_GPADC1		(1 << 13)
-#define PM8607_EVENT_GPADC2		(1 << 14)
-#define PM8607_EVENT_GPADC3		(1 << 15)
-#define PM8607_EVENT_AUDIO_SHORT	(1 << 16)
-#define PM8607_EVENT_PEN		(1 << 17)
-#define PM8607_EVENT_HEADSET		(1 << 18)
-#define PM8607_EVENT_HOOK		(1 << 19)
-#define PM8607_EVENT_MICIN		(1 << 20)
-#define PM8607_EVENT_CHG_TIMEOUT	(1 << 21)
-#define PM8607_EVENT_CHG_DONE		(1 << 22)
-#define PM8607_EVENT_CHG_FAULT		(1 << 23)
+#define PM8607_A1_MISC1			(0x40)
 
 /* bit definitions of Status Query Interface */
 #define PM8607_STATUS_CC		(1 << 3)
@@ -154,7 +133,12 @@ enum {
 #define PM8607_BUCK3_DOUBLE		(1 << 6)
 
 /* bit definitions of Misc1 */
-#define PM8607_MISC1_PI2C		(1 << 0)
+#define PM8607_A1_MISC1_PI2C		(1 << 0)
+#define PM8607_B0_MISC1_INV_INT		(1 << 0)
+#define PM8607_B0_MISC1_INT_CLEAR	(1 << 1)
+#define PM8607_B0_MISC1_INT_MASK	(1 << 2)
+#define PM8607_B0_MISC1_PI2C		(1 << 3)
+#define PM8607_B0_MISC1_RESET		(1 << 6)
 
 /* Interrupt Number in 88PM8607 */
 enum {
@@ -187,15 +171,26 @@ enum {
 	PM8607_CHIP_B0 = 0x48,
 };
 
+#define PM860X_NUM_IRQ		24
+
+struct pm860x_irq {
+	irq_handler_t		handler;
+	void			*data;
+};
+
 struct pm860x_chip {
 	struct device		*dev;
 	struct mutex		io_lock;
+	struct mutex		irq_lock;
 	struct i2c_client	*client;
 	struct i2c_client	*companion;	/* companion chip client */
+	struct pm860x_irq	irq[PM860X_NUM_IRQ];
 
 	int			buck3_double;	/* DVC ramp slope double */
 	unsigned short		companion_addr;
 	int			id;
+	int			irq_mode;
+	int			chip_irq;
 	unsigned char		chip_version;
 
 };
@@ -210,6 +205,7 @@ enum {
 struct pm860x_platform_data {
 	unsigned short	companion_addr;	/* I2C address of companion chip */
 	int		i2c_port;	/* Controlled by GI2C or PI2C */
+	int		irq_mode;	/* Clear interrupt by read/write(0/1) */
 	struct regulator_init_data *regulator[PM8607_MAX_REGULATOR];
 };
 
@@ -220,6 +216,12 @@ extern int pm860x_bulk_write(struct i2c_client *, int, int, unsigned char *);
 extern int pm860x_set_bits(struct i2c_client *, int, unsigned char,
 			   unsigned char);
 
+extern int pm860x_mask_irq(struct pm860x_chip *, int);
+extern int pm860x_unmask_irq(struct pm860x_chip *, int);
+extern int pm860x_request_irq(struct pm860x_chip *, int,
+			      irq_handler_t handler, void *);
+extern int pm860x_free_irq(struct pm860x_chip *, int);
+
 extern int pm860x_device_init(struct pm860x_chip *chip,
 			      struct pm860x_platform_data *pdata);
 extern void pm860x_device_exit(struct pm860x_chip *chip);
-- 
cgit v1.2.3


From a16122bcacf050e7f83015183053cf799713cc37 Mon Sep 17 00:00:00 2001
From: Haojian Zhuang <haojian.zhuang@marvell.com>
Date: Tue, 15 Dec 2009 16:04:36 -0500
Subject: mfd: Append subdev into 88pm860x driver

Append backlight, led & touch subdevs into 88pm860x driver.

Signed-off-by: Haojian Zhuang <haojian.zhuang@marvell.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/88pm860x-core.c  | 217 ++++++++++++++++++++++++++++++++++++++++---
 include/linux/mfd/88pm860x.h | 151 ++++++++++++++++++++++++++++++
 2 files changed, 356 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/88pm860x-core.c b/drivers/mfd/88pm860x-core.c
index 9185f0d945f4..16f0dca707a7 100644
--- a/drivers/mfd/88pm860x-core.c
+++ b/drivers/mfd/88pm860x-core.c
@@ -17,6 +17,100 @@
 #include <linux/mfd/core.h>
 #include <linux/mfd/88pm860x.h>
 
+char pm860x_backlight_name[][MFD_NAME_SIZE] = {
+	"backlight-0",
+	"backlight-1",
+	"backlight-2",
+};
+EXPORT_SYMBOL(pm860x_backlight_name);
+
+char pm860x_led_name[][MFD_NAME_SIZE] = {
+	"led0-red",
+	"led0-green",
+	"led0-blue",
+	"led1-red",
+	"led1-green",
+	"led1-blue",
+};
+EXPORT_SYMBOL(pm860x_led_name);
+
+#define PM8606_BACKLIGHT_RESOURCE(_i, _x)		\
+{							\
+	.name	= pm860x_backlight_name[_i],		\
+	.start	= PM8606_##_x,				\
+	.end	= PM8606_##_x,				\
+	.flags	= IORESOURCE_IO,			\
+}
+
+static struct resource backlight_resources[] = {
+	PM8606_BACKLIGHT_RESOURCE(PM8606_BACKLIGHT1, WLED1A),
+	PM8606_BACKLIGHT_RESOURCE(PM8606_BACKLIGHT2, WLED2A),
+	PM8606_BACKLIGHT_RESOURCE(PM8606_BACKLIGHT3, WLED3A),
+};
+
+#define PM8606_BACKLIGHT_DEVS(_i)			\
+{							\
+	.name		= "88pm860x-backlight",		\
+	.num_resources	= 1,				\
+	.resources	= &backlight_resources[_i],	\
+	.id		= _i,				\
+}
+
+static struct mfd_cell backlight_devs[] = {
+	PM8606_BACKLIGHT_DEVS(PM8606_BACKLIGHT1),
+	PM8606_BACKLIGHT_DEVS(PM8606_BACKLIGHT2),
+	PM8606_BACKLIGHT_DEVS(PM8606_BACKLIGHT3),
+};
+
+#define PM8606_LED_RESOURCE(_i, _x)			\
+{							\
+	.name	= pm860x_led_name[_i],			\
+	.start	= PM8606_##_x,				\
+	.end	= PM8606_##_x,				\
+	.flags	= IORESOURCE_IO,			\
+}
+
+static struct resource led_resources[] = {
+	PM8606_LED_RESOURCE(PM8606_LED1_RED, RGB2B),
+	PM8606_LED_RESOURCE(PM8606_LED1_GREEN, RGB2C),
+	PM8606_LED_RESOURCE(PM8606_LED1_BLUE, RGB2D),
+	PM8606_LED_RESOURCE(PM8606_LED2_RED, RGB1B),
+	PM8606_LED_RESOURCE(PM8606_LED2_GREEN, RGB1C),
+	PM8606_LED_RESOURCE(PM8606_LED2_BLUE, RGB1D),
+};
+
+#define PM8606_LED_DEVS(_i)				\
+{							\
+	.name		= "88pm860x-led",		\
+	.num_resources	= 1,				\
+	.resources	= &led_resources[_i],		\
+	.id		= _i,				\
+}
+
+static struct mfd_cell led_devs[] = {
+	PM8606_LED_DEVS(PM8606_LED1_RED),
+	PM8606_LED_DEVS(PM8606_LED1_GREEN),
+	PM8606_LED_DEVS(PM8606_LED1_BLUE),
+	PM8606_LED_DEVS(PM8606_LED2_RED),
+	PM8606_LED_DEVS(PM8606_LED2_GREEN),
+	PM8606_LED_DEVS(PM8606_LED2_BLUE),
+};
+
+static struct resource touch_resources[] = {
+	{
+		.start	= PM8607_IRQ_PEN,
+		.end	= PM8607_IRQ_PEN,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct mfd_cell touch_devs[] = {
+	{
+		.name		= "88pm860x-touch",
+		.num_resources	= 1,
+		.resources	= &touch_resources[0],
+	},
+};
 
 #define PM8607_REG_RESOURCE(_start, _end)		\
 {							\
@@ -25,7 +119,7 @@
 	.flags	= IORESOURCE_IO,			\
 }
 
-static struct resource pm8607_regulator_resources[] = {
+static struct resource regulator_resources[] = {
 	PM8607_REG_RESOURCE(BUCK1, BUCK1),
 	PM8607_REG_RESOURCE(BUCK2, BUCK2),
 	PM8607_REG_RESOURCE(BUCK3, BUCK3),
@@ -47,10 +141,11 @@ static struct resource pm8607_regulator_resources[] = {
 {									\
 	.name		= "88pm8607-" #_name,				\
 	.num_resources	= 1,						\
-	.resources	= &pm8607_regulator_resources[PM8607_ID_##_id],	\
+	.resources	= &regulator_resources[PM8607_ID_##_id],	\
+	.id		= PM8607_ID_##_id,				\
 }
 
-static struct mfd_cell pm8607_devs[] = {
+static struct mfd_cell regulator_devs[] = {
 	PM8607_REG_DEVS(buck1, BUCK1),
 	PM8607_REG_DEVS(buck2, BUCK2),
 	PM8607_REG_DEVS(buck3, BUCK3),
@@ -192,6 +287,61 @@ int pm860x_free_irq(struct pm860x_chip *chip, int irq)
 }
 EXPORT_SYMBOL(pm860x_free_irq);
 
+static int __devinit device_gpadc_init(struct pm860x_chip *chip,
+				       struct pm860x_platform_data *pdata)
+{
+	struct i2c_client *i2c = (chip->id == CHIP_PM8607) ? chip->client \
+				: chip->companion;
+	int use_gpadc = 0, data, ret;
+
+	/* initialize GPADC without activating it */
+
+	if (pdata && pdata->touch) {
+		/* set GPADC MISC1 register */
+		data = 0;
+		data |= (pdata->touch->gpadc_prebias << 1)
+			& PM8607_GPADC_PREBIAS_MASK;
+		data |= (pdata->touch->slot_cycle << 3)
+			& PM8607_GPADC_SLOT_CYCLE_MASK;
+		data |= (pdata->touch->off_scale << 5)
+			& PM8607_GPADC_OFF_SCALE_MASK;
+		data |= (pdata->touch->sw_cal << 7)
+			& PM8607_GPADC_SW_CAL_MASK;
+		if (data) {
+			ret = pm860x_reg_write(i2c, PM8607_GPADC_MISC1, data);
+			if (ret < 0)
+				goto out;
+		}
+		/* set tsi prebias time */
+		if (pdata->touch->tsi_prebias) {
+			data = pdata->touch->tsi_prebias;
+			ret = pm860x_reg_write(i2c, PM8607_TSI_PREBIAS, data);
+			if (ret < 0)
+				goto out;
+		}
+		/* set prebias & prechg time of pen detect */
+		data = 0;
+		data |= pdata->touch->pen_prebias & PM8607_PD_PREBIAS_MASK;
+		data |= (pdata->touch->pen_prechg << 5)
+			& PM8607_PD_PRECHG_MASK;
+		if (data) {
+			ret = pm860x_reg_write(i2c, PM8607_PD_PREBIAS, data);
+			if (ret < 0)
+				goto out;
+		}
+
+		use_gpadc = 1;
+	}
+
+	/* turn on GPADC */
+	if (use_gpadc) {
+		ret = pm860x_set_bits(i2c, PM8607_GPADC_MISC1,
+				      PM8607_GPADC_EN, PM8607_GPADC_EN);
+	}
+out:
+	return ret;
+}
+
 static int __devinit device_irq_init(struct pm860x_chip *chip,
 				     struct pm860x_platform_data *pdata)
 {
@@ -264,14 +414,40 @@ static void __devinit device_8606_init(struct pm860x_chip *chip,
 				       struct i2c_client *i2c,
 				       struct pm860x_platform_data *pdata)
 {
+	int ret;
+
+	if (pdata && pdata->backlight) {
+		ret = mfd_add_devices(chip->dev, 0, &backlight_devs[0],
+				      ARRAY_SIZE(backlight_devs),
+				      &backlight_resources[0], 0);
+		if (ret < 0) {
+			dev_err(chip->dev, "Failed to add backlight "
+				"subdev\n");
+			goto out_dev;
+		}
+	}
+
+	if (pdata && pdata->led) {
+		ret = mfd_add_devices(chip->dev, 0, &led_devs[0],
+				      ARRAY_SIZE(led_devs),
+				      &led_resources[0], 0);
+		if (ret < 0) {
+			dev_err(chip->dev, "Failed to add led "
+				"subdev\n");
+			goto out_dev;
+		}
+	}
+	return;
+out_dev:
+	mfd_remove_devices(chip->dev);
+	device_irq_exit(chip);
 }
 
 static void __devinit device_8607_init(struct pm860x_chip *chip,
 				       struct i2c_client *i2c,
 				       struct pm860x_platform_data *pdata)
 {
-	int i, count, data;
-	int ret;
+	int data, ret;
 
 	ret = pm860x_reg_read(i2c, PM8607_CHIP_ID);
 	if (ret < 0) {
@@ -311,19 +487,36 @@ static void __devinit device_8607_init(struct pm860x_chip *chip,
 		goto out;
 	}
 
+	ret = device_gpadc_init(chip, pdata);
+	if (ret < 0)
+		goto out;
+
 	ret = device_irq_init(chip, pdata);
 	if (ret < 0)
 		goto out;
 
-	count = ARRAY_SIZE(pm8607_devs);
-	for (i = 0; i < count; i++) {
-		ret = mfd_add_devices(chip->dev, i, &pm8607_devs[i],
-				      1, NULL, 0);
-		if (ret != 0) {
-			dev_err(chip->dev, "Failed to add subdevs\n");
-			goto out;
+	ret = mfd_add_devices(chip->dev, 0, &regulator_devs[0],
+			      ARRAY_SIZE(regulator_devs),
+			      &regulator_resources[0], 0);
+	if (ret < 0) {
+		dev_err(chip->dev, "Failed to add regulator subdev\n");
+		goto out_dev;
+	}
+
+	if (pdata && pdata->touch) {
+		ret = mfd_add_devices(chip->dev, 0, &touch_devs[0],
+				      ARRAY_SIZE(touch_devs),
+				      &touch_resources[0], 0);
+		if (ret < 0) {
+			dev_err(chip->dev, "Failed to add touch "
+				"subdev\n");
+			goto out_dev;
 		}
 	}
+	return;
+out_dev:
+	mfd_remove_devices(chip->dev);
+	device_irq_exit(chip);
 out:
 	return;
 }
diff --git a/include/linux/mfd/88pm860x.h b/include/linux/mfd/88pm860x.h
index b4d6018ba0d6..d7edf11784f5 100644
--- a/include/linux/mfd/88pm860x.h
+++ b/include/linux/mfd/88pm860x.h
@@ -14,6 +14,8 @@
 
 #include <linux/interrupt.h>
 
+#define MFD_NAME_SIZE		(40)
+
 enum {
 	CHIP_INVALID = 0,
 	CHIP_PM8606,
@@ -21,6 +23,99 @@ enum {
 	CHIP_MAX,
 };
 
+enum {
+	PM8606_ID_INVALID,
+	PM8606_ID_BACKLIGHT,
+	PM8606_ID_LED,
+	PM8606_ID_VIBRATOR,
+	PM8606_ID_TOUCH,
+	PM8606_ID_SOUND,
+	PM8606_ID_CHARGER,
+	PM8606_ID_MAX,
+};
+
+enum {
+	PM8606_BACKLIGHT1 = 0,
+	PM8606_BACKLIGHT2,
+	PM8606_BACKLIGHT3,
+};
+
+enum {
+	PM8606_LED1_RED = 0,
+	PM8606_LED1_GREEN,
+	PM8606_LED1_BLUE,
+	PM8606_LED2_RED,
+	PM8606_LED2_GREEN,
+	PM8606_LED2_BLUE,
+	PM8607_LED_VIBRATOR,
+};
+
+
+/* 8606 Registers */
+#define PM8606_DCM_BOOST		(0x00)
+#define PM8606_PWM			(0x01)
+
+/* Backlight Registers */
+#define PM8606_WLED1A			(0x02)
+#define PM8606_WLED1B			(0x03)
+#define PM8606_WLED2A			(0x04)
+#define PM8606_WLED2B			(0x05)
+#define PM8606_WLED3A			(0x06)
+#define PM8606_WLED3B			(0x07)
+
+/* LED Registers */
+#define PM8606_RGB2A			(0x08)
+#define PM8606_RGB2B			(0x09)
+#define PM8606_RGB2C			(0x0A)
+#define PM8606_RGB2D			(0x0B)
+#define PM8606_RGB1A			(0x0C)
+#define PM8606_RGB1B			(0x0D)
+#define PM8606_RGB1C			(0x0E)
+#define PM8606_RGB1D			(0x0F)
+
+#define PM8606_PREREGULATORA		(0x10)
+#define PM8606_PREREGULATORB		(0x11)
+#define PM8606_VIBRATORA		(0x12)
+#define PM8606_VIBRATORB		(0x13)
+#define PM8606_VCHG			(0x14)
+#define PM8606_VSYS			(0x15)
+#define PM8606_MISC			(0x16)
+#define PM8606_CHIP_ID			(0x17)
+#define PM8606_STATUS			(0x18)
+#define PM8606_FLAGS			(0x19)
+#define PM8606_PROTECTA			(0x1A)
+#define PM8606_PROTECTB			(0x1B)
+#define PM8606_PROTECTC			(0x1C)
+
+/* Bit definitions of PM8606 registers */
+#define PM8606_DCM_500MA		(0x0)	/* current limit */
+#define PM8606_DCM_750MA		(0x1)
+#define PM8606_DCM_1000MA		(0x2)
+#define PM8606_DCM_1250MA		(0x3)
+#define PM8606_DCM_250MV		(0x0 << 2)
+#define PM8606_DCM_300MV		(0x1 << 2)
+#define PM8606_DCM_350MV		(0x2 << 2)
+#define PM8606_DCM_400MV		(0x3 << 2)
+
+#define PM8606_PWM_31200HZ		(0x0)
+#define PM8606_PWM_15600HZ		(0x1)
+#define PM8606_PWM_7800HZ		(0x2)
+#define PM8606_PWM_3900HZ		(0x3)
+#define PM8606_PWM_1950HZ		(0x4)
+#define PM8606_PWM_976HZ		(0x5)
+#define PM8606_PWM_488HZ		(0x6)
+#define PM8606_PWM_244HZ		(0x7)
+#define PM8606_PWM_FREQ_MASK		(0x7)
+
+#define PM8606_WLED_ON			(1 << 0)
+#define PM8606_WLED_CURRENT(x)		((x & 0x1F) << 1)
+
+#define PM8606_LED_CURRENT(x)		(((x >> 2) & 0x07) << 5)
+
+#define PM8606_VSYS_EN			(1 << 1)
+
+#define PM8606_MISC_OSC_EN		(1 << 4)
+
 enum {
 	PM8607_ID_BUCK1 = 0,
 	PM8607_ID_BUCK2,
@@ -91,6 +186,21 @@ enum {
 #define PM8607_SUPPLIES_EN21		(0x33)
 #define PM8607_SUPPLIES_EN22		(0x34)
 
+/* Vibrator Control Registers */
+#define PM8607_VIBRATOR_SET		(0x28)
+#define PM8607_VIBRATOR_PWM		(0x29)
+
+/* GPADC Registers */
+#define PM8607_GP_BIAS1			(0x4F)
+#define PM8607_MEAS_EN1			(0x50)
+#define PM8607_MEAS_EN2			(0x51)
+#define PM8607_MEAS_EN3			(0x52)
+#define PM8607_MEAS_OFF_TIME1		(0x53)
+#define PM8607_MEAS_OFF_TIME2		(0x54)
+#define PM8607_TSI_PREBIAS		(0x55)	/* prebias time */
+#define PM8607_PD_PREBIAS		(0x56)	/* prebias time */
+#define PM8607_GPADC_MISC1		(0x57)
+
 /* RTC Control Registers */
 #define PM8607_RTC1			(0xA0)
 #define PM8607_RTC_COUNTER1		(0xA1)
@@ -140,6 +250,16 @@ enum {
 #define PM8607_B0_MISC1_PI2C		(1 << 3)
 #define PM8607_B0_MISC1_RESET		(1 << 6)
 
+/* bits definitions of GPADC */
+#define PM8607_GPADC_EN			(1 << 0)
+#define PM8607_GPADC_PREBIAS_MASK	(3 << 1)
+#define PM8607_GPADC_SLOT_CYCLE_MASK	(3 << 3)	/* slow mode */
+#define PM8607_GPADC_OFF_SCALE_MASK	(3 << 5)	/* GP sleep mode */
+#define PM8607_GPADC_SW_CAL_MASK	(1 << 7)
+
+#define PM8607_PD_PREBIAS_MASK		(0x1F << 0)
+#define PM8607_PD_PRECHG_MASK		(7 << 5)
+
 /* Interrupt Number in 88PM8607 */
 enum {
 	PM8607_IRQ_ONKEY = 0,
@@ -202,13 +322,44 @@ enum {
 	PI2C_PORT,
 };
 
+struct pm860x_backlight_pdata {
+	int		id;
+	int		pwm;
+	int		iset;
+	unsigned long	flags;
+};
+
+struct pm860x_led_pdata {
+	int		id;
+	int		iset;
+	unsigned long	flags;
+};
+
+struct pm860x_touch_pdata {
+	int		gpadc_prebias;
+	int		slot_cycle;
+	int		off_scale;
+	int		sw_cal;
+	int		tsi_prebias;	/* time, slot */
+	int		pen_prebias;	/* time, slot */
+	int		pen_prechg;	/* time, slot */
+	unsigned long	flags;
+};
+
 struct pm860x_platform_data {
+	struct pm860x_backlight_pdata	*backlight;
+	struct pm860x_led_pdata		*led;
+	struct pm860x_touch_pdata	*touch;
+
 	unsigned short	companion_addr;	/* I2C address of companion chip */
 	int		i2c_port;	/* Controlled by GI2C or PI2C */
 	int		irq_mode;	/* Clear interrupt by read/write(0/1) */
 	struct regulator_init_data *regulator[PM8607_MAX_REGULATOR];
 };
 
+extern char pm860x_backlight_name[][MFD_NAME_SIZE];
+extern char pm860x_led_name[][MFD_NAME_SIZE];
+
 extern int pm860x_reg_read(struct i2c_client *, int);
 extern int pm860x_reg_write(struct i2c_client *, int, unsigned char);
 extern int pm860x_bulk_read(struct i2c_client *, int, int, unsigned char *);
-- 
cgit v1.2.3


From 866a98ae6e1a9768cd25fe1185481569c7e4b4a9 Mon Sep 17 00:00:00 2001
From: Haojian Zhuang <haojian.zhuang@marvell.com>
Date: Tue, 15 Dec 2009 16:06:17 -0500
Subject: input: Enable touch on 88pm860x

Enable touchscreen driver for the 88pm860x multi function core.

Signed-off-by: Haojian Zhuang <haojian.zhuang@marvell.com>
Acked-by: Dmitry Torokhov <dtor@mail.ru>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/input/touchscreen/88pm860x-ts.c | 241 ++++++++++++++++++++++++++++++++
 drivers/input/touchscreen/Kconfig       |  12 ++
 drivers/input/touchscreen/Makefile      |   1 +
 include/linux/mfd/88pm860x.h            |   1 +
 4 files changed, 255 insertions(+)
 create mode 100644 drivers/input/touchscreen/88pm860x-ts.c

(limited to 'include/linux')

diff --git a/drivers/input/touchscreen/88pm860x-ts.c b/drivers/input/touchscreen/88pm860x-ts.c
new file mode 100644
index 000000000000..56254d2a1f6e
--- /dev/null
+++ b/drivers/input/touchscreen/88pm860x-ts.c
@@ -0,0 +1,241 @@
+/*
+ * Touchscreen driver for Marvell 88PM860x
+ *
+ * Copyright (C) 2009 Marvell International Ltd.
+ * 	Haojian Zhuang <haojian.zhuang@marvell.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/i2c.h>
+#include <linux/input.h>
+#include <linux/mfd/88pm860x.h>
+
+#define MEAS_LEN		(8)
+#define ACCURATE_BIT		(12)
+
+/* touch register */
+#define MEAS_EN3		(0x52)
+
+#define MEAS_TSIX_1		(0x8D)
+#define MEAS_TSIX_2		(0x8E)
+#define MEAS_TSIY_1		(0x8F)
+#define MEAS_TSIY_2		(0x90)
+#define MEAS_TSIZ1_1		(0x91)
+#define MEAS_TSIZ1_2		(0x92)
+#define MEAS_TSIZ2_1		(0x93)
+#define MEAS_TSIZ2_2		(0x94)
+
+/* bit definitions of touch */
+#define MEAS_PD_EN		(1 << 3)
+#define MEAS_TSIX_EN		(1 << 4)
+#define MEAS_TSIY_EN		(1 << 5)
+#define MEAS_TSIZ1_EN		(1 << 6)
+#define MEAS_TSIZ2_EN		(1 << 7)
+
+struct pm860x_touch {
+	struct input_dev *idev;
+	struct i2c_client *i2c;
+	struct pm860x_chip *chip;
+	int irq;
+	int res_x;		/* resistor of Xplate */
+};
+
+static irqreturn_t pm860x_touch_handler(int irq, void *data)
+{
+	struct pm860x_touch *touch = data;
+	struct pm860x_chip *chip = touch->chip;
+	unsigned char buf[MEAS_LEN];
+	int x, y, pen_down;
+	int z1, z2, rt = 0;
+	int ret;
+
+	pm860x_mask_irq(chip, irq);
+	ret = pm860x_bulk_read(touch->i2c, MEAS_TSIX_1, MEAS_LEN, buf);
+	if (ret < 0)
+		goto out;
+
+	pen_down = buf[1] & (1 << 6);
+	x = ((buf[0] & 0xFF) << 4) | (buf[1] & 0x0F);
+	y = ((buf[2] & 0xFF) << 4) | (buf[3] & 0x0F);
+	z1 = ((buf[4] & 0xFF) << 4) | (buf[5] & 0x0F);
+	z2 = ((buf[6] & 0xFF) << 4) | (buf[7] & 0x0F);
+
+	if (pen_down) {
+		if ((x != 0) && (z1 != 0) && (touch->res_x != 0)) {
+			rt = z2 / z1 - 1;
+			rt = (rt * touch->res_x * x) >> ACCURATE_BIT;
+			dev_dbg(chip->dev, "z1:%d, z2:%d, rt:%d\n",
+				z1, z2, rt);
+		}
+		input_report_abs(touch->idev, ABS_X, x);
+		input_report_abs(touch->idev, ABS_Y, y);
+		input_report_abs(touch->idev, ABS_PRESSURE, rt);
+		input_report_key(touch->idev, BTN_TOUCH, 1);
+		dev_dbg(chip->dev, "pen down at [%d, %d].\n", x, y);
+	} else {
+		input_report_abs(touch->idev, ABS_PRESSURE, 0);
+		input_report_key(touch->idev, BTN_TOUCH, 0);
+		dev_dbg(chip->dev, "pen release\n");
+	}
+	input_sync(touch->idev);
+	pm860x_unmask_irq(chip, irq);
+
+out:
+	return IRQ_HANDLED;
+}
+
+static int pm860x_touch_open(struct input_dev *dev)
+{
+	struct pm860x_touch *touch = input_get_drvdata(dev);
+	struct pm860x_chip *chip = touch->chip;
+	int data, ret;
+
+	data = MEAS_PD_EN | MEAS_TSIX_EN | MEAS_TSIY_EN
+		| MEAS_TSIZ1_EN | MEAS_TSIZ2_EN;
+	ret = pm860x_set_bits(touch->i2c, MEAS_EN3, data, data);
+	if (ret < 0)
+		goto out;
+	pm860x_unmask_irq(chip, touch->irq);
+	return 0;
+out:
+	return ret;
+}
+
+static void pm860x_touch_close(struct input_dev *dev)
+{
+	struct pm860x_touch *touch = input_get_drvdata(dev);
+	struct pm860x_chip *chip = touch->chip;
+	int data;
+
+	data = MEAS_PD_EN | MEAS_TSIX_EN | MEAS_TSIY_EN
+		| MEAS_TSIZ1_EN | MEAS_TSIZ2_EN;
+	pm860x_set_bits(touch->i2c, MEAS_EN3, data, 0);
+	pm860x_mask_irq(chip, touch->irq);
+}
+
+static int __devinit pm860x_touch_probe(struct platform_device *pdev)
+{
+	struct pm860x_chip *chip = dev_get_drvdata(pdev->dev.parent);
+	struct pm860x_platform_data *pm860x_pdata =		\
+				pdev->dev.parent->platform_data;
+	struct pm860x_touch_pdata *pdata = NULL;
+	struct pm860x_touch *touch;
+	int irq, ret;
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0) {
+		dev_err(&pdev->dev, "No IRQ resource!\n");
+		return -EINVAL;
+	}
+
+	if (!pm860x_pdata) {
+		dev_err(&pdev->dev, "platform data is missing\n");
+		return -EINVAL;
+	}
+
+	pdata = pm860x_pdata->touch;
+	if (!pdata) {
+		dev_err(&pdev->dev, "touchscreen data is missing\n");
+		return -EINVAL;
+	}
+
+	touch = kzalloc(sizeof(struct pm860x_touch), GFP_KERNEL);
+	if (touch == NULL)
+		return -ENOMEM;
+	dev_set_drvdata(&pdev->dev, touch);
+
+	touch->idev = input_allocate_device();
+	if (touch->idev == NULL) {
+		dev_err(&pdev->dev, "Failed to allocate input device!\n");
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	touch->idev->name = "88pm860x-touch";
+	touch->idev->phys = "88pm860x/input0";
+	touch->idev->id.bustype = BUS_I2C;
+	touch->idev->dev.parent = &pdev->dev;
+	touch->idev->open = pm860x_touch_open;
+	touch->idev->close = pm860x_touch_close;
+	touch->chip = chip;
+	touch->i2c = (chip->id == CHIP_PM8607) ? chip->client : chip->companion;
+	touch->irq = irq;
+	touch->res_x = pdata->res_x;
+	input_set_drvdata(touch->idev, touch);
+
+	ret = pm860x_request_irq(chip, irq, pm860x_touch_handler, touch);
+	if (ret < 0)
+		goto out_irq;
+
+	__set_bit(EV_ABS, touch->idev->evbit);
+	__set_bit(ABS_X, touch->idev->absbit);
+	__set_bit(ABS_Y, touch->idev->absbit);
+	__set_bit(ABS_PRESSURE, touch->idev->absbit);
+	__set_bit(EV_SYN, touch->idev->evbit);
+	__set_bit(EV_KEY, touch->idev->evbit);
+	__set_bit(BTN_TOUCH, touch->idev->keybit);
+
+	input_set_abs_params(touch->idev, ABS_X, 0, 1 << ACCURATE_BIT, 0, 0);
+	input_set_abs_params(touch->idev, ABS_Y, 0, 1 << ACCURATE_BIT, 0, 0);
+	input_set_abs_params(touch->idev, ABS_PRESSURE, 0, 1 << ACCURATE_BIT,
+				0, 0);
+
+	ret = input_register_device(touch->idev);
+	if (ret < 0) {
+		dev_err(chip->dev, "Failed to register touch!\n");
+		goto out_rg;
+	}
+
+	platform_set_drvdata(pdev, touch);
+	return 0;
+out_rg:
+	pm860x_free_irq(chip, irq);
+out_irq:
+	input_free_device(touch->idev);
+out:
+	kfree(touch);
+	return ret;
+}
+
+static int __devexit pm860x_touch_remove(struct platform_device *pdev)
+{
+	struct pm860x_touch *touch = platform_get_drvdata(pdev);
+
+	input_unregister_device(touch->idev);
+	pm860x_free_irq(touch->chip, touch->irq);
+	platform_set_drvdata(pdev, NULL);
+	kfree(touch);
+	return 0;
+}
+
+static struct platform_driver pm860x_touch_driver = {
+	.driver	= {
+		.name	= "88pm860x-touch",
+		.owner	= THIS_MODULE,
+	},
+	.probe	= pm860x_touch_probe,
+	.remove	= __devexit_p(pm860x_touch_remove),
+};
+
+static int __init pm860x_touch_init(void)
+{
+	return platform_driver_register(&pm860x_touch_driver);
+}
+module_init(pm860x_touch_init);
+
+static void __exit pm860x_touch_exit(void)
+{
+	platform_driver_unregister(&pm860x_touch_driver);
+}
+module_exit(pm860x_touch_exit);
+
+MODULE_DESCRIPTION("Touchscreen driver for Marvell Semiconductor 88PM860x");
+MODULE_AUTHOR("Haojian Zhuang <haojian.zhuang@marvell.com>");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:88pm860x-touch");
+
diff --git a/drivers/input/touchscreen/Kconfig b/drivers/input/touchscreen/Kconfig
index 6457e060ae49..7208654a94ae 100644
--- a/drivers/input/touchscreen/Kconfig
+++ b/drivers/input/touchscreen/Kconfig
@@ -11,6 +11,18 @@ menuconfig INPUT_TOUCHSCREEN
 
 if INPUT_TOUCHSCREEN
 
+config TOUCHSCREEN_88PM860X
+	tristate "Marvell 88PM860x touchscreen"
+	depends on MFD_88PM860X
+	help
+	  Say Y here if you have a 88PM860x PMIC and want to enable
+	  support for the built-in touchscreen.
+
+	  If unsure, say N.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called 88pm860x-ts.
+
 config TOUCHSCREEN_ADS7846
 	tristate "ADS7846/TSC2046 and ADS7843 based touchscreens"
 	depends on SPI_MASTER
diff --git a/drivers/input/touchscreen/Makefile b/drivers/input/touchscreen/Makefile
index d61a3b4def9a..7fef7d5cca23 100644
--- a/drivers/input/touchscreen/Makefile
+++ b/drivers/input/touchscreen/Makefile
@@ -6,6 +6,7 @@
 
 wm97xx-ts-y := wm97xx-core.o
 
+obj-$(CONFIG_TOUCHSCREEN_88PM860X)	+= 88pm860x-ts.o
 obj-$(CONFIG_TOUCHSCREEN_AD7877)	+= ad7877.o
 obj-$(CONFIG_TOUCHSCREEN_AD7879)	+= ad7879.o
 obj-$(CONFIG_TOUCHSCREEN_ADS7846)	+= ads7846.o
diff --git a/include/linux/mfd/88pm860x.h b/include/linux/mfd/88pm860x.h
index d7edf11784f5..80bc82a7ac96 100644
--- a/include/linux/mfd/88pm860x.h
+++ b/include/linux/mfd/88pm860x.h
@@ -343,6 +343,7 @@ struct pm860x_touch_pdata {
 	int		tsi_prebias;	/* time, slot */
 	int		pen_prebias;	/* time, slot */
 	int		pen_prechg;	/* time, slot */
+	int		res_x;		/* resistor of Xplate */
 	unsigned long	flags;
 };
 
-- 
cgit v1.2.3


From d50f8f339f6901fccc9d4292b65ce8b69d7413d4 Mon Sep 17 00:00:00 2001
From: Haojian Zhuang <haojian.zhuang@marvell.com>
Date: Fri, 8 Jan 2010 12:29:23 +0100
Subject: mfd: Initial max8925 support

Basic Max8925 support, which is a power management IC from Maxim
Semiconductor.

Signed-off-by: Haojian Zhuang <haojian.zhuang@marvell.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/Kconfig         |   9 ++
 drivers/mfd/Makefile        |   2 +
 drivers/mfd/max8925-core.c  | 262 ++++++++++++++++++++++++++++++++++++++++++++
 drivers/mfd/max8925-i2c.c   | 210 +++++++++++++++++++++++++++++++++++
 include/linux/mfd/max8925.h | 119 ++++++++++++++++++++
 5 files changed, 602 insertions(+)
 create mode 100644 drivers/mfd/max8925-core.c
 create mode 100644 drivers/mfd/max8925-i2c.c
 create mode 100644 include/linux/mfd/max8925.h

(limited to 'include/linux')

diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index 815907eb70a4..ee416eefb8e9 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -194,6 +194,15 @@ config PMIC_ADP5520
 	  individual components like LCD backlight, LEDs, GPIOs and Kepad
 	  under the corresponding menus.
 
+config MFD_MAX8925
+	tristate "Maxim Semiconductor MAX8925 PMIC Support"
+	depends on I2C
+	help
+	  Say yes here to support for Maxim Semiconductor MAX8925. This is
+	  a Power Management IC. This driver provies common support for
+	  accessing the device, additional drivers must be enabled in order
+	  to use the functionality of the device.
+
 config MFD_WM8400
 	tristate "Support Wolfson Microelectronics WM8400"
 	select MFD_CORE
diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
index 1e3ae062c1f6..261635700243 100644
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile
@@ -49,6 +49,8 @@ endif
 obj-$(CONFIG_UCB1400_CORE)	+= ucb1400_core.o
 
 obj-$(CONFIG_PMIC_DA903X)	+= da903x.o
+max8925-objs			:= max8925-core.o max8925-i2c.o
+obj-$(CONFIG_MFD_MAX8925)	+= max8925.o
 
 obj-$(CONFIG_MFD_PCF50633)	+= pcf50633-core.o
 obj-$(CONFIG_PCF50633_ADC)	+= pcf50633-adc.o
diff --git a/drivers/mfd/max8925-core.c b/drivers/mfd/max8925-core.c
new file mode 100644
index 000000000000..3e26267960b1
--- /dev/null
+++ b/drivers/mfd/max8925-core.c
@@ -0,0 +1,262 @@
+/*
+ * Base driver for Maxim MAX8925
+ *
+ * Copyright (C) 2009 Marvell International Ltd.
+ *	Haojian Zhuang <haojian.zhuang@marvell.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/i2c.h>
+#include <linux/interrupt.h>
+#include <linux/platform_device.h>
+#include <linux/mfd/core.h>
+#include <linux/mfd/max8925.h>
+
+#define IRQ_MODE_STATUS		0
+#define IRQ_MODE_MASK		1
+
+static int __get_irq_offset(struct max8925_chip *chip, int irq, int mode,
+			    int *offset, int *bit)
+{
+	if (!offset || !bit)
+		return -EINVAL;
+
+	switch (chip->chip_id) {
+	case MAX8925_GPM:
+		*bit = irq % BITS_PER_BYTE;
+		if (irq < (BITS_PER_BYTE << 1)) {	/* irq = [0,15] */
+			*offset = (mode) ? MAX8925_CHG_IRQ1_MASK
+				: MAX8925_CHG_IRQ1;
+			if (irq >= BITS_PER_BYTE)
+				(*offset)++;
+		} else {				/* irq = [16,31] */
+			*offset = (mode) ? MAX8925_ON_OFF_IRQ1_MASK
+				: MAX8925_ON_OFF_IRQ1;
+			if (irq >= (BITS_PER_BYTE * 3))
+				(*offset)++;
+		}
+		break;
+	case MAX8925_ADC:
+		*bit = irq % BITS_PER_BYTE;
+		*offset = (mode) ? MAX8925_TSC_IRQ_MASK : MAX8925_TSC_IRQ;
+		break;
+	default:
+		goto out;
+	}
+	return 0;
+out:
+	dev_err(chip->dev, "Wrong irq #%d is assigned\n", irq);
+	return -EINVAL;
+}
+
+static int __check_irq(int irq)
+{
+	if ((irq < 0) || (irq >= MAX8925_NUM_IRQ))
+		return -EINVAL;
+	return 0;
+}
+
+int max8925_mask_irq(struct max8925_chip *chip, int irq)
+{
+	int offset, bit, ret;
+
+	ret = __get_irq_offset(chip, irq, IRQ_MODE_MASK, &offset, &bit);
+	if (ret < 0)
+		return ret;
+	ret = max8925_set_bits(chip->i2c, offset, 1 << bit, 1 << bit);
+	return ret;
+}
+
+int max8925_unmask_irq(struct max8925_chip *chip, int irq)
+{
+	int offset, bit, ret;
+
+	ret = __get_irq_offset(chip, irq, IRQ_MODE_MASK, &offset, &bit);
+	if (ret < 0)
+		return ret;
+	ret = max8925_set_bits(chip->i2c, offset, 1 << bit, 0);
+	return ret;
+}
+
+#define INT_STATUS_NUM		(MAX8925_NUM_IRQ / BITS_PER_BYTE)
+
+static irqreturn_t max8925_irq_thread(int irq, void *data)
+{
+	struct max8925_chip *chip = data;
+	unsigned long irq_status[INT_STATUS_NUM];
+	unsigned char status_buf[INT_STATUS_NUM << 1];
+	int i, ret;
+
+	memset(irq_status, 0, sizeof(unsigned long) * INT_STATUS_NUM);
+
+	/* all these interrupt status registers are read-only */
+	switch (chip->chip_id) {
+	case MAX8925_GPM:
+		ret = max8925_bulk_read(chip->i2c, MAX8925_CHG_IRQ1,
+					4, status_buf);
+		if (ret < 0)
+			goto out;
+		ret = max8925_bulk_read(chip->i2c, MAX8925_ON_OFF_IRQ1,
+					2, &status_buf[4]);
+		if (ret < 0)
+			goto out;
+		ret = max8925_bulk_read(chip->i2c, MAX8925_ON_OFF_IRQ2,
+					2, &status_buf[6]);
+		if (ret < 0)
+			goto out;
+		/* clear masked interrupt status */
+		status_buf[0] &= (~status_buf[2] & CHG_IRQ1_MASK);
+		irq_status[0] |= status_buf[0];
+		status_buf[1] &= (~status_buf[3] & CHG_IRQ2_MASK);
+		irq_status[0] |= (status_buf[1] << BITS_PER_BYTE);
+		status_buf[4] &= (~status_buf[5] & ON_OFF_IRQ1_MASK);
+		irq_status[0] |= (status_buf[4] << (BITS_PER_BYTE * 2));
+		status_buf[6] &= (~status_buf[7] & ON_OFF_IRQ2_MASK);
+		irq_status[0] |= (status_buf[6] << (BITS_PER_BYTE * 3));
+		break;
+	case MAX8925_ADC:
+		ret = max8925_bulk_read(chip->i2c, MAX8925_TSC_IRQ,
+					2, status_buf);
+		if (ret < 0)
+			goto out;
+		/* clear masked interrupt status */
+		status_buf[0] &= (~status_buf[1] & TSC_IRQ_MASK);
+		irq_status[0] |= status_buf[0];
+		break;
+	default:
+		goto out;
+	}
+
+	for_each_bit(i, &irq_status[0], MAX8925_NUM_IRQ) {
+		clear_bit(i, irq_status);
+		dev_dbg(chip->dev, "Servicing IRQ #%d in %s\n", i, chip->name);
+
+		mutex_lock(&chip->irq_lock);
+		if (chip->irq[i].handler)
+			chip->irq[i].handler(i, chip->irq[i].data);
+		else {
+			max8925_mask_irq(chip, i);
+			dev_err(chip->dev, "Noboday cares IRQ #%d in %s. "
+				"Now mask it.\n", i, chip->name);
+		}
+		mutex_unlock(&chip->irq_lock);
+	}
+out:
+	return IRQ_HANDLED;
+}
+
+int max8925_request_irq(struct max8925_chip *chip, int irq,
+			irq_handler_t handler, void *data)
+{
+	if ((__check_irq(irq) < 0) || !handler)
+		return -EINVAL;
+
+	mutex_lock(&chip->irq_lock);
+	chip->irq[irq].handler = handler;
+	chip->irq[irq].data = data;
+	mutex_unlock(&chip->irq_lock);
+	return 0;
+}
+EXPORT_SYMBOL(max8925_request_irq);
+
+int max8925_free_irq(struct max8925_chip *chip, int irq)
+{
+	if (__check_irq(irq) < 0)
+		return -EINVAL;
+
+	mutex_lock(&chip->irq_lock);
+	chip->irq[irq].handler = NULL;
+	chip->irq[irq].data = NULL;
+	mutex_unlock(&chip->irq_lock);
+	return 0;
+}
+EXPORT_SYMBOL(max8925_free_irq);
+
+static int __devinit device_gpm_init(struct max8925_chip *chip,
+				      struct i2c_client *i2c,
+				      struct max8925_platform_data *pdata)
+{
+	int ret;
+
+	/* mask all IRQs */
+	ret = max8925_set_bits(i2c, MAX8925_CHG_IRQ1_MASK, 0x7, 0x7);
+	if (ret < 0)
+		goto out;
+	ret = max8925_set_bits(i2c, MAX8925_CHG_IRQ2_MASK, 0xff, 0xff);
+	if (ret < 0)
+		goto out;
+	ret = max8925_set_bits(i2c, MAX8925_ON_OFF_IRQ1_MASK, 0xff, 0xff);
+	if (ret < 0)
+		goto out;
+	ret = max8925_set_bits(i2c, MAX8925_ON_OFF_IRQ2_MASK, 0x3, 0x3);
+	if (ret < 0)
+		goto out;
+
+	chip->name = "GPM";
+	memset(chip->irq, 0, sizeof(struct max8925_irq) * MAX8925_NUM_IRQ);
+	ret = request_threaded_irq(i2c->irq, NULL, max8925_irq_thread,
+				IRQF_ONESHOT | IRQF_TRIGGER_LOW,
+				"max8925-gpm", chip);
+	if (ret < 0) {
+		dev_err(chip->dev, "Failed to request IRQ #%d.\n", i2c->irq);
+		goto out;
+	}
+	chip->chip_irq = i2c->irq;
+
+	/* enable hard-reset for ONKEY power-off */
+	max8925_set_bits(i2c, MAX8925_SYSENSEL, 0x80, 0x80);
+out:
+	return ret;
+}
+
+static int __devinit device_adc_init(struct max8925_chip *chip,
+				     struct i2c_client *i2c,
+				     struct max8925_platform_data *pdata)
+{
+	int ret;
+
+	/* mask all IRQs */
+	ret = max8925_set_bits(i2c, MAX8925_TSC_IRQ_MASK, 3, 3);
+
+	chip->name = "ADC";
+	memset(chip->irq, 0, sizeof(struct max8925_irq) * MAX8925_NUM_IRQ);
+	ret = request_threaded_irq(i2c->irq, NULL, max8925_irq_thread,
+				IRQF_ONESHOT | IRQF_TRIGGER_LOW,
+				"max8925-adc", chip);
+	if (ret < 0) {
+		dev_err(chip->dev, "Failed to request IRQ #%d.\n", i2c->irq);
+		goto out;
+	}
+	chip->chip_irq = i2c->irq;
+out:
+	return ret;
+}
+
+int __devinit max8925_device_init(struct max8925_chip *chip,
+				  struct max8925_platform_data *pdata)
+{
+	switch (chip->chip_id) {
+	case MAX8925_GPM:
+		device_gpm_init(chip, chip->i2c, pdata);
+		break;
+	case MAX8925_ADC:
+		device_adc_init(chip, chip->i2c, pdata);
+		break;
+	}
+	return 0;
+}
+
+void max8925_device_exit(struct max8925_chip *chip)
+{
+	if (chip->chip_irq >= 0)
+		free_irq(chip->chip_irq, chip);
+}
+
+MODULE_DESCRIPTION("PMIC Driver for Maxim MAX8925");
+MODULE_AUTHOR("Haojian Zhuang <haojian.zhuang@marvell.com");
+MODULE_LICENSE("GPL");
diff --git a/drivers/mfd/max8925-i2c.c b/drivers/mfd/max8925-i2c.c
new file mode 100644
index 000000000000..942068e730f9
--- /dev/null
+++ b/drivers/mfd/max8925-i2c.c
@@ -0,0 +1,210 @@
+/*
+ * I2C driver for Maxim MAX8925
+ *
+ * Copyright (C) 2009 Marvell International Ltd.
+ *	Haojian Zhuang <haojian.zhuang@marvell.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/i2c.h>
+#include <linux/mfd/max8925.h>
+
+static inline int max8925_read_device(struct i2c_client *i2c,
+				      int reg, int bytes, void *dest)
+{
+	unsigned char data;
+	unsigned char *buf;
+	int ret;
+
+	buf = kzalloc(bytes + 1, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	data = (unsigned char)reg;
+	ret = i2c_master_send(i2c, &data, 1);
+	if (ret < 0)
+		return ret;
+
+	ret = i2c_master_recv(i2c, buf, bytes + 1);
+	if (ret < 0)
+		return ret;
+	memcpy(dest, buf, bytes);
+	return 0;
+}
+
+static inline int max8925_write_device(struct i2c_client *i2c,
+				       int reg, int bytes, void *src)
+{
+	unsigned char buf[bytes + 1];
+	int ret;
+
+	buf[0] = (unsigned char)reg;
+	memcpy(&buf[1], src, bytes);
+
+	ret = i2c_master_send(i2c, buf, bytes + 1);
+	if (ret < 0)
+		return ret;
+	return 0;
+}
+
+int max8925_reg_read(struct i2c_client *i2c, int reg)
+{
+	struct max8925_chip *chip = i2c_get_clientdata(i2c);
+	unsigned char data;
+	int ret;
+
+	mutex_lock(&chip->io_lock);
+	ret = max8925_read_device(i2c, reg, 1, &data);
+	mutex_unlock(&chip->io_lock);
+
+	if (ret < 0)
+		return ret;
+	else
+		return (int)data;
+}
+EXPORT_SYMBOL(max8925_reg_read);
+
+int max8925_reg_write(struct i2c_client *i2c, int reg,
+		unsigned char data)
+{
+	struct max8925_chip *chip = i2c_get_clientdata(i2c);
+	int ret;
+
+	mutex_lock(&chip->io_lock);
+	ret = max8925_write_device(i2c, reg, 1, &data);
+	mutex_unlock(&chip->io_lock);
+
+	return ret;
+}
+EXPORT_SYMBOL(max8925_reg_write);
+
+int max8925_bulk_read(struct i2c_client *i2c, int reg,
+		int count, unsigned char *buf)
+{
+	struct max8925_chip *chip = i2c_get_clientdata(i2c);
+	int ret;
+
+	mutex_lock(&chip->io_lock);
+	ret = max8925_read_device(i2c, reg, count, buf);
+	mutex_unlock(&chip->io_lock);
+
+	return ret;
+}
+EXPORT_SYMBOL(max8925_bulk_read);
+
+int max8925_bulk_write(struct i2c_client *i2c, int reg,
+		int count, unsigned char *buf)
+{
+	struct max8925_chip *chip = i2c_get_clientdata(i2c);
+	int ret;
+
+	mutex_lock(&chip->io_lock);
+	ret = max8925_write_device(i2c, reg, count, buf);
+	mutex_unlock(&chip->io_lock);
+
+	return ret;
+}
+EXPORT_SYMBOL(max8925_bulk_write);
+
+int max8925_set_bits(struct i2c_client *i2c, int reg,
+		unsigned char mask, unsigned char data)
+{
+	struct max8925_chip *chip = i2c_get_clientdata(i2c);
+	unsigned char value;
+	int ret;
+
+	mutex_lock(&chip->io_lock);
+	ret = max8925_read_device(i2c, reg, 1, &value);
+	if (ret < 0)
+		goto out;
+	value &= ~mask;
+	value |= data;
+	ret = max8925_write_device(i2c, reg, 1, &value);
+out:
+	mutex_unlock(&chip->io_lock);
+	return ret;
+}
+EXPORT_SYMBOL(max8925_set_bits);
+
+
+static const struct i2c_device_id max8925_id_table[] = {
+	{ "max8925", 0 },
+	{}
+};
+MODULE_DEVICE_TABLE(i2c, max8925_id_table);
+
+static int __devinit max8925_probe(struct i2c_client *client,
+				   const struct i2c_device_id *id)
+{
+	struct max8925_platform_data *pdata = client->dev.platform_data;
+	struct max8925_chip *chip;
+
+	if (!pdata) {
+		pr_info("%s: platform data is missing\n", __func__);
+		return -EINVAL;
+	}
+	if ((pdata->chip_id <= MAX8925_INVALID)
+		|| (pdata->chip_id >= MAX8925_MAX)) {
+		pr_info("#%s: wrong chip identification\n", __func__);
+		return -EINVAL;
+	}
+
+	chip = kzalloc(sizeof(struct max8925_chip), GFP_KERNEL);
+	if (chip == NULL)
+		return -ENOMEM;
+	chip->i2c = client;
+	chip->chip_id = pdata->chip_id;
+	i2c_set_clientdata(client, chip);
+	chip->dev = &client->dev;
+	mutex_init(&chip->io_lock);
+	dev_set_drvdata(chip->dev, chip);
+	max8925_device_init(chip, pdata);
+
+	return 0;
+}
+
+static int __devexit max8925_remove(struct i2c_client *client)
+{
+	struct max8925_chip *chip = i2c_get_clientdata(client);
+
+	max8925_device_exit(chip);
+	i2c_set_clientdata(client, NULL);
+	kfree(chip);
+	return 0;
+}
+
+static struct i2c_driver max8925_driver = {
+	.driver	= {
+		.name	= "max8925",
+		.owner	= THIS_MODULE,
+	},
+	.probe		= max8925_probe,
+	.remove		= __devexit_p(max8925_remove),
+	.id_table	= max8925_id_table,
+};
+
+static int __init max8925_i2c_init(void)
+{
+	int ret;
+
+	ret = i2c_add_driver(&max8925_driver);
+	if (ret != 0)
+		pr_err("Failed to register MAX8925 I2C driver: %d\n", ret);
+	return ret;
+}
+subsys_initcall(max8925_i2c_init);
+
+static void __exit max8925_i2c_exit(void)
+{
+	i2c_del_driver(&max8925_driver);
+}
+module_exit(max8925_i2c_exit);
+
+MODULE_DESCRIPTION("I2C Driver for Maxim 8925");
+MODULE_AUTHOR("Haojian Zhuang <haojian.zhuang@marvell.com>");
+MODULE_LICENSE("GPL");
diff --git a/include/linux/mfd/max8925.h b/include/linux/mfd/max8925.h
new file mode 100644
index 000000000000..2326246ddef2
--- /dev/null
+++ b/include/linux/mfd/max8925.h
@@ -0,0 +1,119 @@
+/*
+ * Maxim8925 Interface
+ *
+ * Copyright (C) 2009 Marvell International Ltd.
+ *	Haojian Zhuang <haojian.zhuang@marvell.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __LINUX_MFD_MAX8925_H
+#define __LINUX_MFD_MAX8925_H
+
+#include <linux/interrupt.h>
+
+/* Charger registers */
+#define MAX8925_CHG_IRQ1		(0x7e)
+#define MAX8925_CHG_IRQ2		(0x7f)
+#define MAX8925_CHG_IRQ1_MASK		(0x80)
+#define MAX8925_CHG_IRQ2_MASK		(0x81)
+
+/* GPM registers */
+#define MAX8925_SYSENSEL		(0x00)
+#define MAX8925_ON_OFF_IRQ1		(0x01)
+#define MAX8925_ON_OFF_IRQ1_MASK	(0x02)
+#define MAX8925_ON_OFF_STAT		(0x03)
+#define MAX8925_ON_OFF_IRQ2		(0x0d)
+#define MAX8925_ON_OFF_IRQ2_MASK	(0x0e)
+#define MAX8925_RESET_CNFG		(0x0f)
+
+/* Touch registers */
+#define MAX8925_TSC_IRQ			(0x00)
+#define MAX8925_TSC_IRQ_MASK		(0x01)
+
+/* RTC registers */
+#define MAX8925_RTC_STATUS		(0x1a)
+#define MAX8925_RTC_IRQ			(0x1c)
+#define MAX8925_RTC_IRQ_MASK		(0x1d)
+
+/* bit definitions */
+#define CHG_IRQ1_MASK			(0x07)
+#define CHG_IRQ2_MASK			(0xff)
+#define ON_OFF_IRQ1_MASK		(0xff)
+#define ON_OFF_IRQ2_MASK		(0x03)
+#define TSC_IRQ_MASK			(0x03)
+#define RTC_IRQ_MASK			(0x0c)
+
+#define MAX8925_NUM_IRQ			(32)
+
+#define MAX8925_NAME_SIZE		(32)
+
+enum {
+	MAX8925_INVALID = 0,
+	MAX8925_RTC,
+	MAX8925_ADC,
+	MAX8925_GPM,	/* general power management */
+	MAX8925_MAX,
+};
+
+#define MAX8925_IRQ_VCHG_OVP		(0)
+#define MAX8925_IRQ_VCHG_F		(1)
+#define MAX8925_IRQ_VCHG_R		(2)
+#define MAX8925_IRQ_VCHG_THM_OK_R	(8)
+#define MAX8925_IRQ_VCHG_THM_OK_F	(9)
+#define MAX8925_IRQ_VCHG_BATTLOW_F	(10)
+#define MAX8925_IRQ_VCHG_BATTLOW_R	(11)
+#define MAX8925_IRQ_VCHG_RST		(12)
+#define MAX8925_IRQ_VCHG_DONE		(13)
+#define MAX8925_IRQ_VCHG_TOPOFF		(14)
+#define MAX8925_IRQ_VCHG_TMR_FAULT	(15)
+#define MAX8925_IRQ_GPM_RSTIN		(16)
+#define MAX8925_IRQ_GPM_MPL		(17)
+#define MAX8925_IRQ_GPM_SW_3SEC		(18)
+#define MAX8925_IRQ_GPM_EXTON_F		(19)
+#define MAX8925_IRQ_GPM_EXTON_R		(20)
+#define MAX8925_IRQ_GPM_SW_1SEC		(21)
+#define MAX8925_IRQ_GPM_SW_F		(22)
+#define MAX8925_IRQ_GPM_SW_R		(23)
+#define MAX8925_IRQ_GPM_SYSCKEN_F	(24)
+#define MAX8925_IRQ_GPM_SYSCKEN_R	(25)
+
+#define MAX8925_IRQ_TSC_STICK		(0)
+#define MAX8925_IRQ_TSC_NSTICK		(1)
+
+struct max8925_irq {
+	irq_handler_t		handler;
+	void			*data;
+};
+
+struct max8925_chip {
+	struct device		*dev;
+	struct mutex		io_lock;
+	struct mutex		irq_lock;
+	struct i2c_client	*i2c;
+	struct max8925_irq	irq[MAX8925_NUM_IRQ];
+
+	const char		*name;
+	int			chip_id;
+	int			chip_irq;
+};
+
+struct max8925_platform_data {
+	int	chip_id;
+	int	chip_irq;
+};
+
+extern int max8925_reg_read(struct i2c_client *, int);
+extern int max8925_reg_write(struct i2c_client *, int, unsigned char);
+extern int max8925_bulk_read(struct i2c_client *, int, int, unsigned char *);
+extern int max8925_bulk_write(struct i2c_client *, int, int, unsigned char *);
+extern int max8925_set_bits(struct i2c_client *, int, unsigned char,
+			unsigned char);
+
+extern int max8925_device_init(struct max8925_chip *,
+				struct max8925_platform_data *);
+extern void max8925_device_exit(struct max8925_chip *);
+#endif /* __LINUX_MFD_MAX8925_H */
+
-- 
cgit v1.2.3


From 1ad998934e9c6cbae91662a05e0cb8772b1f4f75 Mon Sep 17 00:00:00 2001
From: Haojian Zhuang <haojian.zhuang@marvell.com>
Date: Fri, 8 Jan 2010 12:43:29 -0500
Subject: mfd: Add subdevs in max8925

Add subdevs in MAX8925. MAX8925 includes regulator, backlight and touch
components.

Signed-off-by: Haojian Zhuang <haojian.zhuang@marvell.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/Kconfig         |   1 +
 drivers/mfd/max8925-core.c  | 142 ++++++++++++++++++++++++++++++++++++++++++++
 include/linux/mfd/max8925.h |  96 ++++++++++++++++++++++++++++++
 3 files changed, 239 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index ee416eefb8e9..d63ab2eec661 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -197,6 +197,7 @@ config PMIC_ADP5520
 config MFD_MAX8925
 	tristate "Maxim Semiconductor MAX8925 PMIC Support"
 	depends on I2C
+	select MFD_CORE
 	help
 	  Say yes here to support for Maxim Semiconductor MAX8925. This is
 	  a Power Management IC. This driver provies common support for
diff --git a/drivers/mfd/max8925-core.c b/drivers/mfd/max8925-core.c
index 3e26267960b1..f36c494b80f1 100644
--- a/drivers/mfd/max8925-core.c
+++ b/drivers/mfd/max8925-core.c
@@ -20,6 +20,109 @@
 #define IRQ_MODE_STATUS		0
 #define IRQ_MODE_MASK		1
 
+static struct resource backlight_resources[] = {
+	{
+		.name	= "max8925-backlight",
+		.start	= MAX8925_WLED_MODE_CNTL,
+		.end	= MAX8925_WLED_CNTL,
+		.flags	= IORESOURCE_IO,
+	},
+};
+
+static struct mfd_cell backlight_devs[] = {
+	{
+		.name		= "max8925-backlight",
+		.num_resources	= 1,
+		.resources	= &backlight_resources[0],
+		.id		= -1,
+	},
+};
+
+static struct resource touch_resources[] = {
+	{
+		.name	= "max8925-tsc",
+		.start	= MAX8925_TSC_IRQ,
+		.end	= MAX8925_ADC_RES_END,
+		.flags	= IORESOURCE_IO,
+	},
+};
+
+static struct mfd_cell touch_devs[] = {
+	{
+		.name		= "max8925-touch",
+		.num_resources	= 1,
+		.resources	= &touch_resources[0],
+		.id		= -1,
+	},
+};
+
+#define MAX8925_REG_RESOURCE(_start, _end)	\
+{						\
+	.start	= MAX8925_##_start,		\
+	.end	= MAX8925_##_end,		\
+	.flags	= IORESOURCE_IO,		\
+}
+
+static struct resource regulator_resources[] = {
+	MAX8925_REG_RESOURCE(SDCTL1, SDCTL1),
+	MAX8925_REG_RESOURCE(SDCTL2, SDCTL2),
+	MAX8925_REG_RESOURCE(SDCTL3, SDCTL3),
+	MAX8925_REG_RESOURCE(LDOCTL1, LDOCTL1),
+	MAX8925_REG_RESOURCE(LDOCTL2, LDOCTL2),
+	MAX8925_REG_RESOURCE(LDOCTL3, LDOCTL3),
+	MAX8925_REG_RESOURCE(LDOCTL4, LDOCTL4),
+	MAX8925_REG_RESOURCE(LDOCTL5, LDOCTL5),
+	MAX8925_REG_RESOURCE(LDOCTL6, LDOCTL6),
+	MAX8925_REG_RESOURCE(LDOCTL7, LDOCTL7),
+	MAX8925_REG_RESOURCE(LDOCTL8, LDOCTL8),
+	MAX8925_REG_RESOURCE(LDOCTL9, LDOCTL9),
+	MAX8925_REG_RESOURCE(LDOCTL10, LDOCTL10),
+	MAX8925_REG_RESOURCE(LDOCTL11, LDOCTL11),
+	MAX8925_REG_RESOURCE(LDOCTL12, LDOCTL12),
+	MAX8925_REG_RESOURCE(LDOCTL13, LDOCTL13),
+	MAX8925_REG_RESOURCE(LDOCTL14, LDOCTL14),
+	MAX8925_REG_RESOURCE(LDOCTL15, LDOCTL15),
+	MAX8925_REG_RESOURCE(LDOCTL16, LDOCTL16),
+	MAX8925_REG_RESOURCE(LDOCTL17, LDOCTL17),
+	MAX8925_REG_RESOURCE(LDOCTL18, LDOCTL18),
+	MAX8925_REG_RESOURCE(LDOCTL19, LDOCTL19),
+	MAX8925_REG_RESOURCE(LDOCTL20, LDOCTL20),
+};
+
+#define MAX8925_REG_DEVS(_id)						\
+{									\
+	.name		= "max8925-regulator",				\
+	.num_resources	= 1,						\
+	.resources	= &regulator_resources[MAX8925_ID_##_id],	\
+	.id		= MAX8925_ID_##_id,				\
+}
+
+static struct mfd_cell regulator_devs[] = {
+	MAX8925_REG_DEVS(SD1),
+	MAX8925_REG_DEVS(SD2),
+	MAX8925_REG_DEVS(SD3),
+	MAX8925_REG_DEVS(LDO1),
+	MAX8925_REG_DEVS(LDO2),
+	MAX8925_REG_DEVS(LDO3),
+	MAX8925_REG_DEVS(LDO4),
+	MAX8925_REG_DEVS(LDO5),
+	MAX8925_REG_DEVS(LDO6),
+	MAX8925_REG_DEVS(LDO7),
+	MAX8925_REG_DEVS(LDO8),
+	MAX8925_REG_DEVS(LDO9),
+	MAX8925_REG_DEVS(LDO10),
+	MAX8925_REG_DEVS(LDO11),
+	MAX8925_REG_DEVS(LDO12),
+	MAX8925_REG_DEVS(LDO13),
+	MAX8925_REG_DEVS(LDO14),
+	MAX8925_REG_DEVS(LDO15),
+	MAX8925_REG_DEVS(LDO16),
+	MAX8925_REG_DEVS(LDO17),
+	MAX8925_REG_DEVS(LDO18),
+	MAX8925_REG_DEVS(LDO19),
+	MAX8925_REG_DEVS(LDO20),
+};
+
 static int __get_irq_offset(struct max8925_chip *chip, int irq, int mode,
 			    int *offset, int *bit)
 {
@@ -210,6 +313,30 @@ static int __devinit device_gpm_init(struct max8925_chip *chip,
 
 	/* enable hard-reset for ONKEY power-off */
 	max8925_set_bits(i2c, MAX8925_SYSENSEL, 0x80, 0x80);
+
+	ret = mfd_add_devices(chip->dev, 0, &regulator_devs[0],
+			      ARRAY_SIZE(regulator_devs),
+			      &regulator_resources[0], 0);
+	if (ret < 0) {
+		dev_err(chip->dev, "Failed to add regulator subdev\n");
+		goto out_irq;
+	}
+
+	if (pdata && pdata->backlight) {
+		ret = mfd_add_devices(chip->dev, 0, &backlight_devs[0],
+				      ARRAY_SIZE(backlight_devs),
+				      &backlight_resources[0], 0);
+		if (ret < 0) {
+			dev_err(chip->dev, "Failed to add backlight subdev\n");
+			goto out_dev;
+		}
+	}
+	return 0;
+out_dev:
+	mfd_remove_devices(chip->dev);
+out_irq:
+	if (chip->chip_irq)
+		free_irq(chip->chip_irq, chip);
 out:
 	return ret;
 }
@@ -233,6 +360,20 @@ static int __devinit device_adc_init(struct max8925_chip *chip,
 		goto out;
 	}
 	chip->chip_irq = i2c->irq;
+
+	if (pdata && pdata->touch) {
+		ret = mfd_add_devices(chip->dev, 0, &touch_devs[0],
+				      ARRAY_SIZE(touch_devs),
+				      &touch_resources[0], 0);
+		if (ret < 0) {
+			dev_err(chip->dev, "Failed to add touch subdev\n");
+			goto out_irq;
+		}
+	}
+	return 0;
+out_irq:
+	if (chip->chip_irq)
+		free_irq(chip->chip_irq, chip);
 out:
 	return ret;
 }
@@ -255,6 +396,7 @@ void max8925_device_exit(struct max8925_chip *chip)
 {
 	if (chip->chip_irq >= 0)
 		free_irq(chip->chip_irq, chip);
+	mfd_remove_devices(chip->dev);
 }
 
 MODULE_DESCRIPTION("PMIC Driver for Maxim MAX8925");
diff --git a/include/linux/mfd/max8925.h b/include/linux/mfd/max8925.h
index 2326246ddef2..b72dbe174d51 100644
--- a/include/linux/mfd/max8925.h
+++ b/include/linux/mfd/max8925.h
@@ -14,6 +14,33 @@
 
 #include <linux/interrupt.h>
 
+/* Unified sub device IDs for MAX8925 */
+enum {
+	MAX8925_ID_SD1,
+	MAX8925_ID_SD2,
+	MAX8925_ID_SD3,
+	MAX8925_ID_LDO1,
+	MAX8925_ID_LDO2,
+	MAX8925_ID_LDO3,
+	MAX8925_ID_LDO4,
+	MAX8925_ID_LDO5,
+	MAX8925_ID_LDO6,
+	MAX8925_ID_LDO7,
+	MAX8925_ID_LDO8,
+	MAX8925_ID_LDO9,
+	MAX8925_ID_LDO10,
+	MAX8925_ID_LDO11,
+	MAX8925_ID_LDO12,
+	MAX8925_ID_LDO13,
+	MAX8925_ID_LDO14,
+	MAX8925_ID_LDO15,
+	MAX8925_ID_LDO16,
+	MAX8925_ID_LDO17,
+	MAX8925_ID_LDO18,
+	MAX8925_ID_LDO19,
+	MAX8925_ID_LDO20,
+};
+
 /* Charger registers */
 #define MAX8925_CHG_IRQ1		(0x7e)
 #define MAX8925_CHG_IRQ2		(0x7f)
@@ -32,12 +59,65 @@
 /* Touch registers */
 #define MAX8925_TSC_IRQ			(0x00)
 #define MAX8925_TSC_IRQ_MASK		(0x01)
+#define MAX8925_ADC_RES_END		(0x6f)
 
 /* RTC registers */
 #define MAX8925_RTC_STATUS		(0x1a)
 #define MAX8925_RTC_IRQ			(0x1c)
 #define MAX8925_RTC_IRQ_MASK		(0x1d)
 
+/* WLED registers */
+#define MAX8925_WLED_MODE_CNTL		(0x84)
+#define MAX8925_WLED_CNTL		(0x85)
+
+/* MAX8925 Registers */
+#define MAX8925_SDCTL1			(0x04)
+#define MAX8925_SDCTL2			(0x07)
+#define MAX8925_SDCTL3			(0x0A)
+#define MAX8925_SDV1			(0x06)
+#define MAX8925_SDV2			(0x09)
+#define MAX8925_SDV3			(0x0C)
+#define MAX8925_LDOCTL1			(0x18)
+#define MAX8925_LDOCTL2			(0x1C)
+#define MAX8925_LDOCTL3			(0x20)
+#define MAX8925_LDOCTL4			(0x24)
+#define MAX8925_LDOCTL5			(0x28)
+#define MAX8925_LDOCTL6			(0x2C)
+#define MAX8925_LDOCTL7			(0x30)
+#define MAX8925_LDOCTL8			(0x34)
+#define MAX8925_LDOCTL9			(0x38)
+#define MAX8925_LDOCTL10		(0x3C)
+#define MAX8925_LDOCTL11		(0x40)
+#define MAX8925_LDOCTL12		(0x44)
+#define MAX8925_LDOCTL13		(0x48)
+#define MAX8925_LDOCTL14		(0x4C)
+#define MAX8925_LDOCTL15		(0x50)
+#define MAX8925_LDOCTL16		(0x10)
+#define MAX8925_LDOCTL17		(0x14)
+#define MAX8925_LDOCTL18		(0x72)
+#define MAX8925_LDOCTL19		(0x5C)
+#define MAX8925_LDOCTL20		(0x9C)
+#define MAX8925_LDOVOUT1		(0x1A)
+#define MAX8925_LDOVOUT2		(0x1E)
+#define MAX8925_LDOVOUT3		(0x22)
+#define MAX8925_LDOVOUT4		(0x26)
+#define MAX8925_LDOVOUT5		(0x2A)
+#define MAX8925_LDOVOUT6		(0x2E)
+#define MAX8925_LDOVOUT7		(0x32)
+#define MAX8925_LDOVOUT8		(0x36)
+#define MAX8925_LDOVOUT9		(0x3A)
+#define MAX8925_LDOVOUT10		(0x3E)
+#define MAX8925_LDOVOUT11		(0x42)
+#define MAX8925_LDOVOUT12		(0x46)
+#define MAX8925_LDOVOUT13		(0x4A)
+#define MAX8925_LDOVOUT14		(0x4E)
+#define MAX8925_LDOVOUT15		(0x52)
+#define MAX8925_LDOVOUT16		(0x12)
+#define MAX8925_LDOVOUT17		(0x16)
+#define MAX8925_LDOVOUT18		(0x74)
+#define MAX8925_LDOVOUT19		(0x5E)
+#define MAX8925_LDOVOUT20		(0x9E)
+
 /* bit definitions */
 #define CHG_IRQ1_MASK			(0x07)
 #define CHG_IRQ2_MASK			(0xff)
@@ -83,6 +163,8 @@ enum {
 #define MAX8925_IRQ_TSC_STICK		(0)
 #define MAX8925_IRQ_TSC_NSTICK		(1)
 
+#define MAX8925_MAX_REGULATOR		(23)
+
 struct max8925_irq {
 	irq_handler_t		handler;
 	void			*data;
@@ -100,7 +182,21 @@ struct max8925_chip {
 	int			chip_irq;
 };
 
+struct max8925_backlight_pdata {
+	int	lxw_scl;	/* 0/1 -- 0.8Ohm/0.4Ohm */
+	int	lxw_freq;	/* 700KHz ~ 1400KHz */
+	int	dual_string;	/* 0/1 -- single/dual string */
+};
+
+struct max8925_touch_pdata {
+	unsigned int		flags;
+};
+
 struct max8925_platform_data {
+	struct max8925_backlight_pdata	*backlight;
+	struct max8925_touch_pdata	*touch;
+	struct regulator_init_data	*regulator[MAX8925_MAX_REGULATOR];
+
 	int	chip_id;
 	int	chip_irq;
 };
-- 
cgit v1.2.3


From 6048a3dd2371c58611ea0ab8b306f8f1469399ae Mon Sep 17 00:00:00 2001
From: Cory Maccarrone <darkstar6262@gmail.com>
Date: Tue, 19 Jan 2010 11:22:45 +0100
Subject: mfd: Add HTCPLD driver

This change introduces a driver for the HTC PLD chip found
on some smartphones, such as the HTC Wizard and HTC Herald.
It works through the I2C bus and acts as a GPIO extender.
Specifically:

 * it can have several sub-devices, each with its own I2C
   address
 * Each sub-device provides 8 output and 8 input pins
 * The chip attaches to one GPIO to signal when any of the
   input GPIOs change -- at which point all chips must be
   scanned for changes

This driver implements the GPIOs throught the kernel's
GPIO and IRQ framework.  This allows any GPIO-servicing
drivers to operate on htcpld pins, such as the gpio-keys
and gpio-leds drivers.

Signed-off-by: Cory Maccarrone <darkstar6262@gmail.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/Kconfig      |   9 +
 drivers/mfd/Makefile     |   1 +
 drivers/mfd/htc-i2cpld.c | 710 +++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/htcpld.h   |  24 ++
 4 files changed, 744 insertions(+)
 create mode 100644 drivers/mfd/htc-i2cpld.c
 create mode 100644 include/linux/htcpld.h

(limited to 'include/linux')

diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index d63ab2eec661..9bcd447e71e2 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -78,6 +78,15 @@ config HTC_PASIC3
 	  HTC Magician devices, respectively. Actual functionality is
 	  handled by the leds-pasic3 and ds1wm drivers.
 
+config HTC_I2CPLD
+	bool "HTC I2C PLD chip support"
+	depends on I2C=y
+	help
+	  If you say yes here you get support for the supposed CPLD
+	  found on omap850 HTC devices like the HTC Wizard and HTC Herald.
+	  This device provides input and output GPIOs through an I2C
+	  interface to one or more sub-chips.
+
 config UCB1400_CORE
 	tristate "Philips UCB1400 Core driver"
 	depends on AC97_BUS
diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
index 261635700243..142d31202b14 100644
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile
@@ -10,6 +10,7 @@ obj-$(CONFIG_MFD_SH_MOBILE_SDHI)		+= sh_mobile_sdhi.o
 
 obj-$(CONFIG_HTC_EGPIO)		+= htc-egpio.o
 obj-$(CONFIG_HTC_PASIC3)	+= htc-pasic3.o
+obj-$(CONFIG_HTC_I2CPLD)	+= htc-i2cpld.o
 
 obj-$(CONFIG_MFD_DM355EVM_MSP)	+= dm355evm_msp.o
 
diff --git a/drivers/mfd/htc-i2cpld.c b/drivers/mfd/htc-i2cpld.c
new file mode 100644
index 000000000000..37b9fdab4f36
--- /dev/null
+++ b/drivers/mfd/htc-i2cpld.c
@@ -0,0 +1,710 @@
+/*
+ *  htc-i2cpld.c
+ *  Chip driver for an unknown CPLD chip found on omap850 HTC devices like
+ *  the HTC Wizard and HTC Herald.
+ *  The cpld is located on the i2c bus and acts as an input/output GPIO
+ *  extender.
+ *
+ *  Copyright (C) 2009 Cory Maccarrone <darkstar6262@gmail.com>
+ *
+ *  Based on work done in the linwizard project
+ *  Copyright (C) 2008-2009 Angelo Arrifano <miknix@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/platform_device.h>
+#include <linux/i2c.h>
+#include <linux/irq.h>
+#include <linux/spinlock.h>
+#include <linux/htcpld.h>
+#include <linux/gpio.h>
+
+struct htcpld_chip {
+	spinlock_t              lock;
+
+	/* chip info */
+	u8                      reset;
+	u8                      addr;
+	struct device           *dev;
+	struct i2c_client	*client;
+
+	/* Output details */
+	u8                      cache_out;
+	struct gpio_chip        chip_out;
+
+	/* Input details */
+	u8                      cache_in;
+	struct gpio_chip        chip_in;
+
+	u16                     irqs_enabled;
+	uint                    irq_start;
+	int                     nirqs;
+
+	/*
+	 * Work structure to allow for setting values outside of any
+	 * possible interrupt context
+	 */
+	struct work_struct set_val_work;
+};
+
+struct htcpld_data {
+	/* irq info */
+	u16                irqs_enabled;
+	uint               irq_start;
+	int                nirqs;
+	uint               chained_irq;
+	unsigned int       int_reset_gpio_hi;
+	unsigned int       int_reset_gpio_lo;
+
+	/* htcpld info */
+	struct htcpld_chip *chip;
+	unsigned int       nchips;
+};
+
+/* There does not appear to be a way to proactively mask interrupts
+ * on the htcpld chip itself.  So, we simply ignore interrupts that
+ * aren't desired. */
+static void htcpld_mask(unsigned int irq)
+{
+	struct htcpld_chip *chip = get_irq_chip_data(irq);
+	chip->irqs_enabled &= ~(1 << (irq - chip->irq_start));
+	pr_debug("HTCPLD mask %d %04x\n", irq, chip->irqs_enabled);
+}
+static void htcpld_unmask(unsigned int irq)
+{
+	struct htcpld_chip *chip = get_irq_chip_data(irq);
+	chip->irqs_enabled |= 1 << (irq - chip->irq_start);
+	pr_debug("HTCPLD unmask %d %04x\n", irq, chip->irqs_enabled);
+}
+
+static int htcpld_set_type(unsigned int irq, unsigned int flags)
+{
+	struct irq_desc *d = irq_to_desc(irq);
+
+	if (!d) {
+		pr_err("HTCPLD invalid IRQ: %d\n", irq);
+		return -EINVAL;
+	}
+
+	if (flags & ~IRQ_TYPE_SENSE_MASK)
+		return -EINVAL;
+
+	/* We only allow edge triggering */
+	if (flags & (IRQ_TYPE_LEVEL_LOW|IRQ_TYPE_LEVEL_HIGH))
+		return -EINVAL;
+
+	d->status &= ~IRQ_TYPE_SENSE_MASK;
+	d->status |= flags;
+
+	return 0;
+}
+
+static struct irq_chip htcpld_muxed_chip = {
+	.name     = "htcpld",
+	.mask     = htcpld_mask,
+	.unmask   = htcpld_unmask,
+	.set_type = htcpld_set_type,
+};
+
+/* To properly dispatch IRQ events, we need to read from the
+ * chip.  This is an I2C action that could possibly sleep
+ * (which is bad in interrupt context) -- so we use a threaded
+ * interrupt handler to get around that.
+ */
+static irqreturn_t htcpld_handler(int irq, void *dev)
+{
+	struct htcpld_data *htcpld = dev;
+	unsigned int i;
+	unsigned long flags;
+	int irqpin;
+	struct irq_desc *desc;
+
+	if (!htcpld) {
+		pr_debug("htcpld is null in ISR\n");
+		return IRQ_HANDLED;
+	}
+
+	/*
+	 * For each chip, do a read of the chip and trigger any interrupts
+	 * desired.  The interrupts will be triggered from LSB to MSB (i.e.
+	 * bit 0 first, then bit 1, etc.)
+	 *
+	 * For chips that have no interrupt range specified, just skip 'em.
+	 */
+	for (i = 0; i < htcpld->nchips; i++) {
+		struct htcpld_chip *chip = &htcpld->chip[i];
+		struct i2c_client *client;
+		int val;
+		unsigned long uval, old_val;
+
+		if (!chip) {
+			pr_debug("chip %d is null in ISR\n", i);
+			continue;
+		}
+
+		if (chip->nirqs == 0)
+			continue;
+
+		client = chip->client;
+		if (!client) {
+			pr_debug("client %d is null in ISR\n", i);
+			continue;
+		}
+
+		/* Scan the chip */
+		val = i2c_smbus_read_byte_data(client, chip->cache_out);
+		if (val < 0) {
+			/* Throw a warning and skip this chip */
+			dev_warn(chip->dev, "Unable to read from chip: %d\n",
+				 val);
+			continue;
+		}
+
+		uval = (unsigned long)val;
+
+		spin_lock_irqsave(&chip->lock, flags);
+
+		/* Save away the old value so we can compare it */
+		old_val = chip->cache_in;
+
+		/* Write the new value */
+		chip->cache_in = uval;
+
+		spin_unlock_irqrestore(&chip->lock, flags);
+
+		/*
+		 * For each bit in the data (starting at bit 0), trigger
+		 * associated interrupts.
+		 */
+		for (irqpin = 0; irqpin < chip->nirqs; irqpin++) {
+			unsigned oldb, newb;
+			int flags;
+
+			irq = chip->irq_start + irqpin;
+			desc = irq_to_desc(irq);
+			flags = desc->status;
+
+			/* Run the IRQ handler, but only if the bit value
+			 * changed, and the proper flags are set */
+			oldb = (old_val >> irqpin) & 1;
+			newb = (uval >> irqpin) & 1;
+
+			if ((!oldb && newb && (flags & IRQ_TYPE_EDGE_RISING)) ||
+			    (oldb && !newb &&
+			     (flags & IRQ_TYPE_EDGE_FALLING))) {
+				pr_debug("fire IRQ %d\n", irqpin);
+				desc->handle_irq(irq, desc);
+			}
+		}
+	}
+
+	/*
+	 * In order to continue receiving interrupts, the int_reset_gpio must
+	 * be asserted.
+	 */
+	if (htcpld->int_reset_gpio_hi)
+		gpio_set_value(htcpld->int_reset_gpio_hi, 1);
+	if (htcpld->int_reset_gpio_lo)
+		gpio_set_value(htcpld->int_reset_gpio_lo, 0);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * The GPIO set routines can be called from interrupt context, especially if,
+ * for example they're attached to the led-gpio framework and a trigger is
+ * enabled.  As such, we declared work above in the htcpld_chip structure,
+ * and that work is scheduled in the set routine.  The kernel can then run
+ * the I2C functions, which will sleep, in process context.
+ */
+void htcpld_chip_set(struct gpio_chip *chip, unsigned offset, int val)
+{
+	struct i2c_client *client;
+	struct htcpld_chip *chip_data;
+	unsigned long flags;
+
+	chip_data = container_of(chip, struct htcpld_chip, chip_out);
+	if (!chip_data)
+		return;
+
+	client = chip_data->client;
+	if (client == NULL)
+		return;
+
+	spin_lock_irqsave(&chip_data->lock, flags);
+	if (val)
+		chip_data->cache_out |= (1 << offset);
+	else
+		chip_data->cache_out &= ~(1 << offset);
+	spin_unlock_irqrestore(&chip_data->lock, flags);
+
+	schedule_work(&(chip_data->set_val_work));
+}
+
+void htcpld_chip_set_ni(struct work_struct *work)
+{
+	struct htcpld_chip *chip_data;
+	struct i2c_client *client;
+
+	chip_data = container_of(work, struct htcpld_chip, set_val_work);
+	client = chip_data->client;
+	i2c_smbus_read_byte_data(client, chip_data->cache_out);
+}
+
+int htcpld_chip_get(struct gpio_chip *chip, unsigned offset)
+{
+	struct htcpld_chip *chip_data;
+	int val = 0;
+	int is_input = 0;
+
+	/* Try out first */
+	chip_data = container_of(chip, struct htcpld_chip, chip_out);
+	if (!chip_data) {
+		/* Try in */
+		is_input = 1;
+		chip_data = container_of(chip, struct htcpld_chip, chip_in);
+		if (!chip_data)
+			return -EINVAL;
+	}
+
+	/* Determine if this is an input or output GPIO */
+	if (!is_input)
+		/* Use the output cache */
+		val = (chip_data->cache_out >> offset) & 1;
+	else
+		/* Use the input cache */
+		val = (chip_data->cache_in >> offset) & 1;
+
+	if (val)
+		return 1;
+	else
+		return 0;
+}
+
+static int htcpld_direction_output(struct gpio_chip *chip,
+					unsigned offset, int value)
+{
+	htcpld_chip_set(chip, offset, value);
+	return 0;
+}
+
+static int htcpld_direction_input(struct gpio_chip *chip,
+					unsigned offset)
+{
+	/*
+	 * No-op: this function can only be called on the input chip.
+	 * We do however make sure the offset is within range.
+	 */
+	return (offset < chip->ngpio) ? 0 : -EINVAL;
+}
+
+int htcpld_chip_to_irq(struct gpio_chip *chip, unsigned offset)
+{
+	struct htcpld_chip *chip_data;
+
+	chip_data = container_of(chip, struct htcpld_chip, chip_in);
+
+	if (offset < chip_data->nirqs)
+		return chip_data->irq_start + offset;
+	else
+		return -EINVAL;
+}
+
+void htcpld_chip_reset(struct i2c_client *client)
+{
+	struct htcpld_chip *chip_data = i2c_get_clientdata(client);
+	if (!chip_data)
+		return;
+
+	i2c_smbus_read_byte_data(
+		client, (chip_data->cache_out = chip_data->reset));
+}
+
+static int __devinit htcpld_setup_chip_irq(
+		struct platform_device *pdev,
+		int chip_index)
+{
+	struct htcpld_data *htcpld;
+	struct device *dev = &pdev->dev;
+	struct htcpld_core_platform_data *pdata;
+	struct htcpld_chip *chip;
+	struct htcpld_chip_platform_data *plat_chip_data;
+	unsigned int irq, irq_end;
+	int ret = 0;
+
+	/* Get the platform and driver data */
+	pdata = dev->platform_data;
+	htcpld = platform_get_drvdata(pdev);
+	chip = &htcpld->chip[chip_index];
+	plat_chip_data = &pdata->chip[chip_index];
+
+	/* Setup irq handlers */
+	irq_end = chip->irq_start + chip->nirqs;
+	for (irq = chip->irq_start; irq < irq_end; irq++) {
+		set_irq_chip(irq, &htcpld_muxed_chip);
+		set_irq_chip_data(irq, chip);
+		set_irq_handler(irq, handle_simple_irq);
+#ifdef CONFIG_ARM
+		set_irq_flags(irq, IRQF_VALID | IRQF_PROBE);
+#else
+		set_irq_probe(irq);
+#endif
+	}
+
+	return ret;
+}
+
+static int __devinit htcpld_register_chip_i2c(
+		struct platform_device *pdev,
+		int chip_index)
+{
+	struct htcpld_data *htcpld;
+	struct device *dev = &pdev->dev;
+	struct htcpld_core_platform_data *pdata;
+	struct htcpld_chip *chip;
+	struct htcpld_chip_platform_data *plat_chip_data;
+	struct i2c_adapter *adapter;
+	struct i2c_client *client;
+	struct i2c_board_info info;
+
+	/* Get the platform and driver data */
+	pdata = dev->platform_data;
+	htcpld = platform_get_drvdata(pdev);
+	chip = &htcpld->chip[chip_index];
+	plat_chip_data = &pdata->chip[chip_index];
+
+	adapter = i2c_get_adapter(pdata->i2c_adapter_id);
+	if (adapter == NULL) {
+		/* Eek, no such I2C adapter!  Bail out. */
+		dev_warn(dev, "Chip at i2c address 0x%x: Invalid i2c adapter %d\n",
+			 plat_chip_data->addr, pdata->i2c_adapter_id);
+		return -ENODEV;
+	}
+
+	if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_READ_BYTE_DATA)) {
+		dev_warn(dev, "i2c adapter %d non-functional\n",
+			 pdata->i2c_adapter_id);
+		return -EINVAL;
+	}
+
+	memset(&info, 0, sizeof(struct i2c_board_info));
+	info.addr = plat_chip_data->addr;
+	strlcpy(info.type, "htcpld-chip", I2C_NAME_SIZE);
+	info.platform_data = chip;
+
+	/* Add the I2C device.  This calls the probe() function. */
+	client = i2c_new_device(adapter, &info);
+	if (!client) {
+		/* I2C device registration failed, contineu with the next */
+		dev_warn(dev, "Unable to add I2C device for 0x%x\n",
+			 plat_chip_data->addr);
+		return -ENODEV;
+	}
+
+	i2c_set_clientdata(client, chip);
+	snprintf(client->name, I2C_NAME_SIZE, "Chip_0x%d", client->addr);
+	chip->client = client;
+
+	/* Reset the chip */
+	htcpld_chip_reset(client);
+	chip->cache_in = i2c_smbus_read_byte_data(client, chip->cache_out);
+
+	return 0;
+}
+
+static void __devinit htcpld_unregister_chip_i2c(
+		struct platform_device *pdev,
+		int chip_index)
+{
+	struct htcpld_data *htcpld;
+	struct htcpld_chip *chip;
+
+	/* Get the platform and driver data */
+	htcpld = platform_get_drvdata(pdev);
+	chip = &htcpld->chip[chip_index];
+
+	if (chip->client)
+		i2c_unregister_device(chip->client);
+}
+
+static int __devinit htcpld_register_chip_gpio(
+		struct platform_device *pdev,
+		int chip_index)
+{
+	struct htcpld_data *htcpld;
+	struct device *dev = &pdev->dev;
+	struct htcpld_core_platform_data *pdata;
+	struct htcpld_chip *chip;
+	struct htcpld_chip_platform_data *plat_chip_data;
+	struct gpio_chip *gpio_chip;
+	int ret = 0;
+
+	/* Get the platform and driver data */
+	pdata = dev->platform_data;
+	htcpld = platform_get_drvdata(pdev);
+	chip = &htcpld->chip[chip_index];
+	plat_chip_data = &pdata->chip[chip_index];
+
+	/* Setup the GPIO chips */
+	gpio_chip = &(chip->chip_out);
+	gpio_chip->label           = "htcpld-out";
+	gpio_chip->dev             = dev;
+	gpio_chip->owner           = THIS_MODULE;
+	gpio_chip->get             = htcpld_chip_get;
+	gpio_chip->set             = htcpld_chip_set;
+	gpio_chip->direction_input = NULL;
+	gpio_chip->direction_output = htcpld_direction_output;
+	gpio_chip->base            = plat_chip_data->gpio_out_base;
+	gpio_chip->ngpio           = plat_chip_data->num_gpios;
+
+	gpio_chip = &(chip->chip_in);
+	gpio_chip->label           = "htcpld-in";
+	gpio_chip->dev             = dev;
+	gpio_chip->owner           = THIS_MODULE;
+	gpio_chip->get             = htcpld_chip_get;
+	gpio_chip->set             = NULL;
+	gpio_chip->direction_input = htcpld_direction_input;
+	gpio_chip->direction_output = NULL;
+	gpio_chip->to_irq          = htcpld_chip_to_irq;
+	gpio_chip->base            = plat_chip_data->gpio_in_base;
+	gpio_chip->ngpio           = plat_chip_data->num_gpios;
+
+	/* Add the GPIO chips */
+	ret = gpiochip_add(&(chip->chip_out));
+	if (ret) {
+		dev_warn(dev, "Unable to register output GPIOs for 0x%x: %d\n",
+			 plat_chip_data->addr, ret);
+		return ret;
+	}
+
+	ret = gpiochip_add(&(chip->chip_in));
+	if (ret) {
+		int error;
+
+		dev_warn(dev, "Unable to register input GPIOs for 0x%x: %d\n",
+			 plat_chip_data->addr, ret);
+
+		error = gpiochip_remove(&(chip->chip_out));
+		if (error)
+			dev_warn(dev, "Error while trying to unregister gpio chip: %d\n", error);
+
+		return ret;
+	}
+
+	return 0;
+}
+
+static int __devinit htcpld_setup_chips(struct platform_device *pdev)
+{
+	struct htcpld_data *htcpld;
+	struct device *dev = &pdev->dev;
+	struct htcpld_core_platform_data *pdata;
+	int i;
+
+	/* Get the platform and driver data */
+	pdata = dev->platform_data;
+	htcpld = platform_get_drvdata(pdev);
+
+	/* Setup each chip's output GPIOs */
+	htcpld->nchips = pdata->num_chip;
+	htcpld->chip = kzalloc(sizeof(struct htcpld_chip) * htcpld->nchips,
+			       GFP_KERNEL);
+	if (!htcpld->chip) {
+		dev_warn(dev, "Unable to allocate memory for chips\n");
+		return -ENOMEM;
+	}
+
+	/* Add the chips as best we can */
+	for (i = 0; i < htcpld->nchips; i++) {
+		int ret;
+
+		/* Setup the HTCPLD chips */
+		htcpld->chip[i].reset = pdata->chip[i].reset;
+		htcpld->chip[i].cache_out = pdata->chip[i].reset;
+		htcpld->chip[i].cache_in = 0;
+		htcpld->chip[i].dev = dev;
+		htcpld->chip[i].irq_start = pdata->chip[i].irq_base;
+		htcpld->chip[i].nirqs = pdata->chip[i].num_irqs;
+
+		INIT_WORK(&(htcpld->chip[i].set_val_work), &htcpld_chip_set_ni);
+		spin_lock_init(&(htcpld->chip[i].lock));
+
+		/* Setup the interrupts for the chip */
+		if (htcpld->chained_irq) {
+			ret = htcpld_setup_chip_irq(pdev, i);
+			if (ret)
+				continue;
+		}
+
+		/* Register the chip with I2C */
+		ret = htcpld_register_chip_i2c(pdev, i);
+		if (ret)
+			continue;
+
+
+		/* Register the chips with the GPIO subsystem */
+		ret = htcpld_register_chip_gpio(pdev, i);
+		if (ret) {
+			/* Unregister the chip from i2c and continue */
+			htcpld_unregister_chip_i2c(pdev, i);
+			continue;
+		}
+
+		dev_info(dev, "Registered chip at 0x%x\n", pdata->chip[i].addr);
+	}
+
+	return 0;
+}
+
+static int __devinit htcpld_core_probe(struct platform_device *pdev)
+{
+	struct htcpld_data *htcpld;
+	struct device *dev = &pdev->dev;
+	struct htcpld_core_platform_data *pdata;
+	struct resource *res;
+	int ret = 0;
+
+	if (!dev)
+		return -ENODEV;
+
+	pdata = dev->platform_data;
+	if (!pdata) {
+		dev_warn(dev, "Platform data not found for htcpld core!\n");
+		return -ENXIO;
+	}
+
+	htcpld = kzalloc(sizeof(struct htcpld_data), GFP_KERNEL);
+	if (!htcpld)
+		return -ENOMEM;
+
+	/* Find chained irq */
+	ret = -EINVAL;
+	res = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
+	if (res) {
+		int flags;
+		htcpld->chained_irq = res->start;
+
+		/* Setup the chained interrupt handler */
+		flags = IRQF_TRIGGER_FALLING | IRQF_TRIGGER_RISING;
+		ret = request_threaded_irq(htcpld->chained_irq,
+					   NULL, htcpld_handler,
+					   flags, pdev->name, htcpld);
+		if (ret) {
+			dev_warn(dev, "Unable to setup chained irq handler: %d\n", ret);
+			goto fail;
+		} else
+			device_init_wakeup(dev, 0);
+	}
+
+	/* Set the driver data */
+	platform_set_drvdata(pdev, htcpld);
+
+	/* Setup the htcpld chips */
+	ret = htcpld_setup_chips(pdev);
+	if (ret)
+		goto fail;
+
+	/* Request the GPIO(s) for the int reset and set them up */
+	if (pdata->int_reset_gpio_hi) {
+		ret = gpio_request(pdata->int_reset_gpio_hi, "htcpld-core");
+		if (ret) {
+			/*
+			 * If it failed, that sucks, but we can probably
+			 * continue on without it.
+			 */
+			dev_warn(dev, "Unable to request int_reset_gpio_hi -- interrupts may not work\n");
+			htcpld->int_reset_gpio_hi = 0;
+		} else {
+			htcpld->int_reset_gpio_hi = pdata->int_reset_gpio_hi;
+			gpio_set_value(htcpld->int_reset_gpio_hi, 1);
+		}
+	}
+
+	if (pdata->int_reset_gpio_lo) {
+		ret = gpio_request(pdata->int_reset_gpio_lo, "htcpld-core");
+		if (ret) {
+			/*
+			 * If it failed, that sucks, but we can probably
+			 * continue on without it.
+			 */
+			dev_warn(dev, "Unable to request int_reset_gpio_lo -- interrupts may not work\n");
+			htcpld->int_reset_gpio_lo = 0;
+		} else {
+			htcpld->int_reset_gpio_lo = pdata->int_reset_gpio_lo;
+			gpio_set_value(htcpld->int_reset_gpio_lo, 0);
+		}
+	}
+
+	dev_info(dev, "Initialized successfully\n");
+	return 0;
+
+fail:
+	kfree(htcpld);
+	return ret;
+}
+
+/* The I2C Driver -- used internally */
+static const struct i2c_device_id htcpld_chip_id[] = {
+	{ "htcpld-chip", 0 },
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, htcpld_chip_id);
+
+
+static struct i2c_driver htcpld_chip_driver = {
+	.driver = {
+		.name	= "htcpld-chip",
+	},
+	.id_table = htcpld_chip_id,
+};
+
+/* The Core Driver */
+static struct platform_driver htcpld_core_driver = {
+	.driver = {
+		.name = "i2c-htcpld",
+	},
+};
+
+static int __init htcpld_core_init(void)
+{
+	int ret;
+
+	/* Register the I2C Chip driver */
+	ret = i2c_add_driver(&htcpld_chip_driver);
+	if (ret)
+		return ret;
+
+	/* Probe for our chips */
+	return platform_driver_probe(&htcpld_core_driver, htcpld_core_probe);
+}
+
+static void __exit htcpld_core_exit(void)
+{
+	i2c_del_driver(&htcpld_chip_driver);
+	platform_driver_unregister(&htcpld_core_driver);
+}
+
+module_init(htcpld_core_init);
+module_exit(htcpld_core_exit);
+
+MODULE_AUTHOR("Cory Maccarrone <darkstar6262@gmail.com>");
+MODULE_DESCRIPTION("I2C HTC PLD Driver");
+MODULE_LICENSE("GPL");
+
diff --git a/include/linux/htcpld.h b/include/linux/htcpld.h
new file mode 100644
index 000000000000..ab3f6cb4dddc
--- /dev/null
+++ b/include/linux/htcpld.h
@@ -0,0 +1,24 @@
+#ifndef __LINUX_HTCPLD_H
+#define __LINUX_HTCPLD_H
+
+struct htcpld_chip_platform_data {
+	unsigned int addr;
+	unsigned int reset;
+	unsigned int num_gpios;
+	unsigned int gpio_out_base;
+	unsigned int gpio_in_base;
+	unsigned int irq_base;
+	unsigned int num_irqs;
+};
+
+struct htcpld_core_platform_data {
+	unsigned int                      int_reset_gpio_hi;
+	unsigned int                      int_reset_gpio_lo;
+	unsigned int                      i2c_adapter_id;
+
+	struct htcpld_chip_platform_data  *chip;
+	unsigned int                      num_chip;
+};
+
+#endif /* __LINUX_HTCPLD_H */
+
-- 
cgit v1.2.3


From f7ea2dc59ed46dcd0f1cfaccda02211f4507207b Mon Sep 17 00:00:00 2001
From: Christoph Egger <siccegge@stud.informatik.uni-erlangen.de>
Date: Fri, 15 Jan 2010 15:33:46 +0100
Subject: mfd: Remove leftover from discontinued TWL4030 battery patch

The TWL4030_BCI_BATTERY config option originates from a patch to the
omap git tree. However inclusion in linux was seemingly rejected and
the functionality nears inclusion under a different name so this
removes the bits of the old version that made it into the mainline
kernel again.

Signed-off-by: Christoph Egger <siccegge@stud.informatik.uni-erlangen.de>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/twl-core.c  | 19 -------------------
 include/linux/i2c/twl.h |  7 +------
 2 files changed, 1 insertion(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/twl-core.c b/drivers/mfd/twl-core.c
index 19a930d06241..d81003f4867e 100644
--- a/drivers/mfd/twl-core.c
+++ b/drivers/mfd/twl-core.c
@@ -58,13 +58,6 @@
 
 #define DRIVER_NAME			"twl"
 
-#if defined(CONFIG_TWL4030_BCI_BATTERY) || \
-	defined(CONFIG_TWL4030_BCI_BATTERY_MODULE)
-#define twl_has_bci()		true
-#else
-#define twl_has_bci()		false
-#endif
-
 #if defined(CONFIG_KEYBOARD_TWL4030) || defined(CONFIG_KEYBOARD_TWL4030_MODULE)
 #define twl_has_keypad()	true
 #else
@@ -588,18 +581,6 @@ add_children(struct twl4030_platform_data *pdata, unsigned long features)
 	struct device	*child;
 	unsigned sub_chip_id;
 
-	if (twl_has_bci() && pdata->bci &&
-	    !(features & (TPS_SUBSET | TWL5031))) {
-		child = add_child(3, "twl4030_bci",
-				pdata->bci, sizeof(*pdata->bci),
-				false,
-				/* irq0 = CHG_PRES, irq1 = BCI */
-				pdata->irq_base + BCI_PRES_INTR_OFFSET,
-				pdata->irq_base + BCI_INTR_OFFSET);
-		if (IS_ERR(child))
-			return PTR_ERR(child);
-	}
-
 	if (twl_has_gpio() && pdata->gpio) {
 		child = add_child(SUB_CHIP_ID1, "twl4030_gpio",
 				pdata->gpio, sizeof(*pdata->gpio),
diff --git a/include/linux/i2c/twl.h b/include/linux/i2c/twl.h
index 7897f3096560..9733e9e53f2b 100644
--- a/include/linux/i2c/twl.h
+++ b/include/linux/i2c/twl.h
@@ -605,12 +605,7 @@ int twl4030_sih_setup(int module);
 #define TWL4030_VAUX3_DEV_GRP		0x1F
 #define TWL4030_VAUX3_DEDICATED		0x22
 
-#if defined(CONFIG_TWL4030_BCI_BATTERY) || \
-	defined(CONFIG_TWL4030_BCI_BATTERY_MODULE)
-	extern int twl4030charger_usb_en(int enable);
-#else
-	static inline int twl4030charger_usb_en(int enable) { return 0; }
-#endif
+static inline int twl4030charger_usb_en(int enable) { return 0; }
 
 /*----------------------------------------------------------------------*/
 
-- 
cgit v1.2.3


From 1c4d3b70a40c666331052adf77933e6994590b74 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Fri, 29 Jan 2010 18:20:28 +0000
Subject: mfd: Add WM8994 register definitions

As a separate patch due to the large size.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 include/linux/mfd/wm8994/registers.h | 4292 ++++++++++++++++++++++++++++++++++
 1 file changed, 4292 insertions(+)
 create mode 100644 include/linux/mfd/wm8994/registers.h

(limited to 'include/linux')

diff --git a/include/linux/mfd/wm8994/registers.h b/include/linux/mfd/wm8994/registers.h
new file mode 100644
index 000000000000..967f62f54159
--- /dev/null
+++ b/include/linux/mfd/wm8994/registers.h
@@ -0,0 +1,4292 @@
+/*
+ * include/linux/mfd/wm8994/registers.h -- Register definitions for WM8994
+ *
+ * Copyright 2009 Wolfson Microelectronics PLC.
+ *
+ * Author: Mark Brown <broonie@opensource.wolfsonmicro.com>
+ *
+ *  This program is free software; you can redistribute  it and/or modify it
+ *  under  the terms of  the GNU General  Public License as published by the
+ *  Free Software Foundation;  either version 2 of the  License, or (at your
+ *  option) any later version.
+ *
+ */
+
+#ifndef __MFD_WM8994_REGISTERS_H__
+#define __MFD_WM8994_REGISTERS_H__
+
+/*
+ * Register values.
+ */
+#define WM8994_SOFTWARE_RESET                   0x00
+#define WM8994_POWER_MANAGEMENT_1               0x01
+#define WM8994_POWER_MANAGEMENT_2               0x02
+#define WM8994_POWER_MANAGEMENT_3               0x03
+#define WM8994_POWER_MANAGEMENT_4               0x04
+#define WM8994_POWER_MANAGEMENT_5               0x05
+#define WM8994_POWER_MANAGEMENT_6               0x06
+#define WM8994_INPUT_MIXER_1                    0x15
+#define WM8994_LEFT_LINE_INPUT_1_2_VOLUME       0x18
+#define WM8994_LEFT_LINE_INPUT_3_4_VOLUME       0x19
+#define WM8994_RIGHT_LINE_INPUT_1_2_VOLUME      0x1A
+#define WM8994_RIGHT_LINE_INPUT_3_4_VOLUME      0x1B
+#define WM8994_LEFT_OUTPUT_VOLUME               0x1C
+#define WM8994_RIGHT_OUTPUT_VOLUME              0x1D
+#define WM8994_LINE_OUTPUTS_VOLUME              0x1E
+#define WM8994_HPOUT2_VOLUME                    0x1F
+#define WM8994_LEFT_OPGA_VOLUME                 0x20
+#define WM8994_RIGHT_OPGA_VOLUME                0x21
+#define WM8994_SPKMIXL_ATTENUATION              0x22
+#define WM8994_SPKMIXR_ATTENUATION              0x23
+#define WM8994_SPKOUT_MIXERS                    0x24
+#define WM8994_CLASSD                           0x25
+#define WM8994_SPEAKER_VOLUME_LEFT              0x26
+#define WM8994_SPEAKER_VOLUME_RIGHT             0x27
+#define WM8994_INPUT_MIXER_2                    0x28
+#define WM8994_INPUT_MIXER_3                    0x29
+#define WM8994_INPUT_MIXER_4                    0x2A
+#define WM8994_INPUT_MIXER_5                    0x2B
+#define WM8994_INPUT_MIXER_6                    0x2C
+#define WM8994_OUTPUT_MIXER_1                   0x2D
+#define WM8994_OUTPUT_MIXER_2                   0x2E
+#define WM8994_OUTPUT_MIXER_3                   0x2F
+#define WM8994_OUTPUT_MIXER_4                   0x30
+#define WM8994_OUTPUT_MIXER_5                   0x31
+#define WM8994_OUTPUT_MIXER_6                   0x32
+#define WM8994_HPOUT2_MIXER                     0x33
+#define WM8994_LINE_MIXER_1                     0x34
+#define WM8994_LINE_MIXER_2                     0x35
+#define WM8994_SPEAKER_MIXER                    0x36
+#define WM8994_ADDITIONAL_CONTROL               0x37
+#define WM8994_ANTIPOP_1                        0x38
+#define WM8994_ANTIPOP_2                        0x39
+#define WM8994_MICBIAS                          0x3A
+#define WM8994_LDO_1                            0x3B
+#define WM8994_LDO_2                            0x3C
+#define WM8994_CHARGE_PUMP_1                    0x4C
+#define WM8994_CLASS_W_1                        0x51
+#define WM8994_DC_SERVO_1                       0x54
+#define WM8994_DC_SERVO_2                       0x55
+#define WM8994_DC_SERVO_4                       0x57
+#define WM8994_DC_SERVO_READBACK                0x58
+#define WM8994_ANALOGUE_HP_1                    0x60
+#define WM8994_CHIP_REVISION                    0x100
+#define WM8994_CONTROL_INTERFACE                0x101
+#define WM8994_WRITE_SEQUENCER_CTRL_1           0x110
+#define WM8994_WRITE_SEQUENCER_CTRL_2           0x111
+#define WM8994_AIF1_CLOCKING_1                  0x200
+#define WM8994_AIF1_CLOCKING_2                  0x201
+#define WM8994_AIF2_CLOCKING_1                  0x204
+#define WM8994_AIF2_CLOCKING_2                  0x205
+#define WM8994_CLOCKING_1                       0x208
+#define WM8994_CLOCKING_2                       0x209
+#define WM8994_AIF1_RATE                        0x210
+#define WM8994_AIF2_RATE                        0x211
+#define WM8994_RATE_STATUS                      0x212
+#define WM8994_FLL1_CONTROL_1                   0x220
+#define WM8994_FLL1_CONTROL_2                   0x221
+#define WM8994_FLL1_CONTROL_3                   0x222
+#define WM8994_FLL1_CONTROL_4                   0x223
+#define WM8994_FLL1_CONTROL_5                   0x224
+#define WM8994_FLL2_CONTROL_1                   0x240
+#define WM8994_FLL2_CONTROL_2                   0x241
+#define WM8994_FLL2_CONTROL_3                   0x242
+#define WM8994_FLL2_CONTROL_4                   0x243
+#define WM8994_FLL2_CONTROL_5                   0x244
+#define WM8994_AIF1_CONTROL_1                   0x300
+#define WM8994_AIF1_CONTROL_2                   0x301
+#define WM8994_AIF1_MASTER_SLAVE                0x302
+#define WM8994_AIF1_BCLK                        0x303
+#define WM8994_AIF1ADC_LRCLK                    0x304
+#define WM8994_AIF1DAC_LRCLK                    0x305
+#define WM8994_AIF1DAC_DATA                     0x306
+#define WM8994_AIF1ADC_DATA                     0x307
+#define WM8994_AIF2_CONTROL_1                   0x310
+#define WM8994_AIF2_CONTROL_2                   0x311
+#define WM8994_AIF2_MASTER_SLAVE                0x312
+#define WM8994_AIF2_BCLK                        0x313
+#define WM8994_AIF2ADC_LRCLK                    0x314
+#define WM8994_AIF2DAC_LRCLK                    0x315
+#define WM8994_AIF2DAC_DATA                     0x316
+#define WM8994_AIF2ADC_DATA                     0x317
+#define WM8994_AIF1_ADC1_LEFT_VOLUME            0x400
+#define WM8994_AIF1_ADC1_RIGHT_VOLUME           0x401
+#define WM8994_AIF1_DAC1_LEFT_VOLUME            0x402
+#define WM8994_AIF1_DAC1_RIGHT_VOLUME           0x403
+#define WM8994_AIF1_ADC2_LEFT_VOLUME            0x404
+#define WM8994_AIF1_ADC2_RIGHT_VOLUME           0x405
+#define WM8994_AIF1_DAC2_LEFT_VOLUME            0x406
+#define WM8994_AIF1_DAC2_RIGHT_VOLUME           0x407
+#define WM8994_AIF1_ADC1_FILTERS                0x410
+#define WM8994_AIF1_ADC2_FILTERS                0x411
+#define WM8994_AIF1_DAC1_FILTERS_1              0x420
+#define WM8994_AIF1_DAC1_FILTERS_2              0x421
+#define WM8994_AIF1_DAC2_FILTERS_1              0x422
+#define WM8994_AIF1_DAC2_FILTERS_2              0x423
+#define WM8994_AIF1_DRC1_1                      0x440
+#define WM8994_AIF1_DRC1_2                      0x441
+#define WM8994_AIF1_DRC1_3                      0x442
+#define WM8994_AIF1_DRC1_4                      0x443
+#define WM8994_AIF1_DRC1_5                      0x444
+#define WM8994_AIF1_DRC2_1                      0x450
+#define WM8994_AIF1_DRC2_2                      0x451
+#define WM8994_AIF1_DRC2_3                      0x452
+#define WM8994_AIF1_DRC2_4                      0x453
+#define WM8994_AIF1_DRC2_5                      0x454
+#define WM8994_AIF1_DAC1_EQ_GAINS_1             0x480
+#define WM8994_AIF1_DAC1_EQ_GAINS_2             0x481
+#define WM8994_AIF1_DAC1_EQ_BAND_1_A            0x482
+#define WM8994_AIF1_DAC1_EQ_BAND_1_B            0x483
+#define WM8994_AIF1_DAC1_EQ_BAND_1_PG           0x484
+#define WM8994_AIF1_DAC1_EQ_BAND_2_A            0x485
+#define WM8994_AIF1_DAC1_EQ_BAND_2_B            0x486
+#define WM8994_AIF1_DAC1_EQ_BAND_2_C            0x487
+#define WM8994_AIF1_DAC1_EQ_BAND_2_PG           0x488
+#define WM8994_AIF1_DAC1_EQ_BAND_3_A            0x489
+#define WM8994_AIF1_DAC1_EQ_BAND_3_B            0x48A
+#define WM8994_AIF1_DAC1_EQ_BAND_3_C            0x48B
+#define WM8994_AIF1_DAC1_EQ_BAND_3_PG           0x48C
+#define WM8994_AIF1_DAC1_EQ_BAND_4_A            0x48D
+#define WM8994_AIF1_DAC1_EQ_BAND_4_B            0x48E
+#define WM8994_AIF1_DAC1_EQ_BAND_4_C            0x48F
+#define WM8994_AIF1_DAC1_EQ_BAND_4_PG           0x490
+#define WM8994_AIF1_DAC1_EQ_BAND_5_A            0x491
+#define WM8994_AIF1_DAC1_EQ_BAND_5_B            0x492
+#define WM8994_AIF1_DAC1_EQ_BAND_5_PG           0x493
+#define WM8994_AIF1_DAC2_EQ_GAINS_1             0x4A0
+#define WM8994_AIF1_DAC2_EQ_GAINS_2             0x4A1
+#define WM8994_AIF1_DAC2_EQ_BAND_1_A            0x4A2
+#define WM8994_AIF1_DAC2_EQ_BAND_1_B            0x4A3
+#define WM8994_AIF1_DAC2_EQ_BAND_1_PG           0x4A4
+#define WM8994_AIF1_DAC2_EQ_BAND_2_A            0x4A5
+#define WM8994_AIF1_DAC2_EQ_BAND_2_B            0x4A6
+#define WM8994_AIF1_DAC2_EQ_BAND_2_C            0x4A7
+#define WM8994_AIF1_DAC2_EQ_BAND_2_PG           0x4A8
+#define WM8994_AIF1_DAC2_EQ_BAND_3_A            0x4A9
+#define WM8994_AIF1_DAC2_EQ_BAND_3_B            0x4AA
+#define WM8994_AIF1_DAC2_EQ_BAND_3_C            0x4AB
+#define WM8994_AIF1_DAC2_EQ_BAND_3_PG           0x4AC
+#define WM8994_AIF1_DAC2_EQ_BAND_4_A            0x4AD
+#define WM8994_AIF1_DAC2_EQ_BAND_4_B            0x4AE
+#define WM8994_AIF1_DAC2_EQ_BAND_4_C            0x4AF
+#define WM8994_AIF1_DAC2_EQ_BAND_4_PG           0x4B0
+#define WM8994_AIF1_DAC2_EQ_BAND_5_A            0x4B1
+#define WM8994_AIF1_DAC2_EQ_BAND_5_B            0x4B2
+#define WM8994_AIF1_DAC2_EQ_BAND_5_PG           0x4B3
+#define WM8994_AIF2_ADC_LEFT_VOLUME             0x500
+#define WM8994_AIF2_ADC_RIGHT_VOLUME            0x501
+#define WM8994_AIF2_DAC_LEFT_VOLUME             0x502
+#define WM8994_AIF2_DAC_RIGHT_VOLUME            0x503
+#define WM8994_AIF2_ADC_FILTERS                 0x510
+#define WM8994_AIF2_DAC_FILTERS_1               0x520
+#define WM8994_AIF2_DAC_FILTERS_2               0x521
+#define WM8994_AIF2_DRC_1                       0x540
+#define WM8994_AIF2_DRC_2                       0x541
+#define WM8994_AIF2_DRC_3                       0x542
+#define WM8994_AIF2_DRC_4                       0x543
+#define WM8994_AIF2_DRC_5                       0x544
+#define WM8994_AIF2_EQ_GAINS_1                  0x580
+#define WM8994_AIF2_EQ_GAINS_2                  0x581
+#define WM8994_AIF2_EQ_BAND_1_A                 0x582
+#define WM8994_AIF2_EQ_BAND_1_B                 0x583
+#define WM8994_AIF2_EQ_BAND_1_PG                0x584
+#define WM8994_AIF2_EQ_BAND_2_A                 0x585
+#define WM8994_AIF2_EQ_BAND_2_B                 0x586
+#define WM8994_AIF2_EQ_BAND_2_C                 0x587
+#define WM8994_AIF2_EQ_BAND_2_PG                0x588
+#define WM8994_AIF2_EQ_BAND_3_A                 0x589
+#define WM8994_AIF2_EQ_BAND_3_B                 0x58A
+#define WM8994_AIF2_EQ_BAND_3_C                 0x58B
+#define WM8994_AIF2_EQ_BAND_3_PG                0x58C
+#define WM8994_AIF2_EQ_BAND_4_A                 0x58D
+#define WM8994_AIF2_EQ_BAND_4_B                 0x58E
+#define WM8994_AIF2_EQ_BAND_4_C                 0x58F
+#define WM8994_AIF2_EQ_BAND_4_PG                0x590
+#define WM8994_AIF2_EQ_BAND_5_A                 0x591
+#define WM8994_AIF2_EQ_BAND_5_B                 0x592
+#define WM8994_AIF2_EQ_BAND_5_PG                0x593
+#define WM8994_DAC1_MIXER_VOLUMES               0x600
+#define WM8994_DAC1_LEFT_MIXER_ROUTING          0x601
+#define WM8994_DAC1_RIGHT_MIXER_ROUTING         0x602
+#define WM8994_DAC2_MIXER_VOLUMES               0x603
+#define WM8994_DAC2_LEFT_MIXER_ROUTING          0x604
+#define WM8994_DAC2_RIGHT_MIXER_ROUTING         0x605
+#define WM8994_AIF1_ADC1_LEFT_MIXER_ROUTING     0x606
+#define WM8994_AIF1_ADC1_RIGHT_MIXER_ROUTING    0x607
+#define WM8994_AIF1_ADC2_LEFT_MIXER_ROUTING     0x608
+#define WM8994_AIF1_ADC2_RIGHT_MIXER_ROUTING    0x609
+#define WM8994_DAC1_LEFT_VOLUME                 0x610
+#define WM8994_DAC1_RIGHT_VOLUME                0x611
+#define WM8994_DAC2_LEFT_VOLUME                 0x612
+#define WM8994_DAC2_RIGHT_VOLUME                0x613
+#define WM8994_DAC_SOFTMUTE                     0x614
+#define WM8994_OVERSAMPLING                     0x620
+#define WM8994_SIDETONE                         0x621
+#define WM8994_GPIO_1                           0x700
+#define WM8994_GPIO_2                           0x701
+#define WM8994_GPIO_3                           0x702
+#define WM8994_GPIO_4                           0x703
+#define WM8994_GPIO_5                           0x704
+#define WM8994_GPIO_6                           0x705
+#define WM8994_GPIO_7                           0x706
+#define WM8994_GPIO_8                           0x707
+#define WM8994_GPIO_9                           0x708
+#define WM8994_GPIO_10                          0x709
+#define WM8994_GPIO_11                          0x70A
+#define WM8994_PULL_CONTROL_1                   0x720
+#define WM8994_PULL_CONTROL_2                   0x721
+#define WM8994_INTERRUPT_STATUS_1               0x730
+#define WM8994_INTERRUPT_STATUS_2               0x731
+#define WM8994_INTERRUPT_RAW_STATUS_2           0x732
+#define WM8994_INTERRUPT_STATUS_1_MASK          0x738
+#define WM8994_INTERRUPT_STATUS_2_MASK          0x739
+#define WM8994_INTERRUPT_CONTROL                0x740
+#define WM8994_IRQ_DEBOUNCE                     0x748
+#define WM8994_WRITE_SEQUENCER_0                0x3000
+#define WM8994_WRITE_SEQUENCER_1                0x3001
+#define WM8994_WRITE_SEQUENCER_2                0x3002
+#define WM8994_WRITE_SEQUENCER_3                0x3003
+#define WM8994_WRITE_SEQUENCER_4                0x3004
+#define WM8994_WRITE_SEQUENCER_5                0x3005
+#define WM8994_WRITE_SEQUENCER_6                0x3006
+#define WM8994_WRITE_SEQUENCER_7                0x3007
+#define WM8994_WRITE_SEQUENCER_8                0x3008
+#define WM8994_WRITE_SEQUENCER_9                0x3009
+#define WM8994_WRITE_SEQUENCER_10               0x300A
+#define WM8994_WRITE_SEQUENCER_11               0x300B
+#define WM8994_WRITE_SEQUENCER_12               0x300C
+#define WM8994_WRITE_SEQUENCER_13               0x300D
+#define WM8994_WRITE_SEQUENCER_14               0x300E
+#define WM8994_WRITE_SEQUENCER_15               0x300F
+#define WM8994_WRITE_SEQUENCER_16               0x3010
+#define WM8994_WRITE_SEQUENCER_17               0x3011
+#define WM8994_WRITE_SEQUENCER_18               0x3012
+#define WM8994_WRITE_SEQUENCER_19               0x3013
+#define WM8994_WRITE_SEQUENCER_20               0x3014
+#define WM8994_WRITE_SEQUENCER_21               0x3015
+#define WM8994_WRITE_SEQUENCER_22               0x3016
+#define WM8994_WRITE_SEQUENCER_23               0x3017
+#define WM8994_WRITE_SEQUENCER_24               0x3018
+#define WM8994_WRITE_SEQUENCER_25               0x3019
+#define WM8994_WRITE_SEQUENCER_26               0x301A
+#define WM8994_WRITE_SEQUENCER_27               0x301B
+#define WM8994_WRITE_SEQUENCER_28               0x301C
+#define WM8994_WRITE_SEQUENCER_29               0x301D
+#define WM8994_WRITE_SEQUENCER_30               0x301E
+#define WM8994_WRITE_SEQUENCER_31               0x301F
+#define WM8994_WRITE_SEQUENCER_32               0x3020
+#define WM8994_WRITE_SEQUENCER_33               0x3021
+#define WM8994_WRITE_SEQUENCER_34               0x3022
+#define WM8994_WRITE_SEQUENCER_35               0x3023
+#define WM8994_WRITE_SEQUENCER_36               0x3024
+#define WM8994_WRITE_SEQUENCER_37               0x3025
+#define WM8994_WRITE_SEQUENCER_38               0x3026
+#define WM8994_WRITE_SEQUENCER_39               0x3027
+#define WM8994_WRITE_SEQUENCER_40               0x3028
+#define WM8994_WRITE_SEQUENCER_41               0x3029
+#define WM8994_WRITE_SEQUENCER_42               0x302A
+#define WM8994_WRITE_SEQUENCER_43               0x302B
+#define WM8994_WRITE_SEQUENCER_44               0x302C
+#define WM8994_WRITE_SEQUENCER_45               0x302D
+#define WM8994_WRITE_SEQUENCER_46               0x302E
+#define WM8994_WRITE_SEQUENCER_47               0x302F
+#define WM8994_WRITE_SEQUENCER_48               0x3030
+#define WM8994_WRITE_SEQUENCER_49               0x3031
+#define WM8994_WRITE_SEQUENCER_50               0x3032
+#define WM8994_WRITE_SEQUENCER_51               0x3033
+#define WM8994_WRITE_SEQUENCER_52               0x3034
+#define WM8994_WRITE_SEQUENCER_53               0x3035
+#define WM8994_WRITE_SEQUENCER_54               0x3036
+#define WM8994_WRITE_SEQUENCER_55               0x3037
+#define WM8994_WRITE_SEQUENCER_56               0x3038
+#define WM8994_WRITE_SEQUENCER_57               0x3039
+#define WM8994_WRITE_SEQUENCER_58               0x303A
+#define WM8994_WRITE_SEQUENCER_59               0x303B
+#define WM8994_WRITE_SEQUENCER_60               0x303C
+#define WM8994_WRITE_SEQUENCER_61               0x303D
+#define WM8994_WRITE_SEQUENCER_62               0x303E
+#define WM8994_WRITE_SEQUENCER_63               0x303F
+#define WM8994_WRITE_SEQUENCER_64               0x3040
+#define WM8994_WRITE_SEQUENCER_65               0x3041
+#define WM8994_WRITE_SEQUENCER_66               0x3042
+#define WM8994_WRITE_SEQUENCER_67               0x3043
+#define WM8994_WRITE_SEQUENCER_68               0x3044
+#define WM8994_WRITE_SEQUENCER_69               0x3045
+#define WM8994_WRITE_SEQUENCER_70               0x3046
+#define WM8994_WRITE_SEQUENCER_71               0x3047
+#define WM8994_WRITE_SEQUENCER_72               0x3048
+#define WM8994_WRITE_SEQUENCER_73               0x3049
+#define WM8994_WRITE_SEQUENCER_74               0x304A
+#define WM8994_WRITE_SEQUENCER_75               0x304B
+#define WM8994_WRITE_SEQUENCER_76               0x304C
+#define WM8994_WRITE_SEQUENCER_77               0x304D
+#define WM8994_WRITE_SEQUENCER_78               0x304E
+#define WM8994_WRITE_SEQUENCER_79               0x304F
+#define WM8994_WRITE_SEQUENCER_80               0x3050
+#define WM8994_WRITE_SEQUENCER_81               0x3051
+#define WM8994_WRITE_SEQUENCER_82               0x3052
+#define WM8994_WRITE_SEQUENCER_83               0x3053
+#define WM8994_WRITE_SEQUENCER_84               0x3054
+#define WM8994_WRITE_SEQUENCER_85               0x3055
+#define WM8994_WRITE_SEQUENCER_86               0x3056
+#define WM8994_WRITE_SEQUENCER_87               0x3057
+#define WM8994_WRITE_SEQUENCER_88               0x3058
+#define WM8994_WRITE_SEQUENCER_89               0x3059
+#define WM8994_WRITE_SEQUENCER_90               0x305A
+#define WM8994_WRITE_SEQUENCER_91               0x305B
+#define WM8994_WRITE_SEQUENCER_92               0x305C
+#define WM8994_WRITE_SEQUENCER_93               0x305D
+#define WM8994_WRITE_SEQUENCER_94               0x305E
+#define WM8994_WRITE_SEQUENCER_95               0x305F
+#define WM8994_WRITE_SEQUENCER_96               0x3060
+#define WM8994_WRITE_SEQUENCER_97               0x3061
+#define WM8994_WRITE_SEQUENCER_98               0x3062
+#define WM8994_WRITE_SEQUENCER_99               0x3063
+#define WM8994_WRITE_SEQUENCER_100              0x3064
+#define WM8994_WRITE_SEQUENCER_101              0x3065
+#define WM8994_WRITE_SEQUENCER_102              0x3066
+#define WM8994_WRITE_SEQUENCER_103              0x3067
+#define WM8994_WRITE_SEQUENCER_104              0x3068
+#define WM8994_WRITE_SEQUENCER_105              0x3069
+#define WM8994_WRITE_SEQUENCER_106              0x306A
+#define WM8994_WRITE_SEQUENCER_107              0x306B
+#define WM8994_WRITE_SEQUENCER_108              0x306C
+#define WM8994_WRITE_SEQUENCER_109              0x306D
+#define WM8994_WRITE_SEQUENCER_110              0x306E
+#define WM8994_WRITE_SEQUENCER_111              0x306F
+#define WM8994_WRITE_SEQUENCER_112              0x3070
+#define WM8994_WRITE_SEQUENCER_113              0x3071
+#define WM8994_WRITE_SEQUENCER_114              0x3072
+#define WM8994_WRITE_SEQUENCER_115              0x3073
+#define WM8994_WRITE_SEQUENCER_116              0x3074
+#define WM8994_WRITE_SEQUENCER_117              0x3075
+#define WM8994_WRITE_SEQUENCER_118              0x3076
+#define WM8994_WRITE_SEQUENCER_119              0x3077
+#define WM8994_WRITE_SEQUENCER_120              0x3078
+#define WM8994_WRITE_SEQUENCER_121              0x3079
+#define WM8994_WRITE_SEQUENCER_122              0x307A
+#define WM8994_WRITE_SEQUENCER_123              0x307B
+#define WM8994_WRITE_SEQUENCER_124              0x307C
+#define WM8994_WRITE_SEQUENCER_125              0x307D
+#define WM8994_WRITE_SEQUENCER_126              0x307E
+#define WM8994_WRITE_SEQUENCER_127              0x307F
+#define WM8994_WRITE_SEQUENCER_128              0x3080
+#define WM8994_WRITE_SEQUENCER_129              0x3081
+#define WM8994_WRITE_SEQUENCER_130              0x3082
+#define WM8994_WRITE_SEQUENCER_131              0x3083
+#define WM8994_WRITE_SEQUENCER_132              0x3084
+#define WM8994_WRITE_SEQUENCER_133              0x3085
+#define WM8994_WRITE_SEQUENCER_134              0x3086
+#define WM8994_WRITE_SEQUENCER_135              0x3087
+#define WM8994_WRITE_SEQUENCER_136              0x3088
+#define WM8994_WRITE_SEQUENCER_137              0x3089
+#define WM8994_WRITE_SEQUENCER_138              0x308A
+#define WM8994_WRITE_SEQUENCER_139              0x308B
+#define WM8994_WRITE_SEQUENCER_140              0x308C
+#define WM8994_WRITE_SEQUENCER_141              0x308D
+#define WM8994_WRITE_SEQUENCER_142              0x308E
+#define WM8994_WRITE_SEQUENCER_143              0x308F
+#define WM8994_WRITE_SEQUENCER_144              0x3090
+#define WM8994_WRITE_SEQUENCER_145              0x3091
+#define WM8994_WRITE_SEQUENCER_146              0x3092
+#define WM8994_WRITE_SEQUENCER_147              0x3093
+#define WM8994_WRITE_SEQUENCER_148              0x3094
+#define WM8994_WRITE_SEQUENCER_149              0x3095
+#define WM8994_WRITE_SEQUENCER_150              0x3096
+#define WM8994_WRITE_SEQUENCER_151              0x3097
+#define WM8994_WRITE_SEQUENCER_152              0x3098
+#define WM8994_WRITE_SEQUENCER_153              0x3099
+#define WM8994_WRITE_SEQUENCER_154              0x309A
+#define WM8994_WRITE_SEQUENCER_155              0x309B
+#define WM8994_WRITE_SEQUENCER_156              0x309C
+#define WM8994_WRITE_SEQUENCER_157              0x309D
+#define WM8994_WRITE_SEQUENCER_158              0x309E
+#define WM8994_WRITE_SEQUENCER_159              0x309F
+#define WM8994_WRITE_SEQUENCER_160              0x30A0
+#define WM8994_WRITE_SEQUENCER_161              0x30A1
+#define WM8994_WRITE_SEQUENCER_162              0x30A2
+#define WM8994_WRITE_SEQUENCER_163              0x30A3
+#define WM8994_WRITE_SEQUENCER_164              0x30A4
+#define WM8994_WRITE_SEQUENCER_165              0x30A5
+#define WM8994_WRITE_SEQUENCER_166              0x30A6
+#define WM8994_WRITE_SEQUENCER_167              0x30A7
+#define WM8994_WRITE_SEQUENCER_168              0x30A8
+#define WM8994_WRITE_SEQUENCER_169              0x30A9
+#define WM8994_WRITE_SEQUENCER_170              0x30AA
+#define WM8994_WRITE_SEQUENCER_171              0x30AB
+#define WM8994_WRITE_SEQUENCER_172              0x30AC
+#define WM8994_WRITE_SEQUENCER_173              0x30AD
+#define WM8994_WRITE_SEQUENCER_174              0x30AE
+#define WM8994_WRITE_SEQUENCER_175              0x30AF
+#define WM8994_WRITE_SEQUENCER_176              0x30B0
+#define WM8994_WRITE_SEQUENCER_177              0x30B1
+#define WM8994_WRITE_SEQUENCER_178              0x30B2
+#define WM8994_WRITE_SEQUENCER_179              0x30B3
+#define WM8994_WRITE_SEQUENCER_180              0x30B4
+#define WM8994_WRITE_SEQUENCER_181              0x30B5
+#define WM8994_WRITE_SEQUENCER_182              0x30B6
+#define WM8994_WRITE_SEQUENCER_183              0x30B7
+#define WM8994_WRITE_SEQUENCER_184              0x30B8
+#define WM8994_WRITE_SEQUENCER_185              0x30B9
+#define WM8994_WRITE_SEQUENCER_186              0x30BA
+#define WM8994_WRITE_SEQUENCER_187              0x30BB
+#define WM8994_WRITE_SEQUENCER_188              0x30BC
+#define WM8994_WRITE_SEQUENCER_189              0x30BD
+#define WM8994_WRITE_SEQUENCER_190              0x30BE
+#define WM8994_WRITE_SEQUENCER_191              0x30BF
+#define WM8994_WRITE_SEQUENCER_192              0x30C0
+#define WM8994_WRITE_SEQUENCER_193              0x30C1
+#define WM8994_WRITE_SEQUENCER_194              0x30C2
+#define WM8994_WRITE_SEQUENCER_195              0x30C3
+#define WM8994_WRITE_SEQUENCER_196              0x30C4
+#define WM8994_WRITE_SEQUENCER_197              0x30C5
+#define WM8994_WRITE_SEQUENCER_198              0x30C6
+#define WM8994_WRITE_SEQUENCER_199              0x30C7
+#define WM8994_WRITE_SEQUENCER_200              0x30C8
+#define WM8994_WRITE_SEQUENCER_201              0x30C9
+#define WM8994_WRITE_SEQUENCER_202              0x30CA
+#define WM8994_WRITE_SEQUENCER_203              0x30CB
+#define WM8994_WRITE_SEQUENCER_204              0x30CC
+#define WM8994_WRITE_SEQUENCER_205              0x30CD
+#define WM8994_WRITE_SEQUENCER_206              0x30CE
+#define WM8994_WRITE_SEQUENCER_207              0x30CF
+#define WM8994_WRITE_SEQUENCER_208              0x30D0
+#define WM8994_WRITE_SEQUENCER_209              0x30D1
+#define WM8994_WRITE_SEQUENCER_210              0x30D2
+#define WM8994_WRITE_SEQUENCER_211              0x30D3
+#define WM8994_WRITE_SEQUENCER_212              0x30D4
+#define WM8994_WRITE_SEQUENCER_213              0x30D5
+#define WM8994_WRITE_SEQUENCER_214              0x30D6
+#define WM8994_WRITE_SEQUENCER_215              0x30D7
+#define WM8994_WRITE_SEQUENCER_216              0x30D8
+#define WM8994_WRITE_SEQUENCER_217              0x30D9
+#define WM8994_WRITE_SEQUENCER_218              0x30DA
+#define WM8994_WRITE_SEQUENCER_219              0x30DB
+#define WM8994_WRITE_SEQUENCER_220              0x30DC
+#define WM8994_WRITE_SEQUENCER_221              0x30DD
+#define WM8994_WRITE_SEQUENCER_222              0x30DE
+#define WM8994_WRITE_SEQUENCER_223              0x30DF
+#define WM8994_WRITE_SEQUENCER_224              0x30E0
+#define WM8994_WRITE_SEQUENCER_225              0x30E1
+#define WM8994_WRITE_SEQUENCER_226              0x30E2
+#define WM8994_WRITE_SEQUENCER_227              0x30E3
+#define WM8994_WRITE_SEQUENCER_228              0x30E4
+#define WM8994_WRITE_SEQUENCER_229              0x30E5
+#define WM8994_WRITE_SEQUENCER_230              0x30E6
+#define WM8994_WRITE_SEQUENCER_231              0x30E7
+#define WM8994_WRITE_SEQUENCER_232              0x30E8
+#define WM8994_WRITE_SEQUENCER_233              0x30E9
+#define WM8994_WRITE_SEQUENCER_234              0x30EA
+#define WM8994_WRITE_SEQUENCER_235              0x30EB
+#define WM8994_WRITE_SEQUENCER_236              0x30EC
+#define WM8994_WRITE_SEQUENCER_237              0x30ED
+#define WM8994_WRITE_SEQUENCER_238              0x30EE
+#define WM8994_WRITE_SEQUENCER_239              0x30EF
+#define WM8994_WRITE_SEQUENCER_240              0x30F0
+#define WM8994_WRITE_SEQUENCER_241              0x30F1
+#define WM8994_WRITE_SEQUENCER_242              0x30F2
+#define WM8994_WRITE_SEQUENCER_243              0x30F3
+#define WM8994_WRITE_SEQUENCER_244              0x30F4
+#define WM8994_WRITE_SEQUENCER_245              0x30F5
+#define WM8994_WRITE_SEQUENCER_246              0x30F6
+#define WM8994_WRITE_SEQUENCER_247              0x30F7
+#define WM8994_WRITE_SEQUENCER_248              0x30F8
+#define WM8994_WRITE_SEQUENCER_249              0x30F9
+#define WM8994_WRITE_SEQUENCER_250              0x30FA
+#define WM8994_WRITE_SEQUENCER_251              0x30FB
+#define WM8994_WRITE_SEQUENCER_252              0x30FC
+#define WM8994_WRITE_SEQUENCER_253              0x30FD
+#define WM8994_WRITE_SEQUENCER_254              0x30FE
+#define WM8994_WRITE_SEQUENCER_255              0x30FF
+#define WM8994_WRITE_SEQUENCER_256              0x3100
+#define WM8994_WRITE_SEQUENCER_257              0x3101
+#define WM8994_WRITE_SEQUENCER_258              0x3102
+#define WM8994_WRITE_SEQUENCER_259              0x3103
+#define WM8994_WRITE_SEQUENCER_260              0x3104
+#define WM8994_WRITE_SEQUENCER_261              0x3105
+#define WM8994_WRITE_SEQUENCER_262              0x3106
+#define WM8994_WRITE_SEQUENCER_263              0x3107
+#define WM8994_WRITE_SEQUENCER_264              0x3108
+#define WM8994_WRITE_SEQUENCER_265              0x3109
+#define WM8994_WRITE_SEQUENCER_266              0x310A
+#define WM8994_WRITE_SEQUENCER_267              0x310B
+#define WM8994_WRITE_SEQUENCER_268              0x310C
+#define WM8994_WRITE_SEQUENCER_269              0x310D
+#define WM8994_WRITE_SEQUENCER_270              0x310E
+#define WM8994_WRITE_SEQUENCER_271              0x310F
+#define WM8994_WRITE_SEQUENCER_272              0x3110
+#define WM8994_WRITE_SEQUENCER_273              0x3111
+#define WM8994_WRITE_SEQUENCER_274              0x3112
+#define WM8994_WRITE_SEQUENCER_275              0x3113
+#define WM8994_WRITE_SEQUENCER_276              0x3114
+#define WM8994_WRITE_SEQUENCER_277              0x3115
+#define WM8994_WRITE_SEQUENCER_278              0x3116
+#define WM8994_WRITE_SEQUENCER_279              0x3117
+#define WM8994_WRITE_SEQUENCER_280              0x3118
+#define WM8994_WRITE_SEQUENCER_281              0x3119
+#define WM8994_WRITE_SEQUENCER_282              0x311A
+#define WM8994_WRITE_SEQUENCER_283              0x311B
+#define WM8994_WRITE_SEQUENCER_284              0x311C
+#define WM8994_WRITE_SEQUENCER_285              0x311D
+#define WM8994_WRITE_SEQUENCER_286              0x311E
+#define WM8994_WRITE_SEQUENCER_287              0x311F
+#define WM8994_WRITE_SEQUENCER_288              0x3120
+#define WM8994_WRITE_SEQUENCER_289              0x3121
+#define WM8994_WRITE_SEQUENCER_290              0x3122
+#define WM8994_WRITE_SEQUENCER_291              0x3123
+#define WM8994_WRITE_SEQUENCER_292              0x3124
+#define WM8994_WRITE_SEQUENCER_293              0x3125
+#define WM8994_WRITE_SEQUENCER_294              0x3126
+#define WM8994_WRITE_SEQUENCER_295              0x3127
+#define WM8994_WRITE_SEQUENCER_296              0x3128
+#define WM8994_WRITE_SEQUENCER_297              0x3129
+#define WM8994_WRITE_SEQUENCER_298              0x312A
+#define WM8994_WRITE_SEQUENCER_299              0x312B
+#define WM8994_WRITE_SEQUENCER_300              0x312C
+#define WM8994_WRITE_SEQUENCER_301              0x312D
+#define WM8994_WRITE_SEQUENCER_302              0x312E
+#define WM8994_WRITE_SEQUENCER_303              0x312F
+#define WM8994_WRITE_SEQUENCER_304              0x3130
+#define WM8994_WRITE_SEQUENCER_305              0x3131
+#define WM8994_WRITE_SEQUENCER_306              0x3132
+#define WM8994_WRITE_SEQUENCER_307              0x3133
+#define WM8994_WRITE_SEQUENCER_308              0x3134
+#define WM8994_WRITE_SEQUENCER_309              0x3135
+#define WM8994_WRITE_SEQUENCER_310              0x3136
+#define WM8994_WRITE_SEQUENCER_311              0x3137
+#define WM8994_WRITE_SEQUENCER_312              0x3138
+#define WM8994_WRITE_SEQUENCER_313              0x3139
+#define WM8994_WRITE_SEQUENCER_314              0x313A
+#define WM8994_WRITE_SEQUENCER_315              0x313B
+#define WM8994_WRITE_SEQUENCER_316              0x313C
+#define WM8994_WRITE_SEQUENCER_317              0x313D
+#define WM8994_WRITE_SEQUENCER_318              0x313E
+#define WM8994_WRITE_SEQUENCER_319              0x313F
+#define WM8994_WRITE_SEQUENCER_320              0x3140
+#define WM8994_WRITE_SEQUENCER_321              0x3141
+#define WM8994_WRITE_SEQUENCER_322              0x3142
+#define WM8994_WRITE_SEQUENCER_323              0x3143
+#define WM8994_WRITE_SEQUENCER_324              0x3144
+#define WM8994_WRITE_SEQUENCER_325              0x3145
+#define WM8994_WRITE_SEQUENCER_326              0x3146
+#define WM8994_WRITE_SEQUENCER_327              0x3147
+#define WM8994_WRITE_SEQUENCER_328              0x3148
+#define WM8994_WRITE_SEQUENCER_329              0x3149
+#define WM8994_WRITE_SEQUENCER_330              0x314A
+#define WM8994_WRITE_SEQUENCER_331              0x314B
+#define WM8994_WRITE_SEQUENCER_332              0x314C
+#define WM8994_WRITE_SEQUENCER_333              0x314D
+#define WM8994_WRITE_SEQUENCER_334              0x314E
+#define WM8994_WRITE_SEQUENCER_335              0x314F
+#define WM8994_WRITE_SEQUENCER_336              0x3150
+#define WM8994_WRITE_SEQUENCER_337              0x3151
+#define WM8994_WRITE_SEQUENCER_338              0x3152
+#define WM8994_WRITE_SEQUENCER_339              0x3153
+#define WM8994_WRITE_SEQUENCER_340              0x3154
+#define WM8994_WRITE_SEQUENCER_341              0x3155
+#define WM8994_WRITE_SEQUENCER_342              0x3156
+#define WM8994_WRITE_SEQUENCER_343              0x3157
+#define WM8994_WRITE_SEQUENCER_344              0x3158
+#define WM8994_WRITE_SEQUENCER_345              0x3159
+#define WM8994_WRITE_SEQUENCER_346              0x315A
+#define WM8994_WRITE_SEQUENCER_347              0x315B
+#define WM8994_WRITE_SEQUENCER_348              0x315C
+#define WM8994_WRITE_SEQUENCER_349              0x315D
+#define WM8994_WRITE_SEQUENCER_350              0x315E
+#define WM8994_WRITE_SEQUENCER_351              0x315F
+#define WM8994_WRITE_SEQUENCER_352              0x3160
+#define WM8994_WRITE_SEQUENCER_353              0x3161
+#define WM8994_WRITE_SEQUENCER_354              0x3162
+#define WM8994_WRITE_SEQUENCER_355              0x3163
+#define WM8994_WRITE_SEQUENCER_356              0x3164
+#define WM8994_WRITE_SEQUENCER_357              0x3165
+#define WM8994_WRITE_SEQUENCER_358              0x3166
+#define WM8994_WRITE_SEQUENCER_359              0x3167
+#define WM8994_WRITE_SEQUENCER_360              0x3168
+#define WM8994_WRITE_SEQUENCER_361              0x3169
+#define WM8994_WRITE_SEQUENCER_362              0x316A
+#define WM8994_WRITE_SEQUENCER_363              0x316B
+#define WM8994_WRITE_SEQUENCER_364              0x316C
+#define WM8994_WRITE_SEQUENCER_365              0x316D
+#define WM8994_WRITE_SEQUENCER_366              0x316E
+#define WM8994_WRITE_SEQUENCER_367              0x316F
+#define WM8994_WRITE_SEQUENCER_368              0x3170
+#define WM8994_WRITE_SEQUENCER_369              0x3171
+#define WM8994_WRITE_SEQUENCER_370              0x3172
+#define WM8994_WRITE_SEQUENCER_371              0x3173
+#define WM8994_WRITE_SEQUENCER_372              0x3174
+#define WM8994_WRITE_SEQUENCER_373              0x3175
+#define WM8994_WRITE_SEQUENCER_374              0x3176
+#define WM8994_WRITE_SEQUENCER_375              0x3177
+#define WM8994_WRITE_SEQUENCER_376              0x3178
+#define WM8994_WRITE_SEQUENCER_377              0x3179
+#define WM8994_WRITE_SEQUENCER_378              0x317A
+#define WM8994_WRITE_SEQUENCER_379              0x317B
+#define WM8994_WRITE_SEQUENCER_380              0x317C
+#define WM8994_WRITE_SEQUENCER_381              0x317D
+#define WM8994_WRITE_SEQUENCER_382              0x317E
+#define WM8994_WRITE_SEQUENCER_383              0x317F
+#define WM8994_WRITE_SEQUENCER_384              0x3180
+#define WM8994_WRITE_SEQUENCER_385              0x3181
+#define WM8994_WRITE_SEQUENCER_386              0x3182
+#define WM8994_WRITE_SEQUENCER_387              0x3183
+#define WM8994_WRITE_SEQUENCER_388              0x3184
+#define WM8994_WRITE_SEQUENCER_389              0x3185
+#define WM8994_WRITE_SEQUENCER_390              0x3186
+#define WM8994_WRITE_SEQUENCER_391              0x3187
+#define WM8994_WRITE_SEQUENCER_392              0x3188
+#define WM8994_WRITE_SEQUENCER_393              0x3189
+#define WM8994_WRITE_SEQUENCER_394              0x318A
+#define WM8994_WRITE_SEQUENCER_395              0x318B
+#define WM8994_WRITE_SEQUENCER_396              0x318C
+#define WM8994_WRITE_SEQUENCER_397              0x318D
+#define WM8994_WRITE_SEQUENCER_398              0x318E
+#define WM8994_WRITE_SEQUENCER_399              0x318F
+#define WM8994_WRITE_SEQUENCER_400              0x3190
+#define WM8994_WRITE_SEQUENCER_401              0x3191
+#define WM8994_WRITE_SEQUENCER_402              0x3192
+#define WM8994_WRITE_SEQUENCER_403              0x3193
+#define WM8994_WRITE_SEQUENCER_404              0x3194
+#define WM8994_WRITE_SEQUENCER_405              0x3195
+#define WM8994_WRITE_SEQUENCER_406              0x3196
+#define WM8994_WRITE_SEQUENCER_407              0x3197
+#define WM8994_WRITE_SEQUENCER_408              0x3198
+#define WM8994_WRITE_SEQUENCER_409              0x3199
+#define WM8994_WRITE_SEQUENCER_410              0x319A
+#define WM8994_WRITE_SEQUENCER_411              0x319B
+#define WM8994_WRITE_SEQUENCER_412              0x319C
+#define WM8994_WRITE_SEQUENCER_413              0x319D
+#define WM8994_WRITE_SEQUENCER_414              0x319E
+#define WM8994_WRITE_SEQUENCER_415              0x319F
+#define WM8994_WRITE_SEQUENCER_416              0x31A0
+#define WM8994_WRITE_SEQUENCER_417              0x31A1
+#define WM8994_WRITE_SEQUENCER_418              0x31A2
+#define WM8994_WRITE_SEQUENCER_419              0x31A3
+#define WM8994_WRITE_SEQUENCER_420              0x31A4
+#define WM8994_WRITE_SEQUENCER_421              0x31A5
+#define WM8994_WRITE_SEQUENCER_422              0x31A6
+#define WM8994_WRITE_SEQUENCER_423              0x31A7
+#define WM8994_WRITE_SEQUENCER_424              0x31A8
+#define WM8994_WRITE_SEQUENCER_425              0x31A9
+#define WM8994_WRITE_SEQUENCER_426              0x31AA
+#define WM8994_WRITE_SEQUENCER_427              0x31AB
+#define WM8994_WRITE_SEQUENCER_428              0x31AC
+#define WM8994_WRITE_SEQUENCER_429              0x31AD
+#define WM8994_WRITE_SEQUENCER_430              0x31AE
+#define WM8994_WRITE_SEQUENCER_431              0x31AF
+#define WM8994_WRITE_SEQUENCER_432              0x31B0
+#define WM8994_WRITE_SEQUENCER_433              0x31B1
+#define WM8994_WRITE_SEQUENCER_434              0x31B2
+#define WM8994_WRITE_SEQUENCER_435              0x31B3
+#define WM8994_WRITE_SEQUENCER_436              0x31B4
+#define WM8994_WRITE_SEQUENCER_437              0x31B5
+#define WM8994_WRITE_SEQUENCER_438              0x31B6
+#define WM8994_WRITE_SEQUENCER_439              0x31B7
+#define WM8994_WRITE_SEQUENCER_440              0x31B8
+#define WM8994_WRITE_SEQUENCER_441              0x31B9
+#define WM8994_WRITE_SEQUENCER_442              0x31BA
+#define WM8994_WRITE_SEQUENCER_443              0x31BB
+#define WM8994_WRITE_SEQUENCER_444              0x31BC
+#define WM8994_WRITE_SEQUENCER_445              0x31BD
+#define WM8994_WRITE_SEQUENCER_446              0x31BE
+#define WM8994_WRITE_SEQUENCER_447              0x31BF
+#define WM8994_WRITE_SEQUENCER_448              0x31C0
+#define WM8994_WRITE_SEQUENCER_449              0x31C1
+#define WM8994_WRITE_SEQUENCER_450              0x31C2
+#define WM8994_WRITE_SEQUENCER_451              0x31C3
+#define WM8994_WRITE_SEQUENCER_452              0x31C4
+#define WM8994_WRITE_SEQUENCER_453              0x31C5
+#define WM8994_WRITE_SEQUENCER_454              0x31C6
+#define WM8994_WRITE_SEQUENCER_455              0x31C7
+#define WM8994_WRITE_SEQUENCER_456              0x31C8
+#define WM8994_WRITE_SEQUENCER_457              0x31C9
+#define WM8994_WRITE_SEQUENCER_458              0x31CA
+#define WM8994_WRITE_SEQUENCER_459              0x31CB
+#define WM8994_WRITE_SEQUENCER_460              0x31CC
+#define WM8994_WRITE_SEQUENCER_461              0x31CD
+#define WM8994_WRITE_SEQUENCER_462              0x31CE
+#define WM8994_WRITE_SEQUENCER_463              0x31CF
+#define WM8994_WRITE_SEQUENCER_464              0x31D0
+#define WM8994_WRITE_SEQUENCER_465              0x31D1
+#define WM8994_WRITE_SEQUENCER_466              0x31D2
+#define WM8994_WRITE_SEQUENCER_467              0x31D3
+#define WM8994_WRITE_SEQUENCER_468              0x31D4
+#define WM8994_WRITE_SEQUENCER_469              0x31D5
+#define WM8994_WRITE_SEQUENCER_470              0x31D6
+#define WM8994_WRITE_SEQUENCER_471              0x31D7
+#define WM8994_WRITE_SEQUENCER_472              0x31D8
+#define WM8994_WRITE_SEQUENCER_473              0x31D9
+#define WM8994_WRITE_SEQUENCER_474              0x31DA
+#define WM8994_WRITE_SEQUENCER_475              0x31DB
+#define WM8994_WRITE_SEQUENCER_476              0x31DC
+#define WM8994_WRITE_SEQUENCER_477              0x31DD
+#define WM8994_WRITE_SEQUENCER_478              0x31DE
+#define WM8994_WRITE_SEQUENCER_479              0x31DF
+#define WM8994_WRITE_SEQUENCER_480              0x31E0
+#define WM8994_WRITE_SEQUENCER_481              0x31E1
+#define WM8994_WRITE_SEQUENCER_482              0x31E2
+#define WM8994_WRITE_SEQUENCER_483              0x31E3
+#define WM8994_WRITE_SEQUENCER_484              0x31E4
+#define WM8994_WRITE_SEQUENCER_485              0x31E5
+#define WM8994_WRITE_SEQUENCER_486              0x31E6
+#define WM8994_WRITE_SEQUENCER_487              0x31E7
+#define WM8994_WRITE_SEQUENCER_488              0x31E8
+#define WM8994_WRITE_SEQUENCER_489              0x31E9
+#define WM8994_WRITE_SEQUENCER_490              0x31EA
+#define WM8994_WRITE_SEQUENCER_491              0x31EB
+#define WM8994_WRITE_SEQUENCER_492              0x31EC
+#define WM8994_WRITE_SEQUENCER_493              0x31ED
+#define WM8994_WRITE_SEQUENCER_494              0x31EE
+#define WM8994_WRITE_SEQUENCER_495              0x31EF
+#define WM8994_WRITE_SEQUENCER_496              0x31F0
+#define WM8994_WRITE_SEQUENCER_497              0x31F1
+#define WM8994_WRITE_SEQUENCER_498              0x31F2
+#define WM8994_WRITE_SEQUENCER_499              0x31F3
+#define WM8994_WRITE_SEQUENCER_500              0x31F4
+#define WM8994_WRITE_SEQUENCER_501              0x31F5
+#define WM8994_WRITE_SEQUENCER_502              0x31F6
+#define WM8994_WRITE_SEQUENCER_503              0x31F7
+#define WM8994_WRITE_SEQUENCER_504              0x31F8
+#define WM8994_WRITE_SEQUENCER_505              0x31F9
+#define WM8994_WRITE_SEQUENCER_506              0x31FA
+#define WM8994_WRITE_SEQUENCER_507              0x31FB
+#define WM8994_WRITE_SEQUENCER_508              0x31FC
+#define WM8994_WRITE_SEQUENCER_509              0x31FD
+#define WM8994_WRITE_SEQUENCER_510              0x31FE
+#define WM8994_WRITE_SEQUENCER_511              0x31FF
+
+#define WM8994_REGISTER_COUNT                   736
+#define WM8994_MAX_REGISTER                     0x31FF
+#define WM8994_MAX_CACHED_REGISTER              0x749
+
+/*
+ * Field Definitions.
+ */
+
+/*
+ * R0 (0x00) - Software Reset
+ */
+#define WM8994_SW_RESET_MASK                    0xFFFF  /* SW_RESET - [15:0] */
+#define WM8994_SW_RESET_SHIFT                        0  /* SW_RESET - [15:0] */
+#define WM8994_SW_RESET_WIDTH                       16  /* SW_RESET - [15:0] */
+
+/*
+ * R1 (0x01) - Power Management (1)
+ */
+#define WM8994_SPKOUTR_ENA                      0x2000  /* SPKOUTR_ENA */
+#define WM8994_SPKOUTR_ENA_MASK                 0x2000  /* SPKOUTR_ENA */
+#define WM8994_SPKOUTR_ENA_SHIFT                    13  /* SPKOUTR_ENA */
+#define WM8994_SPKOUTR_ENA_WIDTH                     1  /* SPKOUTR_ENA */
+#define WM8994_SPKOUTL_ENA                      0x1000  /* SPKOUTL_ENA */
+#define WM8994_SPKOUTL_ENA_MASK                 0x1000  /* SPKOUTL_ENA */
+#define WM8994_SPKOUTL_ENA_SHIFT                    12  /* SPKOUTL_ENA */
+#define WM8994_SPKOUTL_ENA_WIDTH                     1  /* SPKOUTL_ENA */
+#define WM8994_HPOUT2_ENA                       0x0800  /* HPOUT2_ENA */
+#define WM8994_HPOUT2_ENA_MASK                  0x0800  /* HPOUT2_ENA */
+#define WM8994_HPOUT2_ENA_SHIFT                     11  /* HPOUT2_ENA */
+#define WM8994_HPOUT2_ENA_WIDTH                      1  /* HPOUT2_ENA */
+#define WM8994_HPOUT1L_ENA                      0x0200  /* HPOUT1L_ENA */
+#define WM8994_HPOUT1L_ENA_MASK                 0x0200  /* HPOUT1L_ENA */
+#define WM8994_HPOUT1L_ENA_SHIFT                     9  /* HPOUT1L_ENA */
+#define WM8994_HPOUT1L_ENA_WIDTH                     1  /* HPOUT1L_ENA */
+#define WM8994_HPOUT1R_ENA                      0x0100  /* HPOUT1R_ENA */
+#define WM8994_HPOUT1R_ENA_MASK                 0x0100  /* HPOUT1R_ENA */
+#define WM8994_HPOUT1R_ENA_SHIFT                     8  /* HPOUT1R_ENA */
+#define WM8994_HPOUT1R_ENA_WIDTH                     1  /* HPOUT1R_ENA */
+#define WM8994_MICB2_ENA                        0x0020  /* MICB2_ENA */
+#define WM8994_MICB2_ENA_MASK                   0x0020  /* MICB2_ENA */
+#define WM8994_MICB2_ENA_SHIFT                       5  /* MICB2_ENA */
+#define WM8994_MICB2_ENA_WIDTH                       1  /* MICB2_ENA */
+#define WM8994_MICB1_ENA                        0x0010  /* MICB1_ENA */
+#define WM8994_MICB1_ENA_MASK                   0x0010  /* MICB1_ENA */
+#define WM8994_MICB1_ENA_SHIFT                       4  /* MICB1_ENA */
+#define WM8994_MICB1_ENA_WIDTH                       1  /* MICB1_ENA */
+#define WM8994_VMID_SEL_MASK                    0x0006  /* VMID_SEL - [2:1] */
+#define WM8994_VMID_SEL_SHIFT                        1  /* VMID_SEL - [2:1] */
+#define WM8994_VMID_SEL_WIDTH                        2  /* VMID_SEL - [2:1] */
+#define WM8994_BIAS_ENA                         0x0001  /* BIAS_ENA */
+#define WM8994_BIAS_ENA_MASK                    0x0001  /* BIAS_ENA */
+#define WM8994_BIAS_ENA_SHIFT                        0  /* BIAS_ENA */
+#define WM8994_BIAS_ENA_WIDTH                        1  /* BIAS_ENA */
+
+/*
+ * R2 (0x02) - Power Management (2)
+ */
+#define WM8994_TSHUT_ENA                        0x4000  /* TSHUT_ENA */
+#define WM8994_TSHUT_ENA_MASK                   0x4000  /* TSHUT_ENA */
+#define WM8994_TSHUT_ENA_SHIFT                      14  /* TSHUT_ENA */
+#define WM8994_TSHUT_ENA_WIDTH                       1  /* TSHUT_ENA */
+#define WM8994_TSHUT_OPDIS                      0x2000  /* TSHUT_OPDIS */
+#define WM8994_TSHUT_OPDIS_MASK                 0x2000  /* TSHUT_OPDIS */
+#define WM8994_TSHUT_OPDIS_SHIFT                    13  /* TSHUT_OPDIS */
+#define WM8994_TSHUT_OPDIS_WIDTH                     1  /* TSHUT_OPDIS */
+#define WM8994_OPCLK_ENA                        0x0800  /* OPCLK_ENA */
+#define WM8994_OPCLK_ENA_MASK                   0x0800  /* OPCLK_ENA */
+#define WM8994_OPCLK_ENA_SHIFT                      11  /* OPCLK_ENA */
+#define WM8994_OPCLK_ENA_WIDTH                       1  /* OPCLK_ENA */
+#define WM8994_MIXINL_ENA                       0x0200  /* MIXINL_ENA */
+#define WM8994_MIXINL_ENA_MASK                  0x0200  /* MIXINL_ENA */
+#define WM8994_MIXINL_ENA_SHIFT                      9  /* MIXINL_ENA */
+#define WM8994_MIXINL_ENA_WIDTH                      1  /* MIXINL_ENA */
+#define WM8994_MIXINR_ENA                       0x0100  /* MIXINR_ENA */
+#define WM8994_MIXINR_ENA_MASK                  0x0100  /* MIXINR_ENA */
+#define WM8994_MIXINR_ENA_SHIFT                      8  /* MIXINR_ENA */
+#define WM8994_MIXINR_ENA_WIDTH                      1  /* MIXINR_ENA */
+#define WM8994_IN2L_ENA                         0x0080  /* IN2L_ENA */
+#define WM8994_IN2L_ENA_MASK                    0x0080  /* IN2L_ENA */
+#define WM8994_IN2L_ENA_SHIFT                        7  /* IN2L_ENA */
+#define WM8994_IN2L_ENA_WIDTH                        1  /* IN2L_ENA */
+#define WM8994_IN1L_ENA                         0x0040  /* IN1L_ENA */
+#define WM8994_IN1L_ENA_MASK                    0x0040  /* IN1L_ENA */
+#define WM8994_IN1L_ENA_SHIFT                        6  /* IN1L_ENA */
+#define WM8994_IN1L_ENA_WIDTH                        1  /* IN1L_ENA */
+#define WM8994_IN2R_ENA                         0x0020  /* IN2R_ENA */
+#define WM8994_IN2R_ENA_MASK                    0x0020  /* IN2R_ENA */
+#define WM8994_IN2R_ENA_SHIFT                        5  /* IN2R_ENA */
+#define WM8994_IN2R_ENA_WIDTH                        1  /* IN2R_ENA */
+#define WM8994_IN1R_ENA                         0x0010  /* IN1R_ENA */
+#define WM8994_IN1R_ENA_MASK                    0x0010  /* IN1R_ENA */
+#define WM8994_IN1R_ENA_SHIFT                        4  /* IN1R_ENA */
+#define WM8994_IN1R_ENA_WIDTH                        1  /* IN1R_ENA */
+
+/*
+ * R3 (0x03) - Power Management (3)
+ */
+#define WM8994_LINEOUT1N_ENA                    0x2000  /* LINEOUT1N_ENA */
+#define WM8994_LINEOUT1N_ENA_MASK               0x2000  /* LINEOUT1N_ENA */
+#define WM8994_LINEOUT1N_ENA_SHIFT                  13  /* LINEOUT1N_ENA */
+#define WM8994_LINEOUT1N_ENA_WIDTH                   1  /* LINEOUT1N_ENA */
+#define WM8994_LINEOUT1P_ENA                    0x1000  /* LINEOUT1P_ENA */
+#define WM8994_LINEOUT1P_ENA_MASK               0x1000  /* LINEOUT1P_ENA */
+#define WM8994_LINEOUT1P_ENA_SHIFT                  12  /* LINEOUT1P_ENA */
+#define WM8994_LINEOUT1P_ENA_WIDTH                   1  /* LINEOUT1P_ENA */
+#define WM8994_LINEOUT2N_ENA                    0x0800  /* LINEOUT2N_ENA */
+#define WM8994_LINEOUT2N_ENA_MASK               0x0800  /* LINEOUT2N_ENA */
+#define WM8994_LINEOUT2N_ENA_SHIFT                  11  /* LINEOUT2N_ENA */
+#define WM8994_LINEOUT2N_ENA_WIDTH                   1  /* LINEOUT2N_ENA */
+#define WM8994_LINEOUT2P_ENA                    0x0400  /* LINEOUT2P_ENA */
+#define WM8994_LINEOUT2P_ENA_MASK               0x0400  /* LINEOUT2P_ENA */
+#define WM8994_LINEOUT2P_ENA_SHIFT                  10  /* LINEOUT2P_ENA */
+#define WM8994_LINEOUT2P_ENA_WIDTH                   1  /* LINEOUT2P_ENA */
+#define WM8994_SPKRVOL_ENA                      0x0200  /* SPKRVOL_ENA */
+#define WM8994_SPKRVOL_ENA_MASK                 0x0200  /* SPKRVOL_ENA */
+#define WM8994_SPKRVOL_ENA_SHIFT                     9  /* SPKRVOL_ENA */
+#define WM8994_SPKRVOL_ENA_WIDTH                     1  /* SPKRVOL_ENA */
+#define WM8994_SPKLVOL_ENA                      0x0100  /* SPKLVOL_ENA */
+#define WM8994_SPKLVOL_ENA_MASK                 0x0100  /* SPKLVOL_ENA */
+#define WM8994_SPKLVOL_ENA_SHIFT                     8  /* SPKLVOL_ENA */
+#define WM8994_SPKLVOL_ENA_WIDTH                     1  /* SPKLVOL_ENA */
+#define WM8994_MIXOUTLVOL_ENA                   0x0080  /* MIXOUTLVOL_ENA */
+#define WM8994_MIXOUTLVOL_ENA_MASK              0x0080  /* MIXOUTLVOL_ENA */
+#define WM8994_MIXOUTLVOL_ENA_SHIFT                  7  /* MIXOUTLVOL_ENA */
+#define WM8994_MIXOUTLVOL_ENA_WIDTH                  1  /* MIXOUTLVOL_ENA */
+#define WM8994_MIXOUTRVOL_ENA                   0x0040  /* MIXOUTRVOL_ENA */
+#define WM8994_MIXOUTRVOL_ENA_MASK              0x0040  /* MIXOUTRVOL_ENA */
+#define WM8994_MIXOUTRVOL_ENA_SHIFT                  6  /* MIXOUTRVOL_ENA */
+#define WM8994_MIXOUTRVOL_ENA_WIDTH                  1  /* MIXOUTRVOL_ENA */
+#define WM8994_MIXOUTL_ENA                      0x0020  /* MIXOUTL_ENA */
+#define WM8994_MIXOUTL_ENA_MASK                 0x0020  /* MIXOUTL_ENA */
+#define WM8994_MIXOUTL_ENA_SHIFT                     5  /* MIXOUTL_ENA */
+#define WM8994_MIXOUTL_ENA_WIDTH                     1  /* MIXOUTL_ENA */
+#define WM8994_MIXOUTR_ENA                      0x0010  /* MIXOUTR_ENA */
+#define WM8994_MIXOUTR_ENA_MASK                 0x0010  /* MIXOUTR_ENA */
+#define WM8994_MIXOUTR_ENA_SHIFT                     4  /* MIXOUTR_ENA */
+#define WM8994_MIXOUTR_ENA_WIDTH                     1  /* MIXOUTR_ENA */
+
+/*
+ * R4 (0x04) - Power Management (4)
+ */
+#define WM8994_AIF2ADCL_ENA                     0x2000  /* AIF2ADCL_ENA */
+#define WM8994_AIF2ADCL_ENA_MASK                0x2000  /* AIF2ADCL_ENA */
+#define WM8994_AIF2ADCL_ENA_SHIFT                   13  /* AIF2ADCL_ENA */
+#define WM8994_AIF2ADCL_ENA_WIDTH                    1  /* AIF2ADCL_ENA */
+#define WM8994_AIF2ADCR_ENA                     0x1000  /* AIF2ADCR_ENA */
+#define WM8994_AIF2ADCR_ENA_MASK                0x1000  /* AIF2ADCR_ENA */
+#define WM8994_AIF2ADCR_ENA_SHIFT                   12  /* AIF2ADCR_ENA */
+#define WM8994_AIF2ADCR_ENA_WIDTH                    1  /* AIF2ADCR_ENA */
+#define WM8994_AIF1ADC2L_ENA                    0x0800  /* AIF1ADC2L_ENA */
+#define WM8994_AIF1ADC2L_ENA_MASK               0x0800  /* AIF1ADC2L_ENA */
+#define WM8994_AIF1ADC2L_ENA_SHIFT                  11  /* AIF1ADC2L_ENA */
+#define WM8994_AIF1ADC2L_ENA_WIDTH                   1  /* AIF1ADC2L_ENA */
+#define WM8994_AIF1ADC2R_ENA                    0x0400  /* AIF1ADC2R_ENA */
+#define WM8994_AIF1ADC2R_ENA_MASK               0x0400  /* AIF1ADC2R_ENA */
+#define WM8994_AIF1ADC2R_ENA_SHIFT                  10  /* AIF1ADC2R_ENA */
+#define WM8994_AIF1ADC2R_ENA_WIDTH                   1  /* AIF1ADC2R_ENA */
+#define WM8994_AIF1ADC1L_ENA                    0x0200  /* AIF1ADC1L_ENA */
+#define WM8994_AIF1ADC1L_ENA_MASK               0x0200  /* AIF1ADC1L_ENA */
+#define WM8994_AIF1ADC1L_ENA_SHIFT                   9  /* AIF1ADC1L_ENA */
+#define WM8994_AIF1ADC1L_ENA_WIDTH                   1  /* AIF1ADC1L_ENA */
+#define WM8994_AIF1ADC1R_ENA                    0x0100  /* AIF1ADC1R_ENA */
+#define WM8994_AIF1ADC1R_ENA_MASK               0x0100  /* AIF1ADC1R_ENA */
+#define WM8994_AIF1ADC1R_ENA_SHIFT                   8  /* AIF1ADC1R_ENA */
+#define WM8994_AIF1ADC1R_ENA_WIDTH                   1  /* AIF1ADC1R_ENA */
+#define WM8994_DMIC2L_ENA                       0x0020  /* DMIC2L_ENA */
+#define WM8994_DMIC2L_ENA_MASK                  0x0020  /* DMIC2L_ENA */
+#define WM8994_DMIC2L_ENA_SHIFT                      5  /* DMIC2L_ENA */
+#define WM8994_DMIC2L_ENA_WIDTH                      1  /* DMIC2L_ENA */
+#define WM8994_DMIC2R_ENA                       0x0010  /* DMIC2R_ENA */
+#define WM8994_DMIC2R_ENA_MASK                  0x0010  /* DMIC2R_ENA */
+#define WM8994_DMIC2R_ENA_SHIFT                      4  /* DMIC2R_ENA */
+#define WM8994_DMIC2R_ENA_WIDTH                      1  /* DMIC2R_ENA */
+#define WM8994_DMIC1L_ENA                       0x0008  /* DMIC1L_ENA */
+#define WM8994_DMIC1L_ENA_MASK                  0x0008  /* DMIC1L_ENA */
+#define WM8994_DMIC1L_ENA_SHIFT                      3  /* DMIC1L_ENA */
+#define WM8994_DMIC1L_ENA_WIDTH                      1  /* DMIC1L_ENA */
+#define WM8994_DMIC1R_ENA                       0x0004  /* DMIC1R_ENA */
+#define WM8994_DMIC1R_ENA_MASK                  0x0004  /* DMIC1R_ENA */
+#define WM8994_DMIC1R_ENA_SHIFT                      2  /* DMIC1R_ENA */
+#define WM8994_DMIC1R_ENA_WIDTH                      1  /* DMIC1R_ENA */
+#define WM8994_ADCL_ENA                         0x0002  /* ADCL_ENA */
+#define WM8994_ADCL_ENA_MASK                    0x0002  /* ADCL_ENA */
+#define WM8994_ADCL_ENA_SHIFT                        1  /* ADCL_ENA */
+#define WM8994_ADCL_ENA_WIDTH                        1  /* ADCL_ENA */
+#define WM8994_ADCR_ENA                         0x0001  /* ADCR_ENA */
+#define WM8994_ADCR_ENA_MASK                    0x0001  /* ADCR_ENA */
+#define WM8994_ADCR_ENA_SHIFT                        0  /* ADCR_ENA */
+#define WM8994_ADCR_ENA_WIDTH                        1  /* ADCR_ENA */
+
+/*
+ * R5 (0x05) - Power Management (5)
+ */
+#define WM8994_AIF2DACL_ENA                     0x2000  /* AIF2DACL_ENA */
+#define WM8994_AIF2DACL_ENA_MASK                0x2000  /* AIF2DACL_ENA */
+#define WM8994_AIF2DACL_ENA_SHIFT                   13  /* AIF2DACL_ENA */
+#define WM8994_AIF2DACL_ENA_WIDTH                    1  /* AIF2DACL_ENA */
+#define WM8994_AIF2DACR_ENA                     0x1000  /* AIF2DACR_ENA */
+#define WM8994_AIF2DACR_ENA_MASK                0x1000  /* AIF2DACR_ENA */
+#define WM8994_AIF2DACR_ENA_SHIFT                   12  /* AIF2DACR_ENA */
+#define WM8994_AIF2DACR_ENA_WIDTH                    1  /* AIF2DACR_ENA */
+#define WM8994_AIF1DAC2L_ENA                    0x0800  /* AIF1DAC2L_ENA */
+#define WM8994_AIF1DAC2L_ENA_MASK               0x0800  /* AIF1DAC2L_ENA */
+#define WM8994_AIF1DAC2L_ENA_SHIFT                  11  /* AIF1DAC2L_ENA */
+#define WM8994_AIF1DAC2L_ENA_WIDTH                   1  /* AIF1DAC2L_ENA */
+#define WM8994_AIF1DAC2R_ENA                    0x0400  /* AIF1DAC2R_ENA */
+#define WM8994_AIF1DAC2R_ENA_MASK               0x0400  /* AIF1DAC2R_ENA */
+#define WM8994_AIF1DAC2R_ENA_SHIFT                  10  /* AIF1DAC2R_ENA */
+#define WM8994_AIF1DAC2R_ENA_WIDTH                   1  /* AIF1DAC2R_ENA */
+#define WM8994_AIF1DAC1L_ENA                    0x0200  /* AIF1DAC1L_ENA */
+#define WM8994_AIF1DAC1L_ENA_MASK               0x0200  /* AIF1DAC1L_ENA */
+#define WM8994_AIF1DAC1L_ENA_SHIFT                   9  /* AIF1DAC1L_ENA */
+#define WM8994_AIF1DAC1L_ENA_WIDTH                   1  /* AIF1DAC1L_ENA */
+#define WM8994_AIF1DAC1R_ENA                    0x0100  /* AIF1DAC1R_ENA */
+#define WM8994_AIF1DAC1R_ENA_MASK               0x0100  /* AIF1DAC1R_ENA */
+#define WM8994_AIF1DAC1R_ENA_SHIFT                   8  /* AIF1DAC1R_ENA */
+#define WM8994_AIF1DAC1R_ENA_WIDTH                   1  /* AIF1DAC1R_ENA */
+#define WM8994_DAC2L_ENA                        0x0008  /* DAC2L_ENA */
+#define WM8994_DAC2L_ENA_MASK                   0x0008  /* DAC2L_ENA */
+#define WM8994_DAC2L_ENA_SHIFT                       3  /* DAC2L_ENA */
+#define WM8994_DAC2L_ENA_WIDTH                       1  /* DAC2L_ENA */
+#define WM8994_DAC2R_ENA                        0x0004  /* DAC2R_ENA */
+#define WM8994_DAC2R_ENA_MASK                   0x0004  /* DAC2R_ENA */
+#define WM8994_DAC2R_ENA_SHIFT                       2  /* DAC2R_ENA */
+#define WM8994_DAC2R_ENA_WIDTH                       1  /* DAC2R_ENA */
+#define WM8994_DAC1L_ENA                        0x0002  /* DAC1L_ENA */
+#define WM8994_DAC1L_ENA_MASK                   0x0002  /* DAC1L_ENA */
+#define WM8994_DAC1L_ENA_SHIFT                       1  /* DAC1L_ENA */
+#define WM8994_DAC1L_ENA_WIDTH                       1  /* DAC1L_ENA */
+#define WM8994_DAC1R_ENA                        0x0001  /* DAC1R_ENA */
+#define WM8994_DAC1R_ENA_MASK                   0x0001  /* DAC1R_ENA */
+#define WM8994_DAC1R_ENA_SHIFT                       0  /* DAC1R_ENA */
+#define WM8994_DAC1R_ENA_WIDTH                       1  /* DAC1R_ENA */
+
+/*
+ * R6 (0x06) - Power Management (6)
+ */
+#define WM8994_AIF3_TRI                         0x0020  /* AIF3_TRI */
+#define WM8994_AIF3_TRI_MASK                    0x0020  /* AIF3_TRI */
+#define WM8994_AIF3_TRI_SHIFT                        5  /* AIF3_TRI */
+#define WM8994_AIF3_TRI_WIDTH                        1  /* AIF3_TRI */
+#define WM8994_AIF3_ADCDAT_SRC_MASK             0x0018  /* AIF3_ADCDAT_SRC - [4:3] */
+#define WM8994_AIF3_ADCDAT_SRC_SHIFT                 3  /* AIF3_ADCDAT_SRC - [4:3] */
+#define WM8994_AIF3_ADCDAT_SRC_WIDTH                 2  /* AIF3_ADCDAT_SRC - [4:3] */
+#define WM8994_AIF2_ADCDAT_SRC                  0x0004  /* AIF2_ADCDAT_SRC */
+#define WM8994_AIF2_ADCDAT_SRC_MASK             0x0004  /* AIF2_ADCDAT_SRC */
+#define WM8994_AIF2_ADCDAT_SRC_SHIFT                 2  /* AIF2_ADCDAT_SRC */
+#define WM8994_AIF2_ADCDAT_SRC_WIDTH                 1  /* AIF2_ADCDAT_SRC */
+#define WM8994_AIF2_DACDAT_SRC                  0x0002  /* AIF2_DACDAT_SRC */
+#define WM8994_AIF2_DACDAT_SRC_MASK             0x0002  /* AIF2_DACDAT_SRC */
+#define WM8994_AIF2_DACDAT_SRC_SHIFT                 1  /* AIF2_DACDAT_SRC */
+#define WM8994_AIF2_DACDAT_SRC_WIDTH                 1  /* AIF2_DACDAT_SRC */
+#define WM8994_AIF1_DACDAT_SRC                  0x0001  /* AIF1_DACDAT_SRC */
+#define WM8994_AIF1_DACDAT_SRC_MASK             0x0001  /* AIF1_DACDAT_SRC */
+#define WM8994_AIF1_DACDAT_SRC_SHIFT                 0  /* AIF1_DACDAT_SRC */
+#define WM8994_AIF1_DACDAT_SRC_WIDTH                 1  /* AIF1_DACDAT_SRC */
+
+/*
+ * R21 (0x15) - Input Mixer (1)
+ */
+#define WM8994_IN1RP_MIXINR_BOOST               0x0100  /* IN1RP_MIXINR_BOOST */
+#define WM8994_IN1RP_MIXINR_BOOST_MASK          0x0100  /* IN1RP_MIXINR_BOOST */
+#define WM8994_IN1RP_MIXINR_BOOST_SHIFT              8  /* IN1RP_MIXINR_BOOST */
+#define WM8994_IN1RP_MIXINR_BOOST_WIDTH              1  /* IN1RP_MIXINR_BOOST */
+#define WM8994_IN1LP_MIXINL_BOOST               0x0080  /* IN1LP_MIXINL_BOOST */
+#define WM8994_IN1LP_MIXINL_BOOST_MASK          0x0080  /* IN1LP_MIXINL_BOOST */
+#define WM8994_IN1LP_MIXINL_BOOST_SHIFT              7  /* IN1LP_MIXINL_BOOST */
+#define WM8994_IN1LP_MIXINL_BOOST_WIDTH              1  /* IN1LP_MIXINL_BOOST */
+#define WM8994_INPUTS_CLAMP                     0x0040  /* INPUTS_CLAMP */
+#define WM8994_INPUTS_CLAMP_MASK                0x0040  /* INPUTS_CLAMP */
+#define WM8994_INPUTS_CLAMP_SHIFT                    6  /* INPUTS_CLAMP */
+#define WM8994_INPUTS_CLAMP_WIDTH                    1  /* INPUTS_CLAMP */
+
+/*
+ * R24 (0x18) - Left Line Input 1&2 Volume
+ */
+#define WM8994_IN1_VU                           0x0100  /* IN1_VU */
+#define WM8994_IN1_VU_MASK                      0x0100  /* IN1_VU */
+#define WM8994_IN1_VU_SHIFT                          8  /* IN1_VU */
+#define WM8994_IN1_VU_WIDTH                          1  /* IN1_VU */
+#define WM8994_IN1L_MUTE                        0x0080  /* IN1L_MUTE */
+#define WM8994_IN1L_MUTE_MASK                   0x0080  /* IN1L_MUTE */
+#define WM8994_IN1L_MUTE_SHIFT                       7  /* IN1L_MUTE */
+#define WM8994_IN1L_MUTE_WIDTH                       1  /* IN1L_MUTE */
+#define WM8994_IN1L_ZC                          0x0040  /* IN1L_ZC */
+#define WM8994_IN1L_ZC_MASK                     0x0040  /* IN1L_ZC */
+#define WM8994_IN1L_ZC_SHIFT                         6  /* IN1L_ZC */
+#define WM8994_IN1L_ZC_WIDTH                         1  /* IN1L_ZC */
+#define WM8994_IN1L_VOL_MASK                    0x001F  /* IN1L_VOL - [4:0] */
+#define WM8994_IN1L_VOL_SHIFT                        0  /* IN1L_VOL - [4:0] */
+#define WM8994_IN1L_VOL_WIDTH                        5  /* IN1L_VOL - [4:0] */
+
+/*
+ * R25 (0x19) - Left Line Input 3&4 Volume
+ */
+#define WM8994_IN2_VU                           0x0100  /* IN2_VU */
+#define WM8994_IN2_VU_MASK                      0x0100  /* IN2_VU */
+#define WM8994_IN2_VU_SHIFT                          8  /* IN2_VU */
+#define WM8994_IN2_VU_WIDTH                          1  /* IN2_VU */
+#define WM8994_IN2L_MUTE                        0x0080  /* IN2L_MUTE */
+#define WM8994_IN2L_MUTE_MASK                   0x0080  /* IN2L_MUTE */
+#define WM8994_IN2L_MUTE_SHIFT                       7  /* IN2L_MUTE */
+#define WM8994_IN2L_MUTE_WIDTH                       1  /* IN2L_MUTE */
+#define WM8994_IN2L_ZC                          0x0040  /* IN2L_ZC */
+#define WM8994_IN2L_ZC_MASK                     0x0040  /* IN2L_ZC */
+#define WM8994_IN2L_ZC_SHIFT                         6  /* IN2L_ZC */
+#define WM8994_IN2L_ZC_WIDTH                         1  /* IN2L_ZC */
+#define WM8994_IN2L_VOL_MASK                    0x001F  /* IN2L_VOL - [4:0] */
+#define WM8994_IN2L_VOL_SHIFT                        0  /* IN2L_VOL - [4:0] */
+#define WM8994_IN2L_VOL_WIDTH                        5  /* IN2L_VOL - [4:0] */
+
+/*
+ * R26 (0x1A) - Right Line Input 1&2 Volume
+ */
+#define WM8994_IN1_VU                           0x0100  /* IN1_VU */
+#define WM8994_IN1_VU_MASK                      0x0100  /* IN1_VU */
+#define WM8994_IN1_VU_SHIFT                          8  /* IN1_VU */
+#define WM8994_IN1_VU_WIDTH                          1  /* IN1_VU */
+#define WM8994_IN1R_MUTE                        0x0080  /* IN1R_MUTE */
+#define WM8994_IN1R_MUTE_MASK                   0x0080  /* IN1R_MUTE */
+#define WM8994_IN1R_MUTE_SHIFT                       7  /* IN1R_MUTE */
+#define WM8994_IN1R_MUTE_WIDTH                       1  /* IN1R_MUTE */
+#define WM8994_IN1R_ZC                          0x0040  /* IN1R_ZC */
+#define WM8994_IN1R_ZC_MASK                     0x0040  /* IN1R_ZC */
+#define WM8994_IN1R_ZC_SHIFT                         6  /* IN1R_ZC */
+#define WM8994_IN1R_ZC_WIDTH                         1  /* IN1R_ZC */
+#define WM8994_IN1R_VOL_MASK                    0x001F  /* IN1R_VOL - [4:0] */
+#define WM8994_IN1R_VOL_SHIFT                        0  /* IN1R_VOL - [4:0] */
+#define WM8994_IN1R_VOL_WIDTH                        5  /* IN1R_VOL - [4:0] */
+
+/*
+ * R27 (0x1B) - Right Line Input 3&4 Volume
+ */
+#define WM8994_IN2_VU                           0x0100  /* IN2_VU */
+#define WM8994_IN2_VU_MASK                      0x0100  /* IN2_VU */
+#define WM8994_IN2_VU_SHIFT                          8  /* IN2_VU */
+#define WM8994_IN2_VU_WIDTH                          1  /* IN2_VU */
+#define WM8994_IN2R_MUTE                        0x0080  /* IN2R_MUTE */
+#define WM8994_IN2R_MUTE_MASK                   0x0080  /* IN2R_MUTE */
+#define WM8994_IN2R_MUTE_SHIFT                       7  /* IN2R_MUTE */
+#define WM8994_IN2R_MUTE_WIDTH                       1  /* IN2R_MUTE */
+#define WM8994_IN2R_ZC                          0x0040  /* IN2R_ZC */
+#define WM8994_IN2R_ZC_MASK                     0x0040  /* IN2R_ZC */
+#define WM8994_IN2R_ZC_SHIFT                         6  /* IN2R_ZC */
+#define WM8994_IN2R_ZC_WIDTH                         1  /* IN2R_ZC */
+#define WM8994_IN2R_VOL_MASK                    0x001F  /* IN2R_VOL - [4:0] */
+#define WM8994_IN2R_VOL_SHIFT                        0  /* IN2R_VOL - [4:0] */
+#define WM8994_IN2R_VOL_WIDTH                        5  /* IN2R_VOL - [4:0] */
+
+/*
+ * R28 (0x1C) - Left Output Volume
+ */
+#define WM8994_HPOUT1_VU                        0x0100  /* HPOUT1_VU */
+#define WM8994_HPOUT1_VU_MASK                   0x0100  /* HPOUT1_VU */
+#define WM8994_HPOUT1_VU_SHIFT                       8  /* HPOUT1_VU */
+#define WM8994_HPOUT1_VU_WIDTH                       1  /* HPOUT1_VU */
+#define WM8994_HPOUT1L_ZC                       0x0080  /* HPOUT1L_ZC */
+#define WM8994_HPOUT1L_ZC_MASK                  0x0080  /* HPOUT1L_ZC */
+#define WM8994_HPOUT1L_ZC_SHIFT                      7  /* HPOUT1L_ZC */
+#define WM8994_HPOUT1L_ZC_WIDTH                      1  /* HPOUT1L_ZC */
+#define WM8994_HPOUT1L_MUTE_N                   0x0040  /* HPOUT1L_MUTE_N */
+#define WM8994_HPOUT1L_MUTE_N_MASK              0x0040  /* HPOUT1L_MUTE_N */
+#define WM8994_HPOUT1L_MUTE_N_SHIFT                  6  /* HPOUT1L_MUTE_N */
+#define WM8994_HPOUT1L_MUTE_N_WIDTH                  1  /* HPOUT1L_MUTE_N */
+#define WM8994_HPOUT1L_VOL_MASK                 0x003F  /* HPOUT1L_VOL - [5:0] */
+#define WM8994_HPOUT1L_VOL_SHIFT                     0  /* HPOUT1L_VOL - [5:0] */
+#define WM8994_HPOUT1L_VOL_WIDTH                     6  /* HPOUT1L_VOL - [5:0] */
+
+/*
+ * R29 (0x1D) - Right Output Volume
+ */
+#define WM8994_HPOUT1_VU                        0x0100  /* HPOUT1_VU */
+#define WM8994_HPOUT1_VU_MASK                   0x0100  /* HPOUT1_VU */
+#define WM8994_HPOUT1_VU_SHIFT                       8  /* HPOUT1_VU */
+#define WM8994_HPOUT1_VU_WIDTH                       1  /* HPOUT1_VU */
+#define WM8994_HPOUT1R_ZC                       0x0080  /* HPOUT1R_ZC */
+#define WM8994_HPOUT1R_ZC_MASK                  0x0080  /* HPOUT1R_ZC */
+#define WM8994_HPOUT1R_ZC_SHIFT                      7  /* HPOUT1R_ZC */
+#define WM8994_HPOUT1R_ZC_WIDTH                      1  /* HPOUT1R_ZC */
+#define WM8994_HPOUT1R_MUTE_N                   0x0040  /* HPOUT1R_MUTE_N */
+#define WM8994_HPOUT1R_MUTE_N_MASK              0x0040  /* HPOUT1R_MUTE_N */
+#define WM8994_HPOUT1R_MUTE_N_SHIFT                  6  /* HPOUT1R_MUTE_N */
+#define WM8994_HPOUT1R_MUTE_N_WIDTH                  1  /* HPOUT1R_MUTE_N */
+#define WM8994_HPOUT1R_VOL_MASK                 0x003F  /* HPOUT1R_VOL - [5:0] */
+#define WM8994_HPOUT1R_VOL_SHIFT                     0  /* HPOUT1R_VOL - [5:0] */
+#define WM8994_HPOUT1R_VOL_WIDTH                     6  /* HPOUT1R_VOL - [5:0] */
+
+/*
+ * R30 (0x1E) - Line Outputs Volume
+ */
+#define WM8994_LINEOUT1N_MUTE                   0x0040  /* LINEOUT1N_MUTE */
+#define WM8994_LINEOUT1N_MUTE_MASK              0x0040  /* LINEOUT1N_MUTE */
+#define WM8994_LINEOUT1N_MUTE_SHIFT                  6  /* LINEOUT1N_MUTE */
+#define WM8994_LINEOUT1N_MUTE_WIDTH                  1  /* LINEOUT1N_MUTE */
+#define WM8994_LINEOUT1P_MUTE                   0x0020  /* LINEOUT1P_MUTE */
+#define WM8994_LINEOUT1P_MUTE_MASK              0x0020  /* LINEOUT1P_MUTE */
+#define WM8994_LINEOUT1P_MUTE_SHIFT                  5  /* LINEOUT1P_MUTE */
+#define WM8994_LINEOUT1P_MUTE_WIDTH                  1  /* LINEOUT1P_MUTE */
+#define WM8994_LINEOUT1_VOL                     0x0010  /* LINEOUT1_VOL */
+#define WM8994_LINEOUT1_VOL_MASK                0x0010  /* LINEOUT1_VOL */
+#define WM8994_LINEOUT1_VOL_SHIFT                    4  /* LINEOUT1_VOL */
+#define WM8994_LINEOUT1_VOL_WIDTH                    1  /* LINEOUT1_VOL */
+#define WM8994_LINEOUT2N_MUTE                   0x0004  /* LINEOUT2N_MUTE */
+#define WM8994_LINEOUT2N_MUTE_MASK              0x0004  /* LINEOUT2N_MUTE */
+#define WM8994_LINEOUT2N_MUTE_SHIFT                  2  /* LINEOUT2N_MUTE */
+#define WM8994_LINEOUT2N_MUTE_WIDTH                  1  /* LINEOUT2N_MUTE */
+#define WM8994_LINEOUT2P_MUTE                   0x0002  /* LINEOUT2P_MUTE */
+#define WM8994_LINEOUT2P_MUTE_MASK              0x0002  /* LINEOUT2P_MUTE */
+#define WM8994_LINEOUT2P_MUTE_SHIFT                  1  /* LINEOUT2P_MUTE */
+#define WM8994_LINEOUT2P_MUTE_WIDTH                  1  /* LINEOUT2P_MUTE */
+#define WM8994_LINEOUT2_VOL                     0x0001  /* LINEOUT2_VOL */
+#define WM8994_LINEOUT2_VOL_MASK                0x0001  /* LINEOUT2_VOL */
+#define WM8994_LINEOUT2_VOL_SHIFT                    0  /* LINEOUT2_VOL */
+#define WM8994_LINEOUT2_VOL_WIDTH                    1  /* LINEOUT2_VOL */
+
+/*
+ * R31 (0x1F) - HPOUT2 Volume
+ */
+#define WM8994_HPOUT2_MUTE                      0x0020  /* HPOUT2_MUTE */
+#define WM8994_HPOUT2_MUTE_MASK                 0x0020  /* HPOUT2_MUTE */
+#define WM8994_HPOUT2_MUTE_SHIFT                     5  /* HPOUT2_MUTE */
+#define WM8994_HPOUT2_MUTE_WIDTH                     1  /* HPOUT2_MUTE */
+#define WM8994_HPOUT2_VOL                       0x0010  /* HPOUT2_VOL */
+#define WM8994_HPOUT2_VOL_MASK                  0x0010  /* HPOUT2_VOL */
+#define WM8994_HPOUT2_VOL_SHIFT                      4  /* HPOUT2_VOL */
+#define WM8994_HPOUT2_VOL_WIDTH                      1  /* HPOUT2_VOL */
+
+/*
+ * R32 (0x20) - Left OPGA Volume
+ */
+#define WM8994_MIXOUT_VU                        0x0100  /* MIXOUT_VU */
+#define WM8994_MIXOUT_VU_MASK                   0x0100  /* MIXOUT_VU */
+#define WM8994_MIXOUT_VU_SHIFT                       8  /* MIXOUT_VU */
+#define WM8994_MIXOUT_VU_WIDTH                       1  /* MIXOUT_VU */
+#define WM8994_MIXOUTL_ZC                       0x0080  /* MIXOUTL_ZC */
+#define WM8994_MIXOUTL_ZC_MASK                  0x0080  /* MIXOUTL_ZC */
+#define WM8994_MIXOUTL_ZC_SHIFT                      7  /* MIXOUTL_ZC */
+#define WM8994_MIXOUTL_ZC_WIDTH                      1  /* MIXOUTL_ZC */
+#define WM8994_MIXOUTL_MUTE_N                   0x0040  /* MIXOUTL_MUTE_N */
+#define WM8994_MIXOUTL_MUTE_N_MASK              0x0040  /* MIXOUTL_MUTE_N */
+#define WM8994_MIXOUTL_MUTE_N_SHIFT                  6  /* MIXOUTL_MUTE_N */
+#define WM8994_MIXOUTL_MUTE_N_WIDTH                  1  /* MIXOUTL_MUTE_N */
+#define WM8994_MIXOUTL_VOL_MASK                 0x003F  /* MIXOUTL_VOL - [5:0] */
+#define WM8994_MIXOUTL_VOL_SHIFT                     0  /* MIXOUTL_VOL - [5:0] */
+#define WM8994_MIXOUTL_VOL_WIDTH                     6  /* MIXOUTL_VOL - [5:0] */
+
+/*
+ * R33 (0x21) - Right OPGA Volume
+ */
+#define WM8994_MIXOUT_VU                        0x0100  /* MIXOUT_VU */
+#define WM8994_MIXOUT_VU_MASK                   0x0100  /* MIXOUT_VU */
+#define WM8994_MIXOUT_VU_SHIFT                       8  /* MIXOUT_VU */
+#define WM8994_MIXOUT_VU_WIDTH                       1  /* MIXOUT_VU */
+#define WM8994_MIXOUTR_ZC                       0x0080  /* MIXOUTR_ZC */
+#define WM8994_MIXOUTR_ZC_MASK                  0x0080  /* MIXOUTR_ZC */
+#define WM8994_MIXOUTR_ZC_SHIFT                      7  /* MIXOUTR_ZC */
+#define WM8994_MIXOUTR_ZC_WIDTH                      1  /* MIXOUTR_ZC */
+#define WM8994_MIXOUTR_MUTE_N                   0x0040  /* MIXOUTR_MUTE_N */
+#define WM8994_MIXOUTR_MUTE_N_MASK              0x0040  /* MIXOUTR_MUTE_N */
+#define WM8994_MIXOUTR_MUTE_N_SHIFT                  6  /* MIXOUTR_MUTE_N */
+#define WM8994_MIXOUTR_MUTE_N_WIDTH                  1  /* MIXOUTR_MUTE_N */
+#define WM8994_MIXOUTR_VOL_MASK                 0x003F  /* MIXOUTR_VOL - [5:0] */
+#define WM8994_MIXOUTR_VOL_SHIFT                     0  /* MIXOUTR_VOL - [5:0] */
+#define WM8994_MIXOUTR_VOL_WIDTH                     6  /* MIXOUTR_VOL - [5:0] */
+
+/*
+ * R34 (0x22) - SPKMIXL Attenuation
+ */
+#define WM8994_DAC2L_SPKMIXL_VOL                0x0040  /* DAC2L_SPKMIXL_VOL */
+#define WM8994_DAC2L_SPKMIXL_VOL_MASK           0x0040  /* DAC2L_SPKMIXL_VOL */
+#define WM8994_DAC2L_SPKMIXL_VOL_SHIFT               6  /* DAC2L_SPKMIXL_VOL */
+#define WM8994_DAC2L_SPKMIXL_VOL_WIDTH               1  /* DAC2L_SPKMIXL_VOL */
+#define WM8994_MIXINL_SPKMIXL_VOL               0x0020  /* MIXINL_SPKMIXL_VOL */
+#define WM8994_MIXINL_SPKMIXL_VOL_MASK          0x0020  /* MIXINL_SPKMIXL_VOL */
+#define WM8994_MIXINL_SPKMIXL_VOL_SHIFT              5  /* MIXINL_SPKMIXL_VOL */
+#define WM8994_MIXINL_SPKMIXL_VOL_WIDTH              1  /* MIXINL_SPKMIXL_VOL */
+#define WM8994_IN1LP_SPKMIXL_VOL                0x0010  /* IN1LP_SPKMIXL_VOL */
+#define WM8994_IN1LP_SPKMIXL_VOL_MASK           0x0010  /* IN1LP_SPKMIXL_VOL */
+#define WM8994_IN1LP_SPKMIXL_VOL_SHIFT               4  /* IN1LP_SPKMIXL_VOL */
+#define WM8994_IN1LP_SPKMIXL_VOL_WIDTH               1  /* IN1LP_SPKMIXL_VOL */
+#define WM8994_MIXOUTL_SPKMIXL_VOL              0x0008  /* MIXOUTL_SPKMIXL_VOL */
+#define WM8994_MIXOUTL_SPKMIXL_VOL_MASK         0x0008  /* MIXOUTL_SPKMIXL_VOL */
+#define WM8994_MIXOUTL_SPKMIXL_VOL_SHIFT             3  /* MIXOUTL_SPKMIXL_VOL */
+#define WM8994_MIXOUTL_SPKMIXL_VOL_WIDTH             1  /* MIXOUTL_SPKMIXL_VOL */
+#define WM8994_DAC1L_SPKMIXL_VOL                0x0004  /* DAC1L_SPKMIXL_VOL */
+#define WM8994_DAC1L_SPKMIXL_VOL_MASK           0x0004  /* DAC1L_SPKMIXL_VOL */
+#define WM8994_DAC1L_SPKMIXL_VOL_SHIFT               2  /* DAC1L_SPKMIXL_VOL */
+#define WM8994_DAC1L_SPKMIXL_VOL_WIDTH               1  /* DAC1L_SPKMIXL_VOL */
+#define WM8994_SPKMIXL_VOL_MASK                 0x0003  /* SPKMIXL_VOL - [1:0] */
+#define WM8994_SPKMIXL_VOL_SHIFT                     0  /* SPKMIXL_VOL - [1:0] */
+#define WM8994_SPKMIXL_VOL_WIDTH                     2  /* SPKMIXL_VOL - [1:0] */
+
+/*
+ * R35 (0x23) - SPKMIXR Attenuation
+ */
+#define WM8994_SPKOUT_CLASSAB                   0x0100  /* SPKOUT_CLASSAB */
+#define WM8994_SPKOUT_CLASSAB_MASK              0x0100  /* SPKOUT_CLASSAB */
+#define WM8994_SPKOUT_CLASSAB_SHIFT                  8  /* SPKOUT_CLASSAB */
+#define WM8994_SPKOUT_CLASSAB_WIDTH                  1  /* SPKOUT_CLASSAB */
+#define WM8994_DAC2R_SPKMIXR_VOL                0x0040  /* DAC2R_SPKMIXR_VOL */
+#define WM8994_DAC2R_SPKMIXR_VOL_MASK           0x0040  /* DAC2R_SPKMIXR_VOL */
+#define WM8994_DAC2R_SPKMIXR_VOL_SHIFT               6  /* DAC2R_SPKMIXR_VOL */
+#define WM8994_DAC2R_SPKMIXR_VOL_WIDTH               1  /* DAC2R_SPKMIXR_VOL */
+#define WM8994_MIXINR_SPKMIXR_VOL               0x0020  /* MIXINR_SPKMIXR_VOL */
+#define WM8994_MIXINR_SPKMIXR_VOL_MASK          0x0020  /* MIXINR_SPKMIXR_VOL */
+#define WM8994_MIXINR_SPKMIXR_VOL_SHIFT              5  /* MIXINR_SPKMIXR_VOL */
+#define WM8994_MIXINR_SPKMIXR_VOL_WIDTH              1  /* MIXINR_SPKMIXR_VOL */
+#define WM8994_IN1RP_SPKMIXR_VOL                0x0010  /* IN1RP_SPKMIXR_VOL */
+#define WM8994_IN1RP_SPKMIXR_VOL_MASK           0x0010  /* IN1RP_SPKMIXR_VOL */
+#define WM8994_IN1RP_SPKMIXR_VOL_SHIFT               4  /* IN1RP_SPKMIXR_VOL */
+#define WM8994_IN1RP_SPKMIXR_VOL_WIDTH               1  /* IN1RP_SPKMIXR_VOL */
+#define WM8994_MIXOUTR_SPKMIXR_VOL              0x0008  /* MIXOUTR_SPKMIXR_VOL */
+#define WM8994_MIXOUTR_SPKMIXR_VOL_MASK         0x0008  /* MIXOUTR_SPKMIXR_VOL */
+#define WM8994_MIXOUTR_SPKMIXR_VOL_SHIFT             3  /* MIXOUTR_SPKMIXR_VOL */
+#define WM8994_MIXOUTR_SPKMIXR_VOL_WIDTH             1  /* MIXOUTR_SPKMIXR_VOL */
+#define WM8994_DAC1R_SPKMIXR_VOL                0x0004  /* DAC1R_SPKMIXR_VOL */
+#define WM8994_DAC1R_SPKMIXR_VOL_MASK           0x0004  /* DAC1R_SPKMIXR_VOL */
+#define WM8994_DAC1R_SPKMIXR_VOL_SHIFT               2  /* DAC1R_SPKMIXR_VOL */
+#define WM8994_DAC1R_SPKMIXR_VOL_WIDTH               1  /* DAC1R_SPKMIXR_VOL */
+#define WM8994_SPKMIXR_VOL_MASK                 0x0003  /* SPKMIXR_VOL - [1:0] */
+#define WM8994_SPKMIXR_VOL_SHIFT                     0  /* SPKMIXR_VOL - [1:0] */
+#define WM8994_SPKMIXR_VOL_WIDTH                     2  /* SPKMIXR_VOL - [1:0] */
+
+/*
+ * R36 (0x24) - SPKOUT Mixers
+ */
+#define WM8994_IN2LRP_TO_SPKOUTL                0x0020  /* IN2LRP_TO_SPKOUTL */
+#define WM8994_IN2LRP_TO_SPKOUTL_MASK           0x0020  /* IN2LRP_TO_SPKOUTL */
+#define WM8994_IN2LRP_TO_SPKOUTL_SHIFT               5  /* IN2LRP_TO_SPKOUTL */
+#define WM8994_IN2LRP_TO_SPKOUTL_WIDTH               1  /* IN2LRP_TO_SPKOUTL */
+#define WM8994_SPKMIXL_TO_SPKOUTL               0x0010  /* SPKMIXL_TO_SPKOUTL */
+#define WM8994_SPKMIXL_TO_SPKOUTL_MASK          0x0010  /* SPKMIXL_TO_SPKOUTL */
+#define WM8994_SPKMIXL_TO_SPKOUTL_SHIFT              4  /* SPKMIXL_TO_SPKOUTL */
+#define WM8994_SPKMIXL_TO_SPKOUTL_WIDTH              1  /* SPKMIXL_TO_SPKOUTL */
+#define WM8994_SPKMIXR_TO_SPKOUTL               0x0008  /* SPKMIXR_TO_SPKOUTL */
+#define WM8994_SPKMIXR_TO_SPKOUTL_MASK          0x0008  /* SPKMIXR_TO_SPKOUTL */
+#define WM8994_SPKMIXR_TO_SPKOUTL_SHIFT              3  /* SPKMIXR_TO_SPKOUTL */
+#define WM8994_SPKMIXR_TO_SPKOUTL_WIDTH              1  /* SPKMIXR_TO_SPKOUTL */
+#define WM8994_IN2LRP_TO_SPKOUTR                0x0004  /* IN2LRP_TO_SPKOUTR */
+#define WM8994_IN2LRP_TO_SPKOUTR_MASK           0x0004  /* IN2LRP_TO_SPKOUTR */
+#define WM8994_IN2LRP_TO_SPKOUTR_SHIFT               2  /* IN2LRP_TO_SPKOUTR */
+#define WM8994_IN2LRP_TO_SPKOUTR_WIDTH               1  /* IN2LRP_TO_SPKOUTR */
+#define WM8994_SPKMIXL_TO_SPKOUTR               0x0002  /* SPKMIXL_TO_SPKOUTR */
+#define WM8994_SPKMIXL_TO_SPKOUTR_MASK          0x0002  /* SPKMIXL_TO_SPKOUTR */
+#define WM8994_SPKMIXL_TO_SPKOUTR_SHIFT              1  /* SPKMIXL_TO_SPKOUTR */
+#define WM8994_SPKMIXL_TO_SPKOUTR_WIDTH              1  /* SPKMIXL_TO_SPKOUTR */
+#define WM8994_SPKMIXR_TO_SPKOUTR               0x0001  /* SPKMIXR_TO_SPKOUTR */
+#define WM8994_SPKMIXR_TO_SPKOUTR_MASK          0x0001  /* SPKMIXR_TO_SPKOUTR */
+#define WM8994_SPKMIXR_TO_SPKOUTR_SHIFT              0  /* SPKMIXR_TO_SPKOUTR */
+#define WM8994_SPKMIXR_TO_SPKOUTR_WIDTH              1  /* SPKMIXR_TO_SPKOUTR */
+
+/*
+ * R37 (0x25) - ClassD
+ */
+#define WM8994_SPKOUTL_BOOST_MASK               0x0038  /* SPKOUTL_BOOST - [5:3] */
+#define WM8994_SPKOUTL_BOOST_SHIFT                   3  /* SPKOUTL_BOOST - [5:3] */
+#define WM8994_SPKOUTL_BOOST_WIDTH                   3  /* SPKOUTL_BOOST - [5:3] */
+#define WM8994_SPKOUTR_BOOST_MASK               0x0007  /* SPKOUTR_BOOST - [2:0] */
+#define WM8994_SPKOUTR_BOOST_SHIFT                   0  /* SPKOUTR_BOOST - [2:0] */
+#define WM8994_SPKOUTR_BOOST_WIDTH                   3  /* SPKOUTR_BOOST - [2:0] */
+
+/*
+ * R38 (0x26) - Speaker Volume Left
+ */
+#define WM8994_SPKOUT_VU                        0x0100  /* SPKOUT_VU */
+#define WM8994_SPKOUT_VU_MASK                   0x0100  /* SPKOUT_VU */
+#define WM8994_SPKOUT_VU_SHIFT                       8  /* SPKOUT_VU */
+#define WM8994_SPKOUT_VU_WIDTH                       1  /* SPKOUT_VU */
+#define WM8994_SPKOUTL_ZC                       0x0080  /* SPKOUTL_ZC */
+#define WM8994_SPKOUTL_ZC_MASK                  0x0080  /* SPKOUTL_ZC */
+#define WM8994_SPKOUTL_ZC_SHIFT                      7  /* SPKOUTL_ZC */
+#define WM8994_SPKOUTL_ZC_WIDTH                      1  /* SPKOUTL_ZC */
+#define WM8994_SPKOUTL_MUTE_N                   0x0040  /* SPKOUTL_MUTE_N */
+#define WM8994_SPKOUTL_MUTE_N_MASK              0x0040  /* SPKOUTL_MUTE_N */
+#define WM8994_SPKOUTL_MUTE_N_SHIFT                  6  /* SPKOUTL_MUTE_N */
+#define WM8994_SPKOUTL_MUTE_N_WIDTH                  1  /* SPKOUTL_MUTE_N */
+#define WM8994_SPKOUTL_VOL_MASK                 0x003F  /* SPKOUTL_VOL - [5:0] */
+#define WM8994_SPKOUTL_VOL_SHIFT                     0  /* SPKOUTL_VOL - [5:0] */
+#define WM8994_SPKOUTL_VOL_WIDTH                     6  /* SPKOUTL_VOL - [5:0] */
+
+/*
+ * R39 (0x27) - Speaker Volume Right
+ */
+#define WM8994_SPKOUT_VU                        0x0100  /* SPKOUT_VU */
+#define WM8994_SPKOUT_VU_MASK                   0x0100  /* SPKOUT_VU */
+#define WM8994_SPKOUT_VU_SHIFT                       8  /* SPKOUT_VU */
+#define WM8994_SPKOUT_VU_WIDTH                       1  /* SPKOUT_VU */
+#define WM8994_SPKOUTR_ZC                       0x0080  /* SPKOUTR_ZC */
+#define WM8994_SPKOUTR_ZC_MASK                  0x0080  /* SPKOUTR_ZC */
+#define WM8994_SPKOUTR_ZC_SHIFT                      7  /* SPKOUTR_ZC */
+#define WM8994_SPKOUTR_ZC_WIDTH                      1  /* SPKOUTR_ZC */
+#define WM8994_SPKOUTR_MUTE_N                   0x0040  /* SPKOUTR_MUTE_N */
+#define WM8994_SPKOUTR_MUTE_N_MASK              0x0040  /* SPKOUTR_MUTE_N */
+#define WM8994_SPKOUTR_MUTE_N_SHIFT                  6  /* SPKOUTR_MUTE_N */
+#define WM8994_SPKOUTR_MUTE_N_WIDTH                  1  /* SPKOUTR_MUTE_N */
+#define WM8994_SPKOUTR_VOL_MASK                 0x003F  /* SPKOUTR_VOL - [5:0] */
+#define WM8994_SPKOUTR_VOL_SHIFT                     0  /* SPKOUTR_VOL - [5:0] */
+#define WM8994_SPKOUTR_VOL_WIDTH                     6  /* SPKOUTR_VOL - [5:0] */
+
+/*
+ * R40 (0x28) - Input Mixer (2)
+ */
+#define WM8994_IN2LP_TO_IN2L                    0x0080  /* IN2LP_TO_IN2L */
+#define WM8994_IN2LP_TO_IN2L_MASK               0x0080  /* IN2LP_TO_IN2L */
+#define WM8994_IN2LP_TO_IN2L_SHIFT                   7  /* IN2LP_TO_IN2L */
+#define WM8994_IN2LP_TO_IN2L_WIDTH                   1  /* IN2LP_TO_IN2L */
+#define WM8994_IN2LN_TO_IN2L                    0x0040  /* IN2LN_TO_IN2L */
+#define WM8994_IN2LN_TO_IN2L_MASK               0x0040  /* IN2LN_TO_IN2L */
+#define WM8994_IN2LN_TO_IN2L_SHIFT                   6  /* IN2LN_TO_IN2L */
+#define WM8994_IN2LN_TO_IN2L_WIDTH                   1  /* IN2LN_TO_IN2L */
+#define WM8994_IN1LP_TO_IN1L                    0x0020  /* IN1LP_TO_IN1L */
+#define WM8994_IN1LP_TO_IN1L_MASK               0x0020  /* IN1LP_TO_IN1L */
+#define WM8994_IN1LP_TO_IN1L_SHIFT                   5  /* IN1LP_TO_IN1L */
+#define WM8994_IN1LP_TO_IN1L_WIDTH                   1  /* IN1LP_TO_IN1L */
+#define WM8994_IN1LN_TO_IN1L                    0x0010  /* IN1LN_TO_IN1L */
+#define WM8994_IN1LN_TO_IN1L_MASK               0x0010  /* IN1LN_TO_IN1L */
+#define WM8994_IN1LN_TO_IN1L_SHIFT                   4  /* IN1LN_TO_IN1L */
+#define WM8994_IN1LN_TO_IN1L_WIDTH                   1  /* IN1LN_TO_IN1L */
+#define WM8994_IN2RP_TO_IN2R                    0x0008  /* IN2RP_TO_IN2R */
+#define WM8994_IN2RP_TO_IN2R_MASK               0x0008  /* IN2RP_TO_IN2R */
+#define WM8994_IN2RP_TO_IN2R_SHIFT                   3  /* IN2RP_TO_IN2R */
+#define WM8994_IN2RP_TO_IN2R_WIDTH                   1  /* IN2RP_TO_IN2R */
+#define WM8994_IN2RN_TO_IN2R                    0x0004  /* IN2RN_TO_IN2R */
+#define WM8994_IN2RN_TO_IN2R_MASK               0x0004  /* IN2RN_TO_IN2R */
+#define WM8994_IN2RN_TO_IN2R_SHIFT                   2  /* IN2RN_TO_IN2R */
+#define WM8994_IN2RN_TO_IN2R_WIDTH                   1  /* IN2RN_TO_IN2R */
+#define WM8994_IN1RP_TO_IN1R                    0x0002  /* IN1RP_TO_IN1R */
+#define WM8994_IN1RP_TO_IN1R_MASK               0x0002  /* IN1RP_TO_IN1R */
+#define WM8994_IN1RP_TO_IN1R_SHIFT                   1  /* IN1RP_TO_IN1R */
+#define WM8994_IN1RP_TO_IN1R_WIDTH                   1  /* IN1RP_TO_IN1R */
+#define WM8994_IN1RN_TO_IN1R                    0x0001  /* IN1RN_TO_IN1R */
+#define WM8994_IN1RN_TO_IN1R_MASK               0x0001  /* IN1RN_TO_IN1R */
+#define WM8994_IN1RN_TO_IN1R_SHIFT                   0  /* IN1RN_TO_IN1R */
+#define WM8994_IN1RN_TO_IN1R_WIDTH                   1  /* IN1RN_TO_IN1R */
+
+/*
+ * R41 (0x29) - Input Mixer (3)
+ */
+#define WM8994_IN2L_TO_MIXINL                   0x0100  /* IN2L_TO_MIXINL */
+#define WM8994_IN2L_TO_MIXINL_MASK              0x0100  /* IN2L_TO_MIXINL */
+#define WM8994_IN2L_TO_MIXINL_SHIFT                  8  /* IN2L_TO_MIXINL */
+#define WM8994_IN2L_TO_MIXINL_WIDTH                  1  /* IN2L_TO_MIXINL */
+#define WM8994_IN2L_MIXINL_VOL                  0x0080  /* IN2L_MIXINL_VOL */
+#define WM8994_IN2L_MIXINL_VOL_MASK             0x0080  /* IN2L_MIXINL_VOL */
+#define WM8994_IN2L_MIXINL_VOL_SHIFT                 7  /* IN2L_MIXINL_VOL */
+#define WM8994_IN2L_MIXINL_VOL_WIDTH                 1  /* IN2L_MIXINL_VOL */
+#define WM8994_IN1L_TO_MIXINL                   0x0020  /* IN1L_TO_MIXINL */
+#define WM8994_IN1L_TO_MIXINL_MASK              0x0020  /* IN1L_TO_MIXINL */
+#define WM8994_IN1L_TO_MIXINL_SHIFT                  5  /* IN1L_TO_MIXINL */
+#define WM8994_IN1L_TO_MIXINL_WIDTH                  1  /* IN1L_TO_MIXINL */
+#define WM8994_IN1L_MIXINL_VOL                  0x0010  /* IN1L_MIXINL_VOL */
+#define WM8994_IN1L_MIXINL_VOL_MASK             0x0010  /* IN1L_MIXINL_VOL */
+#define WM8994_IN1L_MIXINL_VOL_SHIFT                 4  /* IN1L_MIXINL_VOL */
+#define WM8994_IN1L_MIXINL_VOL_WIDTH                 1  /* IN1L_MIXINL_VOL */
+#define WM8994_MIXOUTL_MIXINL_VOL_MASK          0x0007  /* MIXOUTL_MIXINL_VOL - [2:0] */
+#define WM8994_MIXOUTL_MIXINL_VOL_SHIFT              0  /* MIXOUTL_MIXINL_VOL - [2:0] */
+#define WM8994_MIXOUTL_MIXINL_VOL_WIDTH              3  /* MIXOUTL_MIXINL_VOL - [2:0] */
+
+/*
+ * R42 (0x2A) - Input Mixer (4)
+ */
+#define WM8994_IN2R_TO_MIXINR                   0x0100  /* IN2R_TO_MIXINR */
+#define WM8994_IN2R_TO_MIXINR_MASK              0x0100  /* IN2R_TO_MIXINR */
+#define WM8994_IN2R_TO_MIXINR_SHIFT                  8  /* IN2R_TO_MIXINR */
+#define WM8994_IN2R_TO_MIXINR_WIDTH                  1  /* IN2R_TO_MIXINR */
+#define WM8994_IN2R_MIXINR_VOL                  0x0080  /* IN2R_MIXINR_VOL */
+#define WM8994_IN2R_MIXINR_VOL_MASK             0x0080  /* IN2R_MIXINR_VOL */
+#define WM8994_IN2R_MIXINR_VOL_SHIFT                 7  /* IN2R_MIXINR_VOL */
+#define WM8994_IN2R_MIXINR_VOL_WIDTH                 1  /* IN2R_MIXINR_VOL */
+#define WM8994_IN1R_TO_MIXINR                   0x0020  /* IN1R_TO_MIXINR */
+#define WM8994_IN1R_TO_MIXINR_MASK              0x0020  /* IN1R_TO_MIXINR */
+#define WM8994_IN1R_TO_MIXINR_SHIFT                  5  /* IN1R_TO_MIXINR */
+#define WM8994_IN1R_TO_MIXINR_WIDTH                  1  /* IN1R_TO_MIXINR */
+#define WM8994_IN1R_MIXINR_VOL                  0x0010  /* IN1R_MIXINR_VOL */
+#define WM8994_IN1R_MIXINR_VOL_MASK             0x0010  /* IN1R_MIXINR_VOL */
+#define WM8994_IN1R_MIXINR_VOL_SHIFT                 4  /* IN1R_MIXINR_VOL */
+#define WM8994_IN1R_MIXINR_VOL_WIDTH                 1  /* IN1R_MIXINR_VOL */
+#define WM8994_MIXOUTR_MIXINR_VOL_MASK          0x0007  /* MIXOUTR_MIXINR_VOL - [2:0] */
+#define WM8994_MIXOUTR_MIXINR_VOL_SHIFT              0  /* MIXOUTR_MIXINR_VOL - [2:0] */
+#define WM8994_MIXOUTR_MIXINR_VOL_WIDTH              3  /* MIXOUTR_MIXINR_VOL - [2:0] */
+
+/*
+ * R43 (0x2B) - Input Mixer (5)
+ */
+#define WM8994_IN1LP_MIXINL_VOL_MASK            0x01C0  /* IN1LP_MIXINL_VOL - [8:6] */
+#define WM8994_IN1LP_MIXINL_VOL_SHIFT                6  /* IN1LP_MIXINL_VOL - [8:6] */
+#define WM8994_IN1LP_MIXINL_VOL_WIDTH                3  /* IN1LP_MIXINL_VOL - [8:6] */
+#define WM8994_IN2LRP_MIXINL_VOL_MASK           0x0007  /* IN2LRP_MIXINL_VOL - [2:0] */
+#define WM8994_IN2LRP_MIXINL_VOL_SHIFT               0  /* IN2LRP_MIXINL_VOL - [2:0] */
+#define WM8994_IN2LRP_MIXINL_VOL_WIDTH               3  /* IN2LRP_MIXINL_VOL - [2:0] */
+
+/*
+ * R44 (0x2C) - Input Mixer (6)
+ */
+#define WM8994_IN1RP_MIXINR_VOL_MASK            0x01C0  /* IN1RP_MIXINR_VOL - [8:6] */
+#define WM8994_IN1RP_MIXINR_VOL_SHIFT                6  /* IN1RP_MIXINR_VOL - [8:6] */
+#define WM8994_IN1RP_MIXINR_VOL_WIDTH                3  /* IN1RP_MIXINR_VOL - [8:6] */
+#define WM8994_IN2LRP_MIXINR_VOL_MASK           0x0007  /* IN2LRP_MIXINR_VOL - [2:0] */
+#define WM8994_IN2LRP_MIXINR_VOL_SHIFT               0  /* IN2LRP_MIXINR_VOL - [2:0] */
+#define WM8994_IN2LRP_MIXINR_VOL_WIDTH               3  /* IN2LRP_MIXINR_VOL - [2:0] */
+
+/*
+ * R45 (0x2D) - Output Mixer (1)
+ */
+#define WM8994_DAC1L_TO_HPOUT1L                 0x0100  /* DAC1L_TO_HPOUT1L */
+#define WM8994_DAC1L_TO_HPOUT1L_MASK            0x0100  /* DAC1L_TO_HPOUT1L */
+#define WM8994_DAC1L_TO_HPOUT1L_SHIFT                8  /* DAC1L_TO_HPOUT1L */
+#define WM8994_DAC1L_TO_HPOUT1L_WIDTH                1  /* DAC1L_TO_HPOUT1L */
+#define WM8994_MIXINR_TO_MIXOUTL                0x0080  /* MIXINR_TO_MIXOUTL */
+#define WM8994_MIXINR_TO_MIXOUTL_MASK           0x0080  /* MIXINR_TO_MIXOUTL */
+#define WM8994_MIXINR_TO_MIXOUTL_SHIFT               7  /* MIXINR_TO_MIXOUTL */
+#define WM8994_MIXINR_TO_MIXOUTL_WIDTH               1  /* MIXINR_TO_MIXOUTL */
+#define WM8994_MIXINL_TO_MIXOUTL                0x0040  /* MIXINL_TO_MIXOUTL */
+#define WM8994_MIXINL_TO_MIXOUTL_MASK           0x0040  /* MIXINL_TO_MIXOUTL */
+#define WM8994_MIXINL_TO_MIXOUTL_SHIFT               6  /* MIXINL_TO_MIXOUTL */
+#define WM8994_MIXINL_TO_MIXOUTL_WIDTH               1  /* MIXINL_TO_MIXOUTL */
+#define WM8994_IN2RN_TO_MIXOUTL                 0x0020  /* IN2RN_TO_MIXOUTL */
+#define WM8994_IN2RN_TO_MIXOUTL_MASK            0x0020  /* IN2RN_TO_MIXOUTL */
+#define WM8994_IN2RN_TO_MIXOUTL_SHIFT                5  /* IN2RN_TO_MIXOUTL */
+#define WM8994_IN2RN_TO_MIXOUTL_WIDTH                1  /* IN2RN_TO_MIXOUTL */
+#define WM8994_IN2LN_TO_MIXOUTL                 0x0010  /* IN2LN_TO_MIXOUTL */
+#define WM8994_IN2LN_TO_MIXOUTL_MASK            0x0010  /* IN2LN_TO_MIXOUTL */
+#define WM8994_IN2LN_TO_MIXOUTL_SHIFT                4  /* IN2LN_TO_MIXOUTL */
+#define WM8994_IN2LN_TO_MIXOUTL_WIDTH                1  /* IN2LN_TO_MIXOUTL */
+#define WM8994_IN1R_TO_MIXOUTL                  0x0008  /* IN1R_TO_MIXOUTL */
+#define WM8994_IN1R_TO_MIXOUTL_MASK             0x0008  /* IN1R_TO_MIXOUTL */
+#define WM8994_IN1R_TO_MIXOUTL_SHIFT                 3  /* IN1R_TO_MIXOUTL */
+#define WM8994_IN1R_TO_MIXOUTL_WIDTH                 1  /* IN1R_TO_MIXOUTL */
+#define WM8994_IN1L_TO_MIXOUTL                  0x0004  /* IN1L_TO_MIXOUTL */
+#define WM8994_IN1L_TO_MIXOUTL_MASK             0x0004  /* IN1L_TO_MIXOUTL */
+#define WM8994_IN1L_TO_MIXOUTL_SHIFT                 2  /* IN1L_TO_MIXOUTL */
+#define WM8994_IN1L_TO_MIXOUTL_WIDTH                 1  /* IN1L_TO_MIXOUTL */
+#define WM8994_IN2LP_TO_MIXOUTL                 0x0002  /* IN2LP_TO_MIXOUTL */
+#define WM8994_IN2LP_TO_MIXOUTL_MASK            0x0002  /* IN2LP_TO_MIXOUTL */
+#define WM8994_IN2LP_TO_MIXOUTL_SHIFT                1  /* IN2LP_TO_MIXOUTL */
+#define WM8994_IN2LP_TO_MIXOUTL_WIDTH                1  /* IN2LP_TO_MIXOUTL */
+#define WM8994_DAC1L_TO_MIXOUTL                 0x0001  /* DAC1L_TO_MIXOUTL */
+#define WM8994_DAC1L_TO_MIXOUTL_MASK            0x0001  /* DAC1L_TO_MIXOUTL */
+#define WM8994_DAC1L_TO_MIXOUTL_SHIFT                0  /* DAC1L_TO_MIXOUTL */
+#define WM8994_DAC1L_TO_MIXOUTL_WIDTH                1  /* DAC1L_TO_MIXOUTL */
+
+/*
+ * R46 (0x2E) - Output Mixer (2)
+ */
+#define WM8994_DAC1R_TO_HPOUT1R                 0x0100  /* DAC1R_TO_HPOUT1R */
+#define WM8994_DAC1R_TO_HPOUT1R_MASK            0x0100  /* DAC1R_TO_HPOUT1R */
+#define WM8994_DAC1R_TO_HPOUT1R_SHIFT                8  /* DAC1R_TO_HPOUT1R */
+#define WM8994_DAC1R_TO_HPOUT1R_WIDTH                1  /* DAC1R_TO_HPOUT1R */
+#define WM8994_MIXINL_TO_MIXOUTR                0x0080  /* MIXINL_TO_MIXOUTR */
+#define WM8994_MIXINL_TO_MIXOUTR_MASK           0x0080  /* MIXINL_TO_MIXOUTR */
+#define WM8994_MIXINL_TO_MIXOUTR_SHIFT               7  /* MIXINL_TO_MIXOUTR */
+#define WM8994_MIXINL_TO_MIXOUTR_WIDTH               1  /* MIXINL_TO_MIXOUTR */
+#define WM8994_MIXINR_TO_MIXOUTR                0x0040  /* MIXINR_TO_MIXOUTR */
+#define WM8994_MIXINR_TO_MIXOUTR_MASK           0x0040  /* MIXINR_TO_MIXOUTR */
+#define WM8994_MIXINR_TO_MIXOUTR_SHIFT               6  /* MIXINR_TO_MIXOUTR */
+#define WM8994_MIXINR_TO_MIXOUTR_WIDTH               1  /* MIXINR_TO_MIXOUTR */
+#define WM8994_IN2LN_TO_MIXOUTR                 0x0020  /* IN2LN_TO_MIXOUTR */
+#define WM8994_IN2LN_TO_MIXOUTR_MASK            0x0020  /* IN2LN_TO_MIXOUTR */
+#define WM8994_IN2LN_TO_MIXOUTR_SHIFT                5  /* IN2LN_TO_MIXOUTR */
+#define WM8994_IN2LN_TO_MIXOUTR_WIDTH                1  /* IN2LN_TO_MIXOUTR */
+#define WM8994_IN2RN_TO_MIXOUTR                 0x0010  /* IN2RN_TO_MIXOUTR */
+#define WM8994_IN2RN_TO_MIXOUTR_MASK            0x0010  /* IN2RN_TO_MIXOUTR */
+#define WM8994_IN2RN_TO_MIXOUTR_SHIFT                4  /* IN2RN_TO_MIXOUTR */
+#define WM8994_IN2RN_TO_MIXOUTR_WIDTH                1  /* IN2RN_TO_MIXOUTR */
+#define WM8994_IN1L_TO_MIXOUTR                  0x0008  /* IN1L_TO_MIXOUTR */
+#define WM8994_IN1L_TO_MIXOUTR_MASK             0x0008  /* IN1L_TO_MIXOUTR */
+#define WM8994_IN1L_TO_MIXOUTR_SHIFT                 3  /* IN1L_TO_MIXOUTR */
+#define WM8994_IN1L_TO_MIXOUTR_WIDTH                 1  /* IN1L_TO_MIXOUTR */
+#define WM8994_IN1R_TO_MIXOUTR                  0x0004  /* IN1R_TO_MIXOUTR */
+#define WM8994_IN1R_TO_MIXOUTR_MASK             0x0004  /* IN1R_TO_MIXOUTR */
+#define WM8994_IN1R_TO_MIXOUTR_SHIFT                 2  /* IN1R_TO_MIXOUTR */
+#define WM8994_IN1R_TO_MIXOUTR_WIDTH                 1  /* IN1R_TO_MIXOUTR */
+#define WM8994_IN2RP_TO_MIXOUTR                 0x0002  /* IN2RP_TO_MIXOUTR */
+#define WM8994_IN2RP_TO_MIXOUTR_MASK            0x0002  /* IN2RP_TO_MIXOUTR */
+#define WM8994_IN2RP_TO_MIXOUTR_SHIFT                1  /* IN2RP_TO_MIXOUTR */
+#define WM8994_IN2RP_TO_MIXOUTR_WIDTH                1  /* IN2RP_TO_MIXOUTR */
+#define WM8994_DAC1R_TO_MIXOUTR                 0x0001  /* DAC1R_TO_MIXOUTR */
+#define WM8994_DAC1R_TO_MIXOUTR_MASK            0x0001  /* DAC1R_TO_MIXOUTR */
+#define WM8994_DAC1R_TO_MIXOUTR_SHIFT                0  /* DAC1R_TO_MIXOUTR */
+#define WM8994_DAC1R_TO_MIXOUTR_WIDTH                1  /* DAC1R_TO_MIXOUTR */
+
+/*
+ * R47 (0x2F) - Output Mixer (3)
+ */
+#define WM8994_IN2LP_MIXOUTL_VOL_MASK           0x0E00  /* IN2LP_MIXOUTL_VOL - [11:9] */
+#define WM8994_IN2LP_MIXOUTL_VOL_SHIFT               9  /* IN2LP_MIXOUTL_VOL - [11:9] */
+#define WM8994_IN2LP_MIXOUTL_VOL_WIDTH               3  /* IN2LP_MIXOUTL_VOL - [11:9] */
+#define WM8994_IN2LN_MIXOUTL_VOL_MASK           0x01C0  /* IN2LN_MIXOUTL_VOL - [8:6] */
+#define WM8994_IN2LN_MIXOUTL_VOL_SHIFT               6  /* IN2LN_MIXOUTL_VOL - [8:6] */
+#define WM8994_IN2LN_MIXOUTL_VOL_WIDTH               3  /* IN2LN_MIXOUTL_VOL - [8:6] */
+#define WM8994_IN1R_MIXOUTL_VOL_MASK            0x0038  /* IN1R_MIXOUTL_VOL - [5:3] */
+#define WM8994_IN1R_MIXOUTL_VOL_SHIFT                3  /* IN1R_MIXOUTL_VOL - [5:3] */
+#define WM8994_IN1R_MIXOUTL_VOL_WIDTH                3  /* IN1R_MIXOUTL_VOL - [5:3] */
+#define WM8994_IN1L_MIXOUTL_VOL_MASK            0x0007  /* IN1L_MIXOUTL_VOL - [2:0] */
+#define WM8994_IN1L_MIXOUTL_VOL_SHIFT                0  /* IN1L_MIXOUTL_VOL - [2:0] */
+#define WM8994_IN1L_MIXOUTL_VOL_WIDTH                3  /* IN1L_MIXOUTL_VOL - [2:0] */
+
+/*
+ * R48 (0x30) - Output Mixer (4)
+ */
+#define WM8994_IN2RP_MIXOUTR_VOL_MASK           0x0E00  /* IN2RP_MIXOUTR_VOL - [11:9] */
+#define WM8994_IN2RP_MIXOUTR_VOL_SHIFT               9  /* IN2RP_MIXOUTR_VOL - [11:9] */
+#define WM8994_IN2RP_MIXOUTR_VOL_WIDTH               3  /* IN2RP_MIXOUTR_VOL - [11:9] */
+#define WM8994_IN2RN_MIXOUTR_VOL_MASK           0x01C0  /* IN2RN_MIXOUTR_VOL - [8:6] */
+#define WM8994_IN2RN_MIXOUTR_VOL_SHIFT               6  /* IN2RN_MIXOUTR_VOL - [8:6] */
+#define WM8994_IN2RN_MIXOUTR_VOL_WIDTH               3  /* IN2RN_MIXOUTR_VOL - [8:6] */
+#define WM8994_IN1L_MIXOUTR_VOL_MASK            0x0038  /* IN1L_MIXOUTR_VOL - [5:3] */
+#define WM8994_IN1L_MIXOUTR_VOL_SHIFT                3  /* IN1L_MIXOUTR_VOL - [5:3] */
+#define WM8994_IN1L_MIXOUTR_VOL_WIDTH                3  /* IN1L_MIXOUTR_VOL - [5:3] */
+#define WM8994_IN1R_MIXOUTR_VOL_MASK            0x0007  /* IN1R_MIXOUTR_VOL - [2:0] */
+#define WM8994_IN1R_MIXOUTR_VOL_SHIFT                0  /* IN1R_MIXOUTR_VOL - [2:0] */
+#define WM8994_IN1R_MIXOUTR_VOL_WIDTH                3  /* IN1R_MIXOUTR_VOL - [2:0] */
+
+/*
+ * R49 (0x31) - Output Mixer (5)
+ */
+#define WM8994_DAC1L_MIXOUTL_VOL_MASK           0x0E00  /* DAC1L_MIXOUTL_VOL - [11:9] */
+#define WM8994_DAC1L_MIXOUTL_VOL_SHIFT               9  /* DAC1L_MIXOUTL_VOL - [11:9] */
+#define WM8994_DAC1L_MIXOUTL_VOL_WIDTH               3  /* DAC1L_MIXOUTL_VOL - [11:9] */
+#define WM8994_IN2RN_MIXOUTL_VOL_MASK           0x01C0  /* IN2RN_MIXOUTL_VOL - [8:6] */
+#define WM8994_IN2RN_MIXOUTL_VOL_SHIFT               6  /* IN2RN_MIXOUTL_VOL - [8:6] */
+#define WM8994_IN2RN_MIXOUTL_VOL_WIDTH               3  /* IN2RN_MIXOUTL_VOL - [8:6] */
+#define WM8994_MIXINR_MIXOUTL_VOL_MASK          0x0038  /* MIXINR_MIXOUTL_VOL - [5:3] */
+#define WM8994_MIXINR_MIXOUTL_VOL_SHIFT              3  /* MIXINR_MIXOUTL_VOL - [5:3] */
+#define WM8994_MIXINR_MIXOUTL_VOL_WIDTH              3  /* MIXINR_MIXOUTL_VOL - [5:3] */
+#define WM8994_MIXINL_MIXOUTL_VOL_MASK          0x0007  /* MIXINL_MIXOUTL_VOL - [2:0] */
+#define WM8994_MIXINL_MIXOUTL_VOL_SHIFT              0  /* MIXINL_MIXOUTL_VOL - [2:0] */
+#define WM8994_MIXINL_MIXOUTL_VOL_WIDTH              3  /* MIXINL_MIXOUTL_VOL - [2:0] */
+
+/*
+ * R50 (0x32) - Output Mixer (6)
+ */
+#define WM8994_DAC1R_MIXOUTR_VOL_MASK           0x0E00  /* DAC1R_MIXOUTR_VOL - [11:9] */
+#define WM8994_DAC1R_MIXOUTR_VOL_SHIFT               9  /* DAC1R_MIXOUTR_VOL - [11:9] */
+#define WM8994_DAC1R_MIXOUTR_VOL_WIDTH               3  /* DAC1R_MIXOUTR_VOL - [11:9] */
+#define WM8994_IN2LN_MIXOUTR_VOL_MASK           0x01C0  /* IN2LN_MIXOUTR_VOL - [8:6] */
+#define WM8994_IN2LN_MIXOUTR_VOL_SHIFT               6  /* IN2LN_MIXOUTR_VOL - [8:6] */
+#define WM8994_IN2LN_MIXOUTR_VOL_WIDTH               3  /* IN2LN_MIXOUTR_VOL - [8:6] */
+#define WM8994_MIXINL_MIXOUTR_VOL_MASK          0x0038  /* MIXINL_MIXOUTR_VOL - [5:3] */
+#define WM8994_MIXINL_MIXOUTR_VOL_SHIFT              3  /* MIXINL_MIXOUTR_VOL - [5:3] */
+#define WM8994_MIXINL_MIXOUTR_VOL_WIDTH              3  /* MIXINL_MIXOUTR_VOL - [5:3] */
+#define WM8994_MIXINR_MIXOUTR_VOL_MASK          0x0007  /* MIXINR_MIXOUTR_VOL - [2:0] */
+#define WM8994_MIXINR_MIXOUTR_VOL_SHIFT              0  /* MIXINR_MIXOUTR_VOL - [2:0] */
+#define WM8994_MIXINR_MIXOUTR_VOL_WIDTH              3  /* MIXINR_MIXOUTR_VOL - [2:0] */
+
+/*
+ * R51 (0x33) - HPOUT2 Mixer
+ */
+#define WM8994_IN2LRP_TO_HPOUT2                 0x0020  /* IN2LRP_TO_HPOUT2 */
+#define WM8994_IN2LRP_TO_HPOUT2_MASK            0x0020  /* IN2LRP_TO_HPOUT2 */
+#define WM8994_IN2LRP_TO_HPOUT2_SHIFT                5  /* IN2LRP_TO_HPOUT2 */
+#define WM8994_IN2LRP_TO_HPOUT2_WIDTH                1  /* IN2LRP_TO_HPOUT2 */
+#define WM8994_MIXOUTLVOL_TO_HPOUT2             0x0010  /* MIXOUTLVOL_TO_HPOUT2 */
+#define WM8994_MIXOUTLVOL_TO_HPOUT2_MASK        0x0010  /* MIXOUTLVOL_TO_HPOUT2 */
+#define WM8994_MIXOUTLVOL_TO_HPOUT2_SHIFT            4  /* MIXOUTLVOL_TO_HPOUT2 */
+#define WM8994_MIXOUTLVOL_TO_HPOUT2_WIDTH            1  /* MIXOUTLVOL_TO_HPOUT2 */
+#define WM8994_MIXOUTRVOL_TO_HPOUT2             0x0008  /* MIXOUTRVOL_TO_HPOUT2 */
+#define WM8994_MIXOUTRVOL_TO_HPOUT2_MASK        0x0008  /* MIXOUTRVOL_TO_HPOUT2 */
+#define WM8994_MIXOUTRVOL_TO_HPOUT2_SHIFT            3  /* MIXOUTRVOL_TO_HPOUT2 */
+#define WM8994_MIXOUTRVOL_TO_HPOUT2_WIDTH            1  /* MIXOUTRVOL_TO_HPOUT2 */
+
+/*
+ * R52 (0x34) - Line Mixer (1)
+ */
+#define WM8994_MIXOUTL_TO_LINEOUT1N             0x0040  /* MIXOUTL_TO_LINEOUT1N */
+#define WM8994_MIXOUTL_TO_LINEOUT1N_MASK        0x0040  /* MIXOUTL_TO_LINEOUT1N */
+#define WM8994_MIXOUTL_TO_LINEOUT1N_SHIFT            6  /* MIXOUTL_TO_LINEOUT1N */
+#define WM8994_MIXOUTL_TO_LINEOUT1N_WIDTH            1  /* MIXOUTL_TO_LINEOUT1N */
+#define WM8994_MIXOUTR_TO_LINEOUT1N             0x0020  /* MIXOUTR_TO_LINEOUT1N */
+#define WM8994_MIXOUTR_TO_LINEOUT1N_MASK        0x0020  /* MIXOUTR_TO_LINEOUT1N */
+#define WM8994_MIXOUTR_TO_LINEOUT1N_SHIFT            5  /* MIXOUTR_TO_LINEOUT1N */
+#define WM8994_MIXOUTR_TO_LINEOUT1N_WIDTH            1  /* MIXOUTR_TO_LINEOUT1N */
+#define WM8994_LINEOUT1_MODE                    0x0010  /* LINEOUT1_MODE */
+#define WM8994_LINEOUT1_MODE_MASK               0x0010  /* LINEOUT1_MODE */
+#define WM8994_LINEOUT1_MODE_SHIFT                   4  /* LINEOUT1_MODE */
+#define WM8994_LINEOUT1_MODE_WIDTH                   1  /* LINEOUT1_MODE */
+#define WM8994_IN1R_TO_LINEOUT1P                0x0004  /* IN1R_TO_LINEOUT1P */
+#define WM8994_IN1R_TO_LINEOUT1P_MASK           0x0004  /* IN1R_TO_LINEOUT1P */
+#define WM8994_IN1R_TO_LINEOUT1P_SHIFT               2  /* IN1R_TO_LINEOUT1P */
+#define WM8994_IN1R_TO_LINEOUT1P_WIDTH               1  /* IN1R_TO_LINEOUT1P */
+#define WM8994_IN1L_TO_LINEOUT1P                0x0002  /* IN1L_TO_LINEOUT1P */
+#define WM8994_IN1L_TO_LINEOUT1P_MASK           0x0002  /* IN1L_TO_LINEOUT1P */
+#define WM8994_IN1L_TO_LINEOUT1P_SHIFT               1  /* IN1L_TO_LINEOUT1P */
+#define WM8994_IN1L_TO_LINEOUT1P_WIDTH               1  /* IN1L_TO_LINEOUT1P */
+#define WM8994_MIXOUTL_TO_LINEOUT1P             0x0001  /* MIXOUTL_TO_LINEOUT1P */
+#define WM8994_MIXOUTL_TO_LINEOUT1P_MASK        0x0001  /* MIXOUTL_TO_LINEOUT1P */
+#define WM8994_MIXOUTL_TO_LINEOUT1P_SHIFT            0  /* MIXOUTL_TO_LINEOUT1P */
+#define WM8994_MIXOUTL_TO_LINEOUT1P_WIDTH            1  /* MIXOUTL_TO_LINEOUT1P */
+
+/*
+ * R53 (0x35) - Line Mixer (2)
+ */
+#define WM8994_MIXOUTR_TO_LINEOUT2N             0x0040  /* MIXOUTR_TO_LINEOUT2N */
+#define WM8994_MIXOUTR_TO_LINEOUT2N_MASK        0x0040  /* MIXOUTR_TO_LINEOUT2N */
+#define WM8994_MIXOUTR_TO_LINEOUT2N_SHIFT            6  /* MIXOUTR_TO_LINEOUT2N */
+#define WM8994_MIXOUTR_TO_LINEOUT2N_WIDTH            1  /* MIXOUTR_TO_LINEOUT2N */
+#define WM8994_MIXOUTL_TO_LINEOUT2N             0x0020  /* MIXOUTL_TO_LINEOUT2N */
+#define WM8994_MIXOUTL_TO_LINEOUT2N_MASK        0x0020  /* MIXOUTL_TO_LINEOUT2N */
+#define WM8994_MIXOUTL_TO_LINEOUT2N_SHIFT            5  /* MIXOUTL_TO_LINEOUT2N */
+#define WM8994_MIXOUTL_TO_LINEOUT2N_WIDTH            1  /* MIXOUTL_TO_LINEOUT2N */
+#define WM8994_LINEOUT2_MODE                    0x0010  /* LINEOUT2_MODE */
+#define WM8994_LINEOUT2_MODE_MASK               0x0010  /* LINEOUT2_MODE */
+#define WM8994_LINEOUT2_MODE_SHIFT                   4  /* LINEOUT2_MODE */
+#define WM8994_LINEOUT2_MODE_WIDTH                   1  /* LINEOUT2_MODE */
+#define WM8994_IN1L_TO_LINEOUT2P                0x0004  /* IN1L_TO_LINEOUT2P */
+#define WM8994_IN1L_TO_LINEOUT2P_MASK           0x0004  /* IN1L_TO_LINEOUT2P */
+#define WM8994_IN1L_TO_LINEOUT2P_SHIFT               2  /* IN1L_TO_LINEOUT2P */
+#define WM8994_IN1L_TO_LINEOUT2P_WIDTH               1  /* IN1L_TO_LINEOUT2P */
+#define WM8994_IN1R_TO_LINEOUT2P                0x0002  /* IN1R_TO_LINEOUT2P */
+#define WM8994_IN1R_TO_LINEOUT2P_MASK           0x0002  /* IN1R_TO_LINEOUT2P */
+#define WM8994_IN1R_TO_LINEOUT2P_SHIFT               1  /* IN1R_TO_LINEOUT2P */
+#define WM8994_IN1R_TO_LINEOUT2P_WIDTH               1  /* IN1R_TO_LINEOUT2P */
+#define WM8994_MIXOUTR_TO_LINEOUT2P             0x0001  /* MIXOUTR_TO_LINEOUT2P */
+#define WM8994_MIXOUTR_TO_LINEOUT2P_MASK        0x0001  /* MIXOUTR_TO_LINEOUT2P */
+#define WM8994_MIXOUTR_TO_LINEOUT2P_SHIFT            0  /* MIXOUTR_TO_LINEOUT2P */
+#define WM8994_MIXOUTR_TO_LINEOUT2P_WIDTH            1  /* MIXOUTR_TO_LINEOUT2P */
+
+/*
+ * R54 (0x36) - Speaker Mixer
+ */
+#define WM8994_DAC2L_TO_SPKMIXL                 0x0200  /* DAC2L_TO_SPKMIXL */
+#define WM8994_DAC2L_TO_SPKMIXL_MASK            0x0200  /* DAC2L_TO_SPKMIXL */
+#define WM8994_DAC2L_TO_SPKMIXL_SHIFT                9  /* DAC2L_TO_SPKMIXL */
+#define WM8994_DAC2L_TO_SPKMIXL_WIDTH                1  /* DAC2L_TO_SPKMIXL */
+#define WM8994_DAC2R_TO_SPKMIXR                 0x0100  /* DAC2R_TO_SPKMIXR */
+#define WM8994_DAC2R_TO_SPKMIXR_MASK            0x0100  /* DAC2R_TO_SPKMIXR */
+#define WM8994_DAC2R_TO_SPKMIXR_SHIFT                8  /* DAC2R_TO_SPKMIXR */
+#define WM8994_DAC2R_TO_SPKMIXR_WIDTH                1  /* DAC2R_TO_SPKMIXR */
+#define WM8994_MIXINL_TO_SPKMIXL                0x0080  /* MIXINL_TO_SPKMIXL */
+#define WM8994_MIXINL_TO_SPKMIXL_MASK           0x0080  /* MIXINL_TO_SPKMIXL */
+#define WM8994_MIXINL_TO_SPKMIXL_SHIFT               7  /* MIXINL_TO_SPKMIXL */
+#define WM8994_MIXINL_TO_SPKMIXL_WIDTH               1  /* MIXINL_TO_SPKMIXL */
+#define WM8994_MIXINR_TO_SPKMIXR                0x0040  /* MIXINR_TO_SPKMIXR */
+#define WM8994_MIXINR_TO_SPKMIXR_MASK           0x0040  /* MIXINR_TO_SPKMIXR */
+#define WM8994_MIXINR_TO_SPKMIXR_SHIFT               6  /* MIXINR_TO_SPKMIXR */
+#define WM8994_MIXINR_TO_SPKMIXR_WIDTH               1  /* MIXINR_TO_SPKMIXR */
+#define WM8994_IN1LP_TO_SPKMIXL                 0x0020  /* IN1LP_TO_SPKMIXL */
+#define WM8994_IN1LP_TO_SPKMIXL_MASK            0x0020  /* IN1LP_TO_SPKMIXL */
+#define WM8994_IN1LP_TO_SPKMIXL_SHIFT                5  /* IN1LP_TO_SPKMIXL */
+#define WM8994_IN1LP_TO_SPKMIXL_WIDTH                1  /* IN1LP_TO_SPKMIXL */
+#define WM8994_IN1RP_TO_SPKMIXR                 0x0010  /* IN1RP_TO_SPKMIXR */
+#define WM8994_IN1RP_TO_SPKMIXR_MASK            0x0010  /* IN1RP_TO_SPKMIXR */
+#define WM8994_IN1RP_TO_SPKMIXR_SHIFT                4  /* IN1RP_TO_SPKMIXR */
+#define WM8994_IN1RP_TO_SPKMIXR_WIDTH                1  /* IN1RP_TO_SPKMIXR */
+#define WM8994_MIXOUTL_TO_SPKMIXL               0x0008  /* MIXOUTL_TO_SPKMIXL */
+#define WM8994_MIXOUTL_TO_SPKMIXL_MASK          0x0008  /* MIXOUTL_TO_SPKMIXL */
+#define WM8994_MIXOUTL_TO_SPKMIXL_SHIFT              3  /* MIXOUTL_TO_SPKMIXL */
+#define WM8994_MIXOUTL_TO_SPKMIXL_WIDTH              1  /* MIXOUTL_TO_SPKMIXL */
+#define WM8994_MIXOUTR_TO_SPKMIXR               0x0004  /* MIXOUTR_TO_SPKMIXR */
+#define WM8994_MIXOUTR_TO_SPKMIXR_MASK          0x0004  /* MIXOUTR_TO_SPKMIXR */
+#define WM8994_MIXOUTR_TO_SPKMIXR_SHIFT              2  /* MIXOUTR_TO_SPKMIXR */
+#define WM8994_MIXOUTR_TO_SPKMIXR_WIDTH              1  /* MIXOUTR_TO_SPKMIXR */
+#define WM8994_DAC1L_TO_SPKMIXL                 0x0002  /* DAC1L_TO_SPKMIXL */
+#define WM8994_DAC1L_TO_SPKMIXL_MASK            0x0002  /* DAC1L_TO_SPKMIXL */
+#define WM8994_DAC1L_TO_SPKMIXL_SHIFT                1  /* DAC1L_TO_SPKMIXL */
+#define WM8994_DAC1L_TO_SPKMIXL_WIDTH                1  /* DAC1L_TO_SPKMIXL */
+#define WM8994_DAC1R_TO_SPKMIXR                 0x0001  /* DAC1R_TO_SPKMIXR */
+#define WM8994_DAC1R_TO_SPKMIXR_MASK            0x0001  /* DAC1R_TO_SPKMIXR */
+#define WM8994_DAC1R_TO_SPKMIXR_SHIFT                0  /* DAC1R_TO_SPKMIXR */
+#define WM8994_DAC1R_TO_SPKMIXR_WIDTH                1  /* DAC1R_TO_SPKMIXR */
+
+/*
+ * R55 (0x37) - Additional Control
+ */
+#define WM8994_LINEOUT1_FB                      0x0080  /* LINEOUT1_FB */
+#define WM8994_LINEOUT1_FB_MASK                 0x0080  /* LINEOUT1_FB */
+#define WM8994_LINEOUT1_FB_SHIFT                     7  /* LINEOUT1_FB */
+#define WM8994_LINEOUT1_FB_WIDTH                     1  /* LINEOUT1_FB */
+#define WM8994_LINEOUT2_FB                      0x0040  /* LINEOUT2_FB */
+#define WM8994_LINEOUT2_FB_MASK                 0x0040  /* LINEOUT2_FB */
+#define WM8994_LINEOUT2_FB_SHIFT                     6  /* LINEOUT2_FB */
+#define WM8994_LINEOUT2_FB_WIDTH                     1  /* LINEOUT2_FB */
+#define WM8994_VROI                             0x0001  /* VROI */
+#define WM8994_VROI_MASK                        0x0001  /* VROI */
+#define WM8994_VROI_SHIFT                            0  /* VROI */
+#define WM8994_VROI_WIDTH                            1  /* VROI */
+
+/*
+ * R56 (0x38) - AntiPOP (1)
+ */
+#define WM8994_LINEOUT_VMID_BUF_ENA             0x0080  /* LINEOUT_VMID_BUF_ENA */
+#define WM8994_LINEOUT_VMID_BUF_ENA_MASK        0x0080  /* LINEOUT_VMID_BUF_ENA */
+#define WM8994_LINEOUT_VMID_BUF_ENA_SHIFT            7  /* LINEOUT_VMID_BUF_ENA */
+#define WM8994_LINEOUT_VMID_BUF_ENA_WIDTH            1  /* LINEOUT_VMID_BUF_ENA */
+#define WM8994_HPOUT2_IN_ENA                    0x0040  /* HPOUT2_IN_ENA */
+#define WM8994_HPOUT2_IN_ENA_MASK               0x0040  /* HPOUT2_IN_ENA */
+#define WM8994_HPOUT2_IN_ENA_SHIFT                   6  /* HPOUT2_IN_ENA */
+#define WM8994_HPOUT2_IN_ENA_WIDTH                   1  /* HPOUT2_IN_ENA */
+#define WM8994_LINEOUT1_DISCH                   0x0020  /* LINEOUT1_DISCH */
+#define WM8994_LINEOUT1_DISCH_MASK              0x0020  /* LINEOUT1_DISCH */
+#define WM8994_LINEOUT1_DISCH_SHIFT                  5  /* LINEOUT1_DISCH */
+#define WM8994_LINEOUT1_DISCH_WIDTH                  1  /* LINEOUT1_DISCH */
+#define WM8994_LINEOUT2_DISCH                   0x0010  /* LINEOUT2_DISCH */
+#define WM8994_LINEOUT2_DISCH_MASK              0x0010  /* LINEOUT2_DISCH */
+#define WM8994_LINEOUT2_DISCH_SHIFT                  4  /* LINEOUT2_DISCH */
+#define WM8994_LINEOUT2_DISCH_WIDTH                  1  /* LINEOUT2_DISCH */
+
+/*
+ * R57 (0x39) - AntiPOP (2)
+ */
+#define WM8994_MICB2_DISCH                      0x0100  /* MICB2_DISCH */
+#define WM8994_MICB2_DISCH_MASK                 0x0100  /* MICB2_DISCH */
+#define WM8994_MICB2_DISCH_SHIFT                     8  /* MICB2_DISCH */
+#define WM8994_MICB2_DISCH_WIDTH                     1  /* MICB2_DISCH */
+#define WM8994_MICB1_DISCH                      0x0080  /* MICB1_DISCH */
+#define WM8994_MICB1_DISCH_MASK                 0x0080  /* MICB1_DISCH */
+#define WM8994_MICB1_DISCH_SHIFT                     7  /* MICB1_DISCH */
+#define WM8994_MICB1_DISCH_WIDTH                     1  /* MICB1_DISCH */
+#define WM8994_VMID_RAMP_MASK                   0x0060  /* VMID_RAMP - [6:5] */
+#define WM8994_VMID_RAMP_SHIFT                       5  /* VMID_RAMP - [6:5] */
+#define WM8994_VMID_RAMP_WIDTH                       2  /* VMID_RAMP - [6:5] */
+#define WM8994_VMID_BUF_ENA                     0x0008  /* VMID_BUF_ENA */
+#define WM8994_VMID_BUF_ENA_MASK                0x0008  /* VMID_BUF_ENA */
+#define WM8994_VMID_BUF_ENA_SHIFT                    3  /* VMID_BUF_ENA */
+#define WM8994_VMID_BUF_ENA_WIDTH                    1  /* VMID_BUF_ENA */
+#define WM8994_STARTUP_BIAS_ENA                 0x0004  /* STARTUP_BIAS_ENA */
+#define WM8994_STARTUP_BIAS_ENA_MASK            0x0004  /* STARTUP_BIAS_ENA */
+#define WM8994_STARTUP_BIAS_ENA_SHIFT                2  /* STARTUP_BIAS_ENA */
+#define WM8994_STARTUP_BIAS_ENA_WIDTH                1  /* STARTUP_BIAS_ENA */
+#define WM8994_BIAS_SRC                         0x0002  /* BIAS_SRC */
+#define WM8994_BIAS_SRC_MASK                    0x0002  /* BIAS_SRC */
+#define WM8994_BIAS_SRC_SHIFT                        1  /* BIAS_SRC */
+#define WM8994_BIAS_SRC_WIDTH                        1  /* BIAS_SRC */
+#define WM8994_VMID_DISCH                       0x0001  /* VMID_DISCH */
+#define WM8994_VMID_DISCH_MASK                  0x0001  /* VMID_DISCH */
+#define WM8994_VMID_DISCH_SHIFT                      0  /* VMID_DISCH */
+#define WM8994_VMID_DISCH_WIDTH                      1  /* VMID_DISCH */
+
+/*
+ * R58 (0x3A) - MICBIAS
+ */
+#define WM8994_MICD_SCTHR_MASK                  0x00C0  /* MICD_SCTHR - [7:6] */
+#define WM8994_MICD_SCTHR_SHIFT                      6  /* MICD_SCTHR - [7:6] */
+#define WM8994_MICD_SCTHR_WIDTH                      2  /* MICD_SCTHR - [7:6] */
+#define WM8994_MICD_THR_MASK                    0x0038  /* MICD_THR - [5:3] */
+#define WM8994_MICD_THR_SHIFT                        3  /* MICD_THR - [5:3] */
+#define WM8994_MICD_THR_WIDTH                        3  /* MICD_THR - [5:3] */
+#define WM8994_MICD_ENA                         0x0004  /* MICD_ENA */
+#define WM8994_MICD_ENA_MASK                    0x0004  /* MICD_ENA */
+#define WM8994_MICD_ENA_SHIFT                        2  /* MICD_ENA */
+#define WM8994_MICD_ENA_WIDTH                        1  /* MICD_ENA */
+#define WM8994_MICB2_LVL                        0x0002  /* MICB2_LVL */
+#define WM8994_MICB2_LVL_MASK                   0x0002  /* MICB2_LVL */
+#define WM8994_MICB2_LVL_SHIFT                       1  /* MICB2_LVL */
+#define WM8994_MICB2_LVL_WIDTH                       1  /* MICB2_LVL */
+#define WM8994_MICB1_LVL                        0x0001  /* MICB1_LVL */
+#define WM8994_MICB1_LVL_MASK                   0x0001  /* MICB1_LVL */
+#define WM8994_MICB1_LVL_SHIFT                       0  /* MICB1_LVL */
+#define WM8994_MICB1_LVL_WIDTH                       1  /* MICB1_LVL */
+
+/*
+ * R59 (0x3B) - LDO 1
+ */
+#define WM8994_LDO1_VSEL_MASK                   0x000E  /* LDO1_VSEL - [3:1] */
+#define WM8994_LDO1_VSEL_SHIFT                       1  /* LDO1_VSEL - [3:1] */
+#define WM8994_LDO1_VSEL_WIDTH                       3  /* LDO1_VSEL - [3:1] */
+#define WM8994_LDO1_DISCH                       0x0001  /* LDO1_DISCH */
+#define WM8994_LDO1_DISCH_MASK                  0x0001  /* LDO1_DISCH */
+#define WM8994_LDO1_DISCH_SHIFT                      0  /* LDO1_DISCH */
+#define WM8994_LDO1_DISCH_WIDTH                      1  /* LDO1_DISCH */
+
+/*
+ * R60 (0x3C) - LDO 2
+ */
+#define WM8994_LDO2_VSEL_MASK                   0x0006  /* LDO2_VSEL - [2:1] */
+#define WM8994_LDO2_VSEL_SHIFT                       1  /* LDO2_VSEL - [2:1] */
+#define WM8994_LDO2_VSEL_WIDTH                       2  /* LDO2_VSEL - [2:1] */
+#define WM8994_LDO2_DISCH                       0x0001  /* LDO2_DISCH */
+#define WM8994_LDO2_DISCH_MASK                  0x0001  /* LDO2_DISCH */
+#define WM8994_LDO2_DISCH_SHIFT                      0  /* LDO2_DISCH */
+#define WM8994_LDO2_DISCH_WIDTH                      1  /* LDO2_DISCH */
+
+/*
+ * R76 (0x4C) - Charge Pump (1)
+ */
+#define WM8994_CP_ENA                           0x8000  /* CP_ENA */
+#define WM8994_CP_ENA_MASK                      0x8000  /* CP_ENA */
+#define WM8994_CP_ENA_SHIFT                         15  /* CP_ENA */
+#define WM8994_CP_ENA_WIDTH                          1  /* CP_ENA */
+
+/*
+ * R81 (0x51) - Class W (1)
+ */
+#define WM8994_CP_DYN_SRC_SEL_MASK              0x0300  /* CP_DYN_SRC_SEL - [9:8] */
+#define WM8994_CP_DYN_SRC_SEL_SHIFT                  8  /* CP_DYN_SRC_SEL - [9:8] */
+#define WM8994_CP_DYN_SRC_SEL_WIDTH                  2  /* CP_DYN_SRC_SEL - [9:8] */
+#define WM8994_CP_DYN_PWR                       0x0001  /* CP_DYN_PWR */
+#define WM8994_CP_DYN_PWR_MASK                  0x0001  /* CP_DYN_PWR */
+#define WM8994_CP_DYN_PWR_SHIFT                      0  /* CP_DYN_PWR */
+#define WM8994_CP_DYN_PWR_WIDTH                      1  /* CP_DYN_PWR */
+
+/*
+ * R84 (0x54) - DC Servo (1)
+ */
+#define WM8994_DCS_TRIG_SINGLE_1                0x2000  /* DCS_TRIG_SINGLE_1 */
+#define WM8994_DCS_TRIG_SINGLE_1_MASK           0x2000  /* DCS_TRIG_SINGLE_1 */
+#define WM8994_DCS_TRIG_SINGLE_1_SHIFT              13  /* DCS_TRIG_SINGLE_1 */
+#define WM8994_DCS_TRIG_SINGLE_1_WIDTH               1  /* DCS_TRIG_SINGLE_1 */
+#define WM8994_DCS_TRIG_SINGLE_0                0x1000  /* DCS_TRIG_SINGLE_0 */
+#define WM8994_DCS_TRIG_SINGLE_0_MASK           0x1000  /* DCS_TRIG_SINGLE_0 */
+#define WM8994_DCS_TRIG_SINGLE_0_SHIFT              12  /* DCS_TRIG_SINGLE_0 */
+#define WM8994_DCS_TRIG_SINGLE_0_WIDTH               1  /* DCS_TRIG_SINGLE_0 */
+#define WM8994_DCS_TRIG_SERIES_1                0x0200  /* DCS_TRIG_SERIES_1 */
+#define WM8994_DCS_TRIG_SERIES_1_MASK           0x0200  /* DCS_TRIG_SERIES_1 */
+#define WM8994_DCS_TRIG_SERIES_1_SHIFT               9  /* DCS_TRIG_SERIES_1 */
+#define WM8994_DCS_TRIG_SERIES_1_WIDTH               1  /* DCS_TRIG_SERIES_1 */
+#define WM8994_DCS_TRIG_SERIES_0                0x0100  /* DCS_TRIG_SERIES_0 */
+#define WM8994_DCS_TRIG_SERIES_0_MASK           0x0100  /* DCS_TRIG_SERIES_0 */
+#define WM8994_DCS_TRIG_SERIES_0_SHIFT               8  /* DCS_TRIG_SERIES_0 */
+#define WM8994_DCS_TRIG_SERIES_0_WIDTH               1  /* DCS_TRIG_SERIES_0 */
+#define WM8994_DCS_TRIG_STARTUP_1               0x0020  /* DCS_TRIG_STARTUP_1 */
+#define WM8994_DCS_TRIG_STARTUP_1_MASK          0x0020  /* DCS_TRIG_STARTUP_1 */
+#define WM8994_DCS_TRIG_STARTUP_1_SHIFT              5  /* DCS_TRIG_STARTUP_1 */
+#define WM8994_DCS_TRIG_STARTUP_1_WIDTH              1  /* DCS_TRIG_STARTUP_1 */
+#define WM8994_DCS_TRIG_STARTUP_0               0x0010  /* DCS_TRIG_STARTUP_0 */
+#define WM8994_DCS_TRIG_STARTUP_0_MASK          0x0010  /* DCS_TRIG_STARTUP_0 */
+#define WM8994_DCS_TRIG_STARTUP_0_SHIFT              4  /* DCS_TRIG_STARTUP_0 */
+#define WM8994_DCS_TRIG_STARTUP_0_WIDTH              1  /* DCS_TRIG_STARTUP_0 */
+#define WM8994_DCS_TRIG_DAC_WR_1                0x0008  /* DCS_TRIG_DAC_WR_1 */
+#define WM8994_DCS_TRIG_DAC_WR_1_MASK           0x0008  /* DCS_TRIG_DAC_WR_1 */
+#define WM8994_DCS_TRIG_DAC_WR_1_SHIFT               3  /* DCS_TRIG_DAC_WR_1 */
+#define WM8994_DCS_TRIG_DAC_WR_1_WIDTH               1  /* DCS_TRIG_DAC_WR_1 */
+#define WM8994_DCS_TRIG_DAC_WR_0                0x0004  /* DCS_TRIG_DAC_WR_0 */
+#define WM8994_DCS_TRIG_DAC_WR_0_MASK           0x0004  /* DCS_TRIG_DAC_WR_0 */
+#define WM8994_DCS_TRIG_DAC_WR_0_SHIFT               2  /* DCS_TRIG_DAC_WR_0 */
+#define WM8994_DCS_TRIG_DAC_WR_0_WIDTH               1  /* DCS_TRIG_DAC_WR_0 */
+#define WM8994_DCS_ENA_CHAN_1                   0x0002  /* DCS_ENA_CHAN_1 */
+#define WM8994_DCS_ENA_CHAN_1_MASK              0x0002  /* DCS_ENA_CHAN_1 */
+#define WM8994_DCS_ENA_CHAN_1_SHIFT                  1  /* DCS_ENA_CHAN_1 */
+#define WM8994_DCS_ENA_CHAN_1_WIDTH                  1  /* DCS_ENA_CHAN_1 */
+#define WM8994_DCS_ENA_CHAN_0                   0x0001  /* DCS_ENA_CHAN_0 */
+#define WM8994_DCS_ENA_CHAN_0_MASK              0x0001  /* DCS_ENA_CHAN_0 */
+#define WM8994_DCS_ENA_CHAN_0_SHIFT                  0  /* DCS_ENA_CHAN_0 */
+#define WM8994_DCS_ENA_CHAN_0_WIDTH                  1  /* DCS_ENA_CHAN_0 */
+
+/*
+ * R85 (0x55) - DC Servo (2)
+ */
+#define WM8994_DCS_SERIES_NO_01_MASK            0x0FE0  /* DCS_SERIES_NO_01 - [11:5] */
+#define WM8994_DCS_SERIES_NO_01_SHIFT                5  /* DCS_SERIES_NO_01 - [11:5] */
+#define WM8994_DCS_SERIES_NO_01_WIDTH                7  /* DCS_SERIES_NO_01 - [11:5] */
+#define WM8994_DCS_TIMER_PERIOD_01_MASK         0x000F  /* DCS_TIMER_PERIOD_01 - [3:0] */
+#define WM8994_DCS_TIMER_PERIOD_01_SHIFT             0  /* DCS_TIMER_PERIOD_01 - [3:0] */
+#define WM8994_DCS_TIMER_PERIOD_01_WIDTH             4  /* DCS_TIMER_PERIOD_01 - [3:0] */
+
+/*
+ * R87 (0x57) - DC Servo (4)
+ */
+#define WM8994_DCS_DAC_WR_VAL_1_MASK            0xFF00  /* DCS_DAC_WR_VAL_1 - [15:8] */
+#define WM8994_DCS_DAC_WR_VAL_1_SHIFT                8  /* DCS_DAC_WR_VAL_1 - [15:8] */
+#define WM8994_DCS_DAC_WR_VAL_1_WIDTH                8  /* DCS_DAC_WR_VAL_1 - [15:8] */
+#define WM8994_DCS_DAC_WR_VAL_0_MASK            0x00FF  /* DCS_DAC_WR_VAL_0 - [7:0] */
+#define WM8994_DCS_DAC_WR_VAL_0_SHIFT                0  /* DCS_DAC_WR_VAL_0 - [7:0] */
+#define WM8994_DCS_DAC_WR_VAL_0_WIDTH                8  /* DCS_DAC_WR_VAL_0 - [7:0] */
+
+/*
+ * R88 (0x58) - DC Servo Readback
+ */
+#define WM8994_DCS_CAL_COMPLETE_MASK            0x0300  /* DCS_CAL_COMPLETE - [9:8] */
+#define WM8994_DCS_CAL_COMPLETE_SHIFT                8  /* DCS_CAL_COMPLETE - [9:8] */
+#define WM8994_DCS_CAL_COMPLETE_WIDTH                2  /* DCS_CAL_COMPLETE - [9:8] */
+#define WM8994_DCS_DAC_WR_COMPLETE_MASK         0x0030  /* DCS_DAC_WR_COMPLETE - [5:4] */
+#define WM8994_DCS_DAC_WR_COMPLETE_SHIFT             4  /* DCS_DAC_WR_COMPLETE - [5:4] */
+#define WM8994_DCS_DAC_WR_COMPLETE_WIDTH             2  /* DCS_DAC_WR_COMPLETE - [5:4] */
+#define WM8994_DCS_STARTUP_COMPLETE_MASK        0x0003  /* DCS_STARTUP_COMPLETE - [1:0] */
+#define WM8994_DCS_STARTUP_COMPLETE_SHIFT            0  /* DCS_STARTUP_COMPLETE - [1:0] */
+#define WM8994_DCS_STARTUP_COMPLETE_WIDTH            2  /* DCS_STARTUP_COMPLETE - [1:0] */
+
+/*
+ * R96 (0x60) - Analogue HP (1)
+ */
+#define WM8994_HPOUT1L_RMV_SHORT                0x0080  /* HPOUT1L_RMV_SHORT */
+#define WM8994_HPOUT1L_RMV_SHORT_MASK           0x0080  /* HPOUT1L_RMV_SHORT */
+#define WM8994_HPOUT1L_RMV_SHORT_SHIFT               7  /* HPOUT1L_RMV_SHORT */
+#define WM8994_HPOUT1L_RMV_SHORT_WIDTH               1  /* HPOUT1L_RMV_SHORT */
+#define WM8994_HPOUT1L_OUTP                     0x0040  /* HPOUT1L_OUTP */
+#define WM8994_HPOUT1L_OUTP_MASK                0x0040  /* HPOUT1L_OUTP */
+#define WM8994_HPOUT1L_OUTP_SHIFT                    6  /* HPOUT1L_OUTP */
+#define WM8994_HPOUT1L_OUTP_WIDTH                    1  /* HPOUT1L_OUTP */
+#define WM8994_HPOUT1L_DLY                      0x0020  /* HPOUT1L_DLY */
+#define WM8994_HPOUT1L_DLY_MASK                 0x0020  /* HPOUT1L_DLY */
+#define WM8994_HPOUT1L_DLY_SHIFT                     5  /* HPOUT1L_DLY */
+#define WM8994_HPOUT1L_DLY_WIDTH                     1  /* HPOUT1L_DLY */
+#define WM8994_HPOUT1R_RMV_SHORT                0x0008  /* HPOUT1R_RMV_SHORT */
+#define WM8994_HPOUT1R_RMV_SHORT_MASK           0x0008  /* HPOUT1R_RMV_SHORT */
+#define WM8994_HPOUT1R_RMV_SHORT_SHIFT               3  /* HPOUT1R_RMV_SHORT */
+#define WM8994_HPOUT1R_RMV_SHORT_WIDTH               1  /* HPOUT1R_RMV_SHORT */
+#define WM8994_HPOUT1R_OUTP                     0x0004  /* HPOUT1R_OUTP */
+#define WM8994_HPOUT1R_OUTP_MASK                0x0004  /* HPOUT1R_OUTP */
+#define WM8994_HPOUT1R_OUTP_SHIFT                    2  /* HPOUT1R_OUTP */
+#define WM8994_HPOUT1R_OUTP_WIDTH                    1  /* HPOUT1R_OUTP */
+#define WM8994_HPOUT1R_DLY                      0x0002  /* HPOUT1R_DLY */
+#define WM8994_HPOUT1R_DLY_MASK                 0x0002  /* HPOUT1R_DLY */
+#define WM8994_HPOUT1R_DLY_SHIFT                     1  /* HPOUT1R_DLY */
+#define WM8994_HPOUT1R_DLY_WIDTH                     1  /* HPOUT1R_DLY */
+
+/*
+ * R256 (0x100) - Chip Revision
+ */
+#define WM8994_CHIP_REV_MASK                    0x000F  /* CHIP_REV - [3:0] */
+#define WM8994_CHIP_REV_SHIFT                        0  /* CHIP_REV - [3:0] */
+#define WM8994_CHIP_REV_WIDTH                        4  /* CHIP_REV - [3:0] */
+
+/*
+ * R257 (0x101) - Control Interface
+ */
+#define WM8994_SPI_CONTRD                       0x0040  /* SPI_CONTRD */
+#define WM8994_SPI_CONTRD_MASK                  0x0040  /* SPI_CONTRD */
+#define WM8994_SPI_CONTRD_SHIFT                      6  /* SPI_CONTRD */
+#define WM8994_SPI_CONTRD_WIDTH                      1  /* SPI_CONTRD */
+#define WM8994_SPI_4WIRE                        0x0020  /* SPI_4WIRE */
+#define WM8994_SPI_4WIRE_MASK                   0x0020  /* SPI_4WIRE */
+#define WM8994_SPI_4WIRE_SHIFT                       5  /* SPI_4WIRE */
+#define WM8994_SPI_4WIRE_WIDTH                       1  /* SPI_4WIRE */
+#define WM8994_SPI_CFG                          0x0010  /* SPI_CFG */
+#define WM8994_SPI_CFG_MASK                     0x0010  /* SPI_CFG */
+#define WM8994_SPI_CFG_SHIFT                         4  /* SPI_CFG */
+#define WM8994_SPI_CFG_WIDTH                         1  /* SPI_CFG */
+#define WM8994_AUTO_INC                         0x0004  /* AUTO_INC */
+#define WM8994_AUTO_INC_MASK                    0x0004  /* AUTO_INC */
+#define WM8994_AUTO_INC_SHIFT                        2  /* AUTO_INC */
+#define WM8994_AUTO_INC_WIDTH                        1  /* AUTO_INC */
+
+/*
+ * R272 (0x110) - Write Sequencer Ctrl (1)
+ */
+#define WM8994_WSEQ_ENA                         0x8000  /* WSEQ_ENA */
+#define WM8994_WSEQ_ENA_MASK                    0x8000  /* WSEQ_ENA */
+#define WM8994_WSEQ_ENA_SHIFT                       15  /* WSEQ_ENA */
+#define WM8994_WSEQ_ENA_WIDTH                        1  /* WSEQ_ENA */
+#define WM8994_WSEQ_ABORT                       0x0200  /* WSEQ_ABORT */
+#define WM8994_WSEQ_ABORT_MASK                  0x0200  /* WSEQ_ABORT */
+#define WM8994_WSEQ_ABORT_SHIFT                      9  /* WSEQ_ABORT */
+#define WM8994_WSEQ_ABORT_WIDTH                      1  /* WSEQ_ABORT */
+#define WM8994_WSEQ_START                       0x0100  /* WSEQ_START */
+#define WM8994_WSEQ_START_MASK                  0x0100  /* WSEQ_START */
+#define WM8994_WSEQ_START_SHIFT                      8  /* WSEQ_START */
+#define WM8994_WSEQ_START_WIDTH                      1  /* WSEQ_START */
+#define WM8994_WSEQ_START_INDEX_MASK            0x007F  /* WSEQ_START_INDEX - [6:0] */
+#define WM8994_WSEQ_START_INDEX_SHIFT                0  /* WSEQ_START_INDEX - [6:0] */
+#define WM8994_WSEQ_START_INDEX_WIDTH                7  /* WSEQ_START_INDEX - [6:0] */
+
+/*
+ * R273 (0x111) - Write Sequencer Ctrl (2)
+ */
+#define WM8994_WSEQ_BUSY                        0x0100  /* WSEQ_BUSY */
+#define WM8994_WSEQ_BUSY_MASK                   0x0100  /* WSEQ_BUSY */
+#define WM8994_WSEQ_BUSY_SHIFT                       8  /* WSEQ_BUSY */
+#define WM8994_WSEQ_BUSY_WIDTH                       1  /* WSEQ_BUSY */
+#define WM8994_WSEQ_CURRENT_INDEX_MASK          0x007F  /* WSEQ_CURRENT_INDEX - [6:0] */
+#define WM8994_WSEQ_CURRENT_INDEX_SHIFT              0  /* WSEQ_CURRENT_INDEX - [6:0] */
+#define WM8994_WSEQ_CURRENT_INDEX_WIDTH              7  /* WSEQ_CURRENT_INDEX - [6:0] */
+
+/*
+ * R512 (0x200) - AIF1 Clocking (1)
+ */
+#define WM8994_AIF1CLK_SRC_MASK                 0x0018  /* AIF1CLK_SRC - [4:3] */
+#define WM8994_AIF1CLK_SRC_SHIFT                     3  /* AIF1CLK_SRC - [4:3] */
+#define WM8994_AIF1CLK_SRC_WIDTH                     2  /* AIF1CLK_SRC - [4:3] */
+#define WM8994_AIF1CLK_INV                      0x0004  /* AIF1CLK_INV */
+#define WM8994_AIF1CLK_INV_MASK                 0x0004  /* AIF1CLK_INV */
+#define WM8994_AIF1CLK_INV_SHIFT                     2  /* AIF1CLK_INV */
+#define WM8994_AIF1CLK_INV_WIDTH                     1  /* AIF1CLK_INV */
+#define WM8994_AIF1CLK_DIV                      0x0002  /* AIF1CLK_DIV */
+#define WM8994_AIF1CLK_DIV_MASK                 0x0002  /* AIF1CLK_DIV */
+#define WM8994_AIF1CLK_DIV_SHIFT                     1  /* AIF1CLK_DIV */
+#define WM8994_AIF1CLK_DIV_WIDTH                     1  /* AIF1CLK_DIV */
+#define WM8994_AIF1CLK_ENA                      0x0001  /* AIF1CLK_ENA */
+#define WM8994_AIF1CLK_ENA_MASK                 0x0001  /* AIF1CLK_ENA */
+#define WM8994_AIF1CLK_ENA_SHIFT                     0  /* AIF1CLK_ENA */
+#define WM8994_AIF1CLK_ENA_WIDTH                     1  /* AIF1CLK_ENA */
+
+/*
+ * R513 (0x201) - AIF1 Clocking (2)
+ */
+#define WM8994_AIF1DAC_DIV_MASK                 0x0038  /* AIF1DAC_DIV - [5:3] */
+#define WM8994_AIF1DAC_DIV_SHIFT                     3  /* AIF1DAC_DIV - [5:3] */
+#define WM8994_AIF1DAC_DIV_WIDTH                     3  /* AIF1DAC_DIV - [5:3] */
+#define WM8994_AIF1ADC_DIV_MASK                 0x0007  /* AIF1ADC_DIV - [2:0] */
+#define WM8994_AIF1ADC_DIV_SHIFT                     0  /* AIF1ADC_DIV - [2:0] */
+#define WM8994_AIF1ADC_DIV_WIDTH                     3  /* AIF1ADC_DIV - [2:0] */
+
+/*
+ * R516 (0x204) - AIF2 Clocking (1)
+ */
+#define WM8994_AIF2CLK_SRC_MASK                 0x0018  /* AIF2CLK_SRC - [4:3] */
+#define WM8994_AIF2CLK_SRC_SHIFT                     3  /* AIF2CLK_SRC - [4:3] */
+#define WM8994_AIF2CLK_SRC_WIDTH                     2  /* AIF2CLK_SRC - [4:3] */
+#define WM8994_AIF2CLK_INV                      0x0004  /* AIF2CLK_INV */
+#define WM8994_AIF2CLK_INV_MASK                 0x0004  /* AIF2CLK_INV */
+#define WM8994_AIF2CLK_INV_SHIFT                     2  /* AIF2CLK_INV */
+#define WM8994_AIF2CLK_INV_WIDTH                     1  /* AIF2CLK_INV */
+#define WM8994_AIF2CLK_DIV                      0x0002  /* AIF2CLK_DIV */
+#define WM8994_AIF2CLK_DIV_MASK                 0x0002  /* AIF2CLK_DIV */
+#define WM8994_AIF2CLK_DIV_SHIFT                     1  /* AIF2CLK_DIV */
+#define WM8994_AIF2CLK_DIV_WIDTH                     1  /* AIF2CLK_DIV */
+#define WM8994_AIF2CLK_ENA                      0x0001  /* AIF2CLK_ENA */
+#define WM8994_AIF2CLK_ENA_MASK                 0x0001  /* AIF2CLK_ENA */
+#define WM8994_AIF2CLK_ENA_SHIFT                     0  /* AIF2CLK_ENA */
+#define WM8994_AIF2CLK_ENA_WIDTH                     1  /* AIF2CLK_ENA */
+
+/*
+ * R517 (0x205) - AIF2 Clocking (2)
+ */
+#define WM8994_AIF2DAC_DIV_MASK                 0x0038  /* AIF2DAC_DIV - [5:3] */
+#define WM8994_AIF2DAC_DIV_SHIFT                     3  /* AIF2DAC_DIV - [5:3] */
+#define WM8994_AIF2DAC_DIV_WIDTH                     3  /* AIF2DAC_DIV - [5:3] */
+#define WM8994_AIF2ADC_DIV_MASK                 0x0007  /* AIF2ADC_DIV - [2:0] */
+#define WM8994_AIF2ADC_DIV_SHIFT                     0  /* AIF2ADC_DIV - [2:0] */
+#define WM8994_AIF2ADC_DIV_WIDTH                     3  /* AIF2ADC_DIV - [2:0] */
+
+/*
+ * R520 (0x208) - Clocking (1)
+ */
+#define WM8994_TOCLK_ENA                        0x0010  /* TOCLK_ENA */
+#define WM8994_TOCLK_ENA_MASK                   0x0010  /* TOCLK_ENA */
+#define WM8994_TOCLK_ENA_SHIFT                       4  /* TOCLK_ENA */
+#define WM8994_TOCLK_ENA_WIDTH                       1  /* TOCLK_ENA */
+#define WM8994_AIF1DSPCLK_ENA                   0x0008  /* AIF1DSPCLK_ENA */
+#define WM8994_AIF1DSPCLK_ENA_MASK              0x0008  /* AIF1DSPCLK_ENA */
+#define WM8994_AIF1DSPCLK_ENA_SHIFT                  3  /* AIF1DSPCLK_ENA */
+#define WM8994_AIF1DSPCLK_ENA_WIDTH                  1  /* AIF1DSPCLK_ENA */
+#define WM8994_AIF2DSPCLK_ENA                   0x0004  /* AIF2DSPCLK_ENA */
+#define WM8994_AIF2DSPCLK_ENA_MASK              0x0004  /* AIF2DSPCLK_ENA */
+#define WM8994_AIF2DSPCLK_ENA_SHIFT                  2  /* AIF2DSPCLK_ENA */
+#define WM8994_AIF2DSPCLK_ENA_WIDTH                  1  /* AIF2DSPCLK_ENA */
+#define WM8994_SYSDSPCLK_ENA                    0x0002  /* SYSDSPCLK_ENA */
+#define WM8994_SYSDSPCLK_ENA_MASK               0x0002  /* SYSDSPCLK_ENA */
+#define WM8994_SYSDSPCLK_ENA_SHIFT                   1  /* SYSDSPCLK_ENA */
+#define WM8994_SYSDSPCLK_ENA_WIDTH                   1  /* SYSDSPCLK_ENA */
+#define WM8994_SYSCLK_SRC                       0x0001  /* SYSCLK_SRC */
+#define WM8994_SYSCLK_SRC_MASK                  0x0001  /* SYSCLK_SRC */
+#define WM8994_SYSCLK_SRC_SHIFT                      0  /* SYSCLK_SRC */
+#define WM8994_SYSCLK_SRC_WIDTH                      1  /* SYSCLK_SRC */
+
+/*
+ * R521 (0x209) - Clocking (2)
+ */
+#define WM8994_TOCLK_DIV_MASK                   0x0700  /* TOCLK_DIV - [10:8] */
+#define WM8994_TOCLK_DIV_SHIFT                       8  /* TOCLK_DIV - [10:8] */
+#define WM8994_TOCLK_DIV_WIDTH                       3  /* TOCLK_DIV - [10:8] */
+#define WM8994_DBCLK_DIV_MASK                   0x0070  /* DBCLK_DIV - [6:4] */
+#define WM8994_DBCLK_DIV_SHIFT                       4  /* DBCLK_DIV - [6:4] */
+#define WM8994_DBCLK_DIV_WIDTH                       3  /* DBCLK_DIV - [6:4] */
+#define WM8994_OPCLK_DIV_MASK                   0x0007  /* OPCLK_DIV - [2:0] */
+#define WM8994_OPCLK_DIV_SHIFT                       0  /* OPCLK_DIV - [2:0] */
+#define WM8994_OPCLK_DIV_WIDTH                       3  /* OPCLK_DIV - [2:0] */
+
+/*
+ * R528 (0x210) - AIF1 Rate
+ */
+#define WM8994_AIF1_SR_MASK                     0x00F0  /* AIF1_SR - [7:4] */
+#define WM8994_AIF1_SR_SHIFT                         4  /* AIF1_SR - [7:4] */
+#define WM8994_AIF1_SR_WIDTH                         4  /* AIF1_SR - [7:4] */
+#define WM8994_AIF1CLK_RATE_MASK                0x000F  /* AIF1CLK_RATE - [3:0] */
+#define WM8994_AIF1CLK_RATE_SHIFT                    0  /* AIF1CLK_RATE - [3:0] */
+#define WM8994_AIF1CLK_RATE_WIDTH                    4  /* AIF1CLK_RATE - [3:0] */
+
+/*
+ * R529 (0x211) - AIF2 Rate
+ */
+#define WM8994_AIF2_SR_MASK                     0x00F0  /* AIF2_SR - [7:4] */
+#define WM8994_AIF2_SR_SHIFT                         4  /* AIF2_SR - [7:4] */
+#define WM8994_AIF2_SR_WIDTH                         4  /* AIF2_SR - [7:4] */
+#define WM8994_AIF2CLK_RATE_MASK                0x000F  /* AIF2CLK_RATE - [3:0] */
+#define WM8994_AIF2CLK_RATE_SHIFT                    0  /* AIF2CLK_RATE - [3:0] */
+#define WM8994_AIF2CLK_RATE_WIDTH                    4  /* AIF2CLK_RATE - [3:0] */
+
+/*
+ * R530 (0x212) - Rate Status
+ */
+#define WM8994_SR_ERROR_MASK                    0x000F  /* SR_ERROR - [3:0] */
+#define WM8994_SR_ERROR_SHIFT                        0  /* SR_ERROR - [3:0] */
+#define WM8994_SR_ERROR_WIDTH                        4  /* SR_ERROR - [3:0] */
+
+/*
+ * R544 (0x220) - FLL1 Control (1)
+ */
+#define WM8994_FLL1_FRAC                        0x0004  /* FLL1_FRAC */
+#define WM8994_FLL1_FRAC_MASK                   0x0004  /* FLL1_FRAC */
+#define WM8994_FLL1_FRAC_SHIFT                       2  /* FLL1_FRAC */
+#define WM8994_FLL1_FRAC_WIDTH                       1  /* FLL1_FRAC */
+#define WM8994_FLL1_OSC_ENA                     0x0002  /* FLL1_OSC_ENA */
+#define WM8994_FLL1_OSC_ENA_MASK                0x0002  /* FLL1_OSC_ENA */
+#define WM8994_FLL1_OSC_ENA_SHIFT                    1  /* FLL1_OSC_ENA */
+#define WM8994_FLL1_OSC_ENA_WIDTH                    1  /* FLL1_OSC_ENA */
+#define WM8994_FLL1_ENA                         0x0001  /* FLL1_ENA */
+#define WM8994_FLL1_ENA_MASK                    0x0001  /* FLL1_ENA */
+#define WM8994_FLL1_ENA_SHIFT                        0  /* FLL1_ENA */
+#define WM8994_FLL1_ENA_WIDTH                        1  /* FLL1_ENA */
+
+/*
+ * R545 (0x221) - FLL1 Control (2)
+ */
+#define WM8994_FLL1_OUTDIV_MASK                 0x3F00  /* FLL1_OUTDIV - [13:8] */
+#define WM8994_FLL1_OUTDIV_SHIFT                     8  /* FLL1_OUTDIV - [13:8] */
+#define WM8994_FLL1_OUTDIV_WIDTH                     6  /* FLL1_OUTDIV - [13:8] */
+#define WM8994_FLL1_CTRL_RATE_MASK              0x0070  /* FLL1_CTRL_RATE - [6:4] */
+#define WM8994_FLL1_CTRL_RATE_SHIFT                  4  /* FLL1_CTRL_RATE - [6:4] */
+#define WM8994_FLL1_CTRL_RATE_WIDTH                  3  /* FLL1_CTRL_RATE - [6:4] */
+#define WM8994_FLL1_FRATIO_MASK                 0x0007  /* FLL1_FRATIO - [2:0] */
+#define WM8994_FLL1_FRATIO_SHIFT                     0  /* FLL1_FRATIO - [2:0] */
+#define WM8994_FLL1_FRATIO_WIDTH                     3  /* FLL1_FRATIO - [2:0] */
+
+/*
+ * R546 (0x222) - FLL1 Control (3)
+ */
+#define WM8994_FLL1_K_MASK                      0xFFFF  /* FLL1_K - [15:0] */
+#define WM8994_FLL1_K_SHIFT                          0  /* FLL1_K - [15:0] */
+#define WM8994_FLL1_K_WIDTH                         16  /* FLL1_K - [15:0] */
+
+/*
+ * R547 (0x223) - FLL1 Control (4)
+ */
+#define WM8994_FLL1_N_MASK                      0x7FE0  /* FLL1_N - [14:5] */
+#define WM8994_FLL1_N_SHIFT                          5  /* FLL1_N - [14:5] */
+#define WM8994_FLL1_N_WIDTH                         10  /* FLL1_N - [14:5] */
+#define WM8994_FLL1_LOOP_GAIN_MASK              0x000F  /* FLL1_LOOP_GAIN - [3:0] */
+#define WM8994_FLL1_LOOP_GAIN_SHIFT                  0  /* FLL1_LOOP_GAIN - [3:0] */
+#define WM8994_FLL1_LOOP_GAIN_WIDTH                  4  /* FLL1_LOOP_GAIN - [3:0] */
+
+/*
+ * R548 (0x224) - FLL1 Control (5)
+ */
+#define WM8994_FLL1_FRC_NCO_VAL_MASK            0x1F80  /* FLL1_FRC_NCO_VAL - [12:7] */
+#define WM8994_FLL1_FRC_NCO_VAL_SHIFT                7  /* FLL1_FRC_NCO_VAL - [12:7] */
+#define WM8994_FLL1_FRC_NCO_VAL_WIDTH                6  /* FLL1_FRC_NCO_VAL - [12:7] */
+#define WM8994_FLL1_FRC_NCO                     0x0040  /* FLL1_FRC_NCO */
+#define WM8994_FLL1_FRC_NCO_MASK                0x0040  /* FLL1_FRC_NCO */
+#define WM8994_FLL1_FRC_NCO_SHIFT                    6  /* FLL1_FRC_NCO */
+#define WM8994_FLL1_FRC_NCO_WIDTH                    1  /* FLL1_FRC_NCO */
+#define WM8994_FLL1_REFCLK_DIV_MASK             0x0018  /* FLL1_REFCLK_DIV - [4:3] */
+#define WM8994_FLL1_REFCLK_DIV_SHIFT                 3  /* FLL1_REFCLK_DIV - [4:3] */
+#define WM8994_FLL1_REFCLK_DIV_WIDTH                 2  /* FLL1_REFCLK_DIV - [4:3] */
+#define WM8994_FLL1_REFCLK_SRC_MASK             0x0003  /* FLL1_REFCLK_SRC - [1:0] */
+#define WM8994_FLL1_REFCLK_SRC_SHIFT                 0  /* FLL1_REFCLK_SRC - [1:0] */
+#define WM8994_FLL1_REFCLK_SRC_WIDTH                 2  /* FLL1_REFCLK_SRC - [1:0] */
+
+/*
+ * R576 (0x240) - FLL2 Control (1)
+ */
+#define WM8994_FLL2_FRAC                        0x0004  /* FLL2_FRAC */
+#define WM8994_FLL2_FRAC_MASK                   0x0004  /* FLL2_FRAC */
+#define WM8994_FLL2_FRAC_SHIFT                       2  /* FLL2_FRAC */
+#define WM8994_FLL2_FRAC_WIDTH                       1  /* FLL2_FRAC */
+#define WM8994_FLL2_OSC_ENA                     0x0002  /* FLL2_OSC_ENA */
+#define WM8994_FLL2_OSC_ENA_MASK                0x0002  /* FLL2_OSC_ENA */
+#define WM8994_FLL2_OSC_ENA_SHIFT                    1  /* FLL2_OSC_ENA */
+#define WM8994_FLL2_OSC_ENA_WIDTH                    1  /* FLL2_OSC_ENA */
+#define WM8994_FLL2_ENA                         0x0001  /* FLL2_ENA */
+#define WM8994_FLL2_ENA_MASK                    0x0001  /* FLL2_ENA */
+#define WM8994_FLL2_ENA_SHIFT                        0  /* FLL2_ENA */
+#define WM8994_FLL2_ENA_WIDTH                        1  /* FLL2_ENA */
+
+/*
+ * R577 (0x241) - FLL2 Control (2)
+ */
+#define WM8994_FLL2_OUTDIV_MASK                 0x3F00  /* FLL2_OUTDIV - [13:8] */
+#define WM8994_FLL2_OUTDIV_SHIFT                     8  /* FLL2_OUTDIV - [13:8] */
+#define WM8994_FLL2_OUTDIV_WIDTH                     6  /* FLL2_OUTDIV - [13:8] */
+#define WM8994_FLL2_CTRL_RATE_MASK              0x0070  /* FLL2_CTRL_RATE - [6:4] */
+#define WM8994_FLL2_CTRL_RATE_SHIFT                  4  /* FLL2_CTRL_RATE - [6:4] */
+#define WM8994_FLL2_CTRL_RATE_WIDTH                  3  /* FLL2_CTRL_RATE - [6:4] */
+#define WM8994_FLL2_FRATIO_MASK                 0x0007  /* FLL2_FRATIO - [2:0] */
+#define WM8994_FLL2_FRATIO_SHIFT                     0  /* FLL2_FRATIO - [2:0] */
+#define WM8994_FLL2_FRATIO_WIDTH                     3  /* FLL2_FRATIO - [2:0] */
+
+/*
+ * R578 (0x242) - FLL2 Control (3)
+ */
+#define WM8994_FLL2_K_MASK                      0xFFFF  /* FLL2_K - [15:0] */
+#define WM8994_FLL2_K_SHIFT                          0  /* FLL2_K - [15:0] */
+#define WM8994_FLL2_K_WIDTH                         16  /* FLL2_K - [15:0] */
+
+/*
+ * R579 (0x243) - FLL2 Control (4)
+ */
+#define WM8994_FLL2_N_MASK                      0x7FE0  /* FLL2_N - [14:5] */
+#define WM8994_FLL2_N_SHIFT                          5  /* FLL2_N - [14:5] */
+#define WM8994_FLL2_N_WIDTH                         10  /* FLL2_N - [14:5] */
+#define WM8994_FLL2_LOOP_GAIN_MASK              0x000F  /* FLL2_LOOP_GAIN - [3:0] */
+#define WM8994_FLL2_LOOP_GAIN_SHIFT                  0  /* FLL2_LOOP_GAIN - [3:0] */
+#define WM8994_FLL2_LOOP_GAIN_WIDTH                  4  /* FLL2_LOOP_GAIN - [3:0] */
+
+/*
+ * R580 (0x244) - FLL2 Control (5)
+ */
+#define WM8994_FLL2_FRC_NCO_VAL_MASK            0x1F80  /* FLL2_FRC_NCO_VAL - [12:7] */
+#define WM8994_FLL2_FRC_NCO_VAL_SHIFT                7  /* FLL2_FRC_NCO_VAL - [12:7] */
+#define WM8994_FLL2_FRC_NCO_VAL_WIDTH                6  /* FLL2_FRC_NCO_VAL - [12:7] */
+#define WM8994_FLL2_FRC_NCO                     0x0040  /* FLL2_FRC_NCO */
+#define WM8994_FLL2_FRC_NCO_MASK                0x0040  /* FLL2_FRC_NCO */
+#define WM8994_FLL2_FRC_NCO_SHIFT                    6  /* FLL2_FRC_NCO */
+#define WM8994_FLL2_FRC_NCO_WIDTH                    1  /* FLL2_FRC_NCO */
+#define WM8994_FLL2_REFCLK_DIV_MASK             0x0018  /* FLL2_REFCLK_DIV - [4:3] */
+#define WM8994_FLL2_REFCLK_DIV_SHIFT                 3  /* FLL2_REFCLK_DIV - [4:3] */
+#define WM8994_FLL2_REFCLK_DIV_WIDTH                 2  /* FLL2_REFCLK_DIV - [4:3] */
+#define WM8994_FLL2_REFCLK_SRC_MASK             0x0003  /* FLL2_REFCLK_SRC - [1:0] */
+#define WM8994_FLL2_REFCLK_SRC_SHIFT                 0  /* FLL2_REFCLK_SRC - [1:0] */
+#define WM8994_FLL2_REFCLK_SRC_WIDTH                 2  /* FLL2_REFCLK_SRC - [1:0] */
+
+/*
+ * R768 (0x300) - AIF1 Control (1)
+ */
+#define WM8994_AIF1ADCL_SRC                     0x8000  /* AIF1ADCL_SRC */
+#define WM8994_AIF1ADCL_SRC_MASK                0x8000  /* AIF1ADCL_SRC */
+#define WM8994_AIF1ADCL_SRC_SHIFT                   15  /* AIF1ADCL_SRC */
+#define WM8994_AIF1ADCL_SRC_WIDTH                    1  /* AIF1ADCL_SRC */
+#define WM8994_AIF1ADCR_SRC                     0x4000  /* AIF1ADCR_SRC */
+#define WM8994_AIF1ADCR_SRC_MASK                0x4000  /* AIF1ADCR_SRC */
+#define WM8994_AIF1ADCR_SRC_SHIFT                   14  /* AIF1ADCR_SRC */
+#define WM8994_AIF1ADCR_SRC_WIDTH                    1  /* AIF1ADCR_SRC */
+#define WM8994_AIF1ADC_TDM                      0x2000  /* AIF1ADC_TDM */
+#define WM8994_AIF1ADC_TDM_MASK                 0x2000  /* AIF1ADC_TDM */
+#define WM8994_AIF1ADC_TDM_SHIFT                    13  /* AIF1ADC_TDM */
+#define WM8994_AIF1ADC_TDM_WIDTH                     1  /* AIF1ADC_TDM */
+#define WM8994_AIF1_BCLK_INV                    0x0100  /* AIF1_BCLK_INV */
+#define WM8994_AIF1_BCLK_INV_MASK               0x0100  /* AIF1_BCLK_INV */
+#define WM8994_AIF1_BCLK_INV_SHIFT                   8  /* AIF1_BCLK_INV */
+#define WM8994_AIF1_BCLK_INV_WIDTH                   1  /* AIF1_BCLK_INV */
+#define WM8994_AIF1_LRCLK_INV                   0x0080  /* AIF1_LRCLK_INV */
+#define WM8994_AIF1_LRCLK_INV_MASK              0x0080  /* AIF1_LRCLK_INV */
+#define WM8994_AIF1_LRCLK_INV_SHIFT                  7  /* AIF1_LRCLK_INV */
+#define WM8994_AIF1_LRCLK_INV_WIDTH                  1  /* AIF1_LRCLK_INV */
+#define WM8994_AIF1_WL_MASK                     0x0060  /* AIF1_WL - [6:5] */
+#define WM8994_AIF1_WL_SHIFT                         5  /* AIF1_WL - [6:5] */
+#define WM8994_AIF1_WL_WIDTH                         2  /* AIF1_WL - [6:5] */
+#define WM8994_AIF1_FMT_MASK                    0x0018  /* AIF1_FMT - [4:3] */
+#define WM8994_AIF1_FMT_SHIFT                        3  /* AIF1_FMT - [4:3] */
+#define WM8994_AIF1_FMT_WIDTH                        2  /* AIF1_FMT - [4:3] */
+
+/*
+ * R769 (0x301) - AIF1 Control (2)
+ */
+#define WM8994_AIF1DACL_SRC                     0x8000  /* AIF1DACL_SRC */
+#define WM8994_AIF1DACL_SRC_MASK                0x8000  /* AIF1DACL_SRC */
+#define WM8994_AIF1DACL_SRC_SHIFT                   15  /* AIF1DACL_SRC */
+#define WM8994_AIF1DACL_SRC_WIDTH                    1  /* AIF1DACL_SRC */
+#define WM8994_AIF1DACR_SRC                     0x4000  /* AIF1DACR_SRC */
+#define WM8994_AIF1DACR_SRC_MASK                0x4000  /* AIF1DACR_SRC */
+#define WM8994_AIF1DACR_SRC_SHIFT                   14  /* AIF1DACR_SRC */
+#define WM8994_AIF1DACR_SRC_WIDTH                    1  /* AIF1DACR_SRC */
+#define WM8994_AIF1DAC_BOOST_MASK               0x0C00  /* AIF1DAC_BOOST - [11:10] */
+#define WM8994_AIF1DAC_BOOST_SHIFT                  10  /* AIF1DAC_BOOST - [11:10] */
+#define WM8994_AIF1DAC_BOOST_WIDTH                   2  /* AIF1DAC_BOOST - [11:10] */
+#define WM8994_AIF1_MONO                        0x0100  /* AIF1_MONO */
+#define WM8994_AIF1_MONO_MASK                   0x0100  /* AIF1_MONO */
+#define WM8994_AIF1_MONO_SHIFT                       8  /* AIF1_MONO */
+#define WM8994_AIF1_MONO_WIDTH                       1  /* AIF1_MONO */
+#define WM8994_AIF1DAC_COMP                     0x0010  /* AIF1DAC_COMP */
+#define WM8994_AIF1DAC_COMP_MASK                0x0010  /* AIF1DAC_COMP */
+#define WM8994_AIF1DAC_COMP_SHIFT                    4  /* AIF1DAC_COMP */
+#define WM8994_AIF1DAC_COMP_WIDTH                    1  /* AIF1DAC_COMP */
+#define WM8994_AIF1DAC_COMPMODE                 0x0008  /* AIF1DAC_COMPMODE */
+#define WM8994_AIF1DAC_COMPMODE_MASK            0x0008  /* AIF1DAC_COMPMODE */
+#define WM8994_AIF1DAC_COMPMODE_SHIFT                3  /* AIF1DAC_COMPMODE */
+#define WM8994_AIF1DAC_COMPMODE_WIDTH                1  /* AIF1DAC_COMPMODE */
+#define WM8994_AIF1ADC_COMP                     0x0004  /* AIF1ADC_COMP */
+#define WM8994_AIF1ADC_COMP_MASK                0x0004  /* AIF1ADC_COMP */
+#define WM8994_AIF1ADC_COMP_SHIFT                    2  /* AIF1ADC_COMP */
+#define WM8994_AIF1ADC_COMP_WIDTH                    1  /* AIF1ADC_COMP */
+#define WM8994_AIF1ADC_COMPMODE                 0x0002  /* AIF1ADC_COMPMODE */
+#define WM8994_AIF1ADC_COMPMODE_MASK            0x0002  /* AIF1ADC_COMPMODE */
+#define WM8994_AIF1ADC_COMPMODE_SHIFT                1  /* AIF1ADC_COMPMODE */
+#define WM8994_AIF1ADC_COMPMODE_WIDTH                1  /* AIF1ADC_COMPMODE */
+#define WM8994_AIF1_LOOPBACK                    0x0001  /* AIF1_LOOPBACK */
+#define WM8994_AIF1_LOOPBACK_MASK               0x0001  /* AIF1_LOOPBACK */
+#define WM8994_AIF1_LOOPBACK_SHIFT                   0  /* AIF1_LOOPBACK */
+#define WM8994_AIF1_LOOPBACK_WIDTH                   1  /* AIF1_LOOPBACK */
+
+/*
+ * R770 (0x302) - AIF1 Master/Slave
+ */
+#define WM8994_AIF1_TRI                         0x8000  /* AIF1_TRI */
+#define WM8994_AIF1_TRI_MASK                    0x8000  /* AIF1_TRI */
+#define WM8994_AIF1_TRI_SHIFT                       15  /* AIF1_TRI */
+#define WM8994_AIF1_TRI_WIDTH                        1  /* AIF1_TRI */
+#define WM8994_AIF1_MSTR                        0x4000  /* AIF1_MSTR */
+#define WM8994_AIF1_MSTR_MASK                   0x4000  /* AIF1_MSTR */
+#define WM8994_AIF1_MSTR_SHIFT                      14  /* AIF1_MSTR */
+#define WM8994_AIF1_MSTR_WIDTH                       1  /* AIF1_MSTR */
+#define WM8994_AIF1_CLK_FRC                     0x2000  /* AIF1_CLK_FRC */
+#define WM8994_AIF1_CLK_FRC_MASK                0x2000  /* AIF1_CLK_FRC */
+#define WM8994_AIF1_CLK_FRC_SHIFT                   13  /* AIF1_CLK_FRC */
+#define WM8994_AIF1_CLK_FRC_WIDTH                    1  /* AIF1_CLK_FRC */
+#define WM8994_AIF1_LRCLK_FRC                   0x1000  /* AIF1_LRCLK_FRC */
+#define WM8994_AIF1_LRCLK_FRC_MASK              0x1000  /* AIF1_LRCLK_FRC */
+#define WM8994_AIF1_LRCLK_FRC_SHIFT                 12  /* AIF1_LRCLK_FRC */
+#define WM8994_AIF1_LRCLK_FRC_WIDTH                  1  /* AIF1_LRCLK_FRC */
+
+/*
+ * R771 (0x303) - AIF1 BCLK
+ */
+#define WM8994_AIF1_BCLK_DIV_MASK               0x01F0  /* AIF1_BCLK_DIV - [8:4] */
+#define WM8994_AIF1_BCLK_DIV_SHIFT                   4  /* AIF1_BCLK_DIV - [8:4] */
+#define WM8994_AIF1_BCLK_DIV_WIDTH                   5  /* AIF1_BCLK_DIV - [8:4] */
+
+/*
+ * R772 (0x304) - AIF1ADC LRCLK
+ */
+#define WM8994_AIF1ADC_LRCLK_DIR                0x0800  /* AIF1ADC_LRCLK_DIR */
+#define WM8994_AIF1ADC_LRCLK_DIR_MASK           0x0800  /* AIF1ADC_LRCLK_DIR */
+#define WM8994_AIF1ADC_LRCLK_DIR_SHIFT              11  /* AIF1ADC_LRCLK_DIR */
+#define WM8994_AIF1ADC_LRCLK_DIR_WIDTH               1  /* AIF1ADC_LRCLK_DIR */
+#define WM8994_AIF1ADC_RATE_MASK                0x07FF  /* AIF1ADC_RATE - [10:0] */
+#define WM8994_AIF1ADC_RATE_SHIFT                    0  /* AIF1ADC_RATE - [10:0] */
+#define WM8994_AIF1ADC_RATE_WIDTH                   11  /* AIF1ADC_RATE - [10:0] */
+
+/*
+ * R773 (0x305) - AIF1DAC LRCLK
+ */
+#define WM8994_AIF1DAC_LRCLK_DIR                0x0800  /* AIF1DAC_LRCLK_DIR */
+#define WM8994_AIF1DAC_LRCLK_DIR_MASK           0x0800  /* AIF1DAC_LRCLK_DIR */
+#define WM8994_AIF1DAC_LRCLK_DIR_SHIFT              11  /* AIF1DAC_LRCLK_DIR */
+#define WM8994_AIF1DAC_LRCLK_DIR_WIDTH               1  /* AIF1DAC_LRCLK_DIR */
+#define WM8994_AIF1DAC_RATE_MASK                0x07FF  /* AIF1DAC_RATE - [10:0] */
+#define WM8994_AIF1DAC_RATE_SHIFT                    0  /* AIF1DAC_RATE - [10:0] */
+#define WM8994_AIF1DAC_RATE_WIDTH                   11  /* AIF1DAC_RATE - [10:0] */
+
+/*
+ * R774 (0x306) - AIF1DAC Data
+ */
+#define WM8994_AIF1DACL_DAT_INV                 0x0002  /* AIF1DACL_DAT_INV */
+#define WM8994_AIF1DACL_DAT_INV_MASK            0x0002  /* AIF1DACL_DAT_INV */
+#define WM8994_AIF1DACL_DAT_INV_SHIFT                1  /* AIF1DACL_DAT_INV */
+#define WM8994_AIF1DACL_DAT_INV_WIDTH                1  /* AIF1DACL_DAT_INV */
+#define WM8994_AIF1DACR_DAT_INV                 0x0001  /* AIF1DACR_DAT_INV */
+#define WM8994_AIF1DACR_DAT_INV_MASK            0x0001  /* AIF1DACR_DAT_INV */
+#define WM8994_AIF1DACR_DAT_INV_SHIFT                0  /* AIF1DACR_DAT_INV */
+#define WM8994_AIF1DACR_DAT_INV_WIDTH                1  /* AIF1DACR_DAT_INV */
+
+/*
+ * R775 (0x307) - AIF1ADC Data
+ */
+#define WM8994_AIF1ADCL_DAT_INV                 0x0002  /* AIF1ADCL_DAT_INV */
+#define WM8994_AIF1ADCL_DAT_INV_MASK            0x0002  /* AIF1ADCL_DAT_INV */
+#define WM8994_AIF1ADCL_DAT_INV_SHIFT                1  /* AIF1ADCL_DAT_INV */
+#define WM8994_AIF1ADCL_DAT_INV_WIDTH                1  /* AIF1ADCL_DAT_INV */
+#define WM8994_AIF1ADCR_DAT_INV                 0x0001  /* AIF1ADCR_DAT_INV */
+#define WM8994_AIF1ADCR_DAT_INV_MASK            0x0001  /* AIF1ADCR_DAT_INV */
+#define WM8994_AIF1ADCR_DAT_INV_SHIFT                0  /* AIF1ADCR_DAT_INV */
+#define WM8994_AIF1ADCR_DAT_INV_WIDTH                1  /* AIF1ADCR_DAT_INV */
+
+/*
+ * R784 (0x310) - AIF2 Control (1)
+ */
+#define WM8994_AIF2ADCL_SRC                     0x8000  /* AIF2ADCL_SRC */
+#define WM8994_AIF2ADCL_SRC_MASK                0x8000  /* AIF2ADCL_SRC */
+#define WM8994_AIF2ADCL_SRC_SHIFT                   15  /* AIF2ADCL_SRC */
+#define WM8994_AIF2ADCL_SRC_WIDTH                    1  /* AIF2ADCL_SRC */
+#define WM8994_AIF2ADCR_SRC                     0x4000  /* AIF2ADCR_SRC */
+#define WM8994_AIF2ADCR_SRC_MASK                0x4000  /* AIF2ADCR_SRC */
+#define WM8994_AIF2ADCR_SRC_SHIFT                   14  /* AIF2ADCR_SRC */
+#define WM8994_AIF2ADCR_SRC_WIDTH                    1  /* AIF2ADCR_SRC */
+#define WM8994_AIF2ADC_TDM                      0x2000  /* AIF2ADC_TDM */
+#define WM8994_AIF2ADC_TDM_MASK                 0x2000  /* AIF2ADC_TDM */
+#define WM8994_AIF2ADC_TDM_SHIFT                    13  /* AIF2ADC_TDM */
+#define WM8994_AIF2ADC_TDM_WIDTH                     1  /* AIF2ADC_TDM */
+#define WM8994_AIF2ADC_TDM_CHAN                 0x1000  /* AIF2ADC_TDM_CHAN */
+#define WM8994_AIF2ADC_TDM_CHAN_MASK            0x1000  /* AIF2ADC_TDM_CHAN */
+#define WM8994_AIF2ADC_TDM_CHAN_SHIFT               12  /* AIF2ADC_TDM_CHAN */
+#define WM8994_AIF2ADC_TDM_CHAN_WIDTH                1  /* AIF2ADC_TDM_CHAN */
+#define WM8994_AIF2_BCLK_INV                    0x0100  /* AIF2_BCLK_INV */
+#define WM8994_AIF2_BCLK_INV_MASK               0x0100  /* AIF2_BCLK_INV */
+#define WM8994_AIF2_BCLK_INV_SHIFT                   8  /* AIF2_BCLK_INV */
+#define WM8994_AIF2_BCLK_INV_WIDTH                   1  /* AIF2_BCLK_INV */
+#define WM8994_AIF2_LRCLK_INV                   0x0080  /* AIF2_LRCLK_INV */
+#define WM8994_AIF2_LRCLK_INV_MASK              0x0080  /* AIF2_LRCLK_INV */
+#define WM8994_AIF2_LRCLK_INV_SHIFT                  7  /* AIF2_LRCLK_INV */
+#define WM8994_AIF2_LRCLK_INV_WIDTH                  1  /* AIF2_LRCLK_INV */
+#define WM8994_AIF2_WL_MASK                     0x0060  /* AIF2_WL - [6:5] */
+#define WM8994_AIF2_WL_SHIFT                         5  /* AIF2_WL - [6:5] */
+#define WM8994_AIF2_WL_WIDTH                         2  /* AIF2_WL - [6:5] */
+#define WM8994_AIF2_FMT_MASK                    0x0018  /* AIF2_FMT - [4:3] */
+#define WM8994_AIF2_FMT_SHIFT                        3  /* AIF2_FMT - [4:3] */
+#define WM8994_AIF2_FMT_WIDTH                        2  /* AIF2_FMT - [4:3] */
+
+/*
+ * R785 (0x311) - AIF2 Control (2)
+ */
+#define WM8994_AIF2DACL_SRC                     0x8000  /* AIF2DACL_SRC */
+#define WM8994_AIF2DACL_SRC_MASK                0x8000  /* AIF2DACL_SRC */
+#define WM8994_AIF2DACL_SRC_SHIFT                   15  /* AIF2DACL_SRC */
+#define WM8994_AIF2DACL_SRC_WIDTH                    1  /* AIF2DACL_SRC */
+#define WM8994_AIF2DACR_SRC                     0x4000  /* AIF2DACR_SRC */
+#define WM8994_AIF2DACR_SRC_MASK                0x4000  /* AIF2DACR_SRC */
+#define WM8994_AIF2DACR_SRC_SHIFT                   14  /* AIF2DACR_SRC */
+#define WM8994_AIF2DACR_SRC_WIDTH                    1  /* AIF2DACR_SRC */
+#define WM8994_AIF2DAC_TDM                      0x2000  /* AIF2DAC_TDM */
+#define WM8994_AIF2DAC_TDM_MASK                 0x2000  /* AIF2DAC_TDM */
+#define WM8994_AIF2DAC_TDM_SHIFT                    13  /* AIF2DAC_TDM */
+#define WM8994_AIF2DAC_TDM_WIDTH                     1  /* AIF2DAC_TDM */
+#define WM8994_AIF2DAC_TDM_CHAN                 0x1000  /* AIF2DAC_TDM_CHAN */
+#define WM8994_AIF2DAC_TDM_CHAN_MASK            0x1000  /* AIF2DAC_TDM_CHAN */
+#define WM8994_AIF2DAC_TDM_CHAN_SHIFT               12  /* AIF2DAC_TDM_CHAN */
+#define WM8994_AIF2DAC_TDM_CHAN_WIDTH                1  /* AIF2DAC_TDM_CHAN */
+#define WM8994_AIF2DAC_BOOST_MASK               0x0C00  /* AIF2DAC_BOOST - [11:10] */
+#define WM8994_AIF2DAC_BOOST_SHIFT                  10  /* AIF2DAC_BOOST - [11:10] */
+#define WM8994_AIF2DAC_BOOST_WIDTH                   2  /* AIF2DAC_BOOST - [11:10] */
+#define WM8994_AIF2_MONO                        0x0100  /* AIF2_MONO */
+#define WM8994_AIF2_MONO_MASK                   0x0100  /* AIF2_MONO */
+#define WM8994_AIF2_MONO_SHIFT                       8  /* AIF2_MONO */
+#define WM8994_AIF2_MONO_WIDTH                       1  /* AIF2_MONO */
+#define WM8994_AIF2DAC_COMP                     0x0010  /* AIF2DAC_COMP */
+#define WM8994_AIF2DAC_COMP_MASK                0x0010  /* AIF2DAC_COMP */
+#define WM8994_AIF2DAC_COMP_SHIFT                    4  /* AIF2DAC_COMP */
+#define WM8994_AIF2DAC_COMP_WIDTH                    1  /* AIF2DAC_COMP */
+#define WM8994_AIF2DAC_COMPMODE                 0x0008  /* AIF2DAC_COMPMODE */
+#define WM8994_AIF2DAC_COMPMODE_MASK            0x0008  /* AIF2DAC_COMPMODE */
+#define WM8994_AIF2DAC_COMPMODE_SHIFT                3  /* AIF2DAC_COMPMODE */
+#define WM8994_AIF2DAC_COMPMODE_WIDTH                1  /* AIF2DAC_COMPMODE */
+#define WM8994_AIF2ADC_COMP                     0x0004  /* AIF2ADC_COMP */
+#define WM8994_AIF2ADC_COMP_MASK                0x0004  /* AIF2ADC_COMP */
+#define WM8994_AIF2ADC_COMP_SHIFT                    2  /* AIF2ADC_COMP */
+#define WM8994_AIF2ADC_COMP_WIDTH                    1  /* AIF2ADC_COMP */
+#define WM8994_AIF2ADC_COMPMODE                 0x0002  /* AIF2ADC_COMPMODE */
+#define WM8994_AIF2ADC_COMPMODE_MASK            0x0002  /* AIF2ADC_COMPMODE */
+#define WM8994_AIF2ADC_COMPMODE_SHIFT                1  /* AIF2ADC_COMPMODE */
+#define WM8994_AIF2ADC_COMPMODE_WIDTH                1  /* AIF2ADC_COMPMODE */
+#define WM8994_AIF2_LOOPBACK                    0x0001  /* AIF2_LOOPBACK */
+#define WM8994_AIF2_LOOPBACK_MASK               0x0001  /* AIF2_LOOPBACK */
+#define WM8994_AIF2_LOOPBACK_SHIFT                   0  /* AIF2_LOOPBACK */
+#define WM8994_AIF2_LOOPBACK_WIDTH                   1  /* AIF2_LOOPBACK */
+
+/*
+ * R786 (0x312) - AIF2 Master/Slave
+ */
+#define WM8994_AIF2_TRI                         0x8000  /* AIF2_TRI */
+#define WM8994_AIF2_TRI_MASK                    0x8000  /* AIF2_TRI */
+#define WM8994_AIF2_TRI_SHIFT                       15  /* AIF2_TRI */
+#define WM8994_AIF2_TRI_WIDTH                        1  /* AIF2_TRI */
+#define WM8994_AIF2_MSTR                        0x4000  /* AIF2_MSTR */
+#define WM8994_AIF2_MSTR_MASK                   0x4000  /* AIF2_MSTR */
+#define WM8994_AIF2_MSTR_SHIFT                      14  /* AIF2_MSTR */
+#define WM8994_AIF2_MSTR_WIDTH                       1  /* AIF2_MSTR */
+#define WM8994_AIF2_CLK_FRC                     0x2000  /* AIF2_CLK_FRC */
+#define WM8994_AIF2_CLK_FRC_MASK                0x2000  /* AIF2_CLK_FRC */
+#define WM8994_AIF2_CLK_FRC_SHIFT                   13  /* AIF2_CLK_FRC */
+#define WM8994_AIF2_CLK_FRC_WIDTH                    1  /* AIF2_CLK_FRC */
+#define WM8994_AIF2_LRCLK_FRC                   0x1000  /* AIF2_LRCLK_FRC */
+#define WM8994_AIF2_LRCLK_FRC_MASK              0x1000  /* AIF2_LRCLK_FRC */
+#define WM8994_AIF2_LRCLK_FRC_SHIFT                 12  /* AIF2_LRCLK_FRC */
+#define WM8994_AIF2_LRCLK_FRC_WIDTH                  1  /* AIF2_LRCLK_FRC */
+
+/*
+ * R787 (0x313) - AIF2 BCLK
+ */
+#define WM8994_AIF2_BCLK_DIV_MASK               0x01F0  /* AIF2_BCLK_DIV - [8:4] */
+#define WM8994_AIF2_BCLK_DIV_SHIFT                   4  /* AIF2_BCLK_DIV - [8:4] */
+#define WM8994_AIF2_BCLK_DIV_WIDTH                   5  /* AIF2_BCLK_DIV - [8:4] */
+
+/*
+ * R788 (0x314) - AIF2ADC LRCLK
+ */
+#define WM8994_AIF2ADC_LRCLK_DIR                0x0800  /* AIF2ADC_LRCLK_DIR */
+#define WM8994_AIF2ADC_LRCLK_DIR_MASK           0x0800  /* AIF2ADC_LRCLK_DIR */
+#define WM8994_AIF2ADC_LRCLK_DIR_SHIFT              11  /* AIF2ADC_LRCLK_DIR */
+#define WM8994_AIF2ADC_LRCLK_DIR_WIDTH               1  /* AIF2ADC_LRCLK_DIR */
+#define WM8994_AIF2ADC_RATE_MASK                0x07FF  /* AIF2ADC_RATE - [10:0] */
+#define WM8994_AIF2ADC_RATE_SHIFT                    0  /* AIF2ADC_RATE - [10:0] */
+#define WM8994_AIF2ADC_RATE_WIDTH                   11  /* AIF2ADC_RATE - [10:0] */
+
+/*
+ * R789 (0x315) - AIF2DAC LRCLK
+ */
+#define WM8994_AIF2DAC_LRCLK_DIR                0x0800  /* AIF2DAC_LRCLK_DIR */
+#define WM8994_AIF2DAC_LRCLK_DIR_MASK           0x0800  /* AIF2DAC_LRCLK_DIR */
+#define WM8994_AIF2DAC_LRCLK_DIR_SHIFT              11  /* AIF2DAC_LRCLK_DIR */
+#define WM8994_AIF2DAC_LRCLK_DIR_WIDTH               1  /* AIF2DAC_LRCLK_DIR */
+#define WM8994_AIF2DAC_RATE_MASK                0x07FF  /* AIF2DAC_RATE - [10:0] */
+#define WM8994_AIF2DAC_RATE_SHIFT                    0  /* AIF2DAC_RATE - [10:0] */
+#define WM8994_AIF2DAC_RATE_WIDTH                   11  /* AIF2DAC_RATE - [10:0] */
+
+/*
+ * R790 (0x316) - AIF2DAC Data
+ */
+#define WM8994_AIF2DACL_DAT_INV                 0x0002  /* AIF2DACL_DAT_INV */
+#define WM8994_AIF2DACL_DAT_INV_MASK            0x0002  /* AIF2DACL_DAT_INV */
+#define WM8994_AIF2DACL_DAT_INV_SHIFT                1  /* AIF2DACL_DAT_INV */
+#define WM8994_AIF2DACL_DAT_INV_WIDTH                1  /* AIF2DACL_DAT_INV */
+#define WM8994_AIF2DACR_DAT_INV                 0x0001  /* AIF2DACR_DAT_INV */
+#define WM8994_AIF2DACR_DAT_INV_MASK            0x0001  /* AIF2DACR_DAT_INV */
+#define WM8994_AIF2DACR_DAT_INV_SHIFT                0  /* AIF2DACR_DAT_INV */
+#define WM8994_AIF2DACR_DAT_INV_WIDTH                1  /* AIF2DACR_DAT_INV */
+
+/*
+ * R791 (0x317) - AIF2ADC Data
+ */
+#define WM8994_AIF2ADCL_DAT_INV                 0x0002  /* AIF2ADCL_DAT_INV */
+#define WM8994_AIF2ADCL_DAT_INV_MASK            0x0002  /* AIF2ADCL_DAT_INV */
+#define WM8994_AIF2ADCL_DAT_INV_SHIFT                1  /* AIF2ADCL_DAT_INV */
+#define WM8994_AIF2ADCL_DAT_INV_WIDTH                1  /* AIF2ADCL_DAT_INV */
+#define WM8994_AIF2ADCR_DAT_INV                 0x0001  /* AIF2ADCR_DAT_INV */
+#define WM8994_AIF2ADCR_DAT_INV_MASK            0x0001  /* AIF2ADCR_DAT_INV */
+#define WM8994_AIF2ADCR_DAT_INV_SHIFT                0  /* AIF2ADCR_DAT_INV */
+#define WM8994_AIF2ADCR_DAT_INV_WIDTH                1  /* AIF2ADCR_DAT_INV */
+
+/*
+ * R1024 (0x400) - AIF1 ADC1 Left Volume
+ */
+#define WM8994_AIF1ADC1_VU                      0x0100  /* AIF1ADC1_VU */
+#define WM8994_AIF1ADC1_VU_MASK                 0x0100  /* AIF1ADC1_VU */
+#define WM8994_AIF1ADC1_VU_SHIFT                     8  /* AIF1ADC1_VU */
+#define WM8994_AIF1ADC1_VU_WIDTH                     1  /* AIF1ADC1_VU */
+#define WM8994_AIF1ADC1L_VOL_MASK               0x00FF  /* AIF1ADC1L_VOL - [7:0] */
+#define WM8994_AIF1ADC1L_VOL_SHIFT                   0  /* AIF1ADC1L_VOL - [7:0] */
+#define WM8994_AIF1ADC1L_VOL_WIDTH                   8  /* AIF1ADC1L_VOL - [7:0] */
+
+/*
+ * R1025 (0x401) - AIF1 ADC1 Right Volume
+ */
+#define WM8994_AIF1ADC1_VU                      0x0100  /* AIF1ADC1_VU */
+#define WM8994_AIF1ADC1_VU_MASK                 0x0100  /* AIF1ADC1_VU */
+#define WM8994_AIF1ADC1_VU_SHIFT                     8  /* AIF1ADC1_VU */
+#define WM8994_AIF1ADC1_VU_WIDTH                     1  /* AIF1ADC1_VU */
+#define WM8994_AIF1ADC1R_VOL_MASK               0x00FF  /* AIF1ADC1R_VOL - [7:0] */
+#define WM8994_AIF1ADC1R_VOL_SHIFT                   0  /* AIF1ADC1R_VOL - [7:0] */
+#define WM8994_AIF1ADC1R_VOL_WIDTH                   8  /* AIF1ADC1R_VOL - [7:0] */
+
+/*
+ * R1026 (0x402) - AIF1 DAC1 Left Volume
+ */
+#define WM8994_AIF1DAC1_VU                      0x0100  /* AIF1DAC1_VU */
+#define WM8994_AIF1DAC1_VU_MASK                 0x0100  /* AIF1DAC1_VU */
+#define WM8994_AIF1DAC1_VU_SHIFT                     8  /* AIF1DAC1_VU */
+#define WM8994_AIF1DAC1_VU_WIDTH                     1  /* AIF1DAC1_VU */
+#define WM8994_AIF1DAC1L_VOL_MASK               0x00FF  /* AIF1DAC1L_VOL - [7:0] */
+#define WM8994_AIF1DAC1L_VOL_SHIFT                   0  /* AIF1DAC1L_VOL - [7:0] */
+#define WM8994_AIF1DAC1L_VOL_WIDTH                   8  /* AIF1DAC1L_VOL - [7:0] */
+
+/*
+ * R1027 (0x403) - AIF1 DAC1 Right Volume
+ */
+#define WM8994_AIF1DAC1_VU                      0x0100  /* AIF1DAC1_VU */
+#define WM8994_AIF1DAC1_VU_MASK                 0x0100  /* AIF1DAC1_VU */
+#define WM8994_AIF1DAC1_VU_SHIFT                     8  /* AIF1DAC1_VU */
+#define WM8994_AIF1DAC1_VU_WIDTH                     1  /* AIF1DAC1_VU */
+#define WM8994_AIF1DAC1R_VOL_MASK               0x00FF  /* AIF1DAC1R_VOL - [7:0] */
+#define WM8994_AIF1DAC1R_VOL_SHIFT                   0  /* AIF1DAC1R_VOL - [7:0] */
+#define WM8994_AIF1DAC1R_VOL_WIDTH                   8  /* AIF1DAC1R_VOL - [7:0] */
+
+/*
+ * R1028 (0x404) - AIF1 ADC2 Left Volume
+ */
+#define WM8994_AIF1ADC2_VU                      0x0100  /* AIF1ADC2_VU */
+#define WM8994_AIF1ADC2_VU_MASK                 0x0100  /* AIF1ADC2_VU */
+#define WM8994_AIF1ADC2_VU_SHIFT                     8  /* AIF1ADC2_VU */
+#define WM8994_AIF1ADC2_VU_WIDTH                     1  /* AIF1ADC2_VU */
+#define WM8994_AIF1ADC2L_VOL_MASK               0x00FF  /* AIF1ADC2L_VOL - [7:0] */
+#define WM8994_AIF1ADC2L_VOL_SHIFT                   0  /* AIF1ADC2L_VOL - [7:0] */
+#define WM8994_AIF1ADC2L_VOL_WIDTH                   8  /* AIF1ADC2L_VOL - [7:0] */
+
+/*
+ * R1029 (0x405) - AIF1 ADC2 Right Volume
+ */
+#define WM8994_AIF1ADC2_VU                      0x0100  /* AIF1ADC2_VU */
+#define WM8994_AIF1ADC2_VU_MASK                 0x0100  /* AIF1ADC2_VU */
+#define WM8994_AIF1ADC2_VU_SHIFT                     8  /* AIF1ADC2_VU */
+#define WM8994_AIF1ADC2_VU_WIDTH                     1  /* AIF1ADC2_VU */
+#define WM8994_AIF1ADC2R_VOL_MASK               0x00FF  /* AIF1ADC2R_VOL - [7:0] */
+#define WM8994_AIF1ADC2R_VOL_SHIFT                   0  /* AIF1ADC2R_VOL - [7:0] */
+#define WM8994_AIF1ADC2R_VOL_WIDTH                   8  /* AIF1ADC2R_VOL - [7:0] */
+
+/*
+ * R1030 (0x406) - AIF1 DAC2 Left Volume
+ */
+#define WM8994_AIF1DAC2_VU                      0x0100  /* AIF1DAC2_VU */
+#define WM8994_AIF1DAC2_VU_MASK                 0x0100  /* AIF1DAC2_VU */
+#define WM8994_AIF1DAC2_VU_SHIFT                     8  /* AIF1DAC2_VU */
+#define WM8994_AIF1DAC2_VU_WIDTH                     1  /* AIF1DAC2_VU */
+#define WM8994_AIF1DAC2L_VOL_MASK               0x00FF  /* AIF1DAC2L_VOL - [7:0] */
+#define WM8994_AIF1DAC2L_VOL_SHIFT                   0  /* AIF1DAC2L_VOL - [7:0] */
+#define WM8994_AIF1DAC2L_VOL_WIDTH                   8  /* AIF1DAC2L_VOL - [7:0] */
+
+/*
+ * R1031 (0x407) - AIF1 DAC2 Right Volume
+ */
+#define WM8994_AIF1DAC2_VU                      0x0100  /* AIF1DAC2_VU */
+#define WM8994_AIF1DAC2_VU_MASK                 0x0100  /* AIF1DAC2_VU */
+#define WM8994_AIF1DAC2_VU_SHIFT                     8  /* AIF1DAC2_VU */
+#define WM8994_AIF1DAC2_VU_WIDTH                     1  /* AIF1DAC2_VU */
+#define WM8994_AIF1DAC2R_VOL_MASK               0x00FF  /* AIF1DAC2R_VOL - [7:0] */
+#define WM8994_AIF1DAC2R_VOL_SHIFT                   0  /* AIF1DAC2R_VOL - [7:0] */
+#define WM8994_AIF1DAC2R_VOL_WIDTH                   8  /* AIF1DAC2R_VOL - [7:0] */
+
+/*
+ * R1040 (0x410) - AIF1 ADC1 Filters
+ */
+#define WM8994_AIF1ADC_4FS                      0x8000  /* AIF1ADC_4FS */
+#define WM8994_AIF1ADC_4FS_MASK                 0x8000  /* AIF1ADC_4FS */
+#define WM8994_AIF1ADC_4FS_SHIFT                    15  /* AIF1ADC_4FS */
+#define WM8994_AIF1ADC_4FS_WIDTH                     1  /* AIF1ADC_4FS */
+#define WM8994_AIF1ADC1_HPF_CUT_MASK            0x6000  /* AIF1ADC1_HPF_CUT - [14:13] */
+#define WM8994_AIF1ADC1_HPF_CUT_SHIFT               13  /* AIF1ADC1_HPF_CUT - [14:13] */
+#define WM8994_AIF1ADC1_HPF_CUT_WIDTH                2  /* AIF1ADC1_HPF_CUT - [14:13] */
+#define WM8994_AIF1ADC1L_HPF                    0x1000  /* AIF1ADC1L_HPF */
+#define WM8994_AIF1ADC1L_HPF_MASK               0x1000  /* AIF1ADC1L_HPF */
+#define WM8994_AIF1ADC1L_HPF_SHIFT                  12  /* AIF1ADC1L_HPF */
+#define WM8994_AIF1ADC1L_HPF_WIDTH                   1  /* AIF1ADC1L_HPF */
+#define WM8994_AIF1ADC1R_HPF                    0x0800  /* AIF1ADC1R_HPF */
+#define WM8994_AIF1ADC1R_HPF_MASK               0x0800  /* AIF1ADC1R_HPF */
+#define WM8994_AIF1ADC1R_HPF_SHIFT                  11  /* AIF1ADC1R_HPF */
+#define WM8994_AIF1ADC1R_HPF_WIDTH                   1  /* AIF1ADC1R_HPF */
+
+/*
+ * R1041 (0x411) - AIF1 ADC2 Filters
+ */
+#define WM8994_AIF1ADC2_HPF_CUT_MASK            0x6000  /* AIF1ADC2_HPF_CUT - [14:13] */
+#define WM8994_AIF1ADC2_HPF_CUT_SHIFT               13  /* AIF1ADC2_HPF_CUT - [14:13] */
+#define WM8994_AIF1ADC2_HPF_CUT_WIDTH                2  /* AIF1ADC2_HPF_CUT - [14:13] */
+#define WM8994_AIF1ADC2L_HPF                    0x1000  /* AIF1ADC2L_HPF */
+#define WM8994_AIF1ADC2L_HPF_MASK               0x1000  /* AIF1ADC2L_HPF */
+#define WM8994_AIF1ADC2L_HPF_SHIFT                  12  /* AIF1ADC2L_HPF */
+#define WM8994_AIF1ADC2L_HPF_WIDTH                   1  /* AIF1ADC2L_HPF */
+#define WM8994_AIF1ADC2R_HPF                    0x0800  /* AIF1ADC2R_HPF */
+#define WM8994_AIF1ADC2R_HPF_MASK               0x0800  /* AIF1ADC2R_HPF */
+#define WM8994_AIF1ADC2R_HPF_SHIFT                  11  /* AIF1ADC2R_HPF */
+#define WM8994_AIF1ADC2R_HPF_WIDTH                   1  /* AIF1ADC2R_HPF */
+
+/*
+ * R1056 (0x420) - AIF1 DAC1 Filters (1)
+ */
+#define WM8994_AIF1DAC1_MUTE                    0x0200  /* AIF1DAC1_MUTE */
+#define WM8994_AIF1DAC1_MUTE_MASK               0x0200  /* AIF1DAC1_MUTE */
+#define WM8994_AIF1DAC1_MUTE_SHIFT                   9  /* AIF1DAC1_MUTE */
+#define WM8994_AIF1DAC1_MUTE_WIDTH                   1  /* AIF1DAC1_MUTE */
+#define WM8994_AIF1DAC1_MONO                    0x0080  /* AIF1DAC1_MONO */
+#define WM8994_AIF1DAC1_MONO_MASK               0x0080  /* AIF1DAC1_MONO */
+#define WM8994_AIF1DAC1_MONO_SHIFT                   7  /* AIF1DAC1_MONO */
+#define WM8994_AIF1DAC1_MONO_WIDTH                   1  /* AIF1DAC1_MONO */
+#define WM8994_AIF1DAC1_MUTERATE                0x0020  /* AIF1DAC1_MUTERATE */
+#define WM8994_AIF1DAC1_MUTERATE_MASK           0x0020  /* AIF1DAC1_MUTERATE */
+#define WM8994_AIF1DAC1_MUTERATE_SHIFT               5  /* AIF1DAC1_MUTERATE */
+#define WM8994_AIF1DAC1_MUTERATE_WIDTH               1  /* AIF1DAC1_MUTERATE */
+#define WM8994_AIF1DAC1_UNMUTE_RAMP             0x0010  /* AIF1DAC1_UNMUTE_RAMP */
+#define WM8994_AIF1DAC1_UNMUTE_RAMP_MASK        0x0010  /* AIF1DAC1_UNMUTE_RAMP */
+#define WM8994_AIF1DAC1_UNMUTE_RAMP_SHIFT            4  /* AIF1DAC1_UNMUTE_RAMP */
+#define WM8994_AIF1DAC1_UNMUTE_RAMP_WIDTH            1  /* AIF1DAC1_UNMUTE_RAMP */
+#define WM8994_AIF1DAC1_DEEMP_MASK              0x0006  /* AIF1DAC1_DEEMP - [2:1] */
+#define WM8994_AIF1DAC1_DEEMP_SHIFT                  1  /* AIF1DAC1_DEEMP - [2:1] */
+#define WM8994_AIF1DAC1_DEEMP_WIDTH                  2  /* AIF1DAC1_DEEMP - [2:1] */
+
+/*
+ * R1057 (0x421) - AIF1 DAC1 Filters (2)
+ */
+#define WM8994_AIF1DAC1_3D_GAIN_MASK            0x3E00  /* AIF1DAC1_3D_GAIN - [13:9] */
+#define WM8994_AIF1DAC1_3D_GAIN_SHIFT                9  /* AIF1DAC1_3D_GAIN - [13:9] */
+#define WM8994_AIF1DAC1_3D_GAIN_WIDTH                5  /* AIF1DAC1_3D_GAIN - [13:9] */
+#define WM8994_AIF1DAC1_3D_ENA                  0x0100  /* AIF1DAC1_3D_ENA */
+#define WM8994_AIF1DAC1_3D_ENA_MASK             0x0100  /* AIF1DAC1_3D_ENA */
+#define WM8994_AIF1DAC1_3D_ENA_SHIFT                 8  /* AIF1DAC1_3D_ENA */
+#define WM8994_AIF1DAC1_3D_ENA_WIDTH                 1  /* AIF1DAC1_3D_ENA */
+
+/*
+ * R1058 (0x422) - AIF1 DAC2 Filters (1)
+ */
+#define WM8994_AIF1DAC2_MUTE                    0x0200  /* AIF1DAC2_MUTE */
+#define WM8994_AIF1DAC2_MUTE_MASK               0x0200  /* AIF1DAC2_MUTE */
+#define WM8994_AIF1DAC2_MUTE_SHIFT                   9  /* AIF1DAC2_MUTE */
+#define WM8994_AIF1DAC2_MUTE_WIDTH                   1  /* AIF1DAC2_MUTE */
+#define WM8994_AIF1DAC2_MONO                    0x0080  /* AIF1DAC2_MONO */
+#define WM8994_AIF1DAC2_MONO_MASK               0x0080  /* AIF1DAC2_MONO */
+#define WM8994_AIF1DAC2_MONO_SHIFT                   7  /* AIF1DAC2_MONO */
+#define WM8994_AIF1DAC2_MONO_WIDTH                   1  /* AIF1DAC2_MONO */
+#define WM8994_AIF1DAC2_MUTERATE                0x0020  /* AIF1DAC2_MUTERATE */
+#define WM8994_AIF1DAC2_MUTERATE_MASK           0x0020  /* AIF1DAC2_MUTERATE */
+#define WM8994_AIF1DAC2_MUTERATE_SHIFT               5  /* AIF1DAC2_MUTERATE */
+#define WM8994_AIF1DAC2_MUTERATE_WIDTH               1  /* AIF1DAC2_MUTERATE */
+#define WM8994_AIF1DAC2_UNMUTE_RAMP             0x0010  /* AIF1DAC2_UNMUTE_RAMP */
+#define WM8994_AIF1DAC2_UNMUTE_RAMP_MASK        0x0010  /* AIF1DAC2_UNMUTE_RAMP */
+#define WM8994_AIF1DAC2_UNMUTE_RAMP_SHIFT            4  /* AIF1DAC2_UNMUTE_RAMP */
+#define WM8994_AIF1DAC2_UNMUTE_RAMP_WIDTH            1  /* AIF1DAC2_UNMUTE_RAMP */
+#define WM8994_AIF1DAC2_DEEMP_MASK              0x0006  /* AIF1DAC2_DEEMP - [2:1] */
+#define WM8994_AIF1DAC2_DEEMP_SHIFT                  1  /* AIF1DAC2_DEEMP - [2:1] */
+#define WM8994_AIF1DAC2_DEEMP_WIDTH                  2  /* AIF1DAC2_DEEMP - [2:1] */
+
+/*
+ * R1059 (0x423) - AIF1 DAC2 Filters (2)
+ */
+#define WM8994_AIF1DAC2_3D_GAIN_MASK            0x3E00  /* AIF1DAC2_3D_GAIN - [13:9] */
+#define WM8994_AIF1DAC2_3D_GAIN_SHIFT                9  /* AIF1DAC2_3D_GAIN - [13:9] */
+#define WM8994_AIF1DAC2_3D_GAIN_WIDTH                5  /* AIF1DAC2_3D_GAIN - [13:9] */
+#define WM8994_AIF1DAC2_3D_ENA                  0x0100  /* AIF1DAC2_3D_ENA */
+#define WM8994_AIF1DAC2_3D_ENA_MASK             0x0100  /* AIF1DAC2_3D_ENA */
+#define WM8994_AIF1DAC2_3D_ENA_SHIFT                 8  /* AIF1DAC2_3D_ENA */
+#define WM8994_AIF1DAC2_3D_ENA_WIDTH                 1  /* AIF1DAC2_3D_ENA */
+
+/*
+ * R1088 (0x440) - AIF1 DRC1 (1)
+ */
+#define WM8994_AIF1DRC1_SIG_DET_RMS_MASK        0xF800  /* AIF1DRC1_SIG_DET_RMS - [15:11] */
+#define WM8994_AIF1DRC1_SIG_DET_RMS_SHIFT           11  /* AIF1DRC1_SIG_DET_RMS - [15:11] */
+#define WM8994_AIF1DRC1_SIG_DET_RMS_WIDTH            5  /* AIF1DRC1_SIG_DET_RMS - [15:11] */
+#define WM8994_AIF1DRC1_SIG_DET_PK_MASK         0x0600  /* AIF1DRC1_SIG_DET_PK - [10:9] */
+#define WM8994_AIF1DRC1_SIG_DET_PK_SHIFT             9  /* AIF1DRC1_SIG_DET_PK - [10:9] */
+#define WM8994_AIF1DRC1_SIG_DET_PK_WIDTH             2  /* AIF1DRC1_SIG_DET_PK - [10:9] */
+#define WM8994_AIF1DRC1_NG_ENA                  0x0100  /* AIF1DRC1_NG_ENA */
+#define WM8994_AIF1DRC1_NG_ENA_MASK             0x0100  /* AIF1DRC1_NG_ENA */
+#define WM8994_AIF1DRC1_NG_ENA_SHIFT                 8  /* AIF1DRC1_NG_ENA */
+#define WM8994_AIF1DRC1_NG_ENA_WIDTH                 1  /* AIF1DRC1_NG_ENA */
+#define WM8994_AIF1DRC1_SIG_DET_MODE            0x0080  /* AIF1DRC1_SIG_DET_MODE */
+#define WM8994_AIF1DRC1_SIG_DET_MODE_MASK       0x0080  /* AIF1DRC1_SIG_DET_MODE */
+#define WM8994_AIF1DRC1_SIG_DET_MODE_SHIFT           7  /* AIF1DRC1_SIG_DET_MODE */
+#define WM8994_AIF1DRC1_SIG_DET_MODE_WIDTH           1  /* AIF1DRC1_SIG_DET_MODE */
+#define WM8994_AIF1DRC1_SIG_DET                 0x0040  /* AIF1DRC1_SIG_DET */
+#define WM8994_AIF1DRC1_SIG_DET_MASK            0x0040  /* AIF1DRC1_SIG_DET */
+#define WM8994_AIF1DRC1_SIG_DET_SHIFT                6  /* AIF1DRC1_SIG_DET */
+#define WM8994_AIF1DRC1_SIG_DET_WIDTH                1  /* AIF1DRC1_SIG_DET */
+#define WM8994_AIF1DRC1_KNEE2_OP_ENA            0x0020  /* AIF1DRC1_KNEE2_OP_ENA */
+#define WM8994_AIF1DRC1_KNEE2_OP_ENA_MASK       0x0020  /* AIF1DRC1_KNEE2_OP_ENA */
+#define WM8994_AIF1DRC1_KNEE2_OP_ENA_SHIFT           5  /* AIF1DRC1_KNEE2_OP_ENA */
+#define WM8994_AIF1DRC1_KNEE2_OP_ENA_WIDTH           1  /* AIF1DRC1_KNEE2_OP_ENA */
+#define WM8994_AIF1DRC1_QR                      0x0010  /* AIF1DRC1_QR */
+#define WM8994_AIF1DRC1_QR_MASK                 0x0010  /* AIF1DRC1_QR */
+#define WM8994_AIF1DRC1_QR_SHIFT                     4  /* AIF1DRC1_QR */
+#define WM8994_AIF1DRC1_QR_WIDTH                     1  /* AIF1DRC1_QR */
+#define WM8994_AIF1DRC1_ANTICLIP                0x0008  /* AIF1DRC1_ANTICLIP */
+#define WM8994_AIF1DRC1_ANTICLIP_MASK           0x0008  /* AIF1DRC1_ANTICLIP */
+#define WM8994_AIF1DRC1_ANTICLIP_SHIFT               3  /* AIF1DRC1_ANTICLIP */
+#define WM8994_AIF1DRC1_ANTICLIP_WIDTH               1  /* AIF1DRC1_ANTICLIP */
+#define WM8994_AIF1DAC1_DRC_ENA                 0x0004  /* AIF1DAC1_DRC_ENA */
+#define WM8994_AIF1DAC1_DRC_ENA_MASK            0x0004  /* AIF1DAC1_DRC_ENA */
+#define WM8994_AIF1DAC1_DRC_ENA_SHIFT                2  /* AIF1DAC1_DRC_ENA */
+#define WM8994_AIF1DAC1_DRC_ENA_WIDTH                1  /* AIF1DAC1_DRC_ENA */
+#define WM8994_AIF1ADC1L_DRC_ENA                0x0002  /* AIF1ADC1L_DRC_ENA */
+#define WM8994_AIF1ADC1L_DRC_ENA_MASK           0x0002  /* AIF1ADC1L_DRC_ENA */
+#define WM8994_AIF1ADC1L_DRC_ENA_SHIFT               1  /* AIF1ADC1L_DRC_ENA */
+#define WM8994_AIF1ADC1L_DRC_ENA_WIDTH               1  /* AIF1ADC1L_DRC_ENA */
+#define WM8994_AIF1ADC1R_DRC_ENA                0x0001  /* AIF1ADC1R_DRC_ENA */
+#define WM8994_AIF1ADC1R_DRC_ENA_MASK           0x0001  /* AIF1ADC1R_DRC_ENA */
+#define WM8994_AIF1ADC1R_DRC_ENA_SHIFT               0  /* AIF1ADC1R_DRC_ENA */
+#define WM8994_AIF1ADC1R_DRC_ENA_WIDTH               1  /* AIF1ADC1R_DRC_ENA */
+
+/*
+ * R1089 (0x441) - AIF1 DRC1 (2)
+ */
+#define WM8994_AIF1DRC1_ATK_MASK                0x1E00  /* AIF1DRC1_ATK - [12:9] */
+#define WM8994_AIF1DRC1_ATK_SHIFT                    9  /* AIF1DRC1_ATK - [12:9] */
+#define WM8994_AIF1DRC1_ATK_WIDTH                    4  /* AIF1DRC1_ATK - [12:9] */
+#define WM8994_AIF1DRC1_DCY_MASK                0x01E0  /* AIF1DRC1_DCY - [8:5] */
+#define WM8994_AIF1DRC1_DCY_SHIFT                    5  /* AIF1DRC1_DCY - [8:5] */
+#define WM8994_AIF1DRC1_DCY_WIDTH                    4  /* AIF1DRC1_DCY - [8:5] */
+#define WM8994_AIF1DRC1_MINGAIN_MASK            0x001C  /* AIF1DRC1_MINGAIN - [4:2] */
+#define WM8994_AIF1DRC1_MINGAIN_SHIFT                2  /* AIF1DRC1_MINGAIN - [4:2] */
+#define WM8994_AIF1DRC1_MINGAIN_WIDTH                3  /* AIF1DRC1_MINGAIN - [4:2] */
+#define WM8994_AIF1DRC1_MAXGAIN_MASK            0x0003  /* AIF1DRC1_MAXGAIN - [1:0] */
+#define WM8994_AIF1DRC1_MAXGAIN_SHIFT                0  /* AIF1DRC1_MAXGAIN - [1:0] */
+#define WM8994_AIF1DRC1_MAXGAIN_WIDTH                2  /* AIF1DRC1_MAXGAIN - [1:0] */
+
+/*
+ * R1090 (0x442) - AIF1 DRC1 (3)
+ */
+#define WM8994_AIF1DRC1_NG_MINGAIN_MASK         0xF000  /* AIF1DRC1_NG_MINGAIN - [15:12] */
+#define WM8994_AIF1DRC1_NG_MINGAIN_SHIFT            12  /* AIF1DRC1_NG_MINGAIN - [15:12] */
+#define WM8994_AIF1DRC1_NG_MINGAIN_WIDTH             4  /* AIF1DRC1_NG_MINGAIN - [15:12] */
+#define WM8994_AIF1DRC1_NG_EXP_MASK             0x0C00  /* AIF1DRC1_NG_EXP - [11:10] */
+#define WM8994_AIF1DRC1_NG_EXP_SHIFT                10  /* AIF1DRC1_NG_EXP - [11:10] */
+#define WM8994_AIF1DRC1_NG_EXP_WIDTH                 2  /* AIF1DRC1_NG_EXP - [11:10] */
+#define WM8994_AIF1DRC1_QR_THR_MASK             0x0300  /* AIF1DRC1_QR_THR - [9:8] */
+#define WM8994_AIF1DRC1_QR_THR_SHIFT                 8  /* AIF1DRC1_QR_THR - [9:8] */
+#define WM8994_AIF1DRC1_QR_THR_WIDTH                 2  /* AIF1DRC1_QR_THR - [9:8] */
+#define WM8994_AIF1DRC1_QR_DCY_MASK             0x00C0  /* AIF1DRC1_QR_DCY - [7:6] */
+#define WM8994_AIF1DRC1_QR_DCY_SHIFT                 6  /* AIF1DRC1_QR_DCY - [7:6] */
+#define WM8994_AIF1DRC1_QR_DCY_WIDTH                 2  /* AIF1DRC1_QR_DCY - [7:6] */
+#define WM8994_AIF1DRC1_HI_COMP_MASK            0x0038  /* AIF1DRC1_HI_COMP - [5:3] */
+#define WM8994_AIF1DRC1_HI_COMP_SHIFT                3  /* AIF1DRC1_HI_COMP - [5:3] */
+#define WM8994_AIF1DRC1_HI_COMP_WIDTH                3  /* AIF1DRC1_HI_COMP - [5:3] */
+#define WM8994_AIF1DRC1_LO_COMP_MASK            0x0007  /* AIF1DRC1_LO_COMP - [2:0] */
+#define WM8994_AIF1DRC1_LO_COMP_SHIFT                0  /* AIF1DRC1_LO_COMP - [2:0] */
+#define WM8994_AIF1DRC1_LO_COMP_WIDTH                3  /* AIF1DRC1_LO_COMP - [2:0] */
+
+/*
+ * R1091 (0x443) - AIF1 DRC1 (4)
+ */
+#define WM8994_AIF1DRC1_KNEE_IP_MASK            0x07E0  /* AIF1DRC1_KNEE_IP - [10:5] */
+#define WM8994_AIF1DRC1_KNEE_IP_SHIFT                5  /* AIF1DRC1_KNEE_IP - [10:5] */
+#define WM8994_AIF1DRC1_KNEE_IP_WIDTH                6  /* AIF1DRC1_KNEE_IP - [10:5] */
+#define WM8994_AIF1DRC1_KNEE_OP_MASK            0x001F  /* AIF1DRC1_KNEE_OP - [4:0] */
+#define WM8994_AIF1DRC1_KNEE_OP_SHIFT                0  /* AIF1DRC1_KNEE_OP - [4:0] */
+#define WM8994_AIF1DRC1_KNEE_OP_WIDTH                5  /* AIF1DRC1_KNEE_OP - [4:0] */
+
+/*
+ * R1092 (0x444) - AIF1 DRC1 (5)
+ */
+#define WM8994_AIF1DRC1_KNEE2_IP_MASK           0x03E0  /* AIF1DRC1_KNEE2_IP - [9:5] */
+#define WM8994_AIF1DRC1_KNEE2_IP_SHIFT               5  /* AIF1DRC1_KNEE2_IP - [9:5] */
+#define WM8994_AIF1DRC1_KNEE2_IP_WIDTH               5  /* AIF1DRC1_KNEE2_IP - [9:5] */
+#define WM8994_AIF1DRC1_KNEE2_OP_MASK           0x001F  /* AIF1DRC1_KNEE2_OP - [4:0] */
+#define WM8994_AIF1DRC1_KNEE2_OP_SHIFT               0  /* AIF1DRC1_KNEE2_OP - [4:0] */
+#define WM8994_AIF1DRC1_KNEE2_OP_WIDTH               5  /* AIF1DRC1_KNEE2_OP - [4:0] */
+
+/*
+ * R1104 (0x450) - AIF1 DRC2 (1)
+ */
+#define WM8994_AIF1DRC2_SIG_DET_RMS_MASK        0xF800  /* AIF1DRC2_SIG_DET_RMS - [15:11] */
+#define WM8994_AIF1DRC2_SIG_DET_RMS_SHIFT           11  /* AIF1DRC2_SIG_DET_RMS - [15:11] */
+#define WM8994_AIF1DRC2_SIG_DET_RMS_WIDTH            5  /* AIF1DRC2_SIG_DET_RMS - [15:11] */
+#define WM8994_AIF1DRC2_SIG_DET_PK_MASK         0x0600  /* AIF1DRC2_SIG_DET_PK - [10:9] */
+#define WM8994_AIF1DRC2_SIG_DET_PK_SHIFT             9  /* AIF1DRC2_SIG_DET_PK - [10:9] */
+#define WM8994_AIF1DRC2_SIG_DET_PK_WIDTH             2  /* AIF1DRC2_SIG_DET_PK - [10:9] */
+#define WM8994_AIF1DRC2_NG_ENA                  0x0100  /* AIF1DRC2_NG_ENA */
+#define WM8994_AIF1DRC2_NG_ENA_MASK             0x0100  /* AIF1DRC2_NG_ENA */
+#define WM8994_AIF1DRC2_NG_ENA_SHIFT                 8  /* AIF1DRC2_NG_ENA */
+#define WM8994_AIF1DRC2_NG_ENA_WIDTH                 1  /* AIF1DRC2_NG_ENA */
+#define WM8994_AIF1DRC2_SIG_DET_MODE            0x0080  /* AIF1DRC2_SIG_DET_MODE */
+#define WM8994_AIF1DRC2_SIG_DET_MODE_MASK       0x0080  /* AIF1DRC2_SIG_DET_MODE */
+#define WM8994_AIF1DRC2_SIG_DET_MODE_SHIFT           7  /* AIF1DRC2_SIG_DET_MODE */
+#define WM8994_AIF1DRC2_SIG_DET_MODE_WIDTH           1  /* AIF1DRC2_SIG_DET_MODE */
+#define WM8994_AIF1DRC2_SIG_DET                 0x0040  /* AIF1DRC2_SIG_DET */
+#define WM8994_AIF1DRC2_SIG_DET_MASK            0x0040  /* AIF1DRC2_SIG_DET */
+#define WM8994_AIF1DRC2_SIG_DET_SHIFT                6  /* AIF1DRC2_SIG_DET */
+#define WM8994_AIF1DRC2_SIG_DET_WIDTH                1  /* AIF1DRC2_SIG_DET */
+#define WM8994_AIF1DRC2_KNEE2_OP_ENA            0x0020  /* AIF1DRC2_KNEE2_OP_ENA */
+#define WM8994_AIF1DRC2_KNEE2_OP_ENA_MASK       0x0020  /* AIF1DRC2_KNEE2_OP_ENA */
+#define WM8994_AIF1DRC2_KNEE2_OP_ENA_SHIFT           5  /* AIF1DRC2_KNEE2_OP_ENA */
+#define WM8994_AIF1DRC2_KNEE2_OP_ENA_WIDTH           1  /* AIF1DRC2_KNEE2_OP_ENA */
+#define WM8994_AIF1DRC2_QR                      0x0010  /* AIF1DRC2_QR */
+#define WM8994_AIF1DRC2_QR_MASK                 0x0010  /* AIF1DRC2_QR */
+#define WM8994_AIF1DRC2_QR_SHIFT                     4  /* AIF1DRC2_QR */
+#define WM8994_AIF1DRC2_QR_WIDTH                     1  /* AIF1DRC2_QR */
+#define WM8994_AIF1DRC2_ANTICLIP                0x0008  /* AIF1DRC2_ANTICLIP */
+#define WM8994_AIF1DRC2_ANTICLIP_MASK           0x0008  /* AIF1DRC2_ANTICLIP */
+#define WM8994_AIF1DRC2_ANTICLIP_SHIFT               3  /* AIF1DRC2_ANTICLIP */
+#define WM8994_AIF1DRC2_ANTICLIP_WIDTH               1  /* AIF1DRC2_ANTICLIP */
+#define WM8994_AIF1DAC2_DRC_ENA                 0x0004  /* AIF1DAC2_DRC_ENA */
+#define WM8994_AIF1DAC2_DRC_ENA_MASK            0x0004  /* AIF1DAC2_DRC_ENA */
+#define WM8994_AIF1DAC2_DRC_ENA_SHIFT                2  /* AIF1DAC2_DRC_ENA */
+#define WM8994_AIF1DAC2_DRC_ENA_WIDTH                1  /* AIF1DAC2_DRC_ENA */
+#define WM8994_AIF1ADC2L_DRC_ENA                0x0002  /* AIF1ADC2L_DRC_ENA */
+#define WM8994_AIF1ADC2L_DRC_ENA_MASK           0x0002  /* AIF1ADC2L_DRC_ENA */
+#define WM8994_AIF1ADC2L_DRC_ENA_SHIFT               1  /* AIF1ADC2L_DRC_ENA */
+#define WM8994_AIF1ADC2L_DRC_ENA_WIDTH               1  /* AIF1ADC2L_DRC_ENA */
+#define WM8994_AIF1ADC2R_DRC_ENA                0x0001  /* AIF1ADC2R_DRC_ENA */
+#define WM8994_AIF1ADC2R_DRC_ENA_MASK           0x0001  /* AIF1ADC2R_DRC_ENA */
+#define WM8994_AIF1ADC2R_DRC_ENA_SHIFT               0  /* AIF1ADC2R_DRC_ENA */
+#define WM8994_AIF1ADC2R_DRC_ENA_WIDTH               1  /* AIF1ADC2R_DRC_ENA */
+
+/*
+ * R1105 (0x451) - AIF1 DRC2 (2)
+ */
+#define WM8994_AIF1DRC2_ATK_MASK                0x1E00  /* AIF1DRC2_ATK - [12:9] */
+#define WM8994_AIF1DRC2_ATK_SHIFT                    9  /* AIF1DRC2_ATK - [12:9] */
+#define WM8994_AIF1DRC2_ATK_WIDTH                    4  /* AIF1DRC2_ATK - [12:9] */
+#define WM8994_AIF1DRC2_DCY_MASK                0x01E0  /* AIF1DRC2_DCY - [8:5] */
+#define WM8994_AIF1DRC2_DCY_SHIFT                    5  /* AIF1DRC2_DCY - [8:5] */
+#define WM8994_AIF1DRC2_DCY_WIDTH                    4  /* AIF1DRC2_DCY - [8:5] */
+#define WM8994_AIF1DRC2_MINGAIN_MASK            0x001C  /* AIF1DRC2_MINGAIN - [4:2] */
+#define WM8994_AIF1DRC2_MINGAIN_SHIFT                2  /* AIF1DRC2_MINGAIN - [4:2] */
+#define WM8994_AIF1DRC2_MINGAIN_WIDTH                3  /* AIF1DRC2_MINGAIN - [4:2] */
+#define WM8994_AIF1DRC2_MAXGAIN_MASK            0x0003  /* AIF1DRC2_MAXGAIN - [1:0] */
+#define WM8994_AIF1DRC2_MAXGAIN_SHIFT                0  /* AIF1DRC2_MAXGAIN - [1:0] */
+#define WM8994_AIF1DRC2_MAXGAIN_WIDTH                2  /* AIF1DRC2_MAXGAIN - [1:0] */
+
+/*
+ * R1106 (0x452) - AIF1 DRC2 (3)
+ */
+#define WM8994_AIF1DRC2_NG_MINGAIN_MASK         0xF000  /* AIF1DRC2_NG_MINGAIN - [15:12] */
+#define WM8994_AIF1DRC2_NG_MINGAIN_SHIFT            12  /* AIF1DRC2_NG_MINGAIN - [15:12] */
+#define WM8994_AIF1DRC2_NG_MINGAIN_WIDTH             4  /* AIF1DRC2_NG_MINGAIN - [15:12] */
+#define WM8994_AIF1DRC2_NG_EXP_MASK             0x0C00  /* AIF1DRC2_NG_EXP - [11:10] */
+#define WM8994_AIF1DRC2_NG_EXP_SHIFT                10  /* AIF1DRC2_NG_EXP - [11:10] */
+#define WM8994_AIF1DRC2_NG_EXP_WIDTH                 2  /* AIF1DRC2_NG_EXP - [11:10] */
+#define WM8994_AIF1DRC2_QR_THR_MASK             0x0300  /* AIF1DRC2_QR_THR - [9:8] */
+#define WM8994_AIF1DRC2_QR_THR_SHIFT                 8  /* AIF1DRC2_QR_THR - [9:8] */
+#define WM8994_AIF1DRC2_QR_THR_WIDTH                 2  /* AIF1DRC2_QR_THR - [9:8] */
+#define WM8994_AIF1DRC2_QR_DCY_MASK             0x00C0  /* AIF1DRC2_QR_DCY - [7:6] */
+#define WM8994_AIF1DRC2_QR_DCY_SHIFT                 6  /* AIF1DRC2_QR_DCY - [7:6] */
+#define WM8994_AIF1DRC2_QR_DCY_WIDTH                 2  /* AIF1DRC2_QR_DCY - [7:6] */
+#define WM8994_AIF1DRC2_HI_COMP_MASK            0x0038  /* AIF1DRC2_HI_COMP - [5:3] */
+#define WM8994_AIF1DRC2_HI_COMP_SHIFT                3  /* AIF1DRC2_HI_COMP - [5:3] */
+#define WM8994_AIF1DRC2_HI_COMP_WIDTH                3  /* AIF1DRC2_HI_COMP - [5:3] */
+#define WM8994_AIF1DRC2_LO_COMP_MASK            0x0007  /* AIF1DRC2_LO_COMP - [2:0] */
+#define WM8994_AIF1DRC2_LO_COMP_SHIFT                0  /* AIF1DRC2_LO_COMP - [2:0] */
+#define WM8994_AIF1DRC2_LO_COMP_WIDTH                3  /* AIF1DRC2_LO_COMP - [2:0] */
+
+/*
+ * R1107 (0x453) - AIF1 DRC2 (4)
+ */
+#define WM8994_AIF1DRC2_KNEE_IP_MASK            0x07E0  /* AIF1DRC2_KNEE_IP - [10:5] */
+#define WM8994_AIF1DRC2_KNEE_IP_SHIFT                5  /* AIF1DRC2_KNEE_IP - [10:5] */
+#define WM8994_AIF1DRC2_KNEE_IP_WIDTH                6  /* AIF1DRC2_KNEE_IP - [10:5] */
+#define WM8994_AIF1DRC2_KNEE_OP_MASK            0x001F  /* AIF1DRC2_KNEE_OP - [4:0] */
+#define WM8994_AIF1DRC2_KNEE_OP_SHIFT                0  /* AIF1DRC2_KNEE_OP - [4:0] */
+#define WM8994_AIF1DRC2_KNEE_OP_WIDTH                5  /* AIF1DRC2_KNEE_OP - [4:0] */
+
+/*
+ * R1108 (0x454) - AIF1 DRC2 (5)
+ */
+#define WM8994_AIF1DRC2_KNEE2_IP_MASK           0x03E0  /* AIF1DRC2_KNEE2_IP - [9:5] */
+#define WM8994_AIF1DRC2_KNEE2_IP_SHIFT               5  /* AIF1DRC2_KNEE2_IP - [9:5] */
+#define WM8994_AIF1DRC2_KNEE2_IP_WIDTH               5  /* AIF1DRC2_KNEE2_IP - [9:5] */
+#define WM8994_AIF1DRC2_KNEE2_OP_MASK           0x001F  /* AIF1DRC2_KNEE2_OP - [4:0] */
+#define WM8994_AIF1DRC2_KNEE2_OP_SHIFT               0  /* AIF1DRC2_KNEE2_OP - [4:0] */
+#define WM8994_AIF1DRC2_KNEE2_OP_WIDTH               5  /* AIF1DRC2_KNEE2_OP - [4:0] */
+
+/*
+ * R1152 (0x480) - AIF1 DAC1 EQ Gains (1)
+ */
+#define WM8994_AIF1DAC1_EQ_B1_GAIN_MASK         0xF800  /* AIF1DAC1_EQ_B1_GAIN - [15:11] */
+#define WM8994_AIF1DAC1_EQ_B1_GAIN_SHIFT            11  /* AIF1DAC1_EQ_B1_GAIN - [15:11] */
+#define WM8994_AIF1DAC1_EQ_B1_GAIN_WIDTH             5  /* AIF1DAC1_EQ_B1_GAIN - [15:11] */
+#define WM8994_AIF1DAC1_EQ_B2_GAIN_MASK         0x07C0  /* AIF1DAC1_EQ_B2_GAIN - [10:6] */
+#define WM8994_AIF1DAC1_EQ_B2_GAIN_SHIFT             6  /* AIF1DAC1_EQ_B2_GAIN - [10:6] */
+#define WM8994_AIF1DAC1_EQ_B2_GAIN_WIDTH             5  /* AIF1DAC1_EQ_B2_GAIN - [10:6] */
+#define WM8994_AIF1DAC1_EQ_B3_GAIN_MASK         0x003E  /* AIF1DAC1_EQ_B3_GAIN - [5:1] */
+#define WM8994_AIF1DAC1_EQ_B3_GAIN_SHIFT             1  /* AIF1DAC1_EQ_B3_GAIN - [5:1] */
+#define WM8994_AIF1DAC1_EQ_B3_GAIN_WIDTH             5  /* AIF1DAC1_EQ_B3_GAIN - [5:1] */
+#define WM8994_AIF1DAC1_EQ_ENA                  0x0001  /* AIF1DAC1_EQ_ENA */
+#define WM8994_AIF1DAC1_EQ_ENA_MASK             0x0001  /* AIF1DAC1_EQ_ENA */
+#define WM8994_AIF1DAC1_EQ_ENA_SHIFT                 0  /* AIF1DAC1_EQ_ENA */
+#define WM8994_AIF1DAC1_EQ_ENA_WIDTH                 1  /* AIF1DAC1_EQ_ENA */
+
+/*
+ * R1153 (0x481) - AIF1 DAC1 EQ Gains (2)
+ */
+#define WM8994_AIF1DAC1_EQ_B4_GAIN_MASK         0xF800  /* AIF1DAC1_EQ_B4_GAIN - [15:11] */
+#define WM8994_AIF1DAC1_EQ_B4_GAIN_SHIFT            11  /* AIF1DAC1_EQ_B4_GAIN - [15:11] */
+#define WM8994_AIF1DAC1_EQ_B4_GAIN_WIDTH             5  /* AIF1DAC1_EQ_B4_GAIN - [15:11] */
+#define WM8994_AIF1DAC1_EQ_B5_GAIN_MASK         0x07C0  /* AIF1DAC1_EQ_B5_GAIN - [10:6] */
+#define WM8994_AIF1DAC1_EQ_B5_GAIN_SHIFT             6  /* AIF1DAC1_EQ_B5_GAIN - [10:6] */
+#define WM8994_AIF1DAC1_EQ_B5_GAIN_WIDTH             5  /* AIF1DAC1_EQ_B5_GAIN - [10:6] */
+
+/*
+ * R1154 (0x482) - AIF1 DAC1 EQ Band 1 A
+ */
+#define WM8994_AIF1DAC1_EQ_B1_A_MASK            0xFFFF  /* AIF1DAC1_EQ_B1_A - [15:0] */
+#define WM8994_AIF1DAC1_EQ_B1_A_SHIFT                0  /* AIF1DAC1_EQ_B1_A - [15:0] */
+#define WM8994_AIF1DAC1_EQ_B1_A_WIDTH               16  /* AIF1DAC1_EQ_B1_A - [15:0] */
+
+/*
+ * R1155 (0x483) - AIF1 DAC1 EQ Band 1 B
+ */
+#define WM8994_AIF1DAC1_EQ_B1_B_MASK            0xFFFF  /* AIF1DAC1_EQ_B1_B - [15:0] */
+#define WM8994_AIF1DAC1_EQ_B1_B_SHIFT                0  /* AIF1DAC1_EQ_B1_B - [15:0] */
+#define WM8994_AIF1DAC1_EQ_B1_B_WIDTH               16  /* AIF1DAC1_EQ_B1_B - [15:0] */
+
+/*
+ * R1156 (0x484) - AIF1 DAC1 EQ Band 1 PG
+ */
+#define WM8994_AIF1DAC1_EQ_B1_PG_MASK           0xFFFF  /* AIF1DAC1_EQ_B1_PG - [15:0] */
+#define WM8994_AIF1DAC1_EQ_B1_PG_SHIFT               0  /* AIF1DAC1_EQ_B1_PG - [15:0] */
+#define WM8994_AIF1DAC1_EQ_B1_PG_WIDTH              16  /* AIF1DAC1_EQ_B1_PG - [15:0] */
+
+/*
+ * R1157 (0x485) - AIF1 DAC1 EQ Band 2 A
+ */
+#define WM8994_AIF1DAC1_EQ_B2_A_MASK            0xFFFF  /* AIF1DAC1_EQ_B2_A - [15:0] */
+#define WM8994_AIF1DAC1_EQ_B2_A_SHIFT                0  /* AIF1DAC1_EQ_B2_A - [15:0] */
+#define WM8994_AIF1DAC1_EQ_B2_A_WIDTH               16  /* AIF1DAC1_EQ_B2_A - [15:0] */
+
+/*
+ * R1158 (0x486) - AIF1 DAC1 EQ Band 2 B
+ */
+#define WM8994_AIF1DAC1_EQ_B2_B_MASK            0xFFFF  /* AIF1DAC1_EQ_B2_B - [15:0] */
+#define WM8994_AIF1DAC1_EQ_B2_B_SHIFT                0  /* AIF1DAC1_EQ_B2_B - [15:0] */
+#define WM8994_AIF1DAC1_EQ_B2_B_WIDTH               16  /* AIF1DAC1_EQ_B2_B - [15:0] */
+
+/*
+ * R1159 (0x487) - AIF1 DAC1 EQ Band 2 C
+ */
+#define WM8994_AIF1DAC1_EQ_B2_C_MASK            0xFFFF  /* AIF1DAC1_EQ_B2_C - [15:0] */
+#define WM8994_AIF1DAC1_EQ_B2_C_SHIFT                0  /* AIF1DAC1_EQ_B2_C - [15:0] */
+#define WM8994_AIF1DAC1_EQ_B2_C_WIDTH               16  /* AIF1DAC1_EQ_B2_C - [15:0] */
+
+/*
+ * R1160 (0x488) - AIF1 DAC1 EQ Band 2 PG
+ */
+#define WM8994_AIF1DAC1_EQ_B2_PG_MASK           0xFFFF  /* AIF1DAC1_EQ_B2_PG - [15:0] */
+#define WM8994_AIF1DAC1_EQ_B2_PG_SHIFT               0  /* AIF1DAC1_EQ_B2_PG - [15:0] */
+#define WM8994_AIF1DAC1_EQ_B2_PG_WIDTH              16  /* AIF1DAC1_EQ_B2_PG - [15:0] */
+
+/*
+ * R1161 (0x489) - AIF1 DAC1 EQ Band 3 A
+ */
+#define WM8994_AIF1DAC1_EQ_B3_A_MASK            0xFFFF  /* AIF1DAC1_EQ_B3_A - [15:0] */
+#define WM8994_AIF1DAC1_EQ_B3_A_SHIFT                0  /* AIF1DAC1_EQ_B3_A - [15:0] */
+#define WM8994_AIF1DAC1_EQ_B3_A_WIDTH               16  /* AIF1DAC1_EQ_B3_A - [15:0] */
+
+/*
+ * R1162 (0x48A) - AIF1 DAC1 EQ Band 3 B
+ */
+#define WM8994_AIF1DAC1_EQ_B3_B_MASK            0xFFFF  /* AIF1DAC1_EQ_B3_B - [15:0] */
+#define WM8994_AIF1DAC1_EQ_B3_B_SHIFT                0  /* AIF1DAC1_EQ_B3_B - [15:0] */
+#define WM8994_AIF1DAC1_EQ_B3_B_WIDTH               16  /* AIF1DAC1_EQ_B3_B - [15:0] */
+
+/*
+ * R1163 (0x48B) - AIF1 DAC1 EQ Band 3 C
+ */
+#define WM8994_AIF1DAC1_EQ_B3_C_MASK            0xFFFF  /* AIF1DAC1_EQ_B3_C - [15:0] */
+#define WM8994_AIF1DAC1_EQ_B3_C_SHIFT                0  /* AIF1DAC1_EQ_B3_C - [15:0] */
+#define WM8994_AIF1DAC1_EQ_B3_C_WIDTH               16  /* AIF1DAC1_EQ_B3_C - [15:0] */
+
+/*
+ * R1164 (0x48C) - AIF1 DAC1 EQ Band 3 PG
+ */
+#define WM8994_AIF1DAC1_EQ_B3_PG_MASK           0xFFFF  /* AIF1DAC1_EQ_B3_PG - [15:0] */
+#define WM8994_AIF1DAC1_EQ_B3_PG_SHIFT               0  /* AIF1DAC1_EQ_B3_PG - [15:0] */
+#define WM8994_AIF1DAC1_EQ_B3_PG_WIDTH              16  /* AIF1DAC1_EQ_B3_PG - [15:0] */
+
+/*
+ * R1165 (0x48D) - AIF1 DAC1 EQ Band 4 A
+ */
+#define WM8994_AIF1DAC1_EQ_B4_A_MASK            0xFFFF  /* AIF1DAC1_EQ_B4_A - [15:0] */
+#define WM8994_AIF1DAC1_EQ_B4_A_SHIFT                0  /* AIF1DAC1_EQ_B4_A - [15:0] */
+#define WM8994_AIF1DAC1_EQ_B4_A_WIDTH               16  /* AIF1DAC1_EQ_B4_A - [15:0] */
+
+/*
+ * R1166 (0x48E) - AIF1 DAC1 EQ Band 4 B
+ */
+#define WM8994_AIF1DAC1_EQ_B4_B_MASK            0xFFFF  /* AIF1DAC1_EQ_B4_B - [15:0] */
+#define WM8994_AIF1DAC1_EQ_B4_B_SHIFT                0  /* AIF1DAC1_EQ_B4_B - [15:0] */
+#define WM8994_AIF1DAC1_EQ_B4_B_WIDTH               16  /* AIF1DAC1_EQ_B4_B - [15:0] */
+
+/*
+ * R1167 (0x48F) - AIF1 DAC1 EQ Band 4 C
+ */
+#define WM8994_AIF1DAC1_EQ_B4_C_MASK            0xFFFF  /* AIF1DAC1_EQ_B4_C - [15:0] */
+#define WM8994_AIF1DAC1_EQ_B4_C_SHIFT                0  /* AIF1DAC1_EQ_B4_C - [15:0] */
+#define WM8994_AIF1DAC1_EQ_B4_C_WIDTH               16  /* AIF1DAC1_EQ_B4_C - [15:0] */
+
+/*
+ * R1168 (0x490) - AIF1 DAC1 EQ Band 4 PG
+ */
+#define WM8994_AIF1DAC1_EQ_B4_PG_MASK           0xFFFF  /* AIF1DAC1_EQ_B4_PG - [15:0] */
+#define WM8994_AIF1DAC1_EQ_B4_PG_SHIFT               0  /* AIF1DAC1_EQ_B4_PG - [15:0] */
+#define WM8994_AIF1DAC1_EQ_B4_PG_WIDTH              16  /* AIF1DAC1_EQ_B4_PG - [15:0] */
+
+/*
+ * R1169 (0x491) - AIF1 DAC1 EQ Band 5 A
+ */
+#define WM8994_AIF1DAC1_EQ_B5_A_MASK            0xFFFF  /* AIF1DAC1_EQ_B5_A - [15:0] */
+#define WM8994_AIF1DAC1_EQ_B5_A_SHIFT                0  /* AIF1DAC1_EQ_B5_A - [15:0] */
+#define WM8994_AIF1DAC1_EQ_B5_A_WIDTH               16  /* AIF1DAC1_EQ_B5_A - [15:0] */
+
+/*
+ * R1170 (0x492) - AIF1 DAC1 EQ Band 5 B
+ */
+#define WM8994_AIF1DAC1_EQ_B5_B_MASK            0xFFFF  /* AIF1DAC1_EQ_B5_B - [15:0] */
+#define WM8994_AIF1DAC1_EQ_B5_B_SHIFT                0  /* AIF1DAC1_EQ_B5_B - [15:0] */
+#define WM8994_AIF1DAC1_EQ_B5_B_WIDTH               16  /* AIF1DAC1_EQ_B5_B - [15:0] */
+
+/*
+ * R1171 (0x493) - AIF1 DAC1 EQ Band 5 PG
+ */
+#define WM8994_AIF1DAC1_EQ_B5_PG_MASK           0xFFFF  /* AIF1DAC1_EQ_B5_PG - [15:0] */
+#define WM8994_AIF1DAC1_EQ_B5_PG_SHIFT               0  /* AIF1DAC1_EQ_B5_PG - [15:0] */
+#define WM8994_AIF1DAC1_EQ_B5_PG_WIDTH              16  /* AIF1DAC1_EQ_B5_PG - [15:0] */
+
+/*
+ * R1184 (0x4A0) - AIF1 DAC2 EQ Gains (1)
+ */
+#define WM8994_AIF1DAC2_EQ_B1_GAIN_MASK         0xF800  /* AIF1DAC2_EQ_B1_GAIN - [15:11] */
+#define WM8994_AIF1DAC2_EQ_B1_GAIN_SHIFT            11  /* AIF1DAC2_EQ_B1_GAIN - [15:11] */
+#define WM8994_AIF1DAC2_EQ_B1_GAIN_WIDTH             5  /* AIF1DAC2_EQ_B1_GAIN - [15:11] */
+#define WM8994_AIF1DAC2_EQ_B2_GAIN_MASK         0x07C0  /* AIF1DAC2_EQ_B2_GAIN - [10:6] */
+#define WM8994_AIF1DAC2_EQ_B2_GAIN_SHIFT             6  /* AIF1DAC2_EQ_B2_GAIN - [10:6] */
+#define WM8994_AIF1DAC2_EQ_B2_GAIN_WIDTH             5  /* AIF1DAC2_EQ_B2_GAIN - [10:6] */
+#define WM8994_AIF1DAC2_EQ_B3_GAIN_MASK         0x003E  /* AIF1DAC2_EQ_B3_GAIN - [5:1] */
+#define WM8994_AIF1DAC2_EQ_B3_GAIN_SHIFT             1  /* AIF1DAC2_EQ_B3_GAIN - [5:1] */
+#define WM8994_AIF1DAC2_EQ_B3_GAIN_WIDTH             5  /* AIF1DAC2_EQ_B3_GAIN - [5:1] */
+#define WM8994_AIF1DAC2_EQ_ENA                  0x0001  /* AIF1DAC2_EQ_ENA */
+#define WM8994_AIF1DAC2_EQ_ENA_MASK             0x0001  /* AIF1DAC2_EQ_ENA */
+#define WM8994_AIF1DAC2_EQ_ENA_SHIFT                 0  /* AIF1DAC2_EQ_ENA */
+#define WM8994_AIF1DAC2_EQ_ENA_WIDTH                 1  /* AIF1DAC2_EQ_ENA */
+
+/*
+ * R1185 (0x4A1) - AIF1 DAC2 EQ Gains (2)
+ */
+#define WM8994_AIF1DAC2_EQ_B4_GAIN_MASK         0xF800  /* AIF1DAC2_EQ_B4_GAIN - [15:11] */
+#define WM8994_AIF1DAC2_EQ_B4_GAIN_SHIFT            11  /* AIF1DAC2_EQ_B4_GAIN - [15:11] */
+#define WM8994_AIF1DAC2_EQ_B4_GAIN_WIDTH             5  /* AIF1DAC2_EQ_B4_GAIN - [15:11] */
+#define WM8994_AIF1DAC2_EQ_B5_GAIN_MASK         0x07C0  /* AIF1DAC2_EQ_B5_GAIN - [10:6] */
+#define WM8994_AIF1DAC2_EQ_B5_GAIN_SHIFT             6  /* AIF1DAC2_EQ_B5_GAIN - [10:6] */
+#define WM8994_AIF1DAC2_EQ_B5_GAIN_WIDTH             5  /* AIF1DAC2_EQ_B5_GAIN - [10:6] */
+
+/*
+ * R1186 (0x4A2) - AIF1 DAC2 EQ Band 1 A
+ */
+#define WM8994_AIF1DAC2_EQ_B1_A_MASK            0xFFFF  /* AIF1DAC2_EQ_B1_A - [15:0] */
+#define WM8994_AIF1DAC2_EQ_B1_A_SHIFT                0  /* AIF1DAC2_EQ_B1_A - [15:0] */
+#define WM8994_AIF1DAC2_EQ_B1_A_WIDTH               16  /* AIF1DAC2_EQ_B1_A - [15:0] */
+
+/*
+ * R1187 (0x4A3) - AIF1 DAC2 EQ Band 1 B
+ */
+#define WM8994_AIF1DAC2_EQ_B1_B_MASK            0xFFFF  /* AIF1DAC2_EQ_B1_B - [15:0] */
+#define WM8994_AIF1DAC2_EQ_B1_B_SHIFT                0  /* AIF1DAC2_EQ_B1_B - [15:0] */
+#define WM8994_AIF1DAC2_EQ_B1_B_WIDTH               16  /* AIF1DAC2_EQ_B1_B - [15:0] */
+
+/*
+ * R1188 (0x4A4) - AIF1 DAC2 EQ Band 1 PG
+ */
+#define WM8994_AIF1DAC2_EQ_B1_PG_MASK           0xFFFF  /* AIF1DAC2_EQ_B1_PG - [15:0] */
+#define WM8994_AIF1DAC2_EQ_B1_PG_SHIFT               0  /* AIF1DAC2_EQ_B1_PG - [15:0] */
+#define WM8994_AIF1DAC2_EQ_B1_PG_WIDTH              16  /* AIF1DAC2_EQ_B1_PG - [15:0] */
+
+/*
+ * R1189 (0x4A5) - AIF1 DAC2 EQ Band 2 A
+ */
+#define WM8994_AIF1DAC2_EQ_B2_A_MASK            0xFFFF  /* AIF1DAC2_EQ_B2_A - [15:0] */
+#define WM8994_AIF1DAC2_EQ_B2_A_SHIFT                0  /* AIF1DAC2_EQ_B2_A - [15:0] */
+#define WM8994_AIF1DAC2_EQ_B2_A_WIDTH               16  /* AIF1DAC2_EQ_B2_A - [15:0] */
+
+/*
+ * R1190 (0x4A6) - AIF1 DAC2 EQ Band 2 B
+ */
+#define WM8994_AIF1DAC2_EQ_B2_B_MASK            0xFFFF  /* AIF1DAC2_EQ_B2_B - [15:0] */
+#define WM8994_AIF1DAC2_EQ_B2_B_SHIFT                0  /* AIF1DAC2_EQ_B2_B - [15:0] */
+#define WM8994_AIF1DAC2_EQ_B2_B_WIDTH               16  /* AIF1DAC2_EQ_B2_B - [15:0] */
+
+/*
+ * R1191 (0x4A7) - AIF1 DAC2 EQ Band 2 C
+ */
+#define WM8994_AIF1DAC2_EQ_B2_C_MASK            0xFFFF  /* AIF1DAC2_EQ_B2_C - [15:0] */
+#define WM8994_AIF1DAC2_EQ_B2_C_SHIFT                0  /* AIF1DAC2_EQ_B2_C - [15:0] */
+#define WM8994_AIF1DAC2_EQ_B2_C_WIDTH               16  /* AIF1DAC2_EQ_B2_C - [15:0] */
+
+/*
+ * R1192 (0x4A8) - AIF1 DAC2 EQ Band 2 PG
+ */
+#define WM8994_AIF1DAC2_EQ_B2_PG_MASK           0xFFFF  /* AIF1DAC2_EQ_B2_PG - [15:0] */
+#define WM8994_AIF1DAC2_EQ_B2_PG_SHIFT               0  /* AIF1DAC2_EQ_B2_PG - [15:0] */
+#define WM8994_AIF1DAC2_EQ_B2_PG_WIDTH              16  /* AIF1DAC2_EQ_B2_PG - [15:0] */
+
+/*
+ * R1193 (0x4A9) - AIF1 DAC2 EQ Band 3 A
+ */
+#define WM8994_AIF1DAC2_EQ_B3_A_MASK            0xFFFF  /* AIF1DAC2_EQ_B3_A - [15:0] */
+#define WM8994_AIF1DAC2_EQ_B3_A_SHIFT                0  /* AIF1DAC2_EQ_B3_A - [15:0] */
+#define WM8994_AIF1DAC2_EQ_B3_A_WIDTH               16  /* AIF1DAC2_EQ_B3_A - [15:0] */
+
+/*
+ * R1194 (0x4AA) - AIF1 DAC2 EQ Band 3 B
+ */
+#define WM8994_AIF1DAC2_EQ_B3_B_MASK            0xFFFF  /* AIF1DAC2_EQ_B3_B - [15:0] */
+#define WM8994_AIF1DAC2_EQ_B3_B_SHIFT                0  /* AIF1DAC2_EQ_B3_B - [15:0] */
+#define WM8994_AIF1DAC2_EQ_B3_B_WIDTH               16  /* AIF1DAC2_EQ_B3_B - [15:0] */
+
+/*
+ * R1195 (0x4AB) - AIF1 DAC2 EQ Band 3 C
+ */
+#define WM8994_AIF1DAC2_EQ_B3_C_MASK            0xFFFF  /* AIF1DAC2_EQ_B3_C - [15:0] */
+#define WM8994_AIF1DAC2_EQ_B3_C_SHIFT                0  /* AIF1DAC2_EQ_B3_C - [15:0] */
+#define WM8994_AIF1DAC2_EQ_B3_C_WIDTH               16  /* AIF1DAC2_EQ_B3_C - [15:0] */
+
+/*
+ * R1196 (0x4AC) - AIF1 DAC2 EQ Band 3 PG
+ */
+#define WM8994_AIF1DAC2_EQ_B3_PG_MASK           0xFFFF  /* AIF1DAC2_EQ_B3_PG - [15:0] */
+#define WM8994_AIF1DAC2_EQ_B3_PG_SHIFT               0  /* AIF1DAC2_EQ_B3_PG - [15:0] */
+#define WM8994_AIF1DAC2_EQ_B3_PG_WIDTH              16  /* AIF1DAC2_EQ_B3_PG - [15:0] */
+
+/*
+ * R1197 (0x4AD) - AIF1 DAC2 EQ Band 4 A
+ */
+#define WM8994_AIF1DAC2_EQ_B4_A_MASK            0xFFFF  /* AIF1DAC2_EQ_B4_A - [15:0] */
+#define WM8994_AIF1DAC2_EQ_B4_A_SHIFT                0  /* AIF1DAC2_EQ_B4_A - [15:0] */
+#define WM8994_AIF1DAC2_EQ_B4_A_WIDTH               16  /* AIF1DAC2_EQ_B4_A - [15:0] */
+
+/*
+ * R1198 (0x4AE) - AIF1 DAC2 EQ Band 4 B
+ */
+#define WM8994_AIF1DAC2_EQ_B4_B_MASK            0xFFFF  /* AIF1DAC2_EQ_B4_B - [15:0] */
+#define WM8994_AIF1DAC2_EQ_B4_B_SHIFT                0  /* AIF1DAC2_EQ_B4_B - [15:0] */
+#define WM8994_AIF1DAC2_EQ_B4_B_WIDTH               16  /* AIF1DAC2_EQ_B4_B - [15:0] */
+
+/*
+ * R1199 (0x4AF) - AIF1 DAC2 EQ Band 4 C
+ */
+#define WM8994_AIF1DAC2_EQ_B4_C_MASK            0xFFFF  /* AIF1DAC2_EQ_B4_C - [15:0] */
+#define WM8994_AIF1DAC2_EQ_B4_C_SHIFT                0  /* AIF1DAC2_EQ_B4_C - [15:0] */
+#define WM8994_AIF1DAC2_EQ_B4_C_WIDTH               16  /* AIF1DAC2_EQ_B4_C - [15:0] */
+
+/*
+ * R1200 (0x4B0) - AIF1 DAC2 EQ Band 4 PG
+ */
+#define WM8994_AIF1DAC2_EQ_B4_PG_MASK           0xFFFF  /* AIF1DAC2_EQ_B4_PG - [15:0] */
+#define WM8994_AIF1DAC2_EQ_B4_PG_SHIFT               0  /* AIF1DAC2_EQ_B4_PG - [15:0] */
+#define WM8994_AIF1DAC2_EQ_B4_PG_WIDTH              16  /* AIF1DAC2_EQ_B4_PG - [15:0] */
+
+/*
+ * R1201 (0x4B1) - AIF1 DAC2 EQ Band 5 A
+ */
+#define WM8994_AIF1DAC2_EQ_B5_A_MASK            0xFFFF  /* AIF1DAC2_EQ_B5_A - [15:0] */
+#define WM8994_AIF1DAC2_EQ_B5_A_SHIFT                0  /* AIF1DAC2_EQ_B5_A - [15:0] */
+#define WM8994_AIF1DAC2_EQ_B5_A_WIDTH               16  /* AIF1DAC2_EQ_B5_A - [15:0] */
+
+/*
+ * R1202 (0x4B2) - AIF1 DAC2 EQ Band 5 B
+ */
+#define WM8994_AIF1DAC2_EQ_B5_B_MASK            0xFFFF  /* AIF1DAC2_EQ_B5_B - [15:0] */
+#define WM8994_AIF1DAC2_EQ_B5_B_SHIFT                0  /* AIF1DAC2_EQ_B5_B - [15:0] */
+#define WM8994_AIF1DAC2_EQ_B5_B_WIDTH               16  /* AIF1DAC2_EQ_B5_B - [15:0] */
+
+/*
+ * R1203 (0x4B3) - AIF1 DAC2 EQ Band 5 PG
+ */
+#define WM8994_AIF1DAC2_EQ_B5_PG_MASK           0xFFFF  /* AIF1DAC2_EQ_B5_PG - [15:0] */
+#define WM8994_AIF1DAC2_EQ_B5_PG_SHIFT               0  /* AIF1DAC2_EQ_B5_PG - [15:0] */
+#define WM8994_AIF1DAC2_EQ_B5_PG_WIDTH              16  /* AIF1DAC2_EQ_B5_PG - [15:0] */
+
+/*
+ * R1280 (0x500) - AIF2 ADC Left Volume
+ */
+#define WM8994_AIF2ADC_VU                       0x0100  /* AIF2ADC_VU */
+#define WM8994_AIF2ADC_VU_MASK                  0x0100  /* AIF2ADC_VU */
+#define WM8994_AIF2ADC_VU_SHIFT                      8  /* AIF2ADC_VU */
+#define WM8994_AIF2ADC_VU_WIDTH                      1  /* AIF2ADC_VU */
+#define WM8994_AIF2ADCL_VOL_MASK                0x00FF  /* AIF2ADCL_VOL - [7:0] */
+#define WM8994_AIF2ADCL_VOL_SHIFT                    0  /* AIF2ADCL_VOL - [7:0] */
+#define WM8994_AIF2ADCL_VOL_WIDTH                    8  /* AIF2ADCL_VOL - [7:0] */
+
+/*
+ * R1281 (0x501) - AIF2 ADC Right Volume
+ */
+#define WM8994_AIF2ADC_VU                       0x0100  /* AIF2ADC_VU */
+#define WM8994_AIF2ADC_VU_MASK                  0x0100  /* AIF2ADC_VU */
+#define WM8994_AIF2ADC_VU_SHIFT                      8  /* AIF2ADC_VU */
+#define WM8994_AIF2ADC_VU_WIDTH                      1  /* AIF2ADC_VU */
+#define WM8994_AIF2ADCR_VOL_MASK                0x00FF  /* AIF2ADCR_VOL - [7:0] */
+#define WM8994_AIF2ADCR_VOL_SHIFT                    0  /* AIF2ADCR_VOL - [7:0] */
+#define WM8994_AIF2ADCR_VOL_WIDTH                    8  /* AIF2ADCR_VOL - [7:0] */
+
+/*
+ * R1282 (0x502) - AIF2 DAC Left Volume
+ */
+#define WM8994_AIF2DAC_VU                       0x0100  /* AIF2DAC_VU */
+#define WM8994_AIF2DAC_VU_MASK                  0x0100  /* AIF2DAC_VU */
+#define WM8994_AIF2DAC_VU_SHIFT                      8  /* AIF2DAC_VU */
+#define WM8994_AIF2DAC_VU_WIDTH                      1  /* AIF2DAC_VU */
+#define WM8994_AIF2DACL_VOL_MASK                0x00FF  /* AIF2DACL_VOL - [7:0] */
+#define WM8994_AIF2DACL_VOL_SHIFT                    0  /* AIF2DACL_VOL - [7:0] */
+#define WM8994_AIF2DACL_VOL_WIDTH                    8  /* AIF2DACL_VOL - [7:0] */
+
+/*
+ * R1283 (0x503) - AIF2 DAC Right Volume
+ */
+#define WM8994_AIF2DAC_VU                       0x0100  /* AIF2DAC_VU */
+#define WM8994_AIF2DAC_VU_MASK                  0x0100  /* AIF2DAC_VU */
+#define WM8994_AIF2DAC_VU_SHIFT                      8  /* AIF2DAC_VU */
+#define WM8994_AIF2DAC_VU_WIDTH                      1  /* AIF2DAC_VU */
+#define WM8994_AIF2DACR_VOL_MASK                0x00FF  /* AIF2DACR_VOL - [7:0] */
+#define WM8994_AIF2DACR_VOL_SHIFT                    0  /* AIF2DACR_VOL - [7:0] */
+#define WM8994_AIF2DACR_VOL_WIDTH                    8  /* AIF2DACR_VOL - [7:0] */
+
+/*
+ * R1296 (0x510) - AIF2 ADC Filters
+ */
+#define WM8994_AIF2ADC_4FS                      0x8000  /* AIF2ADC_4FS */
+#define WM8994_AIF2ADC_4FS_MASK                 0x8000  /* AIF2ADC_4FS */
+#define WM8994_AIF2ADC_4FS_SHIFT                    15  /* AIF2ADC_4FS */
+#define WM8994_AIF2ADC_4FS_WIDTH                     1  /* AIF2ADC_4FS */
+#define WM8994_AIF2ADC_HPF_CUT_MASK             0x6000  /* AIF2ADC_HPF_CUT - [14:13] */
+#define WM8994_AIF2ADC_HPF_CUT_SHIFT                13  /* AIF2ADC_HPF_CUT - [14:13] */
+#define WM8994_AIF2ADC_HPF_CUT_WIDTH                 2  /* AIF2ADC_HPF_CUT - [14:13] */
+#define WM8994_AIF2ADCL_HPF                     0x1000  /* AIF2ADCL_HPF */
+#define WM8994_AIF2ADCL_HPF_MASK                0x1000  /* AIF2ADCL_HPF */
+#define WM8994_AIF2ADCL_HPF_SHIFT                   12  /* AIF2ADCL_HPF */
+#define WM8994_AIF2ADCL_HPF_WIDTH                    1  /* AIF2ADCL_HPF */
+#define WM8994_AIF2ADCR_HPF                     0x0800  /* AIF2ADCR_HPF */
+#define WM8994_AIF2ADCR_HPF_MASK                0x0800  /* AIF2ADCR_HPF */
+#define WM8994_AIF2ADCR_HPF_SHIFT                   11  /* AIF2ADCR_HPF */
+#define WM8994_AIF2ADCR_HPF_WIDTH                    1  /* AIF2ADCR_HPF */
+
+/*
+ * R1312 (0x520) - AIF2 DAC Filters (1)
+ */
+#define WM8994_AIF2DAC_MUTE                     0x0200  /* AIF2DAC_MUTE */
+#define WM8994_AIF2DAC_MUTE_MASK                0x0200  /* AIF2DAC_MUTE */
+#define WM8994_AIF2DAC_MUTE_SHIFT                    9  /* AIF2DAC_MUTE */
+#define WM8994_AIF2DAC_MUTE_WIDTH                    1  /* AIF2DAC_MUTE */
+#define WM8994_AIF2DAC_MONO                     0x0080  /* AIF2DAC_MONO */
+#define WM8994_AIF2DAC_MONO_MASK                0x0080  /* AIF2DAC_MONO */
+#define WM8994_AIF2DAC_MONO_SHIFT                    7  /* AIF2DAC_MONO */
+#define WM8994_AIF2DAC_MONO_WIDTH                    1  /* AIF2DAC_MONO */
+#define WM8994_AIF2DAC_MUTERATE                 0x0020  /* AIF2DAC_MUTERATE */
+#define WM8994_AIF2DAC_MUTERATE_MASK            0x0020  /* AIF2DAC_MUTERATE */
+#define WM8994_AIF2DAC_MUTERATE_SHIFT                5  /* AIF2DAC_MUTERATE */
+#define WM8994_AIF2DAC_MUTERATE_WIDTH                1  /* AIF2DAC_MUTERATE */
+#define WM8994_AIF2DAC_UNMUTE_RAMP              0x0010  /* AIF2DAC_UNMUTE_RAMP */
+#define WM8994_AIF2DAC_UNMUTE_RAMP_MASK         0x0010  /* AIF2DAC_UNMUTE_RAMP */
+#define WM8994_AIF2DAC_UNMUTE_RAMP_SHIFT             4  /* AIF2DAC_UNMUTE_RAMP */
+#define WM8994_AIF2DAC_UNMUTE_RAMP_WIDTH             1  /* AIF2DAC_UNMUTE_RAMP */
+#define WM8994_AIF2DAC_DEEMP_MASK               0x0006  /* AIF2DAC_DEEMP - [2:1] */
+#define WM8994_AIF2DAC_DEEMP_SHIFT                   1  /* AIF2DAC_DEEMP - [2:1] */
+#define WM8994_AIF2DAC_DEEMP_WIDTH                   2  /* AIF2DAC_DEEMP - [2:1] */
+
+/*
+ * R1313 (0x521) - AIF2 DAC Filters (2)
+ */
+#define WM8994_AIF2DAC_3D_GAIN_MASK             0x3E00  /* AIF2DAC_3D_GAIN - [13:9] */
+#define WM8994_AIF2DAC_3D_GAIN_SHIFT                 9  /* AIF2DAC_3D_GAIN - [13:9] */
+#define WM8994_AIF2DAC_3D_GAIN_WIDTH                 5  /* AIF2DAC_3D_GAIN - [13:9] */
+#define WM8994_AIF2DAC_3D_ENA                   0x0100  /* AIF2DAC_3D_ENA */
+#define WM8994_AIF2DAC_3D_ENA_MASK              0x0100  /* AIF2DAC_3D_ENA */
+#define WM8994_AIF2DAC_3D_ENA_SHIFT                  8  /* AIF2DAC_3D_ENA */
+#define WM8994_AIF2DAC_3D_ENA_WIDTH                  1  /* AIF2DAC_3D_ENA */
+
+/*
+ * R1344 (0x540) - AIF2 DRC (1)
+ */
+#define WM8994_AIF2DRC_SIG_DET_RMS_MASK         0xF800  /* AIF2DRC_SIG_DET_RMS - [15:11] */
+#define WM8994_AIF2DRC_SIG_DET_RMS_SHIFT            11  /* AIF2DRC_SIG_DET_RMS - [15:11] */
+#define WM8994_AIF2DRC_SIG_DET_RMS_WIDTH             5  /* AIF2DRC_SIG_DET_RMS - [15:11] */
+#define WM8994_AIF2DRC_SIG_DET_PK_MASK          0x0600  /* AIF2DRC_SIG_DET_PK - [10:9] */
+#define WM8994_AIF2DRC_SIG_DET_PK_SHIFT              9  /* AIF2DRC_SIG_DET_PK - [10:9] */
+#define WM8994_AIF2DRC_SIG_DET_PK_WIDTH              2  /* AIF2DRC_SIG_DET_PK - [10:9] */
+#define WM8994_AIF2DRC_NG_ENA                   0x0100  /* AIF2DRC_NG_ENA */
+#define WM8994_AIF2DRC_NG_ENA_MASK              0x0100  /* AIF2DRC_NG_ENA */
+#define WM8994_AIF2DRC_NG_ENA_SHIFT                  8  /* AIF2DRC_NG_ENA */
+#define WM8994_AIF2DRC_NG_ENA_WIDTH                  1  /* AIF2DRC_NG_ENA */
+#define WM8994_AIF2DRC_SIG_DET_MODE             0x0080  /* AIF2DRC_SIG_DET_MODE */
+#define WM8994_AIF2DRC_SIG_DET_MODE_MASK        0x0080  /* AIF2DRC_SIG_DET_MODE */
+#define WM8994_AIF2DRC_SIG_DET_MODE_SHIFT            7  /* AIF2DRC_SIG_DET_MODE */
+#define WM8994_AIF2DRC_SIG_DET_MODE_WIDTH            1  /* AIF2DRC_SIG_DET_MODE */
+#define WM8994_AIF2DRC_SIG_DET                  0x0040  /* AIF2DRC_SIG_DET */
+#define WM8994_AIF2DRC_SIG_DET_MASK             0x0040  /* AIF2DRC_SIG_DET */
+#define WM8994_AIF2DRC_SIG_DET_SHIFT                 6  /* AIF2DRC_SIG_DET */
+#define WM8994_AIF2DRC_SIG_DET_WIDTH                 1  /* AIF2DRC_SIG_DET */
+#define WM8994_AIF2DRC_KNEE2_OP_ENA             0x0020  /* AIF2DRC_KNEE2_OP_ENA */
+#define WM8994_AIF2DRC_KNEE2_OP_ENA_MASK        0x0020  /* AIF2DRC_KNEE2_OP_ENA */
+#define WM8994_AIF2DRC_KNEE2_OP_ENA_SHIFT            5  /* AIF2DRC_KNEE2_OP_ENA */
+#define WM8994_AIF2DRC_KNEE2_OP_ENA_WIDTH            1  /* AIF2DRC_KNEE2_OP_ENA */
+#define WM8994_AIF2DRC_QR                       0x0010  /* AIF2DRC_QR */
+#define WM8994_AIF2DRC_QR_MASK                  0x0010  /* AIF2DRC_QR */
+#define WM8994_AIF2DRC_QR_SHIFT                      4  /* AIF2DRC_QR */
+#define WM8994_AIF2DRC_QR_WIDTH                      1  /* AIF2DRC_QR */
+#define WM8994_AIF2DRC_ANTICLIP                 0x0008  /* AIF2DRC_ANTICLIP */
+#define WM8994_AIF2DRC_ANTICLIP_MASK            0x0008  /* AIF2DRC_ANTICLIP */
+#define WM8994_AIF2DRC_ANTICLIP_SHIFT                3  /* AIF2DRC_ANTICLIP */
+#define WM8994_AIF2DRC_ANTICLIP_WIDTH                1  /* AIF2DRC_ANTICLIP */
+#define WM8994_AIF2DAC_DRC_ENA                  0x0004  /* AIF2DAC_DRC_ENA */
+#define WM8994_AIF2DAC_DRC_ENA_MASK             0x0004  /* AIF2DAC_DRC_ENA */
+#define WM8994_AIF2DAC_DRC_ENA_SHIFT                 2  /* AIF2DAC_DRC_ENA */
+#define WM8994_AIF2DAC_DRC_ENA_WIDTH                 1  /* AIF2DAC_DRC_ENA */
+#define WM8994_AIF2ADCL_DRC_ENA                 0x0002  /* AIF2ADCL_DRC_ENA */
+#define WM8994_AIF2ADCL_DRC_ENA_MASK            0x0002  /* AIF2ADCL_DRC_ENA */
+#define WM8994_AIF2ADCL_DRC_ENA_SHIFT                1  /* AIF2ADCL_DRC_ENA */
+#define WM8994_AIF2ADCL_DRC_ENA_WIDTH                1  /* AIF2ADCL_DRC_ENA */
+#define WM8994_AIF2ADCR_DRC_ENA                 0x0001  /* AIF2ADCR_DRC_ENA */
+#define WM8994_AIF2ADCR_DRC_ENA_MASK            0x0001  /* AIF2ADCR_DRC_ENA */
+#define WM8994_AIF2ADCR_DRC_ENA_SHIFT                0  /* AIF2ADCR_DRC_ENA */
+#define WM8994_AIF2ADCR_DRC_ENA_WIDTH                1  /* AIF2ADCR_DRC_ENA */
+
+/*
+ * R1345 (0x541) - AIF2 DRC (2)
+ */
+#define WM8994_AIF2DRC_ATK_MASK                 0x1E00  /* AIF2DRC_ATK - [12:9] */
+#define WM8994_AIF2DRC_ATK_SHIFT                     9  /* AIF2DRC_ATK - [12:9] */
+#define WM8994_AIF2DRC_ATK_WIDTH                     4  /* AIF2DRC_ATK - [12:9] */
+#define WM8994_AIF2DRC_DCY_MASK                 0x01E0  /* AIF2DRC_DCY - [8:5] */
+#define WM8994_AIF2DRC_DCY_SHIFT                     5  /* AIF2DRC_DCY - [8:5] */
+#define WM8994_AIF2DRC_DCY_WIDTH                     4  /* AIF2DRC_DCY - [8:5] */
+#define WM8994_AIF2DRC_MINGAIN_MASK             0x001C  /* AIF2DRC_MINGAIN - [4:2] */
+#define WM8994_AIF2DRC_MINGAIN_SHIFT                 2  /* AIF2DRC_MINGAIN - [4:2] */
+#define WM8994_AIF2DRC_MINGAIN_WIDTH                 3  /* AIF2DRC_MINGAIN - [4:2] */
+#define WM8994_AIF2DRC_MAXGAIN_MASK             0x0003  /* AIF2DRC_MAXGAIN - [1:0] */
+#define WM8994_AIF2DRC_MAXGAIN_SHIFT                 0  /* AIF2DRC_MAXGAIN - [1:0] */
+#define WM8994_AIF2DRC_MAXGAIN_WIDTH                 2  /* AIF2DRC_MAXGAIN - [1:0] */
+
+/*
+ * R1346 (0x542) - AIF2 DRC (3)
+ */
+#define WM8994_AIF2DRC_NG_MINGAIN_MASK          0xF000  /* AIF2DRC_NG_MINGAIN - [15:12] */
+#define WM8994_AIF2DRC_NG_MINGAIN_SHIFT             12  /* AIF2DRC_NG_MINGAIN - [15:12] */
+#define WM8994_AIF2DRC_NG_MINGAIN_WIDTH              4  /* AIF2DRC_NG_MINGAIN - [15:12] */
+#define WM8994_AIF2DRC_NG_EXP_MASK              0x0C00  /* AIF2DRC_NG_EXP - [11:10] */
+#define WM8994_AIF2DRC_NG_EXP_SHIFT                 10  /* AIF2DRC_NG_EXP - [11:10] */
+#define WM8994_AIF2DRC_NG_EXP_WIDTH                  2  /* AIF2DRC_NG_EXP - [11:10] */
+#define WM8994_AIF2DRC_QR_THR_MASK              0x0300  /* AIF2DRC_QR_THR - [9:8] */
+#define WM8994_AIF2DRC_QR_THR_SHIFT                  8  /* AIF2DRC_QR_THR - [9:8] */
+#define WM8994_AIF2DRC_QR_THR_WIDTH                  2  /* AIF2DRC_QR_THR - [9:8] */
+#define WM8994_AIF2DRC_QR_DCY_MASK              0x00C0  /* AIF2DRC_QR_DCY - [7:6] */
+#define WM8994_AIF2DRC_QR_DCY_SHIFT                  6  /* AIF2DRC_QR_DCY - [7:6] */
+#define WM8994_AIF2DRC_QR_DCY_WIDTH                  2  /* AIF2DRC_QR_DCY - [7:6] */
+#define WM8994_AIF2DRC_HI_COMP_MASK             0x0038  /* AIF2DRC_HI_COMP - [5:3] */
+#define WM8994_AIF2DRC_HI_COMP_SHIFT                 3  /* AIF2DRC_HI_COMP - [5:3] */
+#define WM8994_AIF2DRC_HI_COMP_WIDTH                 3  /* AIF2DRC_HI_COMP - [5:3] */
+#define WM8994_AIF2DRC_LO_COMP_MASK             0x0007  /* AIF2DRC_LO_COMP - [2:0] */
+#define WM8994_AIF2DRC_LO_COMP_SHIFT                 0  /* AIF2DRC_LO_COMP - [2:0] */
+#define WM8994_AIF2DRC_LO_COMP_WIDTH                 3  /* AIF2DRC_LO_COMP - [2:0] */
+
+/*
+ * R1347 (0x543) - AIF2 DRC (4)
+ */
+#define WM8994_AIF2DRC_KNEE_IP_MASK             0x07E0  /* AIF2DRC_KNEE_IP - [10:5] */
+#define WM8994_AIF2DRC_KNEE_IP_SHIFT                 5  /* AIF2DRC_KNEE_IP - [10:5] */
+#define WM8994_AIF2DRC_KNEE_IP_WIDTH                 6  /* AIF2DRC_KNEE_IP - [10:5] */
+#define WM8994_AIF2DRC_KNEE_OP_MASK             0x001F  /* AIF2DRC_KNEE_OP - [4:0] */
+#define WM8994_AIF2DRC_KNEE_OP_SHIFT                 0  /* AIF2DRC_KNEE_OP - [4:0] */
+#define WM8994_AIF2DRC_KNEE_OP_WIDTH                 5  /* AIF2DRC_KNEE_OP - [4:0] */
+
+/*
+ * R1348 (0x544) - AIF2 DRC (5)
+ */
+#define WM8994_AIF2DRC_KNEE2_IP_MASK            0x03E0  /* AIF2DRC_KNEE2_IP - [9:5] */
+#define WM8994_AIF2DRC_KNEE2_IP_SHIFT                5  /* AIF2DRC_KNEE2_IP - [9:5] */
+#define WM8994_AIF2DRC_KNEE2_IP_WIDTH                5  /* AIF2DRC_KNEE2_IP - [9:5] */
+#define WM8994_AIF2DRC_KNEE2_OP_MASK            0x001F  /* AIF2DRC_KNEE2_OP - [4:0] */
+#define WM8994_AIF2DRC_KNEE2_OP_SHIFT                0  /* AIF2DRC_KNEE2_OP - [4:0] */
+#define WM8994_AIF2DRC_KNEE2_OP_WIDTH                5  /* AIF2DRC_KNEE2_OP - [4:0] */
+
+/*
+ * R1408 (0x580) - AIF2 EQ Gains (1)
+ */
+#define WM8994_AIF2DAC_EQ_B1_GAIN_MASK          0xF800  /* AIF2DAC_EQ_B1_GAIN - [15:11] */
+#define WM8994_AIF2DAC_EQ_B1_GAIN_SHIFT             11  /* AIF2DAC_EQ_B1_GAIN - [15:11] */
+#define WM8994_AIF2DAC_EQ_B1_GAIN_WIDTH              5  /* AIF2DAC_EQ_B1_GAIN - [15:11] */
+#define WM8994_AIF2DAC_EQ_B2_GAIN_MASK          0x07C0  /* AIF2DAC_EQ_B2_GAIN - [10:6] */
+#define WM8994_AIF2DAC_EQ_B2_GAIN_SHIFT              6  /* AIF2DAC_EQ_B2_GAIN - [10:6] */
+#define WM8994_AIF2DAC_EQ_B2_GAIN_WIDTH              5  /* AIF2DAC_EQ_B2_GAIN - [10:6] */
+#define WM8994_AIF2DAC_EQ_B3_GAIN_MASK          0x003E  /* AIF2DAC_EQ_B3_GAIN - [5:1] */
+#define WM8994_AIF2DAC_EQ_B3_GAIN_SHIFT              1  /* AIF2DAC_EQ_B3_GAIN - [5:1] */
+#define WM8994_AIF2DAC_EQ_B3_GAIN_WIDTH              5  /* AIF2DAC_EQ_B3_GAIN - [5:1] */
+#define WM8994_AIF2DAC_EQ_ENA                   0x0001  /* AIF2DAC_EQ_ENA */
+#define WM8994_AIF2DAC_EQ_ENA_MASK              0x0001  /* AIF2DAC_EQ_ENA */
+#define WM8994_AIF2DAC_EQ_ENA_SHIFT                  0  /* AIF2DAC_EQ_ENA */
+#define WM8994_AIF2DAC_EQ_ENA_WIDTH                  1  /* AIF2DAC_EQ_ENA */
+
+/*
+ * R1409 (0x581) - AIF2 EQ Gains (2)
+ */
+#define WM8994_AIF2DAC_EQ_B4_GAIN_MASK          0xF800  /* AIF2DAC_EQ_B4_GAIN - [15:11] */
+#define WM8994_AIF2DAC_EQ_B4_GAIN_SHIFT             11  /* AIF2DAC_EQ_B4_GAIN - [15:11] */
+#define WM8994_AIF2DAC_EQ_B4_GAIN_WIDTH              5  /* AIF2DAC_EQ_B4_GAIN - [15:11] */
+#define WM8994_AIF2DAC_EQ_B5_GAIN_MASK          0x07C0  /* AIF2DAC_EQ_B5_GAIN - [10:6] */
+#define WM8994_AIF2DAC_EQ_B5_GAIN_SHIFT              6  /* AIF2DAC_EQ_B5_GAIN - [10:6] */
+#define WM8994_AIF2DAC_EQ_B5_GAIN_WIDTH              5  /* AIF2DAC_EQ_B5_GAIN - [10:6] */
+
+/*
+ * R1410 (0x582) - AIF2 EQ Band 1 A
+ */
+#define WM8994_AIF2DAC_EQ_B1_A_MASK             0xFFFF  /* AIF2DAC_EQ_B1_A - [15:0] */
+#define WM8994_AIF2DAC_EQ_B1_A_SHIFT                 0  /* AIF2DAC_EQ_B1_A - [15:0] */
+#define WM8994_AIF2DAC_EQ_B1_A_WIDTH                16  /* AIF2DAC_EQ_B1_A - [15:0] */
+
+/*
+ * R1411 (0x583) - AIF2 EQ Band 1 B
+ */
+#define WM8994_AIF2DAC_EQ_B1_B_MASK             0xFFFF  /* AIF2DAC_EQ_B1_B - [15:0] */
+#define WM8994_AIF2DAC_EQ_B1_B_SHIFT                 0  /* AIF2DAC_EQ_B1_B - [15:0] */
+#define WM8994_AIF2DAC_EQ_B1_B_WIDTH                16  /* AIF2DAC_EQ_B1_B - [15:0] */
+
+/*
+ * R1412 (0x584) - AIF2 EQ Band 1 PG
+ */
+#define WM8994_AIF2DAC_EQ_B1_PG_MASK            0xFFFF  /* AIF2DAC_EQ_B1_PG - [15:0] */
+#define WM8994_AIF2DAC_EQ_B1_PG_SHIFT                0  /* AIF2DAC_EQ_B1_PG - [15:0] */
+#define WM8994_AIF2DAC_EQ_B1_PG_WIDTH               16  /* AIF2DAC_EQ_B1_PG - [15:0] */
+
+/*
+ * R1413 (0x585) - AIF2 EQ Band 2 A
+ */
+#define WM8994_AIF2DAC_EQ_B2_A_MASK             0xFFFF  /* AIF2DAC_EQ_B2_A - [15:0] */
+#define WM8994_AIF2DAC_EQ_B2_A_SHIFT                 0  /* AIF2DAC_EQ_B2_A - [15:0] */
+#define WM8994_AIF2DAC_EQ_B2_A_WIDTH                16  /* AIF2DAC_EQ_B2_A - [15:0] */
+
+/*
+ * R1414 (0x586) - AIF2 EQ Band 2 B
+ */
+#define WM8994_AIF2DAC_EQ_B2_B_MASK             0xFFFF  /* AIF2DAC_EQ_B2_B - [15:0] */
+#define WM8994_AIF2DAC_EQ_B2_B_SHIFT                 0  /* AIF2DAC_EQ_B2_B - [15:0] */
+#define WM8994_AIF2DAC_EQ_B2_B_WIDTH                16  /* AIF2DAC_EQ_B2_B - [15:0] */
+
+/*
+ * R1415 (0x587) - AIF2 EQ Band 2 C
+ */
+#define WM8994_AIF2DAC_EQ_B2_C_MASK             0xFFFF  /* AIF2DAC_EQ_B2_C - [15:0] */
+#define WM8994_AIF2DAC_EQ_B2_C_SHIFT                 0  /* AIF2DAC_EQ_B2_C - [15:0] */
+#define WM8994_AIF2DAC_EQ_B2_C_WIDTH                16  /* AIF2DAC_EQ_B2_C - [15:0] */
+
+/*
+ * R1416 (0x588) - AIF2 EQ Band 2 PG
+ */
+#define WM8994_AIF2DAC_EQ_B2_PG_MASK            0xFFFF  /* AIF2DAC_EQ_B2_PG - [15:0] */
+#define WM8994_AIF2DAC_EQ_B2_PG_SHIFT                0  /* AIF2DAC_EQ_B2_PG - [15:0] */
+#define WM8994_AIF2DAC_EQ_B2_PG_WIDTH               16  /* AIF2DAC_EQ_B2_PG - [15:0] */
+
+/*
+ * R1417 (0x589) - AIF2 EQ Band 3 A
+ */
+#define WM8994_AIF2DAC_EQ_B3_A_MASK             0xFFFF  /* AIF2DAC_EQ_B3_A - [15:0] */
+#define WM8994_AIF2DAC_EQ_B3_A_SHIFT                 0  /* AIF2DAC_EQ_B3_A - [15:0] */
+#define WM8994_AIF2DAC_EQ_B3_A_WIDTH                16  /* AIF2DAC_EQ_B3_A - [15:0] */
+
+/*
+ * R1418 (0x58A) - AIF2 EQ Band 3 B
+ */
+#define WM8994_AIF2DAC_EQ_B3_B_MASK             0xFFFF  /* AIF2DAC_EQ_B3_B - [15:0] */
+#define WM8994_AIF2DAC_EQ_B3_B_SHIFT                 0  /* AIF2DAC_EQ_B3_B - [15:0] */
+#define WM8994_AIF2DAC_EQ_B3_B_WIDTH                16  /* AIF2DAC_EQ_B3_B - [15:0] */
+
+/*
+ * R1419 (0x58B) - AIF2 EQ Band 3 C
+ */
+#define WM8994_AIF2DAC_EQ_B3_C_MASK             0xFFFF  /* AIF2DAC_EQ_B3_C - [15:0] */
+#define WM8994_AIF2DAC_EQ_B3_C_SHIFT                 0  /* AIF2DAC_EQ_B3_C - [15:0] */
+#define WM8994_AIF2DAC_EQ_B3_C_WIDTH                16  /* AIF2DAC_EQ_B3_C - [15:0] */
+
+/*
+ * R1420 (0x58C) - AIF2 EQ Band 3 PG
+ */
+#define WM8994_AIF2DAC_EQ_B3_PG_MASK            0xFFFF  /* AIF2DAC_EQ_B3_PG - [15:0] */
+#define WM8994_AIF2DAC_EQ_B3_PG_SHIFT                0  /* AIF2DAC_EQ_B3_PG - [15:0] */
+#define WM8994_AIF2DAC_EQ_B3_PG_WIDTH               16  /* AIF2DAC_EQ_B3_PG - [15:0] */
+
+/*
+ * R1421 (0x58D) - AIF2 EQ Band 4 A
+ */
+#define WM8994_AIF2DAC_EQ_B4_A_MASK             0xFFFF  /* AIF2DAC_EQ_B4_A - [15:0] */
+#define WM8994_AIF2DAC_EQ_B4_A_SHIFT                 0  /* AIF2DAC_EQ_B4_A - [15:0] */
+#define WM8994_AIF2DAC_EQ_B4_A_WIDTH                16  /* AIF2DAC_EQ_B4_A - [15:0] */
+
+/*
+ * R1422 (0x58E) - AIF2 EQ Band 4 B
+ */
+#define WM8994_AIF2DAC_EQ_B4_B_MASK             0xFFFF  /* AIF2DAC_EQ_B4_B - [15:0] */
+#define WM8994_AIF2DAC_EQ_B4_B_SHIFT                 0  /* AIF2DAC_EQ_B4_B - [15:0] */
+#define WM8994_AIF2DAC_EQ_B4_B_WIDTH                16  /* AIF2DAC_EQ_B4_B - [15:0] */
+
+/*
+ * R1423 (0x58F) - AIF2 EQ Band 4 C
+ */
+#define WM8994_AIF2DAC_EQ_B4_C_MASK             0xFFFF  /* AIF2DAC_EQ_B4_C - [15:0] */
+#define WM8994_AIF2DAC_EQ_B4_C_SHIFT                 0  /* AIF2DAC_EQ_B4_C - [15:0] */
+#define WM8994_AIF2DAC_EQ_B4_C_WIDTH                16  /* AIF2DAC_EQ_B4_C - [15:0] */
+
+/*
+ * R1424 (0x590) - AIF2 EQ Band 4 PG
+ */
+#define WM8994_AIF2DAC_EQ_B4_PG_MASK            0xFFFF  /* AIF2DAC_EQ_B4_PG - [15:0] */
+#define WM8994_AIF2DAC_EQ_B4_PG_SHIFT                0  /* AIF2DAC_EQ_B4_PG - [15:0] */
+#define WM8994_AIF2DAC_EQ_B4_PG_WIDTH               16  /* AIF2DAC_EQ_B4_PG - [15:0] */
+
+/*
+ * R1425 (0x591) - AIF2 EQ Band 5 A
+ */
+#define WM8994_AIF2DAC_EQ_B5_A_MASK             0xFFFF  /* AIF2DAC_EQ_B5_A - [15:0] */
+#define WM8994_AIF2DAC_EQ_B5_A_SHIFT                 0  /* AIF2DAC_EQ_B5_A - [15:0] */
+#define WM8994_AIF2DAC_EQ_B5_A_WIDTH                16  /* AIF2DAC_EQ_B5_A - [15:0] */
+
+/*
+ * R1426 (0x592) - AIF2 EQ Band 5 B
+ */
+#define WM8994_AIF2DAC_EQ_B5_B_MASK             0xFFFF  /* AIF2DAC_EQ_B5_B - [15:0] */
+#define WM8994_AIF2DAC_EQ_B5_B_SHIFT                 0  /* AIF2DAC_EQ_B5_B - [15:0] */
+#define WM8994_AIF2DAC_EQ_B5_B_WIDTH                16  /* AIF2DAC_EQ_B5_B - [15:0] */
+
+/*
+ * R1427 (0x593) - AIF2 EQ Band 5 PG
+ */
+#define WM8994_AIF2DAC_EQ_B5_PG_MASK            0xFFFF  /* AIF2DAC_EQ_B5_PG - [15:0] */
+#define WM8994_AIF2DAC_EQ_B5_PG_SHIFT                0  /* AIF2DAC_EQ_B5_PG - [15:0] */
+#define WM8994_AIF2DAC_EQ_B5_PG_WIDTH               16  /* AIF2DAC_EQ_B5_PG - [15:0] */
+
+/*
+ * R1536 (0x600) - DAC1 Mixer Volumes
+ */
+#define WM8994_ADCR_DAC1_VOL_MASK               0x01E0  /* ADCR_DAC1_VOL - [8:5] */
+#define WM8994_ADCR_DAC1_VOL_SHIFT                   5  /* ADCR_DAC1_VOL - [8:5] */
+#define WM8994_ADCR_DAC1_VOL_WIDTH                   4  /* ADCR_DAC1_VOL - [8:5] */
+#define WM8994_ADCL_DAC1_VOL_MASK               0x000F  /* ADCL_DAC1_VOL - [3:0] */
+#define WM8994_ADCL_DAC1_VOL_SHIFT                   0  /* ADCL_DAC1_VOL - [3:0] */
+#define WM8994_ADCL_DAC1_VOL_WIDTH                   4  /* ADCL_DAC1_VOL - [3:0] */
+
+/*
+ * R1537 (0x601) - DAC1 Left Mixer Routing
+ */
+#define WM8994_ADCR_TO_DAC1L                    0x0020  /* ADCR_TO_DAC1L */
+#define WM8994_ADCR_TO_DAC1L_MASK               0x0020  /* ADCR_TO_DAC1L */
+#define WM8994_ADCR_TO_DAC1L_SHIFT                   5  /* ADCR_TO_DAC1L */
+#define WM8994_ADCR_TO_DAC1L_WIDTH                   1  /* ADCR_TO_DAC1L */
+#define WM8994_ADCL_TO_DAC1L                    0x0010  /* ADCL_TO_DAC1L */
+#define WM8994_ADCL_TO_DAC1L_MASK               0x0010  /* ADCL_TO_DAC1L */
+#define WM8994_ADCL_TO_DAC1L_SHIFT                   4  /* ADCL_TO_DAC1L */
+#define WM8994_ADCL_TO_DAC1L_WIDTH                   1  /* ADCL_TO_DAC1L */
+#define WM8994_AIF2DACL_TO_DAC1L                0x0004  /* AIF2DACL_TO_DAC1L */
+#define WM8994_AIF2DACL_TO_DAC1L_MASK           0x0004  /* AIF2DACL_TO_DAC1L */
+#define WM8994_AIF2DACL_TO_DAC1L_SHIFT               2  /* AIF2DACL_TO_DAC1L */
+#define WM8994_AIF2DACL_TO_DAC1L_WIDTH               1  /* AIF2DACL_TO_DAC1L */
+#define WM8994_AIF1DAC2L_TO_DAC1L               0x0002  /* AIF1DAC2L_TO_DAC1L */
+#define WM8994_AIF1DAC2L_TO_DAC1L_MASK          0x0002  /* AIF1DAC2L_TO_DAC1L */
+#define WM8994_AIF1DAC2L_TO_DAC1L_SHIFT              1  /* AIF1DAC2L_TO_DAC1L */
+#define WM8994_AIF1DAC2L_TO_DAC1L_WIDTH              1  /* AIF1DAC2L_TO_DAC1L */
+#define WM8994_AIF1DAC1L_TO_DAC1L               0x0001  /* AIF1DAC1L_TO_DAC1L */
+#define WM8994_AIF1DAC1L_TO_DAC1L_MASK          0x0001  /* AIF1DAC1L_TO_DAC1L */
+#define WM8994_AIF1DAC1L_TO_DAC1L_SHIFT              0  /* AIF1DAC1L_TO_DAC1L */
+#define WM8994_AIF1DAC1L_TO_DAC1L_WIDTH              1  /* AIF1DAC1L_TO_DAC1L */
+
+/*
+ * R1538 (0x602) - DAC1 Right Mixer Routing
+ */
+#define WM8994_ADCR_TO_DAC1R                    0x0020  /* ADCR_TO_DAC1R */
+#define WM8994_ADCR_TO_DAC1R_MASK               0x0020  /* ADCR_TO_DAC1R */
+#define WM8994_ADCR_TO_DAC1R_SHIFT                   5  /* ADCR_TO_DAC1R */
+#define WM8994_ADCR_TO_DAC1R_WIDTH                   1  /* ADCR_TO_DAC1R */
+#define WM8994_ADCL_TO_DAC1R                    0x0010  /* ADCL_TO_DAC1R */
+#define WM8994_ADCL_TO_DAC1R_MASK               0x0010  /* ADCL_TO_DAC1R */
+#define WM8994_ADCL_TO_DAC1R_SHIFT                   4  /* ADCL_TO_DAC1R */
+#define WM8994_ADCL_TO_DAC1R_WIDTH                   1  /* ADCL_TO_DAC1R */
+#define WM8994_AIF2DACR_TO_DAC1R                0x0004  /* AIF2DACR_TO_DAC1R */
+#define WM8994_AIF2DACR_TO_DAC1R_MASK           0x0004  /* AIF2DACR_TO_DAC1R */
+#define WM8994_AIF2DACR_TO_DAC1R_SHIFT               2  /* AIF2DACR_TO_DAC1R */
+#define WM8994_AIF2DACR_TO_DAC1R_WIDTH               1  /* AIF2DACR_TO_DAC1R */
+#define WM8994_AIF1DAC2R_TO_DAC1R               0x0002  /* AIF1DAC2R_TO_DAC1R */
+#define WM8994_AIF1DAC2R_TO_DAC1R_MASK          0x0002  /* AIF1DAC2R_TO_DAC1R */
+#define WM8994_AIF1DAC2R_TO_DAC1R_SHIFT              1  /* AIF1DAC2R_TO_DAC1R */
+#define WM8994_AIF1DAC2R_TO_DAC1R_WIDTH              1  /* AIF1DAC2R_TO_DAC1R */
+#define WM8994_AIF1DAC1R_TO_DAC1R               0x0001  /* AIF1DAC1R_TO_DAC1R */
+#define WM8994_AIF1DAC1R_TO_DAC1R_MASK          0x0001  /* AIF1DAC1R_TO_DAC1R */
+#define WM8994_AIF1DAC1R_TO_DAC1R_SHIFT              0  /* AIF1DAC1R_TO_DAC1R */
+#define WM8994_AIF1DAC1R_TO_DAC1R_WIDTH              1  /* AIF1DAC1R_TO_DAC1R */
+
+/*
+ * R1539 (0x603) - DAC2 Mixer Volumes
+ */
+#define WM8994_ADCR_DAC2_VOL_MASK               0x01E0  /* ADCR_DAC2_VOL - [8:5] */
+#define WM8994_ADCR_DAC2_VOL_SHIFT                   5  /* ADCR_DAC2_VOL - [8:5] */
+#define WM8994_ADCR_DAC2_VOL_WIDTH                   4  /* ADCR_DAC2_VOL - [8:5] */
+#define WM8994_ADCL_DAC2_VOL_MASK               0x000F  /* ADCL_DAC2_VOL - [3:0] */
+#define WM8994_ADCL_DAC2_VOL_SHIFT                   0  /* ADCL_DAC2_VOL - [3:0] */
+#define WM8994_ADCL_DAC2_VOL_WIDTH                   4  /* ADCL_DAC2_VOL - [3:0] */
+
+/*
+ * R1540 (0x604) - DAC2 Left Mixer Routing
+ */
+#define WM8994_ADCR_TO_DAC2L                    0x0020  /* ADCR_TO_DAC2L */
+#define WM8994_ADCR_TO_DAC2L_MASK               0x0020  /* ADCR_TO_DAC2L */
+#define WM8994_ADCR_TO_DAC2L_SHIFT                   5  /* ADCR_TO_DAC2L */
+#define WM8994_ADCR_TO_DAC2L_WIDTH                   1  /* ADCR_TO_DAC2L */
+#define WM8994_ADCL_TO_DAC2L                    0x0010  /* ADCL_TO_DAC2L */
+#define WM8994_ADCL_TO_DAC2L_MASK               0x0010  /* ADCL_TO_DAC2L */
+#define WM8994_ADCL_TO_DAC2L_SHIFT                   4  /* ADCL_TO_DAC2L */
+#define WM8994_ADCL_TO_DAC2L_WIDTH                   1  /* ADCL_TO_DAC2L */
+#define WM8994_AIF2DACL_TO_DAC2L                0x0004  /* AIF2DACL_TO_DAC2L */
+#define WM8994_AIF2DACL_TO_DAC2L_MASK           0x0004  /* AIF2DACL_TO_DAC2L */
+#define WM8994_AIF2DACL_TO_DAC2L_SHIFT               2  /* AIF2DACL_TO_DAC2L */
+#define WM8994_AIF2DACL_TO_DAC2L_WIDTH               1  /* AIF2DACL_TO_DAC2L */
+#define WM8994_AIF1DAC2L_TO_DAC2L               0x0002  /* AIF1DAC2L_TO_DAC2L */
+#define WM8994_AIF1DAC2L_TO_DAC2L_MASK          0x0002  /* AIF1DAC2L_TO_DAC2L */
+#define WM8994_AIF1DAC2L_TO_DAC2L_SHIFT              1  /* AIF1DAC2L_TO_DAC2L */
+#define WM8994_AIF1DAC2L_TO_DAC2L_WIDTH              1  /* AIF1DAC2L_TO_DAC2L */
+#define WM8994_AIF1DAC1L_TO_DAC2L               0x0001  /* AIF1DAC1L_TO_DAC2L */
+#define WM8994_AIF1DAC1L_TO_DAC2L_MASK          0x0001  /* AIF1DAC1L_TO_DAC2L */
+#define WM8994_AIF1DAC1L_TO_DAC2L_SHIFT              0  /* AIF1DAC1L_TO_DAC2L */
+#define WM8994_AIF1DAC1L_TO_DAC2L_WIDTH              1  /* AIF1DAC1L_TO_DAC2L */
+
+/*
+ * R1541 (0x605) - DAC2 Right Mixer Routing
+ */
+#define WM8994_ADCR_TO_DAC2R                    0x0020  /* ADCR_TO_DAC2R */
+#define WM8994_ADCR_TO_DAC2R_MASK               0x0020  /* ADCR_TO_DAC2R */
+#define WM8994_ADCR_TO_DAC2R_SHIFT                   5  /* ADCR_TO_DAC2R */
+#define WM8994_ADCR_TO_DAC2R_WIDTH                   1  /* ADCR_TO_DAC2R */
+#define WM8994_ADCL_TO_DAC2R                    0x0010  /* ADCL_TO_DAC2R */
+#define WM8994_ADCL_TO_DAC2R_MASK               0x0010  /* ADCL_TO_DAC2R */
+#define WM8994_ADCL_TO_DAC2R_SHIFT                   4  /* ADCL_TO_DAC2R */
+#define WM8994_ADCL_TO_DAC2R_WIDTH                   1  /* ADCL_TO_DAC2R */
+#define WM8994_AIF2DACR_TO_DAC2R                0x0004  /* AIF2DACR_TO_DAC2R */
+#define WM8994_AIF2DACR_TO_DAC2R_MASK           0x0004  /* AIF2DACR_TO_DAC2R */
+#define WM8994_AIF2DACR_TO_DAC2R_SHIFT               2  /* AIF2DACR_TO_DAC2R */
+#define WM8994_AIF2DACR_TO_DAC2R_WIDTH               1  /* AIF2DACR_TO_DAC2R */
+#define WM8994_AIF1DAC2R_TO_DAC2R               0x0002  /* AIF1DAC2R_TO_DAC2R */
+#define WM8994_AIF1DAC2R_TO_DAC2R_MASK          0x0002  /* AIF1DAC2R_TO_DAC2R */
+#define WM8994_AIF1DAC2R_TO_DAC2R_SHIFT              1  /* AIF1DAC2R_TO_DAC2R */
+#define WM8994_AIF1DAC2R_TO_DAC2R_WIDTH              1  /* AIF1DAC2R_TO_DAC2R */
+#define WM8994_AIF1DAC1R_TO_DAC2R               0x0001  /* AIF1DAC1R_TO_DAC2R */
+#define WM8994_AIF1DAC1R_TO_DAC2R_MASK          0x0001  /* AIF1DAC1R_TO_DAC2R */
+#define WM8994_AIF1DAC1R_TO_DAC2R_SHIFT              0  /* AIF1DAC1R_TO_DAC2R */
+#define WM8994_AIF1DAC1R_TO_DAC2R_WIDTH              1  /* AIF1DAC1R_TO_DAC2R */
+
+/*
+ * R1542 (0x606) - AIF1 ADC1 Left Mixer Routing
+ */
+#define WM8994_ADC1L_TO_AIF1ADC1L               0x0002  /* ADC1L_TO_AIF1ADC1L */
+#define WM8994_ADC1L_TO_AIF1ADC1L_MASK          0x0002  /* ADC1L_TO_AIF1ADC1L */
+#define WM8994_ADC1L_TO_AIF1ADC1L_SHIFT              1  /* ADC1L_TO_AIF1ADC1L */
+#define WM8994_ADC1L_TO_AIF1ADC1L_WIDTH              1  /* ADC1L_TO_AIF1ADC1L */
+#define WM8994_AIF2DACL_TO_AIF1ADC1L            0x0001  /* AIF2DACL_TO_AIF1ADC1L */
+#define WM8994_AIF2DACL_TO_AIF1ADC1L_MASK       0x0001  /* AIF2DACL_TO_AIF1ADC1L */
+#define WM8994_AIF2DACL_TO_AIF1ADC1L_SHIFT           0  /* AIF2DACL_TO_AIF1ADC1L */
+#define WM8994_AIF2DACL_TO_AIF1ADC1L_WIDTH           1  /* AIF2DACL_TO_AIF1ADC1L */
+
+/*
+ * R1543 (0x607) - AIF1 ADC1 Right Mixer Routing
+ */
+#define WM8994_ADC1R_TO_AIF1ADC1R               0x0002  /* ADC1R_TO_AIF1ADC1R */
+#define WM8994_ADC1R_TO_AIF1ADC1R_MASK          0x0002  /* ADC1R_TO_AIF1ADC1R */
+#define WM8994_ADC1R_TO_AIF1ADC1R_SHIFT              1  /* ADC1R_TO_AIF1ADC1R */
+#define WM8994_ADC1R_TO_AIF1ADC1R_WIDTH              1  /* ADC1R_TO_AIF1ADC1R */
+#define WM8994_AIF2DACR_TO_AIF1ADC1R            0x0001  /* AIF2DACR_TO_AIF1ADC1R */
+#define WM8994_AIF2DACR_TO_AIF1ADC1R_MASK       0x0001  /* AIF2DACR_TO_AIF1ADC1R */
+#define WM8994_AIF2DACR_TO_AIF1ADC1R_SHIFT           0  /* AIF2DACR_TO_AIF1ADC1R */
+#define WM8994_AIF2DACR_TO_AIF1ADC1R_WIDTH           1  /* AIF2DACR_TO_AIF1ADC1R */
+
+/*
+ * R1544 (0x608) - AIF1 ADC2 Left Mixer Routing
+ */
+#define WM8994_ADC2L_TO_AIF1ADC2L               0x0002  /* ADC2L_TO_AIF1ADC2L */
+#define WM8994_ADC2L_TO_AIF1ADC2L_MASK          0x0002  /* ADC2L_TO_AIF1ADC2L */
+#define WM8994_ADC2L_TO_AIF1ADC2L_SHIFT              1  /* ADC2L_TO_AIF1ADC2L */
+#define WM8994_ADC2L_TO_AIF1ADC2L_WIDTH              1  /* ADC2L_TO_AIF1ADC2L */
+#define WM8994_AIF2DACL_TO_AIF1ADC2L            0x0001  /* AIF2DACL_TO_AIF1ADC2L */
+#define WM8994_AIF2DACL_TO_AIF1ADC2L_MASK       0x0001  /* AIF2DACL_TO_AIF1ADC2L */
+#define WM8994_AIF2DACL_TO_AIF1ADC2L_SHIFT           0  /* AIF2DACL_TO_AIF1ADC2L */
+#define WM8994_AIF2DACL_TO_AIF1ADC2L_WIDTH           1  /* AIF2DACL_TO_AIF1ADC2L */
+
+/*
+ * R1545 (0x609) - AIF1 ADC2 Right mixer Routing
+ */
+#define WM8994_ADC2R_TO_AIF1ADC2R               0x0002  /* ADC2R_TO_AIF1ADC2R */
+#define WM8994_ADC2R_TO_AIF1ADC2R_MASK          0x0002  /* ADC2R_TO_AIF1ADC2R */
+#define WM8994_ADC2R_TO_AIF1ADC2R_SHIFT              1  /* ADC2R_TO_AIF1ADC2R */
+#define WM8994_ADC2R_TO_AIF1ADC2R_WIDTH              1  /* ADC2R_TO_AIF1ADC2R */
+#define WM8994_AIF2DACR_TO_AIF1ADC2R            0x0001  /* AIF2DACR_TO_AIF1ADC2R */
+#define WM8994_AIF2DACR_TO_AIF1ADC2R_MASK       0x0001  /* AIF2DACR_TO_AIF1ADC2R */
+#define WM8994_AIF2DACR_TO_AIF1ADC2R_SHIFT           0  /* AIF2DACR_TO_AIF1ADC2R */
+#define WM8994_AIF2DACR_TO_AIF1ADC2R_WIDTH           1  /* AIF2DACR_TO_AIF1ADC2R */
+
+/*
+ * R1552 (0x610) - DAC1 Left Volume
+ */
+#define WM8994_DAC1L_MUTE                       0x0200  /* DAC1L_MUTE */
+#define WM8994_DAC1L_MUTE_MASK                  0x0200  /* DAC1L_MUTE */
+#define WM8994_DAC1L_MUTE_SHIFT                      9  /* DAC1L_MUTE */
+#define WM8994_DAC1L_MUTE_WIDTH                      1  /* DAC1L_MUTE */
+#define WM8994_DAC1_VU                          0x0100  /* DAC1_VU */
+#define WM8994_DAC1_VU_MASK                     0x0100  /* DAC1_VU */
+#define WM8994_DAC1_VU_SHIFT                         8  /* DAC1_VU */
+#define WM8994_DAC1_VU_WIDTH                         1  /* DAC1_VU */
+#define WM8994_DAC1L_VOL_MASK                   0x00FF  /* DAC1L_VOL - [7:0] */
+#define WM8994_DAC1L_VOL_SHIFT                       0  /* DAC1L_VOL - [7:0] */
+#define WM8994_DAC1L_VOL_WIDTH                       8  /* DAC1L_VOL - [7:0] */
+
+/*
+ * R1553 (0x611) - DAC1 Right Volume
+ */
+#define WM8994_DAC1R_MUTE                       0x0200  /* DAC1R_MUTE */
+#define WM8994_DAC1R_MUTE_MASK                  0x0200  /* DAC1R_MUTE */
+#define WM8994_DAC1R_MUTE_SHIFT                      9  /* DAC1R_MUTE */
+#define WM8994_DAC1R_MUTE_WIDTH                      1  /* DAC1R_MUTE */
+#define WM8994_DAC1_VU                          0x0100  /* DAC1_VU */
+#define WM8994_DAC1_VU_MASK                     0x0100  /* DAC1_VU */
+#define WM8994_DAC1_VU_SHIFT                         8  /* DAC1_VU */
+#define WM8994_DAC1_VU_WIDTH                         1  /* DAC1_VU */
+#define WM8994_DAC1R_VOL_MASK                   0x00FF  /* DAC1R_VOL - [7:0] */
+#define WM8994_DAC1R_VOL_SHIFT                       0  /* DAC1R_VOL - [7:0] */
+#define WM8994_DAC1R_VOL_WIDTH                       8  /* DAC1R_VOL - [7:0] */
+
+/*
+ * R1554 (0x612) - DAC2 Left Volume
+ */
+#define WM8994_DAC2L_MUTE                       0x0200  /* DAC2L_MUTE */
+#define WM8994_DAC2L_MUTE_MASK                  0x0200  /* DAC2L_MUTE */
+#define WM8994_DAC2L_MUTE_SHIFT                      9  /* DAC2L_MUTE */
+#define WM8994_DAC2L_MUTE_WIDTH                      1  /* DAC2L_MUTE */
+#define WM8994_DAC2_VU                          0x0100  /* DAC2_VU */
+#define WM8994_DAC2_VU_MASK                     0x0100  /* DAC2_VU */
+#define WM8994_DAC2_VU_SHIFT                         8  /* DAC2_VU */
+#define WM8994_DAC2_VU_WIDTH                         1  /* DAC2_VU */
+#define WM8994_DAC2L_VOL_MASK                   0x00FF  /* DAC2L_VOL - [7:0] */
+#define WM8994_DAC2L_VOL_SHIFT                       0  /* DAC2L_VOL - [7:0] */
+#define WM8994_DAC2L_VOL_WIDTH                       8  /* DAC2L_VOL - [7:0] */
+
+/*
+ * R1555 (0x613) - DAC2 Right Volume
+ */
+#define WM8994_DAC2R_MUTE                       0x0200  /* DAC2R_MUTE */
+#define WM8994_DAC2R_MUTE_MASK                  0x0200  /* DAC2R_MUTE */
+#define WM8994_DAC2R_MUTE_SHIFT                      9  /* DAC2R_MUTE */
+#define WM8994_DAC2R_MUTE_WIDTH                      1  /* DAC2R_MUTE */
+#define WM8994_DAC2_VU                          0x0100  /* DAC2_VU */
+#define WM8994_DAC2_VU_MASK                     0x0100  /* DAC2_VU */
+#define WM8994_DAC2_VU_SHIFT                         8  /* DAC2_VU */
+#define WM8994_DAC2_VU_WIDTH                         1  /* DAC2_VU */
+#define WM8994_DAC2R_VOL_MASK                   0x00FF  /* DAC2R_VOL - [7:0] */
+#define WM8994_DAC2R_VOL_SHIFT                       0  /* DAC2R_VOL - [7:0] */
+#define WM8994_DAC2R_VOL_WIDTH                       8  /* DAC2R_VOL - [7:0] */
+
+/*
+ * R1556 (0x614) - DAC Softmute
+ */
+#define WM8994_DAC_SOFTMUTEMODE                 0x0002  /* DAC_SOFTMUTEMODE */
+#define WM8994_DAC_SOFTMUTEMODE_MASK            0x0002  /* DAC_SOFTMUTEMODE */
+#define WM8994_DAC_SOFTMUTEMODE_SHIFT                1  /* DAC_SOFTMUTEMODE */
+#define WM8994_DAC_SOFTMUTEMODE_WIDTH                1  /* DAC_SOFTMUTEMODE */
+#define WM8994_DAC_MUTERATE                     0x0001  /* DAC_MUTERATE */
+#define WM8994_DAC_MUTERATE_MASK                0x0001  /* DAC_MUTERATE */
+#define WM8994_DAC_MUTERATE_SHIFT                    0  /* DAC_MUTERATE */
+#define WM8994_DAC_MUTERATE_WIDTH                    1  /* DAC_MUTERATE */
+
+/*
+ * R1568 (0x620) - Oversampling
+ */
+#define WM8994_ADC_OSR128                       0x0002  /* ADC_OSR128 */
+#define WM8994_ADC_OSR128_MASK                  0x0002  /* ADC_OSR128 */
+#define WM8994_ADC_OSR128_SHIFT                      1  /* ADC_OSR128 */
+#define WM8994_ADC_OSR128_WIDTH                      1  /* ADC_OSR128 */
+#define WM8994_DAC_OSR128                       0x0001  /* DAC_OSR128 */
+#define WM8994_DAC_OSR128_MASK                  0x0001  /* DAC_OSR128 */
+#define WM8994_DAC_OSR128_SHIFT                      0  /* DAC_OSR128 */
+#define WM8994_DAC_OSR128_WIDTH                      1  /* DAC_OSR128 */
+
+/*
+ * R1569 (0x621) - Sidetone
+ */
+#define WM8994_ST_HPF_CUT_MASK                  0x0380  /* ST_HPF_CUT - [9:7] */
+#define WM8994_ST_HPF_CUT_SHIFT                      7  /* ST_HPF_CUT - [9:7] */
+#define WM8994_ST_HPF_CUT_WIDTH                      3  /* ST_HPF_CUT - [9:7] */
+#define WM8994_ST_HPF                           0x0040  /* ST_HPF */
+#define WM8994_ST_HPF_MASK                      0x0040  /* ST_HPF */
+#define WM8994_ST_HPF_SHIFT                          6  /* ST_HPF */
+#define WM8994_ST_HPF_WIDTH                          1  /* ST_HPF */
+#define WM8994_STR_SEL                          0x0002  /* STR_SEL */
+#define WM8994_STR_SEL_MASK                     0x0002  /* STR_SEL */
+#define WM8994_STR_SEL_SHIFT                         1  /* STR_SEL */
+#define WM8994_STR_SEL_WIDTH                         1  /* STR_SEL */
+#define WM8994_STL_SEL                          0x0001  /* STL_SEL */
+#define WM8994_STL_SEL_MASK                     0x0001  /* STL_SEL */
+#define WM8994_STL_SEL_SHIFT                         0  /* STL_SEL */
+#define WM8994_STL_SEL_WIDTH                         1  /* STL_SEL */
+
+/*
+ * R1824 (0x720) - Pull Control (1)
+ */
+#define WM8994_DMICDAT2_PU                      0x0800  /* DMICDAT2_PU */
+#define WM8994_DMICDAT2_PU_MASK                 0x0800  /* DMICDAT2_PU */
+#define WM8994_DMICDAT2_PU_SHIFT                    11  /* DMICDAT2_PU */
+#define WM8994_DMICDAT2_PU_WIDTH                     1  /* DMICDAT2_PU */
+#define WM8994_DMICDAT2_PD                      0x0400  /* DMICDAT2_PD */
+#define WM8994_DMICDAT2_PD_MASK                 0x0400  /* DMICDAT2_PD */
+#define WM8994_DMICDAT2_PD_SHIFT                    10  /* DMICDAT2_PD */
+#define WM8994_DMICDAT2_PD_WIDTH                     1  /* DMICDAT2_PD */
+#define WM8994_DMICDAT1_PU                      0x0200  /* DMICDAT1_PU */
+#define WM8994_DMICDAT1_PU_MASK                 0x0200  /* DMICDAT1_PU */
+#define WM8994_DMICDAT1_PU_SHIFT                     9  /* DMICDAT1_PU */
+#define WM8994_DMICDAT1_PU_WIDTH                     1  /* DMICDAT1_PU */
+#define WM8994_DMICDAT1_PD                      0x0100  /* DMICDAT1_PD */
+#define WM8994_DMICDAT1_PD_MASK                 0x0100  /* DMICDAT1_PD */
+#define WM8994_DMICDAT1_PD_SHIFT                     8  /* DMICDAT1_PD */
+#define WM8994_DMICDAT1_PD_WIDTH                     1  /* DMICDAT1_PD */
+#define WM8994_MCLK1_PU                         0x0080  /* MCLK1_PU */
+#define WM8994_MCLK1_PU_MASK                    0x0080  /* MCLK1_PU */
+#define WM8994_MCLK1_PU_SHIFT                        7  /* MCLK1_PU */
+#define WM8994_MCLK1_PU_WIDTH                        1  /* MCLK1_PU */
+#define WM8994_MCLK1_PD                         0x0040  /* MCLK1_PD */
+#define WM8994_MCLK1_PD_MASK                    0x0040  /* MCLK1_PD */
+#define WM8994_MCLK1_PD_SHIFT                        6  /* MCLK1_PD */
+#define WM8994_MCLK1_PD_WIDTH                        1  /* MCLK1_PD */
+#define WM8994_DACDAT1_PU                       0x0020  /* DACDAT1_PU */
+#define WM8994_DACDAT1_PU_MASK                  0x0020  /* DACDAT1_PU */
+#define WM8994_DACDAT1_PU_SHIFT                      5  /* DACDAT1_PU */
+#define WM8994_DACDAT1_PU_WIDTH                      1  /* DACDAT1_PU */
+#define WM8994_DACDAT1_PD                       0x0010  /* DACDAT1_PD */
+#define WM8994_DACDAT1_PD_MASK                  0x0010  /* DACDAT1_PD */
+#define WM8994_DACDAT1_PD_SHIFT                      4  /* DACDAT1_PD */
+#define WM8994_DACDAT1_PD_WIDTH                      1  /* DACDAT1_PD */
+#define WM8994_DACLRCLK1_PU                     0x0008  /* DACLRCLK1_PU */
+#define WM8994_DACLRCLK1_PU_MASK                0x0008  /* DACLRCLK1_PU */
+#define WM8994_DACLRCLK1_PU_SHIFT                    3  /* DACLRCLK1_PU */
+#define WM8994_DACLRCLK1_PU_WIDTH                    1  /* DACLRCLK1_PU */
+#define WM8994_DACLRCLK1_PD                     0x0004  /* DACLRCLK1_PD */
+#define WM8994_DACLRCLK1_PD_MASK                0x0004  /* DACLRCLK1_PD */
+#define WM8994_DACLRCLK1_PD_SHIFT                    2  /* DACLRCLK1_PD */
+#define WM8994_DACLRCLK1_PD_WIDTH                    1  /* DACLRCLK1_PD */
+#define WM8994_BCLK1_PU                         0x0002  /* BCLK1_PU */
+#define WM8994_BCLK1_PU_MASK                    0x0002  /* BCLK1_PU */
+#define WM8994_BCLK1_PU_SHIFT                        1  /* BCLK1_PU */
+#define WM8994_BCLK1_PU_WIDTH                        1  /* BCLK1_PU */
+#define WM8994_BCLK1_PD                         0x0001  /* BCLK1_PD */
+#define WM8994_BCLK1_PD_MASK                    0x0001  /* BCLK1_PD */
+#define WM8994_BCLK1_PD_SHIFT                        0  /* BCLK1_PD */
+#define WM8994_BCLK1_PD_WIDTH                        1  /* BCLK1_PD */
+
+/*
+ * R1825 (0x721) - Pull Control (2)
+ */
+#define WM8994_CSNADDR_PD                       0x0100  /* CSNADDR_PD */
+#define WM8994_CSNADDR_PD_MASK                  0x0100  /* CSNADDR_PD */
+#define WM8994_CSNADDR_PD_SHIFT                      8  /* CSNADDR_PD */
+#define WM8994_CSNADDR_PD_WIDTH                      1  /* CSNADDR_PD */
+#define WM8994_LDO2ENA_PD                       0x0040  /* LDO2ENA_PD */
+#define WM8994_LDO2ENA_PD_MASK                  0x0040  /* LDO2ENA_PD */
+#define WM8994_LDO2ENA_PD_SHIFT                      6  /* LDO2ENA_PD */
+#define WM8994_LDO2ENA_PD_WIDTH                      1  /* LDO2ENA_PD */
+#define WM8994_LDO1ENA_PD                       0x0010  /* LDO1ENA_PD */
+#define WM8994_LDO1ENA_PD_MASK                  0x0010  /* LDO1ENA_PD */
+#define WM8994_LDO1ENA_PD_SHIFT                      4  /* LDO1ENA_PD */
+#define WM8994_LDO1ENA_PD_WIDTH                      1  /* LDO1ENA_PD */
+#define WM8994_CIFMODE_PD                       0x0004  /* CIFMODE_PD */
+#define WM8994_CIFMODE_PD_MASK                  0x0004  /* CIFMODE_PD */
+#define WM8994_CIFMODE_PD_SHIFT                      2  /* CIFMODE_PD */
+#define WM8994_CIFMODE_PD_WIDTH                      1  /* CIFMODE_PD */
+#define WM8994_SPKMODE_PU                       0x0002  /* SPKMODE_PU */
+#define WM8994_SPKMODE_PU_MASK                  0x0002  /* SPKMODE_PU */
+#define WM8994_SPKMODE_PU_SHIFT                      1  /* SPKMODE_PU */
+#define WM8994_SPKMODE_PU_WIDTH                      1  /* SPKMODE_PU */
+
+/*
+ * R1840 (0x730) - Interrupt Status 1
+ */
+#define WM8994_GP11_EINT                        0x0400  /* GP11_EINT */
+#define WM8994_GP11_EINT_MASK                   0x0400  /* GP11_EINT */
+#define WM8994_GP11_EINT_SHIFT                      10  /* GP11_EINT */
+#define WM8994_GP11_EINT_WIDTH                       1  /* GP11_EINT */
+#define WM8994_GP10_EINT                        0x0200  /* GP10_EINT */
+#define WM8994_GP10_EINT_MASK                   0x0200  /* GP10_EINT */
+#define WM8994_GP10_EINT_SHIFT                       9  /* GP10_EINT */
+#define WM8994_GP10_EINT_WIDTH                       1  /* GP10_EINT */
+#define WM8994_GP9_EINT                         0x0100  /* GP9_EINT */
+#define WM8994_GP9_EINT_MASK                    0x0100  /* GP9_EINT */
+#define WM8994_GP9_EINT_SHIFT                        8  /* GP9_EINT */
+#define WM8994_GP9_EINT_WIDTH                        1  /* GP9_EINT */
+#define WM8994_GP8_EINT                         0x0080  /* GP8_EINT */
+#define WM8994_GP8_EINT_MASK                    0x0080  /* GP8_EINT */
+#define WM8994_GP8_EINT_SHIFT                        7  /* GP8_EINT */
+#define WM8994_GP8_EINT_WIDTH                        1  /* GP8_EINT */
+#define WM8994_GP7_EINT                         0x0040  /* GP7_EINT */
+#define WM8994_GP7_EINT_MASK                    0x0040  /* GP7_EINT */
+#define WM8994_GP7_EINT_SHIFT                        6  /* GP7_EINT */
+#define WM8994_GP7_EINT_WIDTH                        1  /* GP7_EINT */
+#define WM8994_GP6_EINT                         0x0020  /* GP6_EINT */
+#define WM8994_GP6_EINT_MASK                    0x0020  /* GP6_EINT */
+#define WM8994_GP6_EINT_SHIFT                        5  /* GP6_EINT */
+#define WM8994_GP6_EINT_WIDTH                        1  /* GP6_EINT */
+#define WM8994_GP5_EINT                         0x0010  /* GP5_EINT */
+#define WM8994_GP5_EINT_MASK                    0x0010  /* GP5_EINT */
+#define WM8994_GP5_EINT_SHIFT                        4  /* GP5_EINT */
+#define WM8994_GP5_EINT_WIDTH                        1  /* GP5_EINT */
+#define WM8994_GP4_EINT                         0x0008  /* GP4_EINT */
+#define WM8994_GP4_EINT_MASK                    0x0008  /* GP4_EINT */
+#define WM8994_GP4_EINT_SHIFT                        3  /* GP4_EINT */
+#define WM8994_GP4_EINT_WIDTH                        1  /* GP4_EINT */
+#define WM8994_GP3_EINT                         0x0004  /* GP3_EINT */
+#define WM8994_GP3_EINT_MASK                    0x0004  /* GP3_EINT */
+#define WM8994_GP3_EINT_SHIFT                        2  /* GP3_EINT */
+#define WM8994_GP3_EINT_WIDTH                        1  /* GP3_EINT */
+#define WM8994_GP2_EINT                         0x0002  /* GP2_EINT */
+#define WM8994_GP2_EINT_MASK                    0x0002  /* GP2_EINT */
+#define WM8994_GP2_EINT_SHIFT                        1  /* GP2_EINT */
+#define WM8994_GP2_EINT_WIDTH                        1  /* GP2_EINT */
+#define WM8994_GP1_EINT                         0x0001  /* GP1_EINT */
+#define WM8994_GP1_EINT_MASK                    0x0001  /* GP1_EINT */
+#define WM8994_GP1_EINT_SHIFT                        0  /* GP1_EINT */
+#define WM8994_GP1_EINT_WIDTH                        1  /* GP1_EINT */
+
+/*
+ * R1841 (0x731) - Interrupt Status 2
+ */
+#define WM8994_TEMP_WARN_EINT                   0x8000  /* TEMP_WARN_EINT */
+#define WM8994_TEMP_WARN_EINT_MASK              0x8000  /* TEMP_WARN_EINT */
+#define WM8994_TEMP_WARN_EINT_SHIFT                 15  /* TEMP_WARN_EINT */
+#define WM8994_TEMP_WARN_EINT_WIDTH                  1  /* TEMP_WARN_EINT */
+#define WM8994_DCS_DONE_EINT                    0x4000  /* DCS_DONE_EINT */
+#define WM8994_DCS_DONE_EINT_MASK               0x4000  /* DCS_DONE_EINT */
+#define WM8994_DCS_DONE_EINT_SHIFT                  14  /* DCS_DONE_EINT */
+#define WM8994_DCS_DONE_EINT_WIDTH                   1  /* DCS_DONE_EINT */
+#define WM8994_WSEQ_DONE_EINT                   0x2000  /* WSEQ_DONE_EINT */
+#define WM8994_WSEQ_DONE_EINT_MASK              0x2000  /* WSEQ_DONE_EINT */
+#define WM8994_WSEQ_DONE_EINT_SHIFT                 13  /* WSEQ_DONE_EINT */
+#define WM8994_WSEQ_DONE_EINT_WIDTH                  1  /* WSEQ_DONE_EINT */
+#define WM8994_FIFOS_ERR_EINT                   0x1000  /* FIFOS_ERR_EINT */
+#define WM8994_FIFOS_ERR_EINT_MASK              0x1000  /* FIFOS_ERR_EINT */
+#define WM8994_FIFOS_ERR_EINT_SHIFT                 12  /* FIFOS_ERR_EINT */
+#define WM8994_FIFOS_ERR_EINT_WIDTH                  1  /* FIFOS_ERR_EINT */
+#define WM8994_AIF2DRC_SIG_DET_EINT             0x0800  /* AIF2DRC_SIG_DET_EINT */
+#define WM8994_AIF2DRC_SIG_DET_EINT_MASK        0x0800  /* AIF2DRC_SIG_DET_EINT */
+#define WM8994_AIF2DRC_SIG_DET_EINT_SHIFT           11  /* AIF2DRC_SIG_DET_EINT */
+#define WM8994_AIF2DRC_SIG_DET_EINT_WIDTH            1  /* AIF2DRC_SIG_DET_EINT */
+#define WM8994_AIF1DRC2_SIG_DET_EINT            0x0400  /* AIF1DRC2_SIG_DET_EINT */
+#define WM8994_AIF1DRC2_SIG_DET_EINT_MASK       0x0400  /* AIF1DRC2_SIG_DET_EINT */
+#define WM8994_AIF1DRC2_SIG_DET_EINT_SHIFT          10  /* AIF1DRC2_SIG_DET_EINT */
+#define WM8994_AIF1DRC2_SIG_DET_EINT_WIDTH           1  /* AIF1DRC2_SIG_DET_EINT */
+#define WM8994_AIF1DRC1_SIG_DET_EINT            0x0200  /* AIF1DRC1_SIG_DET_EINT */
+#define WM8994_AIF1DRC1_SIG_DET_EINT_MASK       0x0200  /* AIF1DRC1_SIG_DET_EINT */
+#define WM8994_AIF1DRC1_SIG_DET_EINT_SHIFT           9  /* AIF1DRC1_SIG_DET_EINT */
+#define WM8994_AIF1DRC1_SIG_DET_EINT_WIDTH           1  /* AIF1DRC1_SIG_DET_EINT */
+#define WM8994_SRC2_LOCK_EINT                   0x0100  /* SRC2_LOCK_EINT */
+#define WM8994_SRC2_LOCK_EINT_MASK              0x0100  /* SRC2_LOCK_EINT */
+#define WM8994_SRC2_LOCK_EINT_SHIFT                  8  /* SRC2_LOCK_EINT */
+#define WM8994_SRC2_LOCK_EINT_WIDTH                  1  /* SRC2_LOCK_EINT */
+#define WM8994_SRC1_LOCK_EINT                   0x0080  /* SRC1_LOCK_EINT */
+#define WM8994_SRC1_LOCK_EINT_MASK              0x0080  /* SRC1_LOCK_EINT */
+#define WM8994_SRC1_LOCK_EINT_SHIFT                  7  /* SRC1_LOCK_EINT */
+#define WM8994_SRC1_LOCK_EINT_WIDTH                  1  /* SRC1_LOCK_EINT */
+#define WM8994_FLL2_LOCK_EINT                   0x0040  /* FLL2_LOCK_EINT */
+#define WM8994_FLL2_LOCK_EINT_MASK              0x0040  /* FLL2_LOCK_EINT */
+#define WM8994_FLL2_LOCK_EINT_SHIFT                  6  /* FLL2_LOCK_EINT */
+#define WM8994_FLL2_LOCK_EINT_WIDTH                  1  /* FLL2_LOCK_EINT */
+#define WM8994_FLL1_LOCK_EINT                   0x0020  /* FLL1_LOCK_EINT */
+#define WM8994_FLL1_LOCK_EINT_MASK              0x0020  /* FLL1_LOCK_EINT */
+#define WM8994_FLL1_LOCK_EINT_SHIFT                  5  /* FLL1_LOCK_EINT */
+#define WM8994_FLL1_LOCK_EINT_WIDTH                  1  /* FLL1_LOCK_EINT */
+#define WM8994_MIC2_SHRT_EINT                   0x0010  /* MIC2_SHRT_EINT */
+#define WM8994_MIC2_SHRT_EINT_MASK              0x0010  /* MIC2_SHRT_EINT */
+#define WM8994_MIC2_SHRT_EINT_SHIFT                  4  /* MIC2_SHRT_EINT */
+#define WM8994_MIC2_SHRT_EINT_WIDTH                  1  /* MIC2_SHRT_EINT */
+#define WM8994_MIC2_DET_EINT                    0x0008  /* MIC2_DET_EINT */
+#define WM8994_MIC2_DET_EINT_MASK               0x0008  /* MIC2_DET_EINT */
+#define WM8994_MIC2_DET_EINT_SHIFT                   3  /* MIC2_DET_EINT */
+#define WM8994_MIC2_DET_EINT_WIDTH                   1  /* MIC2_DET_EINT */
+#define WM8994_MIC1_SHRT_EINT                   0x0004  /* MIC1_SHRT_EINT */
+#define WM8994_MIC1_SHRT_EINT_MASK              0x0004  /* MIC1_SHRT_EINT */
+#define WM8994_MIC1_SHRT_EINT_SHIFT                  2  /* MIC1_SHRT_EINT */
+#define WM8994_MIC1_SHRT_EINT_WIDTH                  1  /* MIC1_SHRT_EINT */
+#define WM8994_MIC1_DET_EINT                    0x0002  /* MIC1_DET_EINT */
+#define WM8994_MIC1_DET_EINT_MASK               0x0002  /* MIC1_DET_EINT */
+#define WM8994_MIC1_DET_EINT_SHIFT                   1  /* MIC1_DET_EINT */
+#define WM8994_MIC1_DET_EINT_WIDTH                   1  /* MIC1_DET_EINT */
+#define WM8994_TEMP_SHUT_EINT                   0x0001  /* TEMP_SHUT_EINT */
+#define WM8994_TEMP_SHUT_EINT_MASK              0x0001  /* TEMP_SHUT_EINT */
+#define WM8994_TEMP_SHUT_EINT_SHIFT                  0  /* TEMP_SHUT_EINT */
+#define WM8994_TEMP_SHUT_EINT_WIDTH                  1  /* TEMP_SHUT_EINT */
+
+/*
+ * R1842 (0x732) - Interrupt Raw Status 2
+ */
+#define WM8994_TEMP_WARN_STS                    0x8000  /* TEMP_WARN_STS */
+#define WM8994_TEMP_WARN_STS_MASK               0x8000  /* TEMP_WARN_STS */
+#define WM8994_TEMP_WARN_STS_SHIFT                  15  /* TEMP_WARN_STS */
+#define WM8994_TEMP_WARN_STS_WIDTH                   1  /* TEMP_WARN_STS */
+#define WM8994_DCS_DONE_STS                     0x4000  /* DCS_DONE_STS */
+#define WM8994_DCS_DONE_STS_MASK                0x4000  /* DCS_DONE_STS */
+#define WM8994_DCS_DONE_STS_SHIFT                   14  /* DCS_DONE_STS */
+#define WM8994_DCS_DONE_STS_WIDTH                    1  /* DCS_DONE_STS */
+#define WM8994_WSEQ_DONE_STS                    0x2000  /* WSEQ_DONE_STS */
+#define WM8994_WSEQ_DONE_STS_MASK               0x2000  /* WSEQ_DONE_STS */
+#define WM8994_WSEQ_DONE_STS_SHIFT                  13  /* WSEQ_DONE_STS */
+#define WM8994_WSEQ_DONE_STS_WIDTH                   1  /* WSEQ_DONE_STS */
+#define WM8994_FIFOS_ERR_STS                    0x1000  /* FIFOS_ERR_STS */
+#define WM8994_FIFOS_ERR_STS_MASK               0x1000  /* FIFOS_ERR_STS */
+#define WM8994_FIFOS_ERR_STS_SHIFT                  12  /* FIFOS_ERR_STS */
+#define WM8994_FIFOS_ERR_STS_WIDTH                   1  /* FIFOS_ERR_STS */
+#define WM8994_AIF2DRC_SIG_DET_STS              0x0800  /* AIF2DRC_SIG_DET_STS */
+#define WM8994_AIF2DRC_SIG_DET_STS_MASK         0x0800  /* AIF2DRC_SIG_DET_STS */
+#define WM8994_AIF2DRC_SIG_DET_STS_SHIFT            11  /* AIF2DRC_SIG_DET_STS */
+#define WM8994_AIF2DRC_SIG_DET_STS_WIDTH             1  /* AIF2DRC_SIG_DET_STS */
+#define WM8994_AIF1DRC2_SIG_DET_STS             0x0400  /* AIF1DRC2_SIG_DET_STS */
+#define WM8994_AIF1DRC2_SIG_DET_STS_MASK        0x0400  /* AIF1DRC2_SIG_DET_STS */
+#define WM8994_AIF1DRC2_SIG_DET_STS_SHIFT           10  /* AIF1DRC2_SIG_DET_STS */
+#define WM8994_AIF1DRC2_SIG_DET_STS_WIDTH            1  /* AIF1DRC2_SIG_DET_STS */
+#define WM8994_AIF1DRC1_SIG_DET_STS             0x0200  /* AIF1DRC1_SIG_DET_STS */
+#define WM8994_AIF1DRC1_SIG_DET_STS_MASK        0x0200  /* AIF1DRC1_SIG_DET_STS */
+#define WM8994_AIF1DRC1_SIG_DET_STS_SHIFT            9  /* AIF1DRC1_SIG_DET_STS */
+#define WM8994_AIF1DRC1_SIG_DET_STS_WIDTH            1  /* AIF1DRC1_SIG_DET_STS */
+#define WM8994_SRC2_LOCK_STS                    0x0100  /* SRC2_LOCK_STS */
+#define WM8994_SRC2_LOCK_STS_MASK               0x0100  /* SRC2_LOCK_STS */
+#define WM8994_SRC2_LOCK_STS_SHIFT                   8  /* SRC2_LOCK_STS */
+#define WM8994_SRC2_LOCK_STS_WIDTH                   1  /* SRC2_LOCK_STS */
+#define WM8994_SRC1_LOCK_STS                    0x0080  /* SRC1_LOCK_STS */
+#define WM8994_SRC1_LOCK_STS_MASK               0x0080  /* SRC1_LOCK_STS */
+#define WM8994_SRC1_LOCK_STS_SHIFT                   7  /* SRC1_LOCK_STS */
+#define WM8994_SRC1_LOCK_STS_WIDTH                   1  /* SRC1_LOCK_STS */
+#define WM8994_FLL2_LOCK_STS                    0x0040  /* FLL2_LOCK_STS */
+#define WM8994_FLL2_LOCK_STS_MASK               0x0040  /* FLL2_LOCK_STS */
+#define WM8994_FLL2_LOCK_STS_SHIFT                   6  /* FLL2_LOCK_STS */
+#define WM8994_FLL2_LOCK_STS_WIDTH                   1  /* FLL2_LOCK_STS */
+#define WM8994_FLL1_LOCK_STS                    0x0020  /* FLL1_LOCK_STS */
+#define WM8994_FLL1_LOCK_STS_MASK               0x0020  /* FLL1_LOCK_STS */
+#define WM8994_FLL1_LOCK_STS_SHIFT                   5  /* FLL1_LOCK_STS */
+#define WM8994_FLL1_LOCK_STS_WIDTH                   1  /* FLL1_LOCK_STS */
+#define WM8994_MIC2_SHRT_STS                    0x0010  /* MIC2_SHRT_STS */
+#define WM8994_MIC2_SHRT_STS_MASK               0x0010  /* MIC2_SHRT_STS */
+#define WM8994_MIC2_SHRT_STS_SHIFT                   4  /* MIC2_SHRT_STS */
+#define WM8994_MIC2_SHRT_STS_WIDTH                   1  /* MIC2_SHRT_STS */
+#define WM8994_MIC2_DET_STS                     0x0008  /* MIC2_DET_STS */
+#define WM8994_MIC2_DET_STS_MASK                0x0008  /* MIC2_DET_STS */
+#define WM8994_MIC2_DET_STS_SHIFT                    3  /* MIC2_DET_STS */
+#define WM8994_MIC2_DET_STS_WIDTH                    1  /* MIC2_DET_STS */
+#define WM8994_MIC1_SHRT_STS                    0x0004  /* MIC1_SHRT_STS */
+#define WM8994_MIC1_SHRT_STS_MASK               0x0004  /* MIC1_SHRT_STS */
+#define WM8994_MIC1_SHRT_STS_SHIFT                   2  /* MIC1_SHRT_STS */
+#define WM8994_MIC1_SHRT_STS_WIDTH                   1  /* MIC1_SHRT_STS */
+#define WM8994_MIC1_DET_STS                     0x0002  /* MIC1_DET_STS */
+#define WM8994_MIC1_DET_STS_MASK                0x0002  /* MIC1_DET_STS */
+#define WM8994_MIC1_DET_STS_SHIFT                    1  /* MIC1_DET_STS */
+#define WM8994_MIC1_DET_STS_WIDTH                    1  /* MIC1_DET_STS */
+#define WM8994_TEMP_SHUT_STS                    0x0001  /* TEMP_SHUT_STS */
+#define WM8994_TEMP_SHUT_STS_MASK               0x0001  /* TEMP_SHUT_STS */
+#define WM8994_TEMP_SHUT_STS_SHIFT                   0  /* TEMP_SHUT_STS */
+#define WM8994_TEMP_SHUT_STS_WIDTH                   1  /* TEMP_SHUT_STS */
+
+/*
+ * R1848 (0x738) - Interrupt Status 1 Mask
+ */
+#define WM8994_IM_GP11_EINT                     0x0400  /* IM_GP11_EINT */
+#define WM8994_IM_GP11_EINT_MASK                0x0400  /* IM_GP11_EINT */
+#define WM8994_IM_GP11_EINT_SHIFT                   10  /* IM_GP11_EINT */
+#define WM8994_IM_GP11_EINT_WIDTH                    1  /* IM_GP11_EINT */
+#define WM8994_IM_GP10_EINT                     0x0200  /* IM_GP10_EINT */
+#define WM8994_IM_GP10_EINT_MASK                0x0200  /* IM_GP10_EINT */
+#define WM8994_IM_GP10_EINT_SHIFT                    9  /* IM_GP10_EINT */
+#define WM8994_IM_GP10_EINT_WIDTH                    1  /* IM_GP10_EINT */
+#define WM8994_IM_GP9_EINT                      0x0100  /* IM_GP9_EINT */
+#define WM8994_IM_GP9_EINT_MASK                 0x0100  /* IM_GP9_EINT */
+#define WM8994_IM_GP9_EINT_SHIFT                     8  /* IM_GP9_EINT */
+#define WM8994_IM_GP9_EINT_WIDTH                     1  /* IM_GP9_EINT */
+#define WM8994_IM_GP8_EINT                      0x0080  /* IM_GP8_EINT */
+#define WM8994_IM_GP8_EINT_MASK                 0x0080  /* IM_GP8_EINT */
+#define WM8994_IM_GP8_EINT_SHIFT                     7  /* IM_GP8_EINT */
+#define WM8994_IM_GP8_EINT_WIDTH                     1  /* IM_GP8_EINT */
+#define WM8994_IM_GP7_EINT                      0x0040  /* IM_GP7_EINT */
+#define WM8994_IM_GP7_EINT_MASK                 0x0040  /* IM_GP7_EINT */
+#define WM8994_IM_GP7_EINT_SHIFT                     6  /* IM_GP7_EINT */
+#define WM8994_IM_GP7_EINT_WIDTH                     1  /* IM_GP7_EINT */
+#define WM8994_IM_GP6_EINT                      0x0020  /* IM_GP6_EINT */
+#define WM8994_IM_GP6_EINT_MASK                 0x0020  /* IM_GP6_EINT */
+#define WM8994_IM_GP6_EINT_SHIFT                     5  /* IM_GP6_EINT */
+#define WM8994_IM_GP6_EINT_WIDTH                     1  /* IM_GP6_EINT */
+#define WM8994_IM_GP5_EINT                      0x0010  /* IM_GP5_EINT */
+#define WM8994_IM_GP5_EINT_MASK                 0x0010  /* IM_GP5_EINT */
+#define WM8994_IM_GP5_EINT_SHIFT                     4  /* IM_GP5_EINT */
+#define WM8994_IM_GP5_EINT_WIDTH                     1  /* IM_GP5_EINT */
+#define WM8994_IM_GP4_EINT                      0x0008  /* IM_GP4_EINT */
+#define WM8994_IM_GP4_EINT_MASK                 0x0008  /* IM_GP4_EINT */
+#define WM8994_IM_GP4_EINT_SHIFT                     3  /* IM_GP4_EINT */
+#define WM8994_IM_GP4_EINT_WIDTH                     1  /* IM_GP4_EINT */
+#define WM8994_IM_GP3_EINT                      0x0004  /* IM_GP3_EINT */
+#define WM8994_IM_GP3_EINT_MASK                 0x0004  /* IM_GP3_EINT */
+#define WM8994_IM_GP3_EINT_SHIFT                     2  /* IM_GP3_EINT */
+#define WM8994_IM_GP3_EINT_WIDTH                     1  /* IM_GP3_EINT */
+#define WM8994_IM_GP2_EINT                      0x0002  /* IM_GP2_EINT */
+#define WM8994_IM_GP2_EINT_MASK                 0x0002  /* IM_GP2_EINT */
+#define WM8994_IM_GP2_EINT_SHIFT                     1  /* IM_GP2_EINT */
+#define WM8994_IM_GP2_EINT_WIDTH                     1  /* IM_GP2_EINT */
+#define WM8994_IM_GP1_EINT                      0x0001  /* IM_GP1_EINT */
+#define WM8994_IM_GP1_EINT_MASK                 0x0001  /* IM_GP1_EINT */
+#define WM8994_IM_GP1_EINT_SHIFT                     0  /* IM_GP1_EINT */
+#define WM8994_IM_GP1_EINT_WIDTH                     1  /* IM_GP1_EINT */
+
+/*
+ * R1849 (0x739) - Interrupt Status 2 Mask
+ */
+#define WM8994_IM_TEMP_WARN_EINT                0x8000  /* IM_TEMP_WARN_EINT */
+#define WM8994_IM_TEMP_WARN_EINT_MASK           0x8000  /* IM_TEMP_WARN_EINT */
+#define WM8994_IM_TEMP_WARN_EINT_SHIFT              15  /* IM_TEMP_WARN_EINT */
+#define WM8994_IM_TEMP_WARN_EINT_WIDTH               1  /* IM_TEMP_WARN_EINT */
+#define WM8994_IM_DCS_DONE_EINT                 0x4000  /* IM_DCS_DONE_EINT */
+#define WM8994_IM_DCS_DONE_EINT_MASK            0x4000  /* IM_DCS_DONE_EINT */
+#define WM8994_IM_DCS_DONE_EINT_SHIFT               14  /* IM_DCS_DONE_EINT */
+#define WM8994_IM_DCS_DONE_EINT_WIDTH                1  /* IM_DCS_DONE_EINT */
+#define WM8994_IM_WSEQ_DONE_EINT                0x2000  /* IM_WSEQ_DONE_EINT */
+#define WM8994_IM_WSEQ_DONE_EINT_MASK           0x2000  /* IM_WSEQ_DONE_EINT */
+#define WM8994_IM_WSEQ_DONE_EINT_SHIFT              13  /* IM_WSEQ_DONE_EINT */
+#define WM8994_IM_WSEQ_DONE_EINT_WIDTH               1  /* IM_WSEQ_DONE_EINT */
+#define WM8994_IM_FIFOS_ERR_EINT                0x1000  /* IM_FIFOS_ERR_EINT */
+#define WM8994_IM_FIFOS_ERR_EINT_MASK           0x1000  /* IM_FIFOS_ERR_EINT */
+#define WM8994_IM_FIFOS_ERR_EINT_SHIFT              12  /* IM_FIFOS_ERR_EINT */
+#define WM8994_IM_FIFOS_ERR_EINT_WIDTH               1  /* IM_FIFOS_ERR_EINT */
+#define WM8994_IM_AIF2DRC_SIG_DET_EINT          0x0800  /* IM_AIF2DRC_SIG_DET_EINT */
+#define WM8994_IM_AIF2DRC_SIG_DET_EINT_MASK     0x0800  /* IM_AIF2DRC_SIG_DET_EINT */
+#define WM8994_IM_AIF2DRC_SIG_DET_EINT_SHIFT        11  /* IM_AIF2DRC_SIG_DET_EINT */
+#define WM8994_IM_AIF2DRC_SIG_DET_EINT_WIDTH         1  /* IM_AIF2DRC_SIG_DET_EINT */
+#define WM8994_IM_AIF1DRC2_SIG_DET_EINT         0x0400  /* IM_AIF1DRC2_SIG_DET_EINT */
+#define WM8994_IM_AIF1DRC2_SIG_DET_EINT_MASK    0x0400  /* IM_AIF1DRC2_SIG_DET_EINT */
+#define WM8994_IM_AIF1DRC2_SIG_DET_EINT_SHIFT       10  /* IM_AIF1DRC2_SIG_DET_EINT */
+#define WM8994_IM_AIF1DRC2_SIG_DET_EINT_WIDTH        1  /* IM_AIF1DRC2_SIG_DET_EINT */
+#define WM8994_IM_AIF1DRC1_SIG_DET_EINT         0x0200  /* IM_AIF1DRC1_SIG_DET_EINT */
+#define WM8994_IM_AIF1DRC1_SIG_DET_EINT_MASK    0x0200  /* IM_AIF1DRC1_SIG_DET_EINT */
+#define WM8994_IM_AIF1DRC1_SIG_DET_EINT_SHIFT        9  /* IM_AIF1DRC1_SIG_DET_EINT */
+#define WM8994_IM_AIF1DRC1_SIG_DET_EINT_WIDTH        1  /* IM_AIF1DRC1_SIG_DET_EINT */
+#define WM8994_IM_SRC2_LOCK_EINT                0x0100  /* IM_SRC2_LOCK_EINT */
+#define WM8994_IM_SRC2_LOCK_EINT_MASK           0x0100  /* IM_SRC2_LOCK_EINT */
+#define WM8994_IM_SRC2_LOCK_EINT_SHIFT               8  /* IM_SRC2_LOCK_EINT */
+#define WM8994_IM_SRC2_LOCK_EINT_WIDTH               1  /* IM_SRC2_LOCK_EINT */
+#define WM8994_IM_SRC1_LOCK_EINT                0x0080  /* IM_SRC1_LOCK_EINT */
+#define WM8994_IM_SRC1_LOCK_EINT_MASK           0x0080  /* IM_SRC1_LOCK_EINT */
+#define WM8994_IM_SRC1_LOCK_EINT_SHIFT               7  /* IM_SRC1_LOCK_EINT */
+#define WM8994_IM_SRC1_LOCK_EINT_WIDTH               1  /* IM_SRC1_LOCK_EINT */
+#define WM8994_IM_FLL2_LOCK_EINT                0x0040  /* IM_FLL2_LOCK_EINT */
+#define WM8994_IM_FLL2_LOCK_EINT_MASK           0x0040  /* IM_FLL2_LOCK_EINT */
+#define WM8994_IM_FLL2_LOCK_EINT_SHIFT               6  /* IM_FLL2_LOCK_EINT */
+#define WM8994_IM_FLL2_LOCK_EINT_WIDTH               1  /* IM_FLL2_LOCK_EINT */
+#define WM8994_IM_FLL1_LOCK_EINT                0x0020  /* IM_FLL1_LOCK_EINT */
+#define WM8994_IM_FLL1_LOCK_EINT_MASK           0x0020  /* IM_FLL1_LOCK_EINT */
+#define WM8994_IM_FLL1_LOCK_EINT_SHIFT               5  /* IM_FLL1_LOCK_EINT */
+#define WM8994_IM_FLL1_LOCK_EINT_WIDTH               1  /* IM_FLL1_LOCK_EINT */
+#define WM8994_IM_MIC2_SHRT_EINT                0x0010  /* IM_MIC2_SHRT_EINT */
+#define WM8994_IM_MIC2_SHRT_EINT_MASK           0x0010  /* IM_MIC2_SHRT_EINT */
+#define WM8994_IM_MIC2_SHRT_EINT_SHIFT               4  /* IM_MIC2_SHRT_EINT */
+#define WM8994_IM_MIC2_SHRT_EINT_WIDTH               1  /* IM_MIC2_SHRT_EINT */
+#define WM8994_IM_MIC2_DET_EINT                 0x0008  /* IM_MIC2_DET_EINT */
+#define WM8994_IM_MIC2_DET_EINT_MASK            0x0008  /* IM_MIC2_DET_EINT */
+#define WM8994_IM_MIC2_DET_EINT_SHIFT                3  /* IM_MIC2_DET_EINT */
+#define WM8994_IM_MIC2_DET_EINT_WIDTH                1  /* IM_MIC2_DET_EINT */
+#define WM8994_IM_MIC1_SHRT_EINT                0x0004  /* IM_MIC1_SHRT_EINT */
+#define WM8994_IM_MIC1_SHRT_EINT_MASK           0x0004  /* IM_MIC1_SHRT_EINT */
+#define WM8994_IM_MIC1_SHRT_EINT_SHIFT               2  /* IM_MIC1_SHRT_EINT */
+#define WM8994_IM_MIC1_SHRT_EINT_WIDTH               1  /* IM_MIC1_SHRT_EINT */
+#define WM8994_IM_MIC1_DET_EINT                 0x0002  /* IM_MIC1_DET_EINT */
+#define WM8994_IM_MIC1_DET_EINT_MASK            0x0002  /* IM_MIC1_DET_EINT */
+#define WM8994_IM_MIC1_DET_EINT_SHIFT                1  /* IM_MIC1_DET_EINT */
+#define WM8994_IM_MIC1_DET_EINT_WIDTH                1  /* IM_MIC1_DET_EINT */
+#define WM8994_IM_TEMP_SHUT_EINT                0x0001  /* IM_TEMP_SHUT_EINT */
+#define WM8994_IM_TEMP_SHUT_EINT_MASK           0x0001  /* IM_TEMP_SHUT_EINT */
+#define WM8994_IM_TEMP_SHUT_EINT_SHIFT               0  /* IM_TEMP_SHUT_EINT */
+#define WM8994_IM_TEMP_SHUT_EINT_WIDTH               1  /* IM_TEMP_SHUT_EINT */
+
+/*
+ * R1856 (0x740) - Interrupt Control
+ */
+#define WM8994_IM_IRQ                           0x0001  /* IM_IRQ */
+#define WM8994_IM_IRQ_MASK                      0x0001  /* IM_IRQ */
+#define WM8994_IM_IRQ_SHIFT                          0  /* IM_IRQ */
+#define WM8994_IM_IRQ_WIDTH                          1  /* IM_IRQ */
+
+/*
+ * R1864 (0x748) - IRQ Debounce
+ */
+#define WM8994_TEMP_WARN_DB                     0x0020  /* TEMP_WARN_DB */
+#define WM8994_TEMP_WARN_DB_MASK                0x0020  /* TEMP_WARN_DB */
+#define WM8994_TEMP_WARN_DB_SHIFT                    5  /* TEMP_WARN_DB */
+#define WM8994_TEMP_WARN_DB_WIDTH                    1  /* TEMP_WARN_DB */
+#define WM8994_MIC2_SHRT_DB                     0x0010  /* MIC2_SHRT_DB */
+#define WM8994_MIC2_SHRT_DB_MASK                0x0010  /* MIC2_SHRT_DB */
+#define WM8994_MIC2_SHRT_DB_SHIFT                    4  /* MIC2_SHRT_DB */
+#define WM8994_MIC2_SHRT_DB_WIDTH                    1  /* MIC2_SHRT_DB */
+#define WM8994_MIC2_DET_DB                      0x0008  /* MIC2_DET_DB */
+#define WM8994_MIC2_DET_DB_MASK                 0x0008  /* MIC2_DET_DB */
+#define WM8994_MIC2_DET_DB_SHIFT                     3  /* MIC2_DET_DB */
+#define WM8994_MIC2_DET_DB_WIDTH                     1  /* MIC2_DET_DB */
+#define WM8994_MIC1_SHRT_DB                     0x0004  /* MIC1_SHRT_DB */
+#define WM8994_MIC1_SHRT_DB_MASK                0x0004  /* MIC1_SHRT_DB */
+#define WM8994_MIC1_SHRT_DB_SHIFT                    2  /* MIC1_SHRT_DB */
+#define WM8994_MIC1_SHRT_DB_WIDTH                    1  /* MIC1_SHRT_DB */
+#define WM8994_MIC1_DET_DB                      0x0002  /* MIC1_DET_DB */
+#define WM8994_MIC1_DET_DB_MASK                 0x0002  /* MIC1_DET_DB */
+#define WM8994_MIC1_DET_DB_SHIFT                     1  /* MIC1_DET_DB */
+#define WM8994_MIC1_DET_DB_WIDTH                     1  /* MIC1_DET_DB */
+#define WM8994_TEMP_SHUT_DB                     0x0001  /* TEMP_SHUT_DB */
+#define WM8994_TEMP_SHUT_DB_MASK                0x0001  /* TEMP_SHUT_DB */
+#define WM8994_TEMP_SHUT_DB_SHIFT                    0  /* TEMP_SHUT_DB */
+#define WM8994_TEMP_SHUT_DB_WIDTH                    1  /* TEMP_SHUT_DB */
+
+#endif
-- 
cgit v1.2.3


From 9e50108668a70a9927257298bd4e679300124420 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Fri, 29 Jan 2010 18:20:29 +0000
Subject: mfd: Add initial WM8994 support

The WM8994 is a highly integrated ultra low power audio hub CODEC.
Since it includes on-board regulators and GPIOs it is represented
as a multi-function device, though the overwhelming majority of
the functionality is provided by the ASoC CODEC driver.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/Kconfig              |  12 +
 drivers/mfd/Makefile             |   1 +
 drivers/mfd/wm8994-core.c        | 537 +++++++++++++++++++++++++++++++++++++++
 include/linux/mfd/wm8994/core.h  |  54 ++++
 include/linux/mfd/wm8994/gpio.h  |  72 ++++++
 include/linux/mfd/wm8994/pdata.h |  97 +++++++
 6 files changed, 773 insertions(+)
 create mode 100644 drivers/mfd/wm8994-core.c
 create mode 100644 include/linux/mfd/wm8994/core.h
 create mode 100644 include/linux/mfd/wm8994/gpio.h
 create mode 100644 include/linux/mfd/wm8994/pdata.h

(limited to 'include/linux')

diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index a760dbefd27a..64fbe3334eb3 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -295,6 +295,18 @@ config MFD_WM8350_I2C
 	  I2C as the control interface.  Additional options must be
 	  selected to enable support for the functionality of the chip.
 
+config MFD_WM8994
+	tristate "Support Wolfson Microelectronics WM8994"
+	select MFD_CORE
+	depends on I2C
+	help
+	  The WM8994 is a highly integrated hi-fi CODEC designed for
+	  smartphone applicatiosn.  As well as audio functionality it
+	  has on board GPIO and regulator functionality which is
+	  supported via the relevant subsystems.  This driver provides
+	  core support for the WM8994, in order to use the actual
+	  functionaltiy of the device other drivers must be enabled.
+
 config MFD_PCF50633
 	tristate "Support for NXP PCF50633"
 	depends on I2C
diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
index 142d31202b14..878cac691067 100644
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile
@@ -25,6 +25,7 @@ wm8350-objs			:= wm8350-core.o wm8350-regmap.o wm8350-gpio.o
 wm8350-objs			+= wm8350-irq.o
 obj-$(CONFIG_MFD_WM8350)	+= wm8350.o
 obj-$(CONFIG_MFD_WM8350_I2C)	+= wm8350-i2c.o
+obj-$(CONFIG_MFD_WM8994)	+= wm8994-core.o
 
 obj-$(CONFIG_TPS65010)		+= tps65010.o
 obj-$(CONFIG_MENELAUS)		+= menelaus.o
diff --git a/drivers/mfd/wm8994-core.c b/drivers/mfd/wm8994-core.c
new file mode 100644
index 000000000000..299c1af1334e
--- /dev/null
+++ b/drivers/mfd/wm8994-core.c
@@ -0,0 +1,537 @@
+/*
+ * wm8994-core.c  --  Device access for Wolfson WM8994
+ *
+ * Copyright 2009 Wolfson Microelectronics PLC.
+ *
+ * Author: Mark Brown <broonie@opensource.wolfsonmicro.com>
+ *
+ *  This program is free software; you can redistribute  it and/or modify it
+ *  under  the terms of  the GNU General  Public License as published by the
+ *  Free Software Foundation;  either version 2 of the  License, or (at your
+ *  option) any later version.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/i2c.h>
+#include <linux/delay.h>
+#include <linux/mfd/core.h>
+#include <linux/regulator/consumer.h>
+#include <linux/regulator/machine.h>
+
+#include <linux/mfd/wm8994/core.h>
+#include <linux/mfd/wm8994/pdata.h>
+#include <linux/mfd/wm8994/registers.h>
+
+static int wm8994_read(struct wm8994 *wm8994, unsigned short reg,
+		       int bytes, void *dest)
+{
+	int ret, i;
+	u16 *buf = dest;
+
+	BUG_ON(bytes % 2);
+	BUG_ON(bytes <= 0);
+
+	ret = wm8994->read_dev(wm8994, reg, bytes, dest);
+	if (ret < 0)
+		return ret;
+
+	for (i = 0; i < bytes / 2; i++) {
+		buf[i] = be16_to_cpu(buf[i]);
+
+		dev_vdbg(wm8994->dev, "Read %04x from R%d(0x%x)\n",
+			 buf[i], reg + i, reg + i);
+	}
+
+	return 0;
+}
+
+/**
+ * wm8994_reg_read: Read a single WM8994 register.
+ *
+ * @wm8994: Device to read from.
+ * @reg: Register to read.
+ */
+int wm8994_reg_read(struct wm8994 *wm8994, unsigned short reg)
+{
+	unsigned short val;
+	int ret;
+
+	mutex_lock(&wm8994->io_lock);
+
+	ret = wm8994_read(wm8994, reg, 2, &val);
+
+	mutex_unlock(&wm8994->io_lock);
+
+	if (ret < 0)
+		return ret;
+	else
+		return val;
+}
+EXPORT_SYMBOL_GPL(wm8994_reg_read);
+
+/**
+ * wm8994_bulk_read: Read multiple WM8994 registers
+ *
+ * @wm8994: Device to read from
+ * @reg: First register
+ * @count: Number of registers
+ * @buf: Buffer to fill.
+ */
+int wm8994_bulk_read(struct wm8994 *wm8994, unsigned short reg,
+		     int count, u16 *buf)
+{
+	int ret;
+
+	mutex_lock(&wm8994->io_lock);
+
+	ret = wm8994_read(wm8994, reg, count * 2, buf);
+
+	mutex_unlock(&wm8994->io_lock);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(wm8994_bulk_read);
+
+static int wm8994_write(struct wm8994 *wm8994, unsigned short reg,
+			int bytes, void *src)
+{
+	u16 *buf = src;
+	int i;
+
+	BUG_ON(bytes % 2);
+	BUG_ON(bytes <= 0);
+
+	for (i = 0; i < bytes / 2; i++) {
+		dev_vdbg(wm8994->dev, "Write %04x to R%d(0x%x)\n",
+			 buf[i], reg + i, reg + i);
+
+		buf[i] = cpu_to_be16(buf[i]);
+	}
+
+	return wm8994->write_dev(wm8994, reg, bytes, src);
+}
+
+/**
+ * wm8994_reg_write: Write a single WM8994 register.
+ *
+ * @wm8994: Device to write to.
+ * @reg: Register to write to.
+ * @val: Value to write.
+ */
+int wm8994_reg_write(struct wm8994 *wm8994, unsigned short reg,
+		     unsigned short val)
+{
+	int ret;
+
+	mutex_lock(&wm8994->io_lock);
+
+	ret = wm8994_write(wm8994, reg, 2, &val);
+
+	mutex_unlock(&wm8994->io_lock);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(wm8994_reg_write);
+
+/**
+ * wm8994_set_bits: Set the value of a bitfield in a WM8994 register
+ *
+ * @wm8994: Device to write to.
+ * @reg: Register to write to.
+ * @mask: Mask of bits to set.
+ * @val: Value to set (unshifted)
+ */
+int wm8994_set_bits(struct wm8994 *wm8994, unsigned short reg,
+		    unsigned short mask, unsigned short val)
+{
+	int ret;
+	u16 r;
+
+	mutex_lock(&wm8994->io_lock);
+
+	ret = wm8994_read(wm8994, reg, 2, &r);
+	if (ret < 0)
+		goto out;
+
+	r &= ~mask;
+	r |= val;
+
+	ret = wm8994_write(wm8994, reg, 2, &r);
+
+out:
+	mutex_unlock(&wm8994->io_lock);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(wm8994_set_bits);
+
+static struct mfd_cell wm8994_regulator_devs[] = {
+	{ .name = "wm8994-ldo", .id = 1 },
+	{ .name = "wm8994-ldo", .id = 2 },
+};
+
+static struct mfd_cell wm8994_devs[] = {
+	{ .name = "wm8994-codec" },
+	{ .name = "wm8994-gpio" },
+};
+
+/*
+ * Supplies for the main bulk of CODEC; the LDO supplies are ignored
+ * and should be handled via the standard regulator API supply
+ * management.
+ */
+static const char *wm8994_main_supplies[] = {
+	"DBVDD",
+	"DCVDD",
+	"AVDD1",
+	"AVDD2",
+	"CPVDD",
+	"SPKVDD1",
+	"SPKVDD2",
+};
+
+#ifdef CONFIG_PM
+static int wm8994_device_suspend(struct device *dev)
+{
+	struct wm8994 *wm8994 = dev_get_drvdata(dev);
+	int ret;
+
+	/* GPIO configuration state is saved here since we may be configuring
+	 * the GPIO alternate functions even if we're not using the gpiolib
+	 * driver for them.
+	 */
+	ret = wm8994_read(wm8994, WM8994_GPIO_1, WM8994_NUM_GPIO_REGS * 2,
+			  &wm8994->gpio_regs);
+	if (ret < 0)
+		dev_err(dev, "Failed to save GPIO registers: %d\n", ret);
+
+	/* For similar reasons we also stash the regulator states */
+	ret = wm8994_read(wm8994, WM8994_LDO_1, WM8994_NUM_LDO_REGS * 2,
+			  &wm8994->ldo_regs);
+	if (ret < 0)
+		dev_err(dev, "Failed to save LDO registers: %d\n", ret);
+
+	ret = regulator_bulk_disable(ARRAY_SIZE(wm8994_main_supplies),
+				     wm8994->supplies);
+	if (ret != 0) {
+		dev_err(dev, "Failed to disable supplies: %d\n", ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+static int wm8994_device_resume(struct device *dev)
+{
+	struct wm8994 *wm8994 = dev_get_drvdata(dev);
+	int ret;
+
+	ret = regulator_bulk_enable(ARRAY_SIZE(wm8994_main_supplies),
+				    wm8994->supplies);
+	if (ret != 0) {
+		dev_err(dev, "Failed to enable supplies: %d\n", ret);
+		return ret;
+	}
+
+	ret = wm8994_write(wm8994, WM8994_LDO_1, WM8994_NUM_LDO_REGS * 2,
+			   &wm8994->ldo_regs);
+	if (ret < 0)
+		dev_err(dev, "Failed to restore LDO registers: %d\n", ret);
+
+	ret = wm8994_write(wm8994, WM8994_GPIO_1, WM8994_NUM_GPIO_REGS * 2,
+			   &wm8994->gpio_regs);
+	if (ret < 0)
+		dev_err(dev, "Failed to restore GPIO registers: %d\n", ret);
+
+	return 0;
+}
+#endif
+
+#ifdef CONFIG_REGULATOR
+static int wm8994_ldo_in_use(struct wm8994_pdata *pdata, int ldo)
+{
+	struct wm8994_ldo_pdata *ldo_pdata;
+
+	if (!pdata)
+		return 0;
+
+	ldo_pdata = &pdata->ldo[ldo];
+
+	if (!ldo_pdata->init_data)
+		return 0;
+
+	return ldo_pdata->init_data->num_consumer_supplies != 0;
+}
+#else
+static int wm8994_ldo_in_use(struct wm8994_pdata *pdata, int ldo)
+{
+	return 0;
+}
+#endif
+
+/*
+ * Instantiate the generic non-control parts of the device.
+ */
+static int wm8994_device_init(struct wm8994 *wm8994, unsigned long id, int irq)
+{
+	struct wm8994_pdata *pdata = wm8994->dev->platform_data;
+	int ret, i;
+
+	mutex_init(&wm8994->io_lock);
+	dev_set_drvdata(wm8994->dev, wm8994);
+
+	/* Add the on-chip regulators first for bootstrapping */
+	ret = mfd_add_devices(wm8994->dev, -1,
+			      wm8994_regulator_devs,
+			      ARRAY_SIZE(wm8994_regulator_devs),
+			      NULL, 0);
+	if (ret != 0) {
+		dev_err(wm8994->dev, "Failed to add children: %d\n", ret);
+		goto err;
+	}
+
+	wm8994->supplies = kzalloc(sizeof(struct regulator_bulk_data) *
+				   ARRAY_SIZE(wm8994_main_supplies),
+				   GFP_KERNEL);
+	if (!wm8994->supplies)
+		goto err;
+
+	for (i = 0; i < ARRAY_SIZE(wm8994_main_supplies); i++)
+		wm8994->supplies[i].supply = wm8994_main_supplies[i];
+
+	ret = regulator_bulk_get(wm8994->dev, ARRAY_SIZE(wm8994_main_supplies),
+				 wm8994->supplies);
+	if (ret != 0) {
+		dev_err(wm8994->dev, "Failed to get supplies: %d\n", ret);
+		goto err_get;
+	}
+
+	ret = regulator_bulk_enable(ARRAY_SIZE(wm8994_main_supplies),
+				    wm8994->supplies);
+	if (ret != 0) {
+		dev_err(wm8994->dev, "Failed to enable supplies: %d\n", ret);
+		goto err_supplies;
+	}
+
+	ret = wm8994_reg_read(wm8994, WM8994_SOFTWARE_RESET);
+	if (ret < 0) {
+		dev_err(wm8994->dev, "Failed to read ID register\n");
+		goto err_enable;
+	}
+	if (ret != 0x8994) {
+		dev_err(wm8994->dev, "Device is not a WM8994, ID is %x\n",
+			ret);
+		ret = -EINVAL;
+		goto err_enable;
+	}
+
+	ret = wm8994_reg_read(wm8994, WM8994_CHIP_REVISION);
+	if (ret < 0) {
+		dev_err(wm8994->dev, "Failed to read revision register: %d\n",
+			ret);
+		goto err_enable;
+	}
+
+	switch (ret) {
+	case 0:
+	case 1:
+		dev_warn(wm8994->dev, "revision %c not fully supported\n",
+			'A' + ret);
+		break;
+	default:
+		dev_info(wm8994->dev, "revision %c\n", 'A' + ret);
+		break;
+	}
+
+
+	if (pdata) {
+		wm8994->gpio_base = pdata->gpio_base;
+
+		/* GPIO configuration is only applied if it's non-zero */
+		for (i = 0; i < ARRAY_SIZE(pdata->gpio_defaults); i++) {
+			if (pdata->gpio_defaults[i]) {
+				wm8994_set_bits(wm8994, WM8994_GPIO_1 + i,
+						0xffff,
+						pdata->gpio_defaults[i]);
+			}
+		}
+	}
+
+	/* In some system designs where the regulators are not in use,
+	 * we can achieve a small reduction in leakage currents by
+	 * floating LDO outputs.  This bit makes no difference if the
+	 * LDOs are enabled, it only affects cases where the LDOs were
+	 * in operation and are then disabled.
+	 */
+	for (i = 0; i < WM8994_NUM_LDO_REGS; i++) {
+		if (wm8994_ldo_in_use(pdata, i))
+			wm8994_set_bits(wm8994, WM8994_LDO_1 + i,
+					WM8994_LDO1_DISCH, WM8994_LDO1_DISCH);
+		else
+			wm8994_set_bits(wm8994, WM8994_LDO_1 + i,
+					WM8994_LDO1_DISCH, 0);
+	}
+
+	ret = mfd_add_devices(wm8994->dev, -1,
+			      wm8994_devs, ARRAY_SIZE(wm8994_devs),
+			      NULL, 0);
+	if (ret != 0) {
+		dev_err(wm8994->dev, "Failed to add children: %d\n", ret);
+		goto err_enable;
+	}
+
+	return 0;
+
+err_enable:
+	regulator_bulk_disable(ARRAY_SIZE(wm8994_main_supplies),
+			       wm8994->supplies);
+err_get:
+	regulator_bulk_free(ARRAY_SIZE(wm8994_main_supplies), wm8994->supplies);
+err_supplies:
+	kfree(wm8994->supplies);
+err:
+	mfd_remove_devices(wm8994->dev);
+	kfree(wm8994);
+	return ret;
+}
+
+static void wm8994_device_exit(struct wm8994 *wm8994)
+{
+	mfd_remove_devices(wm8994->dev);
+	regulator_bulk_disable(ARRAY_SIZE(wm8994_main_supplies),
+			       wm8994->supplies);
+	regulator_bulk_free(ARRAY_SIZE(wm8994_main_supplies), wm8994->supplies);
+	kfree(wm8994->supplies);
+	kfree(wm8994);
+}
+
+static int wm8994_i2c_read_device(struct wm8994 *wm8994, unsigned short reg,
+				  int bytes, void *dest)
+{
+	struct i2c_client *i2c = wm8994->control_data;
+	int ret;
+	u16 r = cpu_to_be16(reg);
+
+	ret = i2c_master_send(i2c, (unsigned char *)&r, 2);
+	if (ret < 0)
+		return ret;
+	if (ret != 2)
+		return -EIO;
+
+	ret = i2c_master_recv(i2c, dest, bytes);
+	if (ret < 0)
+		return ret;
+	if (ret != bytes)
+		return -EIO;
+	return 0;
+}
+
+/* Currently we allocate the write buffer on the stack; this is OK for
+ * small writes - if we need to do large writes this will need to be
+ * revised.
+ */
+static int wm8994_i2c_write_device(struct wm8994 *wm8994, unsigned short reg,
+				   int bytes, void *src)
+{
+	struct i2c_client *i2c = wm8994->control_data;
+	unsigned char msg[bytes + 2];
+	int ret;
+
+	reg = cpu_to_be16(reg);
+	memcpy(&msg[0], &reg, 2);
+	memcpy(&msg[2], src, bytes);
+
+	ret = i2c_master_send(i2c, msg, bytes + 2);
+	if (ret < 0)
+		return ret;
+	if (ret < bytes + 2)
+		return -EIO;
+
+	return 0;
+}
+
+static int wm8994_i2c_probe(struct i2c_client *i2c,
+			    const struct i2c_device_id *id)
+{
+	struct wm8994 *wm8994;
+
+	wm8994 = kzalloc(sizeof(struct wm8994), GFP_KERNEL);
+	if (wm8994 == NULL) {
+		kfree(i2c);
+		return -ENOMEM;
+	}
+
+	i2c_set_clientdata(i2c, wm8994);
+	wm8994->dev = &i2c->dev;
+	wm8994->control_data = i2c;
+	wm8994->read_dev = wm8994_i2c_read_device;
+	wm8994->write_dev = wm8994_i2c_write_device;
+
+	return wm8994_device_init(wm8994, id->driver_data, i2c->irq);
+}
+
+static int wm8994_i2c_remove(struct i2c_client *i2c)
+{
+	struct wm8994 *wm8994 = i2c_get_clientdata(i2c);
+
+	wm8994_device_exit(wm8994);
+
+	return 0;
+}
+
+#ifdef CONFIG_PM
+static int wm8994_i2c_suspend(struct i2c_client *i2c, pm_message_t state)
+{
+	return wm8994_device_suspend(&i2c->dev);
+}
+
+static int wm8994_i2c_resume(struct i2c_client *i2c)
+{
+	return wm8994_device_resume(&i2c->dev);
+}
+#else
+#define wm8994_i2c_suspend NULL
+#define wm8994_i2c_resume NULL
+#endif
+
+static const struct i2c_device_id wm8994_i2c_id[] = {
+	{ "wm8994", 0 },
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, wm8994_i2c_id);
+
+static struct i2c_driver wm8994_i2c_driver = {
+	.driver = {
+		   .name = "wm8994",
+		   .owner = THIS_MODULE,
+	},
+	.probe = wm8994_i2c_probe,
+	.remove = wm8994_i2c_remove,
+	.suspend = wm8994_i2c_suspend,
+	.resume = wm8994_i2c_resume,
+	.id_table = wm8994_i2c_id,
+};
+
+static int __init wm8994_i2c_init(void)
+{
+	int ret;
+
+	ret = i2c_add_driver(&wm8994_i2c_driver);
+	if (ret != 0)
+		pr_err("Failed to register wm8994 I2C driver: %d\n", ret);
+
+	return ret;
+}
+module_init(wm8994_i2c_init);
+
+static void __exit wm8994_i2c_exit(void)
+{
+	i2c_del_driver(&wm8994_i2c_driver);
+}
+module_exit(wm8994_i2c_exit);
+
+MODULE_DESCRIPTION("Core support for the WM8994 audio CODEC");
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Mark Brown <broonie@opensource.wolfsonmicro.com>");
diff --git a/include/linux/mfd/wm8994/core.h b/include/linux/mfd/wm8994/core.h
new file mode 100644
index 000000000000..b06ff2846748
--- /dev/null
+++ b/include/linux/mfd/wm8994/core.h
@@ -0,0 +1,54 @@
+/*
+ * include/linux/mfd/wm8994/core.h -- Core interface for WM8994
+ *
+ * Copyright 2009 Wolfson Microelectronics PLC.
+ *
+ * Author: Mark Brown <broonie@opensource.wolfsonmicro.com>
+ *
+ *  This program is free software; you can redistribute  it and/or modify it
+ *  under  the terms of  the GNU General  Public License as published by the
+ *  Free Software Foundation;  either version 2 of the  License, or (at your
+ *  option) any later version.
+ *
+ */
+
+#ifndef __MFD_WM8994_CORE_H__
+#define __MFD_WM8994_CORE_H__
+
+struct regulator_dev;
+struct regulator_bulk_data;
+
+#define WM8994_NUM_GPIO_REGS 11
+#define WM8994_NUM_LDO_REGS 2
+
+struct wm8994 {
+	struct mutex io_lock;
+
+	struct device *dev;
+	int (*read_dev)(struct wm8994 *wm8994, unsigned short reg,
+			int bytes, void *dest);
+	int (*write_dev)(struct wm8994 *wm8994, unsigned short reg,
+			 int bytes, void *src);
+
+	void *control_data;
+
+	int gpio_base;
+
+	/* Used over suspend/resume */
+	u16 ldo_regs[WM8994_NUM_LDO_REGS];
+	u16 gpio_regs[WM8994_NUM_GPIO_REGS];
+
+	struct regulator_dev *dbvdd;
+	struct regulator_bulk_data *supplies;
+};
+
+/* Device I/O API */
+int wm8994_reg_read(struct wm8994 *wm8994, unsigned short reg);
+int wm8994_reg_write(struct wm8994 *wm8994, unsigned short reg,
+		 unsigned short val);
+int wm8994_set_bits(struct wm8994 *wm8994, unsigned short reg,
+		    unsigned short mask, unsigned short val);
+int wm8994_bulk_read(struct wm8994 *wm8994, unsigned short reg,
+		     int count, u16 *buf);
+
+#endif
diff --git a/include/linux/mfd/wm8994/gpio.h b/include/linux/mfd/wm8994/gpio.h
new file mode 100644
index 000000000000..b4d4c22991e8
--- /dev/null
+++ b/include/linux/mfd/wm8994/gpio.h
@@ -0,0 +1,72 @@
+/*
+ * include/linux/mfd/wm8994/gpio.h - GPIO configuration for WM8994
+ *
+ * Copyright 2009 Wolfson Microelectronics PLC.
+ *
+ * Author: Mark Brown <broonie@opensource.wolfsonmicro.com>
+ *
+ *  This program is free software; you can redistribute  it and/or modify it
+ *  under  the terms of  the GNU General  Public License as published by the
+ *  Free Software Foundation;  either version 2 of the  License, or (at your
+ *  option) any later version.
+ *
+ */
+
+#ifndef __MFD_WM8994_GPIO_H__
+#define __MFD_WM8994_GPIO_H__
+
+#define WM8994_GPIO_MAX 11
+
+#define WM8994_GP_FN_PIN_SPECIFIC    0
+#define WM8994_GP_FN_GPIO            1
+#define WM8994_GP_FN_SDOUT           2
+#define WM8994_GP_FN_IRQ             3
+#define WM8994_GP_FN_TEMPERATURE     4
+#define WM8994_GP_FN_MICBIAS1_DET    5
+#define WM8994_GP_FN_MICBIAS1_SHORT  6
+#define WM8994_GP_FN_MICBIAS2_DET    7
+#define WM8994_GP_FN_MICBIAS2_SHORT  8
+#define WM8994_GP_FN_FLL1_LOCK       9
+#define WM8994_GP_FN_FLL2_LOCK      10
+#define WM8994_GP_FN_SRC1_LOCK      11
+#define WM8994_GP_FN_SRC2_LOCK      12
+#define WM8994_GP_FN_DRC1_ACT       13
+#define WM8994_GP_FN_DRC2_ACT       14
+#define WM8994_GP_FN_DRC3_ACT       15
+#define WM8994_GP_FN_WSEQ_STATUS    16
+#define WM8994_GP_FN_FIFO_ERROR     17
+#define WM8994_GP_FN_OPCLK          18
+
+#define WM8994_GPN_DIR                          0x8000  /* GPN_DIR */
+#define WM8994_GPN_DIR_MASK                     0x8000  /* GPN_DIR */
+#define WM8994_GPN_DIR_SHIFT                        15  /* GPN_DIR */
+#define WM8994_GPN_DIR_WIDTH                         1  /* GPN_DIR */
+#define WM8994_GPN_PU                           0x4000  /* GPN_PU */
+#define WM8994_GPN_PU_MASK                      0x4000  /* GPN_PU */
+#define WM8994_GPN_PU_SHIFT                         14  /* GPN_PU */
+#define WM8994_GPN_PU_WIDTH                          1  /* GPN_PU */
+#define WM8994_GPN_PD                           0x2000  /* GPN_PD */
+#define WM8994_GPN_PD_MASK                      0x2000  /* GPN_PD */
+#define WM8994_GPN_PD_SHIFT                         13  /* GPN_PD */
+#define WM8994_GPN_PD_WIDTH                          1  /* GPN_PD */
+#define WM8994_GPN_POL                          0x0400  /* GPN_POL */
+#define WM8994_GPN_POL_MASK                     0x0400  /* GPN_POL */
+#define WM8994_GPN_POL_SHIFT                        10  /* GPN_POL */
+#define WM8994_GPN_POL_WIDTH                         1  /* GPN_POL */
+#define WM8994_GPN_OP_CFG                       0x0200  /* GPN_OP_CFG */
+#define WM8994_GPN_OP_CFG_MASK                  0x0200  /* GPN_OP_CFG */
+#define WM8994_GPN_OP_CFG_SHIFT                      9  /* GPN_OP_CFG */
+#define WM8994_GPN_OP_CFG_WIDTH                      1  /* GPN_OP_CFG */
+#define WM8994_GPN_DB                           0x0100  /* GPN_DB */
+#define WM8994_GPN_DB_MASK                      0x0100  /* GPN_DB */
+#define WM8994_GPN_DB_SHIFT                          8  /* GPN_DB */
+#define WM8994_GPN_DB_WIDTH                          1  /* GPN_DB */
+#define WM8994_GPN_LVL                          0x0040  /* GPN_LVL */
+#define WM8994_GPN_LVL_MASK                     0x0040  /* GPN_LVL */
+#define WM8994_GPN_LVL_SHIFT                         6  /* GPN_LVL */
+#define WM8994_GPN_LVL_WIDTH                         1  /* GPN_LVL */
+#define WM8994_GPN_FN_MASK                      0x001F  /* GPN_FN - [4:0] */
+#define WM8994_GPN_FN_SHIFT                          0  /* GPN_FN - [4:0] */
+#define WM8994_GPN_FN_WIDTH                          5  /* GPN_FN - [4:0] */
+
+#endif
diff --git a/include/linux/mfd/wm8994/pdata.h b/include/linux/mfd/wm8994/pdata.h
new file mode 100644
index 000000000000..70d6a8687dc5
--- /dev/null
+++ b/include/linux/mfd/wm8994/pdata.h
@@ -0,0 +1,97 @@
+/*
+ * include/linux/mfd/wm8994/pdata.h -- Platform data for WM8994
+ *
+ * Copyright 2009 Wolfson Microelectronics PLC.
+ *
+ * Author: Mark Brown <broonie@opensource.wolfsonmicro.com>
+ *
+ *  This program is free software; you can redistribute  it and/or modify it
+ *  under  the terms of  the GNU General  Public License as published by the
+ *  Free Software Foundation;  either version 2 of the  License, or (at your
+ *  option) any later version.
+ *
+ */
+
+#ifndef __MFD_WM8994_PDATA_H__
+#define __MFD_WM8994_PDATA_H__
+
+#define WM8994_NUM_LDO   2
+#define WM8994_NUM_GPIO 11
+
+struct wm8994_ldo_pdata {
+	/** GPIOs to enable regulator, 0 or less if not available */
+	int enable;
+
+	const char *supply;
+	struct regulator_init_data *init_data;
+};
+
+#define WM8994_CONFIGURE_GPIO 0x8000
+
+#define WM8994_DRC_REGS 5
+#define WM8994_EQ_REGS  19
+
+/**
+ * DRC configurations are specified with a label and a set of register
+ * values to write (the enable bits will be ignored).  At runtime an
+ * enumerated control will be presented for each DRC block allowing
+ * the user to choose the configration to use.
+ *
+ * Configurations may be generated by hand or by using the DRC control
+ * panel provided by the WISCE - see  http://www.wolfsonmicro.com/wisce/
+ * for details.
+ */
+struct wm8994_drc_cfg {
+        const char *name;
+        u16 regs[WM8994_DRC_REGS];
+};
+
+/**
+ * ReTune Mobile configurations are specified with a label, sample
+ * rate and set of values to write (the enable bits will be ignored).
+ *
+ * Configurations are expected to be generated using the ReTune Mobile
+ * control panel in WISCE - see http://www.wolfsonmicro.com/wisce/
+ */
+struct wm8994_retune_mobile_cfg {
+        const char *name;
+        unsigned int rate;
+        u16 regs[WM8994_EQ_REGS];
+};
+
+struct wm8994_pdata {
+	int gpio_base;
+
+	/**
+	 * Default values for GPIOs if non-zero, WM8994_CONFIGURE_GPIO
+	 * can be used for all zero values.
+	 */
+	int gpio_defaults[WM8994_NUM_GPIO];
+
+	struct wm8994_ldo_pdata ldo[WM8994_NUM_LDO];
+
+
+        int num_drc_cfgs;
+        struct wm8994_drc_cfg *drc_cfgs;
+
+        int num_retune_mobile_cfgs;
+        struct wm8994_retune_mobile_cfg *retune_mobile_cfgs;
+
+        /* LINEOUT can be differential or single ended */
+        unsigned int lineout1_diff:1;
+        unsigned int lineout2_diff:1;
+
+        /* Common mode feedback */
+        unsigned int lineout1fb:1;
+        unsigned int lineout2fb:1;
+
+        /* Microphone biases: 0=0.9*AVDD1 1=0.65*AVVD1 */
+        unsigned int micbias1_lvl:1;
+        unsigned int micbias2_lvl:1;
+
+        /* Jack detect threashold levels, see datasheet for values */
+        unsigned int jd_scthr:2;
+        unsigned int jd_thr:2;
+};
+
+#endif
-- 
cgit v1.2.3


From 1f1cf8f98cf6588365efeaab8e7e7758aaa77f6e Mon Sep 17 00:00:00 2001
From: Haojian Zhuang <haojian.zhuang@marvell.com>
Date: Fri, 5 Feb 2010 16:07:54 +0100
Subject: mfd: Update irq handler in max8925

Update thread irq handler. Simply the interface of using thread irq.

Signed-off-by: Haojian Zhuang <haojian.zhuang@marvell.com>
Acked-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/Kconfig         |   4 +-
 drivers/mfd/max8925-core.c  | 678 ++++++++++++++++++++++++++++++--------------
 include/linux/mfd/max8925.h | 139 ++++++---
 3 files changed, 564 insertions(+), 257 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index 64fbe3334eb3..84a68d260772 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -204,8 +204,8 @@ config PMIC_ADP5520
 	  under the corresponding menus.
 
 config MFD_MAX8925
-	tristate "Maxim Semiconductor MAX8925 PMIC Support"
-	depends on I2C
+	bool "Maxim Semiconductor MAX8925 PMIC Support"
+	depends on I2C=y
 	select MFD_CORE
 	help
 	  Say yes here to support for Maxim Semiconductor MAX8925. This is
diff --git a/drivers/mfd/max8925-core.c b/drivers/mfd/max8925-core.c
index f36c494b80f1..85d63c04749b 100644
--- a/drivers/mfd/max8925-core.c
+++ b/drivers/mfd/max8925-core.c
@@ -1,7 +1,7 @@
 /*
  * Base driver for Maxim MAX8925
  *
- * Copyright (C) 2009 Marvell International Ltd.
+ * Copyright (C) 2009-2010 Marvell International Ltd.
  *	Haojian Zhuang <haojian.zhuang@marvell.com>
  *
  * This program is free software; you can redistribute it and/or modify
@@ -12,14 +12,12 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/i2c.h>
+#include <linux/irq.h>
 #include <linux/interrupt.h>
 #include <linux/platform_device.h>
 #include <linux/mfd/core.h>
 #include <linux/mfd/max8925.h>
 
-#define IRQ_MODE_STATUS		0
-#define IRQ_MODE_MASK		1
-
 static struct resource backlight_resources[] = {
 	{
 		.name	= "max8925-backlight",
@@ -56,6 +54,42 @@ static struct mfd_cell touch_devs[] = {
 	},
 };
 
+static struct resource power_supply_resources[] = {
+	{
+		.name	= "max8925-power",
+		.start	= MAX8925_CHG_IRQ1,
+		.end	= MAX8925_CHG_IRQ1_MASK,
+		.flags	= IORESOURCE_IO,
+	},
+};
+
+static struct mfd_cell power_devs[] = {
+	{
+		.name		= "max8925-power",
+		.num_resources	= 1,
+		.resources	= &power_supply_resources[0],
+		.id		= -1,
+	},
+};
+
+static struct resource rtc_resources[] = {
+	{
+		.name	= "max8925-rtc",
+		.start	= MAX8925_RTC_IRQ,
+		.end	= MAX8925_RTC_IRQ_MASK,
+		.flags	= IORESOURCE_IO,
+	},
+};
+
+static struct mfd_cell rtc_devs[] = {
+	{
+		.name		= "max8925-rtc",
+		.num_resources	= 1,
+		.resources	= &rtc_resources[0],
+		.id		= -1,
+	},
+};
+
 #define MAX8925_REG_RESOURCE(_start, _end)	\
 {						\
 	.start	= MAX8925_##_start,		\
@@ -123,203 +157,450 @@ static struct mfd_cell regulator_devs[] = {
 	MAX8925_REG_DEVS(LDO20),
 };
 
-static int __get_irq_offset(struct max8925_chip *chip, int irq, int mode,
-			    int *offset, int *bit)
+enum {
+	FLAGS_ADC = 1,	/* register in ADC component */
+	FLAGS_RTC,	/* register in RTC component */
+};
+
+struct max8925_irq_data {
+	int	reg;
+	int	mask_reg;
+	int	enable;		/* enable or not */
+	int	offs;		/* bit offset in mask register */
+	int	flags;
+	int	tsc_irq;
+};
+
+static struct max8925_irq_data max8925_irqs[] = {
+	[MAX8925_IRQ_VCHG_DC_OVP] = {
+		.reg		= MAX8925_CHG_IRQ1,
+		.mask_reg	= MAX8925_CHG_IRQ1_MASK,
+		.offs		= 1 << 0,
+	},
+	[MAX8925_IRQ_VCHG_DC_F] = {
+		.reg		= MAX8925_CHG_IRQ1,
+		.mask_reg	= MAX8925_CHG_IRQ1_MASK,
+		.offs		= 1 << 1,
+	},
+	[MAX8925_IRQ_VCHG_DC_R] = {
+		.reg		= MAX8925_CHG_IRQ1,
+		.mask_reg	= MAX8925_CHG_IRQ1_MASK,
+		.offs		= 1 << 2,
+	},
+	[MAX8925_IRQ_VCHG_USB_OVP] = {
+		.reg		= MAX8925_CHG_IRQ1,
+		.mask_reg	= MAX8925_CHG_IRQ1_MASK,
+		.offs		= 1 << 3,
+	},
+	[MAX8925_IRQ_VCHG_USB_F] =  {
+		.reg		= MAX8925_CHG_IRQ1,
+		.mask_reg	= MAX8925_CHG_IRQ1_MASK,
+		.offs		= 1 << 4,
+	},
+	[MAX8925_IRQ_VCHG_USB_R] = {
+		.reg		= MAX8925_CHG_IRQ1,
+		.mask_reg	= MAX8925_CHG_IRQ1_MASK,
+		.offs		= 1 << 5,
+	},
+	[MAX8925_IRQ_VCHG_THM_OK_R] = {
+		.reg		= MAX8925_CHG_IRQ2,
+		.mask_reg	= MAX8925_CHG_IRQ2_MASK,
+		.offs		= 1 << 0,
+	},
+	[MAX8925_IRQ_VCHG_THM_OK_F] = {
+		.reg		= MAX8925_CHG_IRQ2,
+		.mask_reg	= MAX8925_CHG_IRQ2_MASK,
+		.offs		= 1 << 1,
+	},
+	[MAX8925_IRQ_VCHG_SYSLOW_F] = {
+		.reg		= MAX8925_CHG_IRQ2,
+		.mask_reg	= MAX8925_CHG_IRQ2_MASK,
+		.offs		= 1 << 2,
+	},
+	[MAX8925_IRQ_VCHG_SYSLOW_R] = {
+		.reg		= MAX8925_CHG_IRQ2,
+		.mask_reg	= MAX8925_CHG_IRQ2_MASK,
+		.offs		= 1 << 3,
+	},
+	[MAX8925_IRQ_VCHG_RST] = {
+		.reg		= MAX8925_CHG_IRQ2,
+		.mask_reg	= MAX8925_CHG_IRQ2_MASK,
+		.offs		= 1 << 4,
+	},
+	[MAX8925_IRQ_VCHG_DONE] = {
+		.reg		= MAX8925_CHG_IRQ2,
+		.mask_reg	= MAX8925_CHG_IRQ2_MASK,
+		.offs		= 1 << 5,
+	},
+	[MAX8925_IRQ_VCHG_TOPOFF] = {
+		.reg		= MAX8925_CHG_IRQ2,
+		.mask_reg	= MAX8925_CHG_IRQ2_MASK,
+		.offs		= 1 << 6,
+	},
+	[MAX8925_IRQ_VCHG_TMR_FAULT] = {
+		.reg		= MAX8925_CHG_IRQ2,
+		.mask_reg	= MAX8925_CHG_IRQ2_MASK,
+		.offs		= 1 << 7,
+	},
+	[MAX8925_IRQ_GPM_RSTIN] = {
+		.reg		= MAX8925_ON_OFF_IRQ1,
+		.mask_reg	= MAX8925_ON_OFF_IRQ1_MASK,
+		.offs		= 1 << 0,
+	},
+	[MAX8925_IRQ_GPM_MPL] = {
+		.reg		= MAX8925_ON_OFF_IRQ1,
+		.mask_reg	= MAX8925_ON_OFF_IRQ1_MASK,
+		.offs		= 1 << 1,
+	},
+	[MAX8925_IRQ_GPM_SW_3SEC] = {
+		.reg		= MAX8925_ON_OFF_IRQ1,
+		.mask_reg	= MAX8925_ON_OFF_IRQ1_MASK,
+		.offs		= 1 << 2,
+	},
+	[MAX8925_IRQ_GPM_EXTON_F] = {
+		.reg		= MAX8925_ON_OFF_IRQ1,
+		.mask_reg	= MAX8925_ON_OFF_IRQ1_MASK,
+		.offs		= 1 << 3,
+	},
+	[MAX8925_IRQ_GPM_EXTON_R] = {
+		.reg		= MAX8925_ON_OFF_IRQ1,
+		.mask_reg	= MAX8925_ON_OFF_IRQ1_MASK,
+		.offs		= 1 << 4,
+	},
+	[MAX8925_IRQ_GPM_SW_1SEC] = {
+		.reg		= MAX8925_ON_OFF_IRQ1,
+		.mask_reg	= MAX8925_ON_OFF_IRQ1_MASK,
+		.offs		= 1 << 5,
+	},
+	[MAX8925_IRQ_GPM_SW_F] = {
+		.reg		= MAX8925_ON_OFF_IRQ1,
+		.mask_reg	= MAX8925_ON_OFF_IRQ1_MASK,
+		.offs		= 1 << 6,
+	},
+	[MAX8925_IRQ_GPM_SW_R] = {
+		.reg		= MAX8925_ON_OFF_IRQ1,
+		.mask_reg	= MAX8925_ON_OFF_IRQ1_MASK,
+		.offs		= 1 << 7,
+	},
+	[MAX8925_IRQ_GPM_SYSCKEN_F] = {
+		.reg		= MAX8925_ON_OFF_IRQ2,
+		.mask_reg	= MAX8925_ON_OFF_IRQ2_MASK,
+		.offs		= 1 << 0,
+	},
+	[MAX8925_IRQ_GPM_SYSCKEN_R] = {
+		.reg		= MAX8925_ON_OFF_IRQ2,
+		.mask_reg	= MAX8925_ON_OFF_IRQ2_MASK,
+		.offs		= 1 << 1,
+	},
+	[MAX8925_IRQ_RTC_ALARM1] = {
+		.reg		= MAX8925_RTC_IRQ,
+		.mask_reg	= MAX8925_RTC_IRQ_MASK,
+		.offs		= 1 << 2,
+		.flags		= FLAGS_RTC,
+	},
+	[MAX8925_IRQ_RTC_ALARM0] = {
+		.reg		= MAX8925_RTC_IRQ,
+		.mask_reg	= MAX8925_RTC_IRQ_MASK,
+		.offs		= 1 << 3,
+		.flags		= FLAGS_RTC,
+	},
+	[MAX8925_IRQ_TSC_STICK] = {
+		.reg		= MAX8925_TSC_IRQ,
+		.mask_reg	= MAX8925_TSC_IRQ_MASK,
+		.offs		= 1 << 0,
+		.flags		= FLAGS_ADC,
+		.tsc_irq	= 1,
+	},
+	[MAX8925_IRQ_TSC_NSTICK] = {
+		.reg		= MAX8925_TSC_IRQ,
+		.mask_reg	= MAX8925_TSC_IRQ_MASK,
+		.offs		= 1 << 1,
+		.flags		= FLAGS_ADC,
+		.tsc_irq	= 1,
+	},
+};
+
+static inline struct max8925_irq_data *irq_to_max8925(struct max8925_chip *chip,
+						      int irq)
 {
-	if (!offset || !bit)
-		return -EINVAL;
+	return &max8925_irqs[irq - chip->irq_base];
+}
 
-	switch (chip->chip_id) {
-	case MAX8925_GPM:
-		*bit = irq % BITS_PER_BYTE;
-		if (irq < (BITS_PER_BYTE << 1)) {	/* irq = [0,15] */
-			*offset = (mode) ? MAX8925_CHG_IRQ1_MASK
-				: MAX8925_CHG_IRQ1;
-			if (irq >= BITS_PER_BYTE)
-				(*offset)++;
-		} else {				/* irq = [16,31] */
-			*offset = (mode) ? MAX8925_ON_OFF_IRQ1_MASK
-				: MAX8925_ON_OFF_IRQ1;
-			if (irq >= (BITS_PER_BYTE * 3))
-				(*offset)++;
+static irqreturn_t max8925_irq(int irq, void *data)
+{
+	struct max8925_chip *chip = data;
+	struct max8925_irq_data *irq_data;
+	struct i2c_client *i2c;
+	int read_reg = -1, value = 0;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(max8925_irqs); i++) {
+		irq_data = &max8925_irqs[i];
+		/* TSC IRQ should be serviced in max8925_tsc_irq() */
+		if (irq_data->tsc_irq)
+			continue;
+		if (irq_data->flags == FLAGS_RTC)
+			i2c = chip->rtc;
+		else if (irq_data->flags == FLAGS_ADC)
+			i2c = chip->adc;
+		else
+			i2c = chip->i2c;
+		if (read_reg != irq_data->reg) {
+			read_reg = irq_data->reg;
+			value = max8925_reg_read(i2c, irq_data->reg);
 		}
-		break;
-	case MAX8925_ADC:
-		*bit = irq % BITS_PER_BYTE;
-		*offset = (mode) ? MAX8925_TSC_IRQ_MASK : MAX8925_TSC_IRQ;
-		break;
-	default:
-		goto out;
+		if (value & irq_data->enable)
+			handle_nested_irq(chip->irq_base + i);
 	}
-	return 0;
-out:
-	dev_err(chip->dev, "Wrong irq #%d is assigned\n", irq);
-	return -EINVAL;
+	return IRQ_HANDLED;
 }
 
-static int __check_irq(int irq)
+static irqreturn_t max8925_tsc_irq(int irq, void *data)
 {
-	if ((irq < 0) || (irq >= MAX8925_NUM_IRQ))
-		return -EINVAL;
-	return 0;
+	struct max8925_chip *chip = data;
+	struct max8925_irq_data *irq_data;
+	struct i2c_client *i2c;
+	int read_reg = -1, value = 0;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(max8925_irqs); i++) {
+		irq_data = &max8925_irqs[i];
+		/* non TSC IRQ should be serviced in max8925_irq() */
+		if (!irq_data->tsc_irq)
+			continue;
+		if (irq_data->flags == FLAGS_RTC)
+			i2c = chip->rtc;
+		else if (irq_data->flags == FLAGS_ADC)
+			i2c = chip->adc;
+		else
+			i2c = chip->i2c;
+		if (read_reg != irq_data->reg) {
+			read_reg = irq_data->reg;
+			value = max8925_reg_read(i2c, irq_data->reg);
+		}
+		if (value & irq_data->enable)
+			handle_nested_irq(chip->irq_base + i);
+	}
+	return IRQ_HANDLED;
 }
 
-int max8925_mask_irq(struct max8925_chip *chip, int irq)
+static void max8925_irq_lock(unsigned int irq)
 {
-	int offset, bit, ret;
+	struct max8925_chip *chip = get_irq_chip_data(irq);
 
-	ret = __get_irq_offset(chip, irq, IRQ_MODE_MASK, &offset, &bit);
-	if (ret < 0)
-		return ret;
-	ret = max8925_set_bits(chip->i2c, offset, 1 << bit, 1 << bit);
-	return ret;
+	mutex_lock(&chip->irq_lock);
 }
 
-int max8925_unmask_irq(struct max8925_chip *chip, int irq)
+static void max8925_irq_sync_unlock(unsigned int irq)
 {
-	int offset, bit, ret;
+	struct max8925_chip *chip = get_irq_chip_data(irq);
+	struct max8925_irq_data *irq_data;
+	static unsigned char cache_chg[2] = {0xff, 0xff};
+	static unsigned char cache_on[2] = {0xff, 0xff};
+	static unsigned char cache_rtc = 0xff, cache_tsc = 0xff;
+	unsigned char irq_chg[2], irq_on[2];
+	unsigned char irq_rtc, irq_tsc;
+	int i;
+
+	/* Load cached value. In initial, all IRQs are masked */
+	irq_chg[0] = cache_chg[0];
+	irq_chg[1] = cache_chg[1];
+	irq_on[0] = cache_on[0];
+	irq_on[1] = cache_on[1];
+	irq_rtc = cache_rtc;
+	irq_tsc = cache_tsc;
+	for (i = 0; i < ARRAY_SIZE(max8925_irqs); i++) {
+		irq_data = &max8925_irqs[i];
+		switch (irq_data->mask_reg) {
+		case MAX8925_CHG_IRQ1_MASK:
+			irq_chg[0] &= irq_data->enable;
+			break;
+		case MAX8925_CHG_IRQ2_MASK:
+			irq_chg[1] &= irq_data->enable;
+			break;
+		case MAX8925_ON_OFF_IRQ1_MASK:
+			irq_on[0] &= irq_data->enable;
+			break;
+		case MAX8925_ON_OFF_IRQ2_MASK:
+			irq_on[1] &= irq_data->enable;
+			break;
+		case MAX8925_RTC_IRQ_MASK:
+			irq_rtc &= irq_data->enable;
+			break;
+		case MAX8925_TSC_IRQ_MASK:
+			irq_tsc &= irq_data->enable;
+			break;
+		default:
+			dev_err(chip->dev, "wrong IRQ\n");
+			break;
+		}
+	}
+	/* update mask into registers */
+	if (cache_chg[0] != irq_chg[0]) {
+		cache_chg[0] = irq_chg[0];
+		max8925_reg_write(chip->i2c, MAX8925_CHG_IRQ1_MASK,
+			irq_chg[0]);
+	}
+	if (cache_chg[1] != irq_chg[1]) {
+		cache_chg[1] = irq_chg[1];
+		max8925_reg_write(chip->i2c, MAX8925_CHG_IRQ2_MASK,
+			irq_chg[1]);
+	}
+	if (cache_on[0] != irq_on[0]) {
+		cache_on[0] = irq_on[0];
+		max8925_reg_write(chip->i2c, MAX8925_ON_OFF_IRQ1_MASK,
+				irq_on[0]);
+	}
+	if (cache_on[1] != irq_on[1]) {
+		cache_on[1] = irq_on[1];
+		max8925_reg_write(chip->i2c, MAX8925_ON_OFF_IRQ2_MASK,
+				irq_on[1]);
+	}
+	if (cache_rtc != irq_rtc) {
+		cache_rtc = irq_rtc;
+		max8925_reg_write(chip->rtc, MAX8925_RTC_IRQ_MASK, irq_rtc);
+	}
+	if (cache_tsc != irq_tsc) {
+		cache_tsc = irq_tsc;
+		max8925_reg_write(chip->adc, MAX8925_TSC_IRQ_MASK, irq_tsc);
+	}
 
-	ret = __get_irq_offset(chip, irq, IRQ_MODE_MASK, &offset, &bit);
-	if (ret < 0)
-		return ret;
-	ret = max8925_set_bits(chip->i2c, offset, 1 << bit, 0);
-	return ret;
+	mutex_unlock(&chip->irq_lock);
 }
 
-#define INT_STATUS_NUM		(MAX8925_NUM_IRQ / BITS_PER_BYTE)
-
-static irqreturn_t max8925_irq_thread(int irq, void *data)
+static void max8925_irq_enable(unsigned int irq)
 {
-	struct max8925_chip *chip = data;
-	unsigned long irq_status[INT_STATUS_NUM];
-	unsigned char status_buf[INT_STATUS_NUM << 1];
-	int i, ret;
-
-	memset(irq_status, 0, sizeof(unsigned long) * INT_STATUS_NUM);
-
-	/* all these interrupt status registers are read-only */
-	switch (chip->chip_id) {
-	case MAX8925_GPM:
-		ret = max8925_bulk_read(chip->i2c, MAX8925_CHG_IRQ1,
-					4, status_buf);
-		if (ret < 0)
-			goto out;
-		ret = max8925_bulk_read(chip->i2c, MAX8925_ON_OFF_IRQ1,
-					2, &status_buf[4]);
-		if (ret < 0)
-			goto out;
-		ret = max8925_bulk_read(chip->i2c, MAX8925_ON_OFF_IRQ2,
-					2, &status_buf[6]);
-		if (ret < 0)
-			goto out;
-		/* clear masked interrupt status */
-		status_buf[0] &= (~status_buf[2] & CHG_IRQ1_MASK);
-		irq_status[0] |= status_buf[0];
-		status_buf[1] &= (~status_buf[3] & CHG_IRQ2_MASK);
-		irq_status[0] |= (status_buf[1] << BITS_PER_BYTE);
-		status_buf[4] &= (~status_buf[5] & ON_OFF_IRQ1_MASK);
-		irq_status[0] |= (status_buf[4] << (BITS_PER_BYTE * 2));
-		status_buf[6] &= (~status_buf[7] & ON_OFF_IRQ2_MASK);
-		irq_status[0] |= (status_buf[6] << (BITS_PER_BYTE * 3));
-		break;
-	case MAX8925_ADC:
-		ret = max8925_bulk_read(chip->i2c, MAX8925_TSC_IRQ,
-					2, status_buf);
-		if (ret < 0)
-			goto out;
-		/* clear masked interrupt status */
-		status_buf[0] &= (~status_buf[1] & TSC_IRQ_MASK);
-		irq_status[0] |= status_buf[0];
-		break;
-	default:
-		goto out;
-	}
-
-	for_each_bit(i, &irq_status[0], MAX8925_NUM_IRQ) {
-		clear_bit(i, irq_status);
-		dev_dbg(chip->dev, "Servicing IRQ #%d in %s\n", i, chip->name);
-
-		mutex_lock(&chip->irq_lock);
-		if (chip->irq[i].handler)
-			chip->irq[i].handler(i, chip->irq[i].data);
-		else {
-			max8925_mask_irq(chip, i);
-			dev_err(chip->dev, "Noboday cares IRQ #%d in %s. "
-				"Now mask it.\n", i, chip->name);
-		}
-		mutex_unlock(&chip->irq_lock);
-	}
-out:
-	return IRQ_HANDLED;
+	struct max8925_chip *chip = get_irq_chip_data(irq);
+	max8925_irqs[irq - chip->irq_base].enable
+		= max8925_irqs[irq - chip->irq_base].offs;
 }
 
-int max8925_request_irq(struct max8925_chip *chip, int irq,
-			irq_handler_t handler, void *data)
+static void max8925_irq_disable(unsigned int irq)
 {
-	if ((__check_irq(irq) < 0) || !handler)
-		return -EINVAL;
-
-	mutex_lock(&chip->irq_lock);
-	chip->irq[irq].handler = handler;
-	chip->irq[irq].data = data;
-	mutex_unlock(&chip->irq_lock);
-	return 0;
+	struct max8925_chip *chip = get_irq_chip_data(irq);
+	max8925_irqs[irq - chip->irq_base].enable = 0;
 }
-EXPORT_SYMBOL(max8925_request_irq);
 
-int max8925_free_irq(struct max8925_chip *chip, int irq)
+static struct irq_chip max8925_irq_chip = {
+	.name		= "max8925",
+	.bus_lock	= max8925_irq_lock,
+	.bus_sync_unlock = max8925_irq_sync_unlock,
+	.enable		= max8925_irq_enable,
+	.disable	= max8925_irq_disable,
+};
+
+static int max8925_irq_init(struct max8925_chip *chip, int irq,
+			    struct max8925_platform_data *pdata)
 {
-	if (__check_irq(irq) < 0)
+	unsigned long flags = IRQF_TRIGGER_FALLING | IRQF_ONESHOT;
+	struct irq_desc *desc;
+	int i, ret;
+	int __irq;
+
+	if (!pdata || !pdata->irq_base) {
+		dev_warn(chip->dev, "No interrupt support on IRQ base\n");
 		return -EINVAL;
+	}
+	/* clear all interrupts */
+	max8925_reg_read(chip->i2c, MAX8925_CHG_IRQ1);
+	max8925_reg_read(chip->i2c, MAX8925_CHG_IRQ2);
+	max8925_reg_read(chip->i2c, MAX8925_ON_OFF_IRQ1);
+	max8925_reg_read(chip->i2c, MAX8925_ON_OFF_IRQ2);
+	max8925_reg_read(chip->rtc, MAX8925_RTC_IRQ);
+	max8925_reg_read(chip->adc, MAX8925_TSC_IRQ);
+	/* mask all interrupts */
+	max8925_reg_write(chip->rtc, MAX8925_ALARM0_CNTL, 0);
+	max8925_reg_write(chip->rtc, MAX8925_ALARM1_CNTL, 0);
+	max8925_reg_write(chip->i2c, MAX8925_CHG_IRQ1_MASK, 0xff);
+	max8925_reg_write(chip->i2c, MAX8925_CHG_IRQ2_MASK, 0xff);
+	max8925_reg_write(chip->i2c, MAX8925_ON_OFF_IRQ1_MASK, 0xff);
+	max8925_reg_write(chip->i2c, MAX8925_ON_OFF_IRQ2_MASK, 0xff);
+	max8925_reg_write(chip->rtc, MAX8925_RTC_IRQ_MASK, 0xff);
+	max8925_reg_write(chip->adc, MAX8925_TSC_IRQ_MASK, 0xff);
+
+	mutex_init(&chip->irq_lock);
+	chip->core_irq = irq;
+	chip->irq_base = pdata->irq_base;
+	desc = irq_to_desc(chip->core_irq);
+
+	/* register with genirq */
+	for (i = 0; i < ARRAY_SIZE(max8925_irqs); i++) {
+		__irq = i + chip->irq_base;
+		set_irq_chip_data(__irq, chip);
+		set_irq_chip_and_handler(__irq, &max8925_irq_chip,
+					 handle_edge_irq);
+		set_irq_nested_thread(__irq, 1);
+#ifdef CONFIG_ARM
+		set_irq_flags(__irq, IRQF_VALID);
+#else
+		set_irq_noprobe(__irq);
+#endif
+	}
+	if (!irq) {
+		dev_warn(chip->dev, "No interrupt support on core IRQ\n");
+		goto tsc_irq;
+	}
 
-	mutex_lock(&chip->irq_lock);
-	chip->irq[irq].handler = NULL;
-	chip->irq[irq].data = NULL;
-	mutex_unlock(&chip->irq_lock);
+	ret = request_threaded_irq(irq, NULL, max8925_irq, flags,
+				   "max8925", chip);
+	if (ret) {
+		dev_err(chip->dev, "Failed to request core IRQ: %d\n", ret);
+		chip->core_irq = 0;
+	}
+tsc_irq:
+	if (!pdata->tsc_irq) {
+		dev_warn(chip->dev, "No interrupt support on TSC IRQ\n");
+		return 0;
+	}
+	chip->tsc_irq = pdata->tsc_irq;
+
+	ret = request_threaded_irq(chip->tsc_irq, NULL, max8925_tsc_irq,
+				   flags, "max8925-tsc", chip);
+	if (ret) {
+		dev_err(chip->dev, "Failed to request TSC IRQ: %d\n", ret);
+		chip->tsc_irq = 0;
+	}
 	return 0;
 }
-EXPORT_SYMBOL(max8925_free_irq);
 
-static int __devinit device_gpm_init(struct max8925_chip *chip,
-				      struct i2c_client *i2c,
-				      struct max8925_platform_data *pdata)
+int __devinit max8925_device_init(struct max8925_chip *chip,
+				  struct max8925_platform_data *pdata)
 {
 	int ret;
 
-	/* mask all IRQs */
-	ret = max8925_set_bits(i2c, MAX8925_CHG_IRQ1_MASK, 0x7, 0x7);
-	if (ret < 0)
-		goto out;
-	ret = max8925_set_bits(i2c, MAX8925_CHG_IRQ2_MASK, 0xff, 0xff);
-	if (ret < 0)
-		goto out;
-	ret = max8925_set_bits(i2c, MAX8925_ON_OFF_IRQ1_MASK, 0xff, 0xff);
-	if (ret < 0)
-		goto out;
-	ret = max8925_set_bits(i2c, MAX8925_ON_OFF_IRQ2_MASK, 0x3, 0x3);
-	if (ret < 0)
-		goto out;
-
-	chip->name = "GPM";
-	memset(chip->irq, 0, sizeof(struct max8925_irq) * MAX8925_NUM_IRQ);
-	ret = request_threaded_irq(i2c->irq, NULL, max8925_irq_thread,
-				IRQF_ONESHOT | IRQF_TRIGGER_LOW,
-				"max8925-gpm", chip);
-	if (ret < 0) {
-		dev_err(chip->dev, "Failed to request IRQ #%d.\n", i2c->irq);
-		goto out;
+	max8925_irq_init(chip, chip->i2c->irq, pdata);
+
+	if (pdata && (pdata->power || pdata->touch)) {
+		/* enable ADC to control internal reference */
+		max8925_set_bits(chip->i2c, MAX8925_RESET_CNFG, 1, 1);
+		/* enable internal reference for ADC */
+		max8925_set_bits(chip->adc, MAX8925_TSC_CNFG1, 3, 2);
+		/* check for internal reference IRQ */
+		do {
+			ret = max8925_reg_read(chip->adc, MAX8925_TSC_IRQ);
+		} while (ret & MAX8925_NREF_OK);
+		/* enaable ADC scheduler, interval is 1 second */
+		max8925_set_bits(chip->adc, MAX8925_ADC_SCHED, 3, 2);
 	}
-	chip->chip_irq = i2c->irq;
 
-	/* enable hard-reset for ONKEY power-off */
-	max8925_set_bits(i2c, MAX8925_SYSENSEL, 0x80, 0x80);
+	/* enable Momentary Power Loss */
+	max8925_set_bits(chip->rtc, MAX8925_MPL_CNTL, 1 << 4, 1 << 4);
 
-	ret = mfd_add_devices(chip->dev, 0, &regulator_devs[0],
-			      ARRAY_SIZE(regulator_devs),
-			      &regulator_resources[0], 0);
+	ret = mfd_add_devices(chip->dev, 0, &rtc_devs[0],
+			      ARRAY_SIZE(rtc_devs),
+			      &rtc_resources[0], 0);
 	if (ret < 0) {
-		dev_err(chip->dev, "Failed to add regulator subdev\n");
-		goto out_irq;
+		dev_err(chip->dev, "Failed to add rtc subdev\n");
+		goto out;
+	}
+	if (pdata && pdata->regulator[0]) {
+		ret = mfd_add_devices(chip->dev, 0, &regulator_devs[0],
+				      ARRAY_SIZE(regulator_devs),
+				      &regulator_resources[0], 0);
+		if (ret < 0) {
+			dev_err(chip->dev, "Failed to add regulator subdev\n");
+			goto out_dev;
+		}
 	}
 
 	if (pdata && pdata->backlight) {
@@ -331,35 +612,17 @@ static int __devinit device_gpm_init(struct max8925_chip *chip,
 			goto out_dev;
 		}
 	}
-	return 0;
-out_dev:
-	mfd_remove_devices(chip->dev);
-out_irq:
-	if (chip->chip_irq)
-		free_irq(chip->chip_irq, chip);
-out:
-	return ret;
-}
-
-static int __devinit device_adc_init(struct max8925_chip *chip,
-				     struct i2c_client *i2c,
-				     struct max8925_platform_data *pdata)
-{
-	int ret;
-
-	/* mask all IRQs */
-	ret = max8925_set_bits(i2c, MAX8925_TSC_IRQ_MASK, 3, 3);
 
-	chip->name = "ADC";
-	memset(chip->irq, 0, sizeof(struct max8925_irq) * MAX8925_NUM_IRQ);
-	ret = request_threaded_irq(i2c->irq, NULL, max8925_irq_thread,
-				IRQF_ONESHOT | IRQF_TRIGGER_LOW,
-				"max8925-adc", chip);
-	if (ret < 0) {
-		dev_err(chip->dev, "Failed to request IRQ #%d.\n", i2c->irq);
-		goto out;
+	if (pdata && pdata->power) {
+		ret = mfd_add_devices(chip->dev, 0, &power_devs[0],
+					ARRAY_SIZE(power_devs),
+					&power_supply_resources[0], 0);
+		if (ret < 0) {
+			dev_err(chip->dev, "Failed to add power supply "
+				"subdev\n");
+			goto out_dev;
+		}
 	}
-	chip->chip_irq = i2c->irq;
 
 	if (pdata && pdata->touch) {
 		ret = mfd_add_devices(chip->dev, 0, &touch_devs[0],
@@ -367,38 +630,27 @@ static int __devinit device_adc_init(struct max8925_chip *chip,
 				      &touch_resources[0], 0);
 		if (ret < 0) {
 			dev_err(chip->dev, "Failed to add touch subdev\n");
-			goto out_irq;
+			goto out_dev;
 		}
 	}
+
 	return 0;
-out_irq:
-	if (chip->chip_irq)
-		free_irq(chip->chip_irq, chip);
+out_dev:
+	mfd_remove_devices(chip->dev);
 out:
 	return ret;
 }
 
-int __devinit max8925_device_init(struct max8925_chip *chip,
-				  struct max8925_platform_data *pdata)
+void __devexit max8925_device_exit(struct max8925_chip *chip)
 {
-	switch (chip->chip_id) {
-	case MAX8925_GPM:
-		device_gpm_init(chip, chip->i2c, pdata);
-		break;
-	case MAX8925_ADC:
-		device_adc_init(chip, chip->i2c, pdata);
-		break;
-	}
-	return 0;
-}
-
-void max8925_device_exit(struct max8925_chip *chip)
-{
-	if (chip->chip_irq >= 0)
-		free_irq(chip->chip_irq, chip);
+	if (chip->core_irq)
+		free_irq(chip->core_irq, chip);
+	if (chip->tsc_irq)
+		free_irq(chip->tsc_irq, chip);
 	mfd_remove_devices(chip->dev);
 }
 
+
 MODULE_DESCRIPTION("PMIC Driver for Maxim MAX8925");
 MODULE_AUTHOR("Haojian Zhuang <haojian.zhuang@marvell.com");
 MODULE_LICENSE("GPL");
diff --git a/include/linux/mfd/max8925.h b/include/linux/mfd/max8925.h
index b72dbe174d51..18c1844ed472 100644
--- a/include/linux/mfd/max8925.h
+++ b/include/linux/mfd/max8925.h
@@ -12,6 +12,7 @@
 #ifndef __LINUX_MFD_MAX8925_H
 #define __LINUX_MFD_MAX8925_H
 
+#include <linux/mutex.h>
 #include <linux/interrupt.h>
 
 /* Unified sub device IDs for MAX8925 */
@@ -39,6 +40,30 @@ enum {
 	MAX8925_ID_LDO18,
 	MAX8925_ID_LDO19,
 	MAX8925_ID_LDO20,
+	MAX8925_ID_MAX,
+};
+
+enum {
+	/*
+	 * Charging current threshold trigger going from fast charge
+	 * to TOPOFF charge. From 5% to 20% of fasting charging current.
+	 */
+	MAX8925_TOPOFF_THR_5PER,
+	MAX8925_TOPOFF_THR_10PER,
+	MAX8925_TOPOFF_THR_15PER,
+	MAX8925_TOPOFF_THR_20PER,
+};
+
+enum {
+	/* Fast charging current */
+	MAX8925_FCHG_85MA,
+	MAX8925_FCHG_300MA,
+	MAX8925_FCHG_460MA,
+	MAX8925_FCHG_600MA,
+	MAX8925_FCHG_700MA,
+	MAX8925_FCHG_800MA,
+	MAX8925_FCHG_900MA,
+	MAX8925_FCHG_1000MA,
 };
 
 /* Charger registers */
@@ -46,12 +71,13 @@ enum {
 #define MAX8925_CHG_IRQ2		(0x7f)
 #define MAX8925_CHG_IRQ1_MASK		(0x80)
 #define MAX8925_CHG_IRQ2_MASK		(0x81)
+#define MAX8925_CHG_STATUS		(0x82)
 
 /* GPM registers */
 #define MAX8925_SYSENSEL		(0x00)
 #define MAX8925_ON_OFF_IRQ1		(0x01)
 #define MAX8925_ON_OFF_IRQ1_MASK	(0x02)
-#define MAX8925_ON_OFF_STAT		(0x03)
+#define MAX8925_ON_OFF_STATUS		(0x03)
 #define MAX8925_ON_OFF_IRQ2		(0x0d)
 #define MAX8925_ON_OFF_IRQ2_MASK	(0x0e)
 #define MAX8925_RESET_CNFG		(0x0f)
@@ -59,12 +85,18 @@ enum {
 /* Touch registers */
 #define MAX8925_TSC_IRQ			(0x00)
 #define MAX8925_TSC_IRQ_MASK		(0x01)
+#define MAX8925_TSC_CNFG1		(0x02)
+#define MAX8925_ADC_SCHED		(0x10)
 #define MAX8925_ADC_RES_END		(0x6f)
 
+#define MAX8925_NREF_OK			(1 << 4)
+
 /* RTC registers */
-#define MAX8925_RTC_STATUS		(0x1a)
+#define MAX8925_ALARM0_CNTL		(0x18)
+#define MAX8925_ALARM1_CNTL		(0x19)
 #define MAX8925_RTC_IRQ			(0x1c)
 #define MAX8925_RTC_IRQ_MASK		(0x1d)
+#define MAX8925_MPL_CNTL		(0x1e)
 
 /* WLED registers */
 #define MAX8925_WLED_MODE_CNTL		(0x84)
@@ -126,45 +158,48 @@ enum {
 #define TSC_IRQ_MASK			(0x03)
 #define RTC_IRQ_MASK			(0x0c)
 
-#define MAX8925_NUM_IRQ			(32)
+#define MAX8925_GPM_NUM_IRQ		(40)
+#define MAX8925_ADC_NUM_IRQ		(8)
+#define MAX8925_NUM_IRQ			(MAX8925_GPM_NUM_IRQ	\
+					+ MAX8925_ADC_NUM_IRQ)
+
+#define MAX8925_MAX_REGULATOR		(23)
 
 #define MAX8925_NAME_SIZE		(32)
 
+/* IRQ definitions */
 enum {
-	MAX8925_INVALID = 0,
-	MAX8925_RTC,
-	MAX8925_ADC,
-	MAX8925_GPM,	/* general power management */
-	MAX8925_MAX,
+	MAX8925_IRQ_VCHG_DC_OVP,
+	MAX8925_IRQ_VCHG_DC_F,
+	MAX8925_IRQ_VCHG_DC_R,
+	MAX8925_IRQ_VCHG_USB_OVP,
+	MAX8925_IRQ_VCHG_USB_F,
+	MAX8925_IRQ_VCHG_USB_R,
+	MAX8925_IRQ_VCHG_THM_OK_R,
+	MAX8925_IRQ_VCHG_THM_OK_F,
+	MAX8925_IRQ_VCHG_SYSLOW_F,
+	MAX8925_IRQ_VCHG_SYSLOW_R,
+	MAX8925_IRQ_VCHG_RST,
+	MAX8925_IRQ_VCHG_DONE,
+	MAX8925_IRQ_VCHG_TOPOFF,
+	MAX8925_IRQ_VCHG_TMR_FAULT,
+	MAX8925_IRQ_GPM_RSTIN,
+	MAX8925_IRQ_GPM_MPL,
+	MAX8925_IRQ_GPM_SW_3SEC,
+	MAX8925_IRQ_GPM_EXTON_F,
+	MAX8925_IRQ_GPM_EXTON_R,
+	MAX8925_IRQ_GPM_SW_1SEC,
+	MAX8925_IRQ_GPM_SW_F,
+	MAX8925_IRQ_GPM_SW_R,
+	MAX8925_IRQ_GPM_SYSCKEN_F,
+	MAX8925_IRQ_GPM_SYSCKEN_R,
+	MAX8925_IRQ_RTC_ALARM1,
+	MAX8925_IRQ_RTC_ALARM0,
+	MAX8925_IRQ_TSC_STICK,
+	MAX8925_IRQ_TSC_NSTICK,
+	MAX8925_NR_IRQS,
 };
 
-#define MAX8925_IRQ_VCHG_OVP		(0)
-#define MAX8925_IRQ_VCHG_F		(1)
-#define MAX8925_IRQ_VCHG_R		(2)
-#define MAX8925_IRQ_VCHG_THM_OK_R	(8)
-#define MAX8925_IRQ_VCHG_THM_OK_F	(9)
-#define MAX8925_IRQ_VCHG_BATTLOW_F	(10)
-#define MAX8925_IRQ_VCHG_BATTLOW_R	(11)
-#define MAX8925_IRQ_VCHG_RST		(12)
-#define MAX8925_IRQ_VCHG_DONE		(13)
-#define MAX8925_IRQ_VCHG_TOPOFF		(14)
-#define MAX8925_IRQ_VCHG_TMR_FAULT	(15)
-#define MAX8925_IRQ_GPM_RSTIN		(16)
-#define MAX8925_IRQ_GPM_MPL		(17)
-#define MAX8925_IRQ_GPM_SW_3SEC		(18)
-#define MAX8925_IRQ_GPM_EXTON_F		(19)
-#define MAX8925_IRQ_GPM_EXTON_R		(20)
-#define MAX8925_IRQ_GPM_SW_1SEC		(21)
-#define MAX8925_IRQ_GPM_SW_F		(22)
-#define MAX8925_IRQ_GPM_SW_R		(23)
-#define MAX8925_IRQ_GPM_SYSCKEN_F	(24)
-#define MAX8925_IRQ_GPM_SYSCKEN_R	(25)
-
-#define MAX8925_IRQ_TSC_STICK		(0)
-#define MAX8925_IRQ_TSC_NSTICK		(1)
-
-#define MAX8925_MAX_REGULATOR		(23)
-
 struct max8925_irq {
 	irq_handler_t		handler;
 	void			*data;
@@ -172,14 +207,16 @@ struct max8925_irq {
 
 struct max8925_chip {
 	struct device		*dev;
+	struct i2c_client	*i2c;
+	struct i2c_client	*adc;
+	struct i2c_client	*rtc;
+	struct max8925_irq	irqs[MAX8925_NUM_IRQ];
 	struct mutex		io_lock;
 	struct mutex		irq_lock;
-	struct i2c_client	*i2c;
-	struct max8925_irq	irq[MAX8925_NUM_IRQ];
 
-	const char		*name;
-	int			chip_id;
-	int			chip_irq;
+	int			irq_base;
+	int			core_irq;
+	int			tsc_irq;
 };
 
 struct max8925_backlight_pdata {
@@ -192,13 +229,25 @@ struct max8925_touch_pdata {
 	unsigned int		flags;
 };
 
+struct max8925_power_pdata {
+	int		(*set_charger)(int);
+	unsigned	batt_detect:1;
+	unsigned	topoff_threshold:2;
+	unsigned	fast_charge:3;	/* charge current */
+};
+
+/*
+ * irq_base: stores IRQ base number of MAX8925 in platform
+ * tsc_irq: stores IRQ number of MAX8925 TSC
+ */
 struct max8925_platform_data {
 	struct max8925_backlight_pdata	*backlight;
 	struct max8925_touch_pdata	*touch;
+	struct max8925_power_pdata	*power;
 	struct regulator_init_data	*regulator[MAX8925_MAX_REGULATOR];
 
-	int	chip_id;
-	int	chip_irq;
+	int		irq_base;
+	int		tsc_irq;
 };
 
 extern int max8925_reg_read(struct i2c_client *, int);
@@ -208,6 +257,12 @@ extern int max8925_bulk_write(struct i2c_client *, int, int, unsigned char *);
 extern int max8925_set_bits(struct i2c_client *, int, unsigned char,
 			unsigned char);
 
+extern int max8925_request_irq(struct max8925_chip *, int,
+			irq_handler_t, void *);
+extern int max8925_free_irq(struct max8925_chip *, int);
+extern int max8925_mask_irq(struct max8925_chip *, int);
+extern int max8925_unmask_irq(struct max8925_chip *, int);
+
 extern int max8925_device_init(struct max8925_chip *,
 				struct max8925_platform_data *);
 extern void max8925_device_exit(struct max8925_chip *);
-- 
cgit v1.2.3


From 34c9120805ff4b3f7a8053bd64157ba564774433 Mon Sep 17 00:00:00 2001
From: Haojian Zhuang <haojian.zhuang@marvell.com>
Date: Wed, 3 Feb 2010 15:37:23 -0500
Subject: mfd: Clean code in max8925

Remove unused definitions.

Signed-off-by: Haojian Zhuang <haojian.zhuang@marvell.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 include/linux/mfd/max8925.h | 17 -----------------
 1 file changed, 17 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mfd/max8925.h b/include/linux/mfd/max8925.h
index 18c1844ed472..5259dfe8c585 100644
--- a/include/linux/mfd/max8925.h
+++ b/include/linux/mfd/max8925.h
@@ -158,11 +158,6 @@ enum {
 #define TSC_IRQ_MASK			(0x03)
 #define RTC_IRQ_MASK			(0x0c)
 
-#define MAX8925_GPM_NUM_IRQ		(40)
-#define MAX8925_ADC_NUM_IRQ		(8)
-#define MAX8925_NUM_IRQ			(MAX8925_GPM_NUM_IRQ	\
-					+ MAX8925_ADC_NUM_IRQ)
-
 #define MAX8925_MAX_REGULATOR		(23)
 
 #define MAX8925_NAME_SIZE		(32)
@@ -200,17 +195,11 @@ enum {
 	MAX8925_NR_IRQS,
 };
 
-struct max8925_irq {
-	irq_handler_t		handler;
-	void			*data;
-};
-
 struct max8925_chip {
 	struct device		*dev;
 	struct i2c_client	*i2c;
 	struct i2c_client	*adc;
 	struct i2c_client	*rtc;
-	struct max8925_irq	irqs[MAX8925_NUM_IRQ];
 	struct mutex		io_lock;
 	struct mutex		irq_lock;
 
@@ -257,12 +246,6 @@ extern int max8925_bulk_write(struct i2c_client *, int, int, unsigned char *);
 extern int max8925_set_bits(struct i2c_client *, int, unsigned char,
 			unsigned char);
 
-extern int max8925_request_irq(struct max8925_chip *, int,
-			irq_handler_t, void *);
-extern int max8925_free_irq(struct max8925_chip *, int);
-extern int max8925_mask_irq(struct max8925_chip *, int);
-extern int max8925_unmask_irq(struct max8925_chip *, int);
-
 extern int max8925_device_init(struct max8925_chip *,
 				struct max8925_platform_data *);
 extern void max8925_device_exit(struct max8925_chip *);
-- 
cgit v1.2.3


From 2afa62ea76027b00e472ddb672191e6e15425b43 Mon Sep 17 00:00:00 2001
From: Haojian Zhuang <haojian.zhuang@marvell.com>
Date: Mon, 8 Feb 2010 05:02:00 -0500
Subject: mfd: Use genirq in 88pm860x

Use genirq to simplify IRQ handling in 88pm860x. Remove the interface of
mask/free IRQs on 88pm860x. All these work is taken by genirq. Update the
touchscreen driver of 88pm860x since IRQ handling is changed.

Signed-off-by: Haojian Zhuang <haojian.zhuang@marvell.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/input/touchscreen/88pm860x-ts.c |  15 +-
 drivers/mfd/88pm860x-core.c             | 408 +++++++++++++++++++++++---------
 include/linux/mfd/88pm860x.h            |  30 +--
 3 files changed, 311 insertions(+), 142 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/input/touchscreen/88pm860x-ts.c b/drivers/input/touchscreen/88pm860x-ts.c
index 56254d2a1f6e..286bb490a9f2 100644
--- a/drivers/input/touchscreen/88pm860x-ts.c
+++ b/drivers/input/touchscreen/88pm860x-ts.c
@@ -54,7 +54,6 @@ static irqreturn_t pm860x_touch_handler(int irq, void *data)
 	int z1, z2, rt = 0;
 	int ret;
 
-	pm860x_mask_irq(chip, irq);
 	ret = pm860x_bulk_read(touch->i2c, MEAS_TSIX_1, MEAS_LEN, buf);
 	if (ret < 0)
 		goto out;
@@ -83,7 +82,6 @@ static irqreturn_t pm860x_touch_handler(int irq, void *data)
 		dev_dbg(chip->dev, "pen release\n");
 	}
 	input_sync(touch->idev);
-	pm860x_unmask_irq(chip, irq);
 
 out:
 	return IRQ_HANDLED;
@@ -92,7 +90,6 @@ out:
 static int pm860x_touch_open(struct input_dev *dev)
 {
 	struct pm860x_touch *touch = input_get_drvdata(dev);
-	struct pm860x_chip *chip = touch->chip;
 	int data, ret;
 
 	data = MEAS_PD_EN | MEAS_TSIX_EN | MEAS_TSIY_EN
@@ -100,7 +97,6 @@ static int pm860x_touch_open(struct input_dev *dev)
 	ret = pm860x_set_bits(touch->i2c, MEAS_EN3, data, data);
 	if (ret < 0)
 		goto out;
-	pm860x_unmask_irq(chip, touch->irq);
 	return 0;
 out:
 	return ret;
@@ -109,13 +105,11 @@ out:
 static void pm860x_touch_close(struct input_dev *dev)
 {
 	struct pm860x_touch *touch = input_get_drvdata(dev);
-	struct pm860x_chip *chip = touch->chip;
 	int data;
 
 	data = MEAS_PD_EN | MEAS_TSIX_EN | MEAS_TSIY_EN
 		| MEAS_TSIZ1_EN | MEAS_TSIZ2_EN;
 	pm860x_set_bits(touch->i2c, MEAS_EN3, data, 0);
-	pm860x_mask_irq(chip, touch->irq);
 }
 
 static int __devinit pm860x_touch_probe(struct platform_device *pdev)
@@ -164,11 +158,12 @@ static int __devinit pm860x_touch_probe(struct platform_device *pdev)
 	touch->idev->close = pm860x_touch_close;
 	touch->chip = chip;
 	touch->i2c = (chip->id == CHIP_PM8607) ? chip->client : chip->companion;
-	touch->irq = irq;
+	touch->irq = irq + chip->irq_base;
 	touch->res_x = pdata->res_x;
 	input_set_drvdata(touch->idev, touch);
 
-	ret = pm860x_request_irq(chip, irq, pm860x_touch_handler, touch);
+	ret = request_threaded_irq(touch->irq, NULL, pm860x_touch_handler,
+				   IRQF_ONESHOT, "touch", touch);
 	if (ret < 0)
 		goto out_irq;
 
@@ -194,7 +189,7 @@ static int __devinit pm860x_touch_probe(struct platform_device *pdev)
 	platform_set_drvdata(pdev, touch);
 	return 0;
 out_rg:
-	pm860x_free_irq(chip, irq);
+	free_irq(touch->irq, touch);
 out_irq:
 	input_free_device(touch->idev);
 out:
@@ -207,7 +202,7 @@ static int __devexit pm860x_touch_remove(struct platform_device *pdev)
 	struct pm860x_touch *touch = platform_get_drvdata(pdev);
 
 	input_unregister_device(touch->idev);
-	pm860x_free_irq(touch->chip, touch->irq);
+	free_irq(touch->irq, touch);
 	platform_set_drvdata(pdev, NULL);
 	kfree(touch);
 	return 0;
diff --git a/drivers/mfd/88pm860x-core.c b/drivers/mfd/88pm860x-core.c
index 16f0dca707a7..6a14d2b1ccf0 100644
--- a/drivers/mfd/88pm860x-core.c
+++ b/drivers/mfd/88pm860x-core.c
@@ -12,11 +12,14 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/i2c.h>
+#include <linux/irq.h>
 #include <linux/interrupt.h>
 #include <linux/platform_device.h>
 #include <linux/mfd/core.h>
 #include <linux/mfd/88pm860x.h>
 
+#define INT_STATUS_NUM			3
+
 char pm860x_backlight_name[][MFD_NAME_SIZE] = {
 	"backlight-0",
 	"backlight-1",
@@ -119,6 +122,42 @@ static struct mfd_cell touch_devs[] = {
 	.flags	= IORESOURCE_IO,			\
 }
 
+static struct resource power_supply_resources[] = {
+	{
+		.name		= "88pm860x-power",
+		.start		= PM8607_IRQ_CHG,
+		.end		= PM8607_IRQ_CHG,
+		.flags		= IORESOURCE_IRQ,
+	},
+};
+
+static struct mfd_cell power_devs[] = {
+	{
+		.name		= "88pm860x-power",
+		.num_resources	= 1,
+		.resources	= &power_supply_resources[0],
+		.id		= -1,
+	},
+};
+
+static struct resource onkey_resources[] = {
+	{
+		.name		= "88pm860x-onkey",
+		.start		= PM8607_IRQ_ONKEY,
+		.end		= PM8607_IRQ_ONKEY,
+		.flags		= IORESOURCE_IRQ,
+	},
+};
+
+static struct mfd_cell onkey_devs[] = {
+	{
+		.name		= "88pm860x-onkey",
+		.num_resources	= 1,
+		.resources	= &onkey_resources[0],
+		.id		= -1,
+	},
+};
+
 static struct resource regulator_resources[] = {
 	PM8607_REG_RESOURCE(BUCK1, BUCK1),
 	PM8607_REG_RESOURCE(BUCK2, BUCK2),
@@ -163,129 +202,224 @@ static struct mfd_cell regulator_devs[] = {
 	PM8607_REG_DEVS(ldo14, LDO14),
 };
 
-#define CHECK_IRQ(irq)					\
-do {							\
-	if ((irq < 0) || (irq >= PM860X_NUM_IRQ))	\
-		return -EINVAL;				\
-} while (0)
-
-/* IRQs only occur on 88PM8607 */
-int pm860x_mask_irq(struct pm860x_chip *chip, int irq)
-{
-	struct i2c_client *i2c = (chip->id == CHIP_PM8607) ? chip->client \
-				: chip->companion;
-	int offset, data, ret;
-
-	CHECK_IRQ(irq);
-
-	offset = (irq >> 3) + PM8607_INT_MASK_1;
-	data = 1 << (irq % 8);
-	ret = pm860x_set_bits(i2c, offset, data, 0);
+struct pm860x_irq_data {
+	int	reg;
+	int	mask_reg;
+	int	enable;		/* enable or not */
+	int	offs;		/* bit offset in mask register */
+};
 
-	return ret;
-}
-EXPORT_SYMBOL(pm860x_mask_irq);
+static struct pm860x_irq_data pm860x_irqs[] = {
+	[PM8607_IRQ_ONKEY] = {
+		.reg		= PM8607_INT_STATUS1,
+		.mask_reg	= PM8607_INT_MASK_1,
+		.offs		= 1 << 0,
+	},
+	[PM8607_IRQ_EXTON] = {
+		.reg		= PM8607_INT_STATUS1,
+		.mask_reg	= PM8607_INT_MASK_1,
+		.offs		= 1 << 1,
+	},
+	[PM8607_IRQ_CHG] = {
+		.reg		= PM8607_INT_STATUS1,
+		.mask_reg	= PM8607_INT_MASK_1,
+		.offs		= 1 << 2,
+	},
+	[PM8607_IRQ_BAT] = {
+		.reg		= PM8607_INT_STATUS1,
+		.mask_reg	= PM8607_INT_MASK_1,
+		.offs		= 1 << 3,
+	},
+	[PM8607_IRQ_RTC] = {
+		.reg		= PM8607_INT_STATUS1,
+		.mask_reg	= PM8607_INT_MASK_1,
+		.offs		= 1 << 4,
+	},
+	[PM8607_IRQ_CC] = {
+		.reg		= PM8607_INT_STATUS1,
+		.mask_reg	= PM8607_INT_MASK_1,
+		.offs		= 1 << 5,
+	},
+	[PM8607_IRQ_VBAT] = {
+		.reg		= PM8607_INT_STATUS2,
+		.mask_reg	= PM8607_INT_MASK_2,
+		.offs		= 1 << 0,
+	},
+	[PM8607_IRQ_VCHG] = {
+		.reg		= PM8607_INT_STATUS2,
+		.mask_reg	= PM8607_INT_MASK_2,
+		.offs		= 1 << 1,
+	},
+	[PM8607_IRQ_VSYS] = {
+		.reg		= PM8607_INT_STATUS2,
+		.mask_reg	= PM8607_INT_MASK_2,
+		.offs		= 1 << 2,
+	},
+	[PM8607_IRQ_TINT] = {
+		.reg		= PM8607_INT_STATUS2,
+		.mask_reg	= PM8607_INT_MASK_2,
+		.offs		= 1 << 3,
+	},
+	[PM8607_IRQ_GPADC0] = {
+		.reg		= PM8607_INT_STATUS2,
+		.mask_reg	= PM8607_INT_MASK_2,
+		.offs		= 1 << 4,
+	},
+	[PM8607_IRQ_GPADC1] = {
+		.reg		= PM8607_INT_STATUS2,
+		.mask_reg	= PM8607_INT_MASK_2,
+		.offs		= 1 << 5,
+	},
+	[PM8607_IRQ_GPADC2] = {
+		.reg		= PM8607_INT_STATUS2,
+		.mask_reg	= PM8607_INT_MASK_2,
+		.offs		= 1 << 6,
+	},
+	[PM8607_IRQ_GPADC3] = {
+		.reg		= PM8607_INT_STATUS2,
+		.mask_reg	= PM8607_INT_MASK_2,
+		.offs		= 1 << 7,
+	},
+	[PM8607_IRQ_AUDIO_SHORT] = {
+		.reg		= PM8607_INT_STATUS3,
+		.mask_reg	= PM8607_INT_MASK_3,
+		.offs		= 1 << 0,
+	},
+	[PM8607_IRQ_PEN] = {
+		.reg		= PM8607_INT_STATUS3,
+		.mask_reg	= PM8607_INT_MASK_3,
+		.offs		= 1 << 1,
+	},
+	[PM8607_IRQ_HEADSET] = {
+		.reg		= PM8607_INT_STATUS3,
+		.mask_reg	= PM8607_INT_MASK_3,
+		.offs		= 1 << 2,
+	},
+	[PM8607_IRQ_HOOK] = {
+		.reg		= PM8607_INT_STATUS3,
+		.mask_reg	= PM8607_INT_MASK_3,
+		.offs		= 1 << 3,
+	},
+	[PM8607_IRQ_MICIN] = {
+		.reg		= PM8607_INT_STATUS3,
+		.mask_reg	= PM8607_INT_MASK_3,
+		.offs		= 1 << 4,
+	},
+	[PM8607_IRQ_CHG_FAIL] = {
+		.reg		= PM8607_INT_STATUS3,
+		.mask_reg	= PM8607_INT_MASK_3,
+		.offs		= 1 << 5,
+	},
+	[PM8607_IRQ_CHG_DONE] = {
+		.reg		= PM8607_INT_STATUS3,
+		.mask_reg	= PM8607_INT_MASK_3,
+		.offs		= 1 << 6,
+	},
+	[PM8607_IRQ_CHG_FAULT] = {
+		.reg		= PM8607_INT_STATUS3,
+		.mask_reg	= PM8607_INT_MASK_3,
+		.offs		= 1 << 7,
+	},
+};
 
-int pm860x_unmask_irq(struct pm860x_chip *chip, int irq)
+static inline struct pm860x_irq_data *irq_to_pm860x(struct pm860x_chip *chip,
+						    int irq)
 {
-	struct i2c_client *i2c = (chip->id == CHIP_PM8607) ? chip->client \
-				: chip->companion;
-	int offset, data, ret;
-
-	CHECK_IRQ(irq);
-
-	offset = (irq >> 3) + PM8607_INT_MASK_1;
-	data = 1 << (irq % 8);
-	ret = pm860x_set_bits(i2c, offset, data, data);
-
-	return ret;
+	return &pm860x_irqs[irq - chip->irq_base];
 }
-EXPORT_SYMBOL(pm860x_unmask_irq);
 
-#define INT_STATUS_NUM		(3)
-
-static irqreturn_t pm8607_irq_thread(int irq, void *data)
+static irqreturn_t pm860x_irq(int irq, void *data)
 {
-	DECLARE_BITMAP(irq_status, PM860X_NUM_IRQ);
 	struct pm860x_chip *chip = data;
-	struct i2c_client *i2c = (chip->id == CHIP_PM8607) ? chip->client \
-				: chip->companion;
-	unsigned char status_buf[INT_STATUS_NUM << 1];
-	unsigned long value;
-	int i, ret;
-
-	irq_status[0] = 0;
-
-	/* read out status register */
-	ret = pm860x_bulk_read(i2c, PM8607_INT_STATUS1,
-				INT_STATUS_NUM << 1, status_buf);
-	if (ret < 0)
-		goto out;
-	if (chip->irq_mode) {
-		/* 0, clear by read. 1, clear by write */
-		ret = pm860x_bulk_write(i2c, PM8607_INT_STATUS1,
-					INT_STATUS_NUM, status_buf);
-		if (ret < 0)
-			goto out;
-	}
-
-	/* clear masked interrupt status */
-	for (i = 0, value = 0; i < INT_STATUS_NUM; i++) {
-		status_buf[i] &= status_buf[i + INT_STATUS_NUM];
-		irq_status[0] |= status_buf[i] << (i * 8);
-	}
-
-	while (!bitmap_empty(irq_status, PM860X_NUM_IRQ)) {
-		irq = find_first_bit(irq_status, PM860X_NUM_IRQ);
-		clear_bit(irq, irq_status);
-		dev_dbg(chip->dev, "Servicing IRQ #%d\n", irq);
-
-		mutex_lock(&chip->irq_lock);
-		if (chip->irq[irq].handler)
-			chip->irq[irq].handler(irq, chip->irq[irq].data);
-		else {
-			pm860x_mask_irq(chip, irq);
-			dev_err(chip->dev, "Nobody cares IRQ %d. "
-				"Now mask it.\n", irq);
-			for (i = 0; i < (INT_STATUS_NUM << 1); i++) {
-				dev_err(chip->dev, "status[%d]:%x\n", i,
-					status_buf[i]);
-			}
+	struct pm860x_irq_data *irq_data;
+	struct i2c_client *i2c;
+	int read_reg = -1, value = 0;
+	int i;
+
+	i2c = (chip->id == CHIP_PM8607) ? chip->client : chip->companion;
+	for (i = 0; i < ARRAY_SIZE(pm860x_irqs); i++) {
+		irq_data = &pm860x_irqs[i];
+		if (read_reg != irq_data->reg) {
+			read_reg = irq_data->reg;
+			value = pm860x_reg_read(i2c, irq_data->reg);
 		}
-		mutex_unlock(&chip->irq_lock);
+		if (value & irq_data->enable)
+			handle_nested_irq(chip->irq_base + i);
 	}
-out:
 	return IRQ_HANDLED;
 }
 
-int pm860x_request_irq(struct pm860x_chip *chip, int irq,
-		       irq_handler_t handler, void *data)
+static void pm860x_irq_lock(unsigned int irq)
 {
-	CHECK_IRQ(irq);
-	if (!handler)
-		return -EINVAL;
+	struct pm860x_chip *chip = get_irq_chip_data(irq);
 
 	mutex_lock(&chip->irq_lock);
-	chip->irq[irq].handler = handler;
-	chip->irq[irq].data = data;
-	mutex_unlock(&chip->irq_lock);
-
-	return 0;
 }
-EXPORT_SYMBOL(pm860x_request_irq);
 
-int pm860x_free_irq(struct pm860x_chip *chip, int irq)
+static void pm860x_irq_sync_unlock(unsigned int irq)
 {
-	CHECK_IRQ(irq);
+	struct pm860x_chip *chip = get_irq_chip_data(irq);
+	struct pm860x_irq_data *irq_data;
+	struct i2c_client *i2c;
+	static unsigned char cached[3] = {0x0, 0x0, 0x0};
+	unsigned char mask[3];
+	int i;
+
+	i2c = (chip->id == CHIP_PM8607) ? chip->client : chip->companion;
+	/* Load cached value. In initial, all IRQs are masked */
+	for (i = 0; i < 3; i++)
+		mask[i] = cached[i];
+	for (i = 0; i < ARRAY_SIZE(pm860x_irqs); i++) {
+		irq_data = &pm860x_irqs[i];
+		switch (irq_data->mask_reg) {
+		case PM8607_INT_MASK_1:
+			mask[0] &= ~irq_data->offs;
+			mask[0] |= irq_data->enable;
+			break;
+		case PM8607_INT_MASK_2:
+			mask[1] &= ~irq_data->offs;
+			mask[1] |= irq_data->enable;
+			break;
+		case PM8607_INT_MASK_3:
+			mask[2] &= ~irq_data->offs;
+			mask[2] |= irq_data->enable;
+			break;
+		default:
+			dev_err(chip->dev, "wrong IRQ\n");
+			break;
+		}
+	}
+	/* update mask into registers */
+	for (i = 0; i < 3; i++) {
+		if (mask[i] != cached[i]) {
+			cached[i] = mask[i];
+			pm860x_reg_write(i2c, PM8607_INT_MASK_1 + i, mask[i]);
+		}
+	}
 
-	mutex_lock(&chip->irq_lock);
-	chip->irq[irq].handler = NULL;
-	chip->irq[irq].data = NULL;
 	mutex_unlock(&chip->irq_lock);
+}
 
-	return 0;
+static void pm860x_irq_enable(unsigned int irq)
+{
+	struct pm860x_chip *chip = get_irq_chip_data(irq);
+	pm860x_irqs[irq - chip->irq_base].enable
+		= pm860x_irqs[irq - chip->irq_base].offs;
 }
-EXPORT_SYMBOL(pm860x_free_irq);
+
+static void pm860x_irq_disable(unsigned int irq)
+{
+	struct pm860x_chip *chip = get_irq_chip_data(irq);
+	pm860x_irqs[irq - chip->irq_base].enable = 0;
+}
+
+static struct irq_chip pm860x_irq_chip = {
+	.name		= "88pm860x",
+	.bus_lock	= pm860x_irq_lock,
+	.bus_sync_unlock = pm860x_irq_sync_unlock,
+	.enable		= pm860x_irq_enable,
+	.disable	= pm860x_irq_disable,
+};
 
 static int __devinit device_gpadc_init(struct pm860x_chip *chip,
 				       struct pm860x_platform_data *pdata)
@@ -348,9 +482,15 @@ static int __devinit device_irq_init(struct pm860x_chip *chip,
 	struct i2c_client *i2c = (chip->id == CHIP_PM8607) ? chip->client \
 				: chip->companion;
 	unsigned char status_buf[INT_STATUS_NUM];
-	int data, mask, ret = -EINVAL;
+	unsigned long flags = IRQF_TRIGGER_FALLING | IRQF_ONESHOT;
+	struct irq_desc *desc;
+	int i, data, mask, ret = -EINVAL;
+	int __irq;
 
-	mutex_init(&chip->irq_lock);
+	if (!pdata || !pdata->irq_base) {
+		dev_warn(chip->dev, "No interrupt support on IRQ base\n");
+		return -EINVAL;
+	}
 
 	mask = PM8607_B0_MISC1_INV_INT | PM8607_B0_MISC1_INT_CLEAR
 		| PM8607_B0_MISC1_INT_MASK;
@@ -389,25 +529,45 @@ static int __devinit device_irq_init(struct pm860x_chip *chip,
 	if (ret < 0)
 		goto out;
 
-	memset(chip->irq, 0, sizeof(struct pm860x_irq) * PM860X_NUM_IRQ);
-
-	ret = request_threaded_irq(i2c->irq, NULL, pm8607_irq_thread,
-				IRQF_ONESHOT | IRQF_TRIGGER_LOW,
-				"88PM8607", chip);
-	if (ret < 0) {
-		dev_err(chip->dev, "Failed to request IRQ #%d.\n", i2c->irq);
+	mutex_init(&chip->irq_lock);
+	chip->irq_base = pdata->irq_base;
+	chip->core_irq = i2c->irq;
+	if (!chip->core_irq)
 		goto out;
+
+	desc = irq_to_desc(chip->core_irq);
+
+	/* register IRQ by genirq */
+	for (i = 0; i < ARRAY_SIZE(pm860x_irqs); i++) {
+		__irq = i + chip->irq_base;
+		set_irq_chip_data(__irq, chip);
+		set_irq_chip_and_handler(__irq, &pm860x_irq_chip,
+					 handle_edge_irq);
+		set_irq_nested_thread(__irq, 1);
+#ifdef CONFIG_ARM
+		set_irq_flags(__irq, IRQF_VALID);
+#else
+		set_irq_noprobe(__irq);
+#endif
 	}
-	chip->chip_irq = i2c->irq;
+
+	ret = request_threaded_irq(chip->core_irq, NULL, pm860x_irq, flags,
+				   "88pm860x", chip);
+	if (ret) {
+		dev_err(chip->dev, "Failed to request IRQ: %d\n", ret);
+		chip->core_irq = 0;
+	}
+
 	return 0;
 out:
+	chip->core_irq = 0;
 	return ret;
 }
 
 static void __devexit device_irq_exit(struct pm860x_chip *chip)
 {
-	if (chip->chip_irq >= 0)
-		free_irq(chip->chip_irq, chip);
+	if (chip->core_irq)
+		free_irq(chip->core_irq, chip);
 }
 
 static void __devinit device_8606_init(struct pm860x_chip *chip,
@@ -513,6 +673,26 @@ static void __devinit device_8607_init(struct pm860x_chip *chip,
 			goto out_dev;
 		}
 	}
+
+	if (pdata && pdata->power) {
+		ret = mfd_add_devices(chip->dev, 0, &power_devs[0],
+				      ARRAY_SIZE(power_devs),
+				      &power_supply_resources[0], 0);
+		if (ret < 0) {
+			dev_err(chip->dev, "Failed to add power supply "
+				"subdev\n");
+			goto out_dev;
+		}
+	}
+
+	ret = mfd_add_devices(chip->dev, 0, &onkey_devs[0],
+			      ARRAY_SIZE(onkey_devs),
+			      &onkey_resources[0], 0);
+	if (ret < 0) {
+		dev_err(chip->dev, "Failed to add onkey subdev\n");
+		goto out_dev;
+	}
+
 	return;
 out_dev:
 	mfd_remove_devices(chip->dev);
@@ -524,7 +704,7 @@ out:
 int pm860x_device_init(struct pm860x_chip *chip,
 		       struct pm860x_platform_data *pdata)
 {
-	chip->chip_irq = -EINVAL;
+	chip->core_irq = 0;
 
 	switch (chip->id) {
 	case CHIP_PM8606:
diff --git a/include/linux/mfd/88pm860x.h b/include/linux/mfd/88pm860x.h
index 80bc82a7ac96..73f92c5feea2 100644
--- a/include/linux/mfd/88pm860x.h
+++ b/include/linux/mfd/88pm860x.h
@@ -262,12 +262,13 @@ enum {
 
 /* Interrupt Number in 88PM8607 */
 enum {
-	PM8607_IRQ_ONKEY = 0,
+	PM8607_IRQ_ONKEY,
 	PM8607_IRQ_EXTON,
 	PM8607_IRQ_CHG,
 	PM8607_IRQ_BAT,
 	PM8607_IRQ_RTC,
-	PM8607_IRQ_VBAT = 8,
+	PM8607_IRQ_CC,
+	PM8607_IRQ_VBAT,
 	PM8607_IRQ_VCHG,
 	PM8607_IRQ_VSYS,
 	PM8607_IRQ_TINT,
@@ -275,7 +276,7 @@ enum {
 	PM8607_IRQ_GPADC1,
 	PM8607_IRQ_GPADC2,
 	PM8607_IRQ_GPADC3,
-	PM8607_IRQ_AUDIO_SHORT = 16,
+	PM8607_IRQ_AUDIO_SHORT,
 	PM8607_IRQ_PEN,
 	PM8607_IRQ_HEADSET,
 	PM8607_IRQ_HOOK,
@@ -291,26 +292,19 @@ enum {
 	PM8607_CHIP_B0 = 0x48,
 };
 
-#define PM860X_NUM_IRQ		24
-
-struct pm860x_irq {
-	irq_handler_t		handler;
-	void			*data;
-};
-
 struct pm860x_chip {
 	struct device		*dev;
 	struct mutex		io_lock;
 	struct mutex		irq_lock;
 	struct i2c_client	*client;
 	struct i2c_client	*companion;	/* companion chip client */
-	struct pm860x_irq	irq[PM860X_NUM_IRQ];
 
 	int			buck3_double;	/* DVC ramp slope double */
 	unsigned short		companion_addr;
 	int			id;
 	int			irq_mode;
-	int			chip_irq;
+	int			irq_base;
+	int			core_irq;
 	unsigned char		chip_version;
 
 };
@@ -347,14 +341,20 @@ struct pm860x_touch_pdata {
 	unsigned long	flags;
 };
 
+struct pm860x_power_pdata {
+	unsigned	fast_charge;	/* charge current */
+};
+
 struct pm860x_platform_data {
 	struct pm860x_backlight_pdata	*backlight;
 	struct pm860x_led_pdata		*led;
 	struct pm860x_touch_pdata	*touch;
+	struct pm860x_power_pdata	*power;
 
 	unsigned short	companion_addr;	/* I2C address of companion chip */
 	int		i2c_port;	/* Controlled by GI2C or PI2C */
 	int		irq_mode;	/* Clear interrupt by read/write(0/1) */
+	int		irq_base;	/* IRQ base number of 88pm860x */
 	struct regulator_init_data *regulator[PM8607_MAX_REGULATOR];
 };
 
@@ -368,12 +368,6 @@ extern int pm860x_bulk_write(struct i2c_client *, int, int, unsigned char *);
 extern int pm860x_set_bits(struct i2c_client *, int, unsigned char,
 			   unsigned char);
 
-extern int pm860x_mask_irq(struct pm860x_chip *, int);
-extern int pm860x_unmask_irq(struct pm860x_chip *, int);
-extern int pm860x_request_irq(struct pm860x_chip *, int,
-			      irq_handler_t handler, void *);
-extern int pm860x_free_irq(struct pm860x_chip *, int);
-
 extern int pm860x_device_init(struct pm860x_chip *chip,
 			      struct pm860x_platform_data *pdata);
 extern void pm860x_device_exit(struct pm860x_chip *chip);
-- 
cgit v1.2.3


From a29aaf55cd6faa75e35abfe00bd3ffc537490485 Mon Sep 17 00:00:00 2001
From: Moiz Sonasath <m-sonasath@ti.com>
Date: Tue, 16 Feb 2010 18:57:21 -0600
Subject: mfd: Disable TWL4030/5030 I2C1/I2C4 internal pull-ups

This patch disables TWL4030/5030 I2C1 adn I2C4(SR) internal pull-up, to
use only the external HW resistor >=470 Ohm for the assured
functionality in HS mode.

While testing the I2C in High Speed mode, it was discovered that
without a proper pull-up resistor, there is data corruption during
multi-byte transfer. RTC(time_set) test case was used for testing.

From the analysis done, it was concluded that ideally we need a
pull-up of 1.6k Ohm(recomended) or atleast 470 Ohm or greater for
assured performance in HS mode.

Signed-off-by: Moiz Sonasath <m-sonasath@ti.com>
Signed-off-by: Allen Pais <allen.pais@ti.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/twl-core.c  | 13 +++++++++++++
 include/linux/i2c/twl.h | 15 +++++++++++++++
 2 files changed, 28 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/mfd/twl-core.c b/drivers/mfd/twl-core.c
index 942a1e837819..7ccc39f3aa48 100644
--- a/drivers/mfd/twl-core.c
+++ b/drivers/mfd/twl-core.c
@@ -958,6 +958,7 @@ twl_probe(struct i2c_client *client, const struct i2c_device_id *id)
 	int				status;
 	unsigned			i;
 	struct twl4030_platform_data	*pdata = client->dev.platform_data;
+	u8 temp;
 
 	if (!pdata) {
 		dev_dbg(&client->dev, "no platform data?\n");
@@ -1025,6 +1026,18 @@ twl_probe(struct i2c_client *client, const struct i2c_device_id *id)
 			goto fail;
 	}
 
+	/* Disable TWL4030/TWL5030 I2C Pull-up on I2C1 and I2C4(SR) interface.
+	 * Program I2C_SCL_CTRL_PU(bit 0)=0, I2C_SDA_CTRL_PU (bit 2)=0,
+	 * SR_I2C_SCL_CTRL_PU(bit 4)=0 and SR_I2C_SDA_CTRL_PU(bit 6)=0.
+	 */
+
+	if (twl_class_is_4030()) {
+		twl_i2c_read_u8(TWL4030_MODULE_INTBR, &temp, REG_GPPUPDCTR1);
+		temp &= ~(SR_I2C_SDA_CTRL_PU | SR_I2C_SCL_CTRL_PU | \
+		I2C_SDA_CTRL_PU | I2C_SCL_CTRL_PU);
+		twl_i2c_write_u8(TWL4030_MODULE_INTBR, temp, REG_GPPUPDCTR1);
+	}
+
 	status = add_children(pdata, id->driver_data);
 fail:
 	if (status < 0)
diff --git a/include/linux/i2c/twl.h b/include/linux/i2c/twl.h
index 9733e9e53f2b..e28d4c0e45bd 100644
--- a/include/linux/i2c/twl.h
+++ b/include/linux/i2c/twl.h
@@ -239,6 +239,21 @@ int twl6030_interrupt_mask(u8 bit_mask, u8 offset);
 
 /*----------------------------------------------------------------------*/
 
+/*Interface Bit Register (INTBR) offsets
+ *(Use TWL_4030_MODULE_INTBR)
+ */
+
+#define REG_GPPUPDCTR1			0x0F
+
+/*I2C1 and I2C4(SR) SDA/SCL pull-up control bits */
+
+#define I2C_SCL_CTRL_PU			BIT(0)
+#define I2C_SDA_CTRL_PU			BIT(2)
+#define SR_I2C_SCL_CTRL_PU		BIT(4)
+#define SR_I2C_SDA_CTRL_PU		BIT(6)
+
+/*----------------------------------------------------------------------*/
+
 /*
  * Keypad register offsets (use TWL4030_MODULE_KEYPAD)
  * ... SIH/interrupt only
-- 
cgit v1.2.3


From fa0d976298b25d090fafc3460c63fee1c8eea854 Mon Sep 17 00:00:00 2001
From: Balaji T K <balajitk@ti.com>
Date: Fri, 19 Feb 2010 12:39:38 +0100
Subject: mfd: Add twl6030 base addr for ID0, ID1, ID2

Add base address for generic slave ID0, ID1, ID2
and introduced one more entry to align RTC module number between
twl4030 and twl6030

Signed-off-by: Balaji T K <balajitk@ti.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/twl-core.c  | 7 +++++--
 include/linux/i2c/twl.h | 5 +++++
 2 files changed, 10 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/twl-core.c b/drivers/mfd/twl-core.c
index 7ccc39f3aa48..562cd4935e17 100644
--- a/drivers/mfd/twl-core.c
+++ b/drivers/mfd/twl-core.c
@@ -198,6 +198,7 @@
 /* subchip/slave 3 0x4B - AUDIO */
 #define TWL6030_BASEADD_AUDIO		0x0000
 #define TWL6030_BASEADD_RSV		0x0000
+#define TWL6030_BASEADD_ZERO		0x0000
 
 /* Few power values */
 #define R_CFG_BOOT			0x05
@@ -313,9 +314,11 @@ static struct twl_mapping twl6030_map[] = {
 	{ SUB_CHIP_ID1, TWL6030_BASEADD_CHARGER },
 	{ SUB_CHIP_ID1, TWL6030_BASEADD_GASGAUGE },
 	{ SUB_CHIP_ID1, TWL6030_BASEADD_PWM },
-	{ SUB_CHIP_ID2, TWL6030_BASEADD_RSV },
-	{ SUB_CHIP_ID2, TWL6030_BASEADD_RSV },
+	{ SUB_CHIP_ID0, TWL6030_BASEADD_ZERO },
+	{ SUB_CHIP_ID1, TWL6030_BASEADD_ZERO },
 
+	{ SUB_CHIP_ID2, TWL6030_BASEADD_ZERO },
+	{ SUB_CHIP_ID2, TWL6030_BASEADD_ZERO },
 	{ SUB_CHIP_ID2, TWL6030_BASEADD_RSV },
 	{ SUB_CHIP_ID2, TWL6030_BASEADD_RSV },
 	{ SUB_CHIP_ID2, TWL6030_BASEADD_RSV },
diff --git a/include/linux/i2c/twl.h b/include/linux/i2c/twl.h
index e28d4c0e45bd..70d4caf48571 100644
--- a/include/linux/i2c/twl.h
+++ b/include/linux/i2c/twl.h
@@ -80,6 +80,11 @@
 #define TWL_MODULE_PM_MASTER	TWL4030_MODULE_PM_MASTER
 #define TWL_MODULE_PM_RECEIVER	TWL4030_MODULE_PM_RECEIVER
 #define TWL_MODULE_RTC		TWL4030_MODULE_RTC
+#define TWL_MODULE_PWM		TWL4030_MODULE_PWM0
+
+#define TWL6030_MODULE_ID0	0x0D
+#define TWL6030_MODULE_ID1	0x0E
+#define TWL6030_MODULE_ID2	0x0F
 
 #define GPIO_INTR_OFFSET	0
 #define KEYPAD_INTR_OFFSET	1
-- 
cgit v1.2.3


From b741d440a97c376af309e902eeb2f3c5673d2c92 Mon Sep 17 00:00:00 2001
From: Yusuke Goda <goda.yusuke@renesas.com>
Date: Wed, 17 Feb 2010 16:37:55 +0900
Subject: tmio_mmc: Add MMC_CAP_MMC_HIGHSPEED support V2

Enable MMC_CAP_XX support in the tmio_mmc driver if
pdata->capabilities is set.

Signed-off-by: Yusuke Goda <goda.yusuke@renesas.com>
Signed-off-by: Magnus Damm <damm@opensource.se>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mmc/host/tmio_mmc.c | 1 +
 include/linux/mfd/tmio.h    | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/mmc/host/tmio_mmc.c b/drivers/mmc/host/tmio_mmc.c
index e22c3fa3516a..e2c0cc9a0ca6 100644
--- a/drivers/mmc/host/tmio_mmc.c
+++ b/drivers/mmc/host/tmio_mmc.c
@@ -550,6 +550,7 @@ static int __devinit tmio_mmc_probe(struct platform_device *dev)
 
 	mmc->ops = &tmio_mmc_ops;
 	mmc->caps = MMC_CAP_4_BIT_DATA;
+	mmc->caps |= pdata->capabilities;
 	mmc->f_max = pdata->hclk;
 	mmc->f_min = mmc->f_max / 512;
 	mmc->ocr_avail = MMC_VDD_32_33 | MMC_VDD_33_34;
diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h
index 9cb1834deffa..37d941420ce4 100644
--- a/include/linux/mfd/tmio.h
+++ b/include/linux/mfd/tmio.h
@@ -60,6 +60,7 @@ void tmio_core_mmc_clk_div(void __iomem *cnf, int shift, int state);
  */
 struct tmio_mmc_data {
 	const unsigned int		hclk;
+	unsigned long			capabilities;
 	void (*set_pwr)(struct platform_device *host, int state);
 	void (*set_clk_div)(struct platform_device *host, int state);
 };
-- 
cgit v1.2.3


From 707f0b2fbc65876e8abd94d26d8d0620600c05d4 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@opensource.se>
Date: Wed, 17 Feb 2010 16:38:14 +0900
Subject: tmio_mmc: Remove const from platform data V3

Remove const from the tmio-mmc platform data hclk V3.
This change makes it possible to remove the type cast
from the sh_mobile_sdhi driver which is using the clock
framework to get the clock rate.

Signed-off-by: Magnus Damm <damm@opensource.se>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 include/linux/mfd/tmio.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h
index 37d941420ce4..c3f7dff8effc 100644
--- a/include/linux/mfd/tmio.h
+++ b/include/linux/mfd/tmio.h
@@ -59,7 +59,7 @@ void tmio_core_mmc_clk_div(void __iomem *cnf, int shift, int state);
  * data for the MMC controller
  */
 struct tmio_mmc_data {
-	const unsigned int		hclk;
+	unsigned int			hclk;
 	unsigned long			capabilities;
 	void (*set_pwr)(struct platform_device *host, int state);
 	void (*set_clk_div)(struct platform_device *host, int state);
-- 
cgit v1.2.3


From f92e8f8144243a3651b2e350b706ea2d04931f8c Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Wed, 17 Feb 2010 18:45:25 +0000
Subject: mfd: Add WM831x revision B support

Revision B of the WM831x devices changes the sense of the tristate
bit for GPIO configuration, inverting it to become an enable instead.
Take account of this in the gpiolib driver.

A current sink regulation status bit has also been added in revision B,
add a flag indicating if it's present but don't use it yet.

This revision also adds an interrupt on key up for the ON pin event
which the existing code is able to take advantage of.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/gpio/wm831x-gpio.c      | 21 ++++++++++++++++-----
 drivers/mfd/wm831x-core.c       | 15 +++++++++++++++
 include/linux/mfd/wm831x/core.h |  4 ++++
 include/linux/mfd/wm831x/gpio.h |  4 ++++
 4 files changed, 39 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/gpio/wm831x-gpio.c b/drivers/gpio/wm831x-gpio.c
index 2554180534a1..5b8dc098d80f 100644
--- a/drivers/gpio/wm831x-gpio.c
+++ b/drivers/gpio/wm831x-gpio.c
@@ -38,10 +38,13 @@ static int wm831x_gpio_direction_in(struct gpio_chip *chip, unsigned offset)
 {
 	struct wm831x_gpio *wm831x_gpio = to_wm831x_gpio(chip);
 	struct wm831x *wm831x = wm831x_gpio->wm831x;
+	int val = WM831X_GPN_DIR;
+
+	if (wm831x->has_gpio_ena)
+		val |= WM831X_GPN_TRI;
 
 	return wm831x_set_bits(wm831x, WM831X_GPIO1_CONTROL + offset,
-			       WM831X_GPN_DIR | WM831X_GPN_TRI,
-			       WM831X_GPN_DIR);
+			       WM831X_GPN_DIR | WM831X_GPN_TRI, val);
 }
 
 static int wm831x_gpio_get(struct gpio_chip *chip, unsigned offset)
@@ -74,10 +77,14 @@ static int wm831x_gpio_direction_out(struct gpio_chip *chip,
 {
 	struct wm831x_gpio *wm831x_gpio = to_wm831x_gpio(chip);
 	struct wm831x *wm831x = wm831x_gpio->wm831x;
+	int val = 0;
 	int ret;
 
+	if (wm831x->has_gpio_ena)
+		val |= WM831X_GPN_TRI;
+
 	ret = wm831x_set_bits(wm831x, WM831X_GPIO1_CONTROL + offset,
-			      WM831X_GPN_DIR | WM831X_GPN_TRI, 0);
+			      WM831X_GPN_DIR | WM831X_GPN_TRI, val);
 	if (ret < 0)
 		return ret;
 
@@ -103,7 +110,7 @@ static void wm831x_gpio_dbg_show(struct seq_file *s, struct gpio_chip *chip)
 {
 	struct wm831x_gpio *wm831x_gpio = to_wm831x_gpio(chip);
 	struct wm831x *wm831x = wm831x_gpio->wm831x;
-	int i;
+	int i, tristated;
 
 	for (i = 0; i < chip->ngpio; i++) {
 		int gpio = i + chip->base;
@@ -170,6 +177,10 @@ static void wm831x_gpio_dbg_show(struct seq_file *s, struct gpio_chip *chip)
 			break;
 		}
 
+		tristated = reg & WM831X_GPN_TRI;
+		if (wm831x->has_gpio_ena)
+			tristated = !tristated;
+
 		seq_printf(s, " %s %s %s %s%s\n"
 			   "                                  %s%s (0x%4x)\n",
 			   reg & WM831X_GPN_DIR ? "in" : "out",
@@ -178,7 +189,7 @@ static void wm831x_gpio_dbg_show(struct seq_file *s, struct gpio_chip *chip)
 			   powerdomain,
 			   reg & WM831X_GPN_POL ? "" : " inverted",
 			   reg & WM831X_GPN_OD ? "open-drain" : "CMOS",
-			   reg & WM831X_GPN_TRI ? " tristated" : "",
+			   tristated ? " tristated" : "",
 			   reg);
 	}
 }
diff --git a/drivers/mfd/wm831x-core.c b/drivers/mfd/wm831x-core.c
index 4b2021af1d96..c428d9f918fc 100644
--- a/drivers/mfd/wm831x-core.c
+++ b/drivers/mfd/wm831x-core.c
@@ -1449,18 +1449,33 @@ static int wm831x_device_init(struct wm831x *wm831x, unsigned long id, int irq)
 	case WM8310:
 		parent = WM8310;
 		wm831x->num_gpio = 16;
+		if (rev > 0) {
+			wm831x->has_gpio_ena = 1;
+			wm831x->has_cs_sts = 1;
+		}
+
 		dev_info(wm831x->dev, "WM8310 revision %c\n", 'A' + rev);
 		break;
 
 	case WM8311:
 		parent = WM8311;
 		wm831x->num_gpio = 16;
+		if (rev > 0) {
+			wm831x->has_gpio_ena = 1;
+			wm831x->has_cs_sts = 1;
+		}
+
 		dev_info(wm831x->dev, "WM8311 revision %c\n", 'A' + rev);
 		break;
 
 	case WM8312:
 		parent = WM8312;
 		wm831x->num_gpio = 16;
+		if (rev > 0) {
+			wm831x->has_gpio_ena = 1;
+			wm831x->has_cs_sts = 1;
+		}
+
 		dev_info(wm831x->dev, "WM8312 revision %c\n", 'A' + rev);
 		break;
 
diff --git a/include/linux/mfd/wm831x/core.h b/include/linux/mfd/wm831x/core.h
index 5184b79c700b..53580b592bc9 100644
--- a/include/linux/mfd/wm831x/core.h
+++ b/include/linux/mfd/wm831x/core.h
@@ -254,6 +254,10 @@ struct wm831x {
 	int irq_masks_cur[WM831X_NUM_IRQ_REGS];   /* Currently active value */
 	int irq_masks_cache[WM831X_NUM_IRQ_REGS]; /* Cached hardware value */
 
+	/* Chip revision based flags */
+	unsigned has_gpio_ena:1;  /* Has GPIO enable bit */
+	unsigned has_cs_sts:1;    /* Has current sink status bit */
+
 	int num_gpio;
 
 	struct mutex auxadc_lock;
diff --git a/include/linux/mfd/wm831x/gpio.h b/include/linux/mfd/wm831x/gpio.h
index 2835614af0e3..9b163c58865f 100644
--- a/include/linux/mfd/wm831x/gpio.h
+++ b/include/linux/mfd/wm831x/gpio.h
@@ -41,6 +41,10 @@
 #define WM831X_GPN_OD_MASK                      0x0200  /* GPN_OD */
 #define WM831X_GPN_OD_SHIFT                          9  /* GPN_OD */
 #define WM831X_GPN_OD_WIDTH                          1  /* GPN_OD */
+#define WM831X_GPN_ENA                          0x0080  /* GPN_ENA */
+#define WM831X_GPN_ENA_MASK                     0x0080  /* GPN_ENA */
+#define WM831X_GPN_ENA_SHIFT                         7  /* GPN_ENA */
+#define WM831X_GPN_ENA_WIDTH                         1  /* GPN_ENA */
 #define WM831X_GPN_TRI                          0x0080  /* GPN_TRI */
 #define WM831X_GPN_TRI_MASK                     0x0080  /* GPN_TRI */
 #define WM831X_GPN_TRI_SHIFT                         7  /* GPN_TRI */
-- 
cgit v1.2.3


From 11a441ce82d6ffecfd39b324024de0cd630b36c1 Mon Sep 17 00:00:00 2001
From: Mike Turquette <mturquette@ti.com>
Date: Mon, 22 Feb 2010 11:16:30 -0600
Subject: mfd: Introduce remove_script function for twl4030

New function twl4030_remove_script(u8 flags) takes a script type as
defined in twl.h and prevents any script already loaded in that position
from running.  This is accomplished by programming SEQ_ADD_* to 0x3f,
the END_OF_SCRIPT value, where SEQ_ADD_* is determined by flags.

(Future) users of this function include OMAP board files for machines
facing a race condition between sleep and warm reset.

Signed-off-by: Mike Turquette <mturquette@ti.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/twl4030-power.c | 50 +++++++++++++++++++++++++++++++++++++++++++++
 include/linux/i2c/twl.h     |  1 +
 2 files changed, 51 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/mfd/twl4030-power.c b/drivers/mfd/twl4030-power.c
index 5b045ff4a2c2..7efa8789a3a2 100644
--- a/drivers/mfd/twl4030-power.c
+++ b/drivers/mfd/twl4030-power.c
@@ -461,6 +461,56 @@ out:
 	return err;
 }
 
+int twl4030_remove_script(u8 flags)
+{
+	int err = 0;
+
+	err = twl_i2c_write_u8(TWL4030_MODULE_PM_MASTER, R_KEY_1,
+			R_PROTECT_KEY);
+	if (err) {
+		pr_err("twl4030: unable to unlock PROTECT_KEY\n");
+		return err;
+	}
+
+	err = twl_i2c_write_u8(TWL4030_MODULE_PM_MASTER, R_KEY_2,
+			R_PROTECT_KEY);
+	if (err) {
+		pr_err("twl4030: unable to unlock PROTECT_KEY\n");
+		return err;
+	}
+
+	if (flags & TWL4030_WRST_SCRIPT) {
+		err = twl_i2c_write_u8(TWL4030_MODULE_PM_MASTER, END_OF_SCRIPT,
+				R_SEQ_ADD_WARM);
+		if (err)
+			return err;
+	}
+	if (flags & TWL4030_WAKEUP12_SCRIPT) {
+		if (err)
+		err = twl_i2c_write_u8(TWL4030_MODULE_PM_MASTER, END_OF_SCRIPT,
+				R_SEQ_ADD_S2A12);
+			return err;
+	}
+	if (flags & TWL4030_WAKEUP3_SCRIPT) {
+		err = twl_i2c_write_u8(TWL4030_MODULE_PM_MASTER, END_OF_SCRIPT,
+				R_SEQ_ADD_S2A3);
+		if (err)
+			return err;
+	}
+	if (flags & TWL4030_SLEEP_SCRIPT) {
+		err = twl_i2c_write_u8(TWL4030_MODULE_PM_MASTER, END_OF_SCRIPT,
+				R_SEQ_ADD_A2S);
+		if (err)
+			return err;
+	}
+
+	err = twl_i2c_write_u8(TWL4030_MODULE_PM_MASTER, 0, R_PROTECT_KEY);
+	if (err)
+		pr_err("TWL4030 Unable to relock registers\n");
+
+	return err;
+}
+
 void __init twl4030_power_init(struct twl4030_power_data *twl4030_scripts)
 {
 	int err = 0;
diff --git a/include/linux/i2c/twl.h b/include/linux/i2c/twl.h
index 70d4caf48571..fb6784e86d5f 100644
--- a/include/linux/i2c/twl.h
+++ b/include/linux/i2c/twl.h
@@ -550,6 +550,7 @@ struct twl4030_power_data {
 };
 
 extern void twl4030_power_init(struct twl4030_power_data *triton2_scripts);
+extern int twl4030_remove_script(u8 flags);
 
 struct twl4030_codec_audio_data {
 	unsigned int	audio_mclk;
-- 
cgit v1.2.3


From d19663ac61a6e36eec655d3c84a106686ebddd2c Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Tue, 23 Feb 2010 11:08:05 +0000
Subject: mfd: Use completion interrupt for WM835x AUXADC

Use the completion interrupt generated by the device rather than
polling for conversions to complete. As a backup we still check
the state of the AUXADC if we don't get a completion, mostly for
systems that don't have the WM8350 interrupt infrastructure hooked
up.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/wm8350-core.c       | 35 +++++++++++++++++++++++++++++------
 include/linux/mfd/wm8350/core.h |  2 ++
 2 files changed, 31 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/wm8350-core.c b/drivers/mfd/wm8350-core.c
index 9a970bd68775..bd75807d5302 100644
--- a/drivers/mfd/wm8350-core.c
+++ b/drivers/mfd/wm8350-core.c
@@ -339,7 +339,6 @@ EXPORT_SYMBOL_GPL(wm8350_reg_unlock);
 int wm8350_read_auxadc(struct wm8350 *wm8350, int channel, int scale, int vref)
 {
 	u16 reg, result = 0;
-	int tries = 5;
 
 	if (channel < WM8350_AUXADC_AUX1 || channel > WM8350_AUXADC_TEMP)
 		return -EINVAL;
@@ -363,12 +362,13 @@ int wm8350_read_auxadc(struct wm8350 *wm8350, int channel, int scale, int vref)
 	reg |= 1 << channel | WM8350_AUXADC_POLL;
 	wm8350_reg_write(wm8350, WM8350_DIGITISER_CONTROL_1, reg);
 
-	do {
-		schedule_timeout_interruptible(1);
-		reg = wm8350_reg_read(wm8350, WM8350_DIGITISER_CONTROL_1);
-	} while ((reg & WM8350_AUXADC_POLL) && --tries);
+	/* We ignore the result of the completion and just check for a
+	 * conversion result, allowing us to soldier on if the IRQ
+	 * infrastructure is not set up for the chip. */
+	wait_for_completion_timeout(&wm8350->auxadc_done, msecs_to_jiffies(5));
 
-	if (!tries)
+	reg = wm8350_reg_read(wm8350, WM8350_DIGITISER_CONTROL_1);
+	if (reg & WM8350_AUXADC_POLL)
 		dev_err(wm8350->dev, "adc chn %d read timeout\n", channel);
 	else
 		result = wm8350_reg_read(wm8350,
@@ -385,6 +385,15 @@ int wm8350_read_auxadc(struct wm8350 *wm8350, int channel, int scale, int vref)
 }
 EXPORT_SYMBOL_GPL(wm8350_read_auxadc);
 
+static irqreturn_t wm8350_auxadc_irq(int irq, void *irq_data)
+{
+	struct wm8350 *wm8350 = irq_data;
+
+	complete(&wm8350->auxadc_done);
+
+	return IRQ_HANDLED;
+}
+
 /*
  * Cache is always host endian.
  */
@@ -682,11 +691,22 @@ int wm8350_device_init(struct wm8350 *wm8350, int irq,
 	}
 
 	mutex_init(&wm8350->auxadc_mutex);
+	init_completion(&wm8350->auxadc_done);
 
 	ret = wm8350_irq_init(wm8350, irq, pdata);
 	if (ret < 0)
 		goto err;
 
+	if (wm8350->irq_base) {
+		ret = request_threaded_irq(wm8350->irq_base +
+					   WM8350_IRQ_AUXADC_DATARDY,
+					   NULL, wm8350_auxadc_irq, 0,
+					   "auxadc", wm8350);
+		if (ret < 0)
+			dev_warn(wm8350->dev,
+				 "Failed to request AUXADC IRQ: %d\n", ret);
+	}
+
 	if (pdata && pdata->init) {
 		ret = pdata->init(wm8350);
 		if (ret != 0) {
@@ -736,6 +756,9 @@ void wm8350_device_exit(struct wm8350 *wm8350)
 	platform_device_unregister(wm8350->gpio.pdev);
 	platform_device_unregister(wm8350->codec.pdev);
 
+	if (wm8350->irq_base)
+		free_irq(wm8350->irq_base + WM8350_IRQ_AUXADC_DATARDY, wm8350);
+
 	wm8350_irq_exit(wm8350);
 
 	kfree(wm8350->reg_cache);
diff --git a/include/linux/mfd/wm8350/core.h b/include/linux/mfd/wm8350/core.h
index fae08aa65413..98fcc977e82b 100644
--- a/include/linux/mfd/wm8350/core.h
+++ b/include/linux/mfd/wm8350/core.h
@@ -16,6 +16,7 @@
 #include <linux/kernel.h>
 #include <linux/mutex.h>
 #include <linux/interrupt.h>
+#include <linux/completion.h>
 
 #include <linux/mfd/wm8350/audio.h>
 #include <linux/mfd/wm8350/gpio.h>
@@ -621,6 +622,7 @@ struct wm8350 {
 	u16 *reg_cache;
 
 	struct mutex auxadc_mutex;
+	struct completion auxadc_done;
 
 	/* Interrupt handling */
 	struct mutex irq_lock;
-- 
cgit v1.2.3


From 473fe73650b9f92114edbedfbb616561c1a0026c Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Tue, 23 Feb 2010 11:08:06 +0000
Subject: mfd: Use completion interrupt for WM831x AUXADC

Use the completion interrupt generated by the device rather than
polling for conversions to complete. As a backup we still check
the status of the AUXADC if we don't get a completion, mostly for
systems that don't have the WM831x interrupt infrastructure hooked
up.

Also reduce the timeout for completion of conversions to 5ms from
the previous 10ms, the lower timeout should be sufficient.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/wm831x-core.c       | 36 +++++++++++++++++++++++++++++-------
 include/linux/mfd/wm831x/core.h |  2 ++
 2 files changed, 31 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/wm831x-core.c b/drivers/mfd/wm831x-core.c
index c428d9f918fc..07101e9e1cba 100644
--- a/drivers/mfd/wm831x-core.c
+++ b/drivers/mfd/wm831x-core.c
@@ -321,7 +321,6 @@ EXPORT_SYMBOL_GPL(wm831x_set_bits);
  */
 int wm831x_auxadc_read(struct wm831x *wm831x, enum wm831x_auxadc input)
 {
-	int tries = 10;
 	int ret, src;
 
 	mutex_lock(&wm831x->auxadc_lock);
@@ -349,13 +348,14 @@ int wm831x_auxadc_read(struct wm831x *wm831x, enum wm831x_auxadc input)
 		goto disable;
 	}
 
-	do {
-		msleep(1);
+	/* Ignore the result to allow us to soldier on without IRQ hookup */
+	wait_for_completion_timeout(&wm831x->auxadc_done, msecs_to_jiffies(5));
 
-		ret = wm831x_reg_read(wm831x, WM831X_AUXADC_CONTROL);
-		if (ret < 0)
-			ret = WM831X_AUX_CVT_ENA;
-	} while ((ret & WM831X_AUX_CVT_ENA) && --tries);
+	ret = wm831x_reg_read(wm831x, WM831X_AUXADC_CONTROL);
+	if (ret < 0) {
+		dev_err(wm831x->dev, "AUXADC status read failed: %d\n", ret);
+		goto disable;
+	}
 
 	if (ret & WM831X_AUX_CVT_ENA) {
 		dev_err(wm831x->dev, "Timed out reading AUXADC\n");
@@ -390,6 +390,15 @@ out:
 }
 EXPORT_SYMBOL_GPL(wm831x_auxadc_read);
 
+static irqreturn_t wm831x_auxadc_irq(int irq, void *irq_data)
+{
+	struct wm831x *wm831x = irq_data;
+
+	complete(&wm831x->auxadc_done);
+
+	return IRQ_HANDLED;
+}
+
 /**
  * wm831x_auxadc_read_uv: Read a voltage from the WM831x AUXADC
  *
@@ -1411,6 +1420,7 @@ static int wm831x_device_init(struct wm831x *wm831x, unsigned long id, int irq)
 	mutex_init(&wm831x->io_lock);
 	mutex_init(&wm831x->key_lock);
 	mutex_init(&wm831x->auxadc_lock);
+	init_completion(&wm831x->auxadc_done);
 	dev_set_drvdata(wm831x->dev, wm831x);
 
 	ret = wm831x_reg_read(wm831x, WM831X_PARENT_ID);
@@ -1523,6 +1533,16 @@ static int wm831x_device_init(struct wm831x *wm831x, unsigned long id, int irq)
 	if (ret != 0)
 		goto err;
 
+	if (wm831x->irq_base) {
+		ret = request_threaded_irq(wm831x->irq_base +
+					   WM831X_IRQ_AUXADC_DATA,
+					   NULL, wm831x_auxadc_irq, 0,
+					   "auxadc", wm831x);
+		if (ret < 0)
+			dev_err(wm831x->dev, "AUXADC IRQ request failed: %d\n",
+				ret);
+	}
+
 	/* The core device is up, instantiate the subdevices. */
 	switch (parent) {
 	case WM8310:
@@ -1593,6 +1613,8 @@ static void wm831x_device_exit(struct wm831x *wm831x)
 {
 	wm831x_otp_exit(wm831x);
 	mfd_remove_devices(wm831x->dev);
+	if (wm831x->irq_base)
+		free_irq(wm831x->irq_base + WM831X_IRQ_AUXADC_DATA, wm831x);
 	wm831x_irq_exit(wm831x);
 	kfree(wm831x);
 }
diff --git a/include/linux/mfd/wm831x/core.h b/include/linux/mfd/wm831x/core.h
index 53580b592bc9..5915f6e3d9ab 100644
--- a/include/linux/mfd/wm831x/core.h
+++ b/include/linux/mfd/wm831x/core.h
@@ -15,6 +15,7 @@
 #ifndef __MFD_WM831X_CORE_H__
 #define __MFD_WM831X_CORE_H__
 
+#include <linux/completion.h>
 #include <linux/interrupt.h>
 
 /*
@@ -261,6 +262,7 @@ struct wm831x {
 	int num_gpio;
 
 	struct mutex auxadc_lock;
+	struct completion auxadc_done;
 
 	/* The WM831x has a security key blocking access to certain
 	 * registers.  The mutex is taken by the accessors for locking
-- 
cgit v1.2.3


From 2c08583c6a6b4c5f5dea4cb0931eca82af7db6fe Mon Sep 17 00:00:00 2001
From: Peter Huewe <peterhuewe@gmx.de>
Date: Sat, 6 Mar 2010 14:36:38 +0100
Subject: mfd: Fix ucb1x00 build failure for collie_defconfig

This patch fixes a build failure[1], by adding the missing semaphore.h include

References:
[1] http://kisskb.ellerman.id.au/kisskb/buildresult/2234322/

Signed-off-by: Peter Huewe <peterhuewe@gmx.de>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/ucb1x00-core.c  | 1 +
 include/linux/mfd/ucb1x00.h | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/mfd/ucb1x00-core.c b/drivers/mfd/ucb1x00-core.c
index 252b74188ec2..b281217334eb 100644
--- a/drivers/mfd/ucb1x00-core.c
+++ b/drivers/mfd/ucb1x00-core.c
@@ -27,6 +27,7 @@
 #include <linux/mutex.h>
 #include <linux/mfd/ucb1x00.h>
 #include <linux/gpio.h>
+#include <linux/semaphore.h>
 
 #include <mach/dma.h>
 #include <mach/hardware.h>
diff --git a/include/linux/mfd/ucb1x00.h b/include/linux/mfd/ucb1x00.h
index aa9c3789bed4..4321f044d1e4 100644
--- a/include/linux/mfd/ucb1x00.h
+++ b/include/linux/mfd/ucb1x00.h
@@ -12,6 +12,7 @@
 
 #include <linux/mfd/mcp.h>
 #include <linux/gpio.h>
+#include <linux/semaphore.h>
 
 #define UCB_IO_DATA	0x00
 #define UCB_IO_DIR	0x01
-- 
cgit v1.2.3


From ecdf6ceb8cf4756bd4214bf9755755752b6015f5 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Tue, 29 Dec 2009 20:11:20 -0800
Subject: Driver core: add platform_create_bundle() helper

Many legacy-style module create singleton platform devices themselves,
along with corresponding platform driver. Instead of replicating error
handling code in all such drivers, provide a helper that allocates and
registers a single platform device and a driver and binds them together.

Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/base/platform.c         | 58 +++++++++++++++++++++++++++++++++++++++++
 include/linux/platform_device.h |  5 ++++
 2 files changed, 63 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/base/platform.c b/drivers/base/platform.c
index 58efaf2f1259..937d58021d1b 100644
--- a/drivers/base/platform.c
+++ b/drivers/base/platform.c
@@ -548,6 +548,64 @@ int __init_or_module platform_driver_probe(struct platform_driver *drv,
 }
 EXPORT_SYMBOL_GPL(platform_driver_probe);
 
+/**
+ * platform_create_bundle - register driver and create corresponding device
+ * @driver: platform driver structure
+ * @probe: the driver probe routine, probably from an __init section
+ * @res: set of resources that needs to be allocated for the device
+ * @n_res: number of resources
+ * @data: platform specific data for this platform device
+ * @size: size of platform specific data
+ *
+ * Use this in legacy-style modules that probe hardware directly and
+ * register a single platform device and corresponding platform driver.
+ */
+struct platform_device * __init_or_module platform_create_bundle(
+			struct platform_driver *driver,
+			int (*probe)(struct platform_device *),
+			struct resource *res, unsigned int n_res,
+			const void *data, size_t size)
+{
+	struct platform_device *pdev;
+	int error;
+
+	pdev = platform_device_alloc(driver->driver.name, -1);
+	if (!pdev) {
+		error = -ENOMEM;
+		goto err_out;
+	}
+
+	if (res) {
+		error = platform_device_add_resources(pdev, res, n_res);
+		if (error)
+			goto err_pdev_put;
+	}
+
+	if (data) {
+		error = platform_device_add_data(pdev, data, size);
+		if (error)
+			goto err_pdev_put;
+	}
+
+	error = platform_device_add(pdev);
+	if (error)
+		goto err_pdev_put;
+
+	error = platform_driver_probe(driver, probe);
+	if (error)
+		goto err_pdev_del;
+
+	return pdev;
+
+err_pdev_del:
+	platform_device_del(pdev);
+err_pdev_put:
+	platform_device_put(pdev);
+err_out:
+	return ERR_PTR(error);
+}
+EXPORT_SYMBOL_GPL(platform_create_bundle);
+
 /* modalias support enables more hands-off userspace setup:
  * (a) environment variable lets new-style hotplug events work once system is
  *     fully running:  "modprobe $MODALIAS"
diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h
index 71ff887ca44e..25e64b43e644 100644
--- a/include/linux/platform_device.h
+++ b/include/linux/platform_device.h
@@ -77,6 +77,11 @@ extern int platform_driver_probe(struct platform_driver *driver,
 #define platform_get_drvdata(_dev)	dev_get_drvdata(&(_dev)->dev)
 #define platform_set_drvdata(_dev,data)	dev_set_drvdata(&(_dev)->dev, (data))
 
+extern struct platform_device *platform_create_bundle(struct platform_driver *driver,
+					int (*probe)(struct platform_device *),
+					struct resource *res, unsigned int n_res,
+					const void *data, size_t size);
+
 /* early platform driver interface */
 struct early_platform_driver {
 	const char *class_str;
-- 
cgit v1.2.3


From 3d03ba4d1dd2246adff5a9ff1194a539b3bc05a7 Mon Sep 17 00:00:00 2001
From: Eric Miao <eric.y.miao@gmail.com>
Date: Fri, 1 Jan 2010 15:43:28 +0800
Subject: driver core: make platform_device_id table const

The platform ID table is normally const, force that by adding the attribute.

Signed-off-by: Eric Miao <eric.y.miao@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/platform_device.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h
index 25e64b43e644..2c2d035bfb92 100644
--- a/include/linux/platform_device.h
+++ b/include/linux/platform_device.h
@@ -21,7 +21,7 @@ struct platform_device {
 	u32		num_resources;
 	struct resource	* resource;
 
-	struct platform_device_id	*id_entry;
+	const struct platform_device_id	*id_entry;
 
 	/* arch specific additions */
 	struct pdev_archdata	archdata;
-- 
cgit v1.2.3


From c9be0a36f9bf392a7984473124a67a12964df11f Mon Sep 17 00:00:00 2001
From: Andi Kleen <andi@firstfloor.org>
Date: Tue, 5 Jan 2010 12:47:58 +0100
Subject: sysdev: Pass attribute in sysdev_class attributes show/store

Passing the attribute to the low level IO functions allows all kinds
of cleanups, by sharing low level IO code without requiring
an own function for every piece of data.

Also drivers can extend the attributes with own data fields
and use that in the low level function.

Similar to sysdev_attributes and normal attributes.

This is a tree-wide sweep, converting everything in one go.

No functional changes in this patch other than passing the new
argument everywhere.

Tested on x86, the non x86 parts are uncompiled.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 arch/mips/txx9/generic/7segled.c |  5 +++-
 arch/s390/kernel/smp.c           |  8 +++++--
 arch/s390/kernel/time.c          | 49 ++++++++++++++++++++++++++++++----------
 drivers/base/cpu.c               |  9 +++++---
 drivers/base/node.c              | 17 ++++++++++----
 drivers/base/sys.c               |  4 ++--
 drivers/cpuidle/sysfs.c          |  4 ++++
 include/linux/sysdev.h           |  6 +++--
 kernel/perf_event.c              | 13 ++++++++---
 kernel/sched.c                   |  4 ++++
 10 files changed, 89 insertions(+), 30 deletions(-)

(limited to 'include/linux')

diff --git a/arch/mips/txx9/generic/7segled.c b/arch/mips/txx9/generic/7segled.c
index 727ab21b6618..7f8416f86222 100644
--- a/arch/mips/txx9/generic/7segled.c
+++ b/arch/mips/txx9/generic/7segled.c
@@ -58,13 +58,16 @@ static ssize_t raw_store(struct sys_device *dev,
 static SYSDEV_ATTR(ascii, 0200, NULL, ascii_store);
 static SYSDEV_ATTR(raw, 0200, NULL, raw_store);
 
-static ssize_t map_seg7_show(struct sysdev_class *class, char *buf)
+static ssize_t map_seg7_show(struct sysdev_class *class,
+			     struct sysdev_class_attribute *attr,
+			     char *buf)
 {
 	memcpy(buf, &txx9_seg7map, sizeof(txx9_seg7map));
 	return sizeof(txx9_seg7map);
 }
 
 static ssize_t map_seg7_store(struct sysdev_class *class,
+			      struct sysdev_class_attribute *attr,
 			      const char *buf, size_t size)
 {
 	if (size != sizeof(txx9_seg7map))
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 8b10127c00ad..e2121099f03b 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -1020,7 +1020,9 @@ out:
 	return rc;
 }
 
-static ssize_t __ref rescan_store(struct sysdev_class *class, const char *buf,
+static ssize_t __ref rescan_store(struct sysdev_class *class,
+				  struct sysdev_class_attribute *attr,
+				  const char *buf,
 				  size_t count)
 {
 	int rc;
@@ -1041,7 +1043,9 @@ static ssize_t dispatching_show(struct sysdev_class *class, char *buf)
 	return count;
 }
 
-static ssize_t dispatching_store(struct sysdev_class *dev, const char *buf,
+static ssize_t dispatching_store(struct sysdev_class *dev,
+				 struct sysdev_class_attribute *attr,
+				 const char *buf,
 				 size_t count)
 {
 	int val, rc;
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index a8f93f1705ad..75894c281710 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -1116,14 +1116,18 @@ static struct sys_device etr_port1_dev = {
 /*
  * ETR class attributes
  */
-static ssize_t etr_stepping_port_show(struct sysdev_class *class, char *buf)
+static ssize_t etr_stepping_port_show(struct sysdev_class *class,
+					struct sysdev_class_attribute *attr,
+					char *buf)
 {
 	return sprintf(buf, "%i\n", etr_port0.esw.p);
 }
 
 static SYSDEV_CLASS_ATTR(stepping_port, 0400, etr_stepping_port_show, NULL);
 
-static ssize_t etr_stepping_mode_show(struct sysdev_class *class, char *buf)
+static ssize_t etr_stepping_mode_show(struct sysdev_class *class,
+				      	struct sysdev_class_attribute *attr,
+					char *buf)
 {
 	char *mode_str;
 
@@ -1584,7 +1588,9 @@ static struct sysdev_class stp_sysclass = {
 	.name	= "stp",
 };
 
-static ssize_t stp_ctn_id_show(struct sysdev_class *class, char *buf)
+static ssize_t stp_ctn_id_show(struct sysdev_class *class,
+				struct sysdev_class_attribute *attr,
+				char *buf)
 {
 	if (!stp_online)
 		return -ENODATA;
@@ -1594,7 +1600,9 @@ static ssize_t stp_ctn_id_show(struct sysdev_class *class, char *buf)
 
 static SYSDEV_CLASS_ATTR(ctn_id, 0400, stp_ctn_id_show, NULL);
 
-static ssize_t stp_ctn_type_show(struct sysdev_class *class, char *buf)
+static ssize_t stp_ctn_type_show(struct sysdev_class *class,
+				struct sysdev_class_attribute *attr,
+				char *buf)
 {
 	if (!stp_online)
 		return -ENODATA;
@@ -1603,7 +1611,9 @@ static ssize_t stp_ctn_type_show(struct sysdev_class *class, char *buf)
 
 static SYSDEV_CLASS_ATTR(ctn_type, 0400, stp_ctn_type_show, NULL);
 
-static ssize_t stp_dst_offset_show(struct sysdev_class *class, char *buf)
+static ssize_t stp_dst_offset_show(struct sysdev_class *class,
+				   struct sysdev_class_attribute *attr,
+				   char *buf)
 {
 	if (!stp_online || !(stp_info.vbits & 0x2000))
 		return -ENODATA;
@@ -1612,7 +1622,9 @@ static ssize_t stp_dst_offset_show(struct sysdev_class *class, char *buf)
 
 static SYSDEV_CLASS_ATTR(dst_offset, 0400, stp_dst_offset_show, NULL);
 
-static ssize_t stp_leap_seconds_show(struct sysdev_class *class, char *buf)
+static ssize_t stp_leap_seconds_show(struct sysdev_class *class,
+					struct sysdev_class_attribute *attr,
+					char *buf)
 {
 	if (!stp_online || !(stp_info.vbits & 0x8000))
 		return -ENODATA;
@@ -1621,7 +1633,9 @@ static ssize_t stp_leap_seconds_show(struct sysdev_class *class, char *buf)
 
 static SYSDEV_CLASS_ATTR(leap_seconds, 0400, stp_leap_seconds_show, NULL);
 
-static ssize_t stp_stratum_show(struct sysdev_class *class, char *buf)
+static ssize_t stp_stratum_show(struct sysdev_class *class,
+				struct sysdev_class_attribute *attr,
+				char *buf)
 {
 	if (!stp_online)
 		return -ENODATA;
@@ -1630,7 +1644,9 @@ static ssize_t stp_stratum_show(struct sysdev_class *class, char *buf)
 
 static SYSDEV_CLASS_ATTR(stratum, 0400, stp_stratum_show, NULL);
 
-static ssize_t stp_time_offset_show(struct sysdev_class *class, char *buf)
+static ssize_t stp_time_offset_show(struct sysdev_class *class,
+				struct sysdev_class_attribute *attr,
+				char *buf)
 {
 	if (!stp_online || !(stp_info.vbits & 0x0800))
 		return -ENODATA;
@@ -1639,7 +1655,9 @@ static ssize_t stp_time_offset_show(struct sysdev_class *class, char *buf)
 
 static SYSDEV_CLASS_ATTR(time_offset, 0400, stp_time_offset_show, NULL);
 
-static ssize_t stp_time_zone_offset_show(struct sysdev_class *class, char *buf)
+static ssize_t stp_time_zone_offset_show(struct sysdev_class *class,
+				struct sysdev_class_attribute *attr,
+				char *buf)
 {
 	if (!stp_online || !(stp_info.vbits & 0x4000))
 		return -ENODATA;
@@ -1649,7 +1667,9 @@ static ssize_t stp_time_zone_offset_show(struct sysdev_class *class, char *buf)
 static SYSDEV_CLASS_ATTR(time_zone_offset, 0400,
 			 stp_time_zone_offset_show, NULL);
 
-static ssize_t stp_timing_mode_show(struct sysdev_class *class, char *buf)
+static ssize_t stp_timing_mode_show(struct sysdev_class *class,
+				struct sysdev_class_attribute *attr,
+				char *buf)
 {
 	if (!stp_online)
 		return -ENODATA;
@@ -1658,7 +1678,9 @@ static ssize_t stp_timing_mode_show(struct sysdev_class *class, char *buf)
 
 static SYSDEV_CLASS_ATTR(timing_mode, 0400, stp_timing_mode_show, NULL);
 
-static ssize_t stp_timing_state_show(struct sysdev_class *class, char *buf)
+static ssize_t stp_timing_state_show(struct sysdev_class *class,
+				struct sysdev_class_attribute *attr,
+				char *buf)
 {
 	if (!stp_online)
 		return -ENODATA;
@@ -1667,12 +1689,15 @@ static ssize_t stp_timing_state_show(struct sysdev_class *class, char *buf)
 
 static SYSDEV_CLASS_ATTR(timing_state, 0400, stp_timing_state_show, NULL);
 
-static ssize_t stp_online_show(struct sysdev_class *class, char *buf)
+static ssize_t stp_online_show(struct sysdev_class *class,
+				struct sysdev_class_attribute *attr,
+				char *buf)
 {
 	return sprintf(buf, "%i\n", stp_online);
 }
 
 static ssize_t stp_online_store(struct sysdev_class *class,
+				struct sysdev_class_attribute *attr,
 				const char *buf, size_t count)
 {
 	unsigned int value;
diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c
index 958bd1540c30..fd1b2f9b7b8f 100644
--- a/drivers/base/cpu.c
+++ b/drivers/base/cpu.c
@@ -151,7 +151,8 @@ static ssize_t print_cpus_map(char *buf, const struct cpumask *map)
 }
 
 #define	print_cpus_func(type) \
-static ssize_t print_cpus_##type(struct sysdev_class *class, char *buf)	\
+static ssize_t print_cpus_##type(struct sysdev_class *class, 		\
+	 		struct sysdev_class_attribute *attr, char *buf)	\
 {									\
 	return print_cpus_map(buf, cpu_##type##_mask);			\
 }									\
@@ -165,7 +166,8 @@ print_cpus_func(present);
 /*
  * Print values for NR_CPUS and offlined cpus
  */
-static ssize_t print_cpus_kernel_max(struct sysdev_class *class, char *buf)
+static ssize_t print_cpus_kernel_max(struct sysdev_class *class,
+				     struct sysdev_class_attribute *attr, char *buf)
 {
 	int n = snprintf(buf, PAGE_SIZE-2, "%d\n", NR_CPUS - 1);
 	return n;
@@ -175,7 +177,8 @@ static SYSDEV_CLASS_ATTR(kernel_max, 0444, print_cpus_kernel_max, NULL);
 /* arch-optional setting to enable display of offline cpus >= nr_cpu_ids */
 unsigned int total_cpus;
 
-static ssize_t print_cpus_offline(struct sysdev_class *class, char *buf)
+static ssize_t print_cpus_offline(struct sysdev_class *class,
+				  struct sysdev_class_attribute *attr, char *buf)
 {
 	int n = 0, len = PAGE_SIZE-2;
 	cpumask_var_t offline;
diff --git a/drivers/base/node.c b/drivers/base/node.c
index 70122791683d..85c9d30d7004 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -544,23 +544,29 @@ static ssize_t print_nodes_state(enum node_states state, char *buf)
 	return n;
 }
 
-static ssize_t print_nodes_possible(struct sysdev_class *class, char *buf)
+static ssize_t print_nodes_possible(struct sysdev_class *class,
+				    struct sysdev_class_attribute *attr, char *buf)
 {
 	return print_nodes_state(N_POSSIBLE, buf);
 }
 
-static ssize_t print_nodes_online(struct sysdev_class *class, char *buf)
+static ssize_t print_nodes_online(struct sysdev_class *class,
+				  struct sysdev_class_attribute *attr,
+				  char *buf)
 {
 	return print_nodes_state(N_ONLINE, buf);
 }
 
 static ssize_t print_nodes_has_normal_memory(struct sysdev_class *class,
-						char *buf)
+					     struct sysdev_class_attribute *attr,
+					     char *buf)
 {
 	return print_nodes_state(N_NORMAL_MEMORY, buf);
 }
 
-static ssize_t print_nodes_has_cpu(struct sysdev_class *class, char *buf)
+static ssize_t print_nodes_has_cpu(struct sysdev_class *class,
+				   struct sysdev_class_attribute *attr,
+				   char *buf)
 {
 	return print_nodes_state(N_CPU, buf);
 }
@@ -573,7 +579,8 @@ static SYSDEV_CLASS_ATTR(has_cpu, 0444, print_nodes_has_cpu, NULL);
 
 #ifdef CONFIG_HIGHMEM
 static ssize_t print_nodes_has_high_memory(struct sysdev_class *class,
-						 char *buf)
+					   struct sysdev_class_attribute *attr,
+					   char *buf)
 {
 	return print_nodes_state(N_HIGH_MEMORY, buf);
 }
diff --git a/drivers/base/sys.c b/drivers/base/sys.c
index 0d903909af7e..a38445c0f8c5 100644
--- a/drivers/base/sys.c
+++ b/drivers/base/sys.c
@@ -89,7 +89,7 @@ static ssize_t sysdev_class_show(struct kobject *kobj, struct attribute *attr,
 	struct sysdev_class_attribute *class_attr = to_sysdev_class_attr(attr);
 
 	if (class_attr->show)
-		return class_attr->show(class, buffer);
+		return class_attr->show(class, class_attr, buffer);
 	return -EIO;
 }
 
@@ -100,7 +100,7 @@ static ssize_t sysdev_class_store(struct kobject *kobj, struct attribute *attr,
 	struct sysdev_class_attribute *class_attr = to_sysdev_class_attr(attr);
 
 	if (class_attr->store)
-		return class_attr->store(class, buffer, count);
+		return class_attr->store(class, class_attr, buffer, count);
 	return -EIO;
 }
 
diff --git a/drivers/cpuidle/sysfs.c b/drivers/cpuidle/sysfs.c
index 97b003839fb6..c9cefacabf37 100644
--- a/drivers/cpuidle/sysfs.c
+++ b/drivers/cpuidle/sysfs.c
@@ -22,6 +22,7 @@ static int __init cpuidle_sysfs_setup(char *unused)
 __setup("cpuidle_sysfs_switch", cpuidle_sysfs_setup);
 
 static ssize_t show_available_governors(struct sysdev_class *class,
+					struct sysdev_class_attribute *attr,
 					char *buf)
 {
 	ssize_t i = 0;
@@ -41,6 +42,7 @@ out:
 }
 
 static ssize_t show_current_driver(struct sysdev_class *class,
+				   struct sysdev_class_attribute *attr,
 				   char *buf)
 {
 	ssize_t ret;
@@ -56,6 +58,7 @@ static ssize_t show_current_driver(struct sysdev_class *class,
 }
 
 static ssize_t show_current_governor(struct sysdev_class *class,
+				     struct sysdev_class_attribute *attr,
 				     char *buf)
 {
 	ssize_t ret;
@@ -71,6 +74,7 @@ static ssize_t show_current_governor(struct sysdev_class *class,
 }
 
 static ssize_t store_current_governor(struct sysdev_class *class,
+				      struct sysdev_class_attribute *attr,
 				      const char *buf, size_t count)
 {
 	char gov_name[CPUIDLE_NAME_LEN];
diff --git a/include/linux/sysdev.h b/include/linux/sysdev.h
index f395bb3fa2f2..c2458fa8376c 100644
--- a/include/linux/sysdev.h
+++ b/include/linux/sysdev.h
@@ -41,8 +41,10 @@ struct sysdev_class {
 
 struct sysdev_class_attribute {
 	struct attribute attr;
-	ssize_t (*show)(struct sysdev_class *, char *);
-	ssize_t (*store)(struct sysdev_class *, const char *, size_t);
+	ssize_t (*show)(struct sysdev_class *, struct sysdev_class_attribute *,
+			char *);
+	ssize_t (*store)(struct sysdev_class *, struct sysdev_class_attribute *,
+			 const char *, size_t);
 };
 
 #define _SYSDEV_CLASS_ATTR(_name,_mode,_show,_store) 		\
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 8e352c756ba7..f40560b86544 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -5481,13 +5481,16 @@ void __init perf_event_init(void)
 	register_cpu_notifier(&perf_cpu_nb);
 }
 
-static ssize_t perf_show_reserve_percpu(struct sysdev_class *class, char *buf)
+static ssize_t perf_show_reserve_percpu(struct sysdev_class *class,
+					struct sysdev_class_attribute *attr,
+					char *buf)
 {
 	return sprintf(buf, "%d\n", perf_reserved_percpu);
 }
 
 static ssize_t
 perf_set_reserve_percpu(struct sysdev_class *class,
+			struct sysdev_class_attribute *attr,
 			const char *buf,
 			size_t count)
 {
@@ -5516,13 +5519,17 @@ perf_set_reserve_percpu(struct sysdev_class *class,
 	return count;
 }
 
-static ssize_t perf_show_overcommit(struct sysdev_class *class, char *buf)
+static ssize_t perf_show_overcommit(struct sysdev_class *class,
+				    struct sysdev_class_attribute *attr,
+				    char *buf)
 {
 	return sprintf(buf, "%d\n", perf_overcommit);
 }
 
 static ssize_t
-perf_set_overcommit(struct sysdev_class *class, const char *buf, size_t count)
+perf_set_overcommit(struct sysdev_class *class,
+		    struct sysdev_class_attribute *attr,
+		    const char *buf, size_t count)
 {
 	unsigned long val;
 	int err;
diff --git a/kernel/sched.c b/kernel/sched.c
index b47ceeec1a91..150b6988de49 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -7406,11 +7406,13 @@ static ssize_t sched_power_savings_store(const char *buf, size_t count, int smt)
 
 #ifdef CONFIG_SCHED_MC
 static ssize_t sched_mc_power_savings_show(struct sysdev_class *class,
+					   struct sysdev_class_attribute *attr,
 					   char *page)
 {
 	return sprintf(page, "%u\n", sched_mc_power_savings);
 }
 static ssize_t sched_mc_power_savings_store(struct sysdev_class *class,
+					    struct sysdev_class_attribute *attr,
 					    const char *buf, size_t count)
 {
 	return sched_power_savings_store(buf, count, 0);
@@ -7422,11 +7424,13 @@ static SYSDEV_CLASS_ATTR(sched_mc_power_savings, 0644,
 
 #ifdef CONFIG_SCHED_SMT
 static ssize_t sched_smt_power_savings_show(struct sysdev_class *dev,
+					    struct sysdev_class_attribute *attr,
 					    char *page)
 {
 	return sprintf(page, "%u\n", sched_smt_power_savings);
 }
 static ssize_t sched_smt_power_savings_store(struct sysdev_class *dev,
+					     struct sysdev_class_attribute *attr,
 					     const char *buf, size_t count)
 {
 	return sched_power_savings_store(buf, count, 1);
-- 
cgit v1.2.3


From 1c205ae18db53ff72985dd79f3baaf2dbaba6db7 Mon Sep 17 00:00:00 2001
From: Andi Kleen <andi@firstfloor.org>
Date: Tue, 5 Jan 2010 12:48:01 +0100
Subject: sysfs: Add sysfs_add/remove_files utility functions

Adding/Removing a whole array of attributes is very common. Add a standard
utility function to do this with a simple function call, instead of
requiring drivers to open code this.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 fs/sysfs/file.c       | 20 ++++++++++++++++++++
 include/linux/sysfs.h | 14 ++++++++++++++
 2 files changed, 34 insertions(+)

(limited to 'include/linux')

diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index dc30d9e31683..50b725bcc3f3 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -542,6 +542,18 @@ int sysfs_create_file(struct kobject * kobj, const struct attribute * attr)
 
 }
 
+int sysfs_create_files(struct kobject *kobj, const struct attribute **ptr)
+{
+	int err = 0;
+	int i;
+
+	for (i = 0; ptr[i] && !err; i++)
+		err = sysfs_create_file(kobj, ptr[i]);
+	if (err)
+		while (--i >= 0)
+			sysfs_remove_file(kobj, ptr[i]);
+	return err;
+}
 
 /**
  * sysfs_add_file_to_group - add an attribute file to a pre-existing group.
@@ -614,6 +626,12 @@ void sysfs_remove_file(struct kobject * kobj, const struct attribute * attr)
 	sysfs_hash_and_remove(kobj->sd, attr->name);
 }
 
+void sysfs_remove_files(struct kobject * kobj, const struct attribute **ptr)
+{
+	int i;
+	for (i = 0; ptr[i]; i++)
+		sysfs_remove_file(kobj, ptr[i]);
+}
 
 /**
  * sysfs_remove_file_from_group - remove an attribute file from a group.
@@ -732,3 +750,5 @@ EXPORT_SYMBOL_GPL(sysfs_schedule_callback);
 
 EXPORT_SYMBOL_GPL(sysfs_create_file);
 EXPORT_SYMBOL_GPL(sysfs_remove_file);
+EXPORT_SYMBOL_GPL(sysfs_remove_files);
+EXPORT_SYMBOL_GPL(sysfs_create_files);
diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index cfa83083a2d4..3e8526582146 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -94,9 +94,12 @@ int __must_check sysfs_move_dir(struct kobject *kobj,
 
 int __must_check sysfs_create_file(struct kobject *kobj,
 				   const struct attribute *attr);
+int __must_check sysfs_create_files(struct kobject *kobj,
+				   const struct attribute **attr);
 int __must_check sysfs_chmod_file(struct kobject *kobj, struct attribute *attr,
 				  mode_t mode);
 void sysfs_remove_file(struct kobject *kobj, const struct attribute *attr);
+void sysfs_remove_files(struct kobject *kobj, const struct attribute **attr);
 
 int __must_check sysfs_create_bin_file(struct kobject *kobj,
 				       const struct bin_attribute *attr);
@@ -164,6 +167,12 @@ static inline int sysfs_create_file(struct kobject *kobj,
 	return 0;
 }
 
+static inline int sysfs_create_files(struct kobject *kobj,
+				    const struct attribute **attr)
+{
+	return 0;
+}
+
 static inline int sysfs_chmod_file(struct kobject *kobj,
 				   struct attribute *attr, mode_t mode)
 {
@@ -175,6 +184,11 @@ static inline void sysfs_remove_file(struct kobject *kobj,
 {
 }
 
+static inline void sysfs_remove_files(struct kobject *kobj,
+				     const struct attribute **attr)
+{
+}
+
 static inline int sysfs_create_bin_file(struct kobject *kobj,
 					const struct bin_attribute *attr)
 {
-- 
cgit v1.2.3


From 38457ab3a0d36320370c715145ba6da514127194 Mon Sep 17 00:00:00 2001
From: Andi Kleen <andi@firstfloor.org>
Date: Tue, 5 Jan 2010 12:48:02 +0100
Subject: sysfs: Add attribute array to sysdev classes

Add a attribute array that is automatically registered and unregistered
to struct sysdev_class. This is similar to what struct class has.

A lot of drivers add list of attributes, so it's better to do
this easily in the common sysdev layer.

This adds a new field to struct sysdev_class. I audited the
whole tree and there are no dynamically allocated sysdev classes,
so this is fully compatible.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/base/sys.c     | 9 ++++++++-
 include/linux/sysdev.h | 2 ++
 2 files changed, 10 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/base/sys.c b/drivers/base/sys.c
index a38445c0f8c5..747c99e0568b 100644
--- a/drivers/base/sys.c
+++ b/drivers/base/sys.c
@@ -145,13 +145,20 @@ int sysdev_class_register(struct sysdev_class *cls)
 	if (retval)
 		return retval;
 
-	return kset_register(&cls->kset);
+	retval = kset_register(&cls->kset);
+	if (!retval && cls->attrs)
+		retval = sysfs_create_files(&cls->kset.kobj,
+					    (const struct attribute **)cls->attrs);
+	return retval;
 }
 
 void sysdev_class_unregister(struct sysdev_class *cls)
 {
 	pr_debug("Unregistering sysdev class '%s'\n",
 		 kobject_name(&cls->kset.kobj));
+	if (cls->attrs)
+		sysfs_remove_files(&cls->kset.kobj,
+				   (const struct attribute **)cls->attrs);
 	kset_unregister(&cls->kset);
 }
 
diff --git a/include/linux/sysdev.h b/include/linux/sysdev.h
index c2458fa8376c..b6244f9b533f 100644
--- a/include/linux/sysdev.h
+++ b/include/linux/sysdev.h
@@ -27,10 +27,12 @@
 
 
 struct sys_device;
+struct sysdev_class_attribute;
 
 struct sysdev_class {
 	const char *name;
 	struct list_head	drivers;
+	struct sysdev_class_attribute **attrs;
 
 	/* Default operations for these types of devices */
 	int	(*shutdown)(struct sys_device *);
-- 
cgit v1.2.3


From 1e395ab3d9b6aa09c5f0aa46a1b0a6fc5bd33133 Mon Sep 17 00:00:00 2001
From: Andi Kleen <andi@firstfloor.org>
Date: Tue, 5 Jan 2010 12:48:05 +0100
Subject: sysdev: Add sysdev_create/remove_files

Allow to create/remove arrays of sysdev attributes

Just wrappers around sysfs_create/move_files

Will be used later to clean up some drivers.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/sysdev.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sysdev.h b/include/linux/sysdev.h
index b6244f9b533f..1154c29f4101 100644
--- a/include/linux/sysdev.h
+++ b/include/linux/sysdev.h
@@ -123,6 +123,19 @@ struct sysdev_attribute {
 extern int sysdev_create_file(struct sys_device *, struct sysdev_attribute *);
 extern void sysdev_remove_file(struct sys_device *, struct sysdev_attribute *);
 
+/* Create/remove NULL terminated attribute list */
+static inline int
+sysdev_create_files(struct sys_device *d, struct sysdev_attribute **a)
+{
+	return sysfs_create_files(&d->kobj, (const struct attribute **)a);
+}
+
+static inline void
+sysdev_remove_files(struct sys_device *d, struct sysdev_attribute **a)
+{
+	return sysfs_remove_files(&d->kobj, (const struct attribute **)a);
+}
+
 struct sysdev_ext_attribute {
 	struct sysdev_attribute attr;
 	void *var;
-- 
cgit v1.2.3


From 28812fe11a21826ba4c97c6c7971a619987cd912 Mon Sep 17 00:00:00 2001
From: Andi Kleen <andi@firstfloor.org>
Date: Tue, 5 Jan 2010 12:48:07 +0100
Subject: driver-core: Add attribute argument to class_attribute show/store

Passing the attribute to the low level IO functions allows all kinds
of cleanups, by sharing low level IO code without requiring
an own function for every piece of data.

Also drivers can extend the attributes with own data fields
and use that in the low level function.

This makes the class attributes the same as sysdev_class attributes
and plain attributes.

This will allow further cleanups in drivers.

Full tree sweep converting all users.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/base/class.c                  |  4 ++--
 drivers/base/cpu.c                    |  8 ++++++--
 drivers/base/firmware_class.c         |  8 ++++++--
 drivers/base/memory.c                 | 11 ++++++++---
 drivers/block/osdblk.c                | 12 +++++++++---
 drivers/block/pktcdvd.c               | 12 +++++++++---
 drivers/gpio/gpiolib.c                |  8 ++++++--
 drivers/gpu/drm/drm_sysfs.c           |  3 ++-
 drivers/infiniband/core/ucm.c         |  4 +++-
 drivers/infiniband/core/user_mad.c    |  4 +++-
 drivers/infiniband/core/uverbs_main.c |  4 +++-
 drivers/misc/phantom.c                |  2 +-
 drivers/mtd/ubi/build.c               |  3 ++-
 drivers/net/bonding/bond_sysfs.c      |  5 ++++-
 drivers/staging/asus_oled/asus_oled.c |  4 +++-
 drivers/uwb/driver.c                  |  5 ++++-
 include/linux/device.h                |  6 ++++--
 net/bluetooth/l2cap.c                 |  4 +++-
 net/bluetooth/rfcomm/core.c           |  4 +++-
 net/bluetooth/rfcomm/sock.c           |  4 +++-
 net/bluetooth/sco.c                   |  4 +++-
 21 files changed, 87 insertions(+), 32 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/class.c b/drivers/base/class.c
index 6e2c3b064f53..34a2de9c5385 100644
--- a/drivers/base/class.c
+++ b/drivers/base/class.c
@@ -31,7 +31,7 @@ static ssize_t class_attr_show(struct kobject *kobj, struct attribute *attr,
 	ssize_t ret = -EIO;
 
 	if (class_attr->show)
-		ret = class_attr->show(cp->class, buf);
+		ret = class_attr->show(cp->class, class_attr, buf);
 	return ret;
 }
 
@@ -43,7 +43,7 @@ static ssize_t class_attr_store(struct kobject *kobj, struct attribute *attr,
 	ssize_t ret = -EIO;
 
 	if (class_attr->store)
-		ret = class_attr->store(cp->class, buf, count);
+		ret = class_attr->store(cp->class, class_attr, buf, count);
 	return ret;
 }
 
diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c
index fb456b729803..9121c77b77fa 100644
--- a/drivers/base/cpu.c
+++ b/drivers/base/cpu.c
@@ -79,13 +79,17 @@ void unregister_cpu(struct cpu *cpu)
 }
 
 #ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
-static ssize_t cpu_probe_store(struct class *class, const char *buf,
+static ssize_t cpu_probe_store(struct class *class,
+				struct class_attribute *attr,
+				const char *buf,
 			       size_t count)
 {
 	return arch_cpu_probe(buf, count);
 }
 
-static ssize_t cpu_release_store(struct class *class, const char *buf,
+static ssize_t cpu_release_store(struct class *class,
+				struct class_attribute *attr,
+				const char *buf,
 				 size_t count)
 {
 	return arch_cpu_release(buf, count);
diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c
index a95024166b66..6604fb33d072 100644
--- a/drivers/base/firmware_class.c
+++ b/drivers/base/firmware_class.c
@@ -69,7 +69,9 @@ fw_load_abort(struct firmware_priv *fw_priv)
 }
 
 static ssize_t
-firmware_timeout_show(struct class *class, char *buf)
+firmware_timeout_show(struct class *class,
+		      struct class_attribute *attr,
+		      char *buf)
 {
 	return sprintf(buf, "%d\n", loading_timeout);
 }
@@ -87,7 +89,9 @@ firmware_timeout_show(struct class *class, char *buf)
  *	Note: zero means 'wait forever'.
  **/
 static ssize_t
-firmware_timeout_store(struct class *class, const char *buf, size_t count)
+firmware_timeout_store(struct class *class,
+			struct class_attribute *attr,
+			const char *buf, size_t count)
 {
 	loading_timeout = simple_strtol(buf, NULL, 10);
 	if (loading_timeout < 0)
diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index 563656ad75a1..495f15e92d4c 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -331,7 +331,8 @@ static int block_size_init(void)
  */
 #ifdef CONFIG_ARCH_MEMORY_PROBE
 static ssize_t
-memory_probe_store(struct class *class, const char *buf, size_t count)
+memory_probe_store(struct class *class, struct class_attribute *attr,
+		   const char *buf, size_t count)
 {
 	u64 phys_addr;
 	int nid;
@@ -368,7 +369,9 @@ static inline int memory_probe_init(void)
 
 /* Soft offline a page */
 static ssize_t
-store_soft_offline_page(struct class *class, const char *buf, size_t count)
+store_soft_offline_page(struct class *class,
+			struct class_attribute *attr,
+			const char *buf, size_t count)
 {
 	int ret;
 	u64 pfn;
@@ -385,7 +388,9 @@ store_soft_offline_page(struct class *class, const char *buf, size_t count)
 
 /* Forcibly offline a page, including killing processes. */
 static ssize_t
-store_hard_offline_page(struct class *class, const char *buf, size_t count)
+store_hard_offline_page(struct class *class,
+			struct class_attribute *attr,
+			const char *buf, size_t count)
 {
 	int ret;
 	u64 pfn;
diff --git a/drivers/block/osdblk.c b/drivers/block/osdblk.c
index a808b1530b3b..eb2091aa1c19 100644
--- a/drivers/block/osdblk.c
+++ b/drivers/block/osdblk.c
@@ -476,7 +476,9 @@ static void class_osdblk_release(struct class *cls)
 	kfree(cls);
 }
 
-static ssize_t class_osdblk_list(struct class *c, char *data)
+static ssize_t class_osdblk_list(struct class *c,
+				struct class_attribute *attr,
+				char *data)
 {
 	int n = 0;
 	struct list_head *tmp;
@@ -500,7 +502,9 @@ static ssize_t class_osdblk_list(struct class *c, char *data)
 	return n;
 }
 
-static ssize_t class_osdblk_add(struct class *c, const char *buf, size_t count)
+static ssize_t class_osdblk_add(struct class *c,
+				struct class_attribute *attr,
+				const char *buf, size_t count)
 {
 	struct osdblk_device *osdev;
 	ssize_t rc;
@@ -592,7 +596,9 @@ err_out_mod:
 	return rc;
 }
 
-static ssize_t class_osdblk_remove(struct class *c, const char *buf,
+static ssize_t class_osdblk_remove(struct class *c,
+					struct class_attribute *attr,
+					const char *buf,
 					size_t count)
 {
 	struct osdblk_device *osdev = NULL;
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index b72935b8f203..73d815d3f1b2 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -337,7 +337,9 @@ static void class_pktcdvd_release(struct class *cls)
 {
 	kfree(cls);
 }
-static ssize_t class_pktcdvd_show_map(struct class *c, char *data)
+static ssize_t class_pktcdvd_show_map(struct class *c,
+					struct class_attribute *attr,
+					char *data)
 {
 	int n = 0;
 	int idx;
@@ -356,7 +358,9 @@ static ssize_t class_pktcdvd_show_map(struct class *c, char *data)
 	return n;
 }
 
-static ssize_t class_pktcdvd_store_add(struct class *c, const char *buf,
+static ssize_t class_pktcdvd_store_add(struct class *c,
+					struct class_attribute *attr,
+					const char *buf,
 					size_t count)
 {
 	unsigned int major, minor;
@@ -376,7 +380,9 @@ static ssize_t class_pktcdvd_store_add(struct class *c, const char *buf,
 	return -EINVAL;
 }
 
-static ssize_t class_pktcdvd_store_remove(struct class *c, const char *buf,
+static ssize_t class_pktcdvd_store_remove(struct class *c,
+					  struct class_attribute *attr,
+					  const char *buf,
 					size_t count)
 {
 	unsigned int major, minor;
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index 9006fdb26fea..6d1b86661e63 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -623,7 +623,9 @@ static const struct attribute_group gpiochip_attr_group = {
  * /sys/class/gpio/unexport ... write-only
  *	integer N ... number of GPIO to unexport
  */
-static ssize_t export_store(struct class *class, const char *buf, size_t len)
+static ssize_t export_store(struct class *class,
+				struct class_attribute *attr,
+				const char *buf, size_t len)
 {
 	long	gpio;
 	int	status;
@@ -653,7 +655,9 @@ done:
 	return status ? : len;
 }
 
-static ssize_t unexport_store(struct class *class, const char *buf, size_t len)
+static ssize_t unexport_store(struct class *class,
+				struct class_attribute *attr,
+				const char *buf, size_t len)
 {
 	long	gpio;
 	int	status;
diff --git a/drivers/gpu/drm/drm_sysfs.c b/drivers/gpu/drm/drm_sysfs.c
index 7e42b7e9d43a..b95aaf23596e 100644
--- a/drivers/gpu/drm/drm_sysfs.c
+++ b/drivers/gpu/drm/drm_sysfs.c
@@ -71,7 +71,8 @@ static int drm_class_resume(struct device *dev)
 }
 
 /* Display the version of drm_core. This doesn't work right in current design */
-static ssize_t version_show(struct class *dev, char *buf)
+static ssize_t version_show(struct class *dev, struct class_attribute *attr,
+				char *buf)
 {
 	return sprintf(buf, "%s %d.%d.%d %s\n", CORE_NAME, CORE_MAJOR,
 		       CORE_MINOR, CORE_PATCHLEVEL, CORE_DATE);
diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c
index 1b09b735c5a8..02e209ff33fd 100644
--- a/drivers/infiniband/core/ucm.c
+++ b/drivers/infiniband/core/ucm.c
@@ -1336,7 +1336,9 @@ static void ib_ucm_remove_one(struct ib_device *device)
 	device_unregister(&ucm_dev->dev);
 }
 
-static ssize_t show_abi_version(struct class *class, char *buf)
+static ssize_t show_abi_version(struct class *class,
+				struct class_attribute *attr,
+				char *buf)
 {
 	return sprintf(buf, "%d\n", IB_USER_CM_ABI_VERSION);
 }
diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c
index 02d360cfc2f7..d0de8f265f45 100644
--- a/drivers/infiniband/core/user_mad.c
+++ b/drivers/infiniband/core/user_mad.c
@@ -965,7 +965,9 @@ static ssize_t show_port(struct device *dev, struct device_attribute *attr,
 }
 static DEVICE_ATTR(port, S_IRUGO, show_port, NULL);
 
-static ssize_t show_abi_version(struct class *class, char *buf)
+static ssize_t show_abi_version(struct class *class,
+				struct class_attribute *attr,
+				char *buf)
 {
 	return sprintf(buf, "%d\n", IB_USER_MAD_ABI_VERSION);
 }
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 4fa2e6516441..60879399207a 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -691,7 +691,9 @@ static ssize_t show_dev_abi_version(struct device *device,
 }
 static DEVICE_ATTR(abi_version, S_IRUGO, show_dev_abi_version, NULL);
 
-static ssize_t show_abi_version(struct class *class, char *buf)
+static ssize_t show_abi_version(struct class *class,
+				struct class_attribute *attr,
+				char *buf)
 {
 	return sprintf(buf, "%d\n", IB_USER_VERBS_ABI_VERSION);
 }
diff --git a/drivers/misc/phantom.c b/drivers/misc/phantom.c
index 04c27266f567..d30ae9560309 100644
--- a/drivers/misc/phantom.c
+++ b/drivers/misc/phantom.c
@@ -497,7 +497,7 @@ static struct pci_driver phantom_pci_driver = {
 	.resume = phantom_resume
 };
 
-static ssize_t phantom_show_version(struct class *cls, char *buf)
+static ssize_t phantom_show_version(struct class *cls, struct class_attribute *attr, char *buf)
 {
 	return sprintf(buf, PHANTOM_VERSION "\n");
 }
diff --git a/drivers/mtd/ubi/build.c b/drivers/mtd/ubi/build.c
index bc45ef9af17d..fad40aa6f099 100644
--- a/drivers/mtd/ubi/build.c
+++ b/drivers/mtd/ubi/build.c
@@ -89,7 +89,8 @@ DEFINE_MUTEX(ubi_devices_mutex);
 static DEFINE_SPINLOCK(ubi_devices_lock);
 
 /* "Show" method for files in '/<sysfs>/class/ubi/' */
-static ssize_t ubi_version_show(struct class *class, char *buf)
+static ssize_t ubi_version_show(struct class *class, struct class_attribute *attr,
+				char *buf)
 {
 	return sprintf(buf, "%d\n", UBI_VERSION);
 }
diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c
index 5acd557cea9b..b8bec086daa1 100644
--- a/drivers/net/bonding/bond_sysfs.c
+++ b/drivers/net/bonding/bond_sysfs.c
@@ -51,7 +51,9 @@
  * "show" function for the bond_masters attribute.
  * The class parameter is ignored.
  */
-static ssize_t bonding_show_bonds(struct class *cls, char *buf)
+static ssize_t bonding_show_bonds(struct class *cls,
+				  struct class_attribute *attr,
+				  char *buf)
 {
 	struct net *net = current->nsproxy->net_ns;
 	struct bond_net *bn = net_generic(net, bond_net_id);
@@ -98,6 +100,7 @@ static struct net_device *bond_get_by_name(struct net *net, const char *ifname)
  */
 
 static ssize_t bonding_store_bonds(struct class *cls,
+				   struct class_attribute *attr,
 				   const char *buffer, size_t count)
 {
 	struct net *net = current->nsproxy->net_ns;
diff --git a/drivers/staging/asus_oled/asus_oled.c b/drivers/staging/asus_oled/asus_oled.c
index cadb6f7321ad..7d93f50a0a64 100644
--- a/drivers/staging/asus_oled/asus_oled.c
+++ b/drivers/staging/asus_oled/asus_oled.c
@@ -770,7 +770,9 @@ static struct usb_driver oled_driver = {
 	.id_table =	id_table,
 };
 
-static ssize_t version_show(struct class *dev, char *buf)
+static ssize_t version_show(struct class *dev,
+			    struct class_attribute *attr,
+			    char *buf)
 {
 	return sprintf(buf, ASUS_OLED_UNDERSCORE_NAME " %s\n",
 		       ASUS_OLED_VERSION);
diff --git a/drivers/uwb/driver.c b/drivers/uwb/driver.c
index da77e41de990..08bd6dbfd4a6 100644
--- a/drivers/uwb/driver.c
+++ b/drivers/uwb/driver.c
@@ -74,13 +74,16 @@
 unsigned long beacon_timeout_ms = 500;
 
 static
-ssize_t beacon_timeout_ms_show(struct class *class, char *buf)
+ssize_t beacon_timeout_ms_show(struct class *class,
+				struct class_attribute *attr,
+				char *buf)
 {
 	return scnprintf(buf, PAGE_SIZE, "%lu\n", beacon_timeout_ms);
 }
 
 static
 ssize_t beacon_timeout_ms_store(struct class *class,
+				struct class_attribute *attr,
 				const char *buf, size_t size)
 {
 	unsigned long bt;
diff --git a/include/linux/device.h b/include/linux/device.h
index b30527db3ac0..190f8d30d1d3 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -251,8 +251,10 @@ extern struct device *class_find_device(struct class *class,
 
 struct class_attribute {
 	struct attribute attr;
-	ssize_t (*show)(struct class *class, char *buf);
-	ssize_t (*store)(struct class *class, const char *buf, size_t count);
+	ssize_t (*show)(struct class *class, struct class_attribute *attr,
+			char *buf);
+	ssize_t (*store)(struct class *class, struct class_attribute *attr,
+			const char *buf, size_t count);
 };
 
 #define CLASS_ATTR(_name, _mode, _show, _store)			\
diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index 400efa26ddba..4db7ae2fe07d 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -3937,7 +3937,9 @@ drop:
 	return 0;
 }
 
-static ssize_t l2cap_sysfs_show(struct class *dev, char *buf)
+static ssize_t l2cap_sysfs_show(struct class *dev,
+				struct class_attribute *attr,
+				char *buf)
 {
 	struct sock *sk;
 	struct hlist_node *node;
diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
index 89f4a59eb82b..db8a68e1a5ba 100644
--- a/net/bluetooth/rfcomm/core.c
+++ b/net/bluetooth/rfcomm/core.c
@@ -2098,7 +2098,9 @@ static struct hci_cb rfcomm_cb = {
 	.security_cfm	= rfcomm_security_cfm
 };
 
-static ssize_t rfcomm_dlc_sysfs_show(struct class *dev, char *buf)
+static ssize_t rfcomm_dlc_sysfs_show(struct class *dev,
+				     struct class_attribute *attr,
+				     char *buf)
 {
 	struct rfcomm_session *s;
 	struct list_head *pp, *p;
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index 4b5968dda673..ca87d6ac6a20 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -1061,7 +1061,9 @@ done:
 	return result;
 }
 
-static ssize_t rfcomm_sock_sysfs_show(struct class *dev, char *buf)
+static ssize_t rfcomm_sock_sysfs_show(struct class *dev,
+				      struct class_attribute *attr,
+				      char *buf)
 {
 	struct sock *sk;
 	struct hlist_node *node;
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index dd8f6ec57dce..f93b939539bc 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -953,7 +953,9 @@ drop:
 	return 0;
 }
 
-static ssize_t sco_sysfs_show(struct class *dev, char *buf)
+static ssize_t sco_sysfs_show(struct class *dev,
+				struct class_attribute *attr,
+				char *buf)
 {
 	struct sock *sk;
 	struct hlist_node *node;
-- 
cgit v1.2.3


From 869dfc875e32fd832385fd52ce54525a10401ed6 Mon Sep 17 00:00:00 2001
From: Andi Kleen <andi@firstfloor.org>
Date: Tue, 5 Jan 2010 12:48:08 +0100
Subject: driver core: Add class_attr_string for simple read-only string

Several drivers just export a static string as class attributes.

Use the new extensible attribute support to define a simple
CLASS_ATTR_STRING() macro for this.

This will allow to remove code from drivers in followon patches.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/base/class.c   | 10 ++++++++++
 include/linux/device.h | 17 +++++++++++++++++
 2 files changed, 27 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/base/class.c b/drivers/base/class.c
index 34a2de9c5385..2e297cc4cd3d 100644
--- a/drivers/base/class.c
+++ b/drivers/base/class.c
@@ -490,6 +490,16 @@ void class_interface_unregister(struct class_interface *class_intf)
 	class_put(parent);
 }
 
+ssize_t show_class_attr_string(struct class *class, struct class_attribute *attr,
+                        	char *buf)
+{
+	struct class_attribute_string *cs;
+	cs = container_of(attr, struct class_attribute_string, attr);
+	return snprintf(buf, PAGE_SIZE, "%s\n", cs->str);
+}
+
+EXPORT_SYMBOL_GPL(show_class_attr_string);
+
 struct class_compat {
 	struct kobject *kobj;
 };
diff --git a/include/linux/device.h b/include/linux/device.h
index 190f8d30d1d3..f95d5bfe8248 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -265,6 +265,23 @@ extern int __must_check class_create_file(struct class *class,
 extern void class_remove_file(struct class *class,
 			      const struct class_attribute *attr);
 
+/* Simple class attribute that is just a static string */
+
+struct class_attribute_string {
+	struct class_attribute attr;
+	char *str;
+};
+
+/* Currently read-only only */
+#define _CLASS_ATTR_STRING(_name, _mode, _str) \
+	{ __ATTR(_name, _mode, show_class_attr_string, NULL), _str }
+#define CLASS_ATTR_STRING(_name, _mode, _str) \
+	struct class_attribute_string class_attr_##_name = \
+		_CLASS_ATTR_STRING(_name, _mode, _str)
+
+extern ssize_t show_class_attr_string(struct class *class, struct class_attribute *attr,
+                        char *buf);
+
 struct class_interface {
 	struct list_head	node;
 	struct class		*class;
-- 
cgit v1.2.3


From 9cd43611ccfb46632bfa7d19f688924ea93f1613 Mon Sep 17 00:00:00 2001
From: Emese Revfy <re.emese@gmail.com>
Date: Thu, 31 Dec 2009 14:52:51 +0100
Subject: kobject: Constify struct kset_uevent_ops

Constify struct kset_uevent_ops.

This is part of the ops structure constification
effort started by Arjan van de Ven et al.

Benefits of this constification:

 * prevents modification of data that is shared
   (referenced) by many other structure instances
   at runtime

 * detects/prevents accidental (but not intentional)
   modification attempts on archs that enforce
   read-only kernel data at runtime

 * potentially better optimized code as the compiler
   can assume that the const data cannot be changed

 * the compiler/linker move const data into .rodata
   and therefore exclude them from false sharing

Signed-off-by: Emese Revfy <re.emese@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/base/bus.c      |  2 +-
 drivers/base/core.c     |  2 +-
 drivers/base/memory.c   |  2 +-
 fs/gfs2/sys.c           |  2 +-
 include/linux/kobject.h | 10 +++++-----
 kernel/params.c         |  2 +-
 lib/kobject.c           |  4 ++--
 lib/kobject_uevent.c    |  2 +-
 mm/slub.c               |  2 +-
 9 files changed, 14 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/bus.c b/drivers/base/bus.c
index c0c5a43d9fb3..2afe599eb35d 100644
--- a/drivers/base/bus.c
+++ b/drivers/base/bus.c
@@ -154,7 +154,7 @@ static int bus_uevent_filter(struct kset *kset, struct kobject *kobj)
 	return 0;
 }
 
-static struct kset_uevent_ops bus_uevent_ops = {
+static const struct kset_uevent_ops bus_uevent_ops = {
 	.filter = bus_uevent_filter,
 };
 
diff --git a/drivers/base/core.c b/drivers/base/core.c
index f6c73a9e3d95..58ec1069f4b0 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -252,7 +252,7 @@ static int dev_uevent(struct kset *kset, struct kobject *kobj,
 	return retval;
 }
 
-static struct kset_uevent_ops device_uevent_ops = {
+static const struct kset_uevent_ops device_uevent_ops = {
 	.filter =	dev_uevent_filter,
 	.name =		dev_uevent_name,
 	.uevent =	dev_uevent,
diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index 495f15e92d4c..2f8691511190 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -44,7 +44,7 @@ static int memory_uevent(struct kset *kset, struct kobject *obj, struct kobj_uev
 	return retval;
 }
 
-static struct kset_uevent_ops memory_uevent_ops = {
+static const struct kset_uevent_ops memory_uevent_ops = {
 	.name		= memory_uevent_name,
 	.uevent		= memory_uevent,
 };
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index b5f1a46133c8..543503010ed0 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -574,7 +574,7 @@ static int gfs2_uevent(struct kset *kset, struct kobject *kobj,
 	return 0;
 }
 
-static struct kset_uevent_ops gfs2_uevent_ops = {
+static const struct kset_uevent_ops gfs2_uevent_ops = {
 	.uevent = gfs2_uevent,
 };
 
diff --git a/include/linux/kobject.h b/include/linux/kobject.h
index 58ae8e00fcdd..57a1eaae9096 100644
--- a/include/linux/kobject.h
+++ b/include/linux/kobject.h
@@ -118,9 +118,9 @@ struct kobj_uevent_env {
 };
 
 struct kset_uevent_ops {
-	int (*filter)(struct kset *kset, struct kobject *kobj);
-	const char *(*name)(struct kset *kset, struct kobject *kobj);
-	int (*uevent)(struct kset *kset, struct kobject *kobj,
+	int (* const filter)(struct kset *kset, struct kobject *kobj);
+	const char *(* const name)(struct kset *kset, struct kobject *kobj);
+	int (* const uevent)(struct kset *kset, struct kobject *kobj,
 		      struct kobj_uevent_env *env);
 };
 
@@ -155,14 +155,14 @@ struct kset {
 	struct list_head list;
 	spinlock_t list_lock;
 	struct kobject kobj;
-	struct kset_uevent_ops *uevent_ops;
+	const struct kset_uevent_ops *uevent_ops;
 };
 
 extern void kset_init(struct kset *kset);
 extern int __must_check kset_register(struct kset *kset);
 extern void kset_unregister(struct kset *kset);
 extern struct kset * __must_check kset_create_and_add(const char *name,
-						struct kset_uevent_ops *u,
+						const struct kset_uevent_ops *u,
 						struct kobject *parent_kobj);
 
 static inline struct kset *to_kset(struct kobject *kobj)
diff --git a/kernel/params.c b/kernel/params.c
index 8d95f5451b22..48370be3c0a1 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -736,7 +736,7 @@ static int uevent_filter(struct kset *kset, struct kobject *kobj)
 	return 0;
 }
 
-static struct kset_uevent_ops module_uevent_ops = {
+static const struct kset_uevent_ops module_uevent_ops = {
 	.filter = uevent_filter,
 };
 
diff --git a/lib/kobject.c b/lib/kobject.c
index b512b746d2af..cecf5a0ef6e1 100644
--- a/lib/kobject.c
+++ b/lib/kobject.c
@@ -789,7 +789,7 @@ static struct kobj_type kset_ktype = {
  * If the kset was not able to be created, NULL will be returned.
  */
 static struct kset *kset_create(const char *name,
-				struct kset_uevent_ops *uevent_ops,
+				const struct kset_uevent_ops *uevent_ops,
 				struct kobject *parent_kobj)
 {
 	struct kset *kset;
@@ -832,7 +832,7 @@ static struct kset *kset_create(const char *name,
  * If the kset was not able to be created, NULL will be returned.
  */
 struct kset *kset_create_and_add(const char *name,
-				 struct kset_uevent_ops *uevent_ops,
+				 const struct kset_uevent_ops *uevent_ops,
 				 struct kobject *parent_kobj)
 {
 	struct kset *kset;
diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c
index 920a3ca6e259..c9d3a3e8405d 100644
--- a/lib/kobject_uevent.c
+++ b/lib/kobject_uevent.c
@@ -95,7 +95,7 @@ int kobject_uevent_env(struct kobject *kobj, enum kobject_action action,
 	const char *subsystem;
 	struct kobject *top_kobj;
 	struct kset *kset;
-	struct kset_uevent_ops *uevent_ops;
+	const struct kset_uevent_ops *uevent_ops;
 	u64 seq;
 	int i = 0;
 	int retval = 0;
diff --git a/mm/slub.c b/mm/slub.c
index 0bfd3863d521..a26753c12dcd 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -4409,7 +4409,7 @@ static int uevent_filter(struct kset *kset, struct kobject *kobj)
 	return 0;
 }
 
-static struct kset_uevent_ops slab_uevent_ops = {
+static const struct kset_uevent_ops slab_uevent_ops = {
 	.filter = uevent_filter,
 };
 
-- 
cgit v1.2.3


From 52cf25d0ab7f78eeecc59ac652ed5090f69b619e Mon Sep 17 00:00:00 2001
From: Emese Revfy <re.emese@gmail.com>
Date: Tue, 19 Jan 2010 02:58:23 +0100
Subject: Driver core: Constify struct sysfs_ops in struct kobj_type

Constify struct sysfs_ops.

This is part of the ops structure constification
effort started by Arjan van de Ven et al.

Benefits of this constification:

 * prevents modification of data that is shared
   (referenced) by many other structure instances
   at runtime

 * detects/prevents accidental (but not intentional)
   modification attempts on archs that enforce
   read-only kernel data at runtime

 * potentially better optimized code as the compiler
   can assume that the const data cannot be changed

 * the compiler/linker move const data into .rodata
   and therefore exclude them from false sharing

Signed-off-by: Emese Revfy <re.emese@gmail.com>
Acked-by: David Teigland <teigland@redhat.com>
Acked-by: Matt Domsch <Matt_Domsch@dell.com>
Acked-by: Maciej Sosnowski <maciej.sosnowski@intel.com>
Acked-by: Hans J. Koch <hjk@linutronix.de>
Acked-by: Pekka Enberg <penberg@cs.helsinki.fi>
Acked-by: Jens Axboe <jens.axboe@oracle.com>
Acked-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 Documentation/kobject.txt             | 2 +-
 arch/ia64/kernel/topology.c           | 2 +-
 arch/powerpc/kernel/cacheinfo.c       | 2 +-
 arch/sh/kernel/cpu/sh4/sq.c           | 2 +-
 arch/x86/kernel/cpu/intel_cacheinfo.c | 2 +-
 arch/x86/kernel/cpu/mcheck/mce_amd.c  | 2 +-
 block/blk-integrity.c                 | 2 +-
 block/blk-sysfs.c                     | 2 +-
 block/elevator.c                      | 2 +-
 drivers/base/bus.c                    | 4 ++--
 drivers/base/class.c                  | 2 +-
 drivers/base/core.c                   | 2 +-
 drivers/base/sys.c                    | 4 ++--
 drivers/block/pktcdvd.c               | 2 +-
 drivers/cpufreq/cpufreq.c             | 2 +-
 drivers/cpuidle/sysfs.c               | 4 ++--
 drivers/dma/ioat/dma.c                | 2 +-
 drivers/dma/ioat/dma.h                | 2 +-
 drivers/edac/edac_device_sysfs.c      | 6 +++---
 drivers/edac/edac_mc_sysfs.c          | 4 ++--
 drivers/edac/edac_pci_sysfs.c         | 4 ++--
 drivers/firmware/edd.c                | 2 +-
 drivers/firmware/efivars.c            | 2 +-
 drivers/firmware/iscsi_ibft.c         | 2 +-
 drivers/firmware/memmap.c             | 2 +-
 drivers/gpu/drm/ttm/ttm_bo.c          | 2 +-
 drivers/gpu/drm/ttm/ttm_memory.c      | 2 +-
 drivers/infiniband/core/cm.c          | 2 +-
 drivers/infiniband/core/sysfs.c       | 2 +-
 drivers/md/dm-sysfs.c                 | 2 +-
 drivers/md/md.c                       | 4 ++--
 drivers/net/ibmveth.c                 | 2 +-
 drivers/net/iseries_veth.c            | 4 ++--
 drivers/parisc/pdc_stable.c           | 2 +-
 drivers/pci/hotplug/fakephp.c         | 2 +-
 drivers/pci/slot.c                    | 2 +-
 drivers/uio/uio.c                     | 4 ++--
 drivers/uwb/wlp/sysfs.c               | 3 +--
 drivers/video/omap2/dss/manager.c     | 2 +-
 drivers/video/omap2/dss/overlay.c     | 2 +-
 drivers/xen/sys-hypervisor.c          | 2 +-
 fs/btrfs/sysfs.c                      | 4 ++--
 fs/dlm/lockspace.c                    | 2 +-
 fs/ext4/super.c                       | 2 +-
 fs/gfs2/sys.c                         | 2 +-
 fs/ocfs2/cluster/masklog.c            | 2 +-
 fs/sysfs/file.c                       | 8 ++++----
 include/linux/kobject.h               | 4 ++--
 kernel/params.c                       | 2 +-
 lib/kobject.c                         | 2 +-
 mm/slub.c                             | 2 +-
 net/bridge/br_private.h               | 2 +-
 net/bridge/br_sysfs_if.c              | 2 +-
 samples/kobject/kset-example.c        | 2 +-
 54 files changed, 69 insertions(+), 70 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/kobject.txt b/Documentation/kobject.txt
index c79ab996dada..bdb13817e1e9 100644
--- a/Documentation/kobject.txt
+++ b/Documentation/kobject.txt
@@ -266,7 +266,7 @@ kobj_type:
 
     struct kobj_type {
 	    void (*release)(struct kobject *);
-	    struct sysfs_ops	*sysfs_ops;
+	    const struct sysfs_ops *sysfs_ops;
 	    struct attribute	**default_attrs;
     };
 
diff --git a/arch/ia64/kernel/topology.c b/arch/ia64/kernel/topology.c
index 8f060352e129..b3a5818088d9 100644
--- a/arch/ia64/kernel/topology.c
+++ b/arch/ia64/kernel/topology.c
@@ -282,7 +282,7 @@ static ssize_t cache_show(struct kobject * kobj, struct attribute * attr, char *
 	return ret;
 }
 
-static struct sysfs_ops cache_sysfs_ops = {
+static const struct sysfs_ops cache_sysfs_ops = {
 	.show   = cache_show
 };
 
diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c
index bb37b1d19a58..01fe9ce28379 100644
--- a/arch/powerpc/kernel/cacheinfo.c
+++ b/arch/powerpc/kernel/cacheinfo.c
@@ -642,7 +642,7 @@ static struct kobj_attribute *cache_index_opt_attrs[] = {
 	&cache_assoc_attr,
 };
 
-static struct sysfs_ops cache_index_ops = {
+static const struct sysfs_ops cache_index_ops = {
 	.show = cache_index_show,
 };
 
diff --git a/arch/sh/kernel/cpu/sh4/sq.c b/arch/sh/kernel/cpu/sh4/sq.c
index fc065f9da6e5..14726eef1ce0 100644
--- a/arch/sh/kernel/cpu/sh4/sq.c
+++ b/arch/sh/kernel/cpu/sh4/sq.c
@@ -326,7 +326,7 @@ static struct attribute *sq_sysfs_attrs[] = {
 	NULL,
 };
 
-static struct sysfs_ops sq_sysfs_ops = {
+static const struct sysfs_ops sq_sysfs_ops = {
 	.show	= sq_sysfs_show,
 	.store	= sq_sysfs_store,
 };
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index eddb1bdd1b8f..b3eeb66c0a51 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -903,7 +903,7 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr,
 	return ret;
 }
 
-static struct sysfs_ops sysfs_ops = {
+static const struct sysfs_ops sysfs_ops = {
 	.show   = show,
 	.store  = store,
 };
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 83a3d1f4efca..cda932ca3ade 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -388,7 +388,7 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr,
 	return ret;
 }
 
-static struct sysfs_ops threshold_ops = {
+static const struct sysfs_ops threshold_ops = {
 	.show			= show,
 	.store			= store,
 };
diff --git a/block/blk-integrity.c b/block/blk-integrity.c
index 15c630813b1c..96e83c2bdb94 100644
--- a/block/blk-integrity.c
+++ b/block/blk-integrity.c
@@ -278,7 +278,7 @@ static struct attribute *integrity_attrs[] = {
 	NULL,
 };
 
-static struct sysfs_ops integrity_ops = {
+static const struct sysfs_ops integrity_ops = {
 	.show	= &integrity_attr_show,
 	.store	= &integrity_attr_store,
 };
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index e85442415db3..2ae2cb3f362f 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -450,7 +450,7 @@ static void blk_release_queue(struct kobject *kobj)
 	kmem_cache_free(blk_requestq_cachep, q);
 }
 
-static struct sysfs_ops queue_sysfs_ops = {
+static const struct sysfs_ops queue_sysfs_ops = {
 	.show	= queue_attr_show,
 	.store	= queue_attr_store,
 };
diff --git a/block/elevator.c b/block/elevator.c
index ee3a883840f2..df75676f6671 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -892,7 +892,7 @@ elv_attr_store(struct kobject *kobj, struct attribute *attr,
 	return error;
 }
 
-static struct sysfs_ops elv_sysfs_ops = {
+static const struct sysfs_ops elv_sysfs_ops = {
 	.show	= elv_attr_show,
 	.store	= elv_attr_store,
 };
diff --git a/drivers/base/bus.c b/drivers/base/bus.c
index 2afe599eb35d..cca1aa10054c 100644
--- a/drivers/base/bus.c
+++ b/drivers/base/bus.c
@@ -70,7 +70,7 @@ static ssize_t drv_attr_store(struct kobject *kobj, struct attribute *attr,
 	return ret;
 }
 
-static struct sysfs_ops driver_sysfs_ops = {
+static const struct sysfs_ops driver_sysfs_ops = {
 	.show	= drv_attr_show,
 	.store	= drv_attr_store,
 };
@@ -115,7 +115,7 @@ static ssize_t bus_attr_store(struct kobject *kobj, struct attribute *attr,
 	return ret;
 }
 
-static struct sysfs_ops bus_sysfs_ops = {
+static const struct sysfs_ops bus_sysfs_ops = {
 	.show	= bus_attr_show,
 	.store	= bus_attr_store,
 };
diff --git a/drivers/base/class.c b/drivers/base/class.c
index 2e297cc4cd3d..0147f476b8a9 100644
--- a/drivers/base/class.c
+++ b/drivers/base/class.c
@@ -63,7 +63,7 @@ static void class_release(struct kobject *kobj)
 	kfree(cp);
 }
 
-static struct sysfs_ops class_sysfs_ops = {
+static const struct sysfs_ops class_sysfs_ops = {
 	.show	= class_attr_show,
 	.store	= class_attr_store,
 };
diff --git a/drivers/base/core.c b/drivers/base/core.c
index 58ec1069f4b0..b0d6646a2814 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -100,7 +100,7 @@ static ssize_t dev_attr_store(struct kobject *kobj, struct attribute *attr,
 	return ret;
 }
 
-static struct sysfs_ops dev_sysfs_ops = {
+static const struct sysfs_ops dev_sysfs_ops = {
 	.show	= dev_attr_show,
 	.store	= dev_attr_store,
 };
diff --git a/drivers/base/sys.c b/drivers/base/sys.c
index 747c99e0568b..8980feec5d14 100644
--- a/drivers/base/sys.c
+++ b/drivers/base/sys.c
@@ -54,7 +54,7 @@ sysdev_store(struct kobject *kobj, struct attribute *attr,
 	return -EIO;
 }
 
-static struct sysfs_ops sysfs_ops = {
+static const struct sysfs_ops sysfs_ops = {
 	.show	= sysdev_show,
 	.store	= sysdev_store,
 };
@@ -104,7 +104,7 @@ static ssize_t sysdev_class_store(struct kobject *kobj, struct attribute *attr,
 	return -EIO;
 }
 
-static struct sysfs_ops sysfs_class_ops = {
+static const struct sysfs_ops sysfs_class_ops = {
 	.show	= sysdev_class_show,
 	.store	= sysdev_class_store,
 };
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index 73d815d3f1b2..39c8514442eb 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -284,7 +284,7 @@ static ssize_t kobj_pkt_store(struct kobject *kobj,
 	return len;
 }
 
-static struct sysfs_ops kobj_pkt_ops = {
+static const struct sysfs_ops kobj_pkt_ops = {
 	.show = kobj_pkt_show,
 	.store = kobj_pkt_store
 };
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 67bc2ece7b4b..2d5d575e889d 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -766,7 +766,7 @@ static void cpufreq_sysfs_release(struct kobject *kobj)
 	complete(&policy->kobj_unregister);
 }
 
-static struct sysfs_ops sysfs_ops = {
+static const struct sysfs_ops sysfs_ops = {
 	.show	= show,
 	.store	= store,
 };
diff --git a/drivers/cpuidle/sysfs.c b/drivers/cpuidle/sysfs.c
index c9cefacabf37..8719b36e1a4d 100644
--- a/drivers/cpuidle/sysfs.c
+++ b/drivers/cpuidle/sysfs.c
@@ -195,7 +195,7 @@ static ssize_t cpuidle_store(struct kobject * kobj, struct attribute * attr,
 	return ret;
 }
 
-static struct sysfs_ops cpuidle_sysfs_ops = {
+static const struct sysfs_ops cpuidle_sysfs_ops = {
 	.show = cpuidle_show,
 	.store = cpuidle_store,
 };
@@ -281,7 +281,7 @@ static ssize_t cpuidle_state_show(struct kobject * kobj,
 	return ret;
 }
 
-static struct sysfs_ops cpuidle_state_sysfs_ops = {
+static const struct sysfs_ops cpuidle_state_sysfs_ops = {
 	.show = cpuidle_state_show,
 };
 
diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c
index af14c9a5b8d4..0099340b9616 100644
--- a/drivers/dma/ioat/dma.c
+++ b/drivers/dma/ioat/dma.c
@@ -1138,7 +1138,7 @@ ioat_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
 	return entry->show(&chan->common, page);
 }
 
-struct sysfs_ops ioat_sysfs_ops = {
+const struct sysfs_ops ioat_sysfs_ops = {
 	.show	= ioat_attr_show,
 };
 
diff --git a/drivers/dma/ioat/dma.h b/drivers/dma/ioat/dma.h
index 4f747a254074..86b97ac8774e 100644
--- a/drivers/dma/ioat/dma.h
+++ b/drivers/dma/ioat/dma.h
@@ -346,7 +346,7 @@ bool ioat_cleanup_preamble(struct ioat_chan_common *chan,
 			   unsigned long *phys_complete);
 void ioat_kobject_add(struct ioatdma_device *device, struct kobj_type *type);
 void ioat_kobject_del(struct ioatdma_device *device);
-extern struct sysfs_ops ioat_sysfs_ops;
+extern const struct sysfs_ops ioat_sysfs_ops;
 extern struct ioat_sysfs_entry ioat_version_attr;
 extern struct ioat_sysfs_entry ioat_cap_attr;
 #endif /* IOATDMA_H */
diff --git a/drivers/edac/edac_device_sysfs.c b/drivers/edac/edac_device_sysfs.c
index 53764577035f..5fdedbc0f545 100644
--- a/drivers/edac/edac_device_sysfs.c
+++ b/drivers/edac/edac_device_sysfs.c
@@ -137,7 +137,7 @@ static ssize_t edac_dev_ctl_info_store(struct kobject *kobj,
 }
 
 /* edac_dev file operations for an 'ctl_info' */
-static struct sysfs_ops device_ctl_info_ops = {
+static const struct sysfs_ops device_ctl_info_ops = {
 	.show = edac_dev_ctl_info_show,
 	.store = edac_dev_ctl_info_store
 };
@@ -373,7 +373,7 @@ static ssize_t edac_dev_instance_store(struct kobject *kobj,
 }
 
 /* edac_dev file operations for an 'instance' */
-static struct sysfs_ops device_instance_ops = {
+static const struct sysfs_ops device_instance_ops = {
 	.show = edac_dev_instance_show,
 	.store = edac_dev_instance_store
 };
@@ -476,7 +476,7 @@ static ssize_t edac_dev_block_store(struct kobject *kobj,
 }
 
 /* edac_dev file operations for a 'block' */
-static struct sysfs_ops device_block_ops = {
+static const struct sysfs_ops device_block_ops = {
 	.show = edac_dev_block_show,
 	.store = edac_dev_block_store
 };
diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c
index e1d4ce083481..88840e9fa3e0 100644
--- a/drivers/edac/edac_mc_sysfs.c
+++ b/drivers/edac/edac_mc_sysfs.c
@@ -245,7 +245,7 @@ static ssize_t csrowdev_store(struct kobject *kobj, struct attribute *attr,
 	return -EIO;
 }
 
-static struct sysfs_ops csrowfs_ops = {
+static const struct sysfs_ops csrowfs_ops = {
 	.show = csrowdev_show,
 	.store = csrowdev_store
 };
@@ -575,7 +575,7 @@ static ssize_t mcidev_store(struct kobject *kobj, struct attribute *attr,
 }
 
 /* Intermediate show/store table */
-static struct sysfs_ops mci_ops = {
+static const struct sysfs_ops mci_ops = {
 	.show = mcidev_show,
 	.store = mcidev_store
 };
diff --git a/drivers/edac/edac_pci_sysfs.c b/drivers/edac/edac_pci_sysfs.c
index fb60a877d768..bef94e3d9944 100644
--- a/drivers/edac/edac_pci_sysfs.c
+++ b/drivers/edac/edac_pci_sysfs.c
@@ -121,7 +121,7 @@ static ssize_t edac_pci_instance_store(struct kobject *kobj,
 }
 
 /* fs_ops table */
-static struct sysfs_ops pci_instance_ops = {
+static const struct sysfs_ops pci_instance_ops = {
 	.show = edac_pci_instance_show,
 	.store = edac_pci_instance_store
 };
@@ -261,7 +261,7 @@ static ssize_t edac_pci_dev_store(struct kobject *kobj,
 	return -EIO;
 }
 
-static struct sysfs_ops edac_pci_sysfs_ops = {
+static const struct sysfs_ops edac_pci_sysfs_ops = {
 	.show = edac_pci_dev_show,
 	.store = edac_pci_dev_store
 };
diff --git a/drivers/firmware/edd.c b/drivers/firmware/edd.c
index 9e4f59dc7f1e..110e24e50883 100644
--- a/drivers/firmware/edd.c
+++ b/drivers/firmware/edd.c
@@ -122,7 +122,7 @@ edd_attr_show(struct kobject * kobj, struct attribute *attr, char *buf)
 	return ret;
 }
 
-static struct sysfs_ops edd_attr_ops = {
+static const struct sysfs_ops edd_attr_ops = {
 	.show = edd_attr_show,
 };
 
diff --git a/drivers/firmware/efivars.c b/drivers/firmware/efivars.c
index f4f709d1370b..082f06ecd327 100644
--- a/drivers/firmware/efivars.c
+++ b/drivers/firmware/efivars.c
@@ -362,7 +362,7 @@ static ssize_t efivar_attr_store(struct kobject *kobj, struct attribute *attr,
 	return ret;
 }
 
-static struct sysfs_ops efivar_attr_ops = {
+static const struct sysfs_ops efivar_attr_ops = {
 	.show = efivar_attr_show,
 	.store = efivar_attr_store,
 };
diff --git a/drivers/firmware/iscsi_ibft.c b/drivers/firmware/iscsi_ibft.c
index a3600e3ed0fa..ed2801c378de 100644
--- a/drivers/firmware/iscsi_ibft.c
+++ b/drivers/firmware/iscsi_ibft.c
@@ -519,7 +519,7 @@ static ssize_t ibft_show_attribute(struct kobject *kobj,
 	return ret;
 }
 
-static struct sysfs_ops ibft_attr_ops = {
+static const struct sysfs_ops ibft_attr_ops = {
 	.show = ibft_show_attribute,
 };
 
diff --git a/drivers/firmware/memmap.c b/drivers/firmware/memmap.c
index 20f645743ead..d59f7cad2269 100644
--- a/drivers/firmware/memmap.c
+++ b/drivers/firmware/memmap.c
@@ -74,7 +74,7 @@ static struct attribute *def_attrs[] = {
 	NULL
 };
 
-static struct sysfs_ops memmap_attr_ops = {
+static const struct sysfs_ops memmap_attr_ops = {
 	.show = memmap_attr_show,
 };
 
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index c7320ce4567d..89c38c49066f 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -128,7 +128,7 @@ static struct attribute *ttm_bo_global_attrs[] = {
 	NULL
 };
 
-static struct sysfs_ops ttm_bo_global_ops = {
+static const struct sysfs_ops ttm_bo_global_ops = {
 	.show = &ttm_bo_global_show
 };
 
diff --git a/drivers/gpu/drm/ttm/ttm_memory.c b/drivers/gpu/drm/ttm/ttm_memory.c
index f5245c02b8fd..eb143e04d402 100644
--- a/drivers/gpu/drm/ttm/ttm_memory.c
+++ b/drivers/gpu/drm/ttm/ttm_memory.c
@@ -152,7 +152,7 @@ static struct attribute *ttm_mem_zone_attrs[] = {
 	NULL
 };
 
-static struct sysfs_ops ttm_mem_zone_ops = {
+static const struct sysfs_ops ttm_mem_zone_ops = {
 	.show = &ttm_mem_zone_show,
 	.store = &ttm_mem_zone_store
 };
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 5130fc55b8e2..764787ebe8d8 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -3597,7 +3597,7 @@ static ssize_t cm_show_counter(struct kobject *obj, struct attribute *attr,
 		       atomic_long_read(&group->counter[cm_attr->index]));
 }
 
-static struct sysfs_ops cm_counter_ops = {
+static const struct sysfs_ops cm_counter_ops = {
 	.show = cm_show_counter
 };
 
diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index 158a214da2f7..1558bb7fc74d 100644
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@@ -79,7 +79,7 @@ static ssize_t port_attr_show(struct kobject *kobj,
 	return port_attr->show(p, port_attr, buf);
 }
 
-static struct sysfs_ops port_sysfs_ops = {
+static const struct sysfs_ops port_sysfs_ops = {
 	.show = port_attr_show
 };
 
diff --git a/drivers/md/dm-sysfs.c b/drivers/md/dm-sysfs.c
index f91b40942e07..84d2b91e4efb 100644
--- a/drivers/md/dm-sysfs.c
+++ b/drivers/md/dm-sysfs.c
@@ -75,7 +75,7 @@ static struct attribute *dm_attrs[] = {
 	NULL,
 };
 
-static struct sysfs_ops dm_sysfs_ops = {
+static const struct sysfs_ops dm_sysfs_ops = {
 	.show	= dm_attr_show,
 };
 
diff --git a/drivers/md/md.c b/drivers/md/md.c
index a20a71e5efd3..fdc1890b6ac5 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -2642,7 +2642,7 @@ static void rdev_free(struct kobject *ko)
 	mdk_rdev_t *rdev = container_of(ko, mdk_rdev_t, kobj);
 	kfree(rdev);
 }
-static struct sysfs_ops rdev_sysfs_ops = {
+static const struct sysfs_ops rdev_sysfs_ops = {
 	.show		= rdev_attr_show,
 	.store		= rdev_attr_store,
 };
@@ -4059,7 +4059,7 @@ static void md_free(struct kobject *ko)
 	kfree(mddev);
 }
 
-static struct sysfs_ops md_sysfs_ops = {
+static const struct sysfs_ops md_sysfs_ops = {
 	.show	= md_attr_show,
 	.store	= md_attr_store,
 };
diff --git a/drivers/net/ibmveth.c b/drivers/net/ibmveth.c
index f2b937966950..0bc777bac9b4 100644
--- a/drivers/net/ibmveth.c
+++ b/drivers/net/ibmveth.c
@@ -1577,7 +1577,7 @@ static struct attribute * veth_pool_attrs[] = {
 	NULL,
 };
 
-static struct sysfs_ops veth_pool_ops = {
+static const struct sysfs_ops veth_pool_ops = {
 	.show   = veth_pool_show,
 	.store  = veth_pool_store,
 };
diff --git a/drivers/net/iseries_veth.c b/drivers/net/iseries_veth.c
index 966de5d69521..e6e972d9b7ca 100644
--- a/drivers/net/iseries_veth.c
+++ b/drivers/net/iseries_veth.c
@@ -384,7 +384,7 @@ static struct attribute *veth_cnx_default_attrs[] = {
 	NULL
 };
 
-static struct sysfs_ops veth_cnx_sysfs_ops = {
+static const struct sysfs_ops veth_cnx_sysfs_ops = {
 		.show = veth_cnx_attribute_show
 };
 
@@ -441,7 +441,7 @@ static struct attribute *veth_port_default_attrs[] = {
 	NULL
 };
 
-static struct sysfs_ops veth_port_sysfs_ops = {
+static const struct sysfs_ops veth_port_sysfs_ops = {
 	.show = veth_port_attribute_show
 };
 
diff --git a/drivers/parisc/pdc_stable.c b/drivers/parisc/pdc_stable.c
index 0bc5d474b168..1062b8ffe244 100644
--- a/drivers/parisc/pdc_stable.c
+++ b/drivers/parisc/pdc_stable.c
@@ -481,7 +481,7 @@ pdcspath_attr_store(struct kobject *kobj, struct attribute *attr,
 	return ret;
 }
 
-static struct sysfs_ops pdcspath_attr_ops = {
+static const struct sysfs_ops pdcspath_attr_ops = {
 	.show = pdcspath_attr_show,
 	.store = pdcspath_attr_store,
 };
diff --git a/drivers/pci/hotplug/fakephp.c b/drivers/pci/hotplug/fakephp.c
index 6151389fd903..0a894efd4b9b 100644
--- a/drivers/pci/hotplug/fakephp.c
+++ b/drivers/pci/hotplug/fakephp.c
@@ -73,7 +73,7 @@ static void legacy_release(struct kobject *kobj)
 }
 
 static struct kobj_type legacy_ktype = {
-	.sysfs_ops = &(struct sysfs_ops){
+	.sysfs_ops = &(const struct sysfs_ops){
 		.store = legacy_store, .show = legacy_show
 	},
 	.release = &legacy_release,
diff --git a/drivers/pci/slot.c b/drivers/pci/slot.c
index 49c9e6c9779a..f75a44d37fbe 100644
--- a/drivers/pci/slot.c
+++ b/drivers/pci/slot.c
@@ -29,7 +29,7 @@ static ssize_t pci_slot_attr_store(struct kobject *kobj,
 	return attribute->store ? attribute->store(slot, buf, len) : -EIO;
 }
 
-static struct sysfs_ops pci_slot_sysfs_ops = {
+static const struct sysfs_ops pci_slot_sysfs_ops = {
 	.show = pci_slot_attr_show,
 	.store = pci_slot_attr_store,
 };
diff --git a/drivers/uio/uio.c b/drivers/uio/uio.c
index e941367dd28f..4de382acd8f2 100644
--- a/drivers/uio/uio.c
+++ b/drivers/uio/uio.c
@@ -129,7 +129,7 @@ static ssize_t map_type_show(struct kobject *kobj, struct attribute *attr,
 	return entry->show(mem, buf);
 }
 
-static struct sysfs_ops map_sysfs_ops = {
+static const struct sysfs_ops map_sysfs_ops = {
 	.show = map_type_show,
 };
 
@@ -217,7 +217,7 @@ static ssize_t portio_type_show(struct kobject *kobj, struct attribute *attr,
 	return entry->show(port, buf);
 }
 
-static struct sysfs_ops portio_sysfs_ops = {
+static const struct sysfs_ops portio_sysfs_ops = {
 	.show = portio_type_show,
 };
 
diff --git a/drivers/uwb/wlp/sysfs.c b/drivers/uwb/wlp/sysfs.c
index 0370399ff4bb..6627c94cc854 100644
--- a/drivers/uwb/wlp/sysfs.c
+++ b/drivers/uwb/wlp/sysfs.c
@@ -615,8 +615,7 @@ ssize_t wlp_wss_attr_store(struct kobject *kobj, struct attribute *attr,
 	return ret;
 }
 
-static
-struct sysfs_ops wss_sysfs_ops = {
+static const struct sysfs_ops wss_sysfs_ops = {
 	.show	= wlp_wss_attr_show,
 	.store	= wlp_wss_attr_store,
 };
diff --git a/drivers/video/omap2/dss/manager.c b/drivers/video/omap2/dss/manager.c
index 913142d4cab1..9acef00c47ea 100644
--- a/drivers/video/omap2/dss/manager.c
+++ b/drivers/video/omap2/dss/manager.c
@@ -341,7 +341,7 @@ static ssize_t manager_attr_store(struct kobject *kobj, struct attribute *attr,
 	return manager_attr->store(manager, buf, size);
 }
 
-static struct sysfs_ops manager_sysfs_ops = {
+static const struct sysfs_ops manager_sysfs_ops = {
 	.show = manager_attr_show,
 	.store = manager_attr_store,
 };
diff --git a/drivers/video/omap2/dss/overlay.c b/drivers/video/omap2/dss/overlay.c
index 0c5bea263ac6..aed3f3194347 100644
--- a/drivers/video/omap2/dss/overlay.c
+++ b/drivers/video/omap2/dss/overlay.c
@@ -320,7 +320,7 @@ static ssize_t overlay_attr_store(struct kobject *kobj, struct attribute *attr,
 	return overlay_attr->store(overlay, buf, size);
 }
 
-static struct sysfs_ops overlay_sysfs_ops = {
+static const struct sysfs_ops overlay_sysfs_ops = {
 	.show = overlay_attr_show,
 	.store = overlay_attr_store,
 };
diff --git a/drivers/xen/sys-hypervisor.c b/drivers/xen/sys-hypervisor.c
index ae5cb05a1a1c..bb71ab2336c8 100644
--- a/drivers/xen/sys-hypervisor.c
+++ b/drivers/xen/sys-hypervisor.c
@@ -426,7 +426,7 @@ static ssize_t hyp_sysfs_store(struct kobject *kobj,
 	return 0;
 }
 
-static struct sysfs_ops hyp_sysfs_ops = {
+static const struct sysfs_ops hyp_sysfs_ops = {
 	.show = hyp_sysfs_show,
 	.store = hyp_sysfs_store,
 };
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index a240b6fa81df..4ce16ef702a3 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -164,12 +164,12 @@ static void btrfs_root_release(struct kobject *kobj)
 	complete(&root->kobj_unregister);
 }
 
-static struct sysfs_ops btrfs_super_attr_ops = {
+static const struct sysfs_ops btrfs_super_attr_ops = {
 	.show	= btrfs_super_attr_show,
 	.store	= btrfs_super_attr_store,
 };
 
-static struct sysfs_ops btrfs_root_attr_ops = {
+static const struct sysfs_ops btrfs_root_attr_ops = {
 	.show	= btrfs_root_attr_show,
 	.store	= btrfs_root_attr_store,
 };
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c
index 26a8bd40400a..f994a7dfda85 100644
--- a/fs/dlm/lockspace.c
+++ b/fs/dlm/lockspace.c
@@ -148,7 +148,7 @@ static void lockspace_kobj_release(struct kobject *k)
 	kfree(ls);
 }
 
-static struct sysfs_ops dlm_attr_ops = {
+static const struct sysfs_ops dlm_attr_ops = {
 	.show  = dlm_attr_show,
 	.store = dlm_attr_store,
 };
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 2b83b96cb2eb..ce84a6ed4a48 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -2358,7 +2358,7 @@ static void ext4_sb_release(struct kobject *kobj)
 }
 
 
-static struct sysfs_ops ext4_attr_ops = {
+static const struct sysfs_ops ext4_attr_ops = {
 	.show	= ext4_attr_show,
 	.store	= ext4_attr_store,
 };
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index 543503010ed0..419042f7f0b6 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -49,7 +49,7 @@ static ssize_t gfs2_attr_store(struct kobject *kobj, struct attribute *attr,
 	return a->store ? a->store(sdp, buf, len) : len;
 }
 
-static struct sysfs_ops gfs2_attr_ops = {
+static const struct sysfs_ops gfs2_attr_ops = {
 	.show  = gfs2_attr_show,
 	.store = gfs2_attr_store,
 };
diff --git a/fs/ocfs2/cluster/masklog.c b/fs/ocfs2/cluster/masklog.c
index b39da877b12f..3bb928a2bf7d 100644
--- a/fs/ocfs2/cluster/masklog.c
+++ b/fs/ocfs2/cluster/masklog.c
@@ -136,7 +136,7 @@ static ssize_t mlog_store(struct kobject *obj, struct attribute *attr,
 	return mlog_mask_store(mlog_attr->mask, buf, count);
 }
 
-static struct sysfs_ops mlog_attr_ops = {
+static const struct sysfs_ops mlog_attr_ops = {
 	.show  = mlog_show,
 	.store = mlog_store,
 };
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index 50b725bcc3f3..ced2299f1c9a 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -53,7 +53,7 @@ struct sysfs_buffer {
 	size_t			count;
 	loff_t			pos;
 	char			* page;
-	struct sysfs_ops	* ops;
+	const struct sysfs_ops	* ops;
 	struct mutex		mutex;
 	int			needs_read_fill;
 	int			event;
@@ -75,7 +75,7 @@ static int fill_read_buffer(struct dentry * dentry, struct sysfs_buffer * buffer
 {
 	struct sysfs_dirent *attr_sd = dentry->d_fsdata;
 	struct kobject *kobj = attr_sd->s_parent->s_dir.kobj;
-	struct sysfs_ops * ops = buffer->ops;
+	const struct sysfs_ops * ops = buffer->ops;
 	int ret = 0;
 	ssize_t count;
 
@@ -199,7 +199,7 @@ flush_write_buffer(struct dentry * dentry, struct sysfs_buffer * buffer, size_t
 {
 	struct sysfs_dirent *attr_sd = dentry->d_fsdata;
 	struct kobject *kobj = attr_sd->s_parent->s_dir.kobj;
-	struct sysfs_ops * ops = buffer->ops;
+	const struct sysfs_ops * ops = buffer->ops;
 	int rc;
 
 	/* need attr_sd for attr and ops, its parent for kobj */
@@ -335,7 +335,7 @@ static int sysfs_open_file(struct inode *inode, struct file *file)
 	struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
 	struct kobject *kobj = attr_sd->s_parent->s_dir.kobj;
 	struct sysfs_buffer *buffer;
-	struct sysfs_ops *ops;
+	const struct sysfs_ops *ops;
 	int error = -EACCES;
 	char *p;
 
diff --git a/include/linux/kobject.h b/include/linux/kobject.h
index 57a1eaae9096..3950d3c2850d 100644
--- a/include/linux/kobject.h
+++ b/include/linux/kobject.h
@@ -106,7 +106,7 @@ extern char *kobject_get_path(struct kobject *kobj, gfp_t flag);
 
 struct kobj_type {
 	void (*release)(struct kobject *kobj);
-	struct sysfs_ops *sysfs_ops;
+	const struct sysfs_ops *sysfs_ops;
 	struct attribute **default_attrs;
 };
 
@@ -132,7 +132,7 @@ struct kobj_attribute {
 			 const char *buf, size_t count);
 };
 
-extern struct sysfs_ops kobj_sysfs_ops;
+extern const struct sysfs_ops kobj_sysfs_ops;
 
 /**
  * struct kset - a set of kobjects of a specific type, belonging to a specific subsystem.
diff --git a/kernel/params.c b/kernel/params.c
index 48370be3c0a1..68396d73c838 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -722,7 +722,7 @@ static ssize_t module_attr_store(struct kobject *kobj,
 	return ret;
 }
 
-static struct sysfs_ops module_sysfs_ops = {
+static const struct sysfs_ops module_sysfs_ops = {
 	.show = module_attr_show,
 	.store = module_attr_store,
 };
diff --git a/lib/kobject.c b/lib/kobject.c
index cecf5a0ef6e1..8115eb1bbf4d 100644
--- a/lib/kobject.c
+++ b/lib/kobject.c
@@ -700,7 +700,7 @@ static ssize_t kobj_attr_store(struct kobject *kobj, struct attribute *attr,
 	return ret;
 }
 
-struct sysfs_ops kobj_sysfs_ops = {
+const struct sysfs_ops kobj_sysfs_ops = {
 	.show	= kobj_attr_show,
 	.store	= kobj_attr_store,
 };
diff --git a/mm/slub.c b/mm/slub.c
index a26753c12dcd..a2b8969ba6d0 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -4390,7 +4390,7 @@ static void kmem_cache_release(struct kobject *kobj)
 	kfree(s);
 }
 
-static struct sysfs_ops slab_sysfs_ops = {
+static const struct sysfs_ops slab_sysfs_ops = {
 	.show = slab_attr_show,
 	.store = slab_attr_store,
 };
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 1cf2cef78584..fef0384e3c0b 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -423,7 +423,7 @@ extern void br_ifinfo_notify(int event, struct net_bridge_port *port);
 
 #ifdef CONFIG_SYSFS
 /* br_sysfs_if.c */
-extern struct sysfs_ops brport_sysfs_ops;
+extern const struct sysfs_ops brport_sysfs_ops;
 extern int br_sysfs_addif(struct net_bridge_port *p);
 
 /* br_sysfs_br.c */
diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c
index 696596cd3384..0b9916489d6b 100644
--- a/net/bridge/br_sysfs_if.c
+++ b/net/bridge/br_sysfs_if.c
@@ -238,7 +238,7 @@ static ssize_t brport_store(struct kobject * kobj,
 	return ret;
 }
 
-struct sysfs_ops brport_sysfs_ops = {
+const struct sysfs_ops brport_sysfs_ops = {
 	.show = brport_show,
 	.store = brport_store,
 };
diff --git a/samples/kobject/kset-example.c b/samples/kobject/kset-example.c
index 7c6088140528..3b126d1f8599 100644
--- a/samples/kobject/kset-example.c
+++ b/samples/kobject/kset-example.c
@@ -87,7 +87,7 @@ static ssize_t foo_attr_store(struct kobject *kobj,
 }
 
 /* Our custom sysfs_ops that we will associate with our ktype later on */
-static struct sysfs_ops foo_sysfs_ops = {
+static const struct sysfs_ops foo_sysfs_ops = {
 	.show = foo_attr_show,
 	.store = foo_attr_store,
 };
-- 
cgit v1.2.3


From 831fad2f75f0d7bfc339de81173e7068a3c72276 Mon Sep 17 00:00:00 2001
From: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Date: Tue, 26 Jan 2010 09:35:00 +0100
Subject: Driver core: make struct platform_driver.id_table const
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This fixes a warning on several pxa based machines:

	arch/arm/mach-pxa/ssp.c:475: warning: initialization discards qualifiers from pointer target type

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Acked-by: Vikram Dhillon <dhillonv10@gmail.com>
Acked-by: Eric Miao <eric.y.miao@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/base/platform.c         | 2 +-
 include/linux/platform_device.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/platform.c b/drivers/base/platform.c
index 937d58021d1b..575e08bc6630 100644
--- a/drivers/base/platform.c
+++ b/drivers/base/platform.c
@@ -636,7 +636,7 @@ static int platform_uevent(struct device *dev, struct kobj_uevent_env *env)
 }
 
 static const struct platform_device_id *platform_match_id(
-			struct platform_device_id *id,
+			const struct platform_device_id *id,
 			struct platform_device *pdev)
 {
 	while (id->name[0]) {
diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h
index 2c2d035bfb92..212da17d06af 100644
--- a/include/linux/platform_device.h
+++ b/include/linux/platform_device.h
@@ -62,7 +62,7 @@ struct platform_driver {
 	int (*suspend)(struct platform_device *, pm_message_t state);
 	int (*resume)(struct platform_device *);
 	struct device_driver driver;
-	struct platform_device_id *id_table;
+	const struct platform_device_id *id_table;
 };
 
 extern int platform_driver_register(struct platform_driver *);
-- 
cgit v1.2.3


From 6992f5334995af474c2b58d010d08bc597f0f2fe Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Thu, 11 Feb 2010 15:21:53 -0800
Subject: sysfs: Use one lockdep class per sysfs attribute.

Acknowledge that the logical sysfs rwsem has one instance per
sysfs attribute with different locking depencencies for different
attributes.

There is a sysfs idiom where writing to one sysfs file causes the
addition or removal of other sysfs files.   Lumping all of the
sysfs attributes together in one lock class causes lockdep to
generate lots of false positives.

This introduces the requirement that non-static sysfs attributes
need to be initialized with sysfs_attr_init or sysfs_bin_attr_init.
Strictly speaking this requirement only exists when lockdep is
enabled, and when lockdep is enabled we get a bit fat warning
if this requirement is not met.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Acked-by: WANG Cong <xiyou.wangcong@gmail.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 fs/sysfs/sysfs.h      |  7 +++++--
 include/linux/sysfs.h | 18 ++++++++++++++++++
 2 files changed, 23 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index 7db6884f4206..37e0e086233c 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -92,9 +92,12 @@ static inline unsigned int sysfs_type(struct sysfs_dirent *sd)
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 #define sysfs_dirent_init_lockdep(sd)				\
 do {								\
-	static struct lock_class_key __key;			\
+	struct attribute *attr = sd->s_attr.attr;		\
+	struct lock_class_key *key = attr->key;			\
+	if (!key)						\
+		key = &attr->skey;				\
 								\
-	lockdep_init_map(&sd->dep_map, "s_active", &__key, 0);	\
+	lockdep_init_map(&sd->dep_map, "s_active", key, 0);	\
 } while(0)
 #else
 #define sysfs_dirent_init_lockdep(sd) do {} while(0)
diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index 3e8526582146..006c359e63c0 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -15,6 +15,7 @@
 #include <linux/compiler.h>
 #include <linux/errno.h>
 #include <linux/list.h>
+#include <linux/lockdep.h>
 #include <asm/atomic.h>
 
 struct kobject;
@@ -29,8 +30,23 @@ struct attribute {
 	const char		*name;
 	struct module		*owner;
 	mode_t			mode;
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+	struct lock_class_key	*key;
+	struct lock_class_key	skey;
+#endif
 };
 
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+#define sysfs_attr_init(attr)				\
+do {							\
+	static struct lock_class_key __key;		\
+							\
+	(attr)->key = &__key;				\
+} while(0)
+#else
+#define sysfs_attr_init(attr) do {} while(0)
+#endif
+
 struct attribute_group {
 	const char		*name;
 	mode_t			(*is_visible)(struct kobject *,
@@ -74,6 +90,8 @@ struct bin_attribute {
 		    struct vm_area_struct *vma);
 };
 
+#define sysfs_bin_attr_init(bin_attr) sysfs_attr_init(&bin_attr->attr)
+
 struct sysfs_ops {
 	ssize_t	(*show)(struct kobject *, struct attribute *,char *);
 	ssize_t	(*store)(struct kobject *,struct attribute *,const char *, size_t);
-- 
cgit v1.2.3


From 35960258ed388cdcebdb71df35fd5126978ca325 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Fri, 12 Feb 2010 04:35:32 -0800
Subject: sysfs: Document sysfs_attr_init and sysfs_bin_attr_init

I have added a new requirement to the external sysfs interface
that dynamically allocated sysfs attributes must call sysfs_attr_init
if lockdep is enabled.  For the time being callying sysfs_attr_init
is only mandatory if lockdep is enabled, so we can live with a few
unconverted instances until we find them all.  As this is part of
the public interface of sysfs it is a good idea to document these
pseudo functions so someone inspeciting the code can find out
what has happened.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/sysfs.h | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index 006c359e63c0..5b8f80f5aca6 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -36,6 +36,16 @@ struct attribute {
 #endif
 };
 
+/**
+ *	sysfs_attr_init - initialize a dynamically allocated sysfs attribute
+ *	@attr: struct attribute to initialize
+ *
+ *	Initialize a dynamically allocated struct attribute so we can
+ *	make lockdep happy.  This is a new requirement for attributes
+ *	and initially this is only needed when lockdep is enabled.
+ *	Lockdep gives a nice error when your attribute is added to
+ *	sysfs if you don't have this.
+ */
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 #define sysfs_attr_init(attr)				\
 do {							\
@@ -90,6 +100,16 @@ struct bin_attribute {
 		    struct vm_area_struct *vma);
 };
 
+/**
+ *	sysfs_bin_attr_init - initialize a dynamically allocated bin_attribute
+ *	@attr: struct bin_attribute to initialize
+ *
+ *	Initialize a dynamically allocated struct bin_attribute so we
+ *	can make lockdep happy.  This is a new requirement for
+ *	attributes and initially this is only needed when lockdep is
+ *	enabled.  Lockdep gives a nice error when your attribute is
+ *	added to sysfs if you don't have this.
+ */
 #define sysfs_bin_attr_init(bin_attr) sysfs_attr_init(&bin_attr->attr)
 
 struct sysfs_ops {
-- 
cgit v1.2.3


From 7cb32942d91a501b2df944928ccc9e6590ab237b Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Fri, 12 Feb 2010 19:22:25 -0800
Subject: sysfs: Implement sysfs_rename_link

Because of rename ordering problems we occassionally give false
warnings about invalid sysfs operations.  So using sysfs_rename
create a sysfs_rename_link function that doesn't need strange
workarounds.

Cc: Benjamin Thery <benjamin.thery@bull.net>
Cc: Daniel Lezcano <dlezcano@fr.ibm.com>
Acked-by: Serge Hallyn <serue@us.ibm.com>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Eric W. Biederman <ebiederm@aristanetworks.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 fs/sysfs/symlink.c    | 38 ++++++++++++++++++++++++++++++++++++++
 include/linux/sysfs.h |  9 +++++++++
 2 files changed, 47 insertions(+)

(limited to 'include/linux')

diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c
index c5eff49fa41b..1b9a3a1e8a17 100644
--- a/fs/sysfs/symlink.c
+++ b/fs/sysfs/symlink.c
@@ -123,6 +123,44 @@ void sysfs_remove_link(struct kobject * kobj, const char * name)
 	sysfs_hash_and_remove(parent_sd, name);
 }
 
+/**
+ *	sysfs_rename_link - rename symlink in object's directory.
+ *	@kobj:	object we're acting for.
+ *	@targ:	object we're pointing to.
+ *	@old:	previous name of the symlink.
+ *	@new:	new name of the symlink.
+ *
+ *	A helper function for the common rename symlink idiom.
+ */
+int sysfs_rename_link(struct kobject *kobj, struct kobject *targ,
+			const char *old, const char *new)
+{
+	struct sysfs_dirent *parent_sd, *sd = NULL;
+	int result;
+
+	if (!kobj)
+		parent_sd = &sysfs_root;
+	else
+		parent_sd = kobj->sd;
+
+	result = -ENOENT;
+	sd = sysfs_get_dirent(parent_sd, old);
+	if (!sd)
+		goto out;
+
+	result = -EINVAL;
+	if (sysfs_type(sd) != SYSFS_KOBJ_LINK)
+		goto out;
+	if (sd->s_symlink.target_sd->s_dir.kobj != targ)
+		goto out;
+
+	result = sysfs_rename(sd, parent_sd, new);
+
+out:
+	sysfs_put(sd);
+	return result;
+}
+
 static int sysfs_get_target_path(struct sysfs_dirent *parent_sd,
 				 struct sysfs_dirent *target_sd, char *path)
 {
diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index 5b8f80f5aca6..d77cde6d0498 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -151,6 +151,9 @@ int __must_check sysfs_create_link_nowarn(struct kobject *kobj,
 					  const char *name);
 void sysfs_remove_link(struct kobject *kobj, const char *name);
 
+int sysfs_rename_link(struct kobject *kobj, struct kobject *target,
+			const char *old_name, const char *new_name);
+
 int __must_check sysfs_create_group(struct kobject *kobj,
 				    const struct attribute_group *grp);
 int sysfs_update_group(struct kobject *kobj,
@@ -255,6 +258,12 @@ static inline void sysfs_remove_link(struct kobject *kobj, const char *name)
 {
 }
 
+static inline int sysfs_rename_link(struct kobject *k, struct kobject *t,
+				    const char *old_name, const char *new_name)
+{
+	return 0;
+}
+
 static inline int sysfs_create_group(struct kobject *kobj,
 				     const struct attribute_group *grp)
 {
-- 
cgit v1.2.3


From 62e877b893e6350c900d381f353aa62ed48dcc97 Mon Sep 17 00:00:00 2001
From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Mon, 1 Mar 2010 20:38:36 +1100
Subject: sysfs: fix for thinko with sysfs_bin_attr_init()

After merging the final tree, today's linux-next build (powerpc
allyesconfig) failed like this:

drivers/pci/pci-sysfs.c: In function 'pci_create_legacy_files':
drivers/pci/pci-sysfs.c:645: error: lvalue required as unary '&' operand
drivers/pci/pci-sysfs.c:658: error: lvalue required as unary '&' operand

Caused by commit "sysfs: Use sysfs_attr_init and sysfs_bin_attr_init on
dynamic attributes" interacting with commit "sysfs: Use one lockdep
class per sysfs attribute") both from the driver-core tree.

Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/pci/pci-sysfs.c | 4 ++--
 include/linux/sysfs.h   | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c
index 9fa183cfb0e9..de296452c957 100644
--- a/drivers/pci/pci-sysfs.c
+++ b/drivers/pci/pci-sysfs.c
@@ -642,7 +642,7 @@ void pci_create_legacy_files(struct pci_bus *b)
 	if (!b->legacy_io)
 		goto kzalloc_err;
 
-	sysfs_bin_attr_init(&b->legacy_io);
+	sysfs_bin_attr_init(b->legacy_io);
 	b->legacy_io->attr.name = "legacy_io";
 	b->legacy_io->size = 0xffff;
 	b->legacy_io->attr.mode = S_IRUSR | S_IWUSR;
@@ -655,7 +655,7 @@ void pci_create_legacy_files(struct pci_bus *b)
 		goto legacy_io_err;
 
 	/* Allocated above after the legacy_io struct */
-	sysfs_bin_attr_init(&b->legacy_mem);
+	sysfs_bin_attr_init(b->legacy_mem);
 	b->legacy_mem = b->legacy_io + 1;
 	b->legacy_mem->attr.name = "legacy_mem";
 	b->legacy_mem->size = 1024*1024;
diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index d77cde6d0498..f0496b3d1811 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -110,7 +110,7 @@ struct bin_attribute {
  *	enabled.  Lockdep gives a nice error when your attribute is
  *	added to sysfs if you don't have this.
  */
-#define sysfs_bin_attr_init(bin_attr) sysfs_attr_init(&bin_attr->attr)
+#define sysfs_bin_attr_init(bin_attr) sysfs_attr_init(&(bin_attr)->attr)
 
 struct sysfs_ops {
 	ssize_t	(*show)(struct kobject *, struct attribute *,char *);
-- 
cgit v1.2.3


From 8e9394ce2412254ec69fd2a4f3e44a66eade2297 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@suse.de>
Date: Wed, 17 Feb 2010 10:57:05 -0800
Subject: Driver core: create lock/unlock functions for struct device

In the future, we are going to be changing the lock type for struct
device (once we get the lockdep infrastructure properly worked out)  To
make that changeover easier, and to possibly burry the lock in a
different part of struct device, let's create some functions to lock and
unlock a device so that no out-of-core code needs to be changed in the
future.

This patch creates the device_lock/unlock/trylock() functions, and
converts all in-tree users to them.

Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Jean Delvare <khali@linux-fr.org>
Cc: Dave Young <hidave.darkstar@gmail.com>
Cc: Ming Lei <tom.leiming@gmail.com>
Cc: Jiri Kosina <jkosina@suse.cz>
Cc: Phil Carmody <ext-phil.2.carmody@nokia.com>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Cornelia Huck <cornelia.huck@de.ibm.com>
Cc: Rafael J. Wysocki <rjw@sisk.pl>
Cc: Pavel Machek <pavel@ucw.cz>
Cc: Len Brown <len.brown@intel.com>
Cc: Magnus Damm <damm@igel.co.jp>
Cc: Alan Stern <stern@rowland.harvard.edu>
Cc: Randy Dunlap <randy.dunlap@oracle.com>
Cc: Stefan Richter <stefanr@s5r6.in-berlin.de>
Cc: David Brownell <dbrownell@users.sourceforge.net>
Cc: Vegard Nossum <vegard.nossum@gmail.com>
Cc: Jesse Barnes <jbarnes@virtuousgeek.org>
Cc: Alex Chiang <achiang@hp.com>
Cc: Kenji Kaneshige <kaneshige.kenji@jp.fujitsu.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andrew Patterson <andrew.patterson@hp.com>
Cc: Yu Zhao <yu.zhao@intel.com>
Cc: Dominik Brodowski <linux@dominikbrodowski.net>
Cc: Samuel Ortiz <sameo@linux.intel.com>
Cc: Wolfram Sang <w.sang@pengutronix.de>
Cc: CHENG Renquan <rqcheng@smu.edu.sg>
Cc: Oliver Neukum <oliver@neukum.org>
Cc: Frans Pop <elendil@planet.nl>
Cc: David Vrabel <david.vrabel@csr.com>
Cc: Kay Sievers <kay.sievers@vrfy.org>
Cc: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/base/bus.c             | 20 ++++++++++----------
 drivers/base/dd.c              | 38 +++++++++++++++++++-------------------
 drivers/base/power/main.c      | 20 ++++++++++----------
 drivers/firewire/core-device.c |  5 ++---
 drivers/ieee1394/nodemgr.c     |  5 ++---
 drivers/pci/bus.c              |  4 ++--
 drivers/pci/pci.c              |  4 ++--
 drivers/pcmcia/ds.c            |  8 ++++----
 drivers/usb/core/driver.c      |  4 ++--
 drivers/uwb/umc-bus.c          |  4 ++--
 drivers/uwb/uwb-internal.h     |  4 ++--
 include/linux/device.h         | 17 ++++++++++++++++-
 include/linux/usb.h            |  6 +++---
 13 files changed, 76 insertions(+), 63 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/bus.c b/drivers/base/bus.c
index cca1aa10054c..71f6af5c8b0b 100644
--- a/drivers/base/bus.c
+++ b/drivers/base/bus.c
@@ -173,10 +173,10 @@ static ssize_t driver_unbind(struct device_driver *drv,
 	dev = bus_find_device_by_name(bus, NULL, buf);
 	if (dev && dev->driver == drv) {
 		if (dev->parent)	/* Needed for USB */
-			down(&dev->parent->sem);
+			device_lock(dev->parent);
 		device_release_driver(dev);
 		if (dev->parent)
-			up(&dev->parent->sem);
+			device_unlock(dev->parent);
 		err = count;
 	}
 	put_device(dev);
@@ -200,12 +200,12 @@ static ssize_t driver_bind(struct device_driver *drv,
 	dev = bus_find_device_by_name(bus, NULL, buf);
 	if (dev && dev->driver == NULL && driver_match_device(drv, dev)) {
 		if (dev->parent)	/* Needed for USB */
-			down(&dev->parent->sem);
-		down(&dev->sem);
+			device_lock(dev->parent);
+		device_lock(dev);
 		err = driver_probe_device(drv, dev);
-		up(&dev->sem);
+		device_unlock(dev);
 		if (dev->parent)
-			up(&dev->parent->sem);
+			device_unlock(dev->parent);
 
 		if (err > 0) {
 			/* success */
@@ -744,10 +744,10 @@ static int __must_check bus_rescan_devices_helper(struct device *dev,
 
 	if (!dev->driver) {
 		if (dev->parent)	/* Needed for USB */
-			down(&dev->parent->sem);
+			device_lock(dev->parent);
 		ret = device_attach(dev);
 		if (dev->parent)
-			up(&dev->parent->sem);
+			device_unlock(dev->parent);
 	}
 	return ret < 0 ? ret : 0;
 }
@@ -779,10 +779,10 @@ int device_reprobe(struct device *dev)
 {
 	if (dev->driver) {
 		if (dev->parent)        /* Needed for USB */
-			down(&dev->parent->sem);
+			device_lock(dev->parent);
 		device_release_driver(dev);
 		if (dev->parent)
-			up(&dev->parent->sem);
+			device_unlock(dev->parent);
 	}
 	return bus_rescan_devices_helper(dev, NULL);
 }
diff --git a/drivers/base/dd.c b/drivers/base/dd.c
index ee95c76bfd3d..c89291f8a16b 100644
--- a/drivers/base/dd.c
+++ b/drivers/base/dd.c
@@ -85,7 +85,7 @@ static void driver_sysfs_remove(struct device *dev)
  * for before calling this. (It is ok to call with no other effort
  * from a driver's probe() method.)
  *
- * This function must be called with @dev->sem held.
+ * This function must be called with the device lock held.
  */
 int device_bind_driver(struct device *dev)
 {
@@ -190,8 +190,8 @@ EXPORT_SYMBOL_GPL(wait_for_device_probe);
  * This function returns -ENODEV if the device is not registered,
  * 1 if the device is bound successfully and 0 otherwise.
  *
- * This function must be called with @dev->sem held.  When called for a
- * USB interface, @dev->parent->sem must be held as well.
+ * This function must be called with @dev lock held.  When called for a
+ * USB interface, @dev->parent lock must be held as well.
  */
 int driver_probe_device(struct device_driver *drv, struct device *dev)
 {
@@ -233,13 +233,13 @@ static int __device_attach(struct device_driver *drv, void *data)
  * 0 if no matching driver was found;
  * -ENODEV if the device is not registered.
  *
- * When called for a USB interface, @dev->parent->sem must be held.
+ * When called for a USB interface, @dev->parent lock must be held.
  */
 int device_attach(struct device *dev)
 {
 	int ret = 0;
 
-	down(&dev->sem);
+	device_lock(dev);
 	if (dev->driver) {
 		ret = device_bind_driver(dev);
 		if (ret == 0)
@@ -253,7 +253,7 @@ int device_attach(struct device *dev)
 		ret = bus_for_each_drv(dev->bus, NULL, dev, __device_attach);
 		pm_runtime_put_sync(dev);
 	}
-	up(&dev->sem);
+	device_unlock(dev);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(device_attach);
@@ -276,13 +276,13 @@ static int __driver_attach(struct device *dev, void *data)
 		return 0;
 
 	if (dev->parent)	/* Needed for USB */
-		down(&dev->parent->sem);
-	down(&dev->sem);
+		device_lock(dev->parent);
+	device_lock(dev);
 	if (!dev->driver)
 		driver_probe_device(drv, dev);
-	up(&dev->sem);
+	device_unlock(dev);
 	if (dev->parent)
-		up(&dev->parent->sem);
+		device_unlock(dev->parent);
 
 	return 0;
 }
@@ -303,8 +303,8 @@ int driver_attach(struct device_driver *drv)
 EXPORT_SYMBOL_GPL(driver_attach);
 
 /*
- * __device_release_driver() must be called with @dev->sem held.
- * When called for a USB interface, @dev->parent->sem must be held as well.
+ * __device_release_driver() must be called with @dev lock held.
+ * When called for a USB interface, @dev->parent lock must be held as well.
  */
 static void __device_release_driver(struct device *dev)
 {
@@ -343,7 +343,7 @@ static void __device_release_driver(struct device *dev)
  * @dev: device.
  *
  * Manually detach device from driver.
- * When called for a USB interface, @dev->parent->sem must be held.
+ * When called for a USB interface, @dev->parent lock must be held.
  */
 void device_release_driver(struct device *dev)
 {
@@ -352,9 +352,9 @@ void device_release_driver(struct device *dev)
 	 * within their ->remove callback for the same device, they
 	 * will deadlock right here.
 	 */
-	down(&dev->sem);
+	device_lock(dev);
 	__device_release_driver(dev);
-	up(&dev->sem);
+	device_unlock(dev);
 }
 EXPORT_SYMBOL_GPL(device_release_driver);
 
@@ -381,13 +381,13 @@ void driver_detach(struct device_driver *drv)
 		spin_unlock(&drv->p->klist_devices.k_lock);
 
 		if (dev->parent)	/* Needed for USB */
-			down(&dev->parent->sem);
-		down(&dev->sem);
+			device_lock(dev->parent);
+		device_lock(dev);
 		if (dev->driver == drv)
 			__device_release_driver(dev);
-		up(&dev->sem);
+		device_unlock(dev);
 		if (dev->parent)
-			up(&dev->parent->sem);
+			device_unlock(dev->parent);
 		put_device(dev);
 	}
 }
diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index 0e26a6f6fd48..d477f4dc5e51 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -35,8 +35,8 @@
  * because children are guaranteed to be discovered after parents, and
  * are inserted at the back of the list on discovery.
  *
- * Since device_pm_add() may be called with a device semaphore held,
- * we must never try to acquire a device semaphore while holding
+ * Since device_pm_add() may be called with a device lock held,
+ * we must never try to acquire a device lock while holding
  * dpm_list_mutex.
  */
 
@@ -508,7 +508,7 @@ static int device_resume(struct device *dev, pm_message_t state, bool async)
 	TRACE_RESUME(0);
 
 	dpm_wait(dev->parent, async);
-	down(&dev->sem);
+	device_lock(dev);
 
 	dev->power.status = DPM_RESUMING;
 
@@ -543,7 +543,7 @@ static int device_resume(struct device *dev, pm_message_t state, bool async)
 		}
 	}
  End:
-	up(&dev->sem);
+	device_unlock(dev);
 	complete_all(&dev->power.completion);
 
 	TRACE_RESUME(error);
@@ -629,7 +629,7 @@ static void dpm_resume(pm_message_t state)
  */
 static void device_complete(struct device *dev, pm_message_t state)
 {
-	down(&dev->sem);
+	device_lock(dev);
 
 	if (dev->class && dev->class->pm && dev->class->pm->complete) {
 		pm_dev_dbg(dev, state, "completing class ");
@@ -646,7 +646,7 @@ static void device_complete(struct device *dev, pm_message_t state)
 		dev->bus->pm->complete(dev);
 	}
 
-	up(&dev->sem);
+	device_unlock(dev);
 }
 
 /**
@@ -809,7 +809,7 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async)
 	int error = 0;
 
 	dpm_wait_for_children(dev, async);
-	down(&dev->sem);
+	device_lock(dev);
 
 	if (async_error)
 		goto End;
@@ -849,7 +849,7 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async)
 		dev->power.status = DPM_OFF;
 
  End:
-	up(&dev->sem);
+	device_unlock(dev);
 	complete_all(&dev->power.completion);
 
 	return error;
@@ -938,7 +938,7 @@ static int device_prepare(struct device *dev, pm_message_t state)
 {
 	int error = 0;
 
-	down(&dev->sem);
+	device_lock(dev);
 
 	if (dev->bus && dev->bus->pm && dev->bus->pm->prepare) {
 		pm_dev_dbg(dev, state, "preparing ");
@@ -962,7 +962,7 @@ static int device_prepare(struct device *dev, pm_message_t state)
 		suspend_report_result(dev->class->pm->prepare, error);
 	}
  End:
-	up(&dev->sem);
+	device_unlock(dev);
 
 	return error;
 }
diff --git a/drivers/firewire/core-device.c b/drivers/firewire/core-device.c
index 014cabd3afda..5db0518c66da 100644
--- a/drivers/firewire/core-device.c
+++ b/drivers/firewire/core-device.c
@@ -33,7 +33,6 @@
 #include <linux/module.h>
 #include <linux/mutex.h>
 #include <linux/rwsem.h>
-#include <linux/semaphore.h>
 #include <linux/spinlock.h>
 #include <linux/string.h>
 #include <linux/workqueue.h>
@@ -828,9 +827,9 @@ static int update_unit(struct device *dev, void *data)
 	struct fw_driver *driver = (struct fw_driver *)dev->driver;
 
 	if (is_fw_unit(dev) && driver != NULL && driver->update != NULL) {
-		down(&dev->sem);
+		device_lock(dev);
 		driver->update(unit);
-		up(&dev->sem);
+		device_unlock(dev);
 	}
 
 	return 0;
diff --git a/drivers/ieee1394/nodemgr.c b/drivers/ieee1394/nodemgr.c
index 5122b5a8aa2d..18350213479e 100644
--- a/drivers/ieee1394/nodemgr.c
+++ b/drivers/ieee1394/nodemgr.c
@@ -19,7 +19,6 @@
 #include <linux/moduleparam.h>
 #include <linux/mutex.h>
 #include <linux/freezer.h>
-#include <linux/semaphore.h>
 #include <asm/atomic.h>
 
 #include "csr.h"
@@ -1397,9 +1396,9 @@ static int update_pdrv(struct device *dev, void *data)
 			pdrv = container_of(drv, struct hpsb_protocol_driver,
 					    driver);
 			if (pdrv->update) {
-				down(&ud->device.sem);
+				device_lock(&ud->device);
 				error = pdrv->update(ud);
-				up(&ud->device.sem);
+				device_unlock(&ud->device);
 			}
 			if (error)
 				device_release_driver(&ud->device);
diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c
index 712250f5874a..26301cb25e7f 100644
--- a/drivers/pci/bus.c
+++ b/drivers/pci/bus.c
@@ -288,9 +288,9 @@ void pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *),
 			next = dev->bus_list.next;
 
 		/* Run device routines with the device locked */
-		down(&dev->dev.sem);
+		device_lock(&dev->dev);
 		retval = cb(dev, userdata);
-		up(&dev->dev.sem);
+		device_unlock(&dev->dev);
 		if (retval)
 			break;
 	}
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 77b493b3d97b..897fa5ccdb78 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -2486,7 +2486,7 @@ static int pci_dev_reset(struct pci_dev *dev, int probe)
 	if (!probe) {
 		pci_block_user_cfg_access(dev);
 		/* block PM suspend, driver probe, etc. */
-		down(&dev->dev.sem);
+		device_lock(&dev->dev);
 	}
 
 	rc = pci_dev_specific_reset(dev, probe);
@@ -2508,7 +2508,7 @@ static int pci_dev_reset(struct pci_dev *dev, int probe)
 	rc = pci_parent_bus_reset(dev, probe);
 done:
 	if (!probe) {
-		up(&dev->dev.sem);
+		device_unlock(&dev->dev);
 		pci_unblock_user_cfg_access(dev);
 	}
 
diff --git a/drivers/pcmcia/ds.c b/drivers/pcmcia/ds.c
index 0f98be4450b7..ad93ebd7b2a2 100644
--- a/drivers/pcmcia/ds.c
+++ b/drivers/pcmcia/ds.c
@@ -971,9 +971,9 @@ static int runtime_suspend(struct device *dev)
 {
 	int rc;
 
-	down(&dev->sem);
+	device_lock(dev);
 	rc = pcmcia_dev_suspend(dev, PMSG_SUSPEND);
-	up(&dev->sem);
+	device_unlock(dev);
 	return rc;
 }
 
@@ -981,9 +981,9 @@ static int runtime_resume(struct device *dev)
 {
 	int rc;
 
-	down(&dev->sem);
+	device_lock(dev);
 	rc = pcmcia_dev_resume(dev);
-	up(&dev->sem);
+	device_unlock(dev);
 	return rc;
 }
 
diff --git a/drivers/usb/core/driver.c b/drivers/usb/core/driver.c
index a7037bf81688..f3c233806fa3 100644
--- a/drivers/usb/core/driver.c
+++ b/drivers/usb/core/driver.c
@@ -489,10 +489,10 @@ void usb_driver_release_interface(struct usb_driver *driver,
 	if (device_is_registered(dev)) {
 		device_release_driver(dev);
 	} else {
-		down(&dev->sem);
+		device_lock(dev);
 		usb_unbind_interface(dev);
 		dev->driver = NULL;
-		up(&dev->sem);
+		device_unlock(dev);
 	}
 }
 EXPORT_SYMBOL_GPL(usb_driver_release_interface);
diff --git a/drivers/uwb/umc-bus.c b/drivers/uwb/umc-bus.c
index cdd6c8efc9f8..5fad4e791b3e 100644
--- a/drivers/uwb/umc-bus.c
+++ b/drivers/uwb/umc-bus.c
@@ -62,12 +62,12 @@ int umc_controller_reset(struct umc_dev *umc)
 	struct device *parent = umc->dev.parent;
 	int ret = 0;
 
-	if(down_trylock(&parent->sem))
+	if (device_trylock(parent))
 		return -EAGAIN;
 	ret = device_for_each_child(parent, parent, umc_bus_pre_reset_helper);
 	if (ret >= 0)
 		ret = device_for_each_child(parent, parent, umc_bus_post_reset_helper);
-	up(&parent->sem);
+	device_unlock(parent);
 
 	return ret;
 }
diff --git a/drivers/uwb/uwb-internal.h b/drivers/uwb/uwb-internal.h
index d5bcfc1c227a..157485c862c0 100644
--- a/drivers/uwb/uwb-internal.h
+++ b/drivers/uwb/uwb-internal.h
@@ -366,12 +366,12 @@ struct dentry *uwb_dbg_create_pal_dir(struct uwb_pal *pal);
 
 static inline void uwb_dev_lock(struct uwb_dev *uwb_dev)
 {
-	down(&uwb_dev->dev.sem);
+	device_lock(&uwb_dev->dev);
 }
 
 static inline void uwb_dev_unlock(struct uwb_dev *uwb_dev)
 {
-	up(&uwb_dev->dev.sem);
+	device_unlock(&uwb_dev->dev);
 }
 
 #endif /* #ifndef __UWB_INTERNAL_H__ */
diff --git a/include/linux/device.h b/include/linux/device.h
index f95d5bfe8248..182192892d45 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -106,7 +106,7 @@ extern int bus_unregister_notifier(struct bus_type *bus,
 
 /* All 4 notifers below get called with the target struct device *
  * as an argument. Note that those functions are likely to be called
- * with the device semaphore held in the core, so be careful.
+ * with the device lock held in the core, so be careful.
  */
 #define BUS_NOTIFY_ADD_DEVICE		0x00000001 /* device added */
 #define BUS_NOTIFY_DEL_DEVICE		0x00000002 /* device removed */
@@ -508,6 +508,21 @@ static inline bool device_async_suspend_enabled(struct device *dev)
 	return !!dev->power.async_suspend;
 }
 
+static inline void device_lock(struct device *dev)
+{
+	down(&dev->sem);
+}
+
+static inline int device_trylock(struct device *dev)
+{
+	return down_trylock(&dev->sem);
+}
+
+static inline void device_unlock(struct device *dev)
+{
+	up(&dev->sem);
+}
+
 void driver_init(void);
 
 /*
diff --git a/include/linux/usb.h b/include/linux/usb.h
index 3492abf82e75..8c9f053111bb 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -512,9 +512,9 @@ extern struct usb_device *usb_get_dev(struct usb_device *dev);
 extern void usb_put_dev(struct usb_device *dev);
 
 /* USB device locking */
-#define usb_lock_device(udev)		down(&(udev)->dev.sem)
-#define usb_unlock_device(udev)		up(&(udev)->dev.sem)
-#define usb_trylock_device(udev)	down_trylock(&(udev)->dev.sem)
+#define usb_lock_device(udev)		device_lock(&(udev)->dev)
+#define usb_unlock_device(udev)		device_unlock(&(udev)->dev)
+#define usb_trylock_device(udev)	device_trylock(&(udev)->dev)
 extern int usb_lock_device_for_reset(struct usb_device *udev,
 				     const struct usb_interface *iface);
 
-- 
cgit v1.2.3


From 6cce09f87a04797fae5b947ef2626c14a78f0b49 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Sun, 7 Mar 2010 23:21:57 +0000
Subject: tcp: Add SNMP counters for backlog and min_ttl drops

Commit 6b03a53a (tcp: use limited socket backlog) added the possibility
of dropping frames when backlog queue is full.

Commit d218d111 (tcp: Generalized TTL Security Mechanism) added the
possibility of dropping frames when TTL is under a given limit.

This patch adds new SNMP MIB entries, named TCPBacklogDrop and
TCPMinTTLDrop, published in /proc/net/netstat in TcpExt: line

netstat -s | egrep "TCPBacklogDrop|TCPMinTTLDrop"
    TCPBacklogDrop: 0
    TCPMinTTLDrop: 0

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/snmp.h | 2 ++
 net/ipv4/proc.c      | 2 ++
 net/ipv4/tcp_ipv4.c  | 7 +++++--
 net/ipv6/tcp_ipv6.c  | 3 ++-
 4 files changed, 11 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/snmp.h b/include/linux/snmp.h
index e28f5a0182e8..4435d1084755 100644
--- a/include/linux/snmp.h
+++ b/include/linux/snmp.h
@@ -225,6 +225,8 @@ enum
 	LINUX_MIB_SACKSHIFTED,
 	LINUX_MIB_SACKMERGED,
 	LINUX_MIB_SACKSHIFTFALLBACK,
+	LINUX_MIB_TCPBACKLOGDROP,
+	LINUX_MIB_TCPMINTTLDROP, /* RFC 5082 */
 	__LINUX_MIB_MAX
 };
 
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 242ed2307370..4f1f337f4337 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -249,6 +249,8 @@ static const struct snmp_mib snmp4_net_list[] = {
 	SNMP_MIB_ITEM("TCPSackShifted", LINUX_MIB_SACKSHIFTED),
 	SNMP_MIB_ITEM("TCPSackMerged", LINUX_MIB_SACKMERGED),
 	SNMP_MIB_ITEM("TCPSackShiftFallback", LINUX_MIB_SACKSHIFTFALLBACK),
+	SNMP_MIB_ITEM("TCPBacklogDrop", LINUX_MIB_TCPBACKLOGDROP),
+	SNMP_MIB_ITEM("TCPMinTTLDrop", LINUX_MIB_TCPMINTTLDROP),
 	SNMP_MIB_SENTINEL
 };
 
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 1915f7dc30e6..8d51d39ad1bb 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1651,8 +1651,10 @@ int tcp_v4_rcv(struct sk_buff *skb)
 	if (!sk)
 		goto no_tcp_socket;
 
-	if (iph->ttl < inet_sk(sk)->min_ttl)
+	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
+		NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
 		goto discard_and_relse;
+	}
 
 process:
 	if (sk->sk_state == TCP_TIME_WAIT)
@@ -1682,8 +1684,9 @@ process:
 			if (!tcp_prequeue(sk, skb))
 				ret = tcp_v4_do_rcv(sk, skb);
 		}
-	} else if (sk_add_backlog(sk, skb)) {
+	} else if (unlikely(sk_add_backlog(sk, skb))) {
 		bh_unlock_sock(sk);
+		NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
 		goto discard_and_relse;
 	}
 	bh_unlock_sock(sk);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 2c378b1bd5cf..9b6dbba80d31 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1740,8 +1740,9 @@ process:
 			if (!tcp_prequeue(sk, skb))
 				ret = tcp_v6_do_rcv(sk, skb);
 		}
-	} else if (sk_add_backlog(sk, skb)) {
+	} else if (unlikely(sk_add_backlog(sk, skb))) {
 		bh_unlock_sock(sk);
+		NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
 		goto discard_and_relse;
 	}
 	bh_unlock_sock(sk);
-- 
cgit v1.2.3


From 58b939959d228681208ba997595411fddc860849 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Mon, 8 Mar 2010 22:37:10 -0800
Subject: Input: scancode in get/set_keycodes should be unsigned
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The HID layer has some scan codes of the form 0xffbc0000 for logitech
devices which do not work if scancode is typed as signed int, so we need
to switch to unsigned it instead. While at it keycode being signed does
not make much sense either.

Acked-by: Márton Németh <nm127@freemail.hu>
Acked-by: Matthew Garrett <mjg@redhat.com>
Acked-by: Jiri Kosina <jkosina@suse.cz>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/hid/hid-input.c                    | 24 ++++++++++++------------
 drivers/input/evdev.c                      |  2 +-
 drivers/input/input.c                      | 20 +++++++++-----------
 drivers/input/misc/ati_remote2.c           | 14 +++++++-------
 drivers/input/misc/winbond-cir.c           | 12 +++++-------
 drivers/input/sparse-keymap.c              |  6 ++++--
 drivers/media/IR/ir-keytable.c             |  4 ++--
 drivers/media/dvb/dvb-usb/dvb-usb-remote.c |  4 ++--
 drivers/platform/x86/dell-wmi.c            | 16 +++++++---------
 drivers/platform/x86/hp-wmi.c              | 15 +++++++--------
 drivers/platform/x86/panasonic-laptop.c    | 15 +++++++--------
 drivers/platform/x86/topstar-laptop.c      | 13 ++++++-------
 drivers/platform/x86/toshiba_acpi.c        | 17 +++++++----------
 include/linux/input.h                      | 20 ++++++++++++--------
 14 files changed, 88 insertions(+), 94 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c
index 79d9edd0bdfa..7a0d2e4661a1 100644
--- a/drivers/hid/hid-input.c
+++ b/drivers/hid/hid-input.c
@@ -68,22 +68,25 @@ static const struct {
 #define map_key_clear(c)	hid_map_usage_clear(hidinput, usage, &bit, \
 		&max, EV_KEY, (c))
 
-static inline int match_scancode(int code, int scancode)
+static inline int match_scancode(unsigned int code, unsigned int scancode)
 {
 	if (scancode == 0)
 		return 1;
-	return ((code & (HID_USAGE_PAGE | HID_USAGE)) == scancode);
+
+	return (code & (HID_USAGE_PAGE | HID_USAGE)) == scancode;
 }
 
-static inline int match_keycode(int code, int keycode)
+static inline int match_keycode(unsigned int code, unsigned int keycode)
 {
 	if (keycode == 0)
 		return 1;
-	return (code == keycode);
+
+	return code == keycode;
 }
 
 static struct hid_usage *hidinput_find_key(struct hid_device *hid,
-		int scancode, int keycode)
+					   unsigned int scancode,
+					   unsigned int keycode)
 {
 	int i, j, k;
 	struct hid_report *report;
@@ -105,8 +108,8 @@ static struct hid_usage *hidinput_find_key(struct hid_device *hid,
 	return NULL;
 }
 
-static int hidinput_getkeycode(struct input_dev *dev, int scancode,
-				int *keycode)
+static int hidinput_getkeycode(struct input_dev *dev,
+			       unsigned int scancode, unsigned int *keycode)
 {
 	struct hid_device *hid = input_get_drvdata(dev);
 	struct hid_usage *usage;
@@ -119,16 +122,13 @@ static int hidinput_getkeycode(struct input_dev *dev, int scancode,
 	return -EINVAL;
 }
 
-static int hidinput_setkeycode(struct input_dev *dev, int scancode,
-				int keycode)
+static int hidinput_setkeycode(struct input_dev *dev,
+			       unsigned int scancode, unsigned int keycode)
 {
 	struct hid_device *hid = input_get_drvdata(dev);
 	struct hid_usage *usage;
 	int old_keycode;
 
-	if (keycode < 0 || keycode > KEY_MAX)
-		return -EINVAL;
-
 	usage = hidinput_find_key(hid, scancode, 0);
 	if (usage) {
 		old_keycode = usage->code;
diff --git a/drivers/input/evdev.c b/drivers/input/evdev.c
index 9f9816baeb97..2ee6c7a68bdc 100644
--- a/drivers/input/evdev.c
+++ b/drivers/input/evdev.c
@@ -515,7 +515,7 @@ static long evdev_do_ioctl(struct file *file, unsigned int cmd,
 	struct input_absinfo abs;
 	struct ff_effect effect;
 	int __user *ip = (int __user *)p;
-	int i, t, u, v;
+	unsigned int i, t, u, v;
 	int error;
 
 	switch (cmd) {
diff --git a/drivers/input/input.c b/drivers/input/input.c
index 41168d5f8c17..e2dd8858e19d 100644
--- a/drivers/input/input.c
+++ b/drivers/input/input.c
@@ -582,7 +582,8 @@ static int input_fetch_keycode(struct input_dev *dev, int scancode)
 }
 
 static int input_default_getkeycode(struct input_dev *dev,
-				    int scancode, int *keycode)
+				    unsigned int scancode,
+				    unsigned int *keycode)
 {
 	if (!dev->keycodesize)
 		return -EINVAL;
@@ -596,7 +597,8 @@ static int input_default_getkeycode(struct input_dev *dev,
 }
 
 static int input_default_setkeycode(struct input_dev *dev,
-				    int scancode, int keycode)
+				    unsigned int scancode,
+				    unsigned int keycode)
 {
 	int old_keycode;
 	int i;
@@ -654,11 +656,9 @@ static int input_default_setkeycode(struct input_dev *dev,
  * This function should be called by anyone interested in retrieving current
  * keymap. Presently keyboard and evdev handlers use it.
  */
-int input_get_keycode(struct input_dev *dev, int scancode, int *keycode)
+int input_get_keycode(struct input_dev *dev,
+		      unsigned int scancode, unsigned int *keycode)
 {
-	if (scancode < 0)
-		return -EINVAL;
-
 	return dev->getkeycode(dev, scancode, keycode);
 }
 EXPORT_SYMBOL(input_get_keycode);
@@ -672,16 +672,14 @@ EXPORT_SYMBOL(input_get_keycode);
  * This function should be called by anyone needing to update current
  * keymap. Presently keyboard and evdev handlers use it.
  */
-int input_set_keycode(struct input_dev *dev, int scancode, int keycode)
+int input_set_keycode(struct input_dev *dev,
+		      unsigned int scancode, unsigned int keycode)
 {
 	unsigned long flags;
 	int old_keycode;
 	int retval;
 
-	if (scancode < 0)
-		return -EINVAL;
-
-	if (keycode < 0 || keycode > KEY_MAX)
+	if (keycode > KEY_MAX)
 		return -EINVAL;
 
 	spin_lock_irqsave(&dev->event_lock, flags);
diff --git a/drivers/input/misc/ati_remote2.c b/drivers/input/misc/ati_remote2.c
index 0501f0e65157..15be5430bc6d 100644
--- a/drivers/input/misc/ati_remote2.c
+++ b/drivers/input/misc/ati_remote2.c
@@ -474,10 +474,11 @@ static void ati_remote2_complete_key(struct urb *urb)
 }
 
 static int ati_remote2_getkeycode(struct input_dev *idev,
-				  int scancode, int *keycode)
+				  unsigned int scancode, unsigned int *keycode)
 {
 	struct ati_remote2 *ar2 = input_get_drvdata(idev);
-	int index, mode;
+	unsigned int mode;
+	int index;
 
 	mode = scancode >> 8;
 	if (mode > ATI_REMOTE2_PC || !((1 << mode) & ar2->mode_mask))
@@ -491,10 +492,12 @@ static int ati_remote2_getkeycode(struct input_dev *idev,
 	return 0;
 }
 
-static int ati_remote2_setkeycode(struct input_dev *idev, int scancode, int keycode)
+static int ati_remote2_setkeycode(struct input_dev *idev,
+				  unsigned int scancode, unsigned int keycode)
 {
 	struct ati_remote2 *ar2 = input_get_drvdata(idev);
-	int index, mode, old_keycode;
+	unsigned int mode, old_keycode;
+	int index;
 
 	mode = scancode >> 8;
 	if (mode > ATI_REMOTE2_PC || !((1 << mode) & ar2->mode_mask))
@@ -504,9 +507,6 @@ static int ati_remote2_setkeycode(struct input_dev *idev, int scancode, int keyc
 	if (index < 0)
 		return -EINVAL;
 
-	if (keycode < KEY_RESERVED || keycode > KEY_MAX)
-		return -EINVAL;
-
 	old_keycode = ar2->keycode[mode][index];
 	ar2->keycode[mode][index] = keycode;
 	__set_bit(keycode, idev->keybit);
diff --git a/drivers/input/misc/winbond-cir.c b/drivers/input/misc/winbond-cir.c
index cbec3dfdd42b..9c155a43abc2 100644
--- a/drivers/input/misc/winbond-cir.c
+++ b/drivers/input/misc/winbond-cir.c
@@ -385,26 +385,24 @@ wbcir_do_getkeycode(struct wbcir_data *data, u32 scancode)
 }
 
 static int
-wbcir_getkeycode(struct input_dev *dev, int scancode, int *keycode)
+wbcir_getkeycode(struct input_dev *dev,
+		 unsigned int scancode, unsigned int *keycode)
 {
 	struct wbcir_data *data = input_get_drvdata(dev);
 
-	*keycode = (int)wbcir_do_getkeycode(data, (u32)scancode);
+	*keycode = wbcir_do_getkeycode(data, scancode);
 	return 0;
 }
 
 static int
-wbcir_setkeycode(struct input_dev *dev, int sscancode, int keycode)
+wbcir_setkeycode(struct input_dev *dev,
+		 unsigned int scancode, unsigned int keycode)
 {
 	struct wbcir_data *data = input_get_drvdata(dev);
 	struct wbcir_keyentry *keyentry;
 	struct wbcir_keyentry *new_keyentry;
 	unsigned long flags;
 	unsigned int old_keycode = KEY_RESERVED;
-	u32 scancode = (u32)sscancode;
-
-	if (keycode < 0 || keycode > KEY_MAX)
-		return -EINVAL;
 
 	new_keyentry = kmalloc(sizeof(*new_keyentry), GFP_KERNEL);
 	if (!new_keyentry)
diff --git a/drivers/input/sparse-keymap.c b/drivers/input/sparse-keymap.c
index fbd3987af57f..e6bde55e5203 100644
--- a/drivers/input/sparse-keymap.c
+++ b/drivers/input/sparse-keymap.c
@@ -64,7 +64,8 @@ struct key_entry *sparse_keymap_entry_from_keycode(struct input_dev *dev,
 EXPORT_SYMBOL(sparse_keymap_entry_from_keycode);
 
 static int sparse_keymap_getkeycode(struct input_dev *dev,
-				    int scancode, int *keycode)
+				    unsigned int scancode,
+				    unsigned int *keycode)
 {
 	const struct key_entry *key =
 			sparse_keymap_entry_from_scancode(dev, scancode);
@@ -78,7 +79,8 @@ static int sparse_keymap_getkeycode(struct input_dev *dev,
 }
 
 static int sparse_keymap_setkeycode(struct input_dev *dev,
-				    int scancode, int keycode)
+				    unsigned int scancode,
+				    unsigned int keycode)
 {
 	struct key_entry *key;
 	int old_keycode;
diff --git a/drivers/media/IR/ir-keytable.c b/drivers/media/IR/ir-keytable.c
index 0903f539bf68..0a3b4ed38e48 100644
--- a/drivers/media/IR/ir-keytable.c
+++ b/drivers/media/IR/ir-keytable.c
@@ -123,7 +123,7 @@ static int ir_copy_table(struct ir_scancode_table *destin,
  * If the key is not found, returns -EINVAL, otherwise, returns 0.
  */
 static int ir_getkeycode(struct input_dev *dev,
-			 int scancode, int *keycode)
+			 unsigned int scancode, unsigned int *keycode)
 {
 	int elem;
 	struct ir_input_dev *ir_dev = input_get_drvdata(dev);
@@ -291,7 +291,7 @@ static int ir_insert_key(struct ir_scancode_table *rc_tab,
  * If the key is not found, returns -EINVAL, otherwise, returns 0.
  */
 static int ir_setkeycode(struct input_dev *dev,
-			 int scancode, int keycode)
+			 unsigned int scancode, unsigned int keycode)
 {
 	int rc = 0;
 	struct ir_input_dev *ir_dev = input_get_drvdata(dev);
diff --git a/drivers/media/dvb/dvb-usb/dvb-usb-remote.c b/drivers/media/dvb/dvb-usb/dvb-usb-remote.c
index a03ef7efec9a..852fe89539cf 100644
--- a/drivers/media/dvb/dvb-usb/dvb-usb-remote.c
+++ b/drivers/media/dvb/dvb-usb/dvb-usb-remote.c
@@ -9,7 +9,7 @@
 #include <linux/usb/input.h>
 
 static int dvb_usb_getkeycode(struct input_dev *dev,
-				    int scancode, int *keycode)
+				unsigned int scancode, unsigned int *keycode)
 {
 	struct dvb_usb_device *d = input_get_drvdata(dev);
 
@@ -39,7 +39,7 @@ static int dvb_usb_getkeycode(struct input_dev *dev,
 }
 
 static int dvb_usb_setkeycode(struct input_dev *dev,
-				    int scancode, int keycode)
+				unsigned int scancode, unsigned int keycode)
 {
 	struct dvb_usb_device *d = input_get_drvdata(dev);
 
diff --git a/drivers/platform/x86/dell-wmi.c b/drivers/platform/x86/dell-wmi.c
index 1b1dddbd5744..bed764e3ea2a 100644
--- a/drivers/platform/x86/dell-wmi.c
+++ b/drivers/platform/x86/dell-wmi.c
@@ -142,7 +142,7 @@ static struct key_entry *dell_wmi_keymap = dell_legacy_wmi_keymap;
 
 static struct input_dev *dell_wmi_input_dev;
 
-static struct key_entry *dell_wmi_get_entry_by_scancode(int code)
+static struct key_entry *dell_wmi_get_entry_by_scancode(unsigned int code)
 {
 	struct key_entry *key;
 
@@ -153,7 +153,7 @@ static struct key_entry *dell_wmi_get_entry_by_scancode(int code)
 	return NULL;
 }
 
-static struct key_entry *dell_wmi_get_entry_by_keycode(int keycode)
+static struct key_entry *dell_wmi_get_entry_by_keycode(unsigned int keycode)
 {
 	struct key_entry *key;
 
@@ -164,8 +164,8 @@ static struct key_entry *dell_wmi_get_entry_by_keycode(int keycode)
 	return NULL;
 }
 
-static int dell_wmi_getkeycode(struct input_dev *dev, int scancode,
-			       int *keycode)
+static int dell_wmi_getkeycode(struct input_dev *dev,
+				unsigned int scancode, unsigned int *keycode)
 {
 	struct key_entry *key = dell_wmi_get_entry_by_scancode(scancode);
 
@@ -177,13 +177,11 @@ static int dell_wmi_getkeycode(struct input_dev *dev, int scancode,
 	return -EINVAL;
 }
 
-static int dell_wmi_setkeycode(struct input_dev *dev, int scancode, int keycode)
+static int dell_wmi_setkeycode(struct input_dev *dev,
+				unsigned int scancode, unsigned int keycode)
 {
 	struct key_entry *key;
-	int old_keycode;
-
-	if (keycode < 0 || keycode > KEY_MAX)
-		return -EINVAL;
+	unsigned int old_keycode;
 
 	key = dell_wmi_get_entry_by_scancode(scancode);
 	if (key && key->type == KE_KEY) {
diff --git a/drivers/platform/x86/hp-wmi.c b/drivers/platform/x86/hp-wmi.c
index 7ccf33c08967..56086363becc 100644
--- a/drivers/platform/x86/hp-wmi.c
+++ b/drivers/platform/x86/hp-wmi.c
@@ -278,7 +278,7 @@ static DEVICE_ATTR(als, S_IRUGO | S_IWUSR, show_als, set_als);
 static DEVICE_ATTR(dock, S_IRUGO, show_dock, NULL);
 static DEVICE_ATTR(tablet, S_IRUGO, show_tablet, NULL);
 
-static struct key_entry *hp_wmi_get_entry_by_scancode(int code)
+static struct key_entry *hp_wmi_get_entry_by_scancode(unsigned int code)
 {
 	struct key_entry *key;
 
@@ -289,7 +289,7 @@ static struct key_entry *hp_wmi_get_entry_by_scancode(int code)
 	return NULL;
 }
 
-static struct key_entry *hp_wmi_get_entry_by_keycode(int keycode)
+static struct key_entry *hp_wmi_get_entry_by_keycode(unsigned int keycode)
 {
 	struct key_entry *key;
 
@@ -300,7 +300,8 @@ static struct key_entry *hp_wmi_get_entry_by_keycode(int keycode)
 	return NULL;
 }
 
-static int hp_wmi_getkeycode(struct input_dev *dev, int scancode, int *keycode)
+static int hp_wmi_getkeycode(struct input_dev *dev,
+			     unsigned int scancode, unsigned int *keycode)
 {
 	struct key_entry *key = hp_wmi_get_entry_by_scancode(scancode);
 
@@ -312,13 +313,11 @@ static int hp_wmi_getkeycode(struct input_dev *dev, int scancode, int *keycode)
 	return -EINVAL;
 }
 
-static int hp_wmi_setkeycode(struct input_dev *dev, int scancode, int keycode)
+static int hp_wmi_setkeycode(struct input_dev *dev,
+			     unsigned int scancode, unsigned int keycode)
 {
 	struct key_entry *key;
-	int old_keycode;
-
-	if (keycode < 0 || keycode > KEY_MAX)
-		return -EINVAL;
+	unsigned int old_keycode;
 
 	key = hp_wmi_get_entry_by_scancode(scancode);
 	if (key && key->type == KE_KEY) {
diff --git a/drivers/platform/x86/panasonic-laptop.c b/drivers/platform/x86/panasonic-laptop.c
index fe7cf0188acc..c9fc479fc290 100644
--- a/drivers/platform/x86/panasonic-laptop.c
+++ b/drivers/platform/x86/panasonic-laptop.c
@@ -200,7 +200,7 @@ static struct acpi_driver acpi_pcc_driver = {
 };
 
 #define KEYMAP_SIZE		11
-static const int initial_keymap[KEYMAP_SIZE] = {
+static const unsigned int initial_keymap[KEYMAP_SIZE] = {
 	/*  0 */ KEY_RESERVED,
 	/*  1 */ KEY_BRIGHTNESSDOWN,
 	/*  2 */ KEY_BRIGHTNESSUP,
@@ -222,7 +222,7 @@ struct pcc_acpi {
 	struct acpi_device	*device;
 	struct input_dev	*input_dev;
 	struct backlight_device	*backlight;
-	int			keymap[KEYMAP_SIZE];
+	unsigned int		keymap[KEYMAP_SIZE];
 };
 
 struct pcc_keyinput {
@@ -445,7 +445,8 @@ static struct attribute_group pcc_attr_group = {
 
 /* hotkey input device driver */
 
-static int pcc_getkeycode(struct input_dev *dev, int scancode, int *keycode)
+static int pcc_getkeycode(struct input_dev *dev,
+			  unsigned int scancode, unsigned int *keycode)
 {
 	struct pcc_acpi *pcc = input_get_drvdata(dev);
 
@@ -457,7 +458,7 @@ static int pcc_getkeycode(struct input_dev *dev, int scancode, int *keycode)
 	return 0;
 }
 
-static int keymap_get_by_keycode(struct pcc_acpi *pcc, int keycode)
+static int keymap_get_by_keycode(struct pcc_acpi *pcc, unsigned int keycode)
 {
 	int i;
 
@@ -469,7 +470,8 @@ static int keymap_get_by_keycode(struct pcc_acpi *pcc, int keycode)
 	return 0;
 }
 
-static int pcc_setkeycode(struct input_dev *dev, int scancode, int keycode)
+static int pcc_setkeycode(struct input_dev *dev,
+			  unsigned int scancode, unsigned int keycode)
 {
 	struct pcc_acpi *pcc = input_get_drvdata(dev);
 	int oldkeycode;
@@ -477,9 +479,6 @@ static int pcc_setkeycode(struct input_dev *dev, int scancode, int keycode)
 	if (scancode >= ARRAY_SIZE(pcc->keymap))
 		return -EINVAL;
 
-	if (keycode < 0 || keycode > KEY_MAX)
-		return -EINVAL;
-
 	oldkeycode = pcc->keymap[scancode];
 	pcc->keymap[scancode] = keycode;
 
diff --git a/drivers/platform/x86/topstar-laptop.c b/drivers/platform/x86/topstar-laptop.c
index 02f3d4e9e666..4d6516fded7e 100644
--- a/drivers/platform/x86/topstar-laptop.c
+++ b/drivers/platform/x86/topstar-laptop.c
@@ -46,7 +46,7 @@ static struct tps_key_entry topstar_keymap[] = {
 	{ }
 };
 
-static struct tps_key_entry *tps_get_key_by_scancode(int code)
+static struct tps_key_entry *tps_get_key_by_scancode(unsigned int code)
 {
 	struct tps_key_entry *key;
 
@@ -57,7 +57,7 @@ static struct tps_key_entry *tps_get_key_by_scancode(int code)
 	return NULL;
 }
 
-static struct tps_key_entry *tps_get_key_by_keycode(int code)
+static struct tps_key_entry *tps_get_key_by_keycode(unsigned int code)
 {
 	struct tps_key_entry *key;
 
@@ -126,7 +126,8 @@ static int acpi_topstar_fncx_switch(struct acpi_device *device, bool state)
 	return 0;
 }
 
-static int topstar_getkeycode(struct input_dev *dev, int scancode, int *keycode)
+static int topstar_getkeycode(struct input_dev *dev,
+				unsigned int scancode, unsigned int *keycode)
 {
 	struct tps_key_entry *key = tps_get_key_by_scancode(scancode);
 
@@ -137,14 +138,12 @@ static int topstar_getkeycode(struct input_dev *dev, int scancode, int *keycode)
 	return 0;
 }
 
-static int topstar_setkeycode(struct input_dev *dev, int scancode, int keycode)
+static int topstar_setkeycode(struct input_dev *dev,
+				unsigned int scancode, unsigned int keycode)
 {
 	struct tps_key_entry *key;
 	int old_keycode;
 
-	if (keycode < 0 || keycode > KEY_MAX)
-		return -EINVAL;
-
 	key = tps_get_key_by_scancode(scancode);
 
 	if (!key)
diff --git a/drivers/platform/x86/toshiba_acpi.c b/drivers/platform/x86/toshiba_acpi.c
index 405b969734d6..789240d1b577 100644
--- a/drivers/platform/x86/toshiba_acpi.c
+++ b/drivers/platform/x86/toshiba_acpi.c
@@ -745,7 +745,7 @@ static struct backlight_ops toshiba_backlight_data = {
         .update_status  = set_lcd_status,
 };
 
-static struct key_entry *toshiba_acpi_get_entry_by_scancode(int code)
+static struct key_entry *toshiba_acpi_get_entry_by_scancode(unsigned int code)
 {
 	struct key_entry *key;
 
@@ -756,7 +756,7 @@ static struct key_entry *toshiba_acpi_get_entry_by_scancode(int code)
 	return NULL;
 }
 
-static struct key_entry *toshiba_acpi_get_entry_by_keycode(int code)
+static struct key_entry *toshiba_acpi_get_entry_by_keycode(unsigned int code)
 {
 	struct key_entry *key;
 
@@ -767,8 +767,8 @@ static struct key_entry *toshiba_acpi_get_entry_by_keycode(int code)
 	return NULL;
 }
 
-static int toshiba_acpi_getkeycode(struct input_dev *dev, int scancode,
-				   int *keycode)
+static int toshiba_acpi_getkeycode(struct input_dev *dev,
+				   unsigned int scancode, unsigned int *keycode)
 {
 	struct key_entry *key = toshiba_acpi_get_entry_by_scancode(scancode);
 
@@ -780,14 +780,11 @@ static int toshiba_acpi_getkeycode(struct input_dev *dev, int scancode,
 	return -EINVAL;
 }
 
-static int toshiba_acpi_setkeycode(struct input_dev *dev, int scancode,
-				   int keycode)
+static int toshiba_acpi_setkeycode(struct input_dev *dev,
+				   unsigned int scancode, unsigned int keycode)
 {
 	struct key_entry *key;
-	int old_keycode;
-
-	if (keycode < 0 || keycode > KEY_MAX)
-		return -EINVAL;
+	unsigned int old_keycode;
 
 	key = toshiba_acpi_get_entry_by_scancode(scancode);
 	if (key && key->type == KE_KEY) {
diff --git a/include/linux/input.h b/include/linux/input.h
index dc24effb6d0e..7ed2251b33f1 100644
--- a/include/linux/input.h
+++ b/include/linux/input.h
@@ -58,10 +58,10 @@ struct input_absinfo {
 
 #define EVIOCGVERSION		_IOR('E', 0x01, int)			/* get driver version */
 #define EVIOCGID		_IOR('E', 0x02, struct input_id)	/* get device ID */
-#define EVIOCGREP		_IOR('E', 0x03, int[2])			/* get repeat settings */
-#define EVIOCSREP		_IOW('E', 0x03, int[2])			/* set repeat settings */
-#define EVIOCGKEYCODE		_IOR('E', 0x04, int[2])			/* get keycode */
-#define EVIOCSKEYCODE		_IOW('E', 0x04, int[2])			/* set keycode */
+#define EVIOCGREP		_IOR('E', 0x03, unsigned int[2])	/* get repeat settings */
+#define EVIOCSREP		_IOW('E', 0x03, unsigned int[2])	/* set repeat settings */
+#define EVIOCGKEYCODE		_IOR('E', 0x04, unsigned int[2])	/* get keycode */
+#define EVIOCSKEYCODE		_IOW('E', 0x04, unsigned int[2])	/* set keycode */
 
 #define EVIOCGNAME(len)		_IOC(_IOC_READ, 'E', 0x06, len)		/* get device name */
 #define EVIOCGPHYS(len)		_IOC(_IOC_READ, 'E', 0x07, len)		/* get physical location */
@@ -1142,8 +1142,10 @@ struct input_dev {
 	unsigned int keycodemax;
 	unsigned int keycodesize;
 	void *keycode;
-	int (*setkeycode)(struct input_dev *dev, int scancode, int keycode);
-	int (*getkeycode)(struct input_dev *dev, int scancode, int *keycode);
+	int (*setkeycode)(struct input_dev *dev,
+			  unsigned int scancode, unsigned int keycode);
+	int (*getkeycode)(struct input_dev *dev,
+			  unsigned int scancode, unsigned int *keycode);
 
 	struct ff_device *ff;
 
@@ -1415,8 +1417,10 @@ static inline void input_set_abs_params(struct input_dev *dev, int axis, int min
 	dev->absbit[BIT_WORD(axis)] |= BIT_MASK(axis);
 }
 
-int input_get_keycode(struct input_dev *dev, int scancode, int *keycode);
-int input_set_keycode(struct input_dev *dev, int scancode, int keycode);
+int input_get_keycode(struct input_dev *dev,
+		      unsigned int scancode, unsigned int *keycode);
+int input_set_keycode(struct input_dev *dev,
+		      unsigned int scancode, unsigned int keycode);
 
 extern struct class input_class;
 
-- 
cgit v1.2.3


From 04e82ffb0f02e645f3dde5128ef39d425a8b3c6d Mon Sep 17 00:00:00 2001
From: Peter Huewe <peterhuewe@gmx.de>
Date: Wed, 10 Mar 2010 11:55:05 +0900
Subject: serial: sh-sci: Fix build failure for non-sh architectures.

This patch fixes a build failure for various arm based defconfigs
[1][2][3] and maybe other architectures/configs.

The build failure was introduced by the sh specific patch [4]
"serial: sh-sci: Add DMA support"
by Guennadi Liakhovetski

Patch against linux-next of 20100309

References:
[1] http://kisskb.ellerman.id.au/kisskb/buildresult/2248992/
[2] http://kisskb.ellerman.id.au/kisskb/buildresult/2248996/
[3] http://kisskb.ellerman.id.au/kisskb/buildresult/2248998/
[4] http://git.kernel.org/?p=linux/kernel/git/sfr/linux-next.git;a=commit;h=73a19e4c0301908ce6346715fd08a74308451f5a

Signed-off-by: Peter Huewe <peterhuewe@gmx.de>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 include/linux/serial_sci.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/serial_sci.h b/include/linux/serial_sci.h
index 1b177d29a7f0..193d4bfe42ff 100644
--- a/include/linux/serial_sci.h
+++ b/include/linux/serial_sci.h
@@ -2,7 +2,9 @@
 #define __LINUX_SERIAL_SCI_H
 
 #include <linux/serial_core.h>
+#ifdef CONFIG_SERIAL_SH_SCI_DMA
 #include <asm/dmaengine.h>
+#endif
 
 /*
  * Generic header for SuperH SCI(F) (used by sh/sh64/h8300 and related parts)
@@ -30,8 +32,10 @@ struct plat_sci_port {
 	upf_t		flags;			/* UPF_* flags */
 	char		*clk;			/* clock string */
 	struct device	*dma_dev;
+#ifdef CONFIG_SERIAL_SH_SCI_DMA
 	enum sh_dmae_slave_chan_id dma_slave_tx;
 	enum sh_dmae_slave_chan_id dma_slave_rx;
+#endif
 };
 
 #endif /* __LINUX_SERIAL_SCI_H */
-- 
cgit v1.2.3


From 06a09124b5ec65f81df66c56695d9a9ae04a0114 Mon Sep 17 00:00:00 2001
From: Michael Hennerich <michael.hennerich@analog.com>
Date: Tue, 9 Mar 2010 20:38:45 -0800
Subject: Input: ads7846 - add support for AD7843 parts

The AD7873 is almost identical to the ADS7846; the only difference is
related to the Power Management bits PD0 and PD1.  This results in a
slightly different PENIRQ enable behavior.  For the AD7873, VREF should
be turned off during differential measurements.

So, add the AD7873/43 to the list of driver supported devices, and prevent
VREF usage during differential/ratiometric conversion modes.

Signed-off-by: Michael Hennerich <michael.hennerich@analog.com>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/input/touchscreen/Kconfig   |  9 +++++----
 drivers/input/touchscreen/ads7846.c | 10 ++++++++++
 include/linux/spi/ads7846.h         |  2 +-
 3 files changed, 16 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/input/touchscreen/Kconfig b/drivers/input/touchscreen/Kconfig
index 7208654a94ae..8a8fa4d2d6a8 100644
--- a/drivers/input/touchscreen/Kconfig
+++ b/drivers/input/touchscreen/Kconfig
@@ -24,17 +24,18 @@ config TOUCHSCREEN_88PM860X
 	  module will be called 88pm860x-ts.
 
 config TOUCHSCREEN_ADS7846
-	tristate "ADS7846/TSC2046 and ADS7843 based touchscreens"
+	tristate "ADS7846/TSC2046/AD7873 and AD(S)7843 based touchscreens"
 	depends on SPI_MASTER
 	depends on HWMON = n || HWMON
 	help
 	  Say Y here if you have a touchscreen interface using the
-	  ADS7846/TSC2046 or ADS7843 controller, and your board-specific
-	  setup code includes that in its table of SPI devices.
+	  ADS7846/TSC2046/AD7873 or ADS7843/AD7843 controller,
+	  and your board-specific setup code includes that in its
+	  table of SPI devices.
 
 	  If HWMON is selected, and the driver is told the reference voltage
 	  on your board, you will also get hwmon interfaces for the voltage
-	  (and on ads7846/tsc2046, temperature) sensors of this chip.
+	  (and on ads7846/tsc2046/ad7873, temperature) sensors of this chip.
 
 	  If unsure, say N (but it's safe to say "Y").
 
diff --git a/drivers/input/touchscreen/ads7846.c b/drivers/input/touchscreen/ads7846.c
index 8b05d8e97543..d187be05955f 100644
--- a/drivers/input/touchscreen/ads7846.c
+++ b/drivers/input/touchscreen/ads7846.c
@@ -36,6 +36,7 @@
  * TSC2046 is just newer ads7846 silicon.
  * Support for ads7843 tested on Atmel at91sam926x-EK.
  * Support for ads7845 has only been stubbed in.
+ * Support for Analog Devices AD7873 and AD7843 tested.
  *
  * IRQ handling needs a workaround because of a shortcoming in handling
  * edge triggered IRQs on some platforms like the OMAP1/2. These
@@ -984,6 +985,15 @@ static int __devinit ads7846_probe(struct spi_device *spi)
 
 	vref = pdata->keep_vref_on;
 
+	if (ts->model == 7873) {
+		/* The AD7873 is almost identical to the ADS7846
+		 * keep VREF off during differential/ratiometric
+		 * conversion modes
+		 */
+		ts->model = 7846;
+		vref = 0;
+	}
+
 	/* set up the transfers to read touchscreen state; this assumes we
 	 * use formula #2 for pressure, not #3.
 	 */
diff --git a/include/linux/spi/ads7846.h b/include/linux/spi/ads7846.h
index 51948eb6927a..5710c15d394a 100644
--- a/include/linux/spi/ads7846.h
+++ b/include/linux/spi/ads7846.h
@@ -12,7 +12,7 @@ enum ads7846_filter {
 };
 
 struct ads7846_platform_data {
-	u16	model;			/* 7843, 7845, 7846. */
+	u16	model;			/* 7843, 7845, 7846, 7873. */
 	u16	vref_delay_usecs;	/* 0 for external vref; etc */
 	u16	vref_mv;		/* external vref value, milliVolts */
 	bool	keep_vref_on;		/* set to keep vref on for differential
-- 
cgit v1.2.3


From dc1d628a67a8f042e711ea5accc0beedc3ef0092 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed, 3 Mar 2010 15:55:04 +0100
Subject: perf: Provide generic perf_sample_data initialization

This makes it easier to extend perf_sample_data and fixes a bug on arm
and sparc, which failed to set ->raw to NULL, which can cause crashes
when combined with PERF_SAMPLE_RAW.

It also optimizes PowerPC and tracepoint, because the struct
initialization is forced to zero out the whole structure.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Jean Pihet <jpihet@mvista.com>
Reviewed-by: Frederic Weisbecker <fweisbec@gmail.com>
Acked-by: David S. Miller <davem@davemloft.net>
Cc: Jamie Iles <jamie.iles@picochip.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: stable@kernel.org
LKML-Reference: <20100304140100.315416040@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/arm/kernel/perf_event.c           |  4 ++--
 arch/powerpc/kernel/perf_event.c       |  8 ++++----
 arch/sparc/kernel/perf_event.c         |  2 +-
 arch/x86/kernel/cpu/perf_event.c       |  3 +--
 arch/x86/kernel/cpu/perf_event_intel.c |  6 ++----
 include/linux/perf_event.h             |  7 +++++++
 kernel/perf_event.c                    | 21 ++++++++-------------
 7 files changed, 25 insertions(+), 26 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index c54ceb3d1f97..3875d99cc40f 100644
--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c
@@ -965,7 +965,7 @@ armv6pmu_handle_irq(int irq_num,
 	 */
 	armv6_pmcr_write(pmcr);
 
-	data.addr = 0;
+	perf_sample_data_init(&data, 0);
 
 	cpuc = &__get_cpu_var(cpu_hw_events);
 	for (idx = 0; idx <= armpmu->num_events; ++idx) {
@@ -1945,7 +1945,7 @@ static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev)
 	 */
 	regs = get_irq_regs();
 
-	data.addr = 0;
+	perf_sample_data_init(&data, 0);
 
 	cpuc = &__get_cpu_var(cpu_hw_events);
 	for (idx = 0; idx <= armpmu->num_events; ++idx) {
diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c
index b6cf8f1f4d35..5120bd44f69a 100644
--- a/arch/powerpc/kernel/perf_event.c
+++ b/arch/powerpc/kernel/perf_event.c
@@ -1164,10 +1164,10 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
 	 * Finally record data if requested.
 	 */
 	if (record) {
-		struct perf_sample_data data = {
-			.addr	= ~0ULL,
-			.period	= event->hw.last_period,
-		};
+		struct perf_sample_data data;
+
+		perf_sample_data_init(&data, ~0ULL);
+		data.period = event->hw.last_period;
 
 		if (event->attr.sample_type & PERF_SAMPLE_ADDR)
 			perf_get_data_addr(regs, &data.addr);
diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c
index 9f2b2bac8b2b..6504208f375f 100644
--- a/arch/sparc/kernel/perf_event.c
+++ b/arch/sparc/kernel/perf_event.c
@@ -1189,7 +1189,7 @@ static int __kprobes perf_event_nmi_handler(struct notifier_block *self,
 
 	regs = args->regs;
 
-	data.addr = 0;
+	perf_sample_data_init(&data, 0);
 
 	cpuc = &__get_cpu_var(cpu_hw_events);
 
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 97cddbf32936..42aafd11e170 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1097,8 +1097,7 @@ static int x86_pmu_handle_irq(struct pt_regs *regs)
 	int idx, handled = 0;
 	u64 val;
 
-	data.addr = 0;
-	data.raw = NULL;
+	perf_sample_data_init(&data, 0);
 
 	cpuc = &__get_cpu_var(cpu_hw_events);
 
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 73102df8bfc1..44b60c852107 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -590,10 +590,9 @@ static void intel_pmu_drain_bts_buffer(void)
 
 	ds->bts_index = ds->bts_buffer_base;
 
+	perf_sample_data_init(&data, 0);
 
 	data.period	= event->hw.last_period;
-	data.addr	= 0;
-	data.raw	= NULL;
 	regs.ip		= 0;
 
 	/*
@@ -742,8 +741,7 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
 	int bit, loops;
 	u64 ack, status;
 
-	data.addr = 0;
-	data.raw = NULL;
+	perf_sample_data_init(&data, 0);
 
 	cpuc = &__get_cpu_var(cpu_hw_events);
 
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 90e0521b1690..6f8cd7da1a01 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -801,6 +801,13 @@ struct perf_sample_data {
 	struct perf_raw_record		*raw;
 };
 
+static inline
+void perf_sample_data_init(struct perf_sample_data *data, u64 addr)
+{
+	data->addr = addr;
+	data->raw  = NULL;
+}
+
 extern void perf_output_sample(struct perf_output_handle *handle,
 			       struct perf_event_header *header,
 			       struct perf_sample_data *data,
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index e68745053013..4393b9e73740 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -4108,8 +4108,7 @@ void __perf_sw_event(u32 event_id, u64 nr, int nmi,
 	if (rctx < 0)
 		return;
 
-	data.addr = addr;
-	data.raw  = NULL;
+	perf_sample_data_init(&data, addr);
 
 	do_perf_sw_event(PERF_TYPE_SOFTWARE, event_id, nr, nmi, &data, regs);
 
@@ -4154,11 +4153,10 @@ static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer)
 	struct perf_event *event;
 	u64 period;
 
-	event	= container_of(hrtimer, struct perf_event, hw.hrtimer);
+	event = container_of(hrtimer, struct perf_event, hw.hrtimer);
 	event->pmu->read(event);
 
-	data.addr = 0;
-	data.raw = NULL;
+	perf_sample_data_init(&data, 0);
 	data.period = event->hw.last_period;
 	regs = get_irq_regs();
 	/*
@@ -4322,17 +4320,15 @@ static const struct pmu perf_ops_task_clock = {
 void perf_tp_event(int event_id, u64 addr, u64 count, void *record,
 			  int entry_size)
 {
+	struct pt_regs *regs = get_irq_regs();
+	struct perf_sample_data data;
 	struct perf_raw_record raw = {
 		.size = entry_size,
 		.data = record,
 	};
 
-	struct perf_sample_data data = {
-		.addr = addr,
-		.raw = &raw,
-	};
-
-	struct pt_regs *regs = get_irq_regs();
+	perf_sample_data_init(&data, addr);
+	data.raw = &raw;
 
 	if (!regs)
 		regs = task_pt_regs(current);
@@ -4448,8 +4444,7 @@ void perf_bp_event(struct perf_event *bp, void *data)
 	struct perf_sample_data sample;
 	struct pt_regs *regs = data;
 
-	sample.raw = NULL;
-	sample.addr = bp->attr.bp_addr;
+	perf_sample_data_init(&sample, bp->attr.bp_addr);
 
 	if (!perf_exclude_event(bp, regs))
 		perf_swevent_add(bp, 1, 1, &sample, regs);
-- 
cgit v1.2.3


From 3f6da3905398826d85731247e7fbcf53400c18bd Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 5 Mar 2010 13:01:18 +0100
Subject: perf: Rework and fix the arch CPU-hotplug hooks

Remove the hw_perf_event_*() hotplug hooks in favour of per PMU hotplug
notifiers. This has the advantage of reducing the static weak interface
as well as exposing all hotplug actions to the PMU.

Use this to fix x86 hotplug usage where we did things in ONLINE which
should have been done in UP_PREPARE or STARTING.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: paulus@samba.org
Cc: eranian@google.com
Cc: robert.richter@amd.com
Cc: fweisbec@gmail.com
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
LKML-Reference: <20100305154128.736225361@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/powerpc/kernel/perf_event.c       | 21 +++++++++-
 arch/sh/kernel/perf_event.c            | 20 +++++++++-
 arch/x86/kernel/cpu/perf_event.c       | 70 ++++++++++++++++++++--------------
 arch/x86/kernel/cpu/perf_event_amd.c   | 60 ++++++++++++-----------------
 arch/x86/kernel/cpu/perf_event_intel.c |  5 ++-
 include/linux/perf_event.h             | 16 ++++++++
 kernel/perf_event.c                    | 15 --------
 7 files changed, 126 insertions(+), 81 deletions(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c
index 5120bd44f69a..fbe101d7505d 100644
--- a/arch/powerpc/kernel/perf_event.c
+++ b/arch/powerpc/kernel/perf_event.c
@@ -1287,7 +1287,7 @@ static void perf_event_interrupt(struct pt_regs *regs)
 		irq_exit();
 }
 
-void hw_perf_event_setup(int cpu)
+static void power_pmu_setup(int cpu)
 {
 	struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu);
 
@@ -1297,6 +1297,23 @@ void hw_perf_event_setup(int cpu)
 	cpuhw->mmcr[0] = MMCR0_FC;
 }
 
+static int __cpuinit
+power_pmu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
+{
+	unsigned int cpu = (long)hcpu;
+
+	switch (action & ~CPU_TASKS_FROZEN) {
+	case CPU_UP_PREPARE:
+		power_pmu_setup(cpu);
+		break;
+
+	default:
+		break;
+	}
+
+	return NOTIFY_OK;
+}
+
 int register_power_pmu(struct power_pmu *pmu)
 {
 	if (ppmu)
@@ -1314,5 +1331,7 @@ int register_power_pmu(struct power_pmu *pmu)
 		freeze_events_kernel = MMCR0_FCHV;
 #endif /* CONFIG_PPC64 */
 
+	perf_cpu_notifier(power_pmu_notifier);
+
 	return 0;
 }
diff --git a/arch/sh/kernel/perf_event.c b/arch/sh/kernel/perf_event.c
index 7ff0943e7a08..9f253e9cce01 100644
--- a/arch/sh/kernel/perf_event.c
+++ b/arch/sh/kernel/perf_event.c
@@ -275,13 +275,30 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
 	return &pmu;
 }
 
-void hw_perf_event_setup(int cpu)
+static void sh_pmu_setup(int cpu)
 {
 	struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu);
 
 	memset(cpuhw, 0, sizeof(struct cpu_hw_events));
 }
 
+static int __cpuinit
+sh_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
+{
+	unsigned int cpu = (long)hcpu;
+
+	switch (action & ~CPU_TASKS_FROZEN) {
+	case CPU_UP_PREPARE:
+		sh_pmu_setup(cpu);
+		break;
+
+	default:
+		break;
+	}
+
+	return NOTIFY_OK;
+}
+
 void hw_perf_enable(void)
 {
 	if (!sh_pmu_initialized())
@@ -308,5 +325,6 @@ int register_sh_pmu(struct sh_pmu *pmu)
 
 	WARN_ON(pmu->num_events > MAX_HWEVENTS);
 
+	perf_cpu_notifier(sh_pmu_notifier);
 	return 0;
 }
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 42aafd11e170..585d5608ae6b 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -157,6 +157,11 @@ struct x86_pmu {
 	void		(*put_event_constraints)(struct cpu_hw_events *cpuc,
 						 struct perf_event *event);
 	struct event_constraint *event_constraints;
+
+	void		(*cpu_prepare)(int cpu);
+	void		(*cpu_starting)(int cpu);
+	void		(*cpu_dying)(int cpu);
+	void		(*cpu_dead)(int cpu);
 };
 
 static struct x86_pmu x86_pmu __read_mostly;
@@ -293,7 +298,7 @@ static inline bool bts_available(void)
 	return x86_pmu.enable_bts != NULL;
 }
 
-static inline void init_debug_store_on_cpu(int cpu)
+static void init_debug_store_on_cpu(int cpu)
 {
 	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
 
@@ -305,7 +310,7 @@ static inline void init_debug_store_on_cpu(int cpu)
 		     (u32)((u64)(unsigned long)ds >> 32));
 }
 
-static inline void fini_debug_store_on_cpu(int cpu)
+static void fini_debug_store_on_cpu(int cpu)
 {
 	if (!per_cpu(cpu_hw_events, cpu).ds)
 		return;
@@ -1337,6 +1342,39 @@ undo:
 #include "perf_event_p6.c"
 #include "perf_event_intel.c"
 
+static int __cpuinit
+x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
+{
+	unsigned int cpu = (long)hcpu;
+
+	switch (action & ~CPU_TASKS_FROZEN) {
+	case CPU_UP_PREPARE:
+		if (x86_pmu.cpu_prepare)
+			x86_pmu.cpu_prepare(cpu);
+		break;
+
+	case CPU_STARTING:
+		if (x86_pmu.cpu_starting)
+			x86_pmu.cpu_starting(cpu);
+		break;
+
+	case CPU_DYING:
+		if (x86_pmu.cpu_dying)
+			x86_pmu.cpu_dying(cpu);
+		break;
+
+	case CPU_DEAD:
+		if (x86_pmu.cpu_dead)
+			x86_pmu.cpu_dead(cpu);
+		break;
+
+	default:
+		break;
+	}
+
+	return NOTIFY_OK;
+}
+
 static void __init pmu_check_apic(void)
 {
 	if (cpu_has_apic)
@@ -1415,6 +1453,8 @@ void __init init_hw_perf_events(void)
 	pr_info("... max period:             %016Lx\n", x86_pmu.max_period);
 	pr_info("... fixed-purpose events:   %d\n",     x86_pmu.num_events_fixed);
 	pr_info("... event mask:             %016Lx\n", perf_event_mask);
+
+	perf_cpu_notifier(x86_pmu_notifier);
 }
 
 static inline void x86_pmu_read(struct perf_event *event)
@@ -1674,29 +1714,3 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
 
 	return entry;
 }
-
-void hw_perf_event_setup_online(int cpu)
-{
-	init_debug_store_on_cpu(cpu);
-
-	switch (boot_cpu_data.x86_vendor) {
-	case X86_VENDOR_AMD:
-		amd_pmu_cpu_online(cpu);
-		break;
-	default:
-		return;
-	}
-}
-
-void hw_perf_event_setup_offline(int cpu)
-{
-	init_debug_store_on_cpu(cpu);
-
-	switch (boot_cpu_data.x86_vendor) {
-	case X86_VENDOR_AMD:
-		amd_pmu_cpu_offline(cpu);
-		break;
-	default:
-		return;
-	}
-}
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index 8f3dbfda3c4f..014528ba7d57 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -271,28 +271,6 @@ done:
 	return &emptyconstraint;
 }
 
-static __initconst struct x86_pmu amd_pmu = {
-	.name			= "AMD",
-	.handle_irq		= x86_pmu_handle_irq,
-	.disable_all		= x86_pmu_disable_all,
-	.enable_all		= x86_pmu_enable_all,
-	.enable			= x86_pmu_enable_event,
-	.disable		= x86_pmu_disable_event,
-	.eventsel		= MSR_K7_EVNTSEL0,
-	.perfctr		= MSR_K7_PERFCTR0,
-	.event_map		= amd_pmu_event_map,
-	.raw_event		= amd_pmu_raw_event,
-	.max_events		= ARRAY_SIZE(amd_perfmon_event_map),
-	.num_events		= 4,
-	.event_bits		= 48,
-	.event_mask		= (1ULL << 48) - 1,
-	.apic			= 1,
-	/* use highest bit to detect overflow */
-	.max_period		= (1ULL << 47) - 1,
-	.get_event_constraints	= amd_get_event_constraints,
-	.put_event_constraints	= amd_put_event_constraints
-};
-
 static struct amd_nb *amd_alloc_nb(int cpu, int nb_id)
 {
 	struct amd_nb *nb;
@@ -378,6 +356,31 @@ static void amd_pmu_cpu_offline(int cpu)
 	raw_spin_unlock(&amd_nb_lock);
 }
 
+static __initconst struct x86_pmu amd_pmu = {
+	.name			= "AMD",
+	.handle_irq		= x86_pmu_handle_irq,
+	.disable_all		= x86_pmu_disable_all,
+	.enable_all		= x86_pmu_enable_all,
+	.enable			= x86_pmu_enable_event,
+	.disable		= x86_pmu_disable_event,
+	.eventsel		= MSR_K7_EVNTSEL0,
+	.perfctr		= MSR_K7_PERFCTR0,
+	.event_map		= amd_pmu_event_map,
+	.raw_event		= amd_pmu_raw_event,
+	.max_events		= ARRAY_SIZE(amd_perfmon_event_map),
+	.num_events		= 4,
+	.event_bits		= 48,
+	.event_mask		= (1ULL << 48) - 1,
+	.apic			= 1,
+	/* use highest bit to detect overflow */
+	.max_period		= (1ULL << 47) - 1,
+	.get_event_constraints	= amd_get_event_constraints,
+	.put_event_constraints	= amd_put_event_constraints,
+
+	.cpu_prepare		= amd_pmu_cpu_online,
+	.cpu_dead		= amd_pmu_cpu_offline,
+};
+
 static __init int amd_pmu_init(void)
 {
 	/* Performance-monitoring supported from K7 and later: */
@@ -390,11 +393,6 @@ static __init int amd_pmu_init(void)
 	memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
 	       sizeof(hw_cache_event_ids));
 
-	/*
-	 * explicitly initialize the boot cpu, other cpus will get
-	 * the cpu hotplug callbacks from smp_init()
-	 */
-	amd_pmu_cpu_online(smp_processor_id());
 	return 0;
 }
 
@@ -405,12 +403,4 @@ static int amd_pmu_init(void)
 	return 0;
 }
 
-static void amd_pmu_cpu_online(int cpu)
-{
-}
-
-static void amd_pmu_cpu_offline(int cpu)
-{
-}
-
 #endif
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 44b60c852107..12e811a7d747 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -870,7 +870,10 @@ static __initconst struct x86_pmu intel_pmu = {
 	.max_period		= (1ULL << 31) - 1,
 	.enable_bts		= intel_pmu_enable_bts,
 	.disable_bts		= intel_pmu_disable_bts,
-	.get_event_constraints	= intel_get_event_constraints
+	.get_event_constraints	= intel_get_event_constraints,
+
+	.cpu_starting		= init_debug_store_on_cpu,
+	.cpu_dying		= fini_debug_store_on_cpu,
 };
 
 static __init int intel_pmu_init(void)
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 6f8cd7da1a01..80acbf3d5de1 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -936,5 +936,21 @@ static inline void perf_event_disable(struct perf_event *event)		{ }
 #define perf_output_put(handle, x) \
 	perf_output_copy((handle), &(x), sizeof(x))
 
+/*
+ * This has to have a higher priority than migration_notifier in sched.c.
+ */
+#define perf_cpu_notifier(fn)					\
+do {								\
+	static struct notifier_block fn##_nb __cpuinitdata =	\
+		{ .notifier_call = fn, .priority = 20 };	\
+	fn(&fn##_nb, (unsigned long)CPU_UP_PREPARE,		\
+		(void *)(unsigned long)smp_processor_id());	\
+	fn(&fn##_nb, (unsigned long)CPU_STARTING,		\
+		(void *)(unsigned long)smp_processor_id());	\
+	fn(&fn##_nb, (unsigned long)CPU_ONLINE,			\
+		(void *)(unsigned long)smp_processor_id());	\
+	register_cpu_notifier(&fn##_nb);			\
+} while (0)
+
 #endif /* __KERNEL__ */
 #endif /* _LINUX_PERF_EVENT_H */
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 4393b9e73740..73329dedb5ad 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -81,10 +81,6 @@ extern __weak const struct pmu *hw_perf_event_init(struct perf_event *event)
 void __weak hw_perf_disable(void)		{ barrier(); }
 void __weak hw_perf_enable(void)		{ barrier(); }
 
-void __weak hw_perf_event_setup(int cpu)	{ barrier(); }
-void __weak hw_perf_event_setup_online(int cpu)	{ barrier(); }
-void __weak hw_perf_event_setup_offline(int cpu)	{ barrier(); }
-
 int __weak
 hw_perf_group_sched_in(struct perf_event *group_leader,
 	       struct perf_cpu_context *cpuctx,
@@ -5382,8 +5378,6 @@ static void __cpuinit perf_event_init_cpu(int cpu)
 	spin_lock(&perf_resource_lock);
 	cpuctx->max_pertask = perf_max_events - perf_reserved_percpu;
 	spin_unlock(&perf_resource_lock);
-
-	hw_perf_event_setup(cpu);
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
@@ -5423,20 +5417,11 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
 		perf_event_init_cpu(cpu);
 		break;
 
-	case CPU_ONLINE:
-	case CPU_ONLINE_FROZEN:
-		hw_perf_event_setup_online(cpu);
-		break;
-
 	case CPU_DOWN_PREPARE:
 	case CPU_DOWN_PREPARE_FROZEN:
 		perf_event_exit_cpu(cpu);
 		break;
 
-	case CPU_DEAD:
-		hw_perf_event_setup_offline(cpu);
-		break;
-
 	default:
 		break;
 	}
-- 
cgit v1.2.3


From 5331d7b84613b8325362dde53dc2bff2fb87d351 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Thu, 4 Mar 2010 21:15:56 +0100
Subject: perf: Introduce new perf_fetch_caller_regs() for hot regs snapshot

Events that trigger overflows by interrupting a context can
use get_irq_regs() or task_pt_regs() to retrieve the state
when the event triggered. But this is not the case for some
other class of events like trace events as tracepoints are
executed in the same context than the code that triggered
the event.

It means we need a different api to capture the regs there,
namely we need a hot snapshot to get the most important
informations for perf: the instruction pointer to get the
event origin, the frame pointer for the callchain, the code
segment for user_mode() tests (we always use __KERNEL_CS as
trace events always occur from the kernel) and the eflags
for further purposes.

v2: rename perf_save_regs to perf_fetch_caller_regs as per
Masami's suggestion.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Masami Hiramatsu <mhiramat@redhat.com>
Cc: Jason Baron <jbaron@redhat.com>
Cc: Archs <linux-arch@vger.kernel.org>
---
 arch/x86/kernel/cpu/perf_event.c | 12 ++++++++++++
 arch/x86/kernel/dumpstack.h      | 15 ++++++++++++++
 include/linux/perf_event.h       | 42 +++++++++++++++++++++++++++++++++++++++-
 kernel/perf_event.c              |  5 +++++
 4 files changed, 73 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 1d665a0b202c..c6bde7d7afdc 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1707,3 +1707,15 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
 
 	return entry;
 }
+
+void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip)
+{
+	regs->ip = ip;
+	/*
+	 * perf_arch_fetch_caller_regs adds another call, we need to increment
+	 * the skip level
+	 */
+	regs->bp = rewind_frame_pointer(skip + 1);
+	regs->cs = __KERNEL_CS;
+	local_save_flags(regs->flags);
+}
diff --git a/arch/x86/kernel/dumpstack.h b/arch/x86/kernel/dumpstack.h
index 4fd1420faffa..29e5f7c845b2 100644
--- a/arch/x86/kernel/dumpstack.h
+++ b/arch/x86/kernel/dumpstack.h
@@ -29,4 +29,19 @@ struct stack_frame {
 	struct stack_frame *next_frame;
 	unsigned long return_address;
 };
+
+static inline unsigned long rewind_frame_pointer(int n)
+{
+	struct stack_frame *frame;
+
+	get_bp(frame);
+
+#ifdef CONFIG_FRAME_POINTER
+	while (n--)
+		frame = frame->next_frame;
 #endif
+
+	return (unsigned long)frame;
+}
+
+#endif /* DUMPSTACK_H */
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 80acbf3d5de1..70cffd052c04 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -452,6 +452,7 @@ enum perf_callchain_context {
 #include <linux/fs.h>
 #include <linux/pid_namespace.h>
 #include <linux/workqueue.h>
+#include <linux/ftrace.h>
 #include <asm/atomic.h>
 
 #define PERF_MAX_STACK_DEPTH		255
@@ -847,6 +848,44 @@ perf_sw_event(u32 event_id, u64 nr, int nmi, struct pt_regs *regs, u64 addr)
 		__perf_sw_event(event_id, nr, nmi, regs, addr);
 }
 
+extern void
+perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip);
+
+/*
+ * Take a snapshot of the regs. Skip ip and frame pointer to
+ * the nth caller. We only need a few of the regs:
+ * - ip for PERF_SAMPLE_IP
+ * - cs for user_mode() tests
+ * - bp for callchains
+ * - eflags, for future purposes, just in case
+ */
+static inline void perf_fetch_caller_regs(struct pt_regs *regs, int skip)
+{
+	unsigned long ip;
+
+	memset(regs, 0, sizeof(*regs));
+
+	switch (skip) {
+	case 1 :
+		ip = CALLER_ADDR0;
+		break;
+	case 2 :
+		ip = CALLER_ADDR1;
+		break;
+	case 3 :
+		ip = CALLER_ADDR2;
+		break;
+	case 4:
+		ip = CALLER_ADDR3;
+		break;
+	/* No need to support further for now */
+	default:
+		ip = 0;
+	}
+
+	return perf_arch_fetch_caller_regs(regs, ip, skip);
+}
+
 extern void __perf_event_mmap(struct vm_area_struct *vma);
 
 static inline void perf_event_mmap(struct vm_area_struct *vma)
@@ -880,7 +919,8 @@ static inline bool perf_paranoid_kernel(void)
 }
 
 extern void perf_event_init(void);
-extern void perf_tp_event(int event_id, u64 addr, u64 count, void *record, int entry_size);
+extern void perf_tp_event(int event_id, u64 addr, u64 count, void *record,
+			  int entry_size, struct pt_regs *regs);
 extern void perf_bp_event(struct perf_event *event, void *data);
 
 #ifndef perf_misc_flags
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 52c69a34d697..359d7f690c2b 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -2786,6 +2786,11 @@ __weak struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
 	return NULL;
 }
 
+__weak
+void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip)
+{
+}
+
 /*
  * Output
  */
-- 
cgit v1.2.3


From c530665c31c0140b74ca7689e7f836177796e5bd Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Wed, 3 Mar 2010 07:16:16 +0100
Subject: perf: Take a hot regs snapshot for trace events

We are taking a wrong regs snapshot when a trace event triggers.
Either we use get_irq_regs(), which gives us the interrupted
registers if we are in an interrupt, or we use task_pt_regs()
which gives us the state before we entered the kernel, assuming
we are lucky enough to be no kernel thread, in which case
task_pt_regs() returns the initial set of regs when the kernel
thread was started.

What we want is different. We need a hot snapshot of the regs,
so that we can get the instruction pointer to record in the
sample, the frame pointer for the callchain, and some other
things.

Let's use the new perf_fetch_caller_regs() for that.

Comparison with perf record -e lock: -R -a -f -g
Before:

        perf  [kernel]                   [k] __do_softirq
               |
               --- __do_softirq
                  |
                  |--55.16%-- __open
                  |
                   --44.84%-- __write_nocancel

After:

            perf  [kernel]           [k] perf_tp_event
               |
               --- perf_tp_event
                  |
                  |--41.07%-- lock_acquire
                  |          |
                  |          |--39.36%-- _raw_spin_lock
                  |          |          |
                  |          |          |--7.81%-- hrtimer_interrupt
                  |          |          |          smp_apic_timer_interrupt
                  |          |          |          apic_timer_interrupt

The old case was producing unreliable callchains. Now having
right frame and instruction pointers, we have the trace we
want.

Also syscalls and kprobe events already have the right regs,
let's use them instead of wasting a retrieval.

v2: Follow the rename perf_save_regs() -> perf_fetch_caller_regs()

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Masami Hiramatsu <mhiramat@redhat.com>
Cc: Jason Baron <jbaron@redhat.com>
Cc: Archs <linux-arch@vger.kernel.org>
---
 include/linux/ftrace_event.h       | 7 +++++--
 include/trace/ftrace.h             | 6 +++++-
 kernel/perf_event.c                | 8 ++------
 kernel/trace/trace_event_profile.c | 3 ++-
 kernel/trace/trace_kprobe.c        | 5 +++--
 kernel/trace/trace_syscalls.c      | 4 ++--
 6 files changed, 19 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 6b7c444ab8f6..ac424f18ce63 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -187,6 +187,9 @@ do {									\
 
 #ifdef CONFIG_PERF_EVENTS
 struct perf_event;
+
+DECLARE_PER_CPU(struct pt_regs, perf_trace_regs);
+
 extern int ftrace_profile_enable(int event_id);
 extern void ftrace_profile_disable(int event_id);
 extern int ftrace_profile_set_filter(struct perf_event *event, int event_id,
@@ -198,11 +201,11 @@ ftrace_perf_buf_prepare(int size, unsigned short type, int *rctxp,
 
 static inline void
 ftrace_perf_buf_submit(void *raw_data, int size, int rctx, u64 addr,
-		       u64 count, unsigned long irq_flags)
+		       u64 count, unsigned long irq_flags, struct pt_regs *regs)
 {
 	struct trace_entry *entry = raw_data;
 
-	perf_tp_event(entry->type, addr, count, raw_data, size);
+	perf_tp_event(entry->type, addr, count, raw_data, size, regs);
 	perf_swevent_put_recursion_context(rctx);
 	local_irq_restore(irq_flags);
 }
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index 0804cd594803..f31bb8b9777c 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -764,6 +764,7 @@ ftrace_profile_templ_##call(struct ftrace_event_call *event_call,	\
 	struct ftrace_raw_##call *entry;				\
 	u64 __addr = 0, __count = 1;					\
 	unsigned long irq_flags;					\
+	struct pt_regs *__regs;						\
 	int __entry_size;						\
 	int __data_size;						\
 	int rctx;							\
@@ -784,8 +785,11 @@ ftrace_profile_templ_##call(struct ftrace_event_call *event_call,	\
 									\
 	{ assign; }							\
 									\
+	__regs = &__get_cpu_var(perf_trace_regs);			\
+	perf_fetch_caller_regs(__regs, 2);				\
+									\
 	ftrace_perf_buf_submit(entry, __entry_size, rctx, __addr,	\
-			       __count, irq_flags);			\
+			       __count, irq_flags, __regs);		\
 }
 
 #undef DEFINE_EVENT
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 359d7f690c2b..45b4b6e55891 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -4318,9 +4318,8 @@ static const struct pmu perf_ops_task_clock = {
 #ifdef CONFIG_EVENT_TRACING
 
 void perf_tp_event(int event_id, u64 addr, u64 count, void *record,
-			  int entry_size)
+		   int entry_size, struct pt_regs *regs)
 {
-	struct pt_regs *regs = get_irq_regs();
 	struct perf_sample_data data;
 	struct perf_raw_record raw = {
 		.size = entry_size,
@@ -4330,12 +4329,9 @@ void perf_tp_event(int event_id, u64 addr, u64 count, void *record,
 	perf_sample_data_init(&data, addr);
 	data.raw = &raw;
 
-	if (!regs)
-		regs = task_pt_regs(current);
-
 	/* Trace events already protected against recursion */
 	do_perf_sw_event(PERF_TYPE_TRACEPOINT, event_id, count, 1,
-				&data, regs);
+			 &data, regs);
 }
 EXPORT_SYMBOL_GPL(perf_tp_event);
 
diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c
index f0d693005075..e66d21e15a0f 100644
--- a/kernel/trace/trace_event_profile.c
+++ b/kernel/trace/trace_event_profile.c
@@ -2,13 +2,14 @@
  * trace event based perf counter profiling
  *
  * Copyright (C) 2009 Red Hat Inc, Peter Zijlstra <pzijlstr@redhat.com>
- *
+ * Copyright (C) 2009-2010 Frederic Weisbecker <fweisbec@gmail.com>
  */
 
 #include <linux/module.h>
 #include <linux/kprobes.h>
 #include "trace.h"
 
+DEFINE_PER_CPU(struct pt_regs, perf_trace_regs);
 
 static char *perf_trace_buf;
 static char *perf_trace_buf_nmi;
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 505c92273b1a..f7a20a8bfb31 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -1240,7 +1240,7 @@ static __kprobes void kprobe_profile_func(struct kprobe *kp,
 	for (i = 0; i < tp->nr_args; i++)
 		entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
 
-	ftrace_perf_buf_submit(entry, size, rctx, entry->ip, 1, irq_flags);
+	ftrace_perf_buf_submit(entry, size, rctx, entry->ip, 1, irq_flags, regs);
 }
 
 /* Kretprobe profile handler */
@@ -1271,7 +1271,8 @@ static __kprobes void kretprobe_profile_func(struct kretprobe_instance *ri,
 	for (i = 0; i < tp->nr_args; i++)
 		entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
 
-	ftrace_perf_buf_submit(entry, size, rctx, entry->ret_ip, 1, irq_flags);
+	ftrace_perf_buf_submit(entry, size, rctx, entry->ret_ip, 1,
+			       irq_flags, regs);
 }
 
 static int probe_profile_enable(struct ftrace_event_call *call)
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index cba47d7935cc..7e6e84fb7b6c 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -467,7 +467,7 @@ static void prof_syscall_enter(struct pt_regs *regs, long id)
 	rec->nr = syscall_nr;
 	syscall_get_arguments(current, regs, 0, sys_data->nb_args,
 			       (unsigned long *)&rec->args);
-	ftrace_perf_buf_submit(rec, size, rctx, 0, 1, flags);
+	ftrace_perf_buf_submit(rec, size, rctx, 0, 1, flags, regs);
 }
 
 int prof_sysenter_enable(struct ftrace_event_call *call)
@@ -542,7 +542,7 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
 	rec->nr = syscall_nr;
 	rec->ret = syscall_get_return_value(current, regs);
 
-	ftrace_perf_buf_submit(rec, size, rctx, 0, 1, flags);
+	ftrace_perf_buf_submit(rec, size, rctx, 0, 1, flags, regs);
 }
 
 int prof_sysexit_enable(struct ftrace_event_call *call)
-- 
cgit v1.2.3


From 97d5a22005f38057b4bc0d95f81cd26510268794 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Fri, 5 Mar 2010 05:35:37 +0100
Subject: perf: Drop the obsolete profile naming for trace events

Drop the obsolete "profile" naming used by perf for trace events.
Perf can now do more than simple events counting, so generalize
the API naming.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Masami Hiramatsu <mhiramat@redhat.com>
Cc: Jason Baron <jbaron@redhat.com>
---
 include/linux/ftrace_event.h       |  16 ++--
 include/linux/syscalls.h           |  24 +++---
 include/trace/ftrace.h             |  38 ++++-----
 include/trace/syscall.h            |   8 +-
 kernel/perf_event.c                |   4 +-
 kernel/trace/Makefile              |   2 +-
 kernel/trace/trace_event_perf.c    | 165 +++++++++++++++++++++++++++++++++++++
 kernel/trace/trace_event_profile.c | 165 -------------------------------------
 kernel/trace/trace_events.c        |   2 +-
 kernel/trace/trace_kprobe.c        |  28 +++----
 kernel/trace/trace_syscalls.c      |  72 ++++++++--------
 11 files changed, 262 insertions(+), 262 deletions(-)
 create mode 100644 kernel/trace/trace_event_perf.c
 delete mode 100644 kernel/trace/trace_event_profile.c

(limited to 'include/linux')

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index ac424f18ce63..c0f4b364c711 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -131,12 +131,12 @@ struct ftrace_event_call {
 	void			*mod;
 	void			*data;
 
-	int			profile_count;
-	int			(*profile_enable)(struct ftrace_event_call *);
-	void			(*profile_disable)(struct ftrace_event_call *);
+	int			perf_refcount;
+	int			(*perf_event_enable)(struct ftrace_event_call *);
+	void			(*perf_event_disable)(struct ftrace_event_call *);
 };
 
-#define FTRACE_MAX_PROFILE_SIZE	2048
+#define PERF_MAX_TRACE_SIZE	2048
 
 #define MAX_FILTER_PRED		32
 #define MAX_FILTER_STR_VAL	256	/* Should handle KSYM_SYMBOL_LEN */
@@ -190,17 +190,17 @@ struct perf_event;
 
 DECLARE_PER_CPU(struct pt_regs, perf_trace_regs);
 
-extern int ftrace_profile_enable(int event_id);
-extern void ftrace_profile_disable(int event_id);
+extern int perf_trace_enable(int event_id);
+extern void perf_trace_disable(int event_id);
 extern int ftrace_profile_set_filter(struct perf_event *event, int event_id,
 				     char *filter_str);
 extern void ftrace_profile_free_filter(struct perf_event *event);
 extern void *
-ftrace_perf_buf_prepare(int size, unsigned short type, int *rctxp,
+perf_trace_buf_prepare(int size, unsigned short type, int *rctxp,
 			 unsigned long *irq_flags);
 
 static inline void
-ftrace_perf_buf_submit(void *raw_data, int size, int rctx, u64 addr,
+perf_trace_buf_submit(void *raw_data, int size, int rctx, u64 addr,
 		       u64 count, unsigned long irq_flags, struct pt_regs *regs)
 {
 	struct trace_entry *entry = raw_data;
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 8126f239edf0..51435bcc3460 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -101,18 +101,18 @@ struct perf_event_attr;
 
 #ifdef CONFIG_PERF_EVENTS
 
-#define TRACE_SYS_ENTER_PROFILE_INIT(sname)				       \
-	.profile_enable = prof_sysenter_enable,				       \
-	.profile_disable = prof_sysenter_disable,
+#define TRACE_SYS_ENTER_PERF_INIT(sname)				       \
+	.perf_event_enable = perf_sysenter_enable,			       \
+	.perf_event_disable = perf_sysenter_disable,
 
-#define TRACE_SYS_EXIT_PROFILE_INIT(sname)				       \
-	.profile_enable = prof_sysexit_enable,				       \
-	.profile_disable = prof_sysexit_disable,
+#define TRACE_SYS_EXIT_PERF_INIT(sname)					       \
+	.perf_event_enable = perf_sysexit_enable,			       \
+	.perf_event_disable = perf_sysexit_disable,
 #else
-#define TRACE_SYS_ENTER_PROFILE(sname)
-#define TRACE_SYS_ENTER_PROFILE_INIT(sname)
-#define TRACE_SYS_EXIT_PROFILE(sname)
-#define TRACE_SYS_EXIT_PROFILE_INIT(sname)
+#define TRACE_SYS_ENTER_PERF(sname)
+#define TRACE_SYS_ENTER_PERF_INIT(sname)
+#define TRACE_SYS_EXIT_PERF(sname)
+#define TRACE_SYS_EXIT_PERF_INIT(sname)
 #endif /* CONFIG_PERF_EVENTS */
 
 #ifdef CONFIG_FTRACE_SYSCALLS
@@ -149,7 +149,7 @@ struct perf_event_attr;
 		.regfunc		= reg_event_syscall_enter,	\
 		.unregfunc		= unreg_event_syscall_enter,	\
 		.data			= (void *)&__syscall_meta_##sname,\
-		TRACE_SYS_ENTER_PROFILE_INIT(sname)			\
+		TRACE_SYS_ENTER_PERF_INIT(sname)			\
 	}
 
 #define SYSCALL_TRACE_EXIT_EVENT(sname)					\
@@ -171,7 +171,7 @@ struct perf_event_attr;
 		.regfunc		= reg_event_syscall_exit,	\
 		.unregfunc		= unreg_event_syscall_exit,	\
 		.data			= (void *)&__syscall_meta_##sname,\
-		TRACE_SYS_EXIT_PROFILE_INIT(sname)			\
+		TRACE_SYS_EXIT_PERF_INIT(sname)			\
 	}
 
 #define SYSCALL_METADATA(sname, nb)				\
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index f31bb8b9777c..25ab56f75d65 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -401,18 +401,18 @@ static inline notrace int ftrace_get_offsets_##call(			\
 #undef DEFINE_EVENT
 #define DEFINE_EVENT(template, name, proto, args)			\
 									\
-static void ftrace_profile_##name(proto);				\
+static void perf_trace_##name(proto);					\
 									\
 static notrace int							\
-ftrace_profile_enable_##name(struct ftrace_event_call *unused)		\
+perf_trace_enable_##name(struct ftrace_event_call *unused)		\
 {									\
-	return register_trace_##name(ftrace_profile_##name);		\
+	return register_trace_##name(perf_trace_##name);		\
 }									\
 									\
 static notrace void							\
-ftrace_profile_disable_##name(struct ftrace_event_call *unused)		\
+perf_trace_disable_##name(struct ftrace_event_call *unused)		\
 {									\
-	unregister_trace_##name(ftrace_profile_##name);			\
+	unregister_trace_##name(perf_trace_##name);			\
 }
 
 #undef DEFINE_EVENT_PRINT
@@ -507,12 +507,12 @@ ftrace_profile_disable_##name(struct ftrace_event_call *unused)		\
 
 #ifdef CONFIG_PERF_EVENTS
 
-#define _TRACE_PROFILE_INIT(call)					\
-	.profile_enable = ftrace_profile_enable_##call,			\
-	.profile_disable = ftrace_profile_disable_##call,
+#define _TRACE_PERF_INIT(call)						\
+	.perf_event_enable = perf_trace_enable_##call,			\
+	.perf_event_disable = perf_trace_disable_##call,
 
 #else
-#define _TRACE_PROFILE_INIT(call)
+#define _TRACE_PERF_INIT(call)
 #endif /* CONFIG_PERF_EVENTS */
 
 #undef __entry
@@ -638,7 +638,7 @@ __attribute__((section("_ftrace_events"))) event_##call = {		\
 	.unregfunc		= ftrace_raw_unreg_event_##call,	\
 	.print_fmt		= print_fmt_##template,			\
 	.define_fields		= ftrace_define_fields_##template,	\
-	_TRACE_PROFILE_INIT(call)					\
+	_TRACE_PERF_INIT(call)					\
 }
 
 #undef DEFINE_EVENT_PRINT
@@ -657,18 +657,18 @@ __attribute__((section("_ftrace_events"))) event_##call = {		\
 	.unregfunc		= ftrace_raw_unreg_event_##call,	\
 	.print_fmt		= print_fmt_##call,			\
 	.define_fields		= ftrace_define_fields_##template,	\
-	_TRACE_PROFILE_INIT(call)					\
+	_TRACE_PERF_INIT(call)					\
 }
 
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
 
 /*
- * Define the insertion callback to profile events
+ * Define the insertion callback to perf events
  *
  * The job is very similar to ftrace_raw_event_<call> except that we don't
  * insert in the ring buffer but in a perf counter.
  *
- * static void ftrace_profile_<call>(proto)
+ * static void ftrace_perf_<call>(proto)
  * {
  *	struct ftrace_data_offsets_<call> __maybe_unused __data_offsets;
  *	struct ftrace_event_call *event_call = &event_<call>;
@@ -757,7 +757,7 @@ __attribute__((section("_ftrace_events"))) event_##call = {		\
 #undef DECLARE_EVENT_CLASS
 #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print)	\
 static notrace void							\
-ftrace_profile_templ_##call(struct ftrace_event_call *event_call,	\
+perf_trace_templ_##call(struct ftrace_event_call *event_call,		\
 			    proto)					\
 {									\
 	struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\
@@ -774,10 +774,10 @@ ftrace_profile_templ_##call(struct ftrace_event_call *event_call,	\
 			     sizeof(u64));				\
 	__entry_size -= sizeof(u32);					\
 									\
-	if (WARN_ONCE(__entry_size > FTRACE_MAX_PROFILE_SIZE,		\
+	if (WARN_ONCE(__entry_size > PERF_MAX_TRACE_SIZE,		\
 		      "profile buffer not large enough"))		\
 		return;							\
-	entry = (struct ftrace_raw_##call *)ftrace_perf_buf_prepare(	\
+	entry = (struct ftrace_raw_##call *)perf_trace_buf_prepare(	\
 		__entry_size, event_call->id, &rctx, &irq_flags);	\
 	if (!entry)							\
 		return;							\
@@ -788,17 +788,17 @@ ftrace_profile_templ_##call(struct ftrace_event_call *event_call,	\
 	__regs = &__get_cpu_var(perf_trace_regs);			\
 	perf_fetch_caller_regs(__regs, 2);				\
 									\
-	ftrace_perf_buf_submit(entry, __entry_size, rctx, __addr,	\
+	perf_trace_buf_submit(entry, __entry_size, rctx, __addr,	\
 			       __count, irq_flags, __regs);		\
 }
 
 #undef DEFINE_EVENT
 #define DEFINE_EVENT(template, call, proto, args)		\
-static notrace void ftrace_profile_##call(proto)		\
+static notrace void perf_trace_##call(proto)			\
 {								\
 	struct ftrace_event_call *event_call = &event_##call;	\
 								\
-	ftrace_profile_templ_##template(event_call, args);	\
+	perf_trace_templ_##template(event_call, args);		\
 }
 
 #undef DEFINE_EVENT_PRINT
diff --git a/include/trace/syscall.h b/include/trace/syscall.h
index 0387100752f0..e5e5f48dbfb3 100644
--- a/include/trace/syscall.h
+++ b/include/trace/syscall.h
@@ -47,10 +47,10 @@ enum print_line_t print_syscall_exit(struct trace_iterator *iter, int flags);
 #endif
 
 #ifdef CONFIG_PERF_EVENTS
-int prof_sysenter_enable(struct ftrace_event_call *call);
-void prof_sysenter_disable(struct ftrace_event_call *call);
-int prof_sysexit_enable(struct ftrace_event_call *call);
-void prof_sysexit_disable(struct ftrace_event_call *call);
+int perf_sysenter_enable(struct ftrace_event_call *call);
+void perf_sysenter_disable(struct ftrace_event_call *call);
+int perf_sysexit_enable(struct ftrace_event_call *call);
+void perf_sysexit_disable(struct ftrace_event_call *call);
 #endif
 
 #endif /* _TRACE_SYSCALL_H */
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 45b4b6e55891..c502b18594cc 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -4347,7 +4347,7 @@ static int perf_tp_event_match(struct perf_event *event,
 
 static void tp_perf_event_destroy(struct perf_event *event)
 {
-	ftrace_profile_disable(event->attr.config);
+	perf_trace_disable(event->attr.config);
 }
 
 static const struct pmu *tp_perf_event_init(struct perf_event *event)
@@ -4361,7 +4361,7 @@ static const struct pmu *tp_perf_event_init(struct perf_event *event)
 			!capable(CAP_SYS_ADMIN))
 		return ERR_PTR(-EPERM);
 
-	if (ftrace_profile_enable(event->attr.config))
+	if (perf_trace_enable(event->attr.config))
 		return NULL;
 
 	event->destroy = tp_perf_event_destroy;
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index d00c6fe23f54..78edc6490038 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -52,7 +52,7 @@ obj-$(CONFIG_EVENT_TRACING) += trace_events.o
 obj-$(CONFIG_EVENT_TRACING) += trace_export.o
 obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o
 ifeq ($(CONFIG_PERF_EVENTS),y)
-obj-$(CONFIG_EVENT_TRACING) += trace_event_profile.o
+obj-$(CONFIG_EVENT_TRACING) += trace_event_perf.o
 endif
 obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o
 obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
new file mode 100644
index 000000000000..f315b12a41d8
--- /dev/null
+++ b/kernel/trace/trace_event_perf.c
@@ -0,0 +1,165 @@
+/*
+ * trace event based perf event profiling/tracing
+ *
+ * Copyright (C) 2009 Red Hat Inc, Peter Zijlstra <pzijlstr@redhat.com>
+ * Copyright (C) 2009-2010 Frederic Weisbecker <fweisbec@gmail.com>
+ */
+
+#include <linux/module.h>
+#include <linux/kprobes.h>
+#include "trace.h"
+
+DEFINE_PER_CPU(struct pt_regs, perf_trace_regs);
+
+static char *perf_trace_buf;
+static char *perf_trace_buf_nmi;
+
+typedef typeof(char [PERF_MAX_TRACE_SIZE]) perf_trace_t ;
+
+/* Count the events in use (per event id, not per instance) */
+static int	total_ref_count;
+
+static int perf_trace_event_enable(struct ftrace_event_call *event)
+{
+	char *buf;
+	int ret = -ENOMEM;
+
+	if (event->perf_refcount++ > 0)
+		return 0;
+
+	if (!total_ref_count) {
+		buf = (char *)alloc_percpu(perf_trace_t);
+		if (!buf)
+			goto fail_buf;
+
+		rcu_assign_pointer(perf_trace_buf, buf);
+
+		buf = (char *)alloc_percpu(perf_trace_t);
+		if (!buf)
+			goto fail_buf_nmi;
+
+		rcu_assign_pointer(perf_trace_buf_nmi, buf);
+	}
+
+	ret = event->perf_event_enable(event);
+	if (!ret) {
+		total_ref_count++;
+		return 0;
+	}
+
+fail_buf_nmi:
+	if (!total_ref_count) {
+		free_percpu(perf_trace_buf_nmi);
+		free_percpu(perf_trace_buf);
+		perf_trace_buf_nmi = NULL;
+		perf_trace_buf = NULL;
+	}
+fail_buf:
+	event->perf_refcount--;
+
+	return ret;
+}
+
+int perf_trace_enable(int event_id)
+{
+	struct ftrace_event_call *event;
+	int ret = -EINVAL;
+
+	mutex_lock(&event_mutex);
+	list_for_each_entry(event, &ftrace_events, list) {
+		if (event->id == event_id && event->perf_event_enable &&
+		    try_module_get(event->mod)) {
+			ret = perf_trace_event_enable(event);
+			break;
+		}
+	}
+	mutex_unlock(&event_mutex);
+
+	return ret;
+}
+
+static void perf_trace_event_disable(struct ftrace_event_call *event)
+{
+	char *buf, *nmi_buf;
+
+	if (--event->perf_refcount > 0)
+		return;
+
+	event->perf_event_disable(event);
+
+	if (!--total_ref_count) {
+		buf = perf_trace_buf;
+		rcu_assign_pointer(perf_trace_buf, NULL);
+
+		nmi_buf = perf_trace_buf_nmi;
+		rcu_assign_pointer(perf_trace_buf_nmi, NULL);
+
+		/*
+		 * Ensure every events in profiling have finished before
+		 * releasing the buffers
+		 */
+		synchronize_sched();
+
+		free_percpu(buf);
+		free_percpu(nmi_buf);
+	}
+}
+
+void perf_trace_disable(int event_id)
+{
+	struct ftrace_event_call *event;
+
+	mutex_lock(&event_mutex);
+	list_for_each_entry(event, &ftrace_events, list) {
+		if (event->id == event_id) {
+			perf_trace_event_disable(event);
+			module_put(event->mod);
+			break;
+		}
+	}
+	mutex_unlock(&event_mutex);
+}
+
+__kprobes void *perf_trace_buf_prepare(int size, unsigned short type,
+				       int *rctxp, unsigned long *irq_flags)
+{
+	struct trace_entry *entry;
+	char *trace_buf, *raw_data;
+	int pc, cpu;
+
+	pc = preempt_count();
+
+	/* Protect the per cpu buffer, begin the rcu read side */
+	local_irq_save(*irq_flags);
+
+	*rctxp = perf_swevent_get_recursion_context();
+	if (*rctxp < 0)
+		goto err_recursion;
+
+	cpu = smp_processor_id();
+
+	if (in_nmi())
+		trace_buf = rcu_dereference(perf_trace_buf_nmi);
+	else
+		trace_buf = rcu_dereference(perf_trace_buf);
+
+	if (!trace_buf)
+		goto err;
+
+	raw_data = per_cpu_ptr(trace_buf, cpu);
+
+	/* zero the dead bytes from align to not leak stack to user */
+	*(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
+
+	entry = (struct trace_entry *)raw_data;
+	tracing_generic_entry_update(entry, *irq_flags, pc);
+	entry->type = type;
+
+	return raw_data;
+err:
+	perf_swevent_put_recursion_context(*rctxp);
+err_recursion:
+	local_irq_restore(*irq_flags);
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(perf_trace_buf_prepare);
diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c
deleted file mode 100644
index e66d21e15a0f..000000000000
--- a/kernel/trace/trace_event_profile.c
+++ /dev/null
@@ -1,165 +0,0 @@
-/*
- * trace event based perf counter profiling
- *
- * Copyright (C) 2009 Red Hat Inc, Peter Zijlstra <pzijlstr@redhat.com>
- * Copyright (C) 2009-2010 Frederic Weisbecker <fweisbec@gmail.com>
- */
-
-#include <linux/module.h>
-#include <linux/kprobes.h>
-#include "trace.h"
-
-DEFINE_PER_CPU(struct pt_regs, perf_trace_regs);
-
-static char *perf_trace_buf;
-static char *perf_trace_buf_nmi;
-
-typedef typeof(char [FTRACE_MAX_PROFILE_SIZE]) perf_trace_t ;
-
-/* Count the events in use (per event id, not per instance) */
-static int	total_profile_count;
-
-static int ftrace_profile_enable_event(struct ftrace_event_call *event)
-{
-	char *buf;
-	int ret = -ENOMEM;
-
-	if (event->profile_count++ > 0)
-		return 0;
-
-	if (!total_profile_count) {
-		buf = (char *)alloc_percpu(perf_trace_t);
-		if (!buf)
-			goto fail_buf;
-
-		rcu_assign_pointer(perf_trace_buf, buf);
-
-		buf = (char *)alloc_percpu(perf_trace_t);
-		if (!buf)
-			goto fail_buf_nmi;
-
-		rcu_assign_pointer(perf_trace_buf_nmi, buf);
-	}
-
-	ret = event->profile_enable(event);
-	if (!ret) {
-		total_profile_count++;
-		return 0;
-	}
-
-fail_buf_nmi:
-	if (!total_profile_count) {
-		free_percpu(perf_trace_buf_nmi);
-		free_percpu(perf_trace_buf);
-		perf_trace_buf_nmi = NULL;
-		perf_trace_buf = NULL;
-	}
-fail_buf:
-	event->profile_count--;
-
-	return ret;
-}
-
-int ftrace_profile_enable(int event_id)
-{
-	struct ftrace_event_call *event;
-	int ret = -EINVAL;
-
-	mutex_lock(&event_mutex);
-	list_for_each_entry(event, &ftrace_events, list) {
-		if (event->id == event_id && event->profile_enable &&
-		    try_module_get(event->mod)) {
-			ret = ftrace_profile_enable_event(event);
-			break;
-		}
-	}
-	mutex_unlock(&event_mutex);
-
-	return ret;
-}
-
-static void ftrace_profile_disable_event(struct ftrace_event_call *event)
-{
-	char *buf, *nmi_buf;
-
-	if (--event->profile_count > 0)
-		return;
-
-	event->profile_disable(event);
-
-	if (!--total_profile_count) {
-		buf = perf_trace_buf;
-		rcu_assign_pointer(perf_trace_buf, NULL);
-
-		nmi_buf = perf_trace_buf_nmi;
-		rcu_assign_pointer(perf_trace_buf_nmi, NULL);
-
-		/*
-		 * Ensure every events in profiling have finished before
-		 * releasing the buffers
-		 */
-		synchronize_sched();
-
-		free_percpu(buf);
-		free_percpu(nmi_buf);
-	}
-}
-
-void ftrace_profile_disable(int event_id)
-{
-	struct ftrace_event_call *event;
-
-	mutex_lock(&event_mutex);
-	list_for_each_entry(event, &ftrace_events, list) {
-		if (event->id == event_id) {
-			ftrace_profile_disable_event(event);
-			module_put(event->mod);
-			break;
-		}
-	}
-	mutex_unlock(&event_mutex);
-}
-
-__kprobes void *ftrace_perf_buf_prepare(int size, unsigned short type,
-					int *rctxp, unsigned long *irq_flags)
-{
-	struct trace_entry *entry;
-	char *trace_buf, *raw_data;
-	int pc, cpu;
-
-	pc = preempt_count();
-
-	/* Protect the per cpu buffer, begin the rcu read side */
-	local_irq_save(*irq_flags);
-
-	*rctxp = perf_swevent_get_recursion_context();
-	if (*rctxp < 0)
-		goto err_recursion;
-
-	cpu = smp_processor_id();
-
-	if (in_nmi())
-		trace_buf = rcu_dereference(perf_trace_buf_nmi);
-	else
-		trace_buf = rcu_dereference(perf_trace_buf);
-
-	if (!trace_buf)
-		goto err;
-
-	raw_data = per_cpu_ptr(trace_buf, cpu);
-
-	/* zero the dead bytes from align to not leak stack to user */
-	*(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
-
-	entry = (struct trace_entry *)raw_data;
-	tracing_generic_entry_update(entry, *irq_flags, pc);
-	entry->type = type;
-
-	return raw_data;
-err:
-	perf_swevent_put_recursion_context(*rctxp);
-err_recursion:
-	local_irq_restore(*irq_flags);
-	return NULL;
-}
-EXPORT_SYMBOL_GPL(ftrace_perf_buf_prepare);
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 3f972ad98d04..beab8bf2f310 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -938,7 +938,7 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,
 		trace_create_file("enable", 0644, call->dir, call,
 				  enable);
 
-	if (call->id && call->profile_enable)
+	if (call->id && call->perf_event_enable)
 		trace_create_file("id", 0444, call->dir, call,
 		 		  id);
 
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index f7a20a8bfb31..1251e367bae9 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -1214,7 +1214,7 @@ static int set_print_fmt(struct trace_probe *tp)
 #ifdef CONFIG_PERF_EVENTS
 
 /* Kprobe profile handler */
-static __kprobes void kprobe_profile_func(struct kprobe *kp,
+static __kprobes void kprobe_perf_func(struct kprobe *kp,
 					 struct pt_regs *regs)
 {
 	struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
@@ -1227,11 +1227,11 @@ static __kprobes void kprobe_profile_func(struct kprobe *kp,
 	__size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args);
 	size = ALIGN(__size + sizeof(u32), sizeof(u64));
 	size -= sizeof(u32);
-	if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE,
+	if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
 		     "profile buffer not large enough"))
 		return;
 
-	entry = ftrace_perf_buf_prepare(size, call->id, &rctx, &irq_flags);
+	entry = perf_trace_buf_prepare(size, call->id, &rctx, &irq_flags);
 	if (!entry)
 		return;
 
@@ -1240,11 +1240,11 @@ static __kprobes void kprobe_profile_func(struct kprobe *kp,
 	for (i = 0; i < tp->nr_args; i++)
 		entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
 
-	ftrace_perf_buf_submit(entry, size, rctx, entry->ip, 1, irq_flags, regs);
+	perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, irq_flags, regs);
 }
 
 /* Kretprobe profile handler */
-static __kprobes void kretprobe_profile_func(struct kretprobe_instance *ri,
+static __kprobes void kretprobe_perf_func(struct kretprobe_instance *ri,
 					    struct pt_regs *regs)
 {
 	struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
@@ -1257,11 +1257,11 @@ static __kprobes void kretprobe_profile_func(struct kretprobe_instance *ri,
 	__size = SIZEOF_KRETPROBE_TRACE_ENTRY(tp->nr_args);
 	size = ALIGN(__size + sizeof(u32), sizeof(u64));
 	size -= sizeof(u32);
-	if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE,
+	if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
 		     "profile buffer not large enough"))
 		return;
 
-	entry = ftrace_perf_buf_prepare(size, call->id, &rctx, &irq_flags);
+	entry = perf_trace_buf_prepare(size, call->id, &rctx, &irq_flags);
 	if (!entry)
 		return;
 
@@ -1271,11 +1271,11 @@ static __kprobes void kretprobe_profile_func(struct kretprobe_instance *ri,
 	for (i = 0; i < tp->nr_args; i++)
 		entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
 
-	ftrace_perf_buf_submit(entry, size, rctx, entry->ret_ip, 1,
+	perf_trace_buf_submit(entry, size, rctx, entry->ret_ip, 1,
 			       irq_flags, regs);
 }
 
-static int probe_profile_enable(struct ftrace_event_call *call)
+static int probe_perf_enable(struct ftrace_event_call *call)
 {
 	struct trace_probe *tp = (struct trace_probe *)call->data;
 
@@ -1287,7 +1287,7 @@ static int probe_profile_enable(struct ftrace_event_call *call)
 		return enable_kprobe(&tp->rp.kp);
 }
 
-static void probe_profile_disable(struct ftrace_event_call *call)
+static void probe_perf_disable(struct ftrace_event_call *call)
 {
 	struct trace_probe *tp = (struct trace_probe *)call->data;
 
@@ -1312,7 +1312,7 @@ int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
 		kprobe_trace_func(kp, regs);
 #ifdef CONFIG_PERF_EVENTS
 	if (tp->flags & TP_FLAG_PROFILE)
-		kprobe_profile_func(kp, regs);
+		kprobe_perf_func(kp, regs);
 #endif
 	return 0;	/* We don't tweek kernel, so just return 0 */
 }
@@ -1326,7 +1326,7 @@ int kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs)
 		kretprobe_trace_func(ri, regs);
 #ifdef CONFIG_PERF_EVENTS
 	if (tp->flags & TP_FLAG_PROFILE)
-		kretprobe_profile_func(ri, regs);
+		kretprobe_perf_func(ri, regs);
 #endif
 	return 0;	/* We don't tweek kernel, so just return 0 */
 }
@@ -1359,8 +1359,8 @@ static int register_probe_event(struct trace_probe *tp)
 	call->unregfunc = probe_event_disable;
 
 #ifdef CONFIG_PERF_EVENTS
-	call->profile_enable = probe_profile_enable;
-	call->profile_disable = probe_profile_disable;
+	call->perf_event_enable = probe_perf_enable;
+	call->perf_event_disable = probe_perf_disable;
 #endif
 	call->data = tp;
 	ret = trace_add_event_call(call);
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 7e6e84fb7b6c..33c2a5b769dc 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -428,12 +428,12 @@ core_initcall(init_ftrace_syscalls);
 
 #ifdef CONFIG_PERF_EVENTS
 
-static DECLARE_BITMAP(enabled_prof_enter_syscalls, NR_syscalls);
-static DECLARE_BITMAP(enabled_prof_exit_syscalls, NR_syscalls);
-static int sys_prof_refcount_enter;
-static int sys_prof_refcount_exit;
+static DECLARE_BITMAP(enabled_perf_enter_syscalls, NR_syscalls);
+static DECLARE_BITMAP(enabled_perf_exit_syscalls, NR_syscalls);
+static int sys_perf_refcount_enter;
+static int sys_perf_refcount_exit;
 
-static void prof_syscall_enter(struct pt_regs *regs, long id)
+static void perf_syscall_enter(struct pt_regs *regs, long id)
 {
 	struct syscall_metadata *sys_data;
 	struct syscall_trace_enter *rec;
@@ -443,7 +443,7 @@ static void prof_syscall_enter(struct pt_regs *regs, long id)
 	int size;
 
 	syscall_nr = syscall_get_nr(current, regs);
-	if (!test_bit(syscall_nr, enabled_prof_enter_syscalls))
+	if (!test_bit(syscall_nr, enabled_perf_enter_syscalls))
 		return;
 
 	sys_data = syscall_nr_to_meta(syscall_nr);
@@ -455,11 +455,11 @@ static void prof_syscall_enter(struct pt_regs *regs, long id)
 	size = ALIGN(size + sizeof(u32), sizeof(u64));
 	size -= sizeof(u32);
 
-	if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE,
-		      "profile buffer not large enough"))
+	if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
+		      "perf buffer not large enough"))
 		return;
 
-	rec = (struct syscall_trace_enter *)ftrace_perf_buf_prepare(size,
+	rec = (struct syscall_trace_enter *)perf_trace_buf_prepare(size,
 				sys_data->enter_event->id, &rctx, &flags);
 	if (!rec)
 		return;
@@ -467,10 +467,10 @@ static void prof_syscall_enter(struct pt_regs *regs, long id)
 	rec->nr = syscall_nr;
 	syscall_get_arguments(current, regs, 0, sys_data->nb_args,
 			       (unsigned long *)&rec->args);
-	ftrace_perf_buf_submit(rec, size, rctx, 0, 1, flags, regs);
+	perf_trace_buf_submit(rec, size, rctx, 0, 1, flags, regs);
 }
 
-int prof_sysenter_enable(struct ftrace_event_call *call)
+int perf_sysenter_enable(struct ftrace_event_call *call)
 {
 	int ret = 0;
 	int num;
@@ -478,34 +478,34 @@ int prof_sysenter_enable(struct ftrace_event_call *call)
 	num = ((struct syscall_metadata *)call->data)->syscall_nr;
 
 	mutex_lock(&syscall_trace_lock);
-	if (!sys_prof_refcount_enter)
-		ret = register_trace_sys_enter(prof_syscall_enter);
+	if (!sys_perf_refcount_enter)
+		ret = register_trace_sys_enter(perf_syscall_enter);
 	if (ret) {
 		pr_info("event trace: Could not activate"
 				"syscall entry trace point");
 	} else {
-		set_bit(num, enabled_prof_enter_syscalls);
-		sys_prof_refcount_enter++;
+		set_bit(num, enabled_perf_enter_syscalls);
+		sys_perf_refcount_enter++;
 	}
 	mutex_unlock(&syscall_trace_lock);
 	return ret;
 }
 
-void prof_sysenter_disable(struct ftrace_event_call *call)
+void perf_sysenter_disable(struct ftrace_event_call *call)
 {
 	int num;
 
 	num = ((struct syscall_metadata *)call->data)->syscall_nr;
 
 	mutex_lock(&syscall_trace_lock);
-	sys_prof_refcount_enter--;
-	clear_bit(num, enabled_prof_enter_syscalls);
-	if (!sys_prof_refcount_enter)
-		unregister_trace_sys_enter(prof_syscall_enter);
+	sys_perf_refcount_enter--;
+	clear_bit(num, enabled_perf_enter_syscalls);
+	if (!sys_perf_refcount_enter)
+		unregister_trace_sys_enter(perf_syscall_enter);
 	mutex_unlock(&syscall_trace_lock);
 }
 
-static void prof_syscall_exit(struct pt_regs *regs, long ret)
+static void perf_syscall_exit(struct pt_regs *regs, long ret)
 {
 	struct syscall_metadata *sys_data;
 	struct syscall_trace_exit *rec;
@@ -515,7 +515,7 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
 	int size;
 
 	syscall_nr = syscall_get_nr(current, regs);
-	if (!test_bit(syscall_nr, enabled_prof_exit_syscalls))
+	if (!test_bit(syscall_nr, enabled_perf_exit_syscalls))
 		return;
 
 	sys_data = syscall_nr_to_meta(syscall_nr);
@@ -530,11 +530,11 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
 	 * Impossible, but be paranoid with the future
 	 * How to put this check outside runtime?
 	 */
-	if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE,
-		"exit event has grown above profile buffer size"))
+	if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
+		"exit event has grown above perf buffer size"))
 		return;
 
-	rec = (struct syscall_trace_exit *)ftrace_perf_buf_prepare(size,
+	rec = (struct syscall_trace_exit *)perf_trace_buf_prepare(size,
 				sys_data->exit_event->id, &rctx, &flags);
 	if (!rec)
 		return;
@@ -542,10 +542,10 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
 	rec->nr = syscall_nr;
 	rec->ret = syscall_get_return_value(current, regs);
 
-	ftrace_perf_buf_submit(rec, size, rctx, 0, 1, flags, regs);
+	perf_trace_buf_submit(rec, size, rctx, 0, 1, flags, regs);
 }
 
-int prof_sysexit_enable(struct ftrace_event_call *call)
+int perf_sysexit_enable(struct ftrace_event_call *call)
 {
 	int ret = 0;
 	int num;
@@ -553,30 +553,30 @@ int prof_sysexit_enable(struct ftrace_event_call *call)
 	num = ((struct syscall_metadata *)call->data)->syscall_nr;
 
 	mutex_lock(&syscall_trace_lock);
-	if (!sys_prof_refcount_exit)
-		ret = register_trace_sys_exit(prof_syscall_exit);
+	if (!sys_perf_refcount_exit)
+		ret = register_trace_sys_exit(perf_syscall_exit);
 	if (ret) {
 		pr_info("event trace: Could not activate"
 				"syscall exit trace point");
 	} else {
-		set_bit(num, enabled_prof_exit_syscalls);
-		sys_prof_refcount_exit++;
+		set_bit(num, enabled_perf_exit_syscalls);
+		sys_perf_refcount_exit++;
 	}
 	mutex_unlock(&syscall_trace_lock);
 	return ret;
 }
 
-void prof_sysexit_disable(struct ftrace_event_call *call)
+void perf_sysexit_disable(struct ftrace_event_call *call)
 {
 	int num;
 
 	num = ((struct syscall_metadata *)call->data)->syscall_nr;
 
 	mutex_lock(&syscall_trace_lock);
-	sys_prof_refcount_exit--;
-	clear_bit(num, enabled_prof_exit_syscalls);
-	if (!sys_prof_refcount_exit)
-		unregister_trace_sys_exit(prof_syscall_exit);
+	sys_perf_refcount_exit--;
+	clear_bit(num, enabled_perf_exit_syscalls);
+	if (!sys_perf_refcount_exit)
+		unregister_trace_sys_exit(perf_syscall_exit);
 	mutex_unlock(&syscall_trace_lock);
 }
 
-- 
cgit v1.2.3


From fdba2bb1f2eed85085a0fe154e1acb82de3239f7 Mon Sep 17 00:00:00 2001
From: Ranjith Lohithakshan <ranjithl@ti.com>
Date: Wed, 10 Mar 2010 23:41:22 -0800
Subject: Input: ads7846 - add wakeup support

Add wakeup support to the ads7846 driver. Platforms can enable wakeup
capability by setting the wakeup flag in ads7846_platform_data. With this
patch the ads7846 driver can be used to wake the system from suspend.

Signed-off-by: Ranjith Lohithakshan <ranjithl@ti.com>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/input/touchscreen/ads7846.c | 10 ++++++++++
 include/linux/spi/ads7846.h         |  1 +
 2 files changed, 11 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/input/touchscreen/ads7846.c b/drivers/input/touchscreen/ads7846.c
index d187be05955f..532279cda0e4 100644
--- a/drivers/input/touchscreen/ads7846.c
+++ b/drivers/input/touchscreen/ads7846.c
@@ -822,6 +822,9 @@ static int ads7846_suspend(struct spi_device *spi, pm_message_t message)
 
 	spin_unlock_irq(&ts->lock);
 
+	if (device_may_wakeup(&ts->spi->dev))
+		enable_irq_wake(ts->spi->irq);
+
 	return 0;
 
 }
@@ -830,6 +833,9 @@ static int ads7846_resume(struct spi_device *spi)
 {
 	struct ads7846 *ts = dev_get_drvdata(&spi->dev);
 
+	if (device_may_wakeup(&ts->spi->dev))
+		disable_irq_wake(ts->spi->irq);
+
 	spin_lock_irq(&ts->lock);
 
 	ts->is_suspended = 0;
@@ -1201,6 +1207,8 @@ static int __devinit ads7846_probe(struct spi_device *spi)
 	if (err)
 		goto err_remove_attr_group;
 
+	device_init_wakeup(&spi->dev, pdata->wakeup);
+
 	return 0;
 
  err_remove_attr_group:
@@ -1230,6 +1238,8 @@ static int __devexit ads7846_remove(struct spi_device *spi)
 {
 	struct ads7846		*ts = dev_get_drvdata(&spi->dev);
 
+	device_init_wakeup(&spi->dev, false);
+
 	ads784x_hwmon_unregister(spi, ts);
 	input_unregister_device(ts->input);
 
diff --git a/include/linux/spi/ads7846.h b/include/linux/spi/ads7846.h
index 5710c15d394a..b4ae570d3c98 100644
--- a/include/linux/spi/ads7846.h
+++ b/include/linux/spi/ads7846.h
@@ -53,5 +53,6 @@ struct ads7846_platform_data {
 	int	(*filter)	(void *filter_data, int data_idx, int *val);
 	void	(*filter_cleanup)(void *filter_data);
 	void	(*wait_for_sync)(void);
+	bool	wakeup;
 };
 
-- 
cgit v1.2.3


From 85cfabbcd10f8d112feee6e2ec64ee78033b6d3c Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Thu, 11 Mar 2010 13:06:56 +0100
Subject: perf, ppc: Fix compile error due to new cpu notifiers

Fix:

  arch/powerpc/kernel/perf_event.c:1334: error: 'power_pmu_notifier' undeclared (first use in this function)
  arch/powerpc/kernel/perf_event.c:1334: error: (Each undeclared identifier is reported only once
  arch/powerpc/kernel/perf_event.c:1334: error: for each function it appears in.)
  arch/powerpc/kernel/perf_event.c:1334: error: implicit declaration of function 'power_pmu_notifier'
  arch/powerpc/kernel/perf_event.c:1334: error: implicit declaration of function 'register_cpu_notifier'

Due to commit 3f6da390 (perf: Rework and fix the arch CPU-hotplug hooks).

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/powerpc/kernel/perf_event.c | 2 +-
 include/linux/perf_event.h       | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c
index fbe101d7505d..08460a2e9f41 100644
--- a/arch/powerpc/kernel/perf_event.c
+++ b/arch/powerpc/kernel/perf_event.c
@@ -1298,7 +1298,7 @@ static void power_pmu_setup(int cpu)
 }
 
 static int __cpuinit
-power_pmu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
+power_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
 {
 	unsigned int cpu = (long)hcpu;
 
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 70cffd052c04..95477038a72a 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -453,6 +453,7 @@ enum perf_callchain_context {
 #include <linux/pid_namespace.h>
 #include <linux/workqueue.h>
 #include <linux/ftrace.h>
+#include <linux/cpu.h>
 #include <asm/atomic.h>
 
 #define PERF_MAX_STACK_DEPTH		255
-- 
cgit v1.2.3


From 80a05b9ffa7dc13f6693902dd8999a2b61a3a0d7 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 12 Mar 2010 17:34:14 +0100
Subject: clockevents: Sanitize min_delta_ns adjustment and prevent overflows

The current logic which handles clock events programming failures can
increase min_delta_ns unlimited and even can cause overflows.

Sanitize it by:
 - prevent zero increase when min_delta_ns == 1
 - limiting min_delta_ns to a jiffie
 - bail out if the jiffie limit is hit
 - add retries stats for /proc/timer_list so we can gather data

Reported-by: Uwe Kleine-Koenig <u.kleine-koenig@pengutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/clockchips.h |  2 ++
 kernel/time/tick-oneshot.c | 52 +++++++++++++++++++++++++++++++++++-----------
 kernel/time/timer_list.c   |  3 ++-
 3 files changed, 44 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h
index 0cf725bdd2a1..fc53492b6ad7 100644
--- a/include/linux/clockchips.h
+++ b/include/linux/clockchips.h
@@ -73,6 +73,7 @@ enum clock_event_nofitiers {
  * @list:		list head for the management code
  * @mode:		operating mode assigned by the management code
  * @next_event:		local storage for the next event in oneshot mode
+ * @retries:		number of forced programming retries
  */
 struct clock_event_device {
 	const char		*name;
@@ -93,6 +94,7 @@ struct clock_event_device {
 	struct list_head	list;
 	enum clock_event_mode	mode;
 	ktime_t			next_event;
+	unsigned long		retries;
 };
 
 /*
diff --git a/kernel/time/tick-oneshot.c b/kernel/time/tick-oneshot.c
index 0a8a213016f0..aada0e52680a 100644
--- a/kernel/time/tick-oneshot.c
+++ b/kernel/time/tick-oneshot.c
@@ -22,6 +22,29 @@
 
 #include "tick-internal.h"
 
+/* Limit min_delta to a jiffie */
+#define MIN_DELTA_LIMIT		(NSEC_PER_SEC / HZ)
+
+static int tick_increase_min_delta(struct clock_event_device *dev)
+{
+	/* Nothing to do if we already reached the limit */
+	if (dev->min_delta_ns >= MIN_DELTA_LIMIT)
+		return -ETIME;
+
+	if (dev->min_delta_ns < 5000)
+		dev->min_delta_ns = 5000;
+	else
+		dev->min_delta_ns += dev->min_delta_ns >> 1;
+
+	if (dev->min_delta_ns > MIN_DELTA_LIMIT)
+		dev->min_delta_ns = MIN_DELTA_LIMIT;
+
+	printk(KERN_WARNING "CE: %s increased min_delta_ns to %llu nsec\n",
+	       dev->name ? dev->name : "?",
+	       (unsigned long long) dev->min_delta_ns);
+	return 0;
+}
+
 /**
  * tick_program_event internal worker function
  */
@@ -37,23 +60,28 @@ int tick_dev_program_event(struct clock_event_device *dev, ktime_t expires,
 		if (!ret || !force)
 			return ret;
 
+		dev->retries++;
 		/*
-		 * We tried 2 times to program the device with the given
-		 * min_delta_ns. If that's not working then we double it
+		 * We tried 3 times to program the device with the given
+		 * min_delta_ns. If that's not working then we increase it
 		 * and emit a warning.
 		 */
 		if (++i > 2) {
 			/* Increase the min. delta and try again */
-			if (!dev->min_delta_ns)
-				dev->min_delta_ns = 5000;
-			else
-				dev->min_delta_ns += dev->min_delta_ns >> 1;
-
-			printk(KERN_WARNING
-			       "CE: %s increasing min_delta_ns to %llu nsec\n",
-			       dev->name ? dev->name : "?",
-			       (unsigned long long) dev->min_delta_ns << 1);
-
+			if (tick_increase_min_delta(dev)) {
+				/*
+				 * Get out of the loop if min_delta_ns
+				 * hit the limit already. That's
+				 * better than staying here forever.
+				 *
+				 * We clear next_event so we have a
+				 * chance that the box survives.
+				 */
+				printk(KERN_WARNING
+				       "CE: Reprogramming failure. Giving up\n");
+				dev->next_event.tv64 = KTIME_MAX;
+				return -ETIME;
+			}
 			i = 0;
 		}
 
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c
index bdfb8dd1050c..1a4a7dd78777 100644
--- a/kernel/time/timer_list.c
+++ b/kernel/time/timer_list.c
@@ -228,6 +228,7 @@ print_tickdevice(struct seq_file *m, struct tick_device *td, int cpu)
 	SEQ_printf(m, " event_handler:  ");
 	print_name_offset(m, dev->event_handler);
 	SEQ_printf(m, "\n");
+	SEQ_printf(m, " retries:        %lu\n", dev->retries);
 }
 
 static void timer_list_show_tickdevices(struct seq_file *m)
@@ -257,7 +258,7 @@ static int timer_list_show(struct seq_file *m, void *v)
 	u64 now = ktime_to_ns(ktime_get());
 	int cpu;
 
-	SEQ_printf(m, "Timer List Version: v0.5\n");
+	SEQ_printf(m, "Timer List Version: v0.6\n");
 	SEQ_printf(m, "HRTIMER_MAX_CLOCK_BASES: %d\n", HRTIMER_MAX_CLOCK_BASES);
 	SEQ_printf(m, "now at %Ld nsecs\n", (unsigned long long)now);
 
-- 
cgit v1.2.3


From d0ab4a4d5094e5d17b103dc5073529a04f00a469 Mon Sep 17 00:00:00 2001
From: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Date: Wed, 10 Mar 2010 15:20:35 -0800
Subject: rtc/hctosys: only claim the RTC provided the system time if it did
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Without this patch /sys/class/rtc/$CONFIG_RTC_HCTOSYS_DEVICE/hctosys
contains a 1 (meaning "This rtc was used to initialize the system clock")
even if reading the time at bootup failed.

Moreover change error handling in rtc_hctosys() to use goto and so reduce
the indention level.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Cc: Paul Gortmaker <p_gortmaker@yahoo.com>
Acked-by: Alessandro Zummo <a.zummo@towertech.it>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/rtc/hctosys.c   | 59 ++++++++++++++++++++++++++++---------------------
 drivers/rtc/rtc-sysfs.c |  5 +++--
 include/linux/rtc.h     |  6 +++++
 3 files changed, 43 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/rtc/hctosys.c b/drivers/rtc/hctosys.c
index 33c0e98243ee..bc90b091f195 100644
--- a/drivers/rtc/hctosys.c
+++ b/drivers/rtc/hctosys.c
@@ -22,48 +22,57 @@
  * the best guess is to add 0.5s.
  */
 
+int rtc_hctosys_ret = -ENODEV;
+
 static int __init rtc_hctosys(void)
 {
-	int err;
+	int err = -ENODEV;
 	struct rtc_time tm;
+	struct timespec tv = {
+		.tv_nsec = NSEC_PER_SEC >> 1,
+	};
 	struct rtc_device *rtc = rtc_class_open(CONFIG_RTC_HCTOSYS_DEVICE);
 
 	if (rtc == NULL) {
-		printk("%s: unable to open rtc device (%s)\n",
+		pr_err("%s: unable to open rtc device (%s)\n",
 			__FILE__, CONFIG_RTC_HCTOSYS_DEVICE);
-		return -ENODEV;
+		goto err_open;
 	}
 
 	err = rtc_read_time(rtc, &tm);
-	if (err == 0) {
-		err = rtc_valid_tm(&tm);
-		if (err == 0) {
-			struct timespec tv;
+	if (err) {
+		dev_err(rtc->dev.parent,
+			"hctosys: unable to read the hardware clock\n");
+		goto err_read;
 
-			tv.tv_nsec = NSEC_PER_SEC >> 1;
+	}
 
-			rtc_tm_to_time(&tm, &tv.tv_sec);
+	err = rtc_valid_tm(&tm);
+	if (err) {
+		dev_err(rtc->dev.parent,
+			"hctosys: invalid date/time\n");
+		goto err_invalid;
+	}
 
-			do_settimeofday(&tv);
+	rtc_tm_to_time(&tm, &tv.tv_sec);
 
-			dev_info(rtc->dev.parent,
-				"setting system clock to "
-				"%d-%02d-%02d %02d:%02d:%02d UTC (%u)\n",
-				tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday,
-				tm.tm_hour, tm.tm_min, tm.tm_sec,
-				(unsigned int) tv.tv_sec);
-		}
-		else
-			dev_err(rtc->dev.parent,
-				"hctosys: invalid date/time\n");
-	}
-	else
-		dev_err(rtc->dev.parent,
-			"hctosys: unable to read the hardware clock\n");
+	do_settimeofday(&tv);
 
+	dev_info(rtc->dev.parent,
+		"setting system clock to "
+		"%d-%02d-%02d %02d:%02d:%02d UTC (%u)\n",
+		tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday,
+		tm.tm_hour, tm.tm_min, tm.tm_sec,
+		(unsigned int) tv.tv_sec);
+
+err_invalid:
+err_read:
 	rtc_class_close(rtc);
 
-	return 0;
+err_open:
+	rtc_hctosys_ret = err;
+
+	return err;
 }
 
 late_initcall(rtc_hctosys);
diff --git a/drivers/rtc/rtc-sysfs.c b/drivers/rtc/rtc-sysfs.c
index 7dd23a6fc825..380083ca572f 100644
--- a/drivers/rtc/rtc-sysfs.c
+++ b/drivers/rtc/rtc-sysfs.c
@@ -107,8 +107,9 @@ rtc_sysfs_show_hctosys(struct device *dev, struct device_attribute *attr,
 		char *buf)
 {
 #ifdef CONFIG_RTC_HCTOSYS_DEVICE
-	if (strcmp(dev_name(&to_rtc_device(dev)->dev),
-		   CONFIG_RTC_HCTOSYS_DEVICE) == 0)
+	if (rtc_hctosys_ret == 0 &&
+			strcmp(dev_name(&to_rtc_device(dev)->dev),
+				CONFIG_RTC_HCTOSYS_DEVICE) == 0)
 		return sprintf(buf, "1\n");
 	else
 #endif
diff --git a/include/linux/rtc.h b/include/linux/rtc.h
index 60f88a7fb13d..14dbc83ded20 100644
--- a/include/linux/rtc.h
+++ b/include/linux/rtc.h
@@ -238,6 +238,12 @@ static inline bool is_leap_year(unsigned int year)
 	return (!(year % 4) && (year % 100)) || !(year % 400);
 }
 
+#ifdef CONFIG_RTC_HCTOSYS
+extern int rtc_hctosys_ret;
+#else
+#define rtc_hctosys_ret -ENODEV
+#endif
+
 #endif /* __KERNEL__ */
 
 #endif /* _LINUX_RTC_H_ */
-- 
cgit v1.2.3


From 53bddb4e9f3f53df02a783751984ddeade71b085 Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Wed, 10 Mar 2010 15:20:38 -0800
Subject: nommu: fix build breakage

Commit 34e55232e59f7b19050267a05ff1226e5cd122a5 ("mm: avoid false sharing
of mm_counter") added sync_mm_rss() for syncing loosely accounted rss
counters.  It's for CONFIG_MMU but sync_mm_rss is called even in NOMMU
enviroment (kerne/exit.c, fs/exec.c).  Above commit doesn't handle it
well.

This patch changes
  SPLIT_RSS_COUNTING depends on SPLIT_PTLOCKS && CONFIG_MMU

And for avoid unnecessary function calls, sync_mm_rss changed to be inlined
noop function in header file.

Reported-by: David Howells <dhowells@redhat.com>
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Signed-off-by: Michal Simek <monstr@monstr.eu>
Signed-off-by: David Howells <dhowells@redhat.com>
Cc: Greg Ungerer <gerg@snapgear.com>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h       | 6 ++++++
 include/linux/mm_types.h | 2 +-
 mm/memory.c              | 3 ---
 3 files changed, 7 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 3899395a03de..7f693b272c4a 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -971,7 +971,13 @@ static inline void setmax_mm_hiwater_rss(unsigned long *maxrss,
 		*maxrss = hiwater_rss;
 }
 
+#if defined(SPLIT_RSS_COUNTING)
 void sync_mm_rss(struct task_struct *task, struct mm_struct *mm);
+#else
+static inline void sync_mm_rss(struct task_struct *task, struct mm_struct *mm)
+{
+}
+#endif
 
 /*
  * A callback you can register to apply pressure to ageable caches.
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 048b46270aa5..b8bb9a6a1f37 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -203,7 +203,7 @@ enum {
 	NR_MM_COUNTERS
 };
 
-#if USE_SPLIT_PTLOCKS
+#if USE_SPLIT_PTLOCKS && defined(CONFIG_MMU)
 #define SPLIT_RSS_COUNTING
 struct mm_rss_stat {
 	atomic_long_t count[NR_MM_COUNTERS];
diff --git a/mm/memory.c b/mm/memory.c
index d1153e37e9ba..3d9130bd95d0 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -190,9 +190,6 @@ static void check_sync_rss_stat(struct task_struct *task)
 {
 }
 
-void sync_mm_rss(struct task_struct *task, struct mm_struct *mm)
-{
-}
 #endif
 
 /*
-- 
cgit v1.2.3


From 718a38211bf4375c0a1efad3afbc5dbaef5d33f9 Mon Sep 17 00:00:00 2001
From: Wu Fengguang <fengguang.wu@intel.com>
Date: Wed, 10 Mar 2010 15:20:43 -0800
Subject: mm: introduce dump_page() and print symbolic flag names

- introduce dump_page() to print the page info for debugging some error
  condition.

- convert three mm users: bad_page(), print_bad_pte() and memory offline
  failure.

- print an extra field: the symbolic names of page->flags

Example dump_page() output:

[  157.521694] page:ffffea0000a7cba8 count:2 mapcount:1 mapping:ffff88001c901791 index:0x147
[  157.525570] page flags: 0x100000000100068(uptodate|lru|active|swapbacked)

Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Alex Chiang <achiang@hp.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Mel Gorman <mel@linux.vnet.ibm.com>
Cc: Christoph Lameter <cl@linux-foundation.org>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h  |  2 ++
 mm/memory.c         |  8 ++----
 mm/memory_hotplug.c |  6 ++--
 mm/page_alloc.c     | 83 ++++++++++++++++++++++++++++++++++++++++++++++++++---
 4 files changed, 86 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 7f693b272c4a..e70f21beb4b4 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1465,5 +1465,7 @@ extern void shake_page(struct page *p, int access);
 extern atomic_long_t mce_bad_pages;
 extern int soft_offline_page(struct page *page, int flags);
 
+extern void dump_page(struct page *page);
+
 #endif /* __KERNEL__ */
 #endif /* _LINUX_MM_H */
diff --git a/mm/memory.c b/mm/memory.c
index 3d9130bd95d0..5b7f2002e54b 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -509,12 +509,8 @@ static void print_bad_pte(struct vm_area_struct *vma, unsigned long addr,
 		"BUG: Bad page map in process %s  pte:%08llx pmd:%08llx\n",
 		current->comm,
 		(long long)pte_val(pte), (long long)pmd_val(*pmd));
-	if (page) {
-		printk(KERN_ALERT
-		"page:%p flags:%p count:%d mapcount:%d mapping:%p index:%lx\n",
-		page, (void *)page->flags, page_count(page),
-		page_mapcount(page), page->mapping, page->index);
-	}
+	if (page)
+		dump_page(page);
 	printk(KERN_ALERT
 		"addr:%p vm_flags:%08lx anon_vma:%p mapping:%p index:%lx\n",
 		(void *)addr, vma->vm_flags, vma->anon_vma, mapping, index);
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 78e34e63c7b8..be211a582930 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -688,9 +688,9 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
 			if (page_count(page))
 				not_managed++;
 #ifdef CONFIG_DEBUG_VM
-			printk(KERN_INFO "removing from LRU failed"
-					 " %lx/%d/%lx\n",
-				pfn, page_count(page), page->flags);
+			printk(KERN_ALERT "removing pfn %lx from LRU failed\n",
+			       pfn);
+			dump_page(page);
 #endif
 		}
 	}
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 78ce90dd671f..d03c946d5566 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -50,6 +50,7 @@
 #include <linux/kmemleak.h>
 #include <linux/memory.h>
 #include <trace/events/kmem.h>
+#include <linux/ftrace_event.h>
 
 #include <asm/tlbflush.h>
 #include <asm/div64.h>
@@ -288,10 +289,7 @@ static void bad_page(struct page *page)
 
 	printk(KERN_ALERT "BUG: Bad page state in process %s  pfn:%05lx\n",
 		current->comm, page_to_pfn(page));
-	printk(KERN_ALERT
-		"page:%p flags:%p count:%d mapcount:%d mapping:%p index:%lx\n",
-		page, (void *)page->flags, page_count(page),
-		page_mapcount(page), page->mapping, page->index);
+	dump_page(page);
 
 	dump_stack();
 out:
@@ -5183,3 +5181,80 @@ bool is_free_buddy_page(struct page *page)
 	return order < MAX_ORDER;
 }
 #endif
+
+static struct trace_print_flags pageflag_names[] = {
+	{1UL << PG_locked,		"locked"	},
+	{1UL << PG_error,		"error"		},
+	{1UL << PG_referenced,		"referenced"	},
+	{1UL << PG_uptodate,		"uptodate"	},
+	{1UL << PG_dirty,		"dirty"		},
+	{1UL << PG_lru,			"lru"		},
+	{1UL << PG_active,		"active"	},
+	{1UL << PG_slab,		"slab"		},
+	{1UL << PG_owner_priv_1,	"owner_priv_1"	},
+	{1UL << PG_arch_1,		"arch_1"	},
+	{1UL << PG_reserved,		"reserved"	},
+	{1UL << PG_private,		"private"	},
+	{1UL << PG_private_2,		"private_2"	},
+	{1UL << PG_writeback,		"writeback"	},
+#ifdef CONFIG_PAGEFLAGS_EXTENDED
+	{1UL << PG_head,		"head"		},
+	{1UL << PG_tail,		"tail"		},
+#else
+	{1UL << PG_compound,		"compound"	},
+#endif
+	{1UL << PG_swapcache,		"swapcache"	},
+	{1UL << PG_mappedtodisk,	"mappedtodisk"	},
+	{1UL << PG_reclaim,		"reclaim"	},
+	{1UL << PG_buddy,		"buddy"		},
+	{1UL << PG_swapbacked,		"swapbacked"	},
+	{1UL << PG_unevictable,		"unevictable"	},
+#ifdef CONFIG_MMU
+	{1UL << PG_mlocked,		"mlocked"	},
+#endif
+#ifdef CONFIG_ARCH_USES_PG_UNCACHED
+	{1UL << PG_uncached,		"uncached"	},
+#endif
+#ifdef CONFIG_MEMORY_FAILURE
+	{1UL << PG_hwpoison,		"hwpoison"	},
+#endif
+	{-1UL,				NULL		},
+};
+
+static void dump_page_flags(unsigned long flags)
+{
+	const char *delim = "";
+	unsigned long mask;
+	int i;
+
+	printk(KERN_ALERT "page flags: %#lx(", flags);
+
+	/* remove zone id */
+	flags &= (1UL << NR_PAGEFLAGS) - 1;
+
+	for (i = 0; pageflag_names[i].name && flags; i++) {
+
+		mask = pageflag_names[i].mask;
+		if ((flags & mask) != mask)
+			continue;
+
+		flags &= ~mask;
+		printk("%s%s", delim, pageflag_names[i].name);
+		delim = "|";
+	}
+
+	/* check for left over flags */
+	if (flags)
+		printk("%s%#lx", delim, flags);
+
+	printk(")\n");
+}
+
+void dump_page(struct page *page)
+{
+	printk(KERN_ALERT
+	       "page:%p count:%d mapcount:%d mapping:%p index:%#lx\n",
+		page, page_count(page), page_mapcount(page),
+		page->mapping, page->index);
+	dump_page_flags(page->flags);
+}
-- 
cgit v1.2.3


From 5d0e52830e9ae09b872567f4aca3dfb5b5918079 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 10 Mar 2010 15:21:13 -0800
Subject: Add generic sys_old_select()

Add a generic implementation of the old select() syscall, which expects
its argument in a memory block and switch all architectures over to use
it.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: Jeff Dike <jdike@addtoit.com>
Cc: Hirokazu Takata <takata@linux-m32r.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@elte.hu>
Reviewed-by: H. Peter Anvin <hpa@zytor.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: "Luck, Tony" <tony.luck@intel.com>
Cc: James Morris <jmorris@namei.org>
Acked-by: Andreas Schwab <schwab@linux-m68k.org>
Acked-by: Russell King <rmk+kernel@arm.linux.org.uk>
Acked-by: Greg Ungerer <gerg@uclinux.org>
Acked-by: David Howells <dhowells@redhat.com>
Cc: Andreas Schwab <schwab@linux-m68k.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/arm/include/asm/unistd.h        |  1 +
 arch/arm/kernel/calls.S              |  2 +-
 arch/arm/kernel/sys_arm.c            | 21 ---------------------
 arch/h8300/include/asm/unistd.h      |  1 +
 arch/h8300/kernel/sys_h8300.c        | 16 ----------------
 arch/h8300/kernel/syscalls.S         |  2 +-
 arch/m68k/include/asm/unistd.h       |  1 +
 arch/m68k/kernel/entry.S             |  2 +-
 arch/m68k/kernel/sys_m68k.c          | 16 ----------------
 arch/m68knommu/kernel/sys_m68k.c     | 16 ----------------
 arch/m68knommu/kernel/syscalltable.S |  2 +-
 arch/mn10300/include/asm/unistd.h    |  1 +
 arch/mn10300/kernel/entry.S          |  2 +-
 arch/mn10300/kernel/sys_mn10300.c    | 18 ------------------
 arch/s390/kernel/entry.h             |  1 -
 arch/um/sys-i386/syscalls.c          | 18 ------------------
 arch/x86/ia32/ia32entry.S            |  2 +-
 arch/x86/ia32/sys_ia32.c             | 18 ------------------
 arch/x86/include/asm/sys_ia32.h      |  2 --
 arch/x86/include/asm/syscalls.h      |  2 --
 arch/x86/include/asm/unistd_32.h     |  1 +
 arch/x86/kernel/sys_i386_32.c        | 17 -----------------
 arch/x86/kernel/syscall_table_32.S   |  2 +-
 fs/compat.c                          | 18 ++++++++++++++++++
 fs/select.c                          | 17 +++++++++++++++++
 include/linux/compat.h               |  3 +++
 include/linux/syscalls.h             |  2 ++
 27 files changed, 52 insertions(+), 152 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/include/asm/unistd.h b/arch/arm/include/asm/unistd.h
index cf9cdaa2d4d4..e18500d305ba 100644
--- a/arch/arm/include/asm/unistd.h
+++ b/arch/arm/include/asm/unistd.h
@@ -443,6 +443,7 @@
 #define __ARCH_WANT_SYS_SIGPROCMASK
 #define __ARCH_WANT_SYS_RT_SIGACTION
 #define __ARCH_WANT_SYS_RT_SIGSUSPEND
+#define __ARCH_WANT_SYS_OLD_SELECT
 
 #if !defined(CONFIG_AEABI) || defined(CONFIG_OABI_COMPAT)
 #define __ARCH_WANT_SYS_TIME
diff --git a/arch/arm/kernel/calls.S b/arch/arm/kernel/calls.S
index 9314a2d681f1..7671e9a75449 100644
--- a/arch/arm/kernel/calls.S
+++ b/arch/arm/kernel/calls.S
@@ -91,7 +91,7 @@
 		CALL(sys_settimeofday)
 /* 80 */	CALL(sys_getgroups16)
 		CALL(sys_setgroups16)
-		CALL(OBSOLETE(old_select))	/* used by libc4 */
+		CALL(OBSOLETE(sys_old_select))	/* used by libc4 */
 		CALL(sys_symlink)
 		CALL(sys_ni_syscall)		/* was sys_lstat */
 /* 85 */	CALL(sys_readlink)
diff --git a/arch/arm/kernel/sys_arm.c b/arch/arm/kernel/sys_arm.c
index ae4027bd01bd..e59cddedcbba 100644
--- a/arch/arm/kernel/sys_arm.c
+++ b/arch/arm/kernel/sys_arm.c
@@ -54,27 +54,6 @@ out:
 	return error;
 }
 
-/*
- * Perform the select(nd, in, out, ex, tv) and mmap() system
- * calls.
- */
-
-struct sel_arg_struct {
-	unsigned long n;
-	fd_set __user *inp, *outp, *exp;
-	struct timeval __user *tvp;
-};
-
-asmlinkage int old_select(struct sel_arg_struct __user *arg)
-{
-	struct sel_arg_struct a;
-
-	if (copy_from_user(&a, arg, sizeof(a)))
-		return -EFAULT;
-	/* sys_select() does the appropriate kernel locking */
-	return sys_select(a.n, a.inp, a.outp, a.exp, a.tvp);
-}
-
 #if !defined(CONFIG_AEABI) || defined(CONFIG_OABI_COMPAT)
 /*
  * sys_ipc() is the de-multiplexer for the SysV IPC calls..
diff --git a/arch/h8300/include/asm/unistd.h b/arch/h8300/include/asm/unistd.h
index 99f3c3561ecb..3bea0b3eb24f 100644
--- a/arch/h8300/include/asm/unistd.h
+++ b/arch/h8300/include/asm/unistd.h
@@ -348,6 +348,7 @@
 #define __ARCH_WANT_SYS_LLSEEK
 #define __ARCH_WANT_SYS_NICE
 #define __ARCH_WANT_SYS_OLD_GETRLIMIT
+#define __ARCH_WANT_SYS_OLD_SELECT
 #define __ARCH_WANT_SYS_OLDUMOUNT
 #define __ARCH_WANT_SYS_SIGPENDING
 #define __ARCH_WANT_SYS_SIGPROCMASK
diff --git a/arch/h8300/kernel/sys_h8300.c b/arch/h8300/kernel/sys_h8300.c
index b5969db0ca10..e9a3ecf90c9c 100644
--- a/arch/h8300/kernel/sys_h8300.c
+++ b/arch/h8300/kernel/sys_h8300.c
@@ -60,22 +60,6 @@ out:
 	return error;
 }
 
-struct sel_arg_struct {
-	unsigned long n;
-	fd_set *inp, *outp, *exp;
-	struct timeval *tvp;
-};
-
-asmlinkage int old_select(struct sel_arg_struct *arg)
-{
-	struct sel_arg_struct a;
-
-	if (copy_from_user(&a, arg, sizeof(a)))
-		return -EFAULT;
-	/* sys_select() does the appropriate kernel locking */
-	return sys_select(a.n, a.inp, a.outp, a.exp, a.tvp);
-}
-
 /*
  * sys_ipc() is the de-multiplexer for the SysV IPC calls..
  *
diff --git a/arch/h8300/kernel/syscalls.S b/arch/h8300/kernel/syscalls.S
index 2d69881eda6a..fe5ae20e60c5 100644
--- a/arch/h8300/kernel/syscalls.S
+++ b/arch/h8300/kernel/syscalls.S
@@ -96,7 +96,7 @@ SYMBOL_NAME_LABEL(sys_call_table)
 	.long SYMBOL_NAME(sys_settimeofday)
 	.long SYMBOL_NAME(sys_getgroups16)	/* 80 */
 	.long SYMBOL_NAME(sys_setgroups16)
-	.long SYMBOL_NAME(old_select)
+	.long SYMBOL_NAME(sys_old_select)
 	.long SYMBOL_NAME(sys_symlink)
 	.long SYMBOL_NAME(sys_lstat)
 	.long SYMBOL_NAME(sys_readlink)		/* 85 */
diff --git a/arch/m68k/include/asm/unistd.h b/arch/m68k/include/asm/unistd.h
index d72a71dabecb..1582c2db1c86 100644
--- a/arch/m68k/include/asm/unistd.h
+++ b/arch/m68k/include/asm/unistd.h
@@ -363,6 +363,7 @@
 #define __ARCH_WANT_SYS_LLSEEK
 #define __ARCH_WANT_SYS_NICE
 #define __ARCH_WANT_SYS_OLD_GETRLIMIT
+#define __ARCH_WANT_SYS_OLD_SELECT
 #define __ARCH_WANT_SYS_OLDUMOUNT
 #define __ARCH_WANT_SYS_SIGPENDING
 #define __ARCH_WANT_SYS_SIGPROCMASK
diff --git a/arch/m68k/kernel/entry.S b/arch/m68k/kernel/entry.S
index e136b8cbe9b9..09b1f09be3a6 100644
--- a/arch/m68k/kernel/entry.S
+++ b/arch/m68k/kernel/entry.S
@@ -510,7 +510,7 @@ sys_call_table:
 	.long sys_settimeofday
 	.long sys_getgroups16	/* 80 */
 	.long sys_setgroups16
-	.long old_select
+	.long sys_old_select
 	.long sys_symlink
 	.long sys_lstat
 	.long sys_readlink	/* 85 */
diff --git a/arch/m68k/kernel/sys_m68k.c b/arch/m68k/kernel/sys_m68k.c
index e3ad2d671973..03b58dd86c7a 100644
--- a/arch/m68k/kernel/sys_m68k.c
+++ b/arch/m68k/kernel/sys_m68k.c
@@ -80,22 +80,6 @@ out:
 	return error;
 }
 
-struct sel_arg_struct {
-	unsigned long n;
-	fd_set __user *inp, *outp, *exp;
-	struct timeval __user *tvp;
-};
-
-asmlinkage int old_select(struct sel_arg_struct __user *arg)
-{
-	struct sel_arg_struct a;
-
-	if (copy_from_user(&a, arg, sizeof(a)))
-		return -EFAULT;
-	/* sys_select() does the appropriate kernel locking */
-	return sys_select(a.n, a.inp, a.outp, a.exp, a.tvp);
-}
-
 /*
  * sys_ipc() is the de-multiplexer for the SysV IPC calls..
  *
diff --git a/arch/m68knommu/kernel/sys_m68k.c b/arch/m68knommu/kernel/sys_m68k.c
index 923dd4aab875..e0d3f13e77a8 100644
--- a/arch/m68knommu/kernel/sys_m68k.c
+++ b/arch/m68knommu/kernel/sys_m68k.c
@@ -61,22 +61,6 @@ out:
 	return error;
 }
 
-struct sel_arg_struct {
-	unsigned long n;
-	fd_set *inp, *outp, *exp;
-	struct timeval *tvp;
-};
-
-asmlinkage int old_select(struct sel_arg_struct *arg)
-{
-	struct sel_arg_struct a;
-
-	if (copy_from_user(&a, arg, sizeof(a)))
-		return -EFAULT;
-	/* sys_select() does the appropriate kernel locking */
-	return sys_select(a.n, a.inp, a.outp, a.exp, a.tvp);
-}
-
 /*
  * sys_ipc() is the de-multiplexer for the SysV IPC calls..
  *
diff --git a/arch/m68knommu/kernel/syscalltable.S b/arch/m68knommu/kernel/syscalltable.S
index 56dd01ded148..405738351700 100644
--- a/arch/m68knommu/kernel/syscalltable.S
+++ b/arch/m68knommu/kernel/syscalltable.S
@@ -100,7 +100,7 @@ ENTRY(sys_call_table)
 	.long sys_settimeofday
 	.long sys_getgroups16	/* 80 */
 	.long sys_setgroups16
-	.long old_select
+	.long sys_old_select
 	.long sys_symlink
 	.long sys_lstat
 	.long sys_readlink	/* 85 */
diff --git a/arch/mn10300/include/asm/unistd.h b/arch/mn10300/include/asm/unistd.h
index c05acb95c2a9..d13a56e99bad 100644
--- a/arch/mn10300/include/asm/unistd.h
+++ b/arch/mn10300/include/asm/unistd.h
@@ -375,6 +375,7 @@
 #define __ARCH_WANT_SYS_LLSEEK
 #define __ARCH_WANT_SYS_NICE
 #define __ARCH_WANT_SYS_OLD_GETRLIMIT
+#define __ARCH_WANT_SYS_OLD_SELECT
 #define __ARCH_WANT_SYS_OLDUMOUNT
 #define __ARCH_WANT_SYS_SIGPENDING
 #define __ARCH_WANT_SYS_SIGPROCMASK
diff --git a/arch/mn10300/kernel/entry.S b/arch/mn10300/kernel/entry.S
index 88e3e1c3cc21..d9ed5a15c547 100644
--- a/arch/mn10300/kernel/entry.S
+++ b/arch/mn10300/kernel/entry.S
@@ -468,7 +468,7 @@ ENTRY(sys_call_table)
 	.long sys_settimeofday
 	.long sys_getgroups16	/* 80 */
 	.long sys_setgroups16
-	.long old_select
+	.long sys_old_select
 	.long sys_symlink
 	.long sys_lstat
 	.long sys_readlink	/* 85 */
diff --git a/arch/mn10300/kernel/sys_mn10300.c b/arch/mn10300/kernel/sys_mn10300.c
index 17cc6ce04e84..bef69d6daf15 100644
--- a/arch/mn10300/kernel/sys_mn10300.c
+++ b/arch/mn10300/kernel/sys_mn10300.c
@@ -32,24 +32,6 @@ asmlinkage long old_mmap(unsigned long addr, unsigned long len,
 	return sys_mmap_pgoff(addr, len, prot, flags, fd, offset >> PAGE_SHIFT);
 }
 
-struct sel_arg_struct {
-	unsigned long n;
-	fd_set *inp;
-	fd_set *outp;
-	fd_set *exp;
-	struct timeval *tvp;
-};
-
-asmlinkage int old_select(struct sel_arg_struct __user *arg)
-{
-	struct sel_arg_struct a;
-
-	if (copy_from_user(&a, arg, sizeof(a)))
-		return -EFAULT;
-	/* sys_select() does the appropriate kernel locking */
-	return sys_select(a.n, a.inp, a.outp, a.exp, a.tvp);
-}
-
 /*
  * sys_ipc() is the de-multiplexer for the SysV IPC calls..
  *
diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h
index e1e5e767ab56..9905a0cacf93 100644
--- a/arch/s390/kernel/entry.h
+++ b/arch/s390/kernel/entry.h
@@ -28,7 +28,6 @@ struct new_utsname;
 struct mmap_arg_struct;
 struct fadvise64_64_args;
 struct old_sigaction;
-struct sel_arg_struct;
 
 long sys_mmap2(struct mmap_arg_struct __user  *arg);
 long sys_s390_old_mmap(struct mmap_arg_struct __user *arg);
diff --git a/arch/um/sys-i386/syscalls.c b/arch/um/sys-i386/syscalls.c
index 857ca0b3bdef..0e49d2a20c17 100644
--- a/arch/um/sys-i386/syscalls.c
+++ b/arch/um/sys-i386/syscalls.c
@@ -44,24 +44,6 @@ long old_mmap_i386(struct mmap_arg_struct __user *arg)
 	return err;
 }
 
-struct sel_arg_struct {
-	unsigned long n;
-	fd_set __user *inp;
-	fd_set __user *outp;
-	fd_set __user *exp;
-	struct timeval __user *tvp;
-};
-
-long old_select(struct sel_arg_struct __user *arg)
-{
-	struct sel_arg_struct a;
-
-	if (copy_from_user(&a, arg, sizeof(a)))
-		return -EFAULT;
-	/* sys_select() does the appropriate kernel locking */
-	return sys_select(a.n, a.inp, a.outp, a.exp, a.tvp);
-}
-
 /*
  * The prototype on i386 is:
  *
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 53147ad85b96..34f821802c23 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -586,7 +586,7 @@ ia32_sys_call_table:
 	.quad compat_sys_settimeofday
 	.quad sys_getgroups16	/* 80 */
 	.quad sys_setgroups16
-	.quad sys32_old_select
+	.quad compat_sys_old_select
 	.quad sys_symlink
 	.quad sys_lstat
 	.quad sys_readlink		/* 85 */
diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c
index 422572c77923..cb80816e7a16 100644
--- a/arch/x86/ia32/sys_ia32.c
+++ b/arch/x86/ia32/sys_ia32.c
@@ -332,24 +332,6 @@ asmlinkage long sys32_alarm(unsigned int seconds)
 	return alarm_setitimer(seconds);
 }
 
-struct sel_arg_struct {
-	unsigned int n;
-	unsigned int inp;
-	unsigned int outp;
-	unsigned int exp;
-	unsigned int tvp;
-};
-
-asmlinkage long sys32_old_select(struct sel_arg_struct __user *arg)
-{
-	struct sel_arg_struct a;
-
-	if (copy_from_user(&a, arg, sizeof(a)))
-		return -EFAULT;
-	return compat_sys_select(a.n, compat_ptr(a.inp), compat_ptr(a.outp),
-				 compat_ptr(a.exp), compat_ptr(a.tvp));
-}
-
 asmlinkage long sys32_waitpid(compat_pid_t pid, unsigned int *stat_addr,
 			      int options)
 {
diff --git a/arch/x86/include/asm/sys_ia32.h b/arch/x86/include/asm/sys_ia32.h
index d5f69045c100..b26fc750e416 100644
--- a/arch/x86/include/asm/sys_ia32.h
+++ b/arch/x86/include/asm/sys_ia32.h
@@ -40,8 +40,6 @@ asmlinkage long sys32_rt_sigprocmask(int, compat_sigset_t __user *,
 				     compat_sigset_t __user *, unsigned int);
 asmlinkage long sys32_alarm(unsigned int);
 
-struct sel_arg_struct;
-asmlinkage long sys32_old_select(struct sel_arg_struct __user *);
 asmlinkage long sys32_waitpid(compat_pid_t, unsigned int *, int);
 asmlinkage long sys32_sysfs(int, u32, u32);
 
diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h
index 8868b9420b0e..8406d06c118d 100644
--- a/arch/x86/include/asm/syscalls.h
+++ b/arch/x86/include/asm/syscalls.h
@@ -52,12 +52,10 @@ unsigned long sys_sigreturn(struct pt_regs *);
 
 /* kernel/sys_i386_32.c */
 struct mmap_arg_struct;
-struct sel_arg_struct;
 struct oldold_utsname;
 struct old_utsname;
 
 asmlinkage int old_mmap(struct mmap_arg_struct __user *);
-asmlinkage int old_select(struct sel_arg_struct __user *);
 asmlinkage int sys_ipc(uint, int, int, int, void __user *, long);
 asmlinkage int sys_uname(struct old_utsname __user *);
 asmlinkage int sys_olduname(struct oldold_utsname __user *);
diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h
index 3baf379fa840..4eb2667b54ae 100644
--- a/arch/x86/include/asm/unistd_32.h
+++ b/arch/x86/include/asm/unistd_32.h
@@ -366,6 +366,7 @@
 #define __ARCH_WANT_SYS_LLSEEK
 #define __ARCH_WANT_SYS_NICE
 #define __ARCH_WANT_SYS_OLD_GETRLIMIT
+#define __ARCH_WANT_SYS_OLD_SELECT
 #define __ARCH_WANT_SYS_OLDUMOUNT
 #define __ARCH_WANT_SYS_SIGPENDING
 #define __ARCH_WANT_SYS_SIGPROCMASK
diff --git a/arch/x86/kernel/sys_i386_32.c b/arch/x86/kernel/sys_i386_32.c
index dee1ff7cba58..345dbd19a2b3 100644
--- a/arch/x86/kernel/sys_i386_32.c
+++ b/arch/x86/kernel/sys_i386_32.c
@@ -58,23 +58,6 @@ out:
 	return err;
 }
 
-
-struct sel_arg_struct {
-	unsigned long n;
-	fd_set __user *inp, *outp, *exp;
-	struct timeval __user *tvp;
-};
-
-asmlinkage int old_select(struct sel_arg_struct __user *arg)
-{
-	struct sel_arg_struct a;
-
-	if (copy_from_user(&a, arg, sizeof(a)))
-		return -EFAULT;
-	/* sys_select() does the appropriate kernel locking */
-	return sys_select(a.n, a.inp, a.outp, a.exp, a.tvp);
-}
-
 /*
  * sys_ipc() is the de-multiplexer for the SysV IPC calls..
  *
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
index 15228b5d3eb7..4d10abacecdb 100644
--- a/arch/x86/kernel/syscall_table_32.S
+++ b/arch/x86/kernel/syscall_table_32.S
@@ -81,7 +81,7 @@ ENTRY(sys_call_table)
 	.long sys_settimeofday
 	.long sys_getgroups16	/* 80 */
 	.long sys_setgroups16
-	.long old_select
+	.long sys_old_select
 	.long sys_symlink
 	.long sys_lstat
 	.long sys_readlink	/* 85 */
diff --git a/fs/compat.c b/fs/compat.c
index 00d90c2e66f0..030602d453b7 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1795,6 +1795,24 @@ asmlinkage long compat_sys_select(int n, compat_ulong_t __user *inp,
 	return ret;
 }
 
+struct compat_sel_arg_struct {
+	compat_ulong_t n;
+	compat_uptr_t inp;
+	compat_uptr_t outp;
+	compat_uptr_t exp;
+	compat_uptr_t tvp;
+};
+
+asmlinkage long compat_sys_old_select(struct compat_sel_arg_struct __user *arg)
+{
+	struct compat_sel_arg_struct a;
+
+	if (copy_from_user(&a, arg, sizeof(a)))
+		return -EFAULT;
+	return compat_sys_select(a.n, compat_ptr(a.inp), compat_ptr(a.outp),
+				 compat_ptr(a.exp), compat_ptr(a.tvp));
+}
+
 #ifdef HAVE_SET_RESTORE_SIGMASK
 static long do_compat_pselect(int n, compat_ulong_t __user *inp,
 	compat_ulong_t __user *outp, compat_ulong_t __user *exp,
diff --git a/fs/select.c b/fs/select.c
index 73715e90030f..500a669f7790 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -691,6 +691,23 @@ SYSCALL_DEFINE6(pselect6, int, n, fd_set __user *, inp, fd_set __user *, outp,
 }
 #endif /* HAVE_SET_RESTORE_SIGMASK */
 
+#ifdef __ARCH_WANT_SYS_OLD_SELECT
+struct sel_arg_struct {
+	unsigned long n;
+	fd_set __user *inp, *outp, *exp;
+	struct timeval __user *tvp;
+};
+
+SYSCALL_DEFINE1(old_select, struct sel_arg_struct __user *, arg)
+{
+	struct sel_arg_struct a;
+
+	if (copy_from_user(&a, arg, sizeof(a)))
+		return -EFAULT;
+	return sys_select(a.n, a.inp, a.outp, a.exp, a.tvp);
+}
+#endif
+
 struct poll_list {
 	struct poll_list *next;
 	int len;
diff --git a/include/linux/compat.h b/include/linux/compat.h
index ef68119a4fd2..717c691ecd8e 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -23,6 +23,7 @@
 typedef __compat_uid32_t	compat_uid_t;
 typedef __compat_gid32_t	compat_gid_t;
 
+struct compat_sel_arg_struct;
 struct rusage;
 
 struct compat_itimerspec { 
@@ -249,6 +250,8 @@ asmlinkage long compat_sys_select(int n, compat_ulong_t __user *inp,
 		compat_ulong_t __user *outp, compat_ulong_t __user *exp,
 		struct compat_timeval __user *tvp);
 
+asmlinkage long compat_sys_old_select(struct compat_sel_arg_struct __user *arg);
+
 asmlinkage long compat_sys_wait4(compat_pid_t pid,
 				 compat_uint_t __user *stat_addr, int options,
 				 struct compat_rusage __user *ru);
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 8126f239edf0..85a9f21fe11a 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -34,6 +34,7 @@ struct pollfd;
 struct rlimit;
 struct rusage;
 struct sched_param;
+struct sel_arg_struct;
 struct semaphore;
 struct sembuf;
 struct shmid_ds;
@@ -638,6 +639,7 @@ asmlinkage long sys_poll(struct pollfd __user *ufds, unsigned int nfds,
 				long timeout);
 asmlinkage long sys_select(int n, fd_set __user *inp, fd_set __user *outp,
 			fd_set __user *exp, struct timeval __user *tvp);
+asmlinkage long sys_old_select(struct sel_arg_struct __user *arg);
 asmlinkage long sys_epoll_create(int size);
 asmlinkage long sys_epoll_create1(int flags);
 asmlinkage long sys_epoll_ctl(int epfd, int op, int fd,
-- 
cgit v1.2.3


From a4679373cf4ee0e7792dc56205365732b725c2c1 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 10 Mar 2010 15:21:15 -0800
Subject: Add generic sys_old_mmap()

Add a generic implementation of the old mmap() syscall, which expects its
argument in a memory block and switch all architectures over to use it.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: Jeff Dike <jdike@addtoit.com>
Cc: Hirokazu Takata <takata@linux-m32r.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@elte.hu>
Reviewed-by: H. Peter Anvin <hpa@zytor.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: "Luck, Tony" <tony.luck@intel.com>
Cc: James Morris <jmorris@namei.org>
Cc: Andreas Schwab <schwab@linux-m68k.org>
Acked-by: Jesper Nilsson <jesper.nilsson@axis.com>
Acked-by: Russell King <rmk+kernel@arm.linux.org.uk>
Acked-by: Greg Ungerer <gerg@uclinux.org>
Acked-by: David Howells <dhowells@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/arm/include/asm/unistd.h             |  1 +
 arch/arm/kernel/calls.S                   |  2 +-
 arch/arm/kernel/sys_arm.c                 | 26 -----------------------
 arch/cris/arch-v10/kernel/entry.S         |  2 +-
 arch/cris/arch-v32/kernel/entry.S         |  2 +-
 arch/cris/include/asm/unistd.h            |  1 +
 arch/cris/kernel/sys_cris.c               | 18 ----------------
 arch/h8300/include/asm/unistd.h           |  1 +
 arch/h8300/kernel/sys_h8300.c             | 34 -------------------------------
 arch/h8300/kernel/syscalls.S              |  2 +-
 arch/m68k/include/asm/unistd.h            |  1 +
 arch/m68k/kernel/entry.S                  |  2 +-
 arch/m68k/kernel/sys_m68k.c               | 34 -------------------------------
 arch/m68knommu/kernel/sys_m68k.c          | 34 -------------------------------
 arch/m68knommu/kernel/syscalltable.S      |  2 +-
 arch/s390/include/asm/unistd.h            |  1 +
 arch/s390/kernel/entry.h                  |  5 ++---
 arch/s390/kernel/sys_s390.c               | 30 ++++++---------------------
 arch/s390/kernel/syscalls.S               |  2 +-
 arch/um/sys-i386/shared/sysdep/syscalls.h |  2 --
 arch/um/sys-i386/sys_call_table.S         |  2 +-
 arch/um/sys-i386/syscalls.c               | 33 ------------------------------
 arch/x86/ia32/sys_ia32.c                  |  6 +++---
 arch/x86/include/asm/sys_ia32.h           |  4 ++--
 arch/x86/include/asm/syscalls.h           |  2 --
 arch/x86/include/asm/unistd_32.h          |  1 +
 arch/x86/kernel/sys_i386_32.c             | 34 -------------------------------
 arch/x86/kernel/syscall_table_32.S        |  2 +-
 include/linux/syscalls.h                  |  3 +++
 mm/mmap.c                                 | 24 ++++++++++++++++++++++
 mm/nommu.c                                | 24 ++++++++++++++++++++++
 31 files changed, 79 insertions(+), 258 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/include/asm/unistd.h b/arch/arm/include/asm/unistd.h
index e18500d305ba..e6eeb2d29953 100644
--- a/arch/arm/include/asm/unistd.h
+++ b/arch/arm/include/asm/unistd.h
@@ -443,6 +443,7 @@
 #define __ARCH_WANT_SYS_SIGPROCMASK
 #define __ARCH_WANT_SYS_RT_SIGACTION
 #define __ARCH_WANT_SYS_RT_SIGSUSPEND
+#define __ARCH_WANT_SYS_OLD_MMAP
 #define __ARCH_WANT_SYS_OLD_SELECT
 
 #if !defined(CONFIG_AEABI) || defined(CONFIG_OABI_COMPAT)
diff --git a/arch/arm/kernel/calls.S b/arch/arm/kernel/calls.S
index 7671e9a75449..37ae301cc47c 100644
--- a/arch/arm/kernel/calls.S
+++ b/arch/arm/kernel/calls.S
@@ -99,7 +99,7 @@
 		CALL(sys_swapon)
 		CALL(sys_reboot)
 		CALL(OBSOLETE(sys_old_readdir))	/* used by libc4 */
-/* 90 */	CALL(OBSOLETE(old_mmap))	/* used by libc4 */
+/* 90 */	CALL(OBSOLETE(sys_old_mmap))	/* used by libc4 */
 		CALL(sys_munmap)
 		CALL(sys_truncate)
 		CALL(sys_ftruncate)
diff --git a/arch/arm/kernel/sys_arm.c b/arch/arm/kernel/sys_arm.c
index e59cddedcbba..a2e0e6f2ea7f 100644
--- a/arch/arm/kernel/sys_arm.c
+++ b/arch/arm/kernel/sys_arm.c
@@ -28,32 +28,6 @@
 #include <linux/ipc.h>
 #include <linux/uaccess.h>
 
-struct mmap_arg_struct {
-	unsigned long addr;
-	unsigned long len;
-	unsigned long prot;
-	unsigned long flags;
-	unsigned long fd;
-	unsigned long offset;
-};
-
-asmlinkage int old_mmap(struct mmap_arg_struct __user *arg)
-{
-	int error = -EFAULT;
-	struct mmap_arg_struct a;
-
-	if (copy_from_user(&a, arg, sizeof(a)))
-		goto out;
-
-	error = -EINVAL;
-	if (a.offset & ~PAGE_MASK)
-		goto out;
-
-	error = sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, a.offset >> PAGE_SHIFT);
-out:
-	return error;
-}
-
 #if !defined(CONFIG_AEABI) || defined(CONFIG_OABI_COMPAT)
 /*
  * sys_ipc() is the de-multiplexer for the SysV IPC calls..
diff --git a/arch/cris/arch-v10/kernel/entry.S b/arch/cris/arch-v10/kernel/entry.S
index c52bef39e250..0d6420d087fd 100644
--- a/arch/cris/arch-v10/kernel/entry.S
+++ b/arch/cris/arch-v10/kernel/entry.S
@@ -692,7 +692,7 @@ sys_call_table:
 	.long sys_swapon
 	.long sys_reboot
 	.long sys_old_readdir
-	.long old_mmap		/* 90 */
+	.long sys_old_mmap	/* 90 */
 	.long sys_munmap
 	.long sys_truncate
 	.long sys_ftruncate
diff --git a/arch/cris/arch-v32/kernel/entry.S b/arch/cris/arch-v32/kernel/entry.S
index 435b9671bd4b..1f39861eac8c 100644
--- a/arch/cris/arch-v32/kernel/entry.S
+++ b/arch/cris/arch-v32/kernel/entry.S
@@ -615,7 +615,7 @@ sys_call_table:
 	.long sys_swapon
 	.long sys_reboot
 	.long sys_old_readdir
-	.long old_mmap		/* 90 */
+	.long sys_old_mmap	/* 90 */
 	.long sys_munmap
 	.long sys_truncate
 	.long sys_ftruncate
diff --git a/arch/cris/include/asm/unistd.h b/arch/cris/include/asm/unistd.h
index c17079388bb9..8cffd22623fd 100644
--- a/arch/cris/include/asm/unistd.h
+++ b/arch/cris/include/asm/unistd.h
@@ -364,6 +364,7 @@
 #define __ARCH_WANT_SYS_LLSEEK
 #define __ARCH_WANT_SYS_NICE
 #define __ARCH_WANT_SYS_OLD_GETRLIMIT
+#define __ARCH_WANT_SYS_OLD_MMAP
 #define __ARCH_WANT_SYS_OLDUMOUNT
 #define __ARCH_WANT_SYS_SIGPENDING
 #define __ARCH_WANT_SYS_SIGPROCMASK
diff --git a/arch/cris/kernel/sys_cris.c b/arch/cris/kernel/sys_cris.c
index c2bbb1ac98a9..22f9d6cd947f 100644
--- a/arch/cris/kernel/sys_cris.c
+++ b/arch/cris/kernel/sys_cris.c
@@ -26,24 +26,6 @@
 #include <asm/uaccess.h>
 #include <asm/segment.h>
 
-asmlinkage unsigned long old_mmap(unsigned long __user *args)
-{        
-	unsigned long buffer[6];
-	int err = -EFAULT;
-
-	if (copy_from_user(&buffer, args, sizeof(buffer)))
-		goto out;
-
-	err = -EINVAL;
-	if (buffer[5] & ~PAGE_MASK) /* verify that offset is on page boundary */
-		goto out;
-
-	err = sys_mmap_pgoff(buffer[0], buffer[1], buffer[2], buffer[3],
-                       buffer[4], buffer[5] >> PAGE_SHIFT);
-out:
-	return err;
-}
-
 asmlinkage long
 sys_mmap2(unsigned long addr, unsigned long len, unsigned long prot,
           unsigned long flags, unsigned long fd, unsigned long pgoff)
diff --git a/arch/h8300/include/asm/unistd.h b/arch/h8300/include/asm/unistd.h
index 3bea0b3eb24f..54dab4726954 100644
--- a/arch/h8300/include/asm/unistd.h
+++ b/arch/h8300/include/asm/unistd.h
@@ -348,6 +348,7 @@
 #define __ARCH_WANT_SYS_LLSEEK
 #define __ARCH_WANT_SYS_NICE
 #define __ARCH_WANT_SYS_OLD_GETRLIMIT
+#define __ARCH_WANT_SYS_OLD_MMAP
 #define __ARCH_WANT_SYS_OLD_SELECT
 #define __ARCH_WANT_SYS_OLDUMOUNT
 #define __ARCH_WANT_SYS_SIGPENDING
diff --git a/arch/h8300/kernel/sys_h8300.c b/arch/h8300/kernel/sys_h8300.c
index e9a3ecf90c9c..1f13fd6e5309 100644
--- a/arch/h8300/kernel/sys_h8300.c
+++ b/arch/h8300/kernel/sys_h8300.c
@@ -26,40 +26,6 @@
 #include <asm/traps.h>
 #include <asm/unistd.h>
 
-/*
- * Perform the select(nd, in, out, ex, tv) and mmap() system
- * calls. Linux/m68k cloned Linux/i386, which didn't use to be able to
- * handle more than 4 system call parameters, so these system calls
- * used a memory block for parameter passing..
- */
-
-struct mmap_arg_struct {
-	unsigned long addr;
-	unsigned long len;
-	unsigned long prot;
-	unsigned long flags;
-	unsigned long fd;
-	unsigned long offset;
-};
-
-asmlinkage int old_mmap(struct mmap_arg_struct *arg)
-{
-	struct mmap_arg_struct a;
-	int error = -EFAULT;
-
-	if (copy_from_user(&a, arg, sizeof(a)))
-		goto out;
-
-	error = -EINVAL;
-	if (a.offset & ~PAGE_MASK)
-		goto out;
-
-	error = sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd,
-			       a.offset >> PAGE_SHIFT);
-out:
-	return error;
-}
-
 /*
  * sys_ipc() is the de-multiplexer for the SysV IPC calls..
  *
diff --git a/arch/h8300/kernel/syscalls.S b/arch/h8300/kernel/syscalls.S
index fe5ae20e60c5..faefaff7d43d 100644
--- a/arch/h8300/kernel/syscalls.S
+++ b/arch/h8300/kernel/syscalls.S
@@ -104,7 +104,7 @@ SYMBOL_NAME_LABEL(sys_call_table)
 	.long SYMBOL_NAME(sys_swapon)
 	.long SYMBOL_NAME(sys_reboot)
 	.long SYMBOL_NAME(sys_old_readdir)
-	.long SYMBOL_NAME(old_mmap)		/* 90 */
+	.long SYMBOL_NAME(sys_old_mmap)		/* 90 */
 	.long SYMBOL_NAME(sys_munmap)
 	.long SYMBOL_NAME(sys_truncate)
 	.long SYMBOL_NAME(sys_ftruncate)
diff --git a/arch/m68k/include/asm/unistd.h b/arch/m68k/include/asm/unistd.h
index 1582c2db1c86..d801154310ea 100644
--- a/arch/m68k/include/asm/unistd.h
+++ b/arch/m68k/include/asm/unistd.h
@@ -363,6 +363,7 @@
 #define __ARCH_WANT_SYS_LLSEEK
 #define __ARCH_WANT_SYS_NICE
 #define __ARCH_WANT_SYS_OLD_GETRLIMIT
+#define __ARCH_WANT_SYS_OLD_MMAP
 #define __ARCH_WANT_SYS_OLD_SELECT
 #define __ARCH_WANT_SYS_OLDUMOUNT
 #define __ARCH_WANT_SYS_SIGPENDING
diff --git a/arch/m68k/kernel/entry.S b/arch/m68k/kernel/entry.S
index 09b1f09be3a6..2391bdff0996 100644
--- a/arch/m68k/kernel/entry.S
+++ b/arch/m68k/kernel/entry.S
@@ -518,7 +518,7 @@ sys_call_table:
 	.long sys_swapon
 	.long sys_reboot
 	.long sys_old_readdir
-	.long old_mmap		/* 90 */
+	.long sys_old_mmap	/* 90 */
 	.long sys_munmap
 	.long sys_truncate
 	.long sys_ftruncate
diff --git a/arch/m68k/kernel/sys_m68k.c b/arch/m68k/kernel/sys_m68k.c
index 03b58dd86c7a..7b309e7b6cef 100644
--- a/arch/m68k/kernel/sys_m68k.c
+++ b/arch/m68k/kernel/sys_m68k.c
@@ -46,40 +46,6 @@ asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,
 	return sys_mmap_pgoff(addr, len, prot, flags, fd, pgoff);
 }
 
-/*
- * Perform the select(nd, in, out, ex, tv) and mmap() system
- * calls. Linux/m68k cloned Linux/i386, which didn't use to be able to
- * handle more than 4 system call parameters, so these system calls
- * used a memory block for parameter passing..
- */
-
-struct mmap_arg_struct {
-	unsigned long addr;
-	unsigned long len;
-	unsigned long prot;
-	unsigned long flags;
-	unsigned long fd;
-	unsigned long offset;
-};
-
-asmlinkage int old_mmap(struct mmap_arg_struct __user *arg)
-{
-	struct mmap_arg_struct a;
-	int error = -EFAULT;
-
-	if (copy_from_user(&a, arg, sizeof(a)))
-		goto out;
-
-	error = -EINVAL;
-	if (a.offset & ~PAGE_MASK)
-		goto out;
-
-	error = sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd,
-			       a.offset >> PAGE_SHIFT);
-out:
-	return error;
-}
-
 /*
  * sys_ipc() is the de-multiplexer for the SysV IPC calls..
  *
diff --git a/arch/m68knommu/kernel/sys_m68k.c b/arch/m68knommu/kernel/sys_m68k.c
index e0d3f13e77a8..3e371cc9fd91 100644
--- a/arch/m68knommu/kernel/sys_m68k.c
+++ b/arch/m68knommu/kernel/sys_m68k.c
@@ -27,40 +27,6 @@
 #include <asm/cacheflush.h>
 #include <asm/unistd.h>
 
-/*
- * Perform the select(nd, in, out, ex, tv) and mmap() system
- * calls. Linux/m68k cloned Linux/i386, which didn't use to be able to
- * handle more than 4 system call parameters, so these system calls
- * used a memory block for parameter passing..
- */
-
-struct mmap_arg_struct {
-	unsigned long addr;
-	unsigned long len;
-	unsigned long prot;
-	unsigned long flags;
-	unsigned long fd;
-	unsigned long offset;
-};
-
-asmlinkage int old_mmap(struct mmap_arg_struct *arg)
-{
-	struct mmap_arg_struct a;
-	int error = -EFAULT;
-
-	if (copy_from_user(&a, arg, sizeof(a)))
-		goto out;
-
-	error = -EINVAL;
-	if (a.offset & ~PAGE_MASK)
-		goto out;
-
-	error = sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd,
-				a.offset >> PAGE_SHIFT);
-out:
-	return error;
-}
-
 /*
  * sys_ipc() is the de-multiplexer for the SysV IPC calls..
  *
diff --git a/arch/m68knommu/kernel/syscalltable.S b/arch/m68knommu/kernel/syscalltable.S
index 405738351700..b30b3eb197a5 100644
--- a/arch/m68knommu/kernel/syscalltable.S
+++ b/arch/m68knommu/kernel/syscalltable.S
@@ -108,7 +108,7 @@ ENTRY(sys_call_table)
 	.long sys_ni_syscall	/* sys_swapon */
 	.long sys_reboot
 	.long sys_old_readdir
-	.long old_mmap		/* 90 */
+	.long sys_old_mmap	/* 90 */
 	.long sys_munmap
 	.long sys_truncate
 	.long sys_ftruncate
diff --git a/arch/s390/include/asm/unistd.h b/arch/s390/include/asm/unistd.h
index 6e9f049fa823..5f0075150a65 100644
--- a/arch/s390/include/asm/unistd.h
+++ b/arch/s390/include/asm/unistd.h
@@ -392,6 +392,7 @@
 #define __ARCH_WANT_SYS_LLSEEK
 #define __ARCH_WANT_SYS_NICE
 #define __ARCH_WANT_SYS_OLD_GETRLIMIT
+#define __ARCH_WANT_SYS_OLD_MMAP
 #define __ARCH_WANT_SYS_OLDUMOUNT
 #define __ARCH_WANT_SYS_SIGPENDING
 #define __ARCH_WANT_SYS_SIGPROCMASK
diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h
index 9905a0cacf93..5de54d2af0b2 100644
--- a/arch/s390/kernel/entry.h
+++ b/arch/s390/kernel/entry.h
@@ -25,12 +25,11 @@ void __init startup_init(void);
 void die(const char * str, struct pt_regs * regs, long err);
 
 struct new_utsname;
-struct mmap_arg_struct;
+struct s390_mmap_arg_struct;
 struct fadvise64_64_args;
 struct old_sigaction;
 
-long sys_mmap2(struct mmap_arg_struct __user  *arg);
-long sys_s390_old_mmap(struct mmap_arg_struct __user *arg);
+long sys_mmap2(struct s390_mmap_arg_struct __user  *arg);
 long sys_ipc(uint call, int first, unsigned long second,
 	     unsigned long third, void __user *ptr);
 long sys_s390_newuname(struct new_utsname __user *name);
diff --git a/arch/s390/kernel/sys_s390.c b/arch/s390/kernel/sys_s390.c
index 86a74c9c9e63..b2563509b5a9 100644
--- a/arch/s390/kernel/sys_s390.c
+++ b/arch/s390/kernel/sys_s390.c
@@ -33,13 +33,12 @@
 #include "entry.h"
 
 /*
- * Perform the select(nd, in, out, ex, tv) and mmap() system
- * calls. Linux for S/390 isn't able to handle more than 5
- * system call parameters, so these system calls used a memory
- * block for parameter passing..
+ * Perform the mmap() system call. Linux for S/390 isn't able to handle more
+ * than 5 system call parameters, so this system call uses a memory block
+ * for parameter passing.
  */
 
-struct mmap_arg_struct {
+struct s390_mmap_arg_struct {
 	unsigned long addr;
 	unsigned long len;
 	unsigned long prot;
@@ -48,9 +47,9 @@ struct mmap_arg_struct {
 	unsigned long offset;
 };
 
-SYSCALL_DEFINE1(mmap2, struct mmap_arg_struct __user *, arg)
+SYSCALL_DEFINE1(mmap2, struct s390_mmap_arg_struct __user *, arg)
 {
-	struct mmap_arg_struct a;
+	struct s390_mmap_arg_struct a;
 	int error = -EFAULT;
 
 	if (copy_from_user(&a, arg, sizeof(a)))
@@ -60,23 +59,6 @@ out:
 	return error;
 }
 
-SYSCALL_DEFINE1(s390_old_mmap, struct mmap_arg_struct __user *, arg)
-{
-	struct mmap_arg_struct a;
-	long error = -EFAULT;
-
-	if (copy_from_user(&a, arg, sizeof(a)))
-		goto out;
-
-	error = -EINVAL;
-	if (a.offset & ~PAGE_MASK)
-		goto out;
-
-	error = sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, a.offset >> PAGE_SHIFT);
-out:
-	return error;
-}
-
 /*
  * sys_ipc() is the de-multiplexer for the SysV IPC calls..
  *
diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S
index 30eca070d426..2a24766567af 100644
--- a/arch/s390/kernel/syscalls.S
+++ b/arch/s390/kernel/syscalls.S
@@ -98,7 +98,7 @@ SYSCALL(sys_uselib,sys_uselib,sys32_uselib_wrapper)
 SYSCALL(sys_swapon,sys_swapon,sys32_swapon_wrapper)
 SYSCALL(sys_reboot,sys_reboot,sys32_reboot_wrapper)
 SYSCALL(sys_ni_syscall,sys_ni_syscall,old32_readdir_wrapper)	/* old readdir syscall */
-SYSCALL(sys_s390_old_mmap,sys_s390_old_mmap,old32_mmap_wrapper)	/* 90 */
+SYSCALL(sys_old_mmap,sys_old_mmap,old32_mmap_wrapper)		/* 90 */
 SYSCALL(sys_munmap,sys_munmap,sys32_munmap_wrapper)
 SYSCALL(sys_truncate,sys_truncate,sys32_truncate_wrapper)
 SYSCALL(sys_ftruncate,sys_ftruncate,sys32_ftruncate_wrapper)
diff --git a/arch/um/sys-i386/shared/sysdep/syscalls.h b/arch/um/sys-i386/shared/sysdep/syscalls.h
index e7787679e317..05cb796aecb5 100644
--- a/arch/um/sys-i386/shared/sysdep/syscalls.h
+++ b/arch/um/sys-i386/shared/sysdep/syscalls.h
@@ -13,8 +13,6 @@ typedef long syscall_handler_t(struct pt_regs);
  */
 extern syscall_handler_t sys_rt_sigaction;
 
-extern syscall_handler_t old_mmap_i386;
-
 extern syscall_handler_t *sys_call_table[];
 
 #define EXECUTE_SYSCALL(syscall, regs) \
diff --git a/arch/um/sys-i386/sys_call_table.S b/arch/um/sys-i386/sys_call_table.S
index c6260dd6ebb9..de274071455d 100644
--- a/arch/um/sys-i386/sys_call_table.S
+++ b/arch/um/sys-i386/sys_call_table.S
@@ -7,7 +7,7 @@
 #define sys_vm86old sys_ni_syscall
 #define sys_vm86 sys_ni_syscall
 
-#define old_mmap old_mmap_i386
+#define old_mmap sys_old_mmap
 
 #define ptregs_fork sys_fork
 #define ptregs_execve sys_execve
diff --git a/arch/um/sys-i386/syscalls.c b/arch/um/sys-i386/syscalls.c
index 0e49d2a20c17..d0aa8f125ee6 100644
--- a/arch/um/sys-i386/syscalls.c
+++ b/arch/um/sys-i386/syscalls.c
@@ -11,39 +11,6 @@
 #include "asm/uaccess.h"
 #include "asm/unistd.h"
 
-/*
- * Perform the select(nd, in, out, ex, tv) and mmap() system
- * calls. Linux/i386 didn't use to be able to handle more than
- * 4 system call parameters, so these system calls used a memory
- * block for parameter passing..
- */
-
-struct mmap_arg_struct {
-	unsigned long addr;
-	unsigned long len;
-	unsigned long prot;
-	unsigned long flags;
-	unsigned long fd;
-	unsigned long offset;
-};
-
-extern int old_mmap(unsigned long addr, unsigned long len,
-		    unsigned long prot, unsigned long flags,
-		    unsigned long fd, unsigned long offset);
-
-long old_mmap_i386(struct mmap_arg_struct __user *arg)
-{
-	struct mmap_arg_struct a;
-	int err = -EFAULT;
-
-	if (copy_from_user(&a, arg, sizeof(a)))
-		goto out;
-
-	err = old_mmap(a.addr, a.len, a.prot, a.flags, a.fd, a.offset);
- out:
-	return err;
-}
-
 /*
  * The prototype on i386 is:
  *
diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c
index cb80816e7a16..56c99f46e289 100644
--- a/arch/x86/ia32/sys_ia32.c
+++ b/arch/x86/ia32/sys_ia32.c
@@ -143,7 +143,7 @@ asmlinkage long sys32_fstatat(unsigned int dfd, char __user *filename,
  * block for parameter passing..
  */
 
-struct mmap_arg_struct {
+struct mmap_arg_struct32 {
 	unsigned int addr;
 	unsigned int len;
 	unsigned int prot;
@@ -152,9 +152,9 @@ struct mmap_arg_struct {
 	unsigned int offset;
 };
 
-asmlinkage long sys32_mmap(struct mmap_arg_struct __user *arg)
+asmlinkage long sys32_mmap(struct mmap_arg_struct32 __user *arg)
 {
-	struct mmap_arg_struct a;
+	struct mmap_arg_struct32 a;
 
 	if (copy_from_user(&a, arg, sizeof(a)))
 		return -EFAULT;
diff --git a/arch/x86/include/asm/sys_ia32.h b/arch/x86/include/asm/sys_ia32.h
index b26fc750e416..7d348d803669 100644
--- a/arch/x86/include/asm/sys_ia32.h
+++ b/arch/x86/include/asm/sys_ia32.h
@@ -26,8 +26,8 @@ asmlinkage long sys32_lstat64(char __user *, struct stat64 __user *);
 asmlinkage long sys32_fstat64(unsigned int, struct stat64 __user *);
 asmlinkage long sys32_fstatat(unsigned int, char __user *,
 			      struct stat64 __user *, int);
-struct mmap_arg_struct;
-asmlinkage long sys32_mmap(struct mmap_arg_struct __user *);
+struct mmap_arg_struct32;
+asmlinkage long sys32_mmap(struct mmap_arg_struct32 __user *);
 asmlinkage long sys32_mprotect(unsigned long, size_t, unsigned long);
 
 struct sigaction32;
diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h
index 8406d06c118d..86ab6a0623fd 100644
--- a/arch/x86/include/asm/syscalls.h
+++ b/arch/x86/include/asm/syscalls.h
@@ -51,11 +51,9 @@ asmlinkage int sys_sigaction(int, const struct old_sigaction __user *,
 unsigned long sys_sigreturn(struct pt_regs *);
 
 /* kernel/sys_i386_32.c */
-struct mmap_arg_struct;
 struct oldold_utsname;
 struct old_utsname;
 
-asmlinkage int old_mmap(struct mmap_arg_struct __user *);
 asmlinkage int sys_ipc(uint, int, int, int, void __user *, long);
 asmlinkage int sys_uname(struct old_utsname __user *);
 asmlinkage int sys_olduname(struct oldold_utsname __user *);
diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h
index 4eb2667b54ae..daa65d9aae95 100644
--- a/arch/x86/include/asm/unistd_32.h
+++ b/arch/x86/include/asm/unistd_32.h
@@ -366,6 +366,7 @@
 #define __ARCH_WANT_SYS_LLSEEK
 #define __ARCH_WANT_SYS_NICE
 #define __ARCH_WANT_SYS_OLD_GETRLIMIT
+#define __ARCH_WANT_SYS_OLD_MMAP
 #define __ARCH_WANT_SYS_OLD_SELECT
 #define __ARCH_WANT_SYS_OLDUMOUNT
 #define __ARCH_WANT_SYS_SIGPENDING
diff --git a/arch/x86/kernel/sys_i386_32.c b/arch/x86/kernel/sys_i386_32.c
index 345dbd19a2b3..7955e90c8341 100644
--- a/arch/x86/kernel/sys_i386_32.c
+++ b/arch/x86/kernel/sys_i386_32.c
@@ -24,40 +24,6 @@
 
 #include <asm/syscalls.h>
 
-/*
- * Perform the select(nd, in, out, ex, tv) and mmap() system
- * calls. Linux/i386 didn't use to be able to handle more than
- * 4 system call parameters, so these system calls used a memory
- * block for parameter passing..
- */
-
-struct mmap_arg_struct {
-	unsigned long addr;
-	unsigned long len;
-	unsigned long prot;
-	unsigned long flags;
-	unsigned long fd;
-	unsigned long offset;
-};
-
-asmlinkage int old_mmap(struct mmap_arg_struct __user *arg)
-{
-	struct mmap_arg_struct a;
-	int err = -EFAULT;
-
-	if (copy_from_user(&a, arg, sizeof(a)))
-		goto out;
-
-	err = -EINVAL;
-	if (a.offset & ~PAGE_MASK)
-		goto out;
-
-	err = sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags,
-			a.fd, a.offset >> PAGE_SHIFT);
-out:
-	return err;
-}
-
 /*
  * sys_ipc() is the de-multiplexer for the SysV IPC calls..
  *
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
index 4d10abacecdb..8b3729341216 100644
--- a/arch/x86/kernel/syscall_table_32.S
+++ b/arch/x86/kernel/syscall_table_32.S
@@ -89,7 +89,7 @@ ENTRY(sys_call_table)
 	.long sys_swapon
 	.long sys_reboot
 	.long sys_old_readdir
-	.long old_mmap		/* 90 */
+	.long sys_old_mmap	/* 90 */
 	.long sys_munmap
 	.long sys_truncate
 	.long sys_ftruncate
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 85a9f21fe11a..b60907e3b0d5 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -23,6 +23,7 @@ struct kexec_segment;
 struct linux_dirent;
 struct linux_dirent64;
 struct list_head;
+struct mmap_arg_struct;
 struct msgbuf;
 struct msghdr;
 struct mmsghdr;
@@ -838,4 +839,6 @@ asmlinkage long sys_perf_event_open(
 asmlinkage long sys_mmap_pgoff(unsigned long addr, unsigned long len,
 			unsigned long prot, unsigned long flags,
 			unsigned long fd, unsigned long pgoff);
+asmlinkage long sys_old_mmap(struct mmap_arg_struct __user *arg);
+
 #endif
diff --git a/mm/mmap.c b/mm/mmap.c
index f1b4448626bf..75557c639ad4 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1088,6 +1088,30 @@ out:
 	return retval;
 }
 
+#ifdef __ARCH_WANT_SYS_OLD_MMAP
+struct mmap_arg_struct {
+	unsigned long addr;
+	unsigned long len;
+	unsigned long prot;
+	unsigned long flags;
+	unsigned long fd;
+	unsigned long offset;
+};
+
+SYSCALL_DEFINE1(old_mmap, struct mmap_arg_struct __user *, arg)
+{
+	struct mmap_arg_struct a;
+
+	if (copy_from_user(&a, arg, sizeof(a)))
+		return -EFAULT;
+	if (a.offset & ~PAGE_MASK)
+		return -EINVAL;
+
+	return sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd,
+			      a.offset >> PAGE_SHIFT);
+}
+#endif /* __ARCH_WANT_SYS_OLD_MMAP */
+
 /*
  * Some shared mappigns will want the pages marked read-only
  * to track write events. If so, we'll downgrade vm_page_prot
diff --git a/mm/nommu.c b/mm/nommu.c
index b9b5cceb1b68..605ace8982a8 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1428,6 +1428,30 @@ out:
 	return retval;
 }
 
+#ifdef __ARCH_WANT_SYS_OLD_MMAP
+struct mmap_arg_struct {
+	unsigned long addr;
+	unsigned long len;
+	unsigned long prot;
+	unsigned long flags;
+	unsigned long fd;
+	unsigned long offset;
+};
+
+SYSCALL_DEFINE1(old_mmap, struct mmap_arg_struct __user *, arg)
+{
+	struct mmap_arg_struct a;
+
+	if (copy_from_user(&a, arg, sizeof(a)))
+		return -EFAULT;
+	if (a.offset & ~PAGE_MASK)
+		return -EINVAL;
+
+	return sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd,
+			      a.offset >> PAGE_SHIFT);
+}
+#endif /* __ARCH_WANT_SYS_OLD_MMAP */
+
 /*
  * split a vma into two pieces at address 'addr', a new vma is allocated either
  * for the first part or the tail.
-- 
cgit v1.2.3


From baed7fc9b580bd3fb8252ff1d9b36eaf1f86b670 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 10 Mar 2010 15:21:18 -0800
Subject: Add generic sys_ipc wrapper

Add a generic implementation of the ipc demultiplexer syscall.  Except for
s390 and sparc64 all implementations of the sys_ipc are nearly identical.

There are slight differences in the types of the parameters, where mips
and powerpc as the only 64-bit architectures with sys_ipc use unsigned
long for the "third" argument as it gets casted to a pointer later, while
it traditionally is an "int" like most other paramters.  frv goes even
further and uses unsigned long for all parameters execept for "ptr" which
is a pointer type everywhere.  The change from int to unsigned long for
"third" and back to "int" for the others on frv should be fine due to the
in-register calling conventions for syscalls (we already had a similar
issue with the generic sys_ptrace), but I'd prefer to have the arch
maintainers looks over this in details.

Except for that h8300, m68k and m68knommu lack an impplementation of the
semtimedop sub call which this patch adds, and various architectures have
gets used - at least on i386 it seems superflous as the compat code on
x86-64 and ia64 doesn't even bother to implement it.

[akpm@linux-foundation.org: add sys_ipc to sys_ni.c]
Signed-off-by: Christoph Hellwig <hch@lst.de>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: Jeff Dike <jdike@addtoit.com>
Cc: Hirokazu Takata <takata@linux-m32r.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@elte.hu>
Reviewed-by: H. Peter Anvin <hpa@zytor.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: "Luck, Tony" <tony.luck@intel.com>
Cc: James Morris <jmorris@namei.org>
Cc: Andreas Schwab <schwab@linux-m68k.org>
Acked-by: Jesper Nilsson <jesper.nilsson@axis.com>
Acked-by: Russell King <rmk+kernel@arm.linux.org.uk>
Acked-by: David Howells <dhowells@redhat.com>
Acked-by: Kyle McMartin <kyle@mcmartin.ca>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/arm/include/asm/unistd.h       |   1 +
 arch/arm/kernel/sys_arm.c           |  82 --------------------------
 arch/arm/kernel/sys_oabi-compat.c   |   3 -
 arch/cris/include/asm/unistd.h      |   1 +
 arch/cris/kernel/sys_cris.c         |  78 -------------------------
 arch/frv/include/asm/unistd.h       |   1 +
 arch/frv/kernel/sys_frv.c           |  89 ----------------------------
 arch/h8300/include/asm/unistd.h     |   1 +
 arch/h8300/kernel/sys_h8300.c       |  88 ----------------------------
 arch/m32r/include/asm/unistd.h      |   1 +
 arch/m32r/kernel/sys_m32r.c         |  81 --------------------------
 arch/m68k/include/asm/unistd.h      |   1 +
 arch/m68k/kernel/sys_m68k.c         |  81 --------------------------
 arch/m68knommu/kernel/sys_m68k.c    |  86 ---------------------------
 arch/mips/include/asm/unistd.h      |   1 +
 arch/mips/kernel/syscall.c          |  88 ----------------------------
 arch/mn10300/include/asm/unistd.h   |   1 +
 arch/mn10300/kernel/sys_mn10300.c   |  88 ----------------------------
 arch/powerpc/include/asm/syscalls.h |   2 -
 arch/powerpc/include/asm/unistd.h   |   1 +
 arch/powerpc/kernel/syscalls.c      |  94 ------------------------------
 arch/s390/kernel/entry.h            |   2 +-
 arch/s390/kernel/sys_s390.c         |   2 +-
 arch/s390/kernel/syscalls.S         |   2 +-
 arch/sh/include/asm/syscalls.h      |   2 -
 arch/sh/include/asm/unistd_32.h     |   1 +
 arch/sh/include/asm/unistd_64.h     |   1 +
 arch/sh/kernel/sys_sh.c             | 104 ---------------------------------
 arch/sparc/include/asm/unistd.h     |   4 +-
 arch/sparc/kernel/sys_sparc_32.c    | 113 ------------------------------------
 arch/sparc/kernel/sys_sparc_64.c    |   2 +-
 arch/sparc/kernel/systbls.h         |   2 +-
 arch/sparc/kernel/systbls_64.S      |   2 +-
 arch/um/sys-i386/syscalls.c         |  86 ---------------------------
 arch/x86/include/asm/syscalls.h     |   1 -
 arch/x86/include/asm/unistd_32.h    |   1 +
 arch/x86/kernel/sys_i386_32.c       |  85 ---------------------------
 include/linux/syscalls.h            |   2 +
 ipc/Makefile                        |   2 +-
 ipc/syscall.c                       |  99 +++++++++++++++++++++++++++++++
 kernel/sys_ni.c                     |   1 +
 41 files changed, 124 insertions(+), 1259 deletions(-)
 create mode 100644 ipc/syscall.c

(limited to 'include/linux')

diff --git a/arch/arm/include/asm/unistd.h b/arch/arm/include/asm/unistd.h
index e6eeb2d29953..dd2bf53000fe 100644
--- a/arch/arm/include/asm/unistd.h
+++ b/arch/arm/include/asm/unistd.h
@@ -448,6 +448,7 @@
 
 #if !defined(CONFIG_AEABI) || defined(CONFIG_OABI_COMPAT)
 #define __ARCH_WANT_SYS_TIME
+#define __ARCH_WANT_SYS_IPC
 #define __ARCH_WANT_SYS_OLDUMOUNT
 #define __ARCH_WANT_SYS_ALARM
 #define __ARCH_WANT_SYS_UTIME
diff --git a/arch/arm/kernel/sys_arm.c b/arch/arm/kernel/sys_arm.c
index a2e0e6f2ea7f..4350f75e578c 100644
--- a/arch/arm/kernel/sys_arm.c
+++ b/arch/arm/kernel/sys_arm.c
@@ -28,88 +28,6 @@
 #include <linux/ipc.h>
 #include <linux/uaccess.h>
 
-#if !defined(CONFIG_AEABI) || defined(CONFIG_OABI_COMPAT)
-/*
- * sys_ipc() is the de-multiplexer for the SysV IPC calls..
- *
- * This is really horribly ugly.
- */
-asmlinkage int sys_ipc(uint call, int first, int second, int third,
-		       void __user *ptr, long fifth)
-{
-	int version, ret;
-
-	version = call >> 16; /* hack for backward compatibility */
-	call &= 0xffff;
-
-	switch (call) {
-	case SEMOP:
-		return sys_semtimedop (first, (struct sembuf __user *)ptr, second, NULL);
-	case SEMTIMEDOP:
-		return sys_semtimedop(first, (struct sembuf __user *)ptr, second,
-					(const struct timespec __user *)fifth);
-
-	case SEMGET:
-		return sys_semget (first, second, third);
-	case SEMCTL: {
-		union semun fourth;
-		if (!ptr)
-			return -EINVAL;
-		if (get_user(fourth.__pad, (void __user * __user *) ptr))
-			return -EFAULT;
-		return sys_semctl (first, second, third, fourth);
-	}
-
-	case MSGSND:
-		return sys_msgsnd(first, (struct msgbuf __user *) ptr, 
-				  second, third);
-	case MSGRCV:
-		switch (version) {
-		case 0: {
-			struct ipc_kludge tmp;
-			if (!ptr)
-				return -EINVAL;
-			if (copy_from_user(&tmp,(struct ipc_kludge __user *)ptr,
-					   sizeof (tmp)))
-				return -EFAULT;
-			return sys_msgrcv (first, tmp.msgp, second,
-					   tmp.msgtyp, third);
-		}
-		default:
-			return sys_msgrcv (first,
-					   (struct msgbuf __user *) ptr,
-					   second, fifth, third);
-		}
-	case MSGGET:
-		return sys_msgget ((key_t) first, second);
-	case MSGCTL:
-		return sys_msgctl(first, second, (struct msqid_ds __user *)ptr);
-
-	case SHMAT:
-		switch (version) {
-		default: {
-			ulong raddr;
-			ret = do_shmat(first, (char __user *)ptr, second, &raddr);
-			if (ret)
-				return ret;
-			return put_user(raddr, (ulong __user *)third);
-		}
-		case 1: /* Of course, we don't support iBCS2! */
-			return -EINVAL;
-		}
-	case SHMDT: 
-		return sys_shmdt ((char __user *)ptr);
-	case SHMGET:
-		return sys_shmget (first, second, third);
-	case SHMCTL:
-		return sys_shmctl (first, second,
-				   (struct shmid_ds __user *) ptr);
-	default:
-		return -ENOSYS;
-	}
-}
-#endif
-
 /* Fork a new task - this creates a new program thread.
  * This is called indirectly via a small wrapper
  */
diff --git a/arch/arm/kernel/sys_oabi-compat.c b/arch/arm/kernel/sys_oabi-compat.c
index d59a0cd537f0..33ff678e32f2 100644
--- a/arch/arm/kernel/sys_oabi-compat.c
+++ b/arch/arm/kernel/sys_oabi-compat.c
@@ -346,9 +346,6 @@ asmlinkage long sys_oabi_semop(int semid, struct oabi_sembuf __user *tsops,
 	return sys_oabi_semtimedop(semid, tsops, nsops, NULL);
 }
 
-extern asmlinkage int sys_ipc(uint call, int first, int second, int third,
-			      void __user *ptr, long fifth);
-
 asmlinkage int sys_oabi_ipc(uint call, int first, int second, int third,
 			    void __user *ptr, long fifth)
 {
diff --git a/arch/cris/include/asm/unistd.h b/arch/cris/include/asm/unistd.h
index 8cffd22623fd..f6fad83b3a8c 100644
--- a/arch/cris/include/asm/unistd.h
+++ b/arch/cris/include/asm/unistd.h
@@ -352,6 +352,7 @@
 #define __ARCH_WANT_STAT64
 #define __ARCH_WANT_SYS_ALARM
 #define __ARCH_WANT_SYS_GETHOSTNAME
+#define __ARCH_WANT_SYS_IPC
 #define __ARCH_WANT_SYS_PAUSE
 #define __ARCH_WANT_SYS_SGETMASK
 #define __ARCH_WANT_SYS_SIGNAL
diff --git a/arch/cris/kernel/sys_cris.c b/arch/cris/kernel/sys_cris.c
index 22f9d6cd947f..7aa036ec78ff 100644
--- a/arch/cris/kernel/sys_cris.c
+++ b/arch/cris/kernel/sys_cris.c
@@ -33,81 +33,3 @@ sys_mmap2(unsigned long addr, unsigned long len, unsigned long prot,
 	/* bug(?): 8Kb pages here */
         return sys_mmap_pgoff(addr, len, prot, flags, fd, pgoff);
 }
-
-/*
- * sys_ipc() is the de-multiplexer for the SysV IPC calls..
- *
- * This is really horribly ugly. (same as arch/i386)
- */
-
-asmlinkage int sys_ipc (uint call, int first, int second,
-			int third, void __user *ptr, long fifth)
-{
-	int version, ret;
-
-	version = call >> 16; /* hack for backward compatibility */
-	call &= 0xffff;
-
-	switch (call) {
-	case SEMOP:
-		return sys_semtimedop (first, (struct sembuf __user *)ptr, second, NULL);
-	case SEMTIMEDOP:
-		return sys_semtimedop(first, (struct sembuf __user *)ptr, second,
-					(const struct timespec __user *)fifth);
-
-	case SEMGET:
-		return sys_semget (first, second, third);
-	case SEMCTL: {
-		union semun fourth;
-		if (!ptr)
-			return -EINVAL;
-		if (get_user(fourth.__pad, (void * __user *) ptr))
-			return -EFAULT;
-		return sys_semctl (first, second, third, fourth);
-	}
-
-	case MSGSND:
-		return sys_msgsnd (first, (struct msgbuf __user *) ptr, 
-				   second, third);
-	case MSGRCV:
-		switch (version) {
-		case 0: {
-			struct ipc_kludge tmp;
-			if (!ptr)
-				return -EINVAL;
-			
-			if (copy_from_user(&tmp,
-					   (struct ipc_kludge __user *) ptr, 
-					   sizeof (tmp)))
-				return -EFAULT;
-			return sys_msgrcv (first, tmp.msgp, second,
-					   tmp.msgtyp, third);
-		}
-		default:
-			return sys_msgrcv (first,
-					   (struct msgbuf __user *) ptr,
-					   second, fifth, third);
-		}
-	case MSGGET:
-		return sys_msgget ((key_t) first, second);
-	case MSGCTL:
-		return sys_msgctl (first, second, (struct msqid_ds __user *) ptr);
-
-	case SHMAT: {
-                ulong raddr;
-                ret = do_shmat (first, (char __user *) ptr, second, &raddr);
-                if (ret)
-                        return ret;
-                return put_user (raddr, (ulong __user *) third);
-        }
-	case SHMDT: 
-		return sys_shmdt ((char __user *)ptr);
-	case SHMGET:
-		return sys_shmget (first, second, third);
-	case SHMCTL:
-		return sys_shmctl (first, second,
-				   (struct shmid_ds __user *) ptr);
-	default:
-		return -ENOSYS;
-	}
-}
diff --git a/arch/frv/include/asm/unistd.h b/arch/frv/include/asm/unistd.h
index be6ef0f5cd42..b28da499e22a 100644
--- a/arch/frv/include/asm/unistd.h
+++ b/arch/frv/include/asm/unistd.h
@@ -354,6 +354,7 @@
 #define __ARCH_WANT_STAT64
 #define __ARCH_WANT_SYS_ALARM
 /* #define __ARCH_WANT_SYS_GETHOSTNAME */
+#define __ARCH_WANT_SYS_IPC
 #define __ARCH_WANT_SYS_PAUSE
 /* #define __ARCH_WANT_SYS_SGETMASK */
 /* #define __ARCH_WANT_SYS_SIGNAL */
diff --git a/arch/frv/kernel/sys_frv.c b/arch/frv/kernel/sys_frv.c
index 1d3d4c9e2521..9c4980825bbb 100644
--- a/arch/frv/kernel/sys_frv.c
+++ b/arch/frv/kernel/sys_frv.c
@@ -42,92 +42,3 @@ asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,
 	return sys_mmap_pgoff(addr, len, prot, flags, fd,
 			      pgoff >> (PAGE_SHIFT - 12));
 }
-
-/*
- * sys_ipc() is the de-multiplexer for the SysV IPC calls..
- *
- * This is really horribly ugly.
- */
-asmlinkage long sys_ipc(unsigned long call,
-			unsigned long first,
-			unsigned long second,
-			unsigned long third,
-			void __user *ptr,
-			unsigned long fifth)
-{
-	int version, ret;
-
-	version = call >> 16; /* hack for backward compatibility */
-	call &= 0xffff;
-
-	switch (call) {
-	case SEMOP:
-		return sys_semtimedop(first, (struct sembuf __user *)ptr, second, NULL);
-	case SEMTIMEDOP:
-		return sys_semtimedop(first, (struct sembuf __user *)ptr, second,
-				      (const struct timespec __user *)fifth);
-
-	case SEMGET:
-		return sys_semget (first, second, third);
-	case SEMCTL: {
-		union semun fourth;
-		if (!ptr)
-			return -EINVAL;
-		if (get_user(fourth.__pad, (void * __user *) ptr))
-			return -EFAULT;
-		return sys_semctl (first, second, third, fourth);
-	}
-
-	case MSGSND:
-		return sys_msgsnd (first, (struct msgbuf __user *) ptr,
-				   second, third);
-	case MSGRCV:
-		switch (version) {
-		case 0: {
-			struct ipc_kludge tmp;
-			if (!ptr)
-				return -EINVAL;
-
-			if (copy_from_user(&tmp,
-					   (struct ipc_kludge __user *) ptr,
-					   sizeof (tmp)))
-				return -EFAULT;
-			return sys_msgrcv (first, tmp.msgp, second,
-					   tmp.msgtyp, third);
-		}
-		default:
-			return sys_msgrcv (first,
-					   (struct msgbuf __user *) ptr,
-					   second, fifth, third);
-		}
-	case MSGGET:
-		return sys_msgget ((key_t) first, second);
-	case MSGCTL:
-		return sys_msgctl (first, second, (struct msqid_ds __user *) ptr);
-
-	case SHMAT:
-		switch (version) {
-		default: {
-			ulong raddr;
-			ret = do_shmat (first, (char __user *) ptr, second, &raddr);
-			if (ret)
-				return ret;
-			return put_user (raddr, (ulong __user *) third);
-		}
-		case 1:	/* iBCS2 emulator entry point */
-			if (!segment_eq(get_fs(), get_ds()))
-				return -EINVAL;
-			/* The "(ulong *) third" is valid _only_ because of the kernel segment thing */
-			return do_shmat (first, (char __user *) ptr, second, (ulong *) third);
-		}
-	case SHMDT:
-		return sys_shmdt ((char __user *)ptr);
-	case SHMGET:
-		return sys_shmget (first, second, third);
-	case SHMCTL:
-		return sys_shmctl (first, second,
-				   (struct shmid_ds __user *) ptr);
-	default:
-		return -ENOSYS;
-	}
-}
diff --git a/arch/h8300/include/asm/unistd.h b/arch/h8300/include/asm/unistd.h
index 54dab4726954..50f2c5a36591 100644
--- a/arch/h8300/include/asm/unistd.h
+++ b/arch/h8300/include/asm/unistd.h
@@ -336,6 +336,7 @@
 #define __ARCH_WANT_STAT64
 #define __ARCH_WANT_SYS_ALARM
 #define __ARCH_WANT_SYS_GETHOSTNAME
+#define __ARCH_WANT_SYS_IPC
 #define __ARCH_WANT_SYS_PAUSE
 #define __ARCH_WANT_SYS_SGETMASK
 #define __ARCH_WANT_SYS_SIGNAL
diff --git a/arch/h8300/kernel/sys_h8300.c b/arch/h8300/kernel/sys_h8300.c
index 1f13fd6e5309..f9b3f44da69f 100644
--- a/arch/h8300/kernel/sys_h8300.c
+++ b/arch/h8300/kernel/sys_h8300.c
@@ -26,94 +26,6 @@
 #include <asm/traps.h>
 #include <asm/unistd.h>
 
-/*
- * sys_ipc() is the de-multiplexer for the SysV IPC calls..
- *
- * This is really horribly ugly.
- */
-asmlinkage int sys_ipc (uint call, int first, int second,
-			int third, void *ptr, long fifth)
-{
-	int version, ret;
-
-	version = call >> 16; /* hack for backward compatibility */
-	call &= 0xffff;
-
-	if (call <= SEMCTL)
-		switch (call) {
-		case SEMOP:
-			return sys_semop (first, (struct sembuf *)ptr, second);
-		case SEMGET:
-			return sys_semget (first, second, third);
-		case SEMCTL: {
-			union semun fourth;
-			if (!ptr)
-				return -EINVAL;
-			if (get_user(fourth.__pad, (void **) ptr))
-				return -EFAULT;
-			return sys_semctl (first, second, third, fourth);
-			}
-		default:
-			return -EINVAL;
-		}
-	if (call <= MSGCTL) 
-		switch (call) {
-		case MSGSND:
-			return sys_msgsnd (first, (struct msgbuf *) ptr, 
-					  second, third);
-		case MSGRCV:
-			switch (version) {
-			case 0: {
-				struct ipc_kludge tmp;
-				if (!ptr)
-					return -EINVAL;
-				if (copy_from_user (&tmp,
-						    (struct ipc_kludge *)ptr,
-						    sizeof (tmp)))
-					return -EFAULT;
-				return sys_msgrcv (first, tmp.msgp, second,
-						   tmp.msgtyp, third);
-				}
-			default:
-				return sys_msgrcv (first,
-						   (struct msgbuf *) ptr,
-						   second, fifth, third);
-			}
-		case MSGGET:
-			return sys_msgget ((key_t) first, second);
-		case MSGCTL:
-			return sys_msgctl (first, second,
-					   (struct msqid_ds *) ptr);
-		default:
-			return -EINVAL;
-		}
-	if (call <= SHMCTL) 
-		switch (call) {
-		case SHMAT:
-			switch (version) {
-			default: {
-				ulong raddr;
-				ret = do_shmat (first, (char *) ptr,
-						 second, &raddr);
-				if (ret)
-					return ret;
-				return put_user (raddr, (ulong *) third);
-			}
-			}
-		case SHMDT: 
-			return sys_shmdt ((char *)ptr);
-		case SHMGET:
-			return sys_shmget (first, second, third);
-		case SHMCTL:
-			return sys_shmctl (first, second,
-					   (struct shmid_ds *) ptr);
-		default:
-			return -EINVAL;
-		}
-
-	return -EINVAL;
-}
-
 /* sys_cacheflush -- no support.  */
 asmlinkage int
 sys_cacheflush (unsigned long addr, int scope, int cache, unsigned long len)
diff --git a/arch/m32r/include/asm/unistd.h b/arch/m32r/include/asm/unistd.h
index cf701c933249..76125777483c 100644
--- a/arch/m32r/include/asm/unistd.h
+++ b/arch/m32r/include/asm/unistd.h
@@ -339,6 +339,7 @@
 #define __ARCH_WANT_STAT64
 #define __ARCH_WANT_SYS_ALARM
 #define __ARCH_WANT_SYS_GETHOSTNAME
+#define __ARCH_WANT_SYS_IPC
 #define __ARCH_WANT_SYS_PAUSE
 #define __ARCH_WANT_SYS_TIME
 #define __ARCH_WANT_SYS_UTIME
diff --git a/arch/m32r/kernel/sys_m32r.c b/arch/m32r/kernel/sys_m32r.c
index d3c865c5a6ba..cf2e7279ce9b 100644
--- a/arch/m32r/kernel/sys_m32r.c
+++ b/arch/m32r/kernel/sys_m32r.c
@@ -76,87 +76,6 @@ asmlinkage int sys_tas(int __user *addr)
 	return oldval;
 }
 
-/*
- * sys_ipc() is the de-multiplexer for the SysV IPC calls..
- *
- * This is really horribly ugly.
- */
-asmlinkage int sys_ipc(uint call, int first, int second,
-		       int third, void __user *ptr, long fifth)
-{
-	int version, ret;
-
-	version = call >> 16; /* hack for backward compatibility */
-	call &= 0xffff;
-
-	switch (call) {
-	case SEMOP:
-		return sys_semtimedop(first, (struct sembuf __user *)ptr,
-				      second, NULL);
-	case SEMTIMEDOP:
-		return sys_semtimedop(first, (struct sembuf __user *)ptr,
-				      second, (const struct timespec __user *)fifth);
-	case SEMGET:
-		return sys_semget (first, second, third);
-	case SEMCTL: {
-		union semun fourth;
-		if (!ptr)
-			return -EINVAL;
-		if (get_user(fourth.__pad, (void __user * __user *) ptr))
-			return -EFAULT;
-		return sys_semctl (first, second, third, fourth);
-		}
-
-	case MSGSND:
-		return sys_msgsnd (first, (struct msgbuf __user *) ptr,
-				   second, third);
-	case MSGRCV:
-		switch (version) {
-		case 0: {
-			struct ipc_kludge tmp;
-			if (!ptr)
-				return -EINVAL;
-
-			if (copy_from_user(&tmp,
-					   (struct ipc_kludge __user *) ptr,
-					   sizeof (tmp)))
-				return -EFAULT;
-			return sys_msgrcv (first, tmp.msgp, second,
-					   tmp.msgtyp, third);
-			}
-		default:
-			return sys_msgrcv (first,
-					   (struct msgbuf __user *) ptr,
-					   second, fifth, third);
-		}
-	case MSGGET:
-		return sys_msgget ((key_t) first, second);
-	case MSGCTL:
-		return sys_msgctl (first, second,
-				   (struct msqid_ds __user *) ptr);
-	case SHMAT: {
-		ulong raddr;
-
-		if (!access_ok(VERIFY_WRITE, (ulong __user *) third,
-				      sizeof(ulong)))
-			return -EFAULT;
-		ret = do_shmat (first, (char __user *) ptr, second, &raddr);
-		if (ret)
-			return ret;
-		return put_user (raddr, (ulong __user *) third);
-		}
-	case SHMDT:
-		return sys_shmdt ((char __user *)ptr);
-	case SHMGET:
-		return sys_shmget (first, second, third);
-	case SHMCTL:
-		return sys_shmctl (first, second,
-				   (struct shmid_ds __user *) ptr);
-	default:
-		return -ENOSYS;
-	}
-}
-
 asmlinkage int sys_uname(struct old_utsname __user * name)
 {
 	int err;
diff --git a/arch/m68k/include/asm/unistd.h b/arch/m68k/include/asm/unistd.h
index d801154310ea..60b15d0aa072 100644
--- a/arch/m68k/include/asm/unistd.h
+++ b/arch/m68k/include/asm/unistd.h
@@ -351,6 +351,7 @@
 #define __ARCH_WANT_STAT64
 #define __ARCH_WANT_SYS_ALARM
 #define __ARCH_WANT_SYS_GETHOSTNAME
+#define __ARCH_WANT_SYS_IPC
 #define __ARCH_WANT_SYS_PAUSE
 #define __ARCH_WANT_SYS_SGETMASK
 #define __ARCH_WANT_SYS_SIGNAL
diff --git a/arch/m68k/kernel/sys_m68k.c b/arch/m68k/kernel/sys_m68k.c
index 7b309e7b6cef..77896692eb0a 100644
--- a/arch/m68k/kernel/sys_m68k.c
+++ b/arch/m68k/kernel/sys_m68k.c
@@ -46,87 +46,6 @@ asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,
 	return sys_mmap_pgoff(addr, len, prot, flags, fd, pgoff);
 }
 
-/*
- * sys_ipc() is the de-multiplexer for the SysV IPC calls..
- *
- * This is really horribly ugly.
- */
-asmlinkage int sys_ipc (uint call, int first, int second,
-			int third, void __user *ptr, long fifth)
-{
-	int version, ret;
-
-	version = call >> 16; /* hack for backward compatibility */
-	call &= 0xffff;
-
-	if (call <= SEMCTL)
-		switch (call) {
-		case SEMOP:
-			return sys_semop (first, ptr, second);
-		case SEMGET:
-			return sys_semget (first, second, third);
-		case SEMCTL: {
-			union semun fourth;
-			if (!ptr)
-				return -EINVAL;
-			if (get_user(fourth.__pad, (void __user *__user *) ptr))
-				return -EFAULT;
-			return sys_semctl (first, second, third, fourth);
-			}
-		default:
-			return -ENOSYS;
-		}
-	if (call <= MSGCTL)
-		switch (call) {
-		case MSGSND:
-			return sys_msgsnd (first, ptr, second, third);
-		case MSGRCV:
-			switch (version) {
-			case 0: {
-				struct ipc_kludge tmp;
-				if (!ptr)
-					return -EINVAL;
-				if (copy_from_user (&tmp, ptr, sizeof (tmp)))
-					return -EFAULT;
-				return sys_msgrcv (first, tmp.msgp, second,
-						   tmp.msgtyp, third);
-				}
-			default:
-				return sys_msgrcv (first, ptr,
-						   second, fifth, third);
-			}
-		case MSGGET:
-			return sys_msgget ((key_t) first, second);
-		case MSGCTL:
-			return sys_msgctl (first, second, ptr);
-		default:
-			return -ENOSYS;
-		}
-	if (call <= SHMCTL)
-		switch (call) {
-		case SHMAT:
-			switch (version) {
-			default: {
-				ulong raddr;
-				ret = do_shmat (first, ptr, second, &raddr);
-				if (ret)
-					return ret;
-				return put_user (raddr, (ulong __user *) third);
-			}
-			}
-		case SHMDT:
-			return sys_shmdt (ptr);
-		case SHMGET:
-			return sys_shmget (first, second, third);
-		case SHMCTL:
-			return sys_shmctl (first, second, ptr);
-		default:
-			return -ENOSYS;
-		}
-
-	return -EINVAL;
-}
-
 /* Convert virtual (user) address VADDR to physical address PADDR */
 #define virt_to_phys_040(vaddr)						\
 ({									\
diff --git a/arch/m68knommu/kernel/sys_m68k.c b/arch/m68knommu/kernel/sys_m68k.c
index 3e371cc9fd91..d65e9c4c930c 100644
--- a/arch/m68knommu/kernel/sys_m68k.c
+++ b/arch/m68knommu/kernel/sys_m68k.c
@@ -27,92 +27,6 @@
 #include <asm/cacheflush.h>
 #include <asm/unistd.h>
 
-/*
- * sys_ipc() is the de-multiplexer for the SysV IPC calls..
- *
- * This is really horribly ugly.
- */
-asmlinkage int sys_ipc (uint call, int first, int second,
-			int third, void *ptr, long fifth)
-{
-	int version, ret;
-
-	version = call >> 16; /* hack for backward compatibility */
-	call &= 0xffff;
-
-	if (call <= SEMCTL)
-		switch (call) {
-		case SEMOP:
-			return sys_semop (first, (struct sembuf *)ptr, second);
-		case SEMGET:
-			return sys_semget (first, second, third);
-		case SEMCTL: {
-			union semun fourth;
-			if (!ptr)
-				return -EINVAL;
-			if (get_user(fourth.__pad, (void **) ptr))
-				return -EFAULT;
-			return sys_semctl (first, second, third, fourth);
-			}
-		default:
-			return -EINVAL;
-		}
-	if (call <= MSGCTL) 
-		switch (call) {
-		case MSGSND:
-			return sys_msgsnd (first, (struct msgbuf *) ptr, 
-					  second, third);
-		case MSGRCV:
-			switch (version) {
-			case 0: {
-				struct ipc_kludge tmp;
-				if (!ptr)
-					return -EINVAL;
-				if (copy_from_user (&tmp,
-						    (struct ipc_kludge *)ptr,
-						    sizeof (tmp)))
-					return -EFAULT;
-				return sys_msgrcv (first, tmp.msgp, second,
-						   tmp.msgtyp, third);
-				}
-			default:
-				return sys_msgrcv (first,
-						   (struct msgbuf *) ptr,
-						   second, fifth, third);
-			}
-		case MSGGET:
-			return sys_msgget ((key_t) first, second);
-		case MSGCTL:
-			return sys_msgctl (first, second,
-					   (struct msqid_ds *) ptr);
-		default:
-			return -EINVAL;
-		}
-	if (call <= SHMCTL)
-		switch (call) {
-		case SHMAT:
-			switch (version) {
-			default: {
-				ulong raddr;
-				ret = do_shmat (first, ptr, second, &raddr);
-				if (ret)
-					return ret;
-				return put_user (raddr, (ulong __user *) third);
-			}
-			}
-		case SHMDT:
-			return sys_shmdt (ptr);
-		case SHMGET:
-			return sys_shmget (first, second, third);
-		case SHMCTL:
-			return sys_shmctl (first, second, ptr);
-		default:
-			return -ENOSYS;
-		}
-
-	return -EINVAL;
-}
-
 /* sys_cacheflush -- flush (part of) the processor cache.  */
 asmlinkage int
 sys_cacheflush (unsigned long addr, int scope, int cache, unsigned long len)
diff --git a/arch/mips/include/asm/unistd.h b/arch/mips/include/asm/unistd.h
index 65c679ecbe6b..97fe472095f2 100644
--- a/arch/mips/include/asm/unistd.h
+++ b/arch/mips/include/asm/unistd.h
@@ -1004,6 +1004,7 @@
 #define __ARCH_WANT_OLD_READDIR
 #define __ARCH_WANT_SYS_ALARM
 #define __ARCH_WANT_SYS_GETHOSTNAME
+#define __ARCH_WANT_SYS_IPC
 #define __ARCH_WANT_SYS_PAUSE
 #define __ARCH_WANT_SYS_SGETMASK
 #define __ARCH_WANT_SYS_UTIME
diff --git a/arch/mips/kernel/syscall.c b/arch/mips/kernel/syscall.c
index 3f7f466190b4..257bf0141775 100644
--- a/arch/mips/kernel/syscall.c
+++ b/arch/mips/kernel/syscall.c
@@ -406,94 +406,6 @@ _sys_sysmips(nabi_no_regargs struct pt_regs regs)
 	return -EINVAL;
 }
 
-/*
- * sys_ipc() is the de-multiplexer for the SysV IPC calls..
- *
- * This is really horribly ugly.
- */
-SYSCALL_DEFINE6(ipc, unsigned int, call, int, first, int, second,
-	unsigned long, third, void __user *, ptr, long, fifth)
-{
-	int version, ret;
-
-	version = call >> 16; /* hack for backward compatibility */
-	call &= 0xffff;
-
-	switch (call) {
-	case SEMOP:
-		return sys_semtimedop(first, (struct sembuf __user *)ptr,
-		                      second, NULL);
-	case SEMTIMEDOP:
-		return sys_semtimedop(first, (struct sembuf __user *)ptr,
-				      second,
-				      (const struct timespec __user *)fifth);
-	case SEMGET:
-		return sys_semget(first, second, third);
-	case SEMCTL: {
-		union semun fourth;
-		if (!ptr)
-			return -EINVAL;
-		if (get_user(fourth.__pad, (void __user *__user *) ptr))
-			return -EFAULT;
-		return sys_semctl(first, second, third, fourth);
-	}
-
-	case MSGSND:
-		return sys_msgsnd(first, (struct msgbuf __user *) ptr,
-				  second, third);
-	case MSGRCV:
-		switch (version) {
-		case 0: {
-			struct ipc_kludge tmp;
-			if (!ptr)
-				return -EINVAL;
-
-			if (copy_from_user(&tmp,
-					   (struct ipc_kludge __user *) ptr,
-					   sizeof(tmp)))
-				return -EFAULT;
-			return sys_msgrcv(first, tmp.msgp, second,
-					  tmp.msgtyp, third);
-		}
-		default:
-			return sys_msgrcv(first,
-					  (struct msgbuf __user *) ptr,
-					  second, fifth, third);
-		}
-	case MSGGET:
-		return sys_msgget((key_t) first, second);
-	case MSGCTL:
-		return sys_msgctl(first, second,
-				  (struct msqid_ds __user *) ptr);
-
-	case SHMAT:
-		switch (version) {
-		default: {
-			unsigned long raddr;
-			ret = do_shmat(first, (char __user *) ptr, second,
-				       &raddr);
-			if (ret)
-				return ret;
-			return put_user(raddr, (unsigned long __user *) third);
-		}
-		case 1:	/* iBCS2 emulator entry point */
-			if (!segment_eq(get_fs(), get_ds()))
-				return -EINVAL;
-			return do_shmat(first, (char __user *) ptr, second,
-				        (unsigned long *) third);
-		}
-	case SHMDT:
-		return sys_shmdt((char __user *)ptr);
-	case SHMGET:
-		return sys_shmget(first, second, third);
-	case SHMCTL:
-		return sys_shmctl(first, second,
-				  (struct shmid_ds __user *) ptr);
-	default:
-		return -ENOSYS;
-	}
-}
-
 /*
  * No implemented yet ...
  */
diff --git a/arch/mn10300/include/asm/unistd.h b/arch/mn10300/include/asm/unistd.h
index d13a56e99bad..9d056f515929 100644
--- a/arch/mn10300/include/asm/unistd.h
+++ b/arch/mn10300/include/asm/unistd.h
@@ -363,6 +363,7 @@
 #define __ARCH_WANT_STAT64
 #define __ARCH_WANT_SYS_ALARM
 #define __ARCH_WANT_SYS_GETHOSTNAME
+#define __ARCH_WANT_SYS_IPC
 #define __ARCH_WANT_SYS_PAUSE
 #define __ARCH_WANT_SYS_SGETMASK
 #define __ARCH_WANT_SYS_SIGNAL
diff --git a/arch/mn10300/kernel/sys_mn10300.c b/arch/mn10300/kernel/sys_mn10300.c
index bef69d6daf15..815f1355fad4 100644
--- a/arch/mn10300/kernel/sys_mn10300.c
+++ b/arch/mn10300/kernel/sys_mn10300.c
@@ -31,91 +31,3 @@ asmlinkage long old_mmap(unsigned long addr, unsigned long len,
 		return -EINVAL;
 	return sys_mmap_pgoff(addr, len, prot, flags, fd, offset >> PAGE_SHIFT);
 }
-
-/*
- * sys_ipc() is the de-multiplexer for the SysV IPC calls..
- *
- * This is really horribly ugly.
- */
-asmlinkage long sys_ipc(uint call, int first, int second,
-			int third, void __user *ptr, long fifth)
-{
-	int version, ret;
-
-	version = call >> 16; /* hack for backward compatibility */
-	call &= 0xffff;
-
-	switch (call) {
-	case SEMOP:
-		return sys_semtimedop(first, (struct sembuf __user *)ptr,
-				      second, NULL);
-	case SEMTIMEDOP:
-		return sys_semtimedop(first, (struct sembuf __user *)ptr,
-				      second,
-				      (const struct timespec __user *)fifth);
-	case SEMGET:
-		return sys_semget(first, second, third);
-	case SEMCTL: {
-		union semun fourth;
-		if (!ptr)
-			return -EINVAL;
-		if (get_user(fourth.__pad, (void __user * __user *) ptr))
-			return -EFAULT;
-		return sys_semctl(first, second, third, fourth);
-	}
-
-	case MSGSND:
-		return sys_msgsnd(first, (struct msgbuf __user *) ptr,
-				  second, third);
-	case MSGRCV:
-		switch (version) {
-		case 0: {
-			struct ipc_kludge tmp;
-			if (!ptr)
-				return -EINVAL;
-
-			if (copy_from_user(&tmp,
-					   (struct ipc_kludge __user *) ptr,
-					   sizeof(tmp)))
-				return -EFAULT;
-			return sys_msgrcv(first, tmp.msgp, second,
-					  tmp.msgtyp, third);
-		}
-		default:
-			return sys_msgrcv(first,
-					  (struct msgbuf __user *) ptr,
-					   second, fifth, third);
-		}
-	case MSGGET:
-		return sys_msgget((key_t) first, second);
-	case MSGCTL:
-		return sys_msgctl(first, second,
-				   (struct msqid_ds __user *) ptr);
-
-	case SHMAT:
-		switch (version) {
-		default: {
-			ulong raddr;
-			ret = do_shmat(first, (char __user *) ptr, second,
-				       &raddr);
-			if (ret)
-				return ret;
-			return put_user(raddr, (ulong *) third);
-		}
-		case 1:	/* iBCS2 emulator entry point */
-			if (!segment_eq(get_fs(), get_ds()))
-				return -EINVAL;
-			return do_shmat(first, (char __user *) ptr, second,
-					(ulong *) third);
-		}
-	case SHMDT:
-		return sys_shmdt((char __user *)ptr);
-	case SHMGET:
-		return sys_shmget(first, second, third);
-	case SHMCTL:
-		return sys_shmctl(first, second,
-				  (struct shmid_ds __user *) ptr);
-	default:
-		return -EINVAL;
-	}
-}
diff --git a/arch/powerpc/include/asm/syscalls.h b/arch/powerpc/include/asm/syscalls.h
index eb8eb400c664..23bb74e7f946 100644
--- a/arch/powerpc/include/asm/syscalls.h
+++ b/arch/powerpc/include/asm/syscalls.h
@@ -35,8 +35,6 @@ asmlinkage long sys_pipe2(int __user *fildes, int flags);
 asmlinkage long sys_rt_sigaction(int sig,
 		const struct sigaction __user *act,
 		struct sigaction __user *oact, size_t sigsetsize);
-asmlinkage int sys_ipc(uint call, int first, unsigned long second,
-		long third, void __user *ptr, long fifth);
 asmlinkage long ppc64_personality(unsigned long personality);
 asmlinkage int ppc_rtas(struct rtas_args __user *uargs);
 asmlinkage time_t sys64_time(time_t __user * tloc);
diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h
index f6ca76176766..c13821fe8741 100644
--- a/arch/powerpc/include/asm/unistd.h
+++ b/arch/powerpc/include/asm/unistd.h
@@ -364,6 +364,7 @@
 #define __ARCH_WANT_STAT64
 #define __ARCH_WANT_SYS_ALARM
 #define __ARCH_WANT_SYS_GETHOSTNAME
+#define __ARCH_WANT_SYS_IPC
 #define __ARCH_WANT_SYS_PAUSE
 #define __ARCH_WANT_SYS_SGETMASK
 #define __ARCH_WANT_SYS_SIGNAL
diff --git a/arch/powerpc/kernel/syscalls.c b/arch/powerpc/kernel/syscalls.c
index 3370e62e43d4..5251221e7a5a 100644
--- a/arch/powerpc/kernel/syscalls.c
+++ b/arch/powerpc/kernel/syscalls.c
@@ -42,100 +42,6 @@
 #include <asm/time.h>
 #include <asm/unistd.h>
 
-/*
- * sys_ipc() is the de-multiplexer for the SysV IPC calls..
- *
- * This is really horribly ugly.
- */
-int sys_ipc(uint call, int first, unsigned long second, long third,
-	    void __user *ptr, long fifth)
-{
-	int version, ret;
-
-	version = call >> 16; /* hack for backward compatibility */
-	call &= 0xffff;
-
-	ret = -ENOSYS;
-	switch (call) {
-	case SEMOP:
-		ret = sys_semtimedop(first, (struct sembuf __user *)ptr,
-				      (unsigned)second, NULL);
-		break;
-	case SEMTIMEDOP:
-		ret = sys_semtimedop(first, (struct sembuf __user *)ptr,
-				      (unsigned)second,
-				      (const struct timespec __user *) fifth);
-		break;
-	case SEMGET:
-		ret = sys_semget (first, (int)second, third);
-		break;
-	case SEMCTL: {
-		union semun fourth;
-
-		ret = -EINVAL;
-		if (!ptr)
-			break;
-		if ((ret = get_user(fourth.__pad, (void __user * __user *)ptr)))
-			break;
-		ret = sys_semctl(first, (int)second, third, fourth);
-		break;
-	}
-	case MSGSND:
-		ret = sys_msgsnd(first, (struct msgbuf __user *)ptr,
-				 (size_t)second, third);
-		break;
-	case MSGRCV:
-		switch (version) {
-		case 0: {
-			struct ipc_kludge tmp;
-
-			ret = -EINVAL;
-			if (!ptr)
-				break;
-			if ((ret = copy_from_user(&tmp,
-						(struct ipc_kludge __user *) ptr,
-						sizeof (tmp)) ? -EFAULT : 0))
-				break;
-			ret = sys_msgrcv(first, tmp.msgp, (size_t) second,
-					  tmp.msgtyp, third);
-			break;
-		}
-		default:
-			ret = sys_msgrcv (first, (struct msgbuf __user *) ptr,
-					  (size_t)second, fifth, third);
-			break;
-		}
-		break;
-	case MSGGET:
-		ret = sys_msgget((key_t)first, (int)second);
-		break;
-	case MSGCTL:
-		ret = sys_msgctl(first, (int)second,
-				  (struct msqid_ds __user *)ptr);
-		break;
-	case SHMAT: {
-		ulong raddr;
-		ret = do_shmat(first, (char __user *)ptr, (int)second, &raddr);
-		if (ret)
-			break;
-		ret = put_user(raddr, (ulong __user *) third);
-		break;
-	}
-	case SHMDT:
-		ret = sys_shmdt((char __user *)ptr);
-		break;
-	case SHMGET:
-		ret = sys_shmget(first, (size_t)second, third);
-		break;
-	case SHMCTL:
-		ret = sys_shmctl(first, (int)second,
-				 (struct shmid_ds __user *)ptr);
-		break;
-	}
-
-	return ret;
-}
-
 static inline unsigned long do_mmap2(unsigned long addr, size_t len,
 			unsigned long prot, unsigned long flags,
 			unsigned long fd, unsigned long off, int shift)
diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h
index 5de54d2af0b2..15fd68b196c0 100644
--- a/arch/s390/kernel/entry.h
+++ b/arch/s390/kernel/entry.h
@@ -30,7 +30,7 @@ struct fadvise64_64_args;
 struct old_sigaction;
 
 long sys_mmap2(struct s390_mmap_arg_struct __user  *arg);
-long sys_ipc(uint call, int first, unsigned long second,
+long sys_s390_ipc(uint call, int first, unsigned long second,
 	     unsigned long third, void __user *ptr);
 long sys_s390_newuname(struct new_utsname __user *name);
 long sys_s390_personality(unsigned long personality);
diff --git a/arch/s390/kernel/sys_s390.c b/arch/s390/kernel/sys_s390.c
index b2563509b5a9..b8b78092ab7c 100644
--- a/arch/s390/kernel/sys_s390.c
+++ b/arch/s390/kernel/sys_s390.c
@@ -64,7 +64,7 @@ out:
  *
  * This is really horribly ugly.
  */
-SYSCALL_DEFINE5(ipc, uint, call, int, first, unsigned long, second,
+SYSCALL_DEFINE5(s390_ipc, uint, call, int, first, unsigned long, second,
 		unsigned long, third, void __user *, ptr)
 {
         struct ipc_kludge tmp;
diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S
index 2a24766567af..990ac8b321c8 100644
--- a/arch/s390/kernel/syscalls.S
+++ b/arch/s390/kernel/syscalls.S
@@ -125,7 +125,7 @@ NI_SYSCALL							/* vm86old for i386 */
 SYSCALL(sys_wait4,sys_wait4,compat_sys_wait4_wrapper)
 SYSCALL(sys_swapoff,sys_swapoff,sys32_swapoff_wrapper)		/* 115 */
 SYSCALL(sys_sysinfo,sys_sysinfo,compat_sys_sysinfo_wrapper)
-SYSCALL(sys_ipc,sys_ipc,sys32_ipc_wrapper)
+SYSCALL(sys_s390_ipc,sys_s390_ipc,sys32_ipc_wrapper)
 SYSCALL(sys_fsync,sys_fsync,sys32_fsync_wrapper)
 SYSCALL(sys_sigreturn,sys_sigreturn,sys32_sigreturn)
 SYSCALL(sys_clone,sys_clone,sys_clone_wrapper)			/* 120 */
diff --git a/arch/sh/include/asm/syscalls.h b/arch/sh/include/asm/syscalls.h
index c1e2b8deb837..c1ce2862f7be 100644
--- a/arch/sh/include/asm/syscalls.h
+++ b/arch/sh/include/asm/syscalls.h
@@ -11,8 +11,6 @@ asmlinkage int old_mmap(unsigned long addr, unsigned long len,
 asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,
 			  unsigned long prot, unsigned long flags,
 			  unsigned long fd, unsigned long pgoff);
-asmlinkage int sys_ipc(uint call, int first, int second,
-		       int third, void __user *ptr, long fifth);
 asmlinkage int sys_uname(struct old_utsname __user *name);
 
 #ifdef CONFIG_SUPERH32
diff --git a/arch/sh/include/asm/unistd_32.h b/arch/sh/include/asm/unistd_32.h
index 365744b05269..a48f65e2e429 100644
--- a/arch/sh/include/asm/unistd_32.h
+++ b/arch/sh/include/asm/unistd_32.h
@@ -358,6 +358,7 @@
 #define __ARCH_WANT_STAT64
 #define __ARCH_WANT_SYS_ALARM
 #define __ARCH_WANT_SYS_GETHOSTNAME
+#define __ARCH_WANT_SYS_IPC
 #define __ARCH_WANT_SYS_PAUSE
 #define __ARCH_WANT_SYS_SGETMASK
 #define __ARCH_WANT_SYS_SIGNAL
diff --git a/arch/sh/include/asm/unistd_64.h b/arch/sh/include/asm/unistd_64.h
index 25de158aac3a..7709b2b8f752 100644
--- a/arch/sh/include/asm/unistd_64.h
+++ b/arch/sh/include/asm/unistd_64.h
@@ -398,6 +398,7 @@
 #define __ARCH_WANT_STAT64
 #define __ARCH_WANT_SYS_ALARM
 #define __ARCH_WANT_SYS_GETHOSTNAME
+#define __ARCH_WANT_SYS_IPC
 #define __ARCH_WANT_SYS_PAUSE
 #define __ARCH_WANT_SYS_SGETMASK
 #define __ARCH_WANT_SYS_SIGNAL
diff --git a/arch/sh/kernel/sys_sh.c b/arch/sh/kernel/sys_sh.c
index 71399cde03b5..c18cfaa67fdd 100644
--- a/arch/sh/kernel/sys_sh.c
+++ b/arch/sh/kernel/sys_sh.c
@@ -53,110 +53,6 @@ asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,
 	return sys_mmap_pgoff(addr, len, prot, flags, fd, pgoff);
 }
 
-/*
- * sys_ipc() is the de-multiplexer for the SysV IPC calls..
- *
- * This is really horribly ugly.
- */
-asmlinkage int sys_ipc(uint call, int first, int second,
-		       int third, void __user *ptr, long fifth)
-{
-	int version, ret;
-
-	version = call >> 16; /* hack for backward compatibility */
-	call &= 0xffff;
-
-	if (call <= SEMTIMEDOP)
-		switch (call) {
-		case SEMOP:
-			return sys_semtimedop(first,
-					      (struct sembuf __user *)ptr,
-					      second, NULL);
-		case SEMTIMEDOP:
-			return sys_semtimedop(first,
-				(struct sembuf __user *)ptr, second,
-			        (const struct timespec __user *)fifth);
-		case SEMGET:
-			return sys_semget (first, second, third);
-		case SEMCTL: {
-			union semun fourth;
-			if (!ptr)
-				return -EINVAL;
-			if (get_user(fourth.__pad, (void __user * __user *) ptr))
-				return -EFAULT;
-			return sys_semctl (first, second, third, fourth);
-			}
-		default:
-			return -EINVAL;
-		}
-
-	if (call <= MSGCTL)
-		switch (call) {
-		case MSGSND:
-			return sys_msgsnd (first, (struct msgbuf __user *) ptr,
-					  second, third);
-		case MSGRCV:
-			switch (version) {
-			case 0:
-			{
-				struct ipc_kludge tmp;
-
-				if (!ptr)
-					return -EINVAL;
-
-				if (copy_from_user(&tmp,
-					(struct ipc_kludge __user *) ptr,
-						   sizeof (tmp)))
-					return -EFAULT;
-
-				return sys_msgrcv (first, tmp.msgp, second,
-						   tmp.msgtyp, third);
-			}
-			default:
-				return sys_msgrcv (first,
-						   (struct msgbuf __user *) ptr,
-						   second, fifth, third);
-			}
-		case MSGGET:
-			return sys_msgget ((key_t) first, second);
-		case MSGCTL:
-			return sys_msgctl (first, second,
-					   (struct msqid_ds __user *) ptr);
-		default:
-			return -EINVAL;
-		}
-	if (call <= SHMCTL)
-		switch (call) {
-		case SHMAT:
-			switch (version) {
-			default: {
-				ulong raddr;
-				ret = do_shmat (first, (char __user *) ptr,
-						 second, &raddr);
-				if (ret)
-					return ret;
-				return put_user (raddr, (ulong __user *) third);
-			}
-			case 1:	/* iBCS2 emulator entry point */
-				if (!segment_eq(get_fs(), get_ds()))
-					return -EINVAL;
-				return do_shmat (first, (char __user *) ptr,
-						  second, (ulong *) third);
-			}
-		case SHMDT:
-			return sys_shmdt ((char __user *)ptr);
-		case SHMGET:
-			return sys_shmget (first, second, third);
-		case SHMCTL:
-			return sys_shmctl (first, second,
-					   (struct shmid_ds __user *) ptr);
-		default:
-			return -EINVAL;
-		}
-
-	return -EINVAL;
-}
-
 /* sys_cacheflush -- flush (part of) the processor cache.  */
 asmlinkage int sys_cacheflush(unsigned long addr, unsigned long len, int op)
 {
diff --git a/arch/sparc/include/asm/unistd.h b/arch/sparc/include/asm/unistd.h
index cb4b9bfd0d87..d0b3b01ac9d4 100644
--- a/arch/sparc/include/asm/unistd.h
+++ b/arch/sparc/include/asm/unistd.h
@@ -432,7 +432,9 @@
 #define __ARCH_WANT_SYS_SIGPENDING
 #define __ARCH_WANT_SYS_SIGPROCMASK
 #define __ARCH_WANT_SYS_RT_SIGSUSPEND
-#ifndef __32bit_syscall_numbers__
+#ifdef __32bit_syscall_numbers__
+#define __ARCH_WANT_SYS_IPC
+#else
 #define __ARCH_WANT_COMPAT_SYS_TIME
 #define __ARCH_WANT_COMPAT_SYS_RT_SIGSUSPEND
 #endif
diff --git a/arch/sparc/kernel/sys_sparc_32.c b/arch/sparc/kernel/sys_sparc_32.c
index 3a82e65d8db2..ee995b7dae7e 100644
--- a/arch/sparc/kernel/sys_sparc_32.c
+++ b/arch/sparc/kernel/sys_sparc_32.c
@@ -98,119 +98,6 @@ out:
 	return error;
 }
 
-/*
- * sys_ipc() is the de-multiplexer for the SysV IPC calls..
- *
- * This is really horribly ugly.
- */
-
-asmlinkage int sys_ipc (uint call, int first, int second, int third, void __user *ptr, long fifth)
-{
-	int version, err;
-
-	version = call >> 16; /* hack for backward compatibility */
-	call &= 0xffff;
-
-	if (call <= SEMCTL)
-		switch (call) {
-		case SEMOP:
-			err = sys_semtimedop (first, (struct sembuf __user *)ptr, second, NULL);
-			goto out;
-		case SEMTIMEDOP:
-			err = sys_semtimedop (first, (struct sembuf __user *)ptr, second, (const struct timespec __user *) fifth);
-			goto out;
-		case SEMGET:
-			err = sys_semget (first, second, third);
-			goto out;
-		case SEMCTL: {
-			union semun fourth;
-			err = -EINVAL;
-			if (!ptr)
-				goto out;
-			err = -EFAULT;
-			if (get_user(fourth.__pad,
-				     (void __user * __user *)ptr))
-				goto out;
-			err = sys_semctl (first, second, third, fourth);
-			goto out;
-			}
-		default:
-			err = -ENOSYS;
-			goto out;
-		}
-	if (call <= MSGCTL) 
-		switch (call) {
-		case MSGSND:
-			err = sys_msgsnd (first, (struct msgbuf __user *) ptr, 
-					  second, third);
-			goto out;
-		case MSGRCV:
-			switch (version) {
-			case 0: {
-				struct ipc_kludge tmp;
-				err = -EINVAL;
-				if (!ptr)
-					goto out;
-				err = -EFAULT;
-				if (copy_from_user(&tmp, (struct ipc_kludge __user *) ptr, sizeof (tmp)))
-					goto out;
-				err = sys_msgrcv (first, tmp.msgp, second, tmp.msgtyp, third);
-				goto out;
-				}
-			case 1: default:
-				err = sys_msgrcv (first,
-						  (struct msgbuf __user *) ptr,
-						  second, fifth, third);
-				goto out;
-			}
-		case MSGGET:
-			err = sys_msgget ((key_t) first, second);
-			goto out;
-		case MSGCTL:
-			err = sys_msgctl (first, second, (struct msqid_ds __user *) ptr);
-			goto out;
-		default:
-			err = -ENOSYS;
-			goto out;
-		}
-	if (call <= SHMCTL) 
-		switch (call) {
-		case SHMAT:
-			switch (version) {
-			case 0: default: {
-				ulong raddr;
-				err = do_shmat (first, (char __user *) ptr, second, &raddr);
-				if (err)
-					goto out;
-				err = -EFAULT;
-				if (put_user (raddr, (ulong __user *) third))
-					goto out;
-				err = 0;
-				goto out;
-				}
-			case 1:	/* iBCS2 emulator entry point */
-				err = -EINVAL;
-				goto out;
-			}
-		case SHMDT: 
-			err = sys_shmdt ((char __user *)ptr);
-			goto out;
-		case SHMGET:
-			err = sys_shmget (first, second, third);
-			goto out;
-		case SHMCTL:
-			err = sys_shmctl (first, second, (struct shmid_ds __user *) ptr);
-			goto out;
-		default:
-			err = -ENOSYS;
-			goto out;
-		}
-	else
-		err = -ENOSYS;
-out:
-	return err;
-}
-
 int sparc_mmap_check(unsigned long addr, unsigned long len)
 {
 	if (ARCH_SUN4C &&
diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c
index cb1bef6f14b7..45410e939628 100644
--- a/arch/sparc/kernel/sys_sparc_64.c
+++ b/arch/sparc/kernel/sys_sparc_64.c
@@ -426,7 +426,7 @@ out:
  * This is really horribly ugly.
  */
 
-SYSCALL_DEFINE6(ipc, unsigned int, call, int, first, unsigned long, second,
+SYSCALL_DEFINE6(sparc_ipc, unsigned int, call, int, first, unsigned long, second,
 		unsigned long, third, void __user *, ptr, long, fifth)
 {
 	long err;
diff --git a/arch/sparc/kernel/systbls.h b/arch/sparc/kernel/systbls.h
index 68312fe8da74..2c331c37e748 100644
--- a/arch/sparc/kernel/systbls.h
+++ b/arch/sparc/kernel/systbls.h
@@ -10,7 +10,7 @@ struct new_utsname;
 
 extern asmlinkage unsigned long sys_getpagesize(void);
 extern asmlinkage long sparc_pipe(struct pt_regs *regs);
-extern asmlinkage long sys_ipc(unsigned int call, int first,
+extern asmlinkage long sys_sparc_ipc(unsigned int call, int first,
 			       unsigned long second,
 			       unsigned long third,
 			       void __user *ptr, long fifth);
diff --git a/arch/sparc/kernel/systbls_64.S b/arch/sparc/kernel/systbls_64.S
index 17614251fb6d..30ca2b1d3a17 100644
--- a/arch/sparc/kernel/systbls_64.S
+++ b/arch/sparc/kernel/systbls_64.S
@@ -136,7 +136,7 @@ sys_call_table:
 /*200*/	.word sys_ssetmask, sys_nis_syscall, sys_newlstat, sys_uselib, sys_nis_syscall
 	.word sys_readahead, sys_socketcall, sys_syslog, sys_lookup_dcookie, sys_fadvise64
 /*210*/	.word sys_fadvise64_64, sys_tgkill, sys_waitpid, sys_swapoff, sys_sysinfo
-	.word sys_ipc, sys_nis_syscall, sys_clone, sys_ioprio_get, sys_adjtimex
+	.word sys_sparc_ipc, sys_nis_syscall, sys_clone, sys_ioprio_get, sys_adjtimex
 /*220*/	.word sys_nis_syscall, sys_ni_syscall, sys_delete_module, sys_ni_syscall, sys_getpgid
 	.word sys_bdflush, sys_sysfs, sys_nis_syscall, sys_setfsuid, sys_setfsgid
 /*230*/	.word sys_select, sys_nis_syscall, sys_splice, sys_stime, sys_statfs64
diff --git a/arch/um/sys-i386/syscalls.c b/arch/um/sys-i386/syscalls.c
index d0aa8f125ee6..70ca357393b8 100644
--- a/arch/um/sys-i386/syscalls.c
+++ b/arch/um/sys-i386/syscalls.c
@@ -34,92 +34,6 @@ long sys_clone(unsigned long clone_flags, unsigned long newsp,
 	return ret;
 }
 
-/*
- * sys_ipc() is the de-multiplexer for the SysV IPC calls..
- *
- * This is really horribly ugly.
- */
-long sys_ipc (uint call, int first, int second,
-	     int third, void __user *ptr, long fifth)
-{
-	int version, ret;
-
-	version = call >> 16; /* hack for backward compatibility */
-	call &= 0xffff;
-
-	switch (call) {
-	case SEMOP:
-		return sys_semtimedop(first, (struct sembuf __user *) ptr,
-				      second, NULL);
-	case SEMTIMEDOP:
-		return sys_semtimedop(first, (struct sembuf __user *) ptr,
-				      second,
-				      (const struct timespec __user *) fifth);
-	case SEMGET:
-		return sys_semget (first, second, third);
-	case SEMCTL: {
-		union semun fourth;
-		if (!ptr)
-			return -EINVAL;
-		if (get_user(fourth.__pad, (void __user * __user *) ptr))
-			return -EFAULT;
-		return sys_semctl (first, second, third, fourth);
-	}
-
-	case MSGSND:
-		return sys_msgsnd (first, (struct msgbuf *) ptr,
-				   second, third);
-	case MSGRCV:
-		switch (version) {
-		case 0: {
-			struct ipc_kludge tmp;
-			if (!ptr)
-				return -EINVAL;
-
-			if (copy_from_user(&tmp,
-					   (struct ipc_kludge *) ptr,
-					   sizeof (tmp)))
-				return -EFAULT;
-			return sys_msgrcv (first, tmp.msgp, second,
-					   tmp.msgtyp, third);
-		}
-		default:
-		        panic("msgrcv with version != 0");
-			return sys_msgrcv (first,
-					   (struct msgbuf *) ptr,
-					   second, fifth, third);
-		}
-	case MSGGET:
-		return sys_msgget ((key_t) first, second);
-	case MSGCTL:
-		return sys_msgctl (first, second, (struct msqid_ds *) ptr);
-
-	case SHMAT:
-		switch (version) {
-		default: {
-			ulong raddr;
-			ret = do_shmat (first, (char *) ptr, second, &raddr);
-			if (ret)
-				return ret;
-			return put_user (raddr, (ulong *) third);
-		}
-		case 1:	/* iBCS2 emulator entry point */
-			if (!segment_eq(get_fs(), get_ds()))
-				return -EINVAL;
-			return do_shmat (first, (char *) ptr, second, (ulong *) third);
-		}
-	case SHMDT:
-		return sys_shmdt ((char *)ptr);
-	case SHMGET:
-		return sys_shmget (first, second, third);
-	case SHMCTL:
-		return sys_shmctl (first, second,
-				   (struct shmid_ds *) ptr);
-	default:
-		return -ENOSYS;
-	}
-}
-
 long sys_sigaction(int sig, const struct old_sigaction __user *act,
 			 struct old_sigaction __user *oact)
 {
diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h
index 86ab6a0623fd..50f6a569f0d1 100644
--- a/arch/x86/include/asm/syscalls.h
+++ b/arch/x86/include/asm/syscalls.h
@@ -54,7 +54,6 @@ unsigned long sys_sigreturn(struct pt_regs *);
 struct oldold_utsname;
 struct old_utsname;
 
-asmlinkage int sys_ipc(uint, int, int, int, void __user *, long);
 asmlinkage int sys_uname(struct old_utsname __user *);
 asmlinkage int sys_olduname(struct oldold_utsname __user *);
 
diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h
index daa65d9aae95..45e64a17b86e 100644
--- a/arch/x86/include/asm/unistd_32.h
+++ b/arch/x86/include/asm/unistd_32.h
@@ -354,6 +354,7 @@
 #define __ARCH_WANT_STAT64
 #define __ARCH_WANT_SYS_ALARM
 #define __ARCH_WANT_SYS_GETHOSTNAME
+#define __ARCH_WANT_SYS_IPC
 #define __ARCH_WANT_SYS_PAUSE
 #define __ARCH_WANT_SYS_SGETMASK
 #define __ARCH_WANT_SYS_SIGNAL
diff --git a/arch/x86/kernel/sys_i386_32.c b/arch/x86/kernel/sys_i386_32.c
index 7955e90c8341..8b5c348fdcf2 100644
--- a/arch/x86/kernel/sys_i386_32.c
+++ b/arch/x86/kernel/sys_i386_32.c
@@ -24,91 +24,6 @@
 
 #include <asm/syscalls.h>
 
-/*
- * sys_ipc() is the de-multiplexer for the SysV IPC calls..
- *
- * This is really horribly ugly.
- */
-asmlinkage int sys_ipc(uint call, int first, int second,
-			int third, void __user *ptr, long fifth)
-{
-	int version, ret;
-
-	version = call >> 16; /* hack for backward compatibility */
-	call &= 0xffff;
-
-	switch (call) {
-	case SEMOP:
-		return sys_semtimedop(first, (struct sembuf __user *)ptr, second, NULL);
-	case SEMTIMEDOP:
-		return sys_semtimedop(first, (struct sembuf __user *)ptr, second,
-					(const struct timespec __user *)fifth);
-
-	case SEMGET:
-		return sys_semget(first, second, third);
-	case SEMCTL: {
-		union semun fourth;
-		if (!ptr)
-			return -EINVAL;
-		if (get_user(fourth.__pad, (void __user * __user *) ptr))
-			return -EFAULT;
-		return sys_semctl(first, second, third, fourth);
-	}
-
-	case MSGSND:
-		return sys_msgsnd(first, (struct msgbuf __user *) ptr,
-				   second, third);
-	case MSGRCV:
-		switch (version) {
-		case 0: {
-			struct ipc_kludge tmp;
-			if (!ptr)
-				return -EINVAL;
-
-			if (copy_from_user(&tmp,
-					   (struct ipc_kludge __user *) ptr,
-					   sizeof(tmp)))
-				return -EFAULT;
-			return sys_msgrcv(first, tmp.msgp, second,
-					   tmp.msgtyp, third);
-		}
-		default:
-			return sys_msgrcv(first,
-					   (struct msgbuf __user *) ptr,
-					   second, fifth, third);
-		}
-	case MSGGET:
-		return sys_msgget((key_t) first, second);
-	case MSGCTL:
-		return sys_msgctl(first, second, (struct msqid_ds __user *) ptr);
-
-	case SHMAT:
-		switch (version) {
-		default: {
-			ulong raddr;
-			ret = do_shmat(first, (char __user *) ptr, second, &raddr);
-			if (ret)
-				return ret;
-			return put_user(raddr, (ulong __user *) third);
-		}
-		case 1:	/* iBCS2 emulator entry point */
-			if (!segment_eq(get_fs(), get_ds()))
-				return -EINVAL;
-			/* The "(ulong *) third" is valid _only_ because of the kernel segment thing */
-			return do_shmat(first, (char __user *) ptr, second, (ulong *) third);
-		}
-	case SHMDT:
-		return sys_shmdt((char __user *)ptr);
-	case SHMGET:
-		return sys_shmget(first, second, third);
-	case SHMCTL:
-		return sys_shmctl(first, second,
-				   (struct shmid_ds __user *) ptr);
-	default:
-		return -ENOSYS;
-	}
-}
-
 /*
  * Old cruft
  */
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index b60907e3b0d5..fbb61ae70e06 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -684,6 +684,8 @@ asmlinkage long sys_shmat(int shmid, char __user *shmaddr, int shmflg);
 asmlinkage long sys_shmget(key_t key, size_t size, int flag);
 asmlinkage long sys_shmdt(char __user *shmaddr);
 asmlinkage long sys_shmctl(int shmid, int cmd, struct shmid_ds __user *buf);
+asmlinkage long sys_ipc(unsigned int call, int first, int second,
+		unsigned long third, void __user *ptr, long fifth);
 
 asmlinkage long sys_mq_open(const char __user *name, int oflag, mode_t mode, struct mq_attr __user *attr);
 asmlinkage long sys_mq_unlink(const char __user *name);
diff --git a/ipc/Makefile b/ipc/Makefile
index 4e1955ea815d..9075e172e52c 100644
--- a/ipc/Makefile
+++ b/ipc/Makefile
@@ -3,7 +3,7 @@
 #
 
 obj-$(CONFIG_SYSVIPC_COMPAT) += compat.o
-obj-$(CONFIG_SYSVIPC) += util.o msgutil.o msg.o sem.o shm.o ipcns_notifier.o
+obj-$(CONFIG_SYSVIPC) += util.o msgutil.o msg.o sem.o shm.o ipcns_notifier.o syscall.o
 obj-$(CONFIG_SYSVIPC_SYSCTL) += ipc_sysctl.o
 obj_mq-$(CONFIG_COMPAT) += compat_mq.o
 obj-$(CONFIG_POSIX_MQUEUE) += mqueue.o msgutil.o $(obj_mq-y)
diff --git a/ipc/syscall.c b/ipc/syscall.c
new file mode 100644
index 000000000000..355a3da9ec73
--- /dev/null
+++ b/ipc/syscall.c
@@ -0,0 +1,99 @@
+/*
+ * sys_ipc() is the old de-multiplexer for the SysV IPC calls.
+ *
+ * This is really horribly ugly, and new architectures should just wire up
+ * the individual syscalls instead.
+ */
+#include <linux/unistd.h>
+
+#ifdef __ARCH_WANT_SYS_IPC
+#include <linux/errno.h>
+#include <linux/ipc.h>
+#include <linux/shm.h>
+#include <linux/syscalls.h>
+#include <linux/uaccess.h>
+
+SYSCALL_DEFINE6(ipc, unsigned int, call, int, first, int, second,
+		unsigned long, third, void __user *, ptr, long, fifth)
+{
+	int version, ret;
+
+	version = call >> 16; /* hack for backward compatibility */
+	call &= 0xffff;
+
+	switch (call) {
+	case SEMOP:
+		return sys_semtimedop(first, (struct sembuf __user *)ptr,
+				      second, NULL);
+	case SEMTIMEDOP:
+		return sys_semtimedop(first, (struct sembuf __user *)ptr,
+				      second,
+				      (const struct timespec __user *)fifth);
+
+	case SEMGET:
+		return sys_semget(first, second, third);
+	case SEMCTL: {
+		union semun fourth;
+		if (!ptr)
+			return -EINVAL;
+		if (get_user(fourth.__pad, (void __user * __user *) ptr))
+			return -EFAULT;
+		return sys_semctl(first, second, third, fourth);
+	}
+
+	case MSGSND:
+		return sys_msgsnd(first, (struct msgbuf __user *) ptr,
+				  second, third);
+	case MSGRCV:
+		switch (version) {
+		case 0: {
+			struct ipc_kludge tmp;
+			if (!ptr)
+				return -EINVAL;
+
+			if (copy_from_user(&tmp,
+					   (struct ipc_kludge __user *) ptr,
+					   sizeof(tmp)))
+				return -EFAULT;
+			return sys_msgrcv(first, tmp.msgp, second,
+					   tmp.msgtyp, third);
+		}
+		default:
+			return sys_msgrcv(first,
+					   (struct msgbuf __user *) ptr,
+					   second, fifth, third);
+		}
+	case MSGGET:
+		return sys_msgget((key_t) first, second);
+	case MSGCTL:
+		return sys_msgctl(first, second, (struct msqid_ds __user *)ptr);
+
+	case SHMAT:
+		switch (version) {
+		default: {
+			unsigned long raddr;
+			ret = do_shmat(first, (char __user *)ptr,
+				       second, &raddr);
+			if (ret)
+				return ret;
+			return put_user(raddr, (unsigned long __user *) third);
+		}
+		case 1:
+			/*
+			 * This was the entry point for kernel-originating calls
+			 * from iBCS2 in 2.2 days.
+			 */
+			return -EINVAL;
+		}
+	case SHMDT:
+		return sys_shmdt((char __user *)ptr);
+	case SHMGET:
+		return sys_shmget(first, second, third);
+	case SHMCTL:
+		return sys_shmctl(first, second,
+				   (struct shmid_ds __user *) ptr);
+	default:
+		return -ENOSYS;
+	}
+}
+#endif
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 695384f12a7d..70f2ea758ffe 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -126,6 +126,7 @@ cond_syscall(sys_setreuid16);
 cond_syscall(sys_setuid16);
 cond_syscall(sys_vm86old);
 cond_syscall(sys_vm86);
+cond_syscall(sys_ipc);
 cond_syscall(compat_sys_ipc);
 cond_syscall(compat_sys_sysctl);
 cond_syscall(sys_flock);
-- 
cgit v1.2.3


From 5cacdb4add1b1e50fe75edc50ebbb7bddd9cf5e7 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 10 Mar 2010 15:21:21 -0800
Subject: Add generic sys_olduname()

Add generic implementations of the old and really old uname system calls.
Note that sh only implements sys_olduname but not sys_oldolduname, but I'm
not going to bother with another ifdef for that special case.

m32r implemented an old uname but never wired it up, so kill it, too.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: Jeff Dike <jdike@addtoit.com>
Cc: Hirokazu Takata <takata@linux-m32r.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: "Luck, Tony" <tony.luck@intel.com>
Cc: James Morris <jmorris@namei.org>
Cc: Andreas Schwab <schwab@linux-m68k.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/m32r/kernel/sys_m32r.c       | 11 --------
 arch/mips/include/asm/unistd.h    |  1 +
 arch/mips/kernel/syscall.c        | 42 -----------------------------
 arch/powerpc/include/asm/unistd.h |  1 +
 arch/powerpc/kernel/syscalls.c    | 57 ---------------------------------------
 arch/sh/include/asm/syscalls.h    |  3 ---
 arch/sh/include/asm/unistd_32.h   |  1 +
 arch/sh/include/asm/unistd_64.h   |  1 +
 arch/sh/kernel/sys_sh.c           | 11 --------
 arch/um/kernel/syscall.c          | 45 -------------------------------
 arch/x86/ia32/ia32entry.S         |  4 +--
 arch/x86/ia32/sys_ia32.c          | 52 -----------------------------------
 arch/x86/include/asm/sys_ia32.h   |  5 ----
 arch/x86/include/asm/syscalls.h   |  7 -----
 arch/x86/include/asm/unistd_32.h  |  1 +
 arch/x86/include/asm/unistd_64.h  |  1 +
 arch/x86/kernel/sys_i386_32.c     | 49 ---------------------------------
 include/linux/syscalls.h          |  4 +++
 kernel/sys.c                      | 54 +++++++++++++++++++++++++++++++++++++
 19 files changed, 66 insertions(+), 284 deletions(-)

(limited to 'include/linux')

diff --git a/arch/m32r/kernel/sys_m32r.c b/arch/m32r/kernel/sys_m32r.c
index cf2e7279ce9b..0a00f467edfa 100644
--- a/arch/m32r/kernel/sys_m32r.c
+++ b/arch/m32r/kernel/sys_m32r.c
@@ -76,17 +76,6 @@ asmlinkage int sys_tas(int __user *addr)
 	return oldval;
 }
 
-asmlinkage int sys_uname(struct old_utsname __user * name)
-{
-	int err;
-	if (!name)
-		return -EFAULT;
-	down_read(&uts_sem);
-	err = copy_to_user(name, utsname(), sizeof (*name));
-	up_read(&uts_sem);
-	return err?-EFAULT:0;
-}
-
 asmlinkage int sys_cacheflush(void *addr, int bytes, int cache)
 {
 	/* This should flush more selectively ...  */
diff --git a/arch/mips/include/asm/unistd.h b/arch/mips/include/asm/unistd.h
index 97fe472095f2..1b5a6648eb86 100644
--- a/arch/mips/include/asm/unistd.h
+++ b/arch/mips/include/asm/unistd.h
@@ -1014,6 +1014,7 @@
 #define __ARCH_WANT_SYS_LLSEEK
 #define __ARCH_WANT_SYS_NICE
 #define __ARCH_WANT_SYS_OLD_GETRLIMIT
+#define __ARCH_WANT_SYS_OLD_UNAME
 #define __ARCH_WANT_SYS_OLDUMOUNT
 #define __ARCH_WANT_SYS_SIGPENDING
 #define __ARCH_WANT_SYS_SIGPROCMASK
diff --git a/arch/mips/kernel/syscall.c b/arch/mips/kernel/syscall.c
index 257bf0141775..e96b1c30c7aa 100644
--- a/arch/mips/kernel/syscall.c
+++ b/arch/mips/kernel/syscall.c
@@ -215,48 +215,6 @@ out:
 	return error;
 }
 
-/*
- * Compacrapability ...
- */
-SYSCALL_DEFINE1(uname, struct old_utsname __user *, name)
-{
-	if (name && !copy_to_user(name, utsname(), sizeof (*name)))
-		return 0;
-	return -EFAULT;
-}
-
-/*
- * Compacrapability ...
- */
-SYSCALL_DEFINE1(olduname, struct oldold_utsname __user *, name)
-{
-	int error;
-
-	if (!name)
-		return -EFAULT;
-	if (!access_ok(VERIFY_WRITE, name, sizeof(struct oldold_utsname)))
-		return -EFAULT;
-
-	error = __copy_to_user(&name->sysname, &utsname()->sysname,
-			       __OLD_UTS_LEN);
-	error -= __put_user(0, name->sysname + __OLD_UTS_LEN);
-	error -= __copy_to_user(&name->nodename, &utsname()->nodename,
-				__OLD_UTS_LEN);
-	error -= __put_user(0, name->nodename + __OLD_UTS_LEN);
-	error -= __copy_to_user(&name->release, &utsname()->release,
-				__OLD_UTS_LEN);
-	error -= __put_user(0, name->release + __OLD_UTS_LEN);
-	error -= __copy_to_user(&name->version, &utsname()->version,
-				__OLD_UTS_LEN);
-	error -= __put_user(0, name->version + __OLD_UTS_LEN);
-	error -= __copy_to_user(&name->machine, &utsname()->machine,
-				__OLD_UTS_LEN);
-	error = __put_user(0, name->machine + __OLD_UTS_LEN);
-	error = error ? -EFAULT : 0;
-
-	return error;
-}
-
 SYSCALL_DEFINE1(set_thread_area, unsigned long, addr)
 {
 	struct thread_info *ti = task_thread_info(current);
diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h
index c13821fe8741..f0a10266e7f7 100644
--- a/arch/powerpc/include/asm/unistd.h
+++ b/arch/powerpc/include/asm/unistd.h
@@ -377,6 +377,7 @@
 #define __ARCH_WANT_SYS_LLSEEK
 #define __ARCH_WANT_SYS_NICE
 #define __ARCH_WANT_SYS_OLD_GETRLIMIT
+#define __ARCH_WANT_SYS_OLD_UNAME
 #define __ARCH_WANT_SYS_OLDUMOUNT
 #define __ARCH_WANT_SYS_SIGPENDING
 #define __ARCH_WANT_SYS_SIGPROCMASK
diff --git a/arch/powerpc/kernel/syscalls.c b/arch/powerpc/kernel/syscalls.c
index 69d3c5d50a54..f2496f2faecc 100644
--- a/arch/powerpc/kernel/syscalls.c
+++ b/arch/powerpc/kernel/syscalls.c
@@ -116,63 +116,6 @@ long ppc64_personality(unsigned long personality)
 }
 #endif
 
-#ifdef CONFIG_PPC64
-#define OVERRIDE_MACHINE    (personality(current->personality) == PER_LINUX32)
-#else
-#define OVERRIDE_MACHINE    0
-#endif
-
-static inline int override_machine(char __user *mach)
-{
-	if (OVERRIDE_MACHINE) {
-		/* change ppc64 to ppc */
-		if (__put_user(0, mach+3) || __put_user(0, mach+4))
-			return -EFAULT;
-	}
-	return 0;
-}
-
-int sys_uname(struct old_utsname __user *name)
-{
-	int err = 0;
-	
-	down_read(&uts_sem);
-	if (copy_to_user(name, utsname(), sizeof(*name)))
-		err = -EFAULT;
-	up_read(&uts_sem);
-	if (!err)
-		err = override_machine(name->machine);
-	return err;
-}
-
-int sys_olduname(struct oldold_utsname __user *name)
-{
-	int error;
-
-	if (!access_ok(VERIFY_WRITE, name, sizeof(struct oldold_utsname)))
-		return -EFAULT;
-  
-	down_read(&uts_sem);
-	error = __copy_to_user(&name->sysname, &utsname()->sysname,
-			       __OLD_UTS_LEN);
-	error |= __put_user(0, name->sysname + __OLD_UTS_LEN);
-	error |= __copy_to_user(&name->nodename, &utsname()->nodename,
-				__OLD_UTS_LEN);
-	error |= __put_user(0, name->nodename + __OLD_UTS_LEN);
-	error |= __copy_to_user(&name->release, &utsname()->release,
-				__OLD_UTS_LEN);
-	error |= __put_user(0, name->release + __OLD_UTS_LEN);
-	error |= __copy_to_user(&name->version, &utsname()->version,
-				__OLD_UTS_LEN);
-	error |= __put_user(0, name->version + __OLD_UTS_LEN);
-	error |= __copy_to_user(&name->machine, &utsname()->machine,
-				__OLD_UTS_LEN);
-	error |= override_machine(name->machine);
-	up_read(&uts_sem);
-
-	return error? -EFAULT: 0;
-}
-
 long ppc_fadvise64_64(int fd, int advice, u32 offset_high, u32 offset_low,
 		      u32 len_high, u32 len_low)
 {
diff --git a/arch/sh/include/asm/syscalls.h b/arch/sh/include/asm/syscalls.h
index c1ce2862f7be..507725af2e54 100644
--- a/arch/sh/include/asm/syscalls.h
+++ b/arch/sh/include/asm/syscalls.h
@@ -3,15 +3,12 @@
 
 #ifdef __KERNEL__
 
-struct old_utsname;
-
 asmlinkage int old_mmap(unsigned long addr, unsigned long len,
 			unsigned long prot, unsigned long flags,
 			int fd, unsigned long off);
 asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,
 			  unsigned long prot, unsigned long flags,
 			  unsigned long fd, unsigned long pgoff);
-asmlinkage int sys_uname(struct old_utsname __user *name);
 
 #ifdef CONFIG_SUPERH32
 # include "syscalls_32.h"
diff --git a/arch/sh/include/asm/unistd_32.h b/arch/sh/include/asm/unistd_32.h
index a48f65e2e429..0e7f0fc8f086 100644
--- a/arch/sh/include/asm/unistd_32.h
+++ b/arch/sh/include/asm/unistd_32.h
@@ -371,6 +371,7 @@
 #define __ARCH_WANT_SYS_LLSEEK
 #define __ARCH_WANT_SYS_NICE
 #define __ARCH_WANT_SYS_OLD_GETRLIMIT
+#define __ARCH_WANT_SYS_OLD_UNAME
 #define __ARCH_WANT_SYS_OLDUMOUNT
 #define __ARCH_WANT_SYS_SIGPENDING
 #define __ARCH_WANT_SYS_SIGPROCMASK
diff --git a/arch/sh/include/asm/unistd_64.h b/arch/sh/include/asm/unistd_64.h
index 7709b2b8f752..0580c33a1e04 100644
--- a/arch/sh/include/asm/unistd_64.h
+++ b/arch/sh/include/asm/unistd_64.h
@@ -411,6 +411,7 @@
 #define __ARCH_WANT_SYS_LLSEEK
 #define __ARCH_WANT_SYS_NICE
 #define __ARCH_WANT_SYS_OLD_GETRLIMIT
+#define __ARCH_WANT_SYS_OLD_UNAME
 #define __ARCH_WANT_SYS_OLDUMOUNT
 #define __ARCH_WANT_SYS_SIGPENDING
 #define __ARCH_WANT_SYS_SIGPROCMASK
diff --git a/arch/sh/kernel/sys_sh.c b/arch/sh/kernel/sys_sh.c
index c18cfaa67fdd..81f58371613d 100644
--- a/arch/sh/kernel/sys_sh.c
+++ b/arch/sh/kernel/sys_sh.c
@@ -93,14 +93,3 @@ asmlinkage int sys_cacheflush(unsigned long addr, unsigned long len, int op)
 	up_read(&current->mm->mmap_sem);
 	return 0;
 }
-
-asmlinkage int sys_uname(struct old_utsname __user *name)
-{
-	int err;
-	if (!name)
-		return -EFAULT;
-	down_read(&uts_sem);
-	err = copy_to_user(name, utsname(), sizeof(*name));
-	up_read(&uts_sem);
-	return err?-EFAULT:0;
-}
diff --git a/arch/um/kernel/syscall.c b/arch/um/kernel/syscall.c
index cccab850c27e..4393173923f5 100644
--- a/arch/um/kernel/syscall.c
+++ b/arch/um/kernel/syscall.c
@@ -51,51 +51,6 @@ long old_mmap(unsigned long addr, unsigned long len,
 	return err;
 }
 
-long sys_uname(struct old_utsname __user * name)
-{
-	long err;
-	if (!name)
-		return -EFAULT;
-	down_read(&uts_sem);
-	err = copy_to_user(name, utsname(), sizeof (*name));
-	up_read(&uts_sem);
-	return err?-EFAULT:0;
-}
-
-long sys_olduname(struct oldold_utsname __user * name)
-{
-	long error;
-
-	if (!name)
-		return -EFAULT;
-	if (!access_ok(VERIFY_WRITE,name,sizeof(struct oldold_utsname)))
-		return -EFAULT;
-
-	down_read(&uts_sem);
-
-	error = __copy_to_user(&name->sysname, &utsname()->sysname,
-			       __OLD_UTS_LEN);
-	error |= __put_user(0, name->sysname + __OLD_UTS_LEN);
-	error |= __copy_to_user(&name->nodename, &utsname()->nodename,
-				__OLD_UTS_LEN);
-	error |= __put_user(0, name->nodename + __OLD_UTS_LEN);
-	error |= __copy_to_user(&name->release, &utsname()->release,
-				__OLD_UTS_LEN);
-	error |= __put_user(0, name->release + __OLD_UTS_LEN);
-	error |= __copy_to_user(&name->version, &utsname()->version,
-				__OLD_UTS_LEN);
-	error |= __put_user(0, name->version + __OLD_UTS_LEN);
-	error |= __copy_to_user(&name->machine, &utsname()->machine,
-				__OLD_UTS_LEN);
-	error |= __put_user(0, name->machine + __OLD_UTS_LEN);
-
-	up_read(&uts_sem);
-
-	error = error ? -EFAULT : 0;
-
-	return error;
-}
-
 int kernel_execve(const char *filename, char *const argv[], char *const envp[])
 {
 	mm_segment_t fs;
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 34f821802c23..59b4556a5b92 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -563,7 +563,7 @@ ia32_sys_call_table:
 	.quad quiet_ni_syscall			/* old mpx syscall holder */
 	.quad sys_setpgid
 	.quad quiet_ni_syscall			/* old ulimit syscall holder */
-	.quad sys32_olduname
+	.quad sys_olduname
 	.quad sys_umask		/* 60 */
 	.quad sys_chroot
 	.quad compat_sys_ustat
@@ -613,7 +613,7 @@ ia32_sys_call_table:
 	.quad compat_sys_newstat
 	.quad compat_sys_newlstat
 	.quad compat_sys_newfstat
-	.quad sys32_uname
+	.quad sys_uname
 	.quad stub32_iopl		/* 110 */
 	.quad sys_vhangup
 	.quad quiet_ni_syscall	/* old "idle" system call */
diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c
index 56c99f46e289..74c35431b7d8 100644
--- a/arch/x86/ia32/sys_ia32.c
+++ b/arch/x86/ia32/sys_ia32.c
@@ -448,58 +448,6 @@ asmlinkage long sys32_sendfile(int out_fd, int in_fd,
 	return ret;
 }
 
-asmlinkage long sys32_olduname(struct oldold_utsname __user *name)
-{
-	char *arch = "x86_64";
-	int err;
-
-	if (!name)
-		return -EFAULT;
-	if (!access_ok(VERIFY_WRITE, name, sizeof(struct oldold_utsname)))
-		return -EFAULT;
-
-	down_read(&uts_sem);
-
-	err = __copy_to_user(&name->sysname, &utsname()->sysname,
-			     __OLD_UTS_LEN);
-	err |= __put_user(0, name->sysname+__OLD_UTS_LEN);
-	err |= __copy_to_user(&name->nodename, &utsname()->nodename,
-			      __OLD_UTS_LEN);
-	err |= __put_user(0, name->nodename+__OLD_UTS_LEN);
-	err |= __copy_to_user(&name->release, &utsname()->release,
-			      __OLD_UTS_LEN);
-	err |= __put_user(0, name->release+__OLD_UTS_LEN);
-	err |= __copy_to_user(&name->version, &utsname()->version,
-			      __OLD_UTS_LEN);
-	err |= __put_user(0, name->version+__OLD_UTS_LEN);
-
-	if (personality(current->personality) == PER_LINUX32)
-		arch = "i686";
-
-	err |= __copy_to_user(&name->machine, arch, strlen(arch) + 1);
-
-	up_read(&uts_sem);
-
-	err = err ? -EFAULT : 0;
-
-	return err;
-}
-
-long sys32_uname(struct old_utsname __user *name)
-{
-	int err;
-
-	if (!name)
-		return -EFAULT;
-	down_read(&uts_sem);
-	err = copy_to_user(name, utsname(), sizeof(*name));
-	up_read(&uts_sem);
-	if (personality(current->personality) == PER_LINUX32)
-		err |= copy_to_user(&name->machine, "i686", 5);
-
-	return err ? -EFAULT : 0;
-}
-
 asmlinkage long sys32_execve(char __user *name, compat_uptr_t __user *argv,
 			     compat_uptr_t __user *envp, struct pt_regs *regs)
 {
diff --git a/arch/x86/include/asm/sys_ia32.h b/arch/x86/include/asm/sys_ia32.h
index 7d348d803669..3ad421784ae7 100644
--- a/arch/x86/include/asm/sys_ia32.h
+++ b/arch/x86/include/asm/sys_ia32.h
@@ -54,11 +54,6 @@ asmlinkage long sys32_pwrite(unsigned int, char __user *, u32, u32, u32);
 asmlinkage long sys32_personality(unsigned long);
 asmlinkage long sys32_sendfile(int, int, compat_off_t __user *, s32);
 
-struct oldold_utsname;
-struct old_utsname;
-asmlinkage long sys32_olduname(struct oldold_utsname __user *);
-long sys32_uname(struct old_utsname __user *);
-
 asmlinkage long sys32_execve(char __user *, compat_uptr_t __user *,
 			     compat_uptr_t __user *, struct pt_regs *);
 asmlinkage long sys32_clone(unsigned int, unsigned int, struct pt_regs *);
diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h
index 47cd606c3537..5c044b43e9a7 100644
--- a/arch/x86/include/asm/syscalls.h
+++ b/arch/x86/include/asm/syscalls.h
@@ -50,13 +50,6 @@ asmlinkage int sys_sigaction(int, const struct old_sigaction __user *,
 			     struct old_sigaction __user *);
 unsigned long sys_sigreturn(struct pt_regs *);
 
-/* kernel/sys_i386_32.c */
-struct oldold_utsname;
-struct old_utsname;
-
-asmlinkage int sys_uname(struct old_utsname __user *);
-asmlinkage int sys_olduname(struct oldold_utsname __user *);
-
 /* kernel/vm86_32.c */
 int sys_vm86old(struct vm86_struct __user *, struct pt_regs *);
 int sys_vm86(unsigned long, unsigned long, struct pt_regs *);
diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h
index 45e64a17b86e..beb9b5f8f8a4 100644
--- a/arch/x86/include/asm/unistd_32.h
+++ b/arch/x86/include/asm/unistd_32.h
@@ -367,6 +367,7 @@
 #define __ARCH_WANT_SYS_LLSEEK
 #define __ARCH_WANT_SYS_NICE
 #define __ARCH_WANT_SYS_OLD_GETRLIMIT
+#define __ARCH_WANT_SYS_OLD_UNAME
 #define __ARCH_WANT_SYS_OLD_MMAP
 #define __ARCH_WANT_SYS_OLD_SELECT
 #define __ARCH_WANT_SYS_OLDUMOUNT
diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h
index 83e2d6dc5038..ff4307b0e81e 100644
--- a/arch/x86/include/asm/unistd_64.h
+++ b/arch/x86/include/asm/unistd_64.h
@@ -680,6 +680,7 @@ __SYSCALL(__NR_recvmmsg, sys_recvmmsg)
 #define __ARCH_WANT_SYS_LLSEEK
 #define __ARCH_WANT_SYS_NICE
 #define __ARCH_WANT_SYS_OLD_GETRLIMIT
+#define __ARCH_WANT_SYS_OLD_UNAME
 #define __ARCH_WANT_SYS_OLDUMOUNT
 #define __ARCH_WANT_SYS_SIGPENDING
 #define __ARCH_WANT_SYS_SIGPROCMASK
diff --git a/arch/x86/kernel/sys_i386_32.c b/arch/x86/kernel/sys_i386_32.c
index 8b5c348fdcf2..196552bb412c 100644
--- a/arch/x86/kernel/sys_i386_32.c
+++ b/arch/x86/kernel/sys_i386_32.c
@@ -24,55 +24,6 @@
 
 #include <asm/syscalls.h>
 
-/*
- * Old cruft
- */
-asmlinkage int sys_uname(struct old_utsname __user *name)
-{
-	int err;
-	if (!name)
-		return -EFAULT;
-	down_read(&uts_sem);
-	err = copy_to_user(name, utsname(), sizeof(*name));
-	up_read(&uts_sem);
-	return err? -EFAULT:0;
-}
-
-asmlinkage int sys_olduname(struct oldold_utsname __user *name)
-{
-	int error;
-
-	if (!name)
-		return -EFAULT;
-	if (!access_ok(VERIFY_WRITE, name, sizeof(struct oldold_utsname)))
-		return -EFAULT;
-
-	down_read(&uts_sem);
-
-	error = __copy_to_user(&name->sysname, &utsname()->sysname,
-			       __OLD_UTS_LEN);
-	error |= __put_user(0, name->sysname + __OLD_UTS_LEN);
-	error |= __copy_to_user(&name->nodename, &utsname()->nodename,
-				__OLD_UTS_LEN);
-	error |= __put_user(0, name->nodename + __OLD_UTS_LEN);
-	error |= __copy_to_user(&name->release, &utsname()->release,
-				__OLD_UTS_LEN);
-	error |= __put_user(0, name->release + __OLD_UTS_LEN);
-	error |= __copy_to_user(&name->version, &utsname()->version,
-				__OLD_UTS_LEN);
-	error |= __put_user(0, name->version + __OLD_UTS_LEN);
-	error |= __copy_to_user(&name->machine, &utsname()->machine,
-				__OLD_UTS_LEN);
-	error |= __put_user(0, name->machine + __OLD_UTS_LEN);
-
-	up_read(&uts_sem);
-
-	error = error ? -EFAULT : 0;
-
-	return error;
-}
-
-
 /*
  * Do a system call from kernel instead of calling sys_execve so we
  * end up with proper pt_regs.
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index fbb61ae70e06..44f2ad0e8825 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -31,6 +31,8 @@ struct msqid_ds;
 struct new_utsname;
 struct nfsctl_arg;
 struct __old_kernel_stat;
+struct oldold_utsname;
+struct old_utsname;
 struct pollfd;
 struct rlimit;
 struct rusage;
@@ -655,6 +657,8 @@ asmlinkage long sys_gethostname(char __user *name, int len);
 asmlinkage long sys_sethostname(char __user *name, int len);
 asmlinkage long sys_setdomainname(char __user *name, int len);
 asmlinkage long sys_newuname(struct new_utsname __user *name);
+asmlinkage long sys_uname(struct old_utsname __user *);
+asmlinkage long sys_olduname(struct oldold_utsname __user *);
 
 asmlinkage long sys_getrlimit(unsigned int resource,
 				struct rlimit __user *rlim);
diff --git a/kernel/sys.c b/kernel/sys.c
index e483eb5530e4..8298878f4f71 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1138,6 +1138,60 @@ SYSCALL_DEFINE1(newuname, struct new_utsname __user *, name)
 	return errno;
 }
 
+#ifdef __ARCH_WANT_SYS_OLD_UNAME
+/*
+ * Old cruft
+ */
+SYSCALL_DEFINE1(uname, struct old_utsname __user *, name)
+{
+	int error = 0;
+
+	if (!name)
+		return -EFAULT;
+
+	down_read(&uts_sem);
+	if (copy_to_user(name, utsname(), sizeof(*name)))
+		error = -EFAULT;
+	up_read(&uts_sem);
+
+	if (!error && override_architecture(name))
+		error = -EFAULT;
+	return error;
+}
+
+SYSCALL_DEFINE1(olduname, struct oldold_utsname __user *, name)
+{
+	int error;
+
+	if (!name)
+		return -EFAULT;
+	if (!access_ok(VERIFY_WRITE, name, sizeof(struct oldold_utsname)))
+		return -EFAULT;
+
+	down_read(&uts_sem);
+	error = __copy_to_user(&name->sysname, &utsname()->sysname,
+			       __OLD_UTS_LEN);
+	error |= __put_user(0, name->sysname + __OLD_UTS_LEN);
+	error |= __copy_to_user(&name->nodename, &utsname()->nodename,
+				__OLD_UTS_LEN);
+	error |= __put_user(0, name->nodename + __OLD_UTS_LEN);
+	error |= __copy_to_user(&name->release, &utsname()->release,
+				__OLD_UTS_LEN);
+	error |= __put_user(0, name->release + __OLD_UTS_LEN);
+	error |= __copy_to_user(&name->version, &utsname()->version,
+				__OLD_UTS_LEN);
+	error |= __put_user(0, name->version + __OLD_UTS_LEN);
+	error |= __copy_to_user(&name->machine, &utsname()->machine,
+				__OLD_UTS_LEN);
+	error |= __put_user(0, name->machine + __OLD_UTS_LEN);
+	up_read(&uts_sem);
+
+	if (!error && override_architecture(name))
+		error = -EFAULT;
+	return error ? -EFAULT : 0;
+}
+#endif
+
 SYSCALL_DEFINE2(sethostname, char __user *, name, int, len)
 {
 	int errno;
-- 
cgit v1.2.3


From 6e3e37a5a7c91045497553bcbd6a5faea98b21b3 Mon Sep 17 00:00:00 2001
From: André Goddard Rosa <andre.goddard@gmail.com>
Date: Wed, 10 Mar 2010 15:21:22 -0800
Subject: coredump: plug a memory leak situation on dump_seek()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

After having started writing the coredump, if filesystem reports an error
anytime while writing part of the core file, we would leak a memory page
when bailing out.

Signed-off-by: André Goddard Rosa <andre.goddard@gmail.com>
Acked-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Roland McGrath <roland@redhat.com>
Cc: WANG Cong <amwang@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/coredump.h | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/coredump.h b/include/linux/coredump.h
index b3c91d7cede4..8ba66a9d9022 100644
--- a/include/linux/coredump.h
+++ b/include/linux/coredump.h
@@ -16,6 +16,8 @@ static inline int dump_write(struct file *file, const void *addr, int nr)
 
 static inline int dump_seek(struct file *file, loff_t off)
 {
+	int ret = 1;
+
 	if (file->f_op->llseek && file->f_op->llseek != no_llseek) {
 		if (file->f_op->llseek(file, off, SEEK_CUR) < 0)
 			return 0;
@@ -29,13 +31,15 @@ static inline int dump_seek(struct file *file, loff_t off)
 
 			if (n > PAGE_SIZE)
 				n = PAGE_SIZE;
-			if (!dump_write(file, buf, n))
-				return 0;
+			if (!dump_write(file, buf, n)) {
+				ret = 0;
+				break;
+			}
 			off -= n;
 		}
 		free_page((unsigned long)buf);
 	}
-	return 1;
+	return ret;
 }
 
 #endif /* _LINUX_COREDUMP_H */
-- 
cgit v1.2.3


From 2468c7234b366eeb799ee0648cb58f9cba394a54 Mon Sep 17 00:00:00 2001
From: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Date: Wed, 10 Mar 2010 15:22:03 -0800
Subject: cgroup: introduce cancel_attach()

Add cancel_attach() operation to struct cgroup_subsys.  cancel_attach()
can be used when can_attach() operation prepares something for the subsys,
but we should rollback what can_attach() operation has prepared if attach
task fails after we've succeeded in can_attach().

Signed-off-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Acked-by: Li Zefan <lizf@cn.fujitsu.com>
Reviewed-by: Paul Menage <menage@google.com>
Cc: Balbir Singh <balbir@linux.vnet.ibm.com>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/cgroups/cgroups.txt | 13 ++++++++++++-
 include/linux/cgroup.h            |  2 ++
 kernel/cgroup.c                   | 40 ++++++++++++++++++++++++++++++++-------
 3 files changed, 47 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/cgroups/cgroups.txt b/Documentation/cgroups/cgroups.txt
index 0b33bfe7dde9..d45082653e3d 100644
--- a/Documentation/cgroups/cgroups.txt
+++ b/Documentation/cgroups/cgroups.txt
@@ -536,10 +536,21 @@ returns an error, this will abort the attach operation.  If a NULL
 task is passed, then a successful result indicates that *any*
 unspecified task can be moved into the cgroup. Note that this isn't
 called on a fork. If this method returns 0 (success) then this should
-remain valid while the caller holds cgroup_mutex. If threadgroup is
+remain valid while the caller holds cgroup_mutex and it is ensured that either
+attach() or cancel_attach() will be called in future. If threadgroup is
 true, then a successful result indicates that all threads in the given
 thread's threadgroup can be moved together.
 
+void cancel_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
+	       struct task_struct *task, bool threadgroup)
+(cgroup_mutex held by caller)
+
+Called when a task attach operation has failed after can_attach() has succeeded.
+A subsystem whose can_attach() has some side-effects should provide this
+function, so that the subsytem can implement a rollback. If not, not necessary.
+This will be called only about subsystems whose can_attach() operation have
+succeeded.
+
 void attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
 	    struct cgroup *old_cgrp, struct task_struct *task,
 	    bool threadgroup)
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index c9bbcb2a75ae..d08cfe7e12e5 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -428,6 +428,8 @@ struct cgroup_subsys {
 	void (*destroy)(struct cgroup_subsys *ss, struct cgroup *cgrp);
 	int (*can_attach)(struct cgroup_subsys *ss, struct cgroup *cgrp,
 			  struct task_struct *tsk, bool threadgroup);
+	void (*cancel_attach)(struct cgroup_subsys *ss, struct cgroup *cgrp,
+			  struct task_struct *tsk, bool threadgroup);
 	void (*attach)(struct cgroup_subsys *ss, struct cgroup *cgrp,
 			struct cgroup *old_cgrp, struct task_struct *tsk,
 			bool threadgroup);
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 4fd90e129772..be45d2f6008a 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1554,7 +1554,7 @@ int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
 int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
 {
 	int retval = 0;
-	struct cgroup_subsys *ss;
+	struct cgroup_subsys *ss, *failed_ss = NULL;
 	struct cgroup *oldcgrp;
 	struct css_set *cg;
 	struct css_set *newcg;
@@ -1568,8 +1568,16 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
 	for_each_subsys(root, ss) {
 		if (ss->can_attach) {
 			retval = ss->can_attach(ss, cgrp, tsk, false);
-			if (retval)
-				return retval;
+			if (retval) {
+				/*
+				 * Remember on which subsystem the can_attach()
+				 * failed, so that we only call cancel_attach()
+				 * against the subsystems whose can_attach()
+				 * succeeded. (See below)
+				 */
+				failed_ss = ss;
+				goto out;
+			}
 		}
 	}
 
@@ -1583,14 +1591,17 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
 	 */
 	newcg = find_css_set(cg, cgrp);
 	put_css_set(cg);
-	if (!newcg)
-		return -ENOMEM;
+	if (!newcg) {
+		retval = -ENOMEM;
+		goto out;
+	}
 
 	task_lock(tsk);
 	if (tsk->flags & PF_EXITING) {
 		task_unlock(tsk);
 		put_css_set(newcg);
-		return -ESRCH;
+		retval = -ESRCH;
+		goto out;
 	}
 	rcu_assign_pointer(tsk->cgroups, newcg);
 	task_unlock(tsk);
@@ -1616,7 +1627,22 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
 	 * is no longer empty.
 	 */
 	cgroup_wakeup_rmdir_waiter(cgrp);
-	return 0;
+out:
+	if (retval) {
+		for_each_subsys(root, ss) {
+			if (ss == failed_ss)
+				/*
+				 * This subsystem was the one that failed the
+				 * can_attach() check earlier, so we don't need
+				 * to call cancel_attach() against it or any
+				 * remaining subsystems.
+				 */
+				break;
+			if (ss->cancel_attach)
+				ss->cancel_attach(ss, cgrp, tsk, false);
+		}
+	}
+	return retval;
 }
 
 /*
-- 
cgit v1.2.3


From d7b9fff711d5e8db8c844161c684017e556c38a0 Mon Sep 17 00:00:00 2001
From: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Date: Wed, 10 Mar 2010 15:22:05 -0800
Subject: cgroup: introduce coalesce css_get() and css_put()

Current css_get() and css_put() increment/decrement css->refcnt one by
one.

This patch add a new function __css_get(), which takes "count" as a arg
and increment the css->refcnt by "count".  And this patch also add a new
arg("count") to __css_put() and change the function to decrement the
css->refcnt by "count".

These coalesce version of __css_get()/__css_put() will be used to improve
performance of memcg's moving charge feature later, where instead of
calling css_get()/css_put() repeatedly, these new functions will be used.

No change is needed for current users of css_get()/css_put().

Signed-off-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Acked-by: Paul Menage <menage@google.com>
Cc: Balbir Singh <balbir@linux.vnet.ibm.com>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/cgroup.h | 12 +++++++++---
 kernel/cgroup.c        |  5 +++--
 2 files changed, 12 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index d08cfe7e12e5..14160b5b693f 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -76,6 +76,12 @@ enum {
 	CSS_REMOVED, /* This CSS is dead */
 };
 
+/* Caller must verify that the css is not for root cgroup */
+static inline void __css_get(struct cgroup_subsys_state *css, int count)
+{
+	atomic_add(count, &css->refcnt);
+}
+
 /*
  * Call css_get() to hold a reference on the css; it can be used
  * for a reference obtained via:
@@ -87,7 +93,7 @@ static inline void css_get(struct cgroup_subsys_state *css)
 {
 	/* We don't need to reference count the root state */
 	if (!test_bit(CSS_ROOT, &css->flags))
-		atomic_inc(&css->refcnt);
+		__css_get(css, 1);
 }
 
 static inline bool css_is_removed(struct cgroup_subsys_state *css)
@@ -118,11 +124,11 @@ static inline bool css_tryget(struct cgroup_subsys_state *css)
  * css_get() or css_tryget()
  */
 
-extern void __css_put(struct cgroup_subsys_state *css);
+extern void __css_put(struct cgroup_subsys_state *css, int count);
 static inline void css_put(struct cgroup_subsys_state *css)
 {
 	if (!test_bit(CSS_ROOT, &css->flags))
-		__css_put(css);
+		__css_put(css, 1);
 }
 
 /* bits in struct cgroup flags field */
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index be45d2f6008a..cace83ddbcdc 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -3746,12 +3746,13 @@ static void check_for_release(struct cgroup *cgrp)
 	}
 }
 
-void __css_put(struct cgroup_subsys_state *css)
+/* Caller must verify that the css is not for root cgroup */
+void __css_put(struct cgroup_subsys_state *css, int count)
 {
 	struct cgroup *cgrp = css->cgroup;
 	int val;
 	rcu_read_lock();
-	val = atomic_dec_return(&css->refcnt);
+	val = atomic_sub_return(count, &css->refcnt);
 	if (val == 1) {
 		if (notify_on_release(cgrp)) {
 			set_bit(CGRP_RELEASABLE, &cgrp->flags);
-- 
cgit v1.2.3


From aae8aab40367036931608fdaf9e2dc568b516f19 Mon Sep 17 00:00:00 2001
From: Ben Blum <bblum@andrew.cmu.edu>
Date: Wed, 10 Mar 2010 15:22:07 -0800
Subject: cgroups: revamp subsys array

This patch series provides the ability for cgroup subsystems to be
compiled as modules both within and outside the kernel tree.  This is
mainly useful for classifiers and subsystems that hook into components
that are already modules.  cls_cgroup and blkio-cgroup serve as the
example use cases for this feature.

It provides an interface cgroup_load_subsys() and cgroup_unload_subsys()
which modular subsystems can use to register and depart during runtime.
The net_cls classifier subsystem serves as the example for a subsystem
which can be converted into a module using these changes.

Patch #1 sets up the subsys[] array so its contents can be dynamic as
modules appear and (eventually) disappear.  Iterations over the array are
modified to handle when subsystems are absent, and the dynamic section of
the array is protected by cgroup_mutex.

Patch #2 implements an interface for modules to load subsystems, called
cgroup_load_subsys, similar to cgroup_init_subsys, and adds a module
pointer in struct cgroup_subsys.

Patch #3 adds a mechanism for unloading modular subsystems, which includes
a more advanced rework of the rudimentary reference counting introduced in
patch 2.

Patch #4 modifies the net_cls subsystem, which already had some module
declarations, to be configurable as a module, which also serves as a
simple proof-of-concept.

Part of implementing patches 2 and 4 involved updating css pointers in
each css_set when the module appears or leaves.  In doing this, it was
discovered that css_sets always remain linked to the dummy cgroup,
regardless of whether or not any subsystems are actually bound to it
(i.e., not mounted on an actual hierarchy).  The subsystem loading and
unloading code therefore should keep in mind the special cases where the
added subsystem is the only one in the dummy cgroup (and therefore all
css_sets need to be linked back into it) and where the removed subsys was
the only one in the dummy cgroup (and therefore all css_sets should be
unlinked from it) - however, as all css_sets always stay attached to the
dummy cgroup anyway, these cases are ignored.  Any fix that addresses this
issue should also make sure these cases are addressed in the subsystem
loading and unloading code.

This patch:

Make subsys[] able to be dynamically populated to support modular
subsystems

This patch reworks the way the subsys[] array is used so that subsystems
can register themselves after boot time, and enables the internals of
cgroups to be able to handle when subsystems are not present or may
appear/disappear.

Signed-off-by: Ben Blum <bblum@andrew.cmu.edu>
Acked-by: Li Zefan <lizf@cn.fujitsu.com>
Cc: Paul Menage <menage@google.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Lai Jiangshan <laijs@cn.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/cgroup.h | 10 ++++--
 kernel/cgroup.c        | 96 +++++++++++++++++++++++++++++++++++++++++---------
 2 files changed, 88 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 14160b5b693f..28319a9fe569 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -40,13 +40,19 @@ extern int cgroupstats_build(struct cgroupstats *stats,
 
 extern const struct file_operations proc_cgroup_operations;
 
-/* Define the enumeration of all cgroup subsystems */
+/* Define the enumeration of all builtin cgroup subsystems */
 #define SUBSYS(_x) _x ## _subsys_id,
 enum cgroup_subsys_id {
 #include <linux/cgroup_subsys.h>
-	CGROUP_SUBSYS_COUNT
+	CGROUP_BUILTIN_SUBSYS_COUNT
 };
 #undef SUBSYS
+/*
+ * This define indicates the maximum number of subsystems that can be loaded
+ * at once. We limit to this many since cgroupfs_root has subsys_bits to keep
+ * track of all of them.
+ */
+#define CGROUP_SUBSYS_COUNT (BITS_PER_BYTE*sizeof(unsigned long))
 
 /* Per-subsystem/per-cgroup state maintained by the system. */
 struct cgroup_subsys_state {
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index cace83ddbcdc..c92fb9549358 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -57,10 +57,14 @@
 
 static DEFINE_MUTEX(cgroup_mutex);
 
-/* Generate an array of cgroup subsystem pointers */
+/*
+ * Generate an array of cgroup subsystem pointers. At boot time, this is
+ * populated up to CGROUP_BUILTIN_SUBSYS_COUNT, and modular subsystems are
+ * registered after that. The mutable section of this array is protected by
+ * cgroup_mutex.
+ */
 #define SUBSYS(_x) &_x ## _subsys,
-
-static struct cgroup_subsys *subsys[] = {
+static struct cgroup_subsys *subsys[CGROUP_SUBSYS_COUNT] = {
 #include <linux/cgroup_subsys.h>
 };
 
@@ -448,8 +452,11 @@ static struct css_set *find_existing_css_set(
 	struct hlist_node *node;
 	struct css_set *cg;
 
-	/* Built the set of subsystem state objects that we want to
-	 * see in the new css_set */
+	/*
+	 * Build the set of subsystem state objects that we want to see in the
+	 * new css_set. while subsystems can change globally, the entries here
+	 * won't change, so no need for locking.
+	 */
 	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
 		if (root->subsys_bits & (1UL << i)) {
 			/* Subsystem is in this hierarchy. So we want
@@ -884,7 +891,9 @@ void cgroup_release_and_wakeup_rmdir(struct cgroup_subsys_state *css)
 	css_put(css);
 }
 
-
+/*
+ * Call with cgroup_mutex held.
+ */
 static int rebind_subsystems(struct cgroupfs_root *root,
 			      unsigned long final_bits)
 {
@@ -892,6 +901,8 @@ static int rebind_subsystems(struct cgroupfs_root *root,
 	struct cgroup *cgrp = &root->top_cgroup;
 	int i;
 
+	BUG_ON(!mutex_is_locked(&cgroup_mutex));
+
 	removed_bits = root->actual_subsys_bits & ~final_bits;
 	added_bits = final_bits & ~root->actual_subsys_bits;
 	/* Check that any added subsystems are currently free */
@@ -900,6 +911,12 @@ static int rebind_subsystems(struct cgroupfs_root *root,
 		struct cgroup_subsys *ss = subsys[i];
 		if (!(bit & added_bits))
 			continue;
+		/*
+		 * Nobody should tell us to do a subsys that doesn't exist:
+		 * parse_cgroupfs_options should catch that case and refcounts
+		 * ensure that subsystems won't disappear once selected.
+		 */
+		BUG_ON(ss == NULL);
 		if (ss->root != &rootnode) {
 			/* Subsystem isn't free */
 			return -EBUSY;
@@ -919,6 +936,7 @@ static int rebind_subsystems(struct cgroupfs_root *root,
 		unsigned long bit = 1UL << i;
 		if (bit & added_bits) {
 			/* We're binding this subsystem to this hierarchy */
+			BUG_ON(ss == NULL);
 			BUG_ON(cgrp->subsys[i]);
 			BUG_ON(!dummytop->subsys[i]);
 			BUG_ON(dummytop->subsys[i]->cgroup != dummytop);
@@ -932,6 +950,7 @@ static int rebind_subsystems(struct cgroupfs_root *root,
 			mutex_unlock(&ss->hierarchy_mutex);
 		} else if (bit & removed_bits) {
 			/* We're removing this subsystem */
+			BUG_ON(ss == NULL);
 			BUG_ON(cgrp->subsys[i] != dummytop->subsys[i]);
 			BUG_ON(cgrp->subsys[i]->cgroup != cgrp);
 			mutex_lock(&ss->hierarchy_mutex);
@@ -944,6 +963,7 @@ static int rebind_subsystems(struct cgroupfs_root *root,
 			mutex_unlock(&ss->hierarchy_mutex);
 		} else if (bit & final_bits) {
 			/* Subsystem state should already exist */
+			BUG_ON(ss == NULL);
 			BUG_ON(!cgrp->subsys[i]);
 		} else {
 			/* Subsystem state shouldn't exist */
@@ -986,14 +1006,18 @@ struct cgroup_sb_opts {
 
 };
 
-/* Convert a hierarchy specifier into a bitmask of subsystems and
- * flags. */
+/*
+ * Convert a hierarchy specifier into a bitmask of subsystems and flags. Call
+ * with cgroup_mutex held to protect the subsys[] array.
+ */
 static int parse_cgroupfs_options(char *data,
 				     struct cgroup_sb_opts *opts)
 {
 	char *token, *o = data ?: "all";
 	unsigned long mask = (unsigned long)-1;
 
+	BUG_ON(!mutex_is_locked(&cgroup_mutex));
+
 #ifdef CONFIG_CPUSETS
 	mask = ~(1UL << cpuset_subsys_id);
 #endif
@@ -1009,6 +1033,8 @@ static int parse_cgroupfs_options(char *data,
 			opts->subsys_bits = 0;
 			for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
 				struct cgroup_subsys *ss = subsys[i];
+				if (ss == NULL)
+					continue;
 				if (!ss->disabled)
 					opts->subsys_bits |= 1ul << i;
 			}
@@ -1053,6 +1079,8 @@ static int parse_cgroupfs_options(char *data,
 			int i;
 			for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
 				ss = subsys[i];
+				if (ss == NULL)
+					continue;
 				if (!strcmp(token, ss->name)) {
 					if (!ss->disabled)
 						set_bit(i, &opts->subsys_bits);
@@ -1306,7 +1334,9 @@ static int cgroup_get_sb(struct file_system_type *fs_type,
 	struct cgroupfs_root *new_root;
 
 	/* First find the desired set of subsystems */
+	mutex_lock(&cgroup_mutex);
 	ret = parse_cgroupfs_options(data, &opts);
+	mutex_unlock(&cgroup_mutex);
 	if (ret)
 		goto out_err;
 
@@ -2918,8 +2948,14 @@ static void cgroup_lock_hierarchy(struct cgroupfs_root *root)
 	/* We need to take each hierarchy_mutex in a consistent order */
 	int i;
 
+	/*
+	 * No worry about a race with rebind_subsystems that might mess up the
+	 * locking order, since both parties are under cgroup_mutex.
+	 */
 	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
 		struct cgroup_subsys *ss = subsys[i];
+		if (ss == NULL)
+			continue;
 		if (ss->root == root)
 			mutex_lock(&ss->hierarchy_mutex);
 	}
@@ -2931,6 +2967,8 @@ static void cgroup_unlock_hierarchy(struct cgroupfs_root *root)
 
 	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
 		struct cgroup_subsys *ss = subsys[i];
+		if (ss == NULL)
+			continue;
 		if (ss->root == root)
 			mutex_unlock(&ss->hierarchy_mutex);
 	}
@@ -3054,11 +3092,16 @@ static int cgroup_has_css_refs(struct cgroup *cgrp)
 	 * synchronization other than RCU, and the subsystem linked
 	 * list isn't RCU-safe */
 	int i;
+	/*
+	 * We won't need to lock the subsys array, because the subsystems
+	 * we're concerned about aren't going anywhere since our cgroup root
+	 * has a reference on them.
+	 */
 	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
 		struct cgroup_subsys *ss = subsys[i];
 		struct cgroup_subsys_state *css;
-		/* Skip subsystems not in this hierarchy */
-		if (ss->root != cgrp->root)
+		/* Skip subsystems not present or not in this hierarchy */
+		if (ss == NULL || ss->root != cgrp->root)
 			continue;
 		css = cgrp->subsys[ss->subsys_id];
 		/* When called from check_for_release() it's possible
@@ -3279,7 +3322,8 @@ int __init cgroup_init_early(void)
 	for (i = 0; i < CSS_SET_TABLE_SIZE; i++)
 		INIT_HLIST_HEAD(&css_set_table[i]);
 
-	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
+	/* at bootup time, we don't worry about modular subsystems */
+	for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
 		struct cgroup_subsys *ss = subsys[i];
 
 		BUG_ON(!ss->name);
@@ -3314,7 +3358,8 @@ int __init cgroup_init(void)
 	if (err)
 		return err;
 
-	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
+	/* at bootup time, we don't worry about modular subsystems */
+	for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
 		struct cgroup_subsys *ss = subsys[i];
 		if (!ss->early_init)
 			cgroup_init_subsys(ss);
@@ -3423,9 +3468,16 @@ static int proc_cgroupstats_show(struct seq_file *m, void *v)
 	int i;
 
 	seq_puts(m, "#subsys_name\thierarchy\tnum_cgroups\tenabled\n");
+	/*
+	 * ideally we don't want subsystems moving around while we do this.
+	 * cgroup_mutex is also necessary to guarantee an atomic snapshot of
+	 * subsys/hierarchy state.
+	 */
 	mutex_lock(&cgroup_mutex);
 	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
 		struct cgroup_subsys *ss = subsys[i];
+		if (ss == NULL)
+			continue;
 		seq_printf(m, "%s\t%d\t%d\t%d\n",
 			   ss->name, ss->root->hierarchy_id,
 			   ss->root->number_of_cgroups, !ss->disabled);
@@ -3483,7 +3535,12 @@ void cgroup_fork_callbacks(struct task_struct *child)
 {
 	if (need_forkexit_callback) {
 		int i;
-		for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
+		/*
+		 * forkexit callbacks are only supported for builtin
+		 * subsystems, and the builtin section of the subsys array is
+		 * immutable, so we don't need to lock the subsys array here.
+		 */
+		for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
 			struct cgroup_subsys *ss = subsys[i];
 			if (ss->fork)
 				ss->fork(ss, child);
@@ -3552,7 +3609,11 @@ void cgroup_exit(struct task_struct *tsk, int run_callbacks)
 	struct css_set *cg;
 
 	if (run_callbacks && need_forkexit_callback) {
-		for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
+		/*
+		 * modular subsystems can't use callbacks, so no need to lock
+		 * the subsys array
+		 */
+		for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
 			struct cgroup_subsys *ss = subsys[i];
 			if (ss->exit)
 				ss->exit(ss, tsk);
@@ -3844,8 +3905,11 @@ static int __init cgroup_disable(char *str)
 	while ((token = strsep(&str, ",")) != NULL) {
 		if (!*token)
 			continue;
-
-		for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
+		/*
+		 * cgroup_disable, being at boot time, can't know about module
+		 * subsystems, so we don't worry about them.
+		 */
+		for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
 			struct cgroup_subsys *ss = subsys[i];
 
 			if (!strcmp(token, ss->name)) {
-- 
cgit v1.2.3


From e6a1105ba08b265023dd71a4174fb4a29ebc7083 Mon Sep 17 00:00:00 2001
From: Ben Blum <bblum@andrew.cmu.edu>
Date: Wed, 10 Mar 2010 15:22:09 -0800
Subject: cgroups: subsystem module loading interface

Add interface between cgroups subsystem management and module loading

This patch implements rudimentary module-loading support for cgroups -
namely, a cgroup_load_subsys (similar to cgroup_init_subsys) for use as a
module initcall, and a struct module pointer in struct cgroup_subsys.

Several functions that might be wanted by modules have had EXPORT_SYMBOL
added to them, but it's unclear exactly which functions want it and which
won't.

Signed-off-by: Ben Blum <bblum@andrew.cmu.edu>
Acked-by: Li Zefan <lizf@cn.fujitsu.com>
Cc: Paul Menage <menage@google.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Lai Jiangshan <laijs@cn.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/cgroups/cgroups.txt |   4 +
 include/linux/cgroup.h            |   4 +
 kernel/cgroup.c                   | 150 ++++++++++++++++++++++++++++++++++++--
 3 files changed, 153 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/cgroups/cgroups.txt b/Documentation/cgroups/cgroups.txt
index d45082653e3d..ae8a037a761e 100644
--- a/Documentation/cgroups/cgroups.txt
+++ b/Documentation/cgroups/cgroups.txt
@@ -488,6 +488,10 @@ Each subsystem should:
 - add an entry in linux/cgroup_subsys.h
 - define a cgroup_subsys object called <name>_subsys
 
+If a subsystem can be compiled as a module, it should also have in its
+module initcall a call to cgroup_load_subsys(&its_subsys_struct). It
+should also set its_subsys.module = THIS_MODULE in its .c file.
+
 Each subsystem may export the following methods. The only mandatory
 methods are create/destroy. Any others that are null are presumed to
 be successful no-ops.
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 28319a9fe569..402ce477c47e 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -37,6 +37,7 @@ extern void cgroup_post_fork(struct task_struct *p);
 extern void cgroup_exit(struct task_struct *p, int run_callbacks);
 extern int cgroupstats_build(struct cgroupstats *stats,
 				struct dentry *dentry);
+extern int cgroup_load_subsys(struct cgroup_subsys *ss);
 
 extern const struct file_operations proc_cgroup_operations;
 
@@ -486,6 +487,9 @@ struct cgroup_subsys {
 	/* used when use_id == true */
 	struct idr idr;
 	spinlock_t id_lock;
+
+	/* should be defined only by modular subsystems */
+	struct module *module;
 };
 
 #define SUBSYS(_x) extern struct cgroup_subsys _x ## _subsys;
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index c92fb9549358..2cae38e64c59 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -44,6 +44,7 @@
 #include <linux/string.h>
 #include <linux/sort.h>
 #include <linux/kmod.h>
+#include <linux/module.h>
 #include <linux/delayacct.h>
 #include <linux/cgroupstats.h>
 #include <linux/hash.h>
@@ -254,7 +255,8 @@ struct cg_cgroup_link {
 static struct css_set init_css_set;
 static struct cg_cgroup_link init_css_set_link;
 
-static int cgroup_subsys_init_idr(struct cgroup_subsys *ss);
+static int cgroup_init_idr(struct cgroup_subsys *ss,
+			   struct cgroup_subsys_state *css);
 
 /* css_set_lock protects the list of css_set objects, and the
  * chain of tasks off each css_set.  Nests outside task->alloc_lock
@@ -2125,6 +2127,7 @@ int cgroup_add_file(struct cgroup *cgrp,
 		error = PTR_ERR(dentry);
 	return error;
 }
+EXPORT_SYMBOL_GPL(cgroup_add_file);
 
 int cgroup_add_files(struct cgroup *cgrp,
 			struct cgroup_subsys *subsys,
@@ -2139,6 +2142,7 @@ int cgroup_add_files(struct cgroup *cgrp,
 	}
 	return 0;
 }
+EXPORT_SYMBOL_GPL(cgroup_add_files);
 
 /**
  * cgroup_task_count - count the number of tasks in a cgroup.
@@ -3292,7 +3296,144 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
 	mutex_init(&ss->hierarchy_mutex);
 	lockdep_set_class(&ss->hierarchy_mutex, &ss->subsys_key);
 	ss->active = 1;
+
+	/* this function shouldn't be used with modular subsystems, since they
+	 * need to register a subsys_id, among other things */
+	BUG_ON(ss->module);
+}
+
+/**
+ * cgroup_load_subsys: load and register a modular subsystem at runtime
+ * @ss: the subsystem to load
+ *
+ * This function should be called in a modular subsystem's initcall. If the
+ * subsytem is built as a module, it will be assigned a new subsys_id and set
+ * up for use. If the subsystem is built-in anyway, work is delegated to the
+ * simpler cgroup_init_subsys.
+ */
+int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss)
+{
+	int i;
+	struct cgroup_subsys_state *css;
+
+	/* check name and function validity */
+	if (ss->name == NULL || strlen(ss->name) > MAX_CGROUP_TYPE_NAMELEN ||
+	    ss->create == NULL || ss->destroy == NULL)
+		return -EINVAL;
+
+	/*
+	 * we don't support callbacks in modular subsystems. this check is
+	 * before the ss->module check for consistency; a subsystem that could
+	 * be a module should still have no callbacks even if the user isn't
+	 * compiling it as one.
+	 */
+	if (ss->fork || ss->exit)
+		return -EINVAL;
+
+	/*
+	 * an optionally modular subsystem is built-in: we want to do nothing,
+	 * since cgroup_init_subsys will have already taken care of it.
+	 */
+	if (ss->module == NULL) {
+		/* a few sanity checks */
+		BUG_ON(ss->subsys_id >= CGROUP_BUILTIN_SUBSYS_COUNT);
+		BUG_ON(subsys[ss->subsys_id] != ss);
+		return 0;
+	}
+
+	/*
+	 * need to register a subsys id before anything else - for example,
+	 * init_cgroup_css needs it.
+	 */
+	mutex_lock(&cgroup_mutex);
+	/* find the first empty slot in the array */
+	for (i = CGROUP_BUILTIN_SUBSYS_COUNT; i < CGROUP_SUBSYS_COUNT; i++) {
+		if (subsys[i] == NULL)
+			break;
+	}
+	if (i == CGROUP_SUBSYS_COUNT) {
+		/* maximum number of subsystems already registered! */
+		mutex_unlock(&cgroup_mutex);
+		return -EBUSY;
+	}
+	/* assign ourselves the subsys_id */
+	ss->subsys_id = i;
+	subsys[i] = ss;
+
+	/*
+	 * no ss->create seems to need anything important in the ss struct, so
+	 * this can happen first (i.e. before the rootnode attachment).
+	 */
+	css = ss->create(ss, dummytop);
+	if (IS_ERR(css)) {
+		/* failure case - need to deassign the subsys[] slot. */
+		subsys[i] = NULL;
+		mutex_unlock(&cgroup_mutex);
+		return PTR_ERR(css);
+	}
+
+	list_add(&ss->sibling, &rootnode.subsys_list);
+	ss->root = &rootnode;
+
+	/* our new subsystem will be attached to the dummy hierarchy. */
+	init_cgroup_css(css, ss, dummytop);
+	/* init_idr must be after init_cgroup_css because it sets css->id. */
+	if (ss->use_id) {
+		int ret = cgroup_init_idr(ss, css);
+		if (ret) {
+			dummytop->subsys[ss->subsys_id] = NULL;
+			ss->destroy(ss, dummytop);
+			subsys[i] = NULL;
+			mutex_unlock(&cgroup_mutex);
+			return ret;
+		}
+	}
+
+	/*
+	 * Now we need to entangle the css into the existing css_sets. unlike
+	 * in cgroup_init_subsys, there are now multiple css_sets, so each one
+	 * will need a new pointer to it; done by iterating the css_set_table.
+	 * furthermore, modifying the existing css_sets will corrupt the hash
+	 * table state, so each changed css_set will need its hash recomputed.
+	 * this is all done under the css_set_lock.
+	 */
+	write_lock(&css_set_lock);
+	for (i = 0; i < CSS_SET_TABLE_SIZE; i++) {
+		struct css_set *cg;
+		struct hlist_node *node, *tmp;
+		struct hlist_head *bucket = &css_set_table[i], *new_bucket;
+
+		hlist_for_each_entry_safe(cg, node, tmp, bucket, hlist) {
+			/* skip entries that we already rehashed */
+			if (cg->subsys[ss->subsys_id])
+				continue;
+			/* remove existing entry */
+			hlist_del(&cg->hlist);
+			/* set new value */
+			cg->subsys[ss->subsys_id] = css;
+			/* recompute hash and restore entry */
+			new_bucket = css_set_hash(cg->subsys);
+			hlist_add_head(&cg->hlist, new_bucket);
+		}
+	}
+	write_unlock(&css_set_lock);
+
+	mutex_init(&ss->hierarchy_mutex);
+	lockdep_set_class(&ss->hierarchy_mutex, &ss->subsys_key);
+	ss->active = 1;
+
+	/*
+	 * pin the subsystem's module so it doesn't go away. this shouldn't
+	 * fail, since the module's initcall calls us.
+	 * TODO: with module unloading, move this elsewhere
+	 */
+	BUG_ON(!try_module_get(ss->module));
+
+	/* success! */
+	mutex_unlock(&cgroup_mutex);
+	return 0;
 }
+EXPORT_SYMBOL_GPL(cgroup_load_subsys);
 
 /**
  * cgroup_init_early - cgroup initialization at system boot
@@ -3364,7 +3505,7 @@ int __init cgroup_init(void)
 		if (!ss->early_init)
 			cgroup_init_subsys(ss);
 		if (ss->use_id)
-			cgroup_subsys_init_idr(ss);
+			cgroup_init_idr(ss, init_css_set.subsys[ss->subsys_id]);
 	}
 
 	/* Add init_css_set to the hash table */
@@ -4033,15 +4174,14 @@ err_out:
 
 }
 
-static int __init cgroup_subsys_init_idr(struct cgroup_subsys *ss)
+static int __init_or_module cgroup_init_idr(struct cgroup_subsys *ss,
+					    struct cgroup_subsys_state *rootcss)
 {
 	struct css_id *newid;
-	struct cgroup_subsys_state *rootcss;
 
 	spin_lock_init(&ss->id_lock);
 	idr_init(&ss->idr);
 
-	rootcss = init_css_set.subsys[ss->subsys_id];
 	newid = get_new_cssid(ss, 0);
 	if (IS_ERR(newid))
 		return PTR_ERR(newid);
-- 
cgit v1.2.3


From cf5d5941fda647fe3d2f2d00cf9e0245236a5f08 Mon Sep 17 00:00:00 2001
From: Ben Blum <bblum@andrew.cmu.edu>
Date: Wed, 10 Mar 2010 15:22:09 -0800
Subject: cgroups: subsystem module unloading

Provides support for unloading modular subsystems.

This patch adds a new function cgroup_unload_subsys which is to be used
for removing a loaded subsystem during module deletion.  Reference
counting of the subsystems' modules is moved from once (at load time) to
once per attached hierarchy (in parse_cgroupfs_options and
rebind_subsystems) (i.e., 0 or 1).

Signed-off-by: Ben Blum <bblum@andrew.cmu.edu>
Acked-by: Li Zefan <lizf@cn.fujitsu.com>
Cc: Paul Menage <menage@google.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Lai Jiangshan <laijs@cn.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/cgroups/cgroups.txt |   5 +-
 include/linux/cgroup.h            |   4 +-
 kernel/cgroup.c                   | 167 ++++++++++++++++++++++++++++++++------
 3 files changed, 148 insertions(+), 28 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/cgroups/cgroups.txt b/Documentation/cgroups/cgroups.txt
index ae8a037a761e..764007b63921 100644
--- a/Documentation/cgroups/cgroups.txt
+++ b/Documentation/cgroups/cgroups.txt
@@ -489,8 +489,9 @@ Each subsystem should:
 - define a cgroup_subsys object called <name>_subsys
 
 If a subsystem can be compiled as a module, it should also have in its
-module initcall a call to cgroup_load_subsys(&its_subsys_struct). It
-should also set its_subsys.module = THIS_MODULE in its .c file.
+module initcall a call to cgroup_load_subsys(), and in its exitcall a
+call to cgroup_unload_subsys(). It should also set its_subsys.module =
+THIS_MODULE in its .c file.
 
 Each subsystem may export the following methods. The only mandatory
 methods are create/destroy. Any others that are null are presumed to
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 402ce477c47e..2a59d3101e5d 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -38,6 +38,7 @@ extern void cgroup_exit(struct task_struct *p, int run_callbacks);
 extern int cgroupstats_build(struct cgroupstats *stats,
 				struct dentry *dentry);
 extern int cgroup_load_subsys(struct cgroup_subsys *ss);
+extern void cgroup_unload_subsys(struct cgroup_subsys *ss);
 
 extern const struct file_operations proc_cgroup_operations;
 
@@ -271,7 +272,8 @@ struct css_set {
 	/*
 	 * Set of subsystem states, one for each subsystem. This array
 	 * is immutable after creation apart from the init_css_set
-	 * during subsystem registration (at boot time).
+	 * during subsystem registration (at boot time) and modular subsystem
+	 * loading/unloading.
 	 */
 	struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT];
 
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 2cae38e64c59..aa889c96cc74 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -894,7 +894,9 @@ void cgroup_release_and_wakeup_rmdir(struct cgroup_subsys_state *css)
 }
 
 /*
- * Call with cgroup_mutex held.
+ * Call with cgroup_mutex held. Drops reference counts on modules, including
+ * any duplicate ones that parse_cgroupfs_options took. If this function
+ * returns an error, no reference counts are touched.
  */
 static int rebind_subsystems(struct cgroupfs_root *root,
 			      unsigned long final_bits)
@@ -950,6 +952,7 @@ static int rebind_subsystems(struct cgroupfs_root *root,
 			if (ss->bind)
 				ss->bind(ss, cgrp);
 			mutex_unlock(&ss->hierarchy_mutex);
+			/* refcount was already taken, and we're keeping it */
 		} else if (bit & removed_bits) {
 			/* We're removing this subsystem */
 			BUG_ON(ss == NULL);
@@ -963,10 +966,20 @@ static int rebind_subsystems(struct cgroupfs_root *root,
 			subsys[i]->root = &rootnode;
 			list_move(&ss->sibling, &rootnode.subsys_list);
 			mutex_unlock(&ss->hierarchy_mutex);
+			/* subsystem is now free - drop reference on module */
+			module_put(ss->module);
 		} else if (bit & final_bits) {
 			/* Subsystem state should already exist */
 			BUG_ON(ss == NULL);
 			BUG_ON(!cgrp->subsys[i]);
+			/*
+			 * a refcount was taken, but we already had one, so
+			 * drop the extra reference.
+			 */
+			module_put(ss->module);
+#ifdef CONFIG_MODULE_UNLOAD
+			BUG_ON(ss->module && !module_refcount(ss->module));
+#endif
 		} else {
 			/* Subsystem state shouldn't exist */
 			BUG_ON(cgrp->subsys[i]);
@@ -1010,13 +1023,16 @@ struct cgroup_sb_opts {
 
 /*
  * Convert a hierarchy specifier into a bitmask of subsystems and flags. Call
- * with cgroup_mutex held to protect the subsys[] array.
+ * with cgroup_mutex held to protect the subsys[] array. This function takes
+ * refcounts on subsystems to be used, unless it returns error, in which case
+ * no refcounts are taken.
  */
-static int parse_cgroupfs_options(char *data,
-				     struct cgroup_sb_opts *opts)
+static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts)
 {
 	char *token, *o = data ?: "all";
 	unsigned long mask = (unsigned long)-1;
+	int i;
+	bool module_pin_failed = false;
 
 	BUG_ON(!mutex_is_locked(&cgroup_mutex));
 
@@ -1031,7 +1047,6 @@ static int parse_cgroupfs_options(char *data,
 			return -EINVAL;
 		if (!strcmp(token, "all")) {
 			/* Add all non-disabled subsystems */
-			int i;
 			opts->subsys_bits = 0;
 			for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
 				struct cgroup_subsys *ss = subsys[i];
@@ -1054,7 +1069,6 @@ static int parse_cgroupfs_options(char *data,
 			if (!opts->release_agent)
 				return -ENOMEM;
 		} else if (!strncmp(token, "name=", 5)) {
-			int i;
 			const char *name = token + 5;
 			/* Can't specify an empty name */
 			if (!strlen(name))
@@ -1078,7 +1092,6 @@ static int parse_cgroupfs_options(char *data,
 				return -ENOMEM;
 		} else {
 			struct cgroup_subsys *ss;
-			int i;
 			for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
 				ss = subsys[i];
 				if (ss == NULL)
@@ -1117,9 +1130,54 @@ static int parse_cgroupfs_options(char *data,
 	if (!opts->subsys_bits && !opts->name)
 		return -EINVAL;
 
+	/*
+	 * Grab references on all the modules we'll need, so the subsystems
+	 * don't dance around before rebind_subsystems attaches them. This may
+	 * take duplicate reference counts on a subsystem that's already used,
+	 * but rebind_subsystems handles this case.
+	 */
+	for (i = CGROUP_BUILTIN_SUBSYS_COUNT; i < CGROUP_SUBSYS_COUNT; i++) {
+		unsigned long bit = 1UL << i;
+
+		if (!(bit & opts->subsys_bits))
+			continue;
+		if (!try_module_get(subsys[i]->module)) {
+			module_pin_failed = true;
+			break;
+		}
+	}
+	if (module_pin_failed) {
+		/*
+		 * oops, one of the modules was going away. this means that we
+		 * raced with a module_delete call, and to the user this is
+		 * essentially a "subsystem doesn't exist" case.
+		 */
+		for (i--; i >= CGROUP_BUILTIN_SUBSYS_COUNT; i--) {
+			/* drop refcounts only on the ones we took */
+			unsigned long bit = 1UL << i;
+
+			if (!(bit & opts->subsys_bits))
+				continue;
+			module_put(subsys[i]->module);
+		}
+		return -ENOENT;
+	}
+
 	return 0;
 }
 
+static void drop_parsed_module_refcounts(unsigned long subsys_bits)
+{
+	int i;
+	for (i = CGROUP_BUILTIN_SUBSYS_COUNT; i < CGROUP_SUBSYS_COUNT; i++) {
+		unsigned long bit = 1UL << i;
+
+		if (!(bit & subsys_bits))
+			continue;
+		module_put(subsys[i]->module);
+	}
+}
+
 static int cgroup_remount(struct super_block *sb, int *flags, char *data)
 {
 	int ret = 0;
@@ -1136,21 +1194,19 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data)
 	if (ret)
 		goto out_unlock;
 
-	/* Don't allow flags to change at remount */
-	if (opts.flags != root->flags) {
-		ret = -EINVAL;
-		goto out_unlock;
-	}
-
-	/* Don't allow name to change at remount */
-	if (opts.name && strcmp(opts.name, root->name)) {
+	/* Don't allow flags or name to change at remount */
+	if (opts.flags != root->flags ||
+	    (opts.name && strcmp(opts.name, root->name))) {
 		ret = -EINVAL;
+		drop_parsed_module_refcounts(opts.subsys_bits);
 		goto out_unlock;
 	}
 
 	ret = rebind_subsystems(root, opts.subsys_bits);
-	if (ret)
+	if (ret) {
+		drop_parsed_module_refcounts(opts.subsys_bits);
 		goto out_unlock;
+	}
 
 	/* (re)populate subsystem files */
 	cgroup_populate_dir(cgrp);
@@ -1349,7 +1405,7 @@ static int cgroup_get_sb(struct file_system_type *fs_type,
 	new_root = cgroup_root_from_opts(&opts);
 	if (IS_ERR(new_root)) {
 		ret = PTR_ERR(new_root);
-		goto out_err;
+		goto drop_modules;
 	}
 	opts.new_root = new_root;
 
@@ -1358,7 +1414,7 @@ static int cgroup_get_sb(struct file_system_type *fs_type,
 	if (IS_ERR(sb)) {
 		ret = PTR_ERR(sb);
 		cgroup_drop_root(opts.new_root);
-		goto out_err;
+		goto drop_modules;
 	}
 
 	root = sb->s_fs_info;
@@ -1414,6 +1470,11 @@ static int cgroup_get_sb(struct file_system_type *fs_type,
 			free_cg_links(&tmp_cg_links);
 			goto drop_new_super;
 		}
+		/*
+		 * There must be no failure case after here, since rebinding
+		 * takes care of subsystems' refcounts, which are explicitly
+		 * dropped in the failure exit path.
+		 */
 
 		/* EBUSY should be the only error here */
 		BUG_ON(ret);
@@ -1452,6 +1513,8 @@ static int cgroup_get_sb(struct file_system_type *fs_type,
 		 * any) is not needed
 		 */
 		cgroup_drop_root(opts.new_root);
+		/* no subsys rebinding, so refcounts don't change */
+		drop_parsed_module_refcounts(opts.subsys_bits);
 	}
 
 	simple_set_mnt(mnt, sb);
@@ -1461,6 +1524,8 @@ static int cgroup_get_sb(struct file_system_type *fs_type,
 
  drop_new_super:
 	deactivate_locked_super(sb);
+ drop_modules:
+	drop_parsed_module_refcounts(opts.subsys_bits);
  out_err:
 	kfree(opts.release_agent);
 	kfree(opts.name);
@@ -3422,19 +3487,71 @@ int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss)
 	lockdep_set_class(&ss->hierarchy_mutex, &ss->subsys_key);
 	ss->active = 1;
 
-	/*
-	 * pin the subsystem's module so it doesn't go away. this shouldn't
-	 * fail, since the module's initcall calls us.
-	 * TODO: with module unloading, move this elsewhere
-	 */
-	BUG_ON(!try_module_get(ss->module));
-
 	/* success! */
 	mutex_unlock(&cgroup_mutex);
 	return 0;
 }
 EXPORT_SYMBOL_GPL(cgroup_load_subsys);
 
+/**
+ * cgroup_unload_subsys: unload a modular subsystem
+ * @ss: the subsystem to unload
+ *
+ * This function should be called in a modular subsystem's exitcall. When this
+ * function is invoked, the refcount on the subsystem's module will be 0, so
+ * the subsystem will not be attached to any hierarchy.
+ */
+void cgroup_unload_subsys(struct cgroup_subsys *ss)
+{
+	struct cg_cgroup_link *link;
+	struct hlist_head *hhead;
+
+	BUG_ON(ss->module == NULL);
+
+	/*
+	 * we shouldn't be called if the subsystem is in use, and the use of
+	 * try_module_get in parse_cgroupfs_options should ensure that it
+	 * doesn't start being used while we're killing it off.
+	 */
+	BUG_ON(ss->root != &rootnode);
+
+	mutex_lock(&cgroup_mutex);
+	/* deassign the subsys_id */
+	BUG_ON(ss->subsys_id < CGROUP_BUILTIN_SUBSYS_COUNT);
+	subsys[ss->subsys_id] = NULL;
+
+	/* remove subsystem from rootnode's list of subsystems */
+	list_del(&ss->sibling);
+
+	/*
+	 * disentangle the css from all css_sets attached to the dummytop. as
+	 * in loading, we need to pay our respects to the hashtable gods.
+	 */
+	write_lock(&css_set_lock);
+	list_for_each_entry(link, &dummytop->css_sets, cgrp_link_list) {
+		struct css_set *cg = link->cg;
+
+		hlist_del(&cg->hlist);
+		BUG_ON(!cg->subsys[ss->subsys_id]);
+		cg->subsys[ss->subsys_id] = NULL;
+		hhead = css_set_hash(cg->subsys);
+		hlist_add_head(&cg->hlist, hhead);
+	}
+	write_unlock(&css_set_lock);
+
+	/*
+	 * remove subsystem's css from the dummytop and free it - need to free
+	 * before marking as null because ss->destroy needs the cgrp->subsys
+	 * pointer to find their state. note that this also takes care of
+	 * freeing the css_id.
+	 */
+	ss->destroy(ss, dummytop);
+	dummytop->subsys[ss->subsys_id] = NULL;
+
+	mutex_unlock(&cgroup_mutex);
+}
+EXPORT_SYMBOL_GPL(cgroup_unload_subsys);
+
 /**
  * cgroup_init_early - cgroup initialization at system boot
  *
-- 
cgit v1.2.3


From 67523c48aa74d5637848edeccf285af1c60bf14a Mon Sep 17 00:00:00 2001
From: Ben Blum <bblum@andrew.cmu.edu>
Date: Wed, 10 Mar 2010 15:22:11 -0800
Subject: cgroups: blkio subsystem as module

Modify the Block I/O cgroup subsystem to be able to be built as a module.
As the CFQ disk scheduler optionally depends on blk-cgroup, config options
in block/Kconfig, block/Kconfig.iosched, and block/blk-cgroup.h are
enhanced to support the new module dependency.

Signed-off-by: Ben Blum <bblum@andrew.cmu.edu>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Paul Menage <menage@google.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Lai Jiangshan <laijs@cn.fujitsu.com>
Cc: Vivek Goyal <vgoyal@redhat.com>
Cc: Jens Axboe <jens.axboe@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 block/Kconfig             |  2 +-
 block/Kconfig.iosched     |  2 +-
 block/blk-cgroup.c        | 53 ++++++++++++++++++++++++++++++++++++-----------
 block/blk-cgroup.h        | 10 +++++++--
 include/linux/iocontext.h |  2 +-
 kernel/cgroup.c           |  9 ++++++++
 6 files changed, 61 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/block/Kconfig b/block/Kconfig
index e20fbde0875c..62a5921321cd 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -78,7 +78,7 @@ config BLK_DEV_INTEGRITY
 	Protection.  If in doubt, say N.
 
 config BLK_CGROUP
-	bool
+	tristate
 	depends on CGROUPS
 	default n
 	---help---
diff --git a/block/Kconfig.iosched b/block/Kconfig.iosched
index b71abfb0d726..fc71cf071fb2 100644
--- a/block/Kconfig.iosched
+++ b/block/Kconfig.iosched
@@ -23,6 +23,7 @@ config IOSCHED_DEADLINE
 
 config IOSCHED_CFQ
 	tristate "CFQ I/O scheduler"
+	select BLK_CGROUP if CFQ_GROUP_IOSCHED
 	default y
 	---help---
 	  The CFQ I/O scheduler tries to distribute bandwidth equally
@@ -35,7 +36,6 @@ config IOSCHED_CFQ
 config CFQ_GROUP_IOSCHED
 	bool "CFQ Group Scheduling support"
 	depends on IOSCHED_CFQ && CGROUPS
-	select BLK_CGROUP
 	default n
 	---help---
 	  Enable group IO scheduling in CFQ.
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index c85d74cae200..4b686ad08eaa 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -23,6 +23,31 @@ static LIST_HEAD(blkio_list);
 struct blkio_cgroup blkio_root_cgroup = { .weight = 2*BLKIO_WEIGHT_DEFAULT };
 EXPORT_SYMBOL_GPL(blkio_root_cgroup);
 
+static struct cgroup_subsys_state *blkiocg_create(struct cgroup_subsys *,
+						  struct cgroup *);
+static int blkiocg_can_attach(struct cgroup_subsys *, struct cgroup *,
+			      struct task_struct *, bool);
+static void blkiocg_attach(struct cgroup_subsys *, struct cgroup *,
+			   struct cgroup *, struct task_struct *, bool);
+static void blkiocg_destroy(struct cgroup_subsys *, struct cgroup *);
+static int blkiocg_populate(struct cgroup_subsys *, struct cgroup *);
+
+struct cgroup_subsys blkio_subsys = {
+	.name = "blkio",
+	.create = blkiocg_create,
+	.can_attach = blkiocg_can_attach,
+	.attach = blkiocg_attach,
+	.destroy = blkiocg_destroy,
+	.populate = blkiocg_populate,
+#ifdef CONFIG_BLK_CGROUP
+	/* note: blkio_subsys_id is otherwise defined in blk-cgroup.h */
+	.subsys_id = blkio_subsys_id,
+#endif
+	.use_id = 1,
+	.module = THIS_MODULE,
+};
+EXPORT_SYMBOL_GPL(blkio_subsys);
+
 struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup)
 {
 	return container_of(cgroup_subsys_state(cgroup, blkio_subsys_id),
@@ -253,7 +278,8 @@ remove_entry:
 done:
 	free_css_id(&blkio_subsys, &blkcg->css);
 	rcu_read_unlock();
-	kfree(blkcg);
+	if (blkcg != &blkio_root_cgroup)
+		kfree(blkcg);
 }
 
 static struct cgroup_subsys_state *
@@ -319,17 +345,6 @@ static void blkiocg_attach(struct cgroup_subsys *subsys, struct cgroup *cgroup,
 	task_unlock(tsk);
 }
 
-struct cgroup_subsys blkio_subsys = {
-	.name = "blkio",
-	.create = blkiocg_create,
-	.can_attach = blkiocg_can_attach,
-	.attach = blkiocg_attach,
-	.destroy = blkiocg_destroy,
-	.populate = blkiocg_populate,
-	.subsys_id = blkio_subsys_id,
-	.use_id = 1,
-};
-
 void blkio_policy_register(struct blkio_policy_type *blkiop)
 {
 	spin_lock(&blkio_list_lock);
@@ -345,3 +360,17 @@ void blkio_policy_unregister(struct blkio_policy_type *blkiop)
 	spin_unlock(&blkio_list_lock);
 }
 EXPORT_SYMBOL_GPL(blkio_policy_unregister);
+
+static int __init init_cgroup_blkio(void)
+{
+	return cgroup_load_subsys(&blkio_subsys);
+}
+
+static void __exit exit_cgroup_blkio(void)
+{
+	cgroup_unload_subsys(&blkio_subsys);
+}
+
+module_init(init_cgroup_blkio);
+module_exit(exit_cgroup_blkio);
+MODULE_LICENSE("GPL");
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index 84bf745fa775..8ccc20464dae 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -15,7 +15,13 @@
 
 #include <linux/cgroup.h>
 
-#ifdef CONFIG_BLK_CGROUP
+#if defined(CONFIG_BLK_CGROUP) || defined(CONFIG_BLK_CGROUP_MODULE)
+
+#ifndef CONFIG_BLK_CGROUP
+/* When blk-cgroup is a module, its subsys_id isn't a compile-time constant */
+extern struct cgroup_subsys blkio_subsys;
+#define blkio_subsys_id blkio_subsys.subsys_id
+#endif
 
 struct blkio_cgroup {
 	struct cgroup_subsys_state css;
@@ -91,7 +97,7 @@ static inline void blkiocg_update_blkio_group_dequeue_stats(
 			struct blkio_group *blkg, unsigned long dequeue) {}
 #endif
 
-#ifdef CONFIG_BLK_CGROUP
+#if defined(CONFIG_BLK_CGROUP) || defined(CONFIG_BLK_CGROUP_MODULE)
 extern struct blkio_cgroup blkio_root_cgroup;
 extern struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup);
 extern void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg,
diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h
index 1195a806fe0c..a0bb301afac0 100644
--- a/include/linux/iocontext.h
+++ b/include/linux/iocontext.h
@@ -42,7 +42,7 @@ struct io_context {
 	unsigned short ioprio;
 	unsigned short ioprio_changed;
 
-#ifdef CONFIG_BLK_CGROUP
+#if defined(CONFIG_BLK_CGROUP) || defined(CONFIG_BLK_CGROUP_MODULE)
 	unsigned short cgroup_changed;
 #endif
 
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index aa889c96cc74..521591dbab2f 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -705,6 +705,7 @@ void cgroup_lock(void)
 {
 	mutex_lock(&cgroup_mutex);
 }
+EXPORT_SYMBOL_GPL(cgroup_lock);
 
 /**
  * cgroup_unlock - release lock on cgroup changes
@@ -715,6 +716,7 @@ void cgroup_unlock(void)
 {
 	mutex_unlock(&cgroup_mutex);
 }
+EXPORT_SYMBOL_GPL(cgroup_unlock);
 
 /*
  * A couple of forward declarations required, due to cyclic reference loop:
@@ -1639,6 +1641,7 @@ int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
 	memmove(buf, start, buf + buflen - start);
 	return 0;
 }
+EXPORT_SYMBOL_GPL(cgroup_path);
 
 /**
  * cgroup_attach_task - attach task 'tsk' to cgroup 'cgrp'
@@ -1805,6 +1808,7 @@ bool cgroup_lock_live_group(struct cgroup *cgrp)
 	}
 	return true;
 }
+EXPORT_SYMBOL_GPL(cgroup_lock_live_group);
 
 static int cgroup_release_agent_write(struct cgroup *cgrp, struct cftype *cft,
 				      const char *buffer)
@@ -4082,6 +4086,7 @@ void __css_put(struct cgroup_subsys_state *css, int count)
 	rcu_read_unlock();
 	WARN_ON_ONCE(val < 1);
 }
+EXPORT_SYMBOL_GPL(__css_put);
 
 /*
  * Notify userspace when a cgroup is released, by running the
@@ -4197,6 +4202,7 @@ unsigned short css_id(struct cgroup_subsys_state *css)
 		return cssid->id;
 	return 0;
 }
+EXPORT_SYMBOL_GPL(css_id);
 
 unsigned short css_depth(struct cgroup_subsys_state *css)
 {
@@ -4206,6 +4212,7 @@ unsigned short css_depth(struct cgroup_subsys_state *css)
 		return cssid->depth;
 	return 0;
 }
+EXPORT_SYMBOL_GPL(css_depth);
 
 bool css_is_ancestor(struct cgroup_subsys_state *child,
 		    const struct cgroup_subsys_state *root)
@@ -4242,6 +4249,7 @@ void free_css_id(struct cgroup_subsys *ss, struct cgroup_subsys_state *css)
 	spin_unlock(&ss->id_lock);
 	call_rcu(&id->rcu_head, __free_css_id_cb);
 }
+EXPORT_SYMBOL_GPL(free_css_id);
 
 /*
  * This is called by init or create(). Then, calls to this function are
@@ -4358,6 +4366,7 @@ struct cgroup_subsys_state *css_lookup(struct cgroup_subsys *ss, int id)
 
 	return rcu_dereference(cssid->css);
 }
+EXPORT_SYMBOL_GPL(css_lookup);
 
 /**
  * css_get_next - lookup next cgroup under specified hierarchy.
-- 
cgit v1.2.3


From 024914477e15ef8b17f271ec47f1bb8a589f0806 Mon Sep 17 00:00:00 2001
From: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Date: Wed, 10 Mar 2010 15:22:17 -0800
Subject: memcg: move charges of anonymous swap

This patch is another core part of this move-charge-at-task-migration
feature.  It enables moving charges of anonymous swaps.

To move the charge of swap, we need to exchange swap_cgroup's record.

In current implementation, swap_cgroup's record is protected by:

  - page lock: if the entry is on swap cache.
  - swap_lock: if the entry is not on swap cache.

This works well in usual swap-in/out activity.

But this behavior make the feature of moving swap charge check many
conditions to exchange swap_cgroup's record safely.

So I changed modification of swap_cgroup's recored(swap_cgroup_record())
to use xchg, and define a new function to cmpxchg swap_cgroup's record.

This patch also enables moving charge of non pte_present but not uncharged
swap caches, which can be exist on swap-out path, by getting the target
pages via find_get_page() as do_mincore() does.

[kosaki.motohiro@jp.fujitsu.com: fix ia64 build]
[akpm@linux-foundation.org: fix typos]
Signed-off-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: Balbir Singh <balbir@linux.vnet.ibm.com>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Paul Menage <menage@google.com>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/cgroups/memory.txt |   2 +
 include/linux/page_cgroup.h      |   2 +
 include/linux/swap.h             |   9 ++
 mm/memcontrol.c                  | 183 +++++++++++++++++++++++++++++++--------
 mm/page_cgroup.c                 |  34 +++++++-
 mm/swapfile.c                    |  31 +++++++
 6 files changed, 223 insertions(+), 38 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/cgroups/memory.txt b/Documentation/cgroups/memory.txt
index e726fb0df719..1f59a1a38bd9 100644
--- a/Documentation/cgroups/memory.txt
+++ b/Documentation/cgroups/memory.txt
@@ -420,6 +420,8 @@ NOTE2: It is recommended to set the soft limit always below the hard limit,
 
 Users can move charges associated with a task along with task migration, that
 is, uncharge task's pages from the old cgroup and charge them to the new cgroup.
+This feature is not supported in !CONFIG_MMU environments because of lack of
+page tables.
 
 8.1 Interface
 
diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h
index b0e4eb126236..30b08136fdf3 100644
--- a/include/linux/page_cgroup.h
+++ b/include/linux/page_cgroup.h
@@ -118,6 +118,8 @@ static inline void __init page_cgroup_init_flatmem(void)
 #include <linux/swap.h>
 
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
+extern unsigned short swap_cgroup_cmpxchg(swp_entry_t ent,
+					unsigned short old, unsigned short new);
 extern unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id);
 extern unsigned short lookup_swap_cgroup(swp_entry_t ent);
 extern int swap_cgroup_swapon(int type, unsigned long max_pages);
diff --git a/include/linux/swap.h b/include/linux/swap.h
index a2602a8207a6..1f59d9340c4d 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -355,6 +355,7 @@ static inline void disable_swap_token(void)
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR
 extern void
 mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout);
+extern int mem_cgroup_count_swap_user(swp_entry_t ent, struct page **pagep);
 #else
 static inline void
 mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout)
@@ -485,6 +486,14 @@ mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent)
 {
 }
 
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR
+static inline int
+mem_cgroup_count_swap_user(swp_entry_t ent, struct page **pagep)
+{
+	return 0;
+}
+#endif
+
 #endif /* CONFIG_SWAP */
 #endif /* __KERNEL__*/
 #endif /* _LINUX_SWAP_H */
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 589084f00b70..e883198baf81 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -33,6 +33,7 @@
 #include <linux/rbtree.h>
 #include <linux/slab.h>
 #include <linux/swap.h>
+#include <linux/swapops.h>
 #include <linux/spinlock.h>
 #include <linux/fs.h>
 #include <linux/seq_file.h>
@@ -2270,6 +2271,54 @@ void mem_cgroup_uncharge_swap(swp_entry_t ent)
 	}
 	rcu_read_unlock();
 }
+
+/**
+ * mem_cgroup_move_swap_account - move swap charge and swap_cgroup's record.
+ * @entry: swap entry to be moved
+ * @from:  mem_cgroup which the entry is moved from
+ * @to:  mem_cgroup which the entry is moved to
+ *
+ * It succeeds only when the swap_cgroup's record for this entry is the same
+ * as the mem_cgroup's id of @from.
+ *
+ * Returns 0 on success, -EINVAL on failure.
+ *
+ * The caller must have charged to @to, IOW, called res_counter_charge() about
+ * both res and memsw, and called css_get().
+ */
+static int mem_cgroup_move_swap_account(swp_entry_t entry,
+				struct mem_cgroup *from, struct mem_cgroup *to)
+{
+	unsigned short old_id, new_id;
+
+	old_id = css_id(&from->css);
+	new_id = css_id(&to->css);
+
+	if (swap_cgroup_cmpxchg(entry, old_id, new_id) == old_id) {
+		if (!mem_cgroup_is_root(from))
+			res_counter_uncharge(&from->memsw, PAGE_SIZE);
+		mem_cgroup_swap_statistics(from, false);
+		mem_cgroup_put(from);
+		/*
+		 * we charged both to->res and to->memsw, so we should uncharge
+		 * to->res.
+		 */
+		if (!mem_cgroup_is_root(to))
+			res_counter_uncharge(&to->res, PAGE_SIZE);
+		mem_cgroup_swap_statistics(to, true);
+		mem_cgroup_get(to);
+		css_put(&to->css);
+
+		return 0;
+	}
+	return -EINVAL;
+}
+#else
+static inline int mem_cgroup_move_swap_account(swp_entry_t entry,
+				struct mem_cgroup *from, struct mem_cgroup *to)
+{
+	return -EINVAL;
+}
 #endif
 
 /*
@@ -2949,6 +2998,7 @@ static u64 mem_cgroup_move_charge_read(struct cgroup *cgrp,
 	return mem_cgroup_from_cont(cgrp)->move_charge_at_immigrate;
 }
 
+#ifdef CONFIG_MMU
 static int mem_cgroup_move_charge_write(struct cgroup *cgrp,
 					struct cftype *cft, u64 val)
 {
@@ -2967,6 +3017,13 @@ static int mem_cgroup_move_charge_write(struct cgroup *cgrp,
 
 	return 0;
 }
+#else
+static int mem_cgroup_move_charge_write(struct cgroup *cgrp,
+					struct cftype *cft, u64 val)
+{
+	return -ENOSYS;
+}
+#endif
 
 
 /* For read statistics */
@@ -3489,6 +3546,7 @@ static int mem_cgroup_populate(struct cgroup_subsys *ss,
 	return ret;
 }
 
+#ifdef CONFIG_MMU
 /* Handlers for move charge at task migration. */
 #define PRECHARGE_COUNT_AT_ONCE	256
 static int mem_cgroup_do_precharge(unsigned long count)
@@ -3544,77 +3602,124 @@ one_by_one:
 	}
 	return ret;
 }
+#else	/* !CONFIG_MMU */
+static int mem_cgroup_can_attach(struct cgroup_subsys *ss,
+				struct cgroup *cgroup,
+				struct task_struct *p,
+				bool threadgroup)
+{
+	return 0;
+}
+static void mem_cgroup_cancel_attach(struct cgroup_subsys *ss,
+				struct cgroup *cgroup,
+				struct task_struct *p,
+				bool threadgroup)
+{
+}
+static void mem_cgroup_move_task(struct cgroup_subsys *ss,
+				struct cgroup *cont,
+				struct cgroup *old_cont,
+				struct task_struct *p,
+				bool threadgroup)
+{
+}
+#endif
 
 /**
  * is_target_pte_for_mc - check a pte whether it is valid for move charge
  * @vma: the vma the pte to be checked belongs
  * @addr: the address corresponding to the pte to be checked
  * @ptent: the pte to be checked
- * @target: the pointer the target page will be stored(can be NULL)
+ * @target: the pointer the target page or swap ent will be stored(can be NULL)
  *
  * Returns
  *   0(MC_TARGET_NONE): if the pte is not a target for move charge.
  *   1(MC_TARGET_PAGE): if the page corresponding to this pte is a target for
  *     move charge. if @target is not NULL, the page is stored in target->page
  *     with extra refcnt got(Callers should handle it).
+ *   2(MC_TARGET_SWAP): if the swap entry corresponding to this pte is a
+ *     target for charge migration. if @target is not NULL, the entry is stored
+ *     in target->ent.
  *
  * Called with pte lock held.
  */
-/* We add a new member later. */
 union mc_target {
 	struct page	*page;
+	swp_entry_t	ent;
 };
 
-/* We add a new type later. */
 enum mc_target_type {
 	MC_TARGET_NONE,	/* not used */
 	MC_TARGET_PAGE,
+	MC_TARGET_SWAP,
 };
 
 static int is_target_pte_for_mc(struct vm_area_struct *vma,
 		unsigned long addr, pte_t ptent, union mc_target *target)
 {
-	struct page *page;
+	struct page *page = NULL;
 	struct page_cgroup *pc;
 	int ret = 0;
+	swp_entry_t ent = { .val = 0 };
+	int usage_count = 0;
 	bool move_anon = test_bit(MOVE_CHARGE_TYPE_ANON,
 					&mc.to->move_charge_at_immigrate);
 
-	if (!pte_present(ptent))
-		return 0;
-
-	page = vm_normal_page(vma, addr, ptent);
-	if (!page || !page_mapped(page))
-		return 0;
-	/*
-	 * TODO: We don't move charges of file(including shmem/tmpfs) pages for
-	 * now.
-	 */
-	if (!move_anon || !PageAnon(page))
-		return 0;
-	/*
-	 * TODO: We don't move charges of shared(used by multiple processes)
-	 * pages for now.
-	 */
-	if (page_mapcount(page) > 1)
-		return 0;
-	if (!get_page_unless_zero(page))
+	if (!pte_present(ptent)) {
+		/* TODO: handle swap of shmes/tmpfs */
+		if (pte_none(ptent) || pte_file(ptent))
+			return 0;
+		else if (is_swap_pte(ptent)) {
+			ent = pte_to_swp_entry(ptent);
+			if (!move_anon || non_swap_entry(ent))
+				return 0;
+			usage_count = mem_cgroup_count_swap_user(ent, &page);
+		}
+	} else {
+		page = vm_normal_page(vma, addr, ptent);
+		if (!page || !page_mapped(page))
+			return 0;
+		/*
+		 * TODO: We don't move charges of file(including shmem/tmpfs)
+		 * pages for now.
+		 */
+		if (!move_anon || !PageAnon(page))
+			return 0;
+		if (!get_page_unless_zero(page))
+			return 0;
+		usage_count = page_mapcount(page);
+	}
+	if (usage_count > 1) {
+		/*
+		 * TODO: We don't move charges of shared(used by multiple
+		 * processes) pages for now.
+		 */
+		if (page)
+			put_page(page);
 		return 0;
-
-	pc = lookup_page_cgroup(page);
-	/*
-	 * Do only loose check w/o page_cgroup lock. mem_cgroup_move_account()
-	 * checks the pc is valid or not under the lock.
-	 */
-	if (PageCgroupUsed(pc) && pc->mem_cgroup == mc.from) {
-		ret = MC_TARGET_PAGE;
+	}
+	if (page) {
+		pc = lookup_page_cgroup(page);
+		/*
+		 * Do only loose check w/o page_cgroup lock.
+		 * mem_cgroup_move_account() checks the pc is valid or not under
+		 * the lock.
+		 */
+		if (PageCgroupUsed(pc) && pc->mem_cgroup == mc.from) {
+			ret = MC_TARGET_PAGE;
+			if (target)
+				target->page = page;
+		}
+		if (!ret || !target)
+			put_page(page);
+	}
+	/* throught */
+	if (ent.val && do_swap_account && !ret &&
+			css_id(&mc.from->css) == lookup_swap_cgroup(ent)) {
+		ret = MC_TARGET_SWAP;
 		if (target)
-			target->page = page;
+			target->ent = ent;
 	}
-
-	if (!ret || !target)
-		put_page(page);
-
 	return ret;
 }
 
@@ -3754,6 +3859,7 @@ retry:
 		int type;
 		struct page *page;
 		struct page_cgroup *pc;
+		swp_entry_t ent;
 
 		if (!mc.precharge)
 			break;
@@ -3775,6 +3881,11 @@ retry:
 put:			/* is_target_pte_for_mc() gets the page */
 			put_page(page);
 			break;
+		case MC_TARGET_SWAP:
+			ent = target.ent;
+			if (!mem_cgroup_move_swap_account(ent, mc.from, mc.to))
+				mc.precharge--;
+			break;
 		default:
 			break;
 		}
diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c
index 3d535d594826..3dd88539a0e6 100644
--- a/mm/page_cgroup.c
+++ b/mm/page_cgroup.c
@@ -334,6 +334,37 @@ not_enough_page:
 	return -ENOMEM;
 }
 
+/**
+ * swap_cgroup_cmpxchg - cmpxchg mem_cgroup's id for this swp_entry.
+ * @end: swap entry to be cmpxchged
+ * @old: old id
+ * @new: new id
+ *
+ * Returns old id at success, 0 at failure.
+ * (There is no mem_cgroup useing 0 as its id)
+ */
+unsigned short swap_cgroup_cmpxchg(swp_entry_t ent,
+					unsigned short old, unsigned short new)
+{
+	int type = swp_type(ent);
+	unsigned long offset = swp_offset(ent);
+	unsigned long idx = offset / SC_PER_PAGE;
+	unsigned long pos = offset & SC_POS_MASK;
+	struct swap_cgroup_ctrl *ctrl;
+	struct page *mappage;
+	struct swap_cgroup *sc;
+
+	ctrl = &swap_cgroup_ctrl[type];
+
+	mappage = ctrl->map[idx];
+	sc = page_address(mappage);
+	sc += pos;
+	if (cmpxchg(&sc->id, old, new) == old)
+		return old;
+	else
+		return 0;
+}
+
 /**
  * swap_cgroup_record - record mem_cgroup for this swp_entry.
  * @ent: swap entry to be recorded into
@@ -358,8 +389,7 @@ unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id)
 	mappage = ctrl->map[idx];
 	sc = page_address(mappage);
 	sc += pos;
-	old = sc->id;
-	sc->id = id;
+	old = xchg(&sc->id, id);
 
 	return old;
 }
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 84374d8cf814..6cd0a8f90dc7 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -723,6 +723,37 @@ int free_swap_and_cache(swp_entry_t entry)
 	return p != NULL;
 }
 
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR
+/**
+ * mem_cgroup_count_swap_user - count the user of a swap entry
+ * @ent: the swap entry to be checked
+ * @pagep: the pointer for the swap cache page of the entry to be stored
+ *
+ * Returns the number of the user of the swap entry. The number is valid only
+ * for swaps of anonymous pages.
+ * If the entry is found on swap cache, the page is stored to pagep with
+ * refcount of it being incremented.
+ */
+int mem_cgroup_count_swap_user(swp_entry_t ent, struct page **pagep)
+{
+	struct page *page;
+	struct swap_info_struct *p;
+	int count = 0;
+
+	page = find_get_page(&swapper_space, ent.val);
+	if (page)
+		count += page_mapcount(page);
+	p = swap_info_get(ent);
+	if (p) {
+		count += swap_count(p->swap_map[swp_offset(ent)]);
+		spin_unlock(&swap_lock);
+	}
+
+	*pagep = page;
+	return count;
+}
+#endif
+
 #ifdef CONFIG_HIBERNATION
 /*
  * Find the swap type that corresponds to given device (if any).
-- 
cgit v1.2.3


From 0dea116876eefc9c7ca9c5d74fe665481e499fa3 Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill@shutemov.name>
Date: Wed, 10 Mar 2010 15:22:20 -0800
Subject: cgroup: implement eventfd-based generic API for notifications

This patchset introduces eventfd-based API for notifications in cgroups
and implements memory notifications on top of it.

It uses statistics in memory controler to track memory usage.

Output of time(1) on building kernel on tmpfs:

Root cgroup before changes:
	make -j2  506.37 user 60.93s system 193% cpu 4:52.77 total
Non-root cgroup before changes:
	make -j2  507.14 user 62.66s system 193% cpu 4:54.74 total
Root cgroup after changes (0 thresholds):
	make -j2  507.13 user 62.20s system 193% cpu 4:53.55 total
Non-root cgroup after changes (0 thresholds):
	make -j2  507.70 user 64.20s system 193% cpu 4:55.70 total
Root cgroup after changes (1 thresholds, never crossed):
	make -j2  506.97 user 62.20s system 193% cpu 4:53.90 total
Non-root cgroup after changes (1 thresholds, never crossed):
	make -j2  507.55 user 64.08s system 193% cpu 4:55.63 total

This patch:

Introduce the write-only file "cgroup.event_control" in every cgroup.

To register new notification handler you need:
- create an eventfd;
- open a control file to be monitored. Callbacks register_event() and
  unregister_event() must be defined for the control file;
- write "<event_fd> <control_fd> <args>" to cgroup.event_control.
  Interpretation of args is defined by control file implementation;

eventfd will be woken up by control file implementation or when the
cgroup is removed.

To unregister notification handler just close eventfd.

If you need notification functionality for a control file you have to
implement callbacks register_event() and unregister_event() in the
struct cftype.

[kamezawa.hiroyu@jp.fujitsu.com: Kconfig fix]
Signed-off-by: Kirill A. Shutemov <kirill@shutemov.name>
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Paul Menage <menage@google.com>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: Pavel Emelyanov <xemul@openvz.org>
Cc: Dan Malek <dan@embeddedalley.com>
Cc: Vladislav Buzov <vbuzov@embeddedalley.com>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: Alexander Shishkin <virtuoso@slind.org>
Cc: Davide Libenzi <davidel@xmailserver.org>
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/cgroups/cgroups.txt |  20 ++++
 include/linux/cgroup.h            |  24 ++++
 init/Kconfig                      |   1 +
 kernel/cgroup.c                   | 228 +++++++++++++++++++++++++++++++++++++-
 4 files changed, 272 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/Documentation/cgroups/cgroups.txt b/Documentation/cgroups/cgroups.txt
index c0358c30c64f..fd588ff0e296 100644
--- a/Documentation/cgroups/cgroups.txt
+++ b/Documentation/cgroups/cgroups.txt
@@ -23,6 +23,7 @@ CONTENTS:
   2.1 Basic Usage
   2.2 Attaching processes
   2.3 Mounting hierarchies by name
+  2.4 Notification API
 3. Kernel API
   3.1 Overview
   3.2 Synchronization
@@ -435,6 +436,25 @@ you give a subsystem a name.
 The name of the subsystem appears as part of the hierarchy description
 in /proc/mounts and /proc/<pid>/cgroups.
 
+2.4 Notification API
+--------------------
+
+There is mechanism which allows to get notifications about changing
+status of a cgroup.
+
+To register new notification handler you need:
+ - create a file descriptor for event notification using eventfd(2);
+ - open a control file to be monitored (e.g. memory.usage_in_bytes);
+ - write "<event_fd> <control_fd> <args>" to cgroup.event_control.
+   Interpretation of args is defined by control file implementation;
+
+eventfd will be woken up by control file implementation or when the
+cgroup is removed.
+
+To unregister notification handler just close eventfd.
+
+NOTE: Support of notifications should be implemented for the control
+file. See documentation for the subsystem.
 
 3. Kernel API
 =============
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 2a59d3101e5d..b4f2201321cd 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -235,6 +235,10 @@ struct cgroup {
 
 	/* For RCU-protected deletion */
 	struct rcu_head rcu_head;
+
+	/* List of events which userspace want to recieve */
+	struct list_head event_list;
+	spinlock_t event_list_lock;
 };
 
 /*
@@ -378,6 +382,26 @@ struct cftype {
 	int (*trigger)(struct cgroup *cgrp, unsigned int event);
 
 	int (*release)(struct inode *inode, struct file *file);
+
+	/*
+	 * register_event() callback will be used to add new userspace
+	 * waiter for changes related to the cftype. Implement it if
+	 * you want to provide this functionality. Use eventfd_signal()
+	 * on eventfd to send notification to userspace.
+	 */
+	int (*register_event)(struct cgroup *cgrp, struct cftype *cft,
+			struct eventfd_ctx *eventfd, const char *args);
+	/*
+	 * unregister_event() callback will be called when userspace
+	 * closes the eventfd or on cgroup removing.
+	 * This callback must be implemented, if you want provide
+	 * notification functionality.
+	 *
+	 * Be careful. It can be called after destroy(), so you have
+	 * to keep all nesessary data, until all events are removed.
+	 */
+	int (*unregister_event)(struct cgroup *cgrp, struct cftype *cft,
+			struct eventfd_ctx *eventfd);
 };
 
 struct cgroup_scanner {
diff --git a/init/Kconfig b/init/Kconfig
index 089a230e5652..eb77e8ccde1c 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -463,6 +463,7 @@ config HAVE_UNSTABLE_SCHED_CLOCK
 
 menuconfig CGROUPS
 	boolean "Control Group support"
+	depends on EVENTFD
 	help
 	  This option adds support for grouping sets of processes together, for
 	  use with process control subsystems such as Cpusets, CFS, memory
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 1bf4d6db54ab..ea94984a3895 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -4,6 +4,10 @@
  *  Based originally on the cpuset system, extracted by Paul Menage
  *  Copyright (C) 2006 Google, Inc
  *
+ *  Notifications support
+ *  Copyright (C) 2009 Nokia Corporation
+ *  Author: Kirill A. Shutemov
+ *
  *  Copyright notices from the original cpuset code:
  *  --------------------------------------------------
  *  Copyright (C) 2003 BULL SA.
@@ -53,6 +57,8 @@
 #include <linux/pid_namespace.h>
 #include <linux/idr.h>
 #include <linux/vmalloc.h> /* TODO: replace with more sophisticated array */
+#include <linux/eventfd.h>
+#include <linux/poll.h>
 
 #include <asm/atomic.h>
 
@@ -152,6 +158,35 @@ struct css_id {
 	unsigned short stack[0]; /* Array of Length (depth+1) */
 };
 
+/*
+ * cgroup_event represents events which userspace want to recieve.
+ */
+struct cgroup_event {
+	/*
+	 * Cgroup which the event belongs to.
+	 */
+	struct cgroup *cgrp;
+	/*
+	 * Control file which the event associated.
+	 */
+	struct cftype *cft;
+	/*
+	 * eventfd to signal userspace about the event.
+	 */
+	struct eventfd_ctx *eventfd;
+	/*
+	 * Each of these stored in a list by the cgroup.
+	 */
+	struct list_head list;
+	/*
+	 * All fields below needed to unregister event when
+	 * userspace closes eventfd.
+	 */
+	poll_table pt;
+	wait_queue_head_t *wqh;
+	wait_queue_t wait;
+	struct work_struct remove;
+};
 
 /* The list of hierarchy roots */
 
@@ -760,14 +795,28 @@ static struct inode *cgroup_new_inode(mode_t mode, struct super_block *sb)
 static int cgroup_call_pre_destroy(struct cgroup *cgrp)
 {
 	struct cgroup_subsys *ss;
+	struct cgroup_event *event, *tmp;
 	int ret = 0;
 
 	for_each_subsys(cgrp->root, ss)
 		if (ss->pre_destroy) {
 			ret = ss->pre_destroy(ss, cgrp);
 			if (ret)
-				break;
+				goto out;
 		}
+
+	/*
+	 * Unregister events and notify userspace.
+	 */
+	spin_lock(&cgrp->event_list_lock);
+	list_for_each_entry_safe(event, tmp, &cgrp->event_list, list) {
+		list_del(&event->list);
+		eventfd_signal(event->eventfd, 1);
+		schedule_work(&event->remove);
+	}
+	spin_unlock(&cgrp->event_list_lock);
+
+out:
 	return ret;
 }
 
@@ -1239,6 +1288,8 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp)
 	INIT_LIST_HEAD(&cgrp->release_list);
 	INIT_LIST_HEAD(&cgrp->pidlists);
 	mutex_init(&cgrp->pidlist_mutex);
+	INIT_LIST_HEAD(&cgrp->event_list);
+	spin_lock_init(&cgrp->event_list_lock);
 }
 
 static void init_cgroup_root(struct cgroupfs_root *root)
@@ -2077,6 +2128,16 @@ static const struct inode_operations cgroup_dir_inode_operations = {
 	.rename = cgroup_rename,
 };
 
+/*
+ * Check if a file is a control file
+ */
+static inline struct cftype *__file_cft(struct file *file)
+{
+	if (file->f_dentry->d_inode->i_fop != &cgroup_file_operations)
+		return ERR_PTR(-EINVAL);
+	return __d_cft(file->f_dentry);
+}
+
 static int cgroup_create_file(struct dentry *dentry, mode_t mode,
 				struct super_block *sb)
 {
@@ -2930,6 +2991,166 @@ static int cgroup_write_notify_on_release(struct cgroup *cgrp,
 	return 0;
 }
 
+/*
+ * Unregister event and free resources.
+ *
+ * Gets called from workqueue.
+ */
+static void cgroup_event_remove(struct work_struct *work)
+{
+	struct cgroup_event *event = container_of(work, struct cgroup_event,
+			remove);
+	struct cgroup *cgrp = event->cgrp;
+
+	/* TODO: check return code */
+	event->cft->unregister_event(cgrp, event->cft, event->eventfd);
+
+	eventfd_ctx_put(event->eventfd);
+	remove_wait_queue(event->wqh, &event->wait);
+	kfree(event);
+}
+
+/*
+ * Gets called on POLLHUP on eventfd when user closes it.
+ *
+ * Called with wqh->lock held and interrupts disabled.
+ */
+static int cgroup_event_wake(wait_queue_t *wait, unsigned mode,
+		int sync, void *key)
+{
+	struct cgroup_event *event = container_of(wait,
+			struct cgroup_event, wait);
+	struct cgroup *cgrp = event->cgrp;
+	unsigned long flags = (unsigned long)key;
+
+	if (flags & POLLHUP) {
+		spin_lock(&cgrp->event_list_lock);
+		list_del(&event->list);
+		spin_unlock(&cgrp->event_list_lock);
+		/*
+		 * We are in atomic context, but cgroup_event_remove() may
+		 * sleep, so we have to call it in workqueue.
+		 */
+		schedule_work(&event->remove);
+	}
+
+	return 0;
+}
+
+static void cgroup_event_ptable_queue_proc(struct file *file,
+		wait_queue_head_t *wqh, poll_table *pt)
+{
+	struct cgroup_event *event = container_of(pt,
+			struct cgroup_event, pt);
+
+	event->wqh = wqh;
+	add_wait_queue(wqh, &event->wait);
+}
+
+/*
+ * Parse input and register new cgroup event handler.
+ *
+ * Input must be in format '<event_fd> <control_fd> <args>'.
+ * Interpretation of args is defined by control file implementation.
+ */
+static int cgroup_write_event_control(struct cgroup *cgrp, struct cftype *cft,
+				      const char *buffer)
+{
+	struct cgroup_event *event = NULL;
+	unsigned int efd, cfd;
+	struct file *efile = NULL;
+	struct file *cfile = NULL;
+	char *endp;
+	int ret;
+
+	efd = simple_strtoul(buffer, &endp, 10);
+	if (*endp != ' ')
+		return -EINVAL;
+	buffer = endp + 1;
+
+	cfd = simple_strtoul(buffer, &endp, 10);
+	if ((*endp != ' ') && (*endp != '\0'))
+		return -EINVAL;
+	buffer = endp + 1;
+
+	event = kzalloc(sizeof(*event), GFP_KERNEL);
+	if (!event)
+		return -ENOMEM;
+	event->cgrp = cgrp;
+	INIT_LIST_HEAD(&event->list);
+	init_poll_funcptr(&event->pt, cgroup_event_ptable_queue_proc);
+	init_waitqueue_func_entry(&event->wait, cgroup_event_wake);
+	INIT_WORK(&event->remove, cgroup_event_remove);
+
+	efile = eventfd_fget(efd);
+	if (IS_ERR(efile)) {
+		ret = PTR_ERR(efile);
+		goto fail;
+	}
+
+	event->eventfd = eventfd_ctx_fileget(efile);
+	if (IS_ERR(event->eventfd)) {
+		ret = PTR_ERR(event->eventfd);
+		goto fail;
+	}
+
+	cfile = fget(cfd);
+	if (!cfile) {
+		ret = -EBADF;
+		goto fail;
+	}
+
+	/* the process need read permission on control file */
+	ret = file_permission(cfile, MAY_READ);
+	if (ret < 0)
+		goto fail;
+
+	event->cft = __file_cft(cfile);
+	if (IS_ERR(event->cft)) {
+		ret = PTR_ERR(event->cft);
+		goto fail;
+	}
+
+	if (!event->cft->register_event || !event->cft->unregister_event) {
+		ret = -EINVAL;
+		goto fail;
+	}
+
+	ret = event->cft->register_event(cgrp, event->cft,
+			event->eventfd, buffer);
+	if (ret)
+		goto fail;
+
+	if (efile->f_op->poll(efile, &event->pt) & POLLHUP) {
+		event->cft->unregister_event(cgrp, event->cft, event->eventfd);
+		ret = 0;
+		goto fail;
+	}
+
+	spin_lock(&cgrp->event_list_lock);
+	list_add(&event->list, &cgrp->event_list);
+	spin_unlock(&cgrp->event_list_lock);
+
+	fput(cfile);
+	fput(efile);
+
+	return 0;
+
+fail:
+	if (cfile)
+		fput(cfile);
+
+	if (event && event->eventfd && !IS_ERR(event->eventfd))
+		eventfd_ctx_put(event->eventfd);
+
+	if (!IS_ERR_OR_NULL(efile))
+		fput(efile);
+
+	kfree(event);
+
+	return ret;
+}
+
 /*
  * for the common functions, 'private' gives the type of file
  */
@@ -2955,6 +3176,11 @@ static struct cftype files[] = {
 		.read_u64 = cgroup_read_notify_on_release,
 		.write_u64 = cgroup_write_notify_on_release,
 	},
+	{
+		.name = CGROUP_FILE_GENERIC_PREFIX "event_control",
+		.write_string = cgroup_write_event_control,
+		.mode = S_IWUGO,
+	},
 };
 
 static struct cftype cft_release_agent = {
-- 
cgit v1.2.3


From a0a4db548edcce067c1201ef25cf2bc29f32dca4 Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill@shutemov.name>
Date: Wed, 10 Mar 2010 15:22:34 -0800
Subject: cgroups: remove events before destroying subsystem state objects

Events should be removed after rmdir of cgroup directory, but before
destroying subsystem state objects.  Let's take reference to cgroup
directory dentry to do that.

Signed-off-by: Kirill A. Shutemov <kirill@shutemov.name>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hioryu@jp.fujitsu.com>
Cc: Paul Menage <menage@google.com>
Acked-by: Li Zefan <lizf@cn.fujitsu.com>
Cc: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: Pavel Emelyanov <xemul@openvz.org>
Cc: Dan Malek <dan@embeddedalley.com>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/cgroup.h | 3 ---
 kernel/cgroup.c        | 8 ++++++++
 mm/memcontrol.c        | 9 ---------
 3 files changed, 8 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index b4f2201321cd..b8ad1ea99586 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -396,9 +396,6 @@ struct cftype {
 	 * closes the eventfd or on cgroup removing.
 	 * This callback must be implemented, if you want provide
 	 * notification functionality.
-	 *
-	 * Be careful. It can be called after destroy(), so you have
-	 * to keep all nesessary data, until all events are removed.
 	 */
 	int (*unregister_event)(struct cgroup *cgrp, struct cftype *cft,
 			struct eventfd_ctx *eventfd);
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 87441fc75663..ef909a329750 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -2994,6 +2994,7 @@ static void cgroup_event_remove(struct work_struct *work)
 
 	eventfd_ctx_put(event->eventfd);
 	kfree(event);
+	dput(cgrp->dentry);
 }
 
 /*
@@ -3114,6 +3115,13 @@ static int cgroup_write_event_control(struct cgroup *cgrp, struct cftype *cft,
 		goto fail;
 	}
 
+	/*
+	 * Events should be removed after rmdir of cgroup directory, but before
+	 * destroying subsystem state objects. Let's take reference to cgroup
+	 * directory dentry to do that.
+	 */
+	dget(cgrp->dentry);
+
 	spin_lock(&cgrp->event_list_lock);
 	list_add(&event->list, &cgrp->event_list);
 	spin_unlock(&cgrp->event_list_lock);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index f9ae4b4c36eb..f7b910fc14fb 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3361,12 +3361,6 @@ static int mem_cgroup_register_event(struct cgroup *cgrp, struct cftype *cft,
 		}
 	}
 
-	/*
-	 * We need to increment refcnt to be sure that all thresholds
-	 * will be unregistered before calling __mem_cgroup_free()
-	 */
-	mem_cgroup_get(memcg);
-
 	if (type == _MEM)
 		rcu_assign_pointer(memcg->thresholds, thresholds_new);
 	else
@@ -3460,9 +3454,6 @@ assign:
 	/* To be sure that nobody uses thresholds before freeing it */
 	synchronize_rcu();
 
-	for (i = 0; i < thresholds->size - size; i++)
-		mem_cgroup_put(memcg);
-
 	kfree(thresholds);
 unlock:
 	mutex_unlock(&memcg->thresholds_lock);
-- 
cgit v1.2.3


From 867578cbccb0893cc14fc29c670f7185809c90d6 Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Wed, 10 Mar 2010 15:22:39 -0800
Subject: memcg: fix oom kill behavior

In current page-fault code,

	handle_mm_fault()
		-> ...
		-> mem_cgroup_charge()
		-> map page or handle error.
	-> check return code.

If page fault's return code is VM_FAULT_OOM, page_fault_out_of_memory() is
called.  But if it's caused by memcg, OOM should have been already
invoked.

Then, I added a patch: a636b327f731143ccc544b966cfd8de6cb6d72c6.  That
patch records last_oom_jiffies for memcg's sub-hierarchy and prevents
page_fault_out_of_memory from being invoked in near future.

But Nishimura-san reported that check by jiffies is not enough when the
system is terribly heavy.

This patch changes memcg's oom logic as.
 * If memcg causes OOM-kill, continue to retry.
 * remove jiffies check which is used now.
 * add memcg-oom-lock which works like perzone oom lock.
 * If current is killed(as a process), bypass charge.

Something more sophisticated can be added but this pactch does
fundamental things.
TODO:
 - add oom notifier
 - add permemcg disable-oom-kill flag and freezer at oom.
 - more chances for wake up oom waiter (when changing memory limit etc..)

Reviewed-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Tested-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h |   6 --
 mm/memcontrol.c            | 134 ++++++++++++++++++++++++++++++++++++---------
 mm/oom_kill.c              |   8 ---
 3 files changed, 107 insertions(+), 41 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 1f9b119f4ace..44301c6affa8 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -124,7 +124,6 @@ static inline bool mem_cgroup_disabled(void)
 	return false;
 }
 
-extern bool mem_cgroup_oom_called(struct task_struct *task);
 void mem_cgroup_update_file_mapped(struct page *page, int val);
 unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
 						gfp_t gfp_mask, int nid,
@@ -258,11 +257,6 @@ static inline bool mem_cgroup_disabled(void)
 	return true;
 }
 
-static inline bool mem_cgroup_oom_called(struct task_struct *task)
-{
-	return false;
-}
-
 static inline int
 mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg)
 {
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index f7b910fc14fb..7973b5221fb8 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -203,7 +203,7 @@ struct mem_cgroup {
 	 * Should the accounting and control be hierarchical, per subtree?
 	 */
 	bool use_hierarchy;
-	unsigned long	last_oom_jiffies;
+	atomic_t	oom_lock;
 	atomic_t	refcnt;
 
 	unsigned int	swappiness;
@@ -1246,32 +1246,102 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
 	return total;
 }
 
-bool mem_cgroup_oom_called(struct task_struct *task)
+static int mem_cgroup_oom_lock_cb(struct mem_cgroup *mem, void *data)
 {
-	bool ret = false;
-	struct mem_cgroup *mem;
-	struct mm_struct *mm;
+	int *val = (int *)data;
+	int x;
+	/*
+	 * Logically, we can stop scanning immediately when we find
+	 * a memcg is already locked. But condidering unlock ops and
+	 * creation/removal of memcg, scan-all is simple operation.
+	 */
+	x = atomic_inc_return(&mem->oom_lock);
+	*val = max(x, *val);
+	return 0;
+}
+/*
+ * Check OOM-Killer is already running under our hierarchy.
+ * If someone is running, return false.
+ */
+static bool mem_cgroup_oom_lock(struct mem_cgroup *mem)
+{
+	int lock_count = 0;
 
-	rcu_read_lock();
-	mm = task->mm;
-	if (!mm)
-		mm = &init_mm;
-	mem = mem_cgroup_from_task(rcu_dereference(mm->owner));
-	if (mem && time_before(jiffies, mem->last_oom_jiffies + HZ/10))
-		ret = true;
-	rcu_read_unlock();
-	return ret;
+	mem_cgroup_walk_tree(mem, &lock_count, mem_cgroup_oom_lock_cb);
+
+	if (lock_count == 1)
+		return true;
+	return false;
 }
 
-static int record_last_oom_cb(struct mem_cgroup *mem, void *data)
+static int mem_cgroup_oom_unlock_cb(struct mem_cgroup *mem, void *data)
 {
-	mem->last_oom_jiffies = jiffies;
+	/*
+	 * When a new child is created while the hierarchy is under oom,
+	 * mem_cgroup_oom_lock() may not be called. We have to use
+	 * atomic_add_unless() here.
+	 */
+	atomic_add_unless(&mem->oom_lock, -1, 0);
 	return 0;
 }
 
-static void record_last_oom(struct mem_cgroup *mem)
+static void mem_cgroup_oom_unlock(struct mem_cgroup *mem)
 {
-	mem_cgroup_walk_tree(mem, NULL, record_last_oom_cb);
+	mem_cgroup_walk_tree(mem, NULL,	mem_cgroup_oom_unlock_cb);
+}
+
+static DEFINE_MUTEX(memcg_oom_mutex);
+static DECLARE_WAIT_QUEUE_HEAD(memcg_oom_waitq);
+
+/*
+ * try to call OOM killer. returns false if we should exit memory-reclaim loop.
+ */
+bool mem_cgroup_handle_oom(struct mem_cgroup *mem, gfp_t mask)
+{
+	DEFINE_WAIT(wait);
+	bool locked;
+
+	/* At first, try to OOM lock hierarchy under mem.*/
+	mutex_lock(&memcg_oom_mutex);
+	locked = mem_cgroup_oom_lock(mem);
+	/*
+	 * Even if signal_pending(), we can't quit charge() loop without
+	 * accounting. So, UNINTERRUPTIBLE is appropriate. But SIGKILL
+	 * under OOM is always welcomed, use TASK_KILLABLE here.
+	 */
+	if (!locked)
+		prepare_to_wait(&memcg_oom_waitq, &wait, TASK_KILLABLE);
+	mutex_unlock(&memcg_oom_mutex);
+
+	if (locked)
+		mem_cgroup_out_of_memory(mem, mask);
+	else {
+		schedule();
+		finish_wait(&memcg_oom_waitq, &wait);
+	}
+	mutex_lock(&memcg_oom_mutex);
+	mem_cgroup_oom_unlock(mem);
+	/*
+	 * Here, we use global waitq .....more fine grained waitq ?
+	 * Assume following hierarchy.
+	 * A/
+	 *   01
+	 *   02
+	 * assume OOM happens both in A and 01 at the same time. Tthey are
+	 * mutually exclusive by lock. (kill in 01 helps A.)
+	 * When we use per memcg waitq, we have to wake up waiters on A and 02
+	 * in addtion to waiters on 01. We use global waitq for avoiding mess.
+	 * It will not be a big problem.
+	 * (And a task may be moved to other groups while it's waiting for OOM.)
+	 */
+	wake_up_all(&memcg_oom_waitq);
+	mutex_unlock(&memcg_oom_mutex);
+
+	if (test_thread_flag(TIF_MEMDIE) || fatal_signal_pending(current))
+		return false;
+	/* Give chance to dying process */
+	schedule_timeout(1);
+	return true;
 }
 
 /*
@@ -1443,11 +1513,14 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
 	struct res_counter *fail_res;
 	int csize = CHARGE_SIZE;
 
-	if (unlikely(test_thread_flag(TIF_MEMDIE))) {
-		/* Don't account this! */
-		*memcg = NULL;
-		return 0;
-	}
+	/*
+	 * Unlike gloval-vm's OOM-kill, we're not in memory shortage
+	 * in system level. So, allow to go ahead dying process in addition to
+	 * MEMDIE process.
+	 */
+	if (unlikely(test_thread_flag(TIF_MEMDIE)
+		     || fatal_signal_pending(current)))
+		goto bypass;
 
 	/*
 	 * We always charge the cgroup the mm_struct belongs to.
@@ -1560,11 +1633,15 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
 		}
 
 		if (!nr_retries--) {
-			if (oom) {
-				mem_cgroup_out_of_memory(mem_over_limit, gfp_mask);
-				record_last_oom(mem_over_limit);
+			if (!oom)
+				goto nomem;
+			if (mem_cgroup_handle_oom(mem_over_limit, gfp_mask)) {
+				nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
+				continue;
 			}
-			goto nomem;
+			/* When we reach here, current task is dying .*/
+			css_put(&mem->css);
+			goto bypass;
 		}
 	}
 	if (csize > PAGE_SIZE)
@@ -1574,6 +1651,9 @@ done:
 nomem:
 	css_put(&mem->css);
 	return -ENOMEM;
+bypass:
+	*memcg = NULL;
+	return 0;
 }
 
 /*
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 71d10bf52dc8..9b223af6a147 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -603,13 +603,6 @@ void pagefault_out_of_memory(void)
 		/* Got some memory back in the last second. */
 		return;
 
-	/*
-	 * If this is from memcg, oom-killer is already invoked.
-	 * and not worth to go system-wide-oom.
-	 */
-	if (mem_cgroup_oom_called(current))
-		goto rest_and_return;
-
 	if (sysctl_panic_on_oom)
 		panic("out of memory from page fault. panic_on_oom is selected.\n");
 
@@ -621,7 +614,6 @@ void pagefault_out_of_memory(void)
 	 * Give "p" a good chance of killing itself before we
 	 * retry to allocate memory.
 	 */
-rest_and_return:
 	if (!test_thread_flag(TIF_MEMDIE))
 		schedule_timeout_uninterruptible(1);
 }
-- 
cgit v1.2.3


From 7baab93f9297da3e42a8cecfbf91d5f22f415500 Mon Sep 17 00:00:00 2001
From: Miao Xie <miaox@cn.fujitsu.com>
Date: Wed, 10 Mar 2010 15:22:42 -0800
Subject: nodemask: fix the declaration of NODEMASK_ALLOC()

we can't declarate two variable at the same scope by NODEMASK_ALLOC().

This patch fixes it.

Signed-off-by: Miao Xie <miaox@cn.fujitsu.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Lee Schermerhorn <lee.schermerhorn@hp.com>
Cc: Nick Piggin <npiggin@suse.de>
Cc: Paul Menage <menage@google.com>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/nodemask.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h
index c4fa64b585ff..dba35e413371 100644
--- a/include/linux/nodemask.h
+++ b/include/linux/nodemask.h
@@ -483,7 +483,7 @@ static inline int num_node_state(enum node_states state)
 			type *name = kmalloc(sizeof(*name), gfp_flags)
 #define NODEMASK_FREE(m)			kfree(m)
 #else
-#define NODEMASK_ALLOC(type, name, gfp_flags)	type _name, *name = &_name
+#define NODEMASK_ALLOC(type, name, gfp_flags)	type _##name, *name = &_##name
 #define NODEMASK_FREE(m)			do {} while (0)
 #endif
 
-- 
cgit v1.2.3


From dacbe41f776db0a5a9aee1e41594f405c95778a5 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 10 Mar 2010 15:22:46 -0800
Subject: ptrace: move user_enable_single_step & co prototypes to
 linux/ptrace.h

While in theory user_enable_single_step/user_disable_single_step/
user_enable_blockstep could also be provided as an inline or macro there's
no good reason to do so, and having the prototype in one places keeps code
size and confusion down.

Roland said:

  The original thought there was that user_enable_single_step() et al
  might well be only an instruction or three on a sane machine (as if we
  have any of those!), and since there is only one call site inlining
  would be beneficial.  But I agree that there is no strong reason to care
  about inlining it.

  As to the arch changes, there is only one thought I'd add to the
  record.  It was always my thinking that for an arch where
  PTRACE_SINGLESTEP does text-modifying breakpoint insertion,
  user_enable_single_step() should not be provided.  That is,
  arch_has_single_step()=>true means that there is an arch facility with
  "pure" semantics that does not have any unexpected side effects.
  Inserting a breakpoint might do very unexpected strange things in
  multi-threaded situations.  Aside from that, it is a peculiar side
  effect that user_{enable,disable}_single_step() should cause COW
  de-sharing of text pages and so forth.  For PTRACE_SINGLESTEP, all these
  peculiarities are the status quo ante for that arch, so having
  arch_ptrace() itself do those is one thing.  But for building other
  things in the future, it is nicer to have a uniform "pure" semantics
  that arch-independent code can expect.

  OTOH, all such arch issues are really up to the arch maintainer.  As
  of today, there is nothing but ptrace using user_enable_single_step() et
  al so it's a distinction without a practical difference.  If/when there
  are other facilities that use user_enable_single_step() and might care,
  the affected arch's can revisit the question when someone cares about
  the quality of the arch support for said new facility.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Roland McGrath <roland@redhat.com>
Acked-by: David Howells <dhowells@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/frv/include/asm/ptrace.h     | 2 --
 arch/ia64/include/asm/ptrace.h    | 4 ----
 arch/m68k/include/asm/ptrace.h    | 8 --------
 arch/mn10300/include/asm/ptrace.h | 2 --
 arch/parisc/include/asm/ptrace.h  | 5 -----
 arch/powerpc/include/asm/ptrace.h | 7 -------
 arch/s390/include/asm/ptrace.h    | 3 ---
 arch/score/include/asm/ptrace.h   | 3 +--
 arch/sh/include/asm/ptrace.h      | 2 --
 arch/x86/include/asm/ptrace.h     | 7 -------
 include/linux/ptrace.h            | 5 +++++
 11 files changed, 6 insertions(+), 42 deletions(-)

(limited to 'include/linux')

diff --git a/arch/frv/include/asm/ptrace.h b/arch/frv/include/asm/ptrace.h
index a54b535c9e49..6bfad4cf1907 100644
--- a/arch/frv/include/asm/ptrace.h
+++ b/arch/frv/include/asm/ptrace.h
@@ -84,8 +84,6 @@ extern void show_regs(struct pt_regs *);
 #define task_pt_regs(task) ((task)->thread.frame0)
 
 #define arch_has_single_step()	(1)
-extern void user_enable_single_step(struct task_struct *);
-extern void user_disable_single_step(struct task_struct *);
 
 #endif /* !__ASSEMBLY__ */
 #endif /* __KERNEL__ */
diff --git a/arch/ia64/include/asm/ptrace.h b/arch/ia64/include/asm/ptrace.h
index 14055c636adf..7ae9c3f15a1c 100644
--- a/arch/ia64/include/asm/ptrace.h
+++ b/arch/ia64/include/asm/ptrace.h
@@ -319,11 +319,7 @@ static inline unsigned long user_stack_pointer(struct pt_regs *regs)
 	ptrace_attach_sync_user_rbs(child)
 
   #define arch_has_single_step()  (1)
-  extern void user_enable_single_step(struct task_struct *);
-  extern void user_disable_single_step(struct task_struct *);
-
   #define arch_has_block_step()   (1)
-  extern void user_enable_block_step(struct task_struct *);
 
 #endif /* !__KERNEL__ */
 
diff --git a/arch/m68k/include/asm/ptrace.h b/arch/m68k/include/asm/ptrace.h
index 21605c736f69..6e6e3ac1d913 100644
--- a/arch/m68k/include/asm/ptrace.h
+++ b/arch/m68k/include/asm/ptrace.h
@@ -87,18 +87,10 @@ struct switch_stack {
 #define profile_pc(regs) instruction_pointer(regs)
 extern void show_regs(struct pt_regs *);
 
-/*
- * These are defined as per linux/ptrace.h.
- */
-struct task_struct;
-
 #define arch_has_single_step()	(1)
-extern void user_enable_single_step(struct task_struct *);
-extern void user_disable_single_step(struct task_struct *);
 
 #ifdef CONFIG_MMU
 #define arch_has_block_step()	(1)
-extern void user_enable_block_step(struct task_struct *);
 #endif
 
 #endif /* __KERNEL__ */
diff --git a/arch/mn10300/include/asm/ptrace.h b/arch/mn10300/include/asm/ptrace.h
index 1b0ba5e182b0..7c2e911052b6 100644
--- a/arch/mn10300/include/asm/ptrace.h
+++ b/arch/mn10300/include/asm/ptrace.h
@@ -99,8 +99,6 @@ struct task_struct;
 extern void show_regs(struct pt_regs *);
 
 #define arch_has_single_step()	(1)
-extern void user_enable_single_step(struct task_struct *);
-extern void user_disable_single_step(struct task_struct *);
 
 #endif  /*  !__ASSEMBLY  */
 
diff --git a/arch/parisc/include/asm/ptrace.h b/arch/parisc/include/asm/ptrace.h
index aead40b16dd8..7f09533da771 100644
--- a/arch/parisc/include/asm/ptrace.h
+++ b/arch/parisc/include/asm/ptrace.h
@@ -47,13 +47,8 @@ struct pt_regs {
 
 #define task_regs(task) ((struct pt_regs *) ((char *)(task) + TASK_REGS))
 
-struct task_struct;
 #define arch_has_single_step()	1
-void user_disable_single_step(struct task_struct *task);
-void user_enable_single_step(struct task_struct *task);
-
 #define arch_has_block_step()	1
-void user_enable_block_step(struct task_struct *task);
 
 /* XXX should we use iaoq[1] or iaoq[0] ? */
 #define user_mode(regs)			(((regs)->iaoq[0] & 3) ? 1 : 0)
diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h
index b45108126562..9e2d84c06b74 100644
--- a/arch/powerpc/include/asm/ptrace.h
+++ b/arch/powerpc/include/asm/ptrace.h
@@ -137,15 +137,8 @@ do {									      \
 } while (0)
 #endif /* __powerpc64__ */
 
-/*
- * These are defined as per linux/ptrace.h, which see.
- */
 #define arch_has_single_step()	(1)
 #define arch_has_block_step()	(!cpu_has_feature(CPU_FTR_601))
-extern void user_enable_single_step(struct task_struct *);
-extern void user_enable_block_step(struct task_struct *);
-extern void user_disable_single_step(struct task_struct *);
-
 #define ARCH_HAS_USER_SINGLE_STEP_INFO
 
 #endif /* __ASSEMBLY__ */
diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h
index dd2d913afcae..fef9b33cdd59 100644
--- a/arch/s390/include/asm/ptrace.h
+++ b/arch/s390/include/asm/ptrace.h
@@ -489,9 +489,6 @@ struct user_regs_struct
  * These are defined as per linux/ptrace.h, which see.
  */
 #define arch_has_single_step()	(1)
-struct task_struct;
-extern void user_enable_single_step(struct task_struct *);
-extern void user_disable_single_step(struct task_struct *);
 extern void show_regs(struct pt_regs * regs);
 
 #define user_mode(regs) (((regs)->psw.mask & PSW_MASK_PSTATE) != 0)
diff --git a/arch/score/include/asm/ptrace.h b/arch/score/include/asm/ptrace.h
index d40e691f23e2..e89dc9b1ef49 100644
--- a/arch/score/include/asm/ptrace.h
+++ b/arch/score/include/asm/ptrace.h
@@ -90,8 +90,7 @@ extern int read_tsk_short(struct task_struct *, unsigned long,
 			 unsigned short *);
 
 #define arch_has_single_step()	(1)
-extern void user_enable_single_step(struct task_struct *);
-extern void user_disable_single_step(struct task_struct *);
+
 #endif /* __KERNEL__ */
 
 #endif /* _ASM_SCORE_PTRACE_H */
diff --git a/arch/sh/include/asm/ptrace.h b/arch/sh/include/asm/ptrace.h
index e11b14ea2c43..2168fde25611 100644
--- a/arch/sh/include/asm/ptrace.h
+++ b/arch/sh/include/asm/ptrace.h
@@ -123,8 +123,6 @@ extern void show_regs(struct pt_regs *);
 struct task_struct;
 
 #define arch_has_single_step()	(1)
-extern void user_enable_single_step(struct task_struct *);
-extern void user_disable_single_step(struct task_struct *);
 
 struct perf_event;
 struct perf_sample_data;
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index 20102808b191..69a686a7dff0 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -274,14 +274,7 @@ static inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs,
 		return 0;
 }
 
-/*
- * These are defined as per linux/ptrace.h, which see.
- */
 #define arch_has_single_step()	(1)
-extern void user_enable_single_step(struct task_struct *);
-extern void user_disable_single_step(struct task_struct *);
-
-extern void user_enable_block_step(struct task_struct *);
 #ifdef CONFIG_X86_DEBUGCTLMSR
 #define arch_has_block_step()	(1)
 #else
diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index c5eab89da51e..e1fb60729979 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -264,6 +264,9 @@ static inline void user_enable_single_step(struct task_struct *task)
 static inline void user_disable_single_step(struct task_struct *task)
 {
 }
+#else
+extern void user_enable_single_step(struct task_struct *);
+extern void user_disable_single_step(struct task_struct *);
 #endif	/* arch_has_single_step */
 
 #ifndef arch_has_block_step
@@ -291,6 +294,8 @@ static inline void user_enable_block_step(struct task_struct *task)
 {
 	BUG();			/* This can never be called.  */
 }
+#else
+extern void user_enable_block_step(struct task_struct *);
 #endif	/* arch_has_block_step */
 
 #ifdef ARCH_HAS_USER_SINGLE_STEP_INFO
-- 
cgit v1.2.3


From 4dd66e69d472f0ba5355a2529364d0db9a18a02b Mon Sep 17 00:00:00 2001
From: Veaceslav Falico <vfalico@redhat.com>
Date: Wed, 10 Mar 2010 15:23:02 -0800
Subject: copy_signal() cleanup: kill taskstats_tgid_init() and
 acct_init_pacct()

Kill unused functions taskstats_tgid_init() and acct_init_pacct() because
we don't use them anywhere after using kmem_cache_zalloc() in
copy_signal().

Signed-off-by: Veaceslav Falico <vfalico@redhat.com>
Cc: Roland McGrath <roland@redhat.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/acct.h           |  2 --
 include/linux/taskstats_kern.h |  7 -------
 kernel/acct.c                  | 10 ----------
 3 files changed, 19 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/acct.h b/include/linux/acct.h
index 882dc7248766..93f46096ad4c 100644
--- a/include/linux/acct.h
+++ b/include/linux/acct.h
@@ -123,14 +123,12 @@ struct pacct_struct;
 struct pid_namespace;
 extern void acct_auto_close_mnt(struct vfsmount *m);
 extern void acct_auto_close(struct super_block *sb);
-extern void acct_init_pacct(struct pacct_struct *pacct);
 extern void acct_collect(long exitcode, int group_dead);
 extern void acct_process(void);
 extern void acct_exit_ns(struct pid_namespace *);
 #else
 #define acct_auto_close_mnt(x)	do { } while (0)
 #define acct_auto_close(x)	do { } while (0)
-#define acct_init_pacct(x)	do { } while (0)
 #define acct_collect(x,y)	do { } while (0)
 #define acct_process()		do { } while (0)
 #define acct_exit_ns(ns)	do { } while (0)
diff --git a/include/linux/taskstats_kern.h b/include/linux/taskstats_kern.h
index 3398f4553269..b6523c1427ce 100644
--- a/include/linux/taskstats_kern.h
+++ b/include/linux/taskstats_kern.h
@@ -14,11 +14,6 @@
 extern struct kmem_cache *taskstats_cache;
 extern struct mutex taskstats_exit_mutex;
 
-static inline void taskstats_tgid_init(struct signal_struct *sig)
-{
-	sig->stats = NULL;
-}
-
 static inline void taskstats_tgid_free(struct signal_struct *sig)
 {
 	if (sig->stats)
@@ -30,8 +25,6 @@ extern void taskstats_init_early(void);
 #else
 static inline void taskstats_exit(struct task_struct *tsk, int group_dead)
 {}
-static inline void taskstats_tgid_init(struct signal_struct *sig)
-{}
 static inline void taskstats_tgid_free(struct signal_struct *sig)
 {}
 static inline void taskstats_init_early(void)
diff --git a/kernel/acct.c b/kernel/acct.c
index a6605ca921b6..24f8c81fc48d 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -587,16 +587,6 @@ out:
 	revert_creds(orig_cred);
 }
 
-/**
- * acct_init_pacct - initialize a new pacct_struct
- * @pacct: per-process accounting info struct to initialize
- */
-void acct_init_pacct(struct pacct_struct *pacct)
-{
-	memset(pacct, 0, sizeof(struct pacct_struct));
-	pacct->ac_utime = pacct->ac_stime = cputime_zero;
-}
-
 /**
  * acct_collect - collect accounting information into pacct_struct
  * @exitcode: task exit code
-- 
cgit v1.2.3


From 93c59907c6f247d09239135caecf294a106a2ae0 Mon Sep 17 00:00:00 2001
From: Veaceslav Falico <vfalico@redhat.com>
Date: Wed, 10 Mar 2010 15:23:03 -0800
Subject: copy_signal() cleanup: clean thread_group_cputime_init()

Remove unneeded initializations in thread_group_cputime_init() and in
posix_cpu_timers_init_group().  They are useless after kmem_cache_zalloc()
was used in copy_signal().

Signed-off-by: Veaceslav Falico <vfalico@redhat.com>
Acked-by: Oleg Nesterov <oleg@redhat.com>
Cc: Roland McGrath <roland@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sched.h |  2 --
 kernel/fork.c         | 11 -----------
 2 files changed, 13 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 46c6f8d5dc06..ca635c128482 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2391,9 +2391,7 @@ void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times);
 
 static inline void thread_group_cputime_init(struct signal_struct *sig)
 {
-	sig->cputimer.cputime = INIT_CPUTIME;
 	spin_lock_init(&sig->cputimer.lock);
-	sig->cputimer.running = 0;
 }
 
 static inline void thread_group_cputime_free(struct signal_struct *sig)
diff --git a/kernel/fork.c b/kernel/fork.c
index ce2666f84d85..1beb6c303c41 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -833,17 +833,6 @@ static void posix_cpu_timers_init_group(struct signal_struct *sig)
 	/* Thread group counters. */
 	thread_group_cputime_init(sig);
 
-	/* Expiration times and increments. */
-	sig->it[CPUCLOCK_PROF].expires = cputime_zero;
-	sig->it[CPUCLOCK_PROF].incr = cputime_zero;
-	sig->it[CPUCLOCK_VIRT].expires = cputime_zero;
-	sig->it[CPUCLOCK_VIRT].incr = cputime_zero;
-
-	/* Cached expiration times. */
-	sig->cputime_expires.prof_exp = cputime_zero;
-	sig->cputime_expires.virt_exp = cputime_zero;
-	sig->cputime_expires.sched_exp = 0;
-
 	cpu_limit = ACCESS_ONCE(sig->rlim[RLIMIT_CPU].rlim_cur);
 	if (cpu_limit != RLIM_INFINITY) {
 		sig->cputime_expires.prof_exp = secs_to_cputime(cpu_limit);
-- 
cgit v1.2.3


From 6edb6764409392836b44a61b06d94954efd6200f Mon Sep 17 00:00:00 2001
From: Corey Minyard <minyard@acm.org>
Date: Wed, 10 Mar 2010 15:23:07 -0800
Subject: ipmi: remove ipmi_smi.h self-include

There is no need for linux/ipmi_smi.h to include itself.

Signed-off-by: Corey Minyard <minyard@acm.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/ipmi_smi.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/ipmi_smi.h b/include/linux/ipmi_smi.h
index f7c9c75a2775..4b48318ac542 100644
--- a/include/linux/ipmi_smi.h
+++ b/include/linux/ipmi_smi.h
@@ -39,7 +39,6 @@
 #include <linux/module.h>
 #include <linux/device.h>
 #include <linux/platform_device.h>
-#include <linux/ipmi_smi.h>
 
 /* This files describes the interface for IPMI system management interface
    drivers to bind into the IPMI message handler. */
-- 
cgit v1.2.3


From 8467005da3ef6104b89a4cc5e9c9d9445b75565f Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Wed, 10 Mar 2010 15:23:10 -0800
Subject: nsproxy: remove INIT_NSPROXY()

Remove INIT_NSPROXY(), use C99 initializer.
Remove INIT_IPC_NS(), INIT_NET_NS() while I'm at it.

Note: headers trim will be done later, now it's quite pointless because
results will be invalidated by merge window.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Acked-by: Serge Hallyn <serue@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/init_task.h     |  8 --------
 include/linux/ipc_namespace.h |  5 -----
 include/net/net_namespace.h   |  5 -----
 kernel/nsproxy.c              | 13 ++++++++++++-
 4 files changed, 12 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index abec69b63d7e..b1ed1cd8e2a8 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -32,14 +32,6 @@ extern struct fs_struct init_fs;
 }
 
 extern struct nsproxy init_nsproxy;
-#define INIT_NSPROXY(nsproxy) {						\
-	.pid_ns		= &init_pid_ns,					\
-	.count		= ATOMIC_INIT(1),				\
-	.uts_ns		= &init_uts_ns,					\
-	.mnt_ns		= NULL,						\
-	INIT_NET_NS(net_ns)                                             \
-	INIT_IPC_NS(ipc_ns)						\
-}
 
 #define INIT_SIGHAND(sighand) {						\
 	.count		= ATOMIC_INIT(1), 				\
diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
index 07baa38bce37..51952989ad42 100644
--- a/include/linux/ipc_namespace.h
+++ b/include/linux/ipc_namespace.h
@@ -62,11 +62,6 @@ extern struct ipc_namespace init_ipc_ns;
 extern atomic_t nr_ipc_ns;
 
 extern spinlock_t mq_lock;
-#if defined(CONFIG_POSIX_MQUEUE) || defined(CONFIG_SYSVIPC)
-#define INIT_IPC_NS(ns)		.ns		= &init_ipc_ns,
-#else
-#define INIT_IPC_NS(ns)
-#endif
 
 #ifdef CONFIG_SYSVIPC
 extern int register_ipcns_notifier(struct ipc_namespace *);
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 82b7be4db89a..bd10a7908993 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -100,14 +100,9 @@ struct net {
 extern struct net init_net;
 
 #ifdef CONFIG_NET
-#define INIT_NET_NS(net_ns) .net_ns = &init_net,
-
 extern struct net *copy_net_ns(unsigned long flags, struct net *net_ns);
 
 #else /* CONFIG_NET */
-
-#define INIT_NET_NS(net_ns)
-
 static inline struct net *copy_net_ns(unsigned long flags, struct net *net_ns)
 {
 	/* There is nothing to copy so this is a noop */
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index 09b4ff9711b2..2ab67233ee8f 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -24,7 +24,18 @@
 
 static struct kmem_cache *nsproxy_cachep;
 
-struct nsproxy init_nsproxy = INIT_NSPROXY(init_nsproxy);
+struct nsproxy init_nsproxy = {
+	.count	= ATOMIC_INIT(1),
+	.uts_ns	= &init_uts_ns,
+#if defined(CONFIG_POSIX_MQUEUE) || defined(CONFIG_SYSVIPC)
+	.ipc_ns	= &init_ipc_ns,
+#endif
+	.mnt_ns	= NULL,
+	.pid_ns	= &init_pid_ns,
+#ifdef CONFIG_NET
+	.net_ns	= &init_net,
+#endif
+};
 
 static inline struct nsproxy *create_nsproxy(void)
 {
-- 
cgit v1.2.3


From 5637f2df8d56b64697c1ee5c96cf0d6b650b30cb Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Date: Wed, 10 Mar 2010 15:23:21 -0800
Subject: pci-dma: add include/linux/pci-dma.h

This patch adds include/linux/pci-dma.h that defines the pci_unmap state
API:

DECLARE_PCI_UNMAP_ADDR(ADDR_NAME)
DECLARE_PCI_UNMAP_LEN(LEN_NAME)
pci_unmap_addr(PTR, ADDR_NAME)
pci_unmap_addr_set(PTR, ADDR_NAME, VAL)
pci_unmap_len(PTR, LEN_NAME)
pci_unmap_len_set(PTR, LEN_NAME, VAL)

This enables us to remove lots of the duplication in architecture
implementations since there are only two ways to define the API.

If architectures define CONFIG_NEED_DMA_MAP_STATE, they get the real
definition of pci_unmap state API.  If not, they get the noop definition.

In the long term, it's better to replace the API with the generic device
model API such as DECLARE_DMA_UNMAP_ADDR.  We can map the API to the
generic one (like dma-mapping-compat.h does).  This patch also makes the
migration process easier.  We can remove this file after the migration.

It might be simpler to add the API to include/linux/pci.h but looks it's
already too large.  We'll remove pci-dma.h after finishing moving to the
generic device model.  So I put the API to a separate file.

Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Richard Henderson <rth@twiddle.net>
Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru>
Cc: Matt Turner <mattst88@gmail.com>
Acked-by: Russell King <rmk+kernel@arm.linux.org.uk>
Acked-by: David Howells <dhowells@redhat.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Kyle McMartin <kyle@mcmartin.ca>
Cc: Helge Deller <deller@gmx.de>
Cc: James Bottomley <James.Bottomley@suse.de>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: David S. Miller <davem@davemloft.net>
Cc: Chris Zankel <chris@zankel.net>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Cc: Jesse Barnes <jbarnes@virtuousgeek.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/pci-dma.h | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)
 create mode 100644 include/linux/pci-dma.h

(limited to 'include/linux')

diff --git a/include/linux/pci-dma.h b/include/linux/pci-dma.h
new file mode 100644
index 000000000000..cfd63ab09abc
--- /dev/null
+++ b/include/linux/pci-dma.h
@@ -0,0 +1,20 @@
+#ifndef _LINUX_PCI_DMA_H
+#define _LINUX_PCI_DMA_H
+
+#ifdef CONFIG_NEED_DMA_MAP_STATE
+#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME)        dma_addr_t ADDR_NAME;
+#define DECLARE_PCI_UNMAP_LEN(LEN_NAME)          __u32 LEN_NAME;
+#define pci_unmap_addr(PTR, ADDR_NAME)           ((PTR)->ADDR_NAME)
+#define pci_unmap_addr_set(PTR, ADDR_NAME, VAL)  (((PTR)->ADDR_NAME) = (VAL))
+#define pci_unmap_len(PTR, LEN_NAME)             ((PTR)->LEN_NAME)
+#define pci_unmap_len_set(PTR, LEN_NAME, VAL)    (((PTR)->LEN_NAME) = (VAL))
+#else
+#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME)
+#define DECLARE_PCI_UNMAP_LEN(LEN_NAME)
+#define pci_unmap_addr(PTR, ADDR_NAME)           (0)
+#define pci_unmap_addr_set(PTR, ADDR_NAME, VAL)  do { } while (0)
+#define pci_unmap_len(PTR, LEN_NAME)             (0)
+#define pci_unmap_len_set(PTR, LEN_NAME, VAL)    do { } while (0)
+#endif
+
+#endif
-- 
cgit v1.2.3


From f41b177157718abe9a93868bb76e47d4a6f3681d Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Date: Wed, 10 Mar 2010 15:23:30 -0800
Subject: pci-dma: add linux/pci-dma.h to linux/pci.h

All the architectures properly set NEED_DMA_MAP_STATE now so we can safely
add linux/pci-dma.h to linux/pci.h and remove the linux/pci-dma.h
inclusion in arch's asm/pci.h

Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Cc: Jesse Barnes <jbarnes@virtuousgeek.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/alpha/include/asm/pci.h   | 2 --
 arch/arm/include/asm/pci.h     | 2 --
 arch/cris/include/asm/pci.h    | 2 --
 arch/frv/include/asm/pci.h     | 2 --
 arch/ia64/include/asm/pci.h    | 2 --
 arch/mips/include/asm/pci.h    | 2 --
 arch/parisc/include/asm/pci.h  | 2 --
 arch/powerpc/include/asm/pci.h | 2 --
 arch/sh/include/asm/pci.h      | 2 --
 arch/sparc/include/asm/pci.h   | 2 --
 arch/x86/include/asm/pci.h     | 2 --
 arch/xtensa/include/asm/pci.h  | 2 --
 include/linux/pci.h            | 1 +
 13 files changed, 1 insertion(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/arch/alpha/include/asm/pci.h b/arch/alpha/include/asm/pci.h
index 47659464ef45..e1846ba6aaba 100644
--- a/arch/alpha/include/asm/pci.h
+++ b/arch/alpha/include/asm/pci.h
@@ -119,8 +119,6 @@ pci_dma_mapping_error(struct pci_dev *pdev, dma_addr_t dma_addr)
 extern void pci_unmap_single(struct pci_dev *, dma_addr_t, size_t, int);
 extern void pci_unmap_page(struct pci_dev *, dma_addr_t, size_t, int);
 
-#include <linux/pci-dma.h>
-
 /* Map a set of buffers described by scatterlist in streaming mode for
    PCI DMA.  This is the scatter-gather version of the above
    pci_map_single interface.  Here the scatter gather list elements
diff --git a/arch/arm/include/asm/pci.h b/arch/arm/include/asm/pci.h
index aea3450dd27a..47980118d0a5 100644
--- a/arch/arm/include/asm/pci.h
+++ b/arch/arm/include/asm/pci.h
@@ -30,8 +30,6 @@ static inline void pcibios_penalize_isa_irq(int irq, int active)
  */
 #define PCI_DMA_BUS_IS_PHYS     (1)
 
-#include <linux/pci-dma.h>
-
 #ifdef CONFIG_PCI
 static inline void pci_dma_burst_advice(struct pci_dev *pdev,
 					enum pci_dma_burst_strategy *strat,
diff --git a/arch/cris/include/asm/pci.h b/arch/cris/include/asm/pci.h
index 43cfa0ca0583..9f1cd56da28c 100644
--- a/arch/cris/include/asm/pci.h
+++ b/arch/cris/include/asm/pci.h
@@ -44,8 +44,6 @@ struct pci_dev;
  */
 #define PCI_DMA_BUS_IS_PHYS	(1)
 
-#include <linux/pci-dma.h>
-
 #define HAVE_PCI_MMAP
 extern int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
 			       enum pci_mmap_state mmap_state, int write_combine);
diff --git a/arch/frv/include/asm/pci.h b/arch/frv/include/asm/pci.h
index 05db569e52e8..0d5997909850 100644
--- a/arch/frv/include/asm/pci.h
+++ b/arch/frv/include/asm/pci.h
@@ -43,8 +43,6 @@ extern void pci_free_consistent(struct pci_dev *hwdev, size_t size,
 /* Return the index of the PCI controller for device PDEV. */
 #define pci_controller_num(PDEV)	(0)
 
-#include <linux/pci-dma.h>
-
 #ifdef CONFIG_PCI
 static inline void pci_dma_burst_advice(struct pci_dev *pdev,
 					enum pci_dma_burst_strategy *strat,
diff --git a/arch/ia64/include/asm/pci.h b/arch/ia64/include/asm/pci.h
index 4adf22762277..73b5f785e70c 100644
--- a/arch/ia64/include/asm/pci.h
+++ b/arch/ia64/include/asm/pci.h
@@ -56,8 +56,6 @@ pcibios_penalize_isa_irq (int irq, int active)
 
 #include <asm-generic/pci-dma-compat.h>
 
-#include <linux/pci-dma.h>
-
 #ifdef CONFIG_PCI
 static inline void pci_dma_burst_advice(struct pci_dev *pdev,
 					enum pci_dma_burst_strategy *strat,
diff --git a/arch/mips/include/asm/pci.h b/arch/mips/include/asm/pci.h
index 9b196f8f7060..3beea1479b43 100644
--- a/arch/mips/include/asm/pci.h
+++ b/arch/mips/include/asm/pci.h
@@ -102,8 +102,6 @@ struct pci_dev;
  */
 extern unsigned int PCI_DMA_BUS_IS_PHYS;
 
-#include <linux/pci-dma.h>
-
 #ifdef CONFIG_PCI
 static inline void pci_dma_burst_advice(struct pci_dev *pdev,
 					enum pci_dma_burst_strategy *strat,
diff --git a/arch/parisc/include/asm/pci.h b/arch/parisc/include/asm/pci.h
index 632088d0aa67..2242a5c636c2 100644
--- a/arch/parisc/include/asm/pci.h
+++ b/arch/parisc/include/asm/pci.h
@@ -183,8 +183,6 @@ struct pci_bios_ops {
 	void (*fixup_bus)(struct pci_bus *bus);
 };
 
-#include <linux/pci-dma.h>
-
 /*
 ** Stuff declared in arch/parisc/kernel/pci.c
 */
diff --git a/arch/powerpc/include/asm/pci.h b/arch/powerpc/include/asm/pci.h
index 4a9991ba249a..a20a9ad2258b 100644
--- a/arch/powerpc/include/asm/pci.h
+++ b/arch/powerpc/include/asm/pci.h
@@ -141,8 +141,6 @@ extern int pci_mmap_legacy_page_range(struct pci_bus *bus,
 
 #define HAVE_PCI_LEGACY	1
 
-#include <linux/pci-dma.h>
-
 #ifdef CONFIG_PPC64
 
 /* The PCI address space does not equal the physical memory address
diff --git a/arch/sh/include/asm/pci.h b/arch/sh/include/asm/pci.h
index 991c2a424b24..8bd952fcf3ba 100644
--- a/arch/sh/include/asm/pci.h
+++ b/arch/sh/include/asm/pci.h
@@ -83,8 +83,6 @@ static inline void pcibios_penalize_isa_irq(int irq, int active)
  */
 #define PCI_DMA_BUS_IS_PHYS	(dma_ops->is_phys)
 
-#include <linux/pci-dma.h>
-
 #ifdef CONFIG_PCI
 /*
  * None of the SH PCI controllers support MWI, it is always treated as a
diff --git a/arch/sparc/include/asm/pci.h b/arch/sparc/include/asm/pci.h
index 5ce773eded12..d9c031f9910f 100644
--- a/arch/sparc/include/asm/pci.h
+++ b/arch/sparc/include/asm/pci.h
@@ -6,8 +6,6 @@
 #include <asm/pci_32.h>
 #endif
 
-#include <linux/pci-dma.h>
-
 #include <asm-generic/pci-dma-compat.h>
 
 #endif
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
index e2655dc9b9cd..404a880ea325 100644
--- a/arch/x86/include/asm/pci.h
+++ b/arch/x86/include/asm/pci.h
@@ -108,8 +108,6 @@ void dma32_reserve_bootmem(void);
 /* implement the pci_ DMA API in terms of the generic device dma_ one */
 #include <asm-generic/pci-dma-compat.h>
 
-#include <linux/pci-dma.h>
-
 /* generic pci stuff */
 #include <asm-generic/pci.h>
 #define PCIBIOS_MAX_MEM_32 0xffffffff
diff --git a/arch/xtensa/include/asm/pci.h b/arch/xtensa/include/asm/pci.h
index f3f0cf3439ad..4609b0f15f1f 100644
--- a/arch/xtensa/include/asm/pci.h
+++ b/arch/xtensa/include/asm/pci.h
@@ -56,8 +56,6 @@ struct pci_dev;
 
 #define PCI_DMA_BUS_IS_PHYS	(1)
 
-#include <linux/pci-dma.h>
-
 /* Map a range of PCI memory or I/O space for a device into user space */
 int pci_mmap_page_range(struct pci_dev *pdev, struct vm_area_struct *vma,
                         enum pci_mmap_state mmap_state, int write_combine);
diff --git a/include/linux/pci.h b/include/linux/pci.h
index cd5809a5963e..7fd5c574efae 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -904,6 +904,7 @@ int pci_set_vga_state(struct pci_dev *pdev, bool decode,
 		      unsigned int command_bits, bool change_bridge);
 /* kmem_cache style wrapper around pci_alloc_consistent() */
 
+#include <linux/pci-dma.h>
 #include <linux/dmapool.h>
 
 #define	pci_pool dma_pool
-- 
cgit v1.2.3


From 0acedc124aca35f5cce9d4ee288dc372bf517e09 Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Date: Wed, 10 Mar 2010 15:23:31 -0800
Subject: dma-mapping.h: add the dma_unmap state API

Adds the following macros:

DECLARE_DMA_UNMAP_ADDR(ADDR_NAME)
DECLARE_DMA_UNMAP_LEN(LEN_NAME)
dma_unmap_addr(PTR, ADDR_NAME)
dma_unmap_addr_set(PTR, ADDR_NAME, VAL)
dma_unmap_len(PTR, LEN_NAME)
dma_unmap_len_set(PTR, LEN_NAME, VAL)

The API corresponds to the pci_unmap state API.  We'll move to this new
generic API from the PCI specific API in the long term.  As
include/asm-generic/pci-dma-compat.h does, the pci_unmap API simply calls
the new generic API for some time.

Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Cc: James Bottomley <James.Bottomley@suse.de>
Cc: David S. Miller <davem@davemloft.net>
Cc: Jesse Barnes <jbarnes@virtuousgeek.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/DMA-API.txt   | 58 +++++++++++++++++++++++++++++++++++++++++++++
 include/linux/dma-mapping.h | 16 +++++++++++++
 include/linux/pci-dma.h     | 21 +++++-----------
 3 files changed, 80 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/DMA-API.txt b/Documentation/DMA-API.txt
index 364a6cb444a5..29a48fbae779 100644
--- a/Documentation/DMA-API.txt
+++ b/Documentation/DMA-API.txt
@@ -472,6 +472,64 @@ void whizco_dma_map_sg_attrs(struct device *dev, dma_addr_t dma_addr,
 	....
 
 
+Part Ie - Optimizing Unmap State Space Consumption
+--------------------------------
+
+On some platforms, dma_unmap_{single,page}() is simply a nop.
+Therefore, keeping track of the mapping address and length is a waste
+of space. Instead of filling your drivers up with ifdefs and the like
+to "work around" this (which would defeat the whole purpose of a
+portable API) the following facilities are provided.
+
+Actually, instead of describing the macros one by one, we'll
+transform some example code.
+
+1) Use DEFINE_DMA_UNMAP_{ADDR,LEN} in state saving structures.
+   Example, before:
+
+	struct ring_state {
+		struct sk_buff *skb;
+		dma_addr_t mapping;
+		__u32 len;
+	};
+
+   after:
+
+	struct ring_state {
+		struct sk_buff *skb;
+		DEFINE_DMA_UNMAP_ADDR(mapping);
+		DEFINE_DMA_UNMAP_LEN(len);
+	};
+
+2) Use dma_unmap_{addr,len}_set to set these values.
+   Example, before:
+
+	ringp->mapping = FOO;
+	ringp->len = BAR;
+
+   after:
+
+	dma_unmap_addr_set(ringp, mapping, FOO);
+	dma_unmap_len_set(ringp, len, BAR);
+
+3) Use dma_unmap_{addr,len} to access these values.
+   Example, before:
+
+	dma_unmap_single(dev, ringp->mapping, ringp->len,
+			 DMA_FROM_DEVICE);
+
+   after:
+
+	dma_unmap_single(dev,
+			 dma_unmap_addr(ringp, mapping),
+			 dma_unmap_len(ringp, len),
+			 DMA_FROM_DEVICE);
+
+It really should be self-explanatory.  We treat the ADDR and LEN
+separately, because it is possible for an implementation to only
+need the address in order to perform the unmap operation.
+
+
 Part II - Advanced dma_ usage
 -----------------------------
 
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index 91b761846061..c5ac9d49cc06 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -232,4 +232,20 @@ struct dma_attrs;
 
 #endif /* CONFIG_HAVE_DMA_ATTRS */
 
+#ifdef CONFIG_NEED_DMA_MAP_STATE
+#define DEFINE_DMA_UNMAP_ADDR(ADDR_NAME)        dma_addr_t ADDR_NAME
+#define DEFINE_DMA_UNMAP_LEN(LEN_NAME)          __u32 LEN_NAME
+#define dma_unmap_addr(PTR, ADDR_NAME)           ((PTR)->ADDR_NAME)
+#define dma_unmap_addr_set(PTR, ADDR_NAME, VAL)  (((PTR)->ADDR_NAME) = (VAL))
+#define dma_unmap_len(PTR, LEN_NAME)             ((PTR)->LEN_NAME)
+#define dma_unmap_len_set(PTR, LEN_NAME, VAL)    (((PTR)->LEN_NAME) = (VAL))
+#else
+#define DEFINE_DMA_UNMAP_ADDR(ADDR_NAME)
+#define DEFINE_DMA_UNMAP_LEN(LEN_NAME)
+#define dma_unmap_addr(PTR, ADDR_NAME)           (0)
+#define dma_unmap_addr_set(PTR, ADDR_NAME, VAL)  do { } while (0)
+#define dma_unmap_len(PTR, LEN_NAME)             (0)
+#define dma_unmap_len_set(PTR, LEN_NAME, VAL)    do { } while (0)
+#endif
+
 #endif
diff --git a/include/linux/pci-dma.h b/include/linux/pci-dma.h
index cfd63ab09abc..549a041f9c08 100644
--- a/include/linux/pci-dma.h
+++ b/include/linux/pci-dma.h
@@ -1,20 +1,11 @@
 #ifndef _LINUX_PCI_DMA_H
 #define _LINUX_PCI_DMA_H
 
-#ifdef CONFIG_NEED_DMA_MAP_STATE
-#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME)        dma_addr_t ADDR_NAME;
-#define DECLARE_PCI_UNMAP_LEN(LEN_NAME)          __u32 LEN_NAME;
-#define pci_unmap_addr(PTR, ADDR_NAME)           ((PTR)->ADDR_NAME)
-#define pci_unmap_addr_set(PTR, ADDR_NAME, VAL)  (((PTR)->ADDR_NAME) = (VAL))
-#define pci_unmap_len(PTR, LEN_NAME)             ((PTR)->LEN_NAME)
-#define pci_unmap_len_set(PTR, LEN_NAME, VAL)    (((PTR)->LEN_NAME) = (VAL))
-#else
-#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME)
-#define DECLARE_PCI_UNMAP_LEN(LEN_NAME)
-#define pci_unmap_addr(PTR, ADDR_NAME)           (0)
-#define pci_unmap_addr_set(PTR, ADDR_NAME, VAL)  do { } while (0)
-#define pci_unmap_len(PTR, LEN_NAME)             (0)
-#define pci_unmap_len_set(PTR, LEN_NAME, VAL)    do { } while (0)
-#endif
+#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME) DEFINE_DMA_UNMAP_ADDR(ADDR_NAME);
+#define DECLARE_PCI_UNMAP_LEN(LEN_NAME)   DEFINE_DMA_UNMAP_LEN(LEN_NAME);
+#define pci_unmap_addr             dma_unmap_addr
+#define pci_unmap_addr_set         dma_unmap_addr_set
+#define pci_unmap_len              dma_unmap_len
+#define pci_unmap_len_set          dma_unmap_len_set
 
 #endif
-- 
cgit v1.2.3


From 6a1961f49ee8d7339ea2454443dfc0460e0b2748 Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Date: Wed, 10 Mar 2010 15:23:39 -0800
Subject: dma-mapping: dma-mapping.h: add dma_set_coherent_mask

dma_set_coherent_mask corresponds to pci_set_consistent_dma_mask.  This is
necessary to move to the generic device model DMA API from the PCI bus
specific API in the long term.

dma_set_coherent_mask works in the exact same way that
pci_set_consistent_dma_mask does.  So this patch also changes
pci_set_consistent_dma_mask to call dma_set_coherent_mask.

Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Cc: James Bottomley <James.Bottomley@suse.de>
Cc: David S. Miller <davem@davemloft.net>
Cc: Jesse Barnes <jbarnes@virtuousgeek.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: Greg KH <greg@kroah.com>
Cc: Kay Sievers <kay.sievers@vrfy.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/DMA-API.txt   | 10 ++++++++++
 drivers/pci/pci.c           |  7 +++----
 include/linux/dma-mapping.h |  8 ++++++++
 3 files changed, 21 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/DMA-API.txt b/Documentation/DMA-API.txt
index 29a48fbae779..0fc5728ed487 100644
--- a/Documentation/DMA-API.txt
+++ b/Documentation/DMA-API.txt
@@ -167,6 +167,16 @@ parameters if it is.
 
 Returns: 0 if successful and a negative error if not.
 
+int
+dma_set_coherent_mask(struct device *dev, u64 mask)
+int
+pci_set_consistent_dma_mask(struct pci_device *dev, u64 mask)
+
+Checks to see if the mask is possible and updates the device
+parameters if it is.
+
+Returns: 0 if successful and a negative error if not.
+
 u64
 dma_get_required_mask(struct device *dev)
 
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index b2d23d1b0d41..929fd3932032 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -2315,12 +2315,11 @@ pci_set_dma_mask(struct pci_dev *dev, u64 mask)
 int
 pci_set_consistent_dma_mask(struct pci_dev *dev, u64 mask)
 {
-	if (!pci_dma_supported(dev, mask))
-		return -EIO;
+	int ret = dma_set_coherent_mask(&dev->dev, mask);
+	if (ret)
+		return ret;
 
-	dev->dev.coherent_dma_mask = mask;
 	dev_dbg(&dev->dev, "using %dbit consistent DMA mask\n", fls64(mask));
-
 	return 0;
 }
 #endif
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index c5ac9d49cc06..ca32ed78b057 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -127,6 +127,14 @@ static inline u64 dma_get_mask(struct device *dev)
 	return DMA_BIT_MASK(32);
 }
 
+static inline int dma_set_coherent_mask(struct device *dev, u64 mask)
+{
+	if (!dma_supported(dev, mask))
+		return -EIO;
+	dev->coherent_dma_mask = mask;
+	return 0;
+}
+
 extern u64 dma_get_required_mask(struct device *dev);
 
 static inline unsigned int dma_get_max_seg_size(struct device *dev)
-- 
cgit v1.2.3


From 5f3cd1e0bb452c31a306a3e764514ea2eaf7d2e0 Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Date: Wed, 10 Mar 2010 15:23:41 -0800
Subject: dma-mapping: pci: move pci_set_dma_mask and
 pci_set_consistent_dma_mask to pci-dma-compat.h

We can use pci-dma-compat.h to implement pci_set_dma_mask and
pci_set_consistent_dma_mask as we do with the other PCI DMA API.

We can remove HAVE_ARCH_PCI_SET_DMA_MASK too.

Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Cc: Jesse Barnes <jbarnes@virtuousgeek.org>
Cc: Greg KH <greg@kroah.com>
Cc: Kay Sievers <kay.sievers@vrfy.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/pci/pci.c                    | 28 ----------------------------
 include/asm-generic/pci-dma-compat.h | 15 ++++++++++++---
 include/linux/pci.h                  |  2 --
 3 files changed, 12 insertions(+), 33 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 929fd3932032..cb1dd5f4988c 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -2298,32 +2298,6 @@ void pci_msi_off(struct pci_dev *dev)
 	}
 }
 
-#ifndef HAVE_ARCH_PCI_SET_DMA_MASK
-/*
- * These can be overridden by arch-specific implementations
- */
-int
-pci_set_dma_mask(struct pci_dev *dev, u64 mask)
-{
-	int ret = dma_set_mask(&dev->dev, mask);
-	if (ret)
-		return ret;
-	dev_dbg(&dev->dev, "using %dbit DMA mask\n", fls64(mask));
-	return 0;
-}
-
-int
-pci_set_consistent_dma_mask(struct pci_dev *dev, u64 mask)
-{
-	int ret = dma_set_coherent_mask(&dev->dev, mask);
-	if (ret)
-		return ret;
-
-	dev_dbg(&dev->dev, "using %dbit consistent DMA mask\n", fls64(mask));
-	return 0;
-}
-#endif
-
 #ifndef HAVE_ARCH_PCI_SET_DMA_MAX_SEGMENT_SIZE
 int pci_set_dma_max_seg_size(struct pci_dev *dev, unsigned int size)
 {
@@ -3065,8 +3039,6 @@ EXPORT_SYMBOL(pci_set_mwi);
 EXPORT_SYMBOL(pci_try_set_mwi);
 EXPORT_SYMBOL(pci_clear_mwi);
 EXPORT_SYMBOL_GPL(pci_intx);
-EXPORT_SYMBOL(pci_set_dma_mask);
-EXPORT_SYMBOL(pci_set_consistent_dma_mask);
 EXPORT_SYMBOL(pci_assign_resource);
 EXPORT_SYMBOL(pci_find_parent_resource);
 EXPORT_SYMBOL(pci_select_bars);
diff --git a/include/asm-generic/pci-dma-compat.h b/include/asm-generic/pci-dma-compat.h
index 37b3706226e7..1437b7da09b2 100644
--- a/include/asm-generic/pci-dma-compat.h
+++ b/include/asm-generic/pci-dma-compat.h
@@ -6,9 +6,6 @@
 
 #include <linux/dma-mapping.h>
 
-/* note pci_set_dma_mask isn't here, since it's a public function
- * exported from drivers/pci, use dma_supported instead */
-
 static inline int
 pci_dma_supported(struct pci_dev *hwdev, u64 mask)
 {
@@ -104,4 +101,16 @@ pci_dma_mapping_error(struct pci_dev *pdev, dma_addr_t dma_addr)
 	return dma_mapping_error(&pdev->dev, dma_addr);
 }
 
+#ifdef CONFIG_PCI
+static inline int pci_set_dma_mask(struct pci_dev *dev, u64 mask)
+{
+	return dma_set_mask(&dev->dev, mask);
+}
+
+static inline int pci_set_consistent_dma_mask(struct pci_dev *dev, u64 mask)
+{
+	return dma_set_coherent_mask(&dev->dev, mask);
+}
+#endif
+
 #endif
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 7fd5c574efae..a788fa12ff31 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -769,8 +769,6 @@ int pci_try_set_mwi(struct pci_dev *dev);
 void pci_clear_mwi(struct pci_dev *dev);
 void pci_intx(struct pci_dev *dev, int enable);
 void pci_msi_off(struct pci_dev *dev);
-int pci_set_dma_mask(struct pci_dev *dev, u64 mask);
-int pci_set_consistent_dma_mask(struct pci_dev *dev, u64 mask);
 int pci_set_dma_max_seg_size(struct pci_dev *dev, unsigned int size);
 int pci_set_dma_seg_boundary(struct pci_dev *dev, unsigned long mask);
 int pcix_get_max_mmrbc(struct pci_dev *dev);
-- 
cgit v1.2.3


From b3e63afe8a74c0134d05a551cc74facc3b3ec0d7 Mon Sep 17 00:00:00 2001
From: Rodolfo Giometti <giometti@linux.it>
Date: Wed, 10 Mar 2010 15:23:45 -0800
Subject: ldisc: new dcd_change() method for line disciplines

Signed-off-by: Rodolfo Giometti <giometti@linux.it>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Greg KH <greg@kroah.com>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Cc: Alexander Gordeev <lasaine@lvk.cs.msu.su>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/serial/tty.txt | 4 ++++
 include/linux/tty_ldisc.h    | 8 ++++++++
 2 files changed, 12 insertions(+)

(limited to 'include/linux')

diff --git a/Documentation/serial/tty.txt b/Documentation/serial/tty.txt
index 5e5349a4fcd2..7c900507279f 100644
--- a/Documentation/serial/tty.txt
+++ b/Documentation/serial/tty.txt
@@ -105,6 +105,10 @@ write_wakeup()	-	May be called at any point between open and close.
 			is permitted to call the driver write method from
 			this function. In such a situation defer it.
 
+dcd_change()	-	Report to the tty line the current DCD pin status
+			changes and the relative timestamp. The timestamp
+			can be NULL.
+
 
 Driver Access
 
diff --git a/include/linux/tty_ldisc.h b/include/linux/tty_ldisc.h
index 0c4ee9b88f85..526d66f066a3 100644
--- a/include/linux/tty_ldisc.h
+++ b/include/linux/tty_ldisc.h
@@ -99,6 +99,12 @@
  *	cease I/O to the tty driver. Can sleep. The driver should
  *	seek to perform this action quickly but should wait until
  *	any pending driver I/O is completed.
+ *
+ * void (*dcd_change)(struct tty_struct *tty, unsigned int status,
+ * 			struct timespec *ts)
+ *
+ *	Tells the discipline that the DCD pin has changed its status and
+ *	the relative timestamp. Pointer ts can be NULL.
  */
 
 #include <linux/fs.h>
@@ -136,6 +142,8 @@ struct tty_ldisc_ops {
 	void	(*receive_buf)(struct tty_struct *, const unsigned char *cp,
 			       char *fp, int count);
 	void	(*write_wakeup)(struct tty_struct *);
+	void	(*dcd_change)(struct tty_struct *, unsigned int,
+				struct timespec *);
 
 	struct  module *owner;
 	
-- 
cgit v1.2.3


From 572b9adbd40b5565dc413db04af9cc234f72bf19 Mon Sep 17 00:00:00 2001
From: Rodolfo Giometti <giometti@linux.it>
Date: Wed, 10 Mar 2010 15:23:46 -0800
Subject: ldisc n_tty: add new method n_tty_inherit_ops()

This new method can be used to init a new struct tty_ldisc_ops as the
default tty_ldisc_N_TTY struct.

Signed-off-by: Rodolfo Giometti <giometti@linux.it>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Greg KH <greg@kroah.com>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Cc: Alexander Gordeev <lasaine@lvk.cs.msu.su>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/char/n_tty.c | 17 +++++++++++++++++
 include/linux/tty.h  |  1 +
 2 files changed, 18 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/char/n_tty.c b/drivers/char/n_tty.c
index 2e50f4dfc79c..bdae8327143c 100644
--- a/drivers/char/n_tty.c
+++ b/drivers/char/n_tty.c
@@ -48,6 +48,7 @@
 #include <linux/audit.h>
 #include <linux/file.h>
 #include <linux/uaccess.h>
+#include <linux/module.h>
 
 #include <asm/system.h>
 
@@ -2091,3 +2092,19 @@ struct tty_ldisc_ops tty_ldisc_N_TTY = {
 	.receive_buf     = n_tty_receive_buf,
 	.write_wakeup    = n_tty_write_wakeup
 };
+
+/**
+ *	n_tty_inherit_ops	-	inherit N_TTY methods
+ *	@ops: struct tty_ldisc_ops where to save N_TTY methods
+ *
+ *	Used by a generic struct tty_ldisc_ops to easily inherit N_TTY
+ *	methods.
+ */
+
+void n_tty_inherit_ops(struct tty_ldisc_ops *ops)
+{
+	*ops = tty_ldisc_N_TTY;
+	ops->owner = NULL;
+	ops->refcount = ops->flags = 0;
+}
+EXPORT_SYMBOL_GPL(n_tty_inherit_ops);
diff --git a/include/linux/tty.h b/include/linux/tty.h
index d96e5882f129..568369a86306 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -514,6 +514,7 @@ extern void tty_ldisc_enable(struct tty_struct *tty);
 
 /* n_tty.c */
 extern struct tty_ldisc_ops tty_ldisc_N_TTY;
+extern void n_tty_inherit_ops(struct tty_ldisc_ops *ops);
 
 /* tty_audit.c */
 #ifdef CONFIG_AUDIT
-- 
cgit v1.2.3


From a0880df0ccde8d551fc4d88c455acb2ee0801e26 Mon Sep 17 00:00:00 2001
From: Rodolfo Giometti <giometti@linux.it>
Date: Wed, 10 Mar 2010 15:23:47 -0800
Subject: pps: serial clients support

Adds support, by using the PPS line discipline, for the PPS sources
connected with the CD (Carrier Detect) pin of a serial port.

[akpm@linux-foundation.org: fix cast size warnings]
Signed-off-by: Rodolfo Giometti <giometti@linux.it>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Greg KH <greg@kroah.com>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Cc: Alexander Gordeev <lasaine@lvk.cs.msu.su>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/pps/clients/Kconfig     |   7 ++
 drivers/pps/clients/Makefile    |   1 +
 drivers/pps/clients/pps-ldisc.c | 154 ++++++++++++++++++++++++++++++++++++++++
 include/linux/serial_core.h     |  11 ++-
 4 files changed, 172 insertions(+), 1 deletion(-)
 create mode 100644 drivers/pps/clients/pps-ldisc.c

(limited to 'include/linux')

diff --git a/drivers/pps/clients/Kconfig b/drivers/pps/clients/Kconfig
index 43ccd3b644f0..4e801bd7254f 100644
--- a/drivers/pps/clients/Kconfig
+++ b/drivers/pps/clients/Kconfig
@@ -15,4 +15,11 @@ config PPS_CLIENT_KTIMER
 	  This driver can also be built as a module.  If so, the module
 	  will be called pps-ktimer.
 
+config PPS_CLIENT_LDISC
+	tristate "PPS line discipline"
+	depends on PPS
+	help
+	  If you say yes here you get support for a PPS source connected
+	  with the CD (Carrier Detect) pin of your serial port.
+
 endif
diff --git a/drivers/pps/clients/Makefile b/drivers/pps/clients/Makefile
index 115572ae3e99..812c9b19b430 100644
--- a/drivers/pps/clients/Makefile
+++ b/drivers/pps/clients/Makefile
@@ -3,6 +3,7 @@
 #
 
 obj-$(CONFIG_PPS_CLIENT_KTIMER)	+= pps-ktimer.o
+obj-$(CONFIG_PPS_CLIENT_LDISC)	+= pps-ldisc.o
 
 ifeq ($(CONFIG_PPS_DEBUG),y)
 EXTRA_CFLAGS += -DDEBUG
diff --git a/drivers/pps/clients/pps-ldisc.c b/drivers/pps/clients/pps-ldisc.c
new file mode 100644
index 000000000000..8e1932d29fd4
--- /dev/null
+++ b/drivers/pps/clients/pps-ldisc.c
@@ -0,0 +1,154 @@
+/*
+ * pps-ldisc.c -- PPS line discipline
+ *
+ *
+ * Copyright (C) 2008	Rodolfo Giometti <giometti@linux.it>
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation; either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/module.h>
+#include <linux/serial_core.h>
+#include <linux/tty.h>
+#include <linux/pps_kernel.h>
+
+#define PPS_TTY_MAGIC		0x0001
+
+static void pps_tty_dcd_change(struct tty_struct *tty, unsigned int status,
+				struct timespec *ts)
+{
+	int id = (long)tty->disc_data;
+	struct timespec __ts;
+	struct pps_ktime pps_ts;
+
+	/* First of all we get the time stamp... */
+	getnstimeofday(&__ts);
+
+	/* Does caller give us a timestamp? */
+	if (ts) {	/* Yes. Let's use it! */
+		pps_ts.sec = ts->tv_sec;
+		pps_ts.nsec = ts->tv_nsec;
+	} else {	/* No. Do it ourself! */
+		pps_ts.sec = __ts.tv_sec;
+		pps_ts.nsec = __ts.tv_nsec;
+	}
+
+	/* Now do the PPS event report */
+	pps_event(id, &pps_ts, status ? PPS_CAPTUREASSERT : PPS_CAPTURECLEAR,
+			NULL);
+
+	pr_debug("PPS %s at %lu on source #%d\n",
+			status ? "assert" : "clear", jiffies, id);
+}
+
+static int (*alias_n_tty_open)(struct tty_struct *tty);
+
+static int pps_tty_open(struct tty_struct *tty)
+{
+	struct pps_source_info info;
+	struct tty_driver *drv = tty->driver;
+	int index = tty->index + drv->name_base;
+	int ret;
+
+	info.owner = THIS_MODULE;
+	info.dev = NULL;
+	snprintf(info.name, PPS_MAX_NAME_LEN, "%s%d", drv->driver_name, index);
+	snprintf(info.path, PPS_MAX_NAME_LEN, "/dev/%s%d", drv->name, index);
+	info.mode = PPS_CAPTUREBOTH | \
+			PPS_OFFSETASSERT | PPS_OFFSETCLEAR | \
+			PPS_CANWAIT | PPS_TSFMT_TSPEC;
+
+	ret = pps_register_source(&info, PPS_CAPTUREBOTH | \
+				PPS_OFFSETASSERT | PPS_OFFSETCLEAR);
+	if (ret < 0) {
+		pr_err("cannot register PPS source \"%s\"\n", info.path);
+		return ret;
+	}
+	tty->disc_data = (void *)(long)ret;
+
+	/* Should open N_TTY ldisc too */
+	ret = alias_n_tty_open(tty);
+	if (ret < 0)
+		pps_unregister_source((long)tty->disc_data);
+
+	pr_info("PPS source #%d \"%s\" added\n", ret, info.path);
+
+	return 0;
+}
+
+static void (*alias_n_tty_close)(struct tty_struct *tty);
+
+static void pps_tty_close(struct tty_struct *tty)
+{
+	int id = (long)tty->disc_data;
+
+	pps_unregister_source(id);
+	alias_n_tty_close(tty);
+
+	pr_info("PPS source #%d removed\n", id);
+}
+
+static struct tty_ldisc_ops pps_ldisc_ops;
+
+/*
+ * Module stuff
+ */
+
+static int __init pps_tty_init(void)
+{
+	int err;
+
+	/* Inherit the N_TTY's ops */
+	n_tty_inherit_ops(&pps_ldisc_ops);
+
+	/* Save N_TTY's open()/close() methods */
+	alias_n_tty_open = pps_ldisc_ops.open;
+	alias_n_tty_close = pps_ldisc_ops.close;
+
+	/* Init PPS_TTY data */
+	pps_ldisc_ops.owner = THIS_MODULE;
+	pps_ldisc_ops.magic = PPS_TTY_MAGIC;
+	pps_ldisc_ops.name = "pps_tty";
+	pps_ldisc_ops.dcd_change = pps_tty_dcd_change;
+	pps_ldisc_ops.open = pps_tty_open;
+	pps_ldisc_ops.close = pps_tty_close;
+
+	err = tty_register_ldisc(N_PPS, &pps_ldisc_ops);
+	if (err)
+		pr_err("can't register PPS line discipline\n");
+	else
+		pr_info("PPS line discipline registered\n");
+
+	return err;
+}
+
+static void __exit pps_tty_cleanup(void)
+{
+	int err;
+
+	err = tty_unregister_ldisc(N_PPS);
+	if (err)
+		pr_err("can't unregister PPS line discipline\n");
+	else
+		pr_info("PPS line discipline removed\n");
+}
+
+module_init(pps_tty_init);
+module_exit(pps_tty_cleanup);
+
+MODULE_ALIAS_LDISC(N_PPS);
+MODULE_AUTHOR("Rodolfo Giometti <giometti@linux.it>");
+MODULE_DESCRIPTION("PPS TTY device driver");
+MODULE_LICENSE("GPL");
diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index 8c3dd36fe91a..78dd1e7120a9 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -491,9 +491,13 @@ uart_handle_dcd_change(struct uart_port *uport, unsigned int status)
 {
 	struct uart_state *state = uport->state;
 	struct tty_port *port = &state->port;
+	struct tty_ldisc *ld = tty_ldisc_ref(port->tty);
+	struct timespec ts;
 
-	uport->icount.dcd++;
+	if (ld && ld->ops->dcd_change)
+		getnstimeofday(&ts);
 
+	uport->icount.dcd++;
 #ifdef CONFIG_HARD_PPS
 	if ((uport->flags & UPF_HARDPPS_CD) && status)
 		hardpps();
@@ -505,6 +509,11 @@ uart_handle_dcd_change(struct uart_port *uport, unsigned int status)
 		else if (port->tty)
 			tty_hangup(port->tty);
 	}
+
+	if (ld && ld->ops->dcd_change)
+		ld->ops->dcd_change(port->tty, status, &ts);
+	if (ld)
+		tty_ldisc_deref(ld);
 }
 
 /**
-- 
cgit v1.2.3


From 5ceaa2f39bfa73c4398cd01e78f1c3ebde3d3383 Mon Sep 17 00:00:00 2001
From: Russell King <rmk@arm.linux.org.uk>
Date: Wed, 10 Mar 2010 15:23:53 -0800
Subject: decompress: fix new decompressor for PIC

The ARM kernel decompressor wants to be able to relocate r/w data
independently from the rest of the image, and we do this by ensuring that
r/w data has global visibility.  Define STATIC_RW_DATA to be empty to
achieve this.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Cc: Alain Knaff <alain@knaff.lu>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/decompress/mm.h | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/decompress/mm.h b/include/linux/decompress/mm.h
index 5032b9a31ae7..ad5ec1d0475e 100644
--- a/include/linux/decompress/mm.h
+++ b/include/linux/decompress/mm.h
@@ -14,11 +14,21 @@
 
 /* Code active when included from pre-boot environment: */
 
+/*
+ * Some architectures want to ensure there is no local data in their
+ * pre-boot environment, so that data can arbitarily relocated (via
+ * GOT references).  This is achieved by defining STATIC_RW_DATA to
+ * be null.
+ */
+#ifndef STATIC_RW_DATA
+#define STATIC_RW_DATA static
+#endif
+
 /* A trivial malloc implementation, adapted from
  *  malloc by Hannu Savolainen 1993 and Matthias Urlichs 1994
  */
-static unsigned long malloc_ptr;
-static int malloc_count;
+STATIC_RW_DATA unsigned long malloc_ptr;
+STATIC_RW_DATA int malloc_count;
 
 static void *malloc(int size)
 {
-- 
cgit v1.2.3


From eb5572fed55f4c2b7dbc42582bc82dcb47632380 Mon Sep 17 00:00:00 2001
From: Dave Young <hidave.darkstar@gmail.com>
Date: Wed, 10 Mar 2010 15:23:59 -0800
Subject: sysctl extern cleanup: C_A_D

Extern declarations in sysctl.c should be moved to their own header file,
and then include them in relavant .c files.

Move C_A_D extern variable declaration to linux/reboot.h

Signed-off-by: Dave Young <hidave.darkstar@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/reboot.h | 1 +
 kernel/sysctl.c        | 1 -
 2 files changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/reboot.h b/include/linux/reboot.h
index 988e55fe649b..3005d5a7fce5 100644
--- a/include/linux/reboot.h
+++ b/include/linux/reboot.h
@@ -64,6 +64,7 @@ extern void kernel_restart(char *cmd);
 extern void kernel_halt(void);
 extern void kernel_power_off(void);
 
+extern int C_A_D; /* for sysctl */
 void ctrl_alt_del(void);
 
 #define POWEROFF_CMD_PATH_LEN	256
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 0ef19c614f6d..72c3b1e80d7b 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -65,7 +65,6 @@
 #if defined(CONFIG_SYSCTL)
 
 /* External variables not in a header file. */
-extern int C_A_D;
 extern int print_fatal_signals;
 extern int sysctl_overcommit_memory;
 extern int sysctl_overcommit_ratio;
-- 
cgit v1.2.3


From d33ed52d57e794eba55cea3f5eab3c8f80b6cb5a Mon Sep 17 00:00:00 2001
From: Dave Young <hidave.darkstar@gmail.com>
Date: Wed, 10 Mar 2010 15:23:59 -0800
Subject: sysctl extern cleanup: signal

Extern declarations in sysctl.c should be moved to their own header file,
and then include them in relavant .c files.

Move print_fatal_signals extern declaration to linux/signal.h

Signed-off-by: Dave Young <hidave.darkstar@gmail.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Roland McGrath <roland@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/signal.h | 2 ++
 kernel/sysctl.c        | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/signal.h b/include/linux/signal.h
index ab9272cc270c..fcd2b14b1932 100644
--- a/include/linux/signal.h
+++ b/include/linux/signal.h
@@ -7,6 +7,8 @@
 #ifdef __KERNEL__
 #include <linux/list.h>
 
+/* for sysctl */
+extern int print_fatal_signals;
 /*
  * Real Time signals may be queued.
  */
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 72c3b1e80d7b..a8fd10a9a501 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -23,6 +23,7 @@
 #include <linux/swap.h>
 #include <linux/slab.h>
 #include <linux/sysctl.h>
+#include <linux/signal.h>
 #include <linux/proc_fs.h>
 #include <linux/security.h>
 #include <linux/ctype.h>
@@ -65,7 +66,6 @@
 #if defined(CONFIG_SYSCTL)
 
 /* External variables not in a header file. */
-extern int print_fatal_signals;
 extern int sysctl_overcommit_memory;
 extern int sysctl_overcommit_ratio;
 extern int sysctl_panic_on_oom;
-- 
cgit v1.2.3


From e5ab67726f33b50f40db0ccf271ceb3c658554d5 Mon Sep 17 00:00:00 2001
From: Dave Young <hidave.darkstar@gmail.com>
Date: Wed, 10 Mar 2010 15:24:05 -0800
Subject: sysctl extern cleanup: rcu

Extern declarations in sysctl.c should be moved to their own header file,
and then include them in relavant .c files.

Move rcutorture_runnable extern declaration to linux/rcupdate.h

Signed-off-by: Dave Young <hidave.darkstar@gmail.com>
Acked-by: Josh Triplett <josh@joshtriplett.org>
Reviewed-by: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/rcupdate.h | 4 ++++
 kernel/sysctl.c          | 3 ---
 2 files changed, 4 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index c84373626336..a005cac5e302 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -41,6 +41,10 @@
 #include <linux/lockdep.h>
 #include <linux/completion.h>
 
+#ifdef CONFIG_RCU_TORTURE_TEST
+extern int rcutorture_runnable; /* for sysctl */
+#endif /* #ifdef CONFIG_RCU_TORTURE_TEST */
+
 /**
  * struct rcu_head - callback structure for use with RCU
  * @next: next update requests in a list
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index a8fd10a9a501..f18aaa7b0d65 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -87,9 +87,6 @@ extern int sysctl_nr_open_min, sysctl_nr_open_max;
 #ifndef CONFIG_MMU
 extern int sysctl_nr_trim_pages;
 #endif
-#ifdef CONFIG_RCU_TORTURE_TEST
-extern int rcutorture_runnable;
-#endif /* #ifdef CONFIG_RCU_TORTURE_TEST */
 #ifdef CONFIG_BLOCK
 extern int blk_iopoll_enabled;
 #endif
-- 
cgit v1.2.3


From 5ed109103d73b0bafc92e860cead56725231384d Mon Sep 17 00:00:00 2001
From: Dave Young <hidave.darkstar@gmail.com>
Date: Wed, 10 Mar 2010 15:24:06 -0800
Subject: sysctl extern cleanup: module

Extern declarations in sysctl.c should be moved to their own header file,
and then include them in relavant .c files.

Move modprobe_path extern declaration to linux/kmod.h
Move modules_disabled extern declaration to linux/module.h

Signed-off-by: Dave Young <hidave.darkstar@gmail.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kmod.h   | 1 +
 include/linux/module.h | 1 +
 kernel/sysctl.c        | 4 ----
 3 files changed, 2 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kmod.h b/include/linux/kmod.h
index 384ca8bbf1ac..facb27fe7de0 100644
--- a/include/linux/kmod.h
+++ b/include/linux/kmod.h
@@ -27,6 +27,7 @@
 #define KMOD_PATH_LEN 256
 
 #ifdef CONFIG_MODULES
+extern char modprobe_path[]; /* for sysctl */
 /* modprobe exit status on success, -ve on error.  Return value
  * usually useless though. */
 extern int __request_module(bool wait, const char *name, ...) \
diff --git a/include/linux/module.h b/include/linux/module.h
index dd618eb026aa..5e869ffd34aa 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -175,6 +175,7 @@ struct notifier_block;
 
 #ifdef CONFIG_MODULES
 
+extern int modules_disabled; /* for sysctl */
 /* Get/put a kernel symbol (calls must be symmetric) */
 void *__symbol_get(const char *symbol);
 void *__symbol_get_gpl(const char *symbol);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index f18aaa7b0d65..44e9492368fd 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -116,10 +116,6 @@ static int min_percpu_pagelist_fract = 8;
 
 static int ngroups_max = NGROUPS_MAX;
 
-#ifdef CONFIG_MODULES
-extern char modprobe_path[];
-extern int modules_disabled;
-#endif
 #ifdef CONFIG_CHR_DEV_SG
 extern int sg_big_buff;
 #endif
-- 
cgit v1.2.3


From c55b7c3e82d0ad58f35a0785faaaf2f70b9b6cd3 Mon Sep 17 00:00:00 2001
From: Dave Young <hidave.darkstar@gmail.com>
Date: Wed, 10 Mar 2010 15:24:08 -0800
Subject: sysctl extern cleanup: acct

Extern declarations in sysctl.c should be moved to their own header file,
and then include them in relavant .c files.

Move acct_parm extern declaration to linux/acct.h

Signed-off-by: Dave Young <hidave.darkstar@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/acct.h | 1 +
 kernel/sysctl.c      | 7 +++----
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/acct.h b/include/linux/acct.h
index 93f46096ad4c..3e4737fa6cce 100644
--- a/include/linux/acct.h
+++ b/include/linux/acct.h
@@ -121,6 +121,7 @@ struct vfsmount;
 struct super_block;
 struct pacct_struct;
 struct pid_namespace;
+extern int acct_parm[]; /* for sysctl */
 extern void acct_auto_close_mnt(struct vfsmount *m);
 extern void acct_auto_close(struct super_block *sb);
 extern void acct_collect(long exitcode, int group_dead);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 5290c437f151..7635bb15f5af 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -61,6 +61,9 @@
 #include <asm/stacktrace.h>
 #include <asm/io.h>
 #endif
+#ifdef CONFIG_BSD_PROCESS_ACCT
+#include <linux/acct.h>
+#endif
 #ifdef CONFIG_CHR_DEV_SG
 #include <scsi/sg.h>
 #endif
@@ -140,10 +143,6 @@ extern int sysctl_userprocess_debug;
 extern int spin_retry;
 #endif
 
-#ifdef CONFIG_BSD_PROCESS_ACCT
-extern int acct_parm[];
-#endif
-
 #ifdef CONFIG_IA64
 extern int no_unaligned_warning;
 extern int unaligned_dump_stack;
-- 
cgit v1.2.3


From 4f0e056fdebc15d3f4724ebc7bbf323158add1d7 Mon Sep 17 00:00:00 2001
From: Dave Young <hidave.darkstar@gmail.com>
Date: Wed, 10 Mar 2010 15:24:09 -0800
Subject: sysctl extern cleanup: rtmutex

Extern declarations in sysctl.c should be moved to their own header file,
and then include them in relavant .c files.

Move max_lock_depth extern declaration to linux/rtmutex.h

Signed-off-by: Dave Young <hidave.darkstar@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/rtmutex.h | 2 ++
 kernel/sysctl.c         | 7 +++----
 2 files changed, 5 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rtmutex.h b/include/linux/rtmutex.h
index 281d8fd775e8..8d522ffeda33 100644
--- a/include/linux/rtmutex.h
+++ b/include/linux/rtmutex.h
@@ -16,6 +16,8 @@
 #include <linux/plist.h>
 #include <linux/spinlock_types.h>
 
+extern int max_lock_depth; /* for sysctl */
+
 /**
  * The rt_mutex structure
  *
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 7635bb15f5af..622029ba5103 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -64,6 +64,9 @@
 #ifdef CONFIG_BSD_PROCESS_ACCT
 #include <linux/acct.h>
 #endif
+#ifdef CONFIG_RT_MUTEXES
+#include <linux/rtmutex.h>
+#endif
 #ifdef CONFIG_CHR_DEV_SG
 #include <scsi/sg.h>
 #endif
@@ -150,10 +153,6 @@ extern int unaligned_dump_stack;
 
 extern struct ratelimit_state printk_ratelimit_state;
 
-#ifdef CONFIG_RT_MUTEXES
-extern int max_lock_depth;
-#endif
-
 #ifdef CONFIG_PROC_SYSCTL
 static int proc_do_cad_pid(struct ctl_table *table, int write,
 		  void __user *buffer, size_t *lenp, loff_t *ppos);
-- 
cgit v1.2.3


From 2edf5e49800846a2b2b6461d99cdae18067c440f Mon Sep 17 00:00:00 2001
From: Dave Young <hidave.darkstar@gmail.com>
Date: Wed, 10 Mar 2010 15:24:10 -0800
Subject: sysctl extern cleanup: lockdep

Extern declarations in sysctl.c should be moved to their own header file,
and then include them in relavant .c files.

Move lockdep extern declarations to linux/lockdep.h

Signed-off-by: Dave Young <hidave.darkstar@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/lockdep.h | 4 ++++
 kernel/sysctl.c         | 6 +++---
 2 files changed, 7 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 10206a87da19..a03977a96d7e 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -12,6 +12,10 @@
 struct task_struct;
 struct lockdep_map;
 
+/* for sysctl */
+extern int prove_locking;
+extern int lock_stat;
+
 #ifdef CONFIG_LOCKDEP
 
 #include <linux/linkage.h>
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 622029ba5103..8686b0f5fc12 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -67,6 +67,9 @@
 #ifdef CONFIG_RT_MUTEXES
 #include <linux/rtmutex.h>
 #endif
+#if defined(CONFIG_PROVE_LOCKING) || defined(CONFIG_LOCK_STAT)
+#include <linux/lockdep.h>
+#endif
 #ifdef CONFIG_CHR_DEV_SG
 #include <scsi/sg.h>
 #endif
@@ -191,9 +194,6 @@ extern struct ctl_table epoll_table[];
 int sysctl_legacy_va_layout;
 #endif
 
-extern int prove_locking;
-extern int lock_stat;
-
 /* The default sysctl tables: */
 
 static struct ctl_table root_table[] = {
-- 
cgit v1.2.3


From 9ff99339447de403a46be5e3f23d0c794d540b06 Mon Sep 17 00:00:00 2001
From: Dave Young <hidave.darkstar@gmail.com>
Date: Wed, 10 Mar 2010 15:24:10 -0800
Subject: sysctl extern cleanup: poll

Extern declarations in sysctl.c should be moved to their own header file,
and then include them in relavant .c files.

Move epoll_table extern declaration to linux/poll.h

Signed-off-by: Dave Young <hidave.darkstar@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/poll.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/poll.h b/include/linux/poll.h
index 6673743946f7..600cc1fde64d 100644
--- a/include/linux/poll.h
+++ b/include/linux/poll.h
@@ -10,8 +10,10 @@
 #include <linux/wait.h>
 #include <linux/string.h>
 #include <linux/fs.h>
+#include <linux/sysctl.h>
 #include <asm/uaccess.h>
 
+extern struct ctl_table epoll_table[]; /* for sysctl */
 /* ~832 bytes of stack space used max in sys_select/sys_poll before allocating
    additional memory. */
 #define MAX_STACK_ALLOC 832
-- 
cgit v1.2.3


From b97c4bc16734a2e597dac7f91ee9eb78f4aeef9a Mon Sep 17 00:00:00 2001
From: Luca Barbieri <luca@luca-barbieri.com>
Date: Thu, 11 Mar 2010 14:08:45 -0800
Subject: locking: Make sparse work with inline spinlocks and rwlocks

Currently sparse does not work with inline spinlock and rwlock functions.
The problem is that they do not use the __acquires/__releases out-of-line
functions, but use inline functions with no sparse annotations.

This patch adds the appropriate annotations to make it work properly.

Signed-off-by: Luca Barbieri <luca@luca-barbieri.com>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/rwlock.h   | 20 ++++++++++----------
 include/linux/spinlock.h | 13 ++++++++-----
 2 files changed, 18 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rwlock.h b/include/linux/rwlock.h
index 71e0b00b6f2c..bc2994ed66e1 100644
--- a/include/linux/rwlock.h
+++ b/include/linux/rwlock.h
@@ -29,25 +29,25 @@ do {								\
 #endif
 
 #ifdef CONFIG_DEBUG_SPINLOCK
- extern void do_raw_read_lock(rwlock_t *lock);
+ extern void do_raw_read_lock(rwlock_t *lock) __acquires(lock);
 #define do_raw_read_lock_flags(lock, flags) do_raw_read_lock(lock)
  extern int do_raw_read_trylock(rwlock_t *lock);
- extern void do_raw_read_unlock(rwlock_t *lock);
- extern void do_raw_write_lock(rwlock_t *lock);
+ extern void do_raw_read_unlock(rwlock_t *lock) __releases(lock);
+ extern void do_raw_write_lock(rwlock_t *lock) __acquires(lock);
 #define do_raw_write_lock_flags(lock, flags) do_raw_write_lock(lock)
  extern int do_raw_write_trylock(rwlock_t *lock);
- extern void do_raw_write_unlock(rwlock_t *lock);
+ extern void do_raw_write_unlock(rwlock_t *lock) __releases(lock);
 #else
-# define do_raw_read_lock(rwlock)	arch_read_lock(&(rwlock)->raw_lock)
+# define do_raw_read_lock(rwlock)	do {__acquire(lock); arch_read_lock(&(rwlock)->raw_lock); } while (0)
 # define do_raw_read_lock_flags(lock, flags) \
-		arch_read_lock_flags(&(lock)->raw_lock, *(flags))
+		do {__acquire(lock); arch_read_lock_flags(&(lock)->raw_lock, *(flags)); } while (0)
 # define do_raw_read_trylock(rwlock)	arch_read_trylock(&(rwlock)->raw_lock)
-# define do_raw_read_unlock(rwlock)	arch_read_unlock(&(rwlock)->raw_lock)
-# define do_raw_write_lock(rwlock)	arch_write_lock(&(rwlock)->raw_lock)
+# define do_raw_read_unlock(rwlock)	do {arch_read_unlock(&(rwlock)->raw_lock); __release(lock); } while (0)
+# define do_raw_write_lock(rwlock)	do {__acquire(lock); arch_write_lock(&(rwlock)->raw_lock); } while (0)
 # define do_raw_write_lock_flags(lock, flags) \
-		arch_write_lock_flags(&(lock)->raw_lock, *(flags))
+		do {__acquire(lock); arch_write_lock_flags(&(lock)->raw_lock, *(flags)); } while (0)
 # define do_raw_write_trylock(rwlock)	arch_write_trylock(&(rwlock)->raw_lock)
-# define do_raw_write_unlock(rwlock)	arch_write_unlock(&(rwlock)->raw_lock)
+# define do_raw_write_unlock(rwlock)	do {arch_write_unlock(&(rwlock)->raw_lock); __release(lock); } while (0)
 #endif
 
 #define read_can_lock(rwlock)		arch_read_can_lock(&(rwlock)->raw_lock)
diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
index 86088213334a..89fac6a3f78b 100644
--- a/include/linux/spinlock.h
+++ b/include/linux/spinlock.h
@@ -128,19 +128,21 @@ static inline void smp_mb__after_lock(void) { smp_mb(); }
 #define raw_spin_unlock_wait(lock)	arch_spin_unlock_wait(&(lock)->raw_lock)
 
 #ifdef CONFIG_DEBUG_SPINLOCK
- extern void do_raw_spin_lock(raw_spinlock_t *lock);
+ extern void do_raw_spin_lock(raw_spinlock_t *lock) __acquires(lock);
 #define do_raw_spin_lock_flags(lock, flags) do_raw_spin_lock(lock)
  extern int do_raw_spin_trylock(raw_spinlock_t *lock);
- extern void do_raw_spin_unlock(raw_spinlock_t *lock);
+ extern void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock);
 #else
-static inline void do_raw_spin_lock(raw_spinlock_t *lock)
+static inline void do_raw_spin_lock(raw_spinlock_t *lock) __acquires(lock)
 {
+	__acquire(lock);
 	arch_spin_lock(&lock->raw_lock);
 }
 
 static inline void
-do_raw_spin_lock_flags(raw_spinlock_t *lock, unsigned long *flags)
+do_raw_spin_lock_flags(raw_spinlock_t *lock, unsigned long *flags) __acquires(lock)
 {
+	__acquire(lock);
 	arch_spin_lock_flags(&lock->raw_lock, *flags);
 }
 
@@ -149,9 +151,10 @@ static inline int do_raw_spin_trylock(raw_spinlock_t *lock)
 	return arch_spin_trylock(&(lock)->raw_lock);
 }
 
-static inline void do_raw_spin_unlock(raw_spinlock_t *lock)
+static inline void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock)
 {
 	arch_spin_unlock(&lock->raw_lock);
+	__release(lock);
 }
 #endif
 
-- 
cgit v1.2.3


From 97ee9b0257402f4731b55dfea42f24d26d793ddf Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Sat, 6 Mar 2010 04:44:14 +0000
Subject: net/9p: Use the tag name in the config space for identifying mount
 point

This patch use the tag name in the config space to identify the
mount device. The the virtio device name depend on the enumeration
order of the device and may not remain the same across multiple boots
So we use the tag name which is set via qemu option to uniquely identify
the mount device

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 include/linux/virtio_9p.h | 12 ++++++++++++
 net/9p/trans_virtio.c     | 44 ++++++++++++++++++++++++++++++++++++++------
 2 files changed, 50 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/virtio_9p.h b/include/linux/virtio_9p.h
index 332275080083..5cf11765146b 100644
--- a/include/linux/virtio_9p.h
+++ b/include/linux/virtio_9p.h
@@ -5,4 +5,16 @@
 #include <linux/virtio_ids.h>
 #include <linux/virtio_config.h>
 
+/* The feature bitmap for virtio 9P */
+
+/* The mount point is specified in a config variable */
+#define VIRTIO_9P_MOUNT_TAG 0
+
+struct virtio_9p_config {
+	/* length of the tag name */
+	__u16 tag_len;
+	/* non-NULL terminated tag name */
+	__u8 tag[0];
+} __attribute__((packed));
+
 #endif /* _LINUX_VIRTIO_9P_H */
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index 0aaed4819379..026775ad391a 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -78,6 +78,12 @@ struct virtio_chan {
 	/* Scatterlist: can be too big for stack. */
 	struct scatterlist sg[VIRTQUEUE_NUM];
 
+	int tag_len;
+	/*
+	 * tag name to identify a mount Non-null terminated
+	 */
+	char *tag;
+
 	struct list_head chan_list;
 };
 
@@ -224,6 +230,8 @@ p9_virtio_request(struct p9_client *client, struct p9_req_t *req)
 
 static int p9_virtio_probe(struct virtio_device *vdev)
 {
+	__u16 tag_len;
+	char *tag;
 	int err;
 	struct virtio_chan *chan;
 
@@ -248,6 +256,23 @@ static int p9_virtio_probe(struct virtio_device *vdev)
 	sg_init_table(chan->sg, VIRTQUEUE_NUM);
 
 	chan->inuse = false;
+	if (virtio_has_feature(vdev, VIRTIO_9P_MOUNT_TAG)) {
+		vdev->config->get(vdev,
+				offsetof(struct virtio_9p_config, tag_len),
+				&tag_len, sizeof(tag_len));
+	} else {
+		err = -EINVAL;
+		goto out_free_vq;
+	}
+	tag = kmalloc(tag_len, GFP_KERNEL);
+	if (!tag) {
+		err = -ENOMEM;
+		goto out_free_vq;
+	}
+	vdev->config->get(vdev, offsetof(struct virtio_9p_config, tag),
+			tag, tag_len);
+	chan->tag = tag;
+	chan->tag_len = tag_len;
 	mutex_lock(&virtio_9p_lock);
 	list_add_tail(&chan->chan_list, &virtio_chan_list);
 	mutex_unlock(&virtio_9p_lock);
@@ -284,7 +309,7 @@ p9_virtio_create(struct p9_client *client, const char *devname, char *args)
 
 	mutex_lock(&virtio_9p_lock);
 	list_for_each_entry(chan, &virtio_chan_list, chan_list) {
-		if (!strcmp(devname, dev_name(&chan->vdev->dev))) {
+		if (!strncmp(devname, chan->tag, chan->tag_len)) {
 			if (!chan->inuse) {
 				chan->inuse = true;
 				found = 1;
@@ -323,6 +348,7 @@ static void p9_virtio_remove(struct virtio_device *vdev)
 	mutex_lock(&virtio_9p_lock);
 	list_del(&chan->chan_list);
 	mutex_unlock(&virtio_9p_lock);
+	kfree(chan->tag);
 	kfree(chan);
 
 }
@@ -332,13 +358,19 @@ static struct virtio_device_id id_table[] = {
 	{ 0 },
 };
 
+static unsigned int features[] = {
+	VIRTIO_9P_MOUNT_TAG,
+};
+
 /* The standard "struct lguest_driver": */
 static struct virtio_driver p9_virtio_drv = {
-	.driver.name = 	KBUILD_MODNAME,
-	.driver.owner = THIS_MODULE,
-	.id_table =	id_table,
-	.probe = 	p9_virtio_probe,
-	.remove =	p9_virtio_remove,
+	.feature_table  = features,
+	.feature_table_size = ARRAY_SIZE(features),
+	.driver.name    = KBUILD_MODNAME,
+	.driver.owner	= THIS_MODULE,
+	.id_table	= id_table,
+	.probe		= p9_virtio_probe,
+	.remove		= p9_virtio_remove,
 };
 
 static struct p9_trans_module p9_virtio_trans = {
-- 
cgit v1.2.3


From 86c8437383acd85c05ec7c9a004f59fe7ac9821a Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Sat, 6 Mar 2010 04:44:15 +0000
Subject: net/9p: Add sysfs mount_tag file for virtio 9P device

This adds a new file for virtio 9P device. The file
contain details of the mount device name that should
be used to mount the 9P file system.

Ex: /sys/devices/virtio-pci/virtio1/mount_tag  file now
contian the tag name to be used to mount the 9P file system.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 include/linux/virtio.h |  1 +
 net/9p/trans_virtio.c  | 20 ++++++++++++++++++++
 2 files changed, 21 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index f508c651e53d..40d1709bdbf4 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -98,6 +98,7 @@ struct virtio_device {
 	void *priv;
 };
 
+#define dev_to_virtio(dev) container_of(dev, struct virtio_device, dev)
 int register_virtio_device(struct virtio_device *dev);
 void unregister_virtio_device(struct virtio_device *dev);
 
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index 026775ad391a..afde1a89fbb3 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -220,6 +220,20 @@ p9_virtio_request(struct p9_client *client, struct p9_req_t *req)
 	return 0;
 }
 
+static ssize_t p9_mount_tag_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct virtio_chan *chan;
+	struct virtio_device *vdev;
+
+	vdev = dev_to_virtio(dev);
+	chan = vdev->priv;
+
+	return snprintf(buf, chan->tag_len + 1, "%s", chan->tag);
+}
+
+static DEVICE_ATTR(mount_tag, 0444, p9_mount_tag_show, NULL);
+
 /**
  * p9_virtio_probe - probe for existence of 9P virtio channels
  * @vdev: virtio device to probe
@@ -273,6 +287,11 @@ static int p9_virtio_probe(struct virtio_device *vdev)
 			tag, tag_len);
 	chan->tag = tag;
 	chan->tag_len = tag_len;
+	err = sysfs_create_file(&(vdev->dev.kobj), &dev_attr_mount_tag.attr);
+	if (err) {
+		kfree(tag);
+		goto out_free_vq;
+	}
 	mutex_lock(&virtio_9p_lock);
 	list_add_tail(&chan->chan_list, &virtio_chan_list);
 	mutex_unlock(&virtio_9p_lock);
@@ -348,6 +367,7 @@ static void p9_virtio_remove(struct virtio_device *vdev)
 	mutex_lock(&virtio_9p_lock);
 	list_del(&chan->chan_list);
 	mutex_unlock(&virtio_9p_lock);
+	sysfs_remove_file(&(vdev->dev.kobj), &dev_attr_mount_tag.attr);
 	kfree(chan->tag);
 	kfree(chan);
 
-- 
cgit v1.2.3


From 0a9c14751377a1407f5e35791e13651d2fc7801c Mon Sep 17 00:00:00 2001
From: Jean Delvare <khali@linux-fr.org>
Date: Sat, 13 Mar 2010 20:56:56 +0100
Subject: i2c-algo-bit: Add pre- and post-xfer hooks

Drivers might have to do random things before and/or after I2C
transfers. Add hooks to the i2c-algo-bit implementation to let them do
so.

Signed-off-by: Jean Delvare <khali@linux-fr.org>
Cc: Alex Deucher <alexdeucher@gmail.com>
---
 drivers/i2c/algos/i2c-algo-bit.c | 9 +++++++++
 include/linux/i2c-algo-bit.h     | 2 ++
 2 files changed, 11 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/i2c/algos/i2c-algo-bit.c b/drivers/i2c/algos/i2c-algo-bit.c
index e25e13980af3..e8d568c3fb09 100644
--- a/drivers/i2c/algos/i2c-algo-bit.c
+++ b/drivers/i2c/algos/i2c-algo-bit.c
@@ -522,6 +522,12 @@ static int bit_xfer(struct i2c_adapter *i2c_adap,
 	int i, ret;
 	unsigned short nak_ok;
 
+	if (adap->pre_xfer) {
+		ret = adap->pre_xfer(i2c_adap);
+		if (ret < 0)
+			return ret;
+	}
+
 	bit_dbg(3, &i2c_adap->dev, "emitting start condition\n");
 	i2c_start(adap);
 	for (i = 0; i < num; i++) {
@@ -570,6 +576,9 @@ static int bit_xfer(struct i2c_adapter *i2c_adap,
 bailout:
 	bit_dbg(3, &i2c_adap->dev, "emitting stop condition\n");
 	i2c_stop(adap);
+
+	if (adap->post_xfer)
+		adap->post_xfer(i2c_adap);
 	return ret;
 }
 
diff --git a/include/linux/i2c-algo-bit.h b/include/linux/i2c-algo-bit.h
index 111334f5b922..4f98148c11c3 100644
--- a/include/linux/i2c-algo-bit.h
+++ b/include/linux/i2c-algo-bit.h
@@ -36,6 +36,8 @@ struct i2c_algo_bit_data {
 	void (*setscl) (void *data, int state);
 	int  (*getsda) (void *data);
 	int  (*getscl) (void *data);
+	int  (*pre_xfer)  (struct i2c_adapter *);
+	void (*post_xfer) (struct i2c_adapter *);
 
 	/* local settings */
 	int udelay;		/* half clock cycle time in us,
-- 
cgit v1.2.3


From 3f995f317f7070e81e8e38bb11357d6671ab6969 Mon Sep 17 00:00:00 2001
From: Richard Röjfors <richard.rojfors@pelagicore.com>
Date: Tue, 9 Mar 2010 09:17:36 +0100
Subject: Add the platform data include for the Xilinx XPS IIC Bus Interface
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This file was missed in the original patch that went into Linus' tree.

Cc: "Ben Dooks (embedded platforms)" <ben-linux@fluff.org>
Cc: linux-i2c@vger.kernel.org
Signed-off-by: Richard Röjfors <richard.rojfors@pelagicore.com>
Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/i2c-xiic.h | 43 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 43 insertions(+)
 create mode 100644 include/linux/i2c-xiic.h

(limited to 'include/linux')

diff --git a/include/linux/i2c-xiic.h b/include/linux/i2c-xiic.h
new file mode 100644
index 000000000000..4f9f2256a97e
--- /dev/null
+++ b/include/linux/i2c-xiic.h
@@ -0,0 +1,43 @@
+/*
+ * i2c-xiic.h
+ * Copyright (c) 2009 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/* Supports:
+ * Xilinx IIC
+ */
+
+#ifndef _LINUX_I2C_XIIC_H
+#define _LINUX_I2C_XIIC_H
+
+/**
+ * struct xiic_i2c_platform_data - Platform data of the Xilinx I2C driver
+ * @num_devices:	Number of devices that shall be added when the driver
+ *			is probed.
+ * @devices:		The actuall devices to add.
+ *
+ * This purpose of this platform data struct is to be able to provide a number
+ * of devices that should be added to the I2C bus. The reason is that sometimes
+ * the I2C board info is not enough, a new PCI board can for instance be
+ * plugged into a standard PC, and the bus number might be unknown at
+ * early init time.
+ */
+struct xiic_i2c_platform_data {
+	u8				num_devices;
+	struct i2c_board_info const	*devices;
+};
+
+#endif /* _LINUX_I2C_XIIC_H */
-- 
cgit v1.2.3


From 3f17522ce461a31e7ced6311b28fcf5b8a763316 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Fri, 12 Feb 2010 14:32:01 +0000
Subject: Video: ARM CLCD: Better fix for swapped IENB and CNTL registers

On PL111, as found on Realview and other platforms, these registers are
always arranged as CNTL then IENB.  On PL110, these registers are IENB
then CNTL, except on Versatile platforms.

Re-arrange the handling of these register swaps so that PL111 always
gets it right without resorting to ifdefs, leaving the only case needing
special handling being PL110 on Versatile.

Fill out amba/clcd.h with the PL110/PL111 register definition
differences in case someone tries to use the PL110 specific definitions
on PL111.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 drivers/video/amba-clcd.c | 31 ++++++++++++++++++++++++-------
 include/linux/amba/clcd.h | 33 +++++++++++++++++----------------
 2 files changed, 41 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/video/amba-clcd.c b/drivers/video/amba-clcd.c
index a21efcd10b78..afe21e6eb544 100644
--- a/drivers/video/amba-clcd.c
+++ b/drivers/video/amba-clcd.c
@@ -65,16 +65,16 @@ static void clcdfb_disable(struct clcd_fb *fb)
 	if (fb->board->disable)
 		fb->board->disable(fb);
 
-	val = readl(fb->regs + CLCD_CNTL);
+	val = readl(fb->regs + fb->off_cntl);
 	if (val & CNTL_LCDPWR) {
 		val &= ~CNTL_LCDPWR;
-		writel(val, fb->regs + CLCD_CNTL);
+		writel(val, fb->regs + fb->off_cntl);
 
 		clcdfb_sleep(20);
 	}
 	if (val & CNTL_LCDEN) {
 		val &= ~CNTL_LCDEN;
-		writel(val, fb->regs + CLCD_CNTL);
+		writel(val, fb->regs + fb->off_cntl);
 	}
 
 	/*
@@ -94,7 +94,7 @@ static void clcdfb_enable(struct clcd_fb *fb, u32 cntl)
 	 * Bring up by first enabling..
 	 */
 	cntl |= CNTL_LCDEN;
-	writel(cntl, fb->regs + CLCD_CNTL);
+	writel(cntl, fb->regs + fb->off_cntl);
 
 	clcdfb_sleep(20);
 
@@ -102,7 +102,7 @@ static void clcdfb_enable(struct clcd_fb *fb, u32 cntl)
 	 * and now apply power.
 	 */
 	cntl |= CNTL_LCDPWR;
-	writel(cntl, fb->regs + CLCD_CNTL);
+	writel(cntl, fb->regs + fb->off_cntl);
 
 	/*
 	 * finally, enable the interface.
@@ -233,7 +233,7 @@ static int clcdfb_set_par(struct fb_info *info)
 		readl(fb->regs + CLCD_TIM0), readl(fb->regs + CLCD_TIM1),
 		readl(fb->regs + CLCD_TIM2), readl(fb->regs + CLCD_TIM3),
 		readl(fb->regs + CLCD_UBAS), readl(fb->regs + CLCD_LBAS),
-		readl(fb->regs + CLCD_IENB), readl(fb->regs + CLCD_CNTL));
+		readl(fb->regs + fb->off_ienb), readl(fb->regs + fb->off_cntl));
 #endif
 
 	return 0;
@@ -345,6 +345,23 @@ static int clcdfb_register(struct clcd_fb *fb)
 {
 	int ret;
 
+	/*
+	 * ARM PL111 always has IENB at 0x1c; it's only PL110
+	 * which is reversed on some platforms.
+	 */
+	if (amba_manf(fb->dev) == 0x41 && amba_part(fb->dev) == 0x111) {
+		fb->off_ienb = CLCD_PL111_IENB;
+		fb->off_cntl = CLCD_PL111_CNTL;
+	} else {
+#ifdef CONFIG_ARCH_VERSATILE
+		fb->off_ienb = CLCD_PL111_IENB;
+		fb->off_cntl = CLCD_PL111_CNTL;
+#else
+		fb->off_ienb = CLCD_PL110_IENB;
+		fb->off_cntl = CLCD_PL110_CNTL;
+#endif
+	}
+
 	fb->clk = clk_get(&fb->dev->dev, NULL);
 	if (IS_ERR(fb->clk)) {
 		ret = PTR_ERR(fb->clk);
@@ -416,7 +433,7 @@ static int clcdfb_register(struct clcd_fb *fb)
 	/*
 	 * Ensure interrupts are disabled.
 	 */
-	writel(0, fb->regs + CLCD_IENB);
+	writel(0, fb->regs + fb->off_ienb);
 
 	fb_set_var(&fb->fb, &fb->fb.var);
 
diff --git a/include/linux/amba/clcd.h b/include/linux/amba/clcd.h
index 29c0448265cf..ca16c3801a1e 100644
--- a/include/linux/amba/clcd.h
+++ b/include/linux/amba/clcd.h
@@ -21,22 +21,21 @@
 #define CLCD_UBAS 		0x00000010
 #define CLCD_LBAS 		0x00000014
 
-#if !defined(CONFIG_ARCH_VERSATILE) && !defined(CONFIG_ARCH_REALVIEW)
-#define CLCD_IENB 		0x00000018
-#define CLCD_CNTL 		0x0000001c
-#else
-/*
- * Someone rearranged these two registers on the Versatile
- * platform...
- */
-#define CLCD_IENB 		0x0000001c
-#define CLCD_CNTL 		0x00000018
-#endif
-
-#define CLCD_STAT 		0x00000020
-#define CLCD_INTR 		0x00000024
-#define CLCD_UCUR 		0x00000028
-#define CLCD_LCUR 		0x0000002C
+#define CLCD_PL110_IENB		0x00000018
+#define CLCD_PL110_CNTL		0x0000001c
+#define CLCD_PL110_STAT		0x00000020
+#define CLCD_PL110_INTR 	0x00000024
+#define CLCD_PL110_UCUR		0x00000028
+#define CLCD_PL110_LCUR		0x0000002C
+
+#define CLCD_PL111_CNTL		0x00000018
+#define CLCD_PL111_IENB		0x0000001c
+#define CLCD_PL111_RIS		0x00000020
+#define CLCD_PL111_MIS		0x00000024
+#define CLCD_PL111_ICR		0x00000028
+#define CLCD_PL111_UCUR		0x0000002c
+#define CLCD_PL111_LCUR		0x00000030
+
 #define CLCD_PALL 		0x00000200
 #define CLCD_PALETTE		0x00000200
 
@@ -147,6 +146,8 @@ struct clcd_fb {
 	struct clcd_board	*board;
 	void			*board_data;
 	void __iomem		*regs;
+	u16			off_ienb;
+	u16			off_cntl;
 	u32			clcd_cntl;
 	u32			cmap[16];
 };
-- 
cgit v1.2.3


From cd7e9fcd1f7c9c397f747cf506c66f7dca11d1c6 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bjorn.helgaas@hp.com>
Date: Fri, 5 Mar 2010 10:47:26 -0700
Subject: resource: expand IORESOURCE_TYPE_BITS to make room for bus resource
 type

No functional change; this just makes room for another resource type.

Signed-off-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 include/linux/ioport.h | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ioport.h b/include/linux/ioport.h
index dda98410d588..b126209a40e2 100644
--- a/include/linux/ioport.h
+++ b/include/linux/ioport.h
@@ -34,20 +34,20 @@ struct resource_list {
  */
 #define IORESOURCE_BITS		0x000000ff	/* Bus-specific bits */
 
-#define IORESOURCE_TYPE_BITS	0x00000f00	/* Resource type */
+#define IORESOURCE_TYPE_BITS	0x00001f00	/* Resource type */
 #define IORESOURCE_IO		0x00000100
 #define IORESOURCE_MEM		0x00000200
 #define IORESOURCE_IRQ		0x00000400
 #define IORESOURCE_DMA		0x00000800
 
-#define IORESOURCE_PREFETCH	0x00001000	/* No side effects */
-#define IORESOURCE_READONLY	0x00002000
-#define IORESOURCE_CACHEABLE	0x00004000
-#define IORESOURCE_RANGELENGTH	0x00008000
-#define IORESOURCE_SHADOWABLE	0x00010000
+#define IORESOURCE_PREFETCH	0x00002000	/* No side effects */
+#define IORESOURCE_READONLY	0x00004000
+#define IORESOURCE_CACHEABLE	0x00008000
+#define IORESOURCE_RANGELENGTH	0x00010000
+#define IORESOURCE_SHADOWABLE	0x00020000
 
-#define IORESOURCE_SIZEALIGN	0x00020000	/* size indicates alignment */
-#define IORESOURCE_STARTALIGN	0x00040000	/* start field is alignment */
+#define IORESOURCE_SIZEALIGN	0x00040000	/* size indicates alignment */
+#define IORESOURCE_STARTALIGN	0x00080000	/* start field is alignment */
 
 #define IORESOURCE_MEM_64	0x00100000
 
-- 
cgit v1.2.3


From 0f4050c7d3ba0275e5f39513c0670a717d43048c Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bjorn.helgaas@hp.com>
Date: Fri, 5 Mar 2010 10:47:42 -0700
Subject: resource: add bus number support

Add support for bus number resources.  This is for bridges with a range of
bus numbers behind them.

Signed-off-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 include/linux/ioport.h | 1 +
 lib/vsprintf.c         | 9 +++++++++
 2 files changed, 10 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ioport.h b/include/linux/ioport.h
index b126209a40e2..510e4ac918dd 100644
--- a/include/linux/ioport.h
+++ b/include/linux/ioport.h
@@ -39,6 +39,7 @@ struct resource_list {
 #define IORESOURCE_MEM		0x00000200
 #define IORESOURCE_IRQ		0x00000400
 #define IORESOURCE_DMA		0x00000800
+#define IORESOURCE_BUS		0x00001000
 
 #define IORESOURCE_PREFETCH	0x00002000	/* No side effects */
 #define IORESOURCE_READONLY	0x00004000
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index 0d461c7c14db..ebbecf90d5d7 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -609,6 +609,12 @@ static char *resource_string(char *buf, char *end, struct resource *res,
 		.precision = -1,
 		.flags = SPECIAL | SMALL | ZEROPAD,
 	};
+	static const struct printf_spec bus_spec = {
+		.base = 16,
+		.field_width = 2,
+		.precision = -1,
+		.flags = SMALL | ZEROPAD,
+	};
 	static const struct printf_spec dec_spec = {
 		.base = 10,
 		.precision = -1,
@@ -651,6 +657,9 @@ static char *resource_string(char *buf, char *end, struct resource *res,
 	} else if (res->flags & IORESOURCE_DMA) {
 		p = string(p, pend, "dma ", str_spec);
 		specp = &dec_spec;
+	} else if (res->flags & IORESOURCE_BUS) {
+		p = string(p, pend, "bus ", str_spec);
+		specp = &bus_spec;
 	} else {
 		p = string(p, pend, "??? ", str_spec);
 		specp = &mem_spec;
-- 
cgit v1.2.3


From 9d7cca04211d4eb104eaaa424b98f650bc29c730 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bjorn.helgaas@hp.com>
Date: Fri, 5 Mar 2010 10:47:47 -0700
Subject: resource: add window support

Add support for resource windows.  This is for bridge resources, i.e.,
regions where a bridge forwards transactions from the primary to the
secondary side.

Signed-off-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 include/linux/ioport.h | 1 +
 lib/vsprintf.c         | 4 +++-
 2 files changed, 4 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/ioport.h b/include/linux/ioport.h
index 510e4ac918dd..71ab79da7e7f 100644
--- a/include/linux/ioport.h
+++ b/include/linux/ioport.h
@@ -51,6 +51,7 @@ struct resource_list {
 #define IORESOURCE_STARTALIGN	0x00080000	/* start field is alignment */
 
 #define IORESOURCE_MEM_64	0x00100000
+#define IORESOURCE_WINDOW	0x00200000	/* forwarded by bridge */
 
 #define IORESOURCE_EXCLUSIVE	0x08000000	/* Userland may not map this resource */
 #define IORESOURCE_DISABLED	0x10000000
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index ebbecf90d5d7..24112e5a5780 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -635,7 +635,7 @@ static char *resource_string(char *buf, char *end, struct resource *res,
 	 * 64-bit res (sizeof==8): 20 chars in dec, 18 in hex ("0x" + 16) */
 #define RSRC_BUF_SIZE		((2 * sizeof(resource_size_t)) + 4)
 #define FLAG_BUF_SIZE		(2 * sizeof(res->flags))
-#define DECODED_BUF_SIZE	sizeof("[mem - 64bit pref disabled]")
+#define DECODED_BUF_SIZE	sizeof("[mem - 64bit pref window disabled]")
 #define RAW_BUF_SIZE		sizeof("[mem - flags 0x]")
 	char sym[max(2*RSRC_BUF_SIZE + DECODED_BUF_SIZE,
 		     2*RSRC_BUF_SIZE + FLAG_BUF_SIZE + RAW_BUF_SIZE)];
@@ -675,6 +675,8 @@ static char *resource_string(char *buf, char *end, struct resource *res,
 			p = string(p, pend, " 64bit", str_spec);
 		if (res->flags & IORESOURCE_PREFETCH)
 			p = string(p, pend, " pref", str_spec);
+		if (res->flags & IORESOURCE_WINDOW)
+			p = string(p, pend, " window", str_spec);
 		if (res->flags & IORESOURCE_DISABLED)
 			p = string(p, pend, " disabled", str_spec);
 	} else {
-- 
cgit v1.2.3


From e3818b8dce2a934cd1521dbc4827e5238d8f45d8 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Mon, 15 Mar 2010 17:03:43 -0700
Subject: rcu: Make rcu_read_lock_bh_held() allow for disabled BH

Disabling BH can stand in for rcu_read_lock_bh(), and this patch
updates rcu_read_lock_bh_held() to allow for this.  In order to
avoid include-file hell, this function is moved out of line to
kernel/rcupdate.c.

This fixes a false positive RCU warning.

Reported-by: Arnd Bergmann <arnd@arndb.de>
Reported-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Acked-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Cc: dipankar@in.ibm.com
Cc: mathieu.desnoyers@polymtl.ca
Cc: josh@joshtriplett.org
Cc: dvhltc@us.ibm.com
Cc: niv@us.ibm.com
Cc: peterz@infradead.org
Cc: rostedt@goodmis.org
Cc: Valdis.Kletnieks@vt.edu
Cc: dhowells@redhat.com
LKML-Reference: <20100316000343.GA25857@linux.vnet.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/rcupdate.h | 19 ++++---------------
 kernel/rcupdate.c        | 23 +++++++++++++++++++++++
 2 files changed, 27 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 3024050c82a1..e1bdc4bfd275 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -123,22 +123,11 @@ static inline int rcu_read_lock_held(void)
 	return lock_is_held(&rcu_lock_map);
 }
 
-/**
- * rcu_read_lock_bh_held - might we be in RCU-bh read-side critical section?
- *
- * If CONFIG_PROVE_LOCKING is selected and enabled, returns nonzero iff in
- * an RCU-bh read-side critical section.  In absence of CONFIG_PROVE_LOCKING,
- * this assumes we are in an RCU-bh read-side critical section unless it can
- * prove otherwise.
- *
- * Check rcu_scheduler_active to prevent false positives during boot.
+/*
+ * rcu_read_lock_bh_held() is defined out of line to avoid #include-file
+ * hell.
  */
-static inline int rcu_read_lock_bh_held(void)
-{
-	if (!debug_lockdep_rcu_enabled())
-		return 1;
-	return lock_is_held(&rcu_bh_lock_map);
-}
+extern int rcu_read_lock_bh_held(void);
 
 /**
  * rcu_read_lock_sched_held - might we be in RCU-sched read-side critical section?
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index f1125c1a6321..63fe25433980 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -45,6 +45,7 @@
 #include <linux/mutex.h>
 #include <linux/module.h>
 #include <linux/kernel_stat.h>
+#include <linux/hardirq.h>
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 static struct lock_class_key rcu_lock_key;
@@ -66,6 +67,28 @@ EXPORT_SYMBOL_GPL(rcu_sched_lock_map);
 int rcu_scheduler_active __read_mostly;
 EXPORT_SYMBOL_GPL(rcu_scheduler_active);
 
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+
+/**
+ * rcu_read_lock_bh_held - might we be in RCU-bh read-side critical section?
+ *
+ * Check for bottom half being disabled, which covers both the
+ * CONFIG_PROVE_RCU and not cases.  Note that if someone uses
+ * rcu_read_lock_bh(), but then later enables BH, lockdep (if enabled)
+ * will show the situation.
+ *
+ * Check debug_lockdep_rcu_enabled() to prevent false positives during boot.
+ */
+int rcu_read_lock_bh_held(void)
+{
+	if (!debug_lockdep_rcu_enabled())
+		return 1;
+	return in_softirq();
+}
+EXPORT_SYMBOL_GPL(rcu_read_lock_bh_held);
+
+#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
+
 /*
  * This function is invoked towards the end of the scheduler's initialization
  * process.  Before this is called, the idle task might contain
-- 
cgit v1.2.3


From 7f5b774275df8c76a959eae7488128b637fcbfc8 Mon Sep 17 00:00:00 2001
From: Lai Jiangshan <laijs@cn.fujitsu.com>
Date: Tue, 16 Mar 2010 17:00:29 +0800
Subject: rcu: Fix tracepoints & lockdep false positive

tracepoint.h uses rcu_dereference(), which triggers this warning:

[    0.701161] ===================================================
[    0.702211] [ INFO: suspicious rcu_dereference_check() usage. ]
[    0.702716] ---------------------------------------------------
[    0.703203] include/trace/events/workqueue.h:68 invoked rcu_dereference_check() without protection!
[    0.703971] [ 0.703990] other info that might help us debug this:
[    0.703993]
[    0.705590]
[    0.705604] rcu_scheduler_active = 1, debug_locks = 0
[    0.706712] 1 lock held by swapper/1:
[    0.707229]  #0:  (cpu_add_remove_lock){+.+.+.}, at: [<c0142f54>] cpu_maps_update_begin+0x14/0x20
[    0.710097]
[    0.710106] stack backtrace:
[    0.712602] Pid: 1, comm: swapper Not tainted 2.6.34-rc1-tip-01613-g72662bb #168
[    0.713231] Call Trace:
[    0.713997]  [<c017174d>] lockdep_rcu_dereference+0x9d/0xb0
[    0.714746]  [<c015a117>] create_workqueue_thread+0x107/0x110
[    0.715353]  [<c015aee0>] ? worker_thread+0x0/0x340
[    0.715845]  [<c015a8e8>] __create_workqueue_key+0x138/0x240
[    0.716427]  [<c0142f92>] ? cpu_maps_update_done+0x12/0x20
[    0.717012]  [<c086b12f>] init_workqueues+0x6f/0x80
[    0.717530]  [<c08576d2>] kernel_init+0x102/0x1f0
[    0.717570]  [<c08575d0>] ? kernel_init+0x0/0x1f0
[    0.718944]  [<c01030fa>] kernel_thread_helper+0x6/0x10

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <4B9F48AD.4000404@cn.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/tracepoint.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index f59604ed0ec6..78b4bd3be496 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -49,7 +49,7 @@ struct tracepoint {
 		void **it_func;						\
 									\
 		rcu_read_lock_sched_notrace();				\
-		it_func = rcu_dereference((tp)->funcs);			\
+		it_func = rcu_dereference_sched((tp)->funcs);		\
 		if (it_func) {						\
 			do {						\
 				((void(*)(proto))(*it_func))(args);	\
-- 
cgit v1.2.3


From e7fb9c4ad351a8da7c09e182bd2e7ccd043daf08 Mon Sep 17 00:00:00 2001
From: Alberto Panizzo <maramaopercheseimorto@gmail.com>
Date: Fri, 18 Dec 2009 16:42:11 +0100
Subject: backlight: Add Epson L4F00242T03 LCD driver

The Epson LCD L4F00242T03 is mounted on the Freescale i.MX31 PDK board.
Based upon Marek Vasut work in l4f00242t03.c, this driver provides
basic init and power on/off functionality for this device through the
sysfs lcd interface.

Unfortunately Datasheet for this device are not available and
all the control sequences sent to the display were copied from the
freescale driver that in the i.MX31 Linux BSP.

As in the i.MX31PDK board the core and io suppliers are voltage
regulators, that functionality is embedded here, but not strict.

Signed-off-by: Alberto Panizzo <maramaopercheseimorto@gmail.com>
Signed-off-by: Richard Purdie <rpurdie@linux.intel.com>
---
 drivers/video/backlight/Kconfig       |   7 +
 drivers/video/backlight/Makefile      |   1 +
 drivers/video/backlight/l4f00242t03.c | 256 ++++++++++++++++++++++++++++++++++
 include/linux/spi/l4f00242t03.h       |  31 ++++
 4 files changed, 295 insertions(+)
 create mode 100644 drivers/video/backlight/l4f00242t03.c
 create mode 100644 include/linux/spi/l4f00242t03.h

(limited to 'include/linux')

diff --git a/drivers/video/backlight/Kconfig b/drivers/video/backlight/Kconfig
index 0c77fc610212..c025c84601b0 100644
--- a/drivers/video/backlight/Kconfig
+++ b/drivers/video/backlight/Kconfig
@@ -31,6 +31,13 @@ config LCD_CORGI
 	  Say y here to support the LCD panels usually found on SHARP
 	  corgi (C7x0) and spitz (Cxx00) models.
 
+config LCD_L4F00242T03
+	tristate "Epson L4F00242T03 LCD"
+	depends on LCD_CLASS_DEVICE && SPI_MASTER && GENERIC_GPIO
+	help
+	  SPI driver for Epson L4F00242T03. This provides basic support
+	  for init and powering the LCD up/down through a sysfs interface.
+
 config LCD_LMS283GF05
 	tristate "Samsung LMS283GF05 LCD"
 	depends on LCD_CLASS_DEVICE && SPI_MASTER && GENERIC_GPIO
diff --git a/drivers/video/backlight/Makefile b/drivers/video/backlight/Makefile
index 6c704d41462d..09d1f14d6257 100644
--- a/drivers/video/backlight/Makefile
+++ b/drivers/video/backlight/Makefile
@@ -3,6 +3,7 @@
 obj-$(CONFIG_LCD_CLASS_DEVICE)     += lcd.o
 obj-$(CONFIG_LCD_CORGI)		   += corgi_lcd.o
 obj-$(CONFIG_LCD_HP700)		   += jornada720_lcd.o
+obj-$(CONFIG_LCD_L4F00242T03)	   += l4f00242t03.o
 obj-$(CONFIG_LCD_LMS283GF05)	   += lms283gf05.o
 obj-$(CONFIG_LCD_LTV350QV)	   += ltv350qv.o
 obj-$(CONFIG_LCD_ILI9320)	   += ili9320.o
diff --git a/drivers/video/backlight/l4f00242t03.c b/drivers/video/backlight/l4f00242t03.c
new file mode 100644
index 000000000000..42d061e1403b
--- /dev/null
+++ b/drivers/video/backlight/l4f00242t03.c
@@ -0,0 +1,256 @@
+/*
+ * l4f00242t03.c -- support for Epson L4F00242T03 LCD
+ *
+ * Copyright 2007-2009 Freescale Semiconductor, Inc. All Rights Reserved.
+ *
+ * Copyright (c) 2009 Alberto Panizzo <maramaopercheseimorto@gmail.com>
+ * 	Inspired by Marek Vasut work in l4f00242t03.c
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/device.h>
+#include <linux/kernel.h>
+#include <linux/delay.h>
+#include <linux/gpio.h>
+#include <linux/lcd.h>
+#include <linux/regulator/consumer.h>
+
+#include <linux/spi/spi.h>
+#include <linux/spi/l4f00242t03.h>
+
+struct l4f00242t03_priv {
+	struct spi_device	*spi;
+	struct lcd_device	*ld;
+	int lcd_on:1;
+	struct regulator *io_reg;
+	struct regulator *core_reg;
+};
+
+
+static void l4f00242t03_reset(unsigned int gpio)
+{
+	pr_debug("l4f00242t03_reset.\n");
+	gpio_set_value(gpio, 1);
+	mdelay(100);
+	gpio_set_value(gpio, 0);
+	mdelay(10);	/* tRES >= 100us */
+	gpio_set_value(gpio, 1);
+	mdelay(20);
+}
+
+#define param(x) ((x) | 0x100)
+
+static void l4f00242t03_lcd_init(struct spi_device *spi)
+{
+	struct l4f00242t03_pdata *pdata = spi->dev.platform_data;
+	struct l4f00242t03_priv *priv = dev_get_drvdata(&spi->dev);
+	const u16 cmd[] = { 0x36, param(0), 0x3A, param(0x60) };
+
+	dev_dbg(&spi->dev, "initializing LCD\n");
+
+	if (priv->io_reg) {
+		regulator_set_voltage(priv->io_reg, 1800000, 1800000);
+		regulator_enable(priv->io_reg);
+	}
+
+	if (priv->core_reg) {
+		regulator_set_voltage(priv->core_reg, 2800000, 2800000);
+		regulator_enable(priv->core_reg);
+	}
+
+	gpio_set_value(pdata->data_enable_gpio, 1);
+	msleep(60);
+	spi_write(spi, (const u8 *)cmd, ARRAY_SIZE(cmd) * sizeof(u16));
+}
+
+static int l4f00242t03_lcd_power_set(struct lcd_device *ld, int power)
+{
+	struct l4f00242t03_priv *priv = lcd_get_data(ld);
+	struct spi_device *spi = priv->spi;
+
+	const u16 slpout = 0x11;
+	const u16 dison = 0x29;
+
+	const u16 slpin = 0x10;
+	const u16 disoff = 0x28;
+
+	if (power) {
+		if (priv->lcd_on)
+			return 0;
+
+		dev_dbg(&spi->dev, "turning on LCD\n");
+
+		spi_write(spi, (const u8 *)&slpout, sizeof(u16));
+		msleep(60);
+		spi_write(spi, (const u8 *)&dison, sizeof(u16));
+
+		priv->lcd_on = 1;
+	} else {
+		if (!priv->lcd_on)
+			return 0;
+
+		dev_dbg(&spi->dev, "turning off LCD\n");
+
+		spi_write(spi, (const u8 *)&disoff, sizeof(u16));
+		msleep(60);
+		spi_write(spi, (const u8 *)&slpin, sizeof(u16));
+
+		priv->lcd_on = 0;
+	}
+
+	return 0;
+}
+
+static struct lcd_ops l4f_ops = {
+	.set_power	= l4f00242t03_lcd_power_set,
+	.get_power	= NULL,
+};
+
+static int __devinit l4f00242t03_probe(struct spi_device *spi)
+{
+	struct l4f00242t03_priv *priv;
+	struct l4f00242t03_pdata *pdata = spi->dev.platform_data;
+	int ret;
+
+	if (pdata == NULL) {
+		dev_err(&spi->dev, "Uninitialized platform data.\n");
+		return -EINVAL;
+	}
+
+	priv = kzalloc(sizeof(struct l4f00242t03_priv), GFP_KERNEL);
+
+	if (priv == NULL) {
+		dev_err(&spi->dev, "No memory for this device.\n");
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	dev_set_drvdata(&spi->dev, priv);
+	spi->bits_per_word = 9;
+	spi_setup(spi);
+
+	priv->spi = spi;
+
+	ret = gpio_request(pdata->reset_gpio, "lcd l4f00242t03 reset");
+	if (ret) {
+		dev_err(&spi->dev,
+			"Unable to get the lcd l4f00242t03 reset gpio.\n");
+		return ret;
+	}
+
+	ret = gpio_direction_output(pdata->reset_gpio, 1);
+	if (ret)
+		goto err2;
+
+	ret = gpio_request(pdata->data_enable_gpio,
+				"lcd l4f00242t03 data enable");
+	if (ret) {
+		dev_err(&spi->dev,
+			"Unable to get the lcd l4f00242t03 data en gpio.\n");
+		return ret;
+	}
+
+	ret = gpio_direction_output(pdata->data_enable_gpio, 0);
+	if (ret)
+		goto err3;
+
+	if (pdata->io_supply) {
+		priv->io_reg = regulator_get(NULL, pdata->io_supply);
+
+		if (IS_ERR(priv->io_reg)) {
+			pr_err("%s: Unable to get the IO regulator\n",
+								__func__);
+			goto err3;
+		}
+	}
+
+	if (pdata->core_supply) {
+		priv->core_reg = regulator_get(NULL, pdata->core_supply);
+
+		if (IS_ERR(priv->core_reg)) {
+			pr_err("%s: Unable to get the core regulator\n",
+								__func__);
+			goto err4;
+		}
+	}
+
+	priv->ld = lcd_device_register("l4f00242t03",
+					&spi->dev, priv, &l4f_ops);
+	if (IS_ERR(priv->ld)) {
+		ret = PTR_ERR(priv->ld);
+		goto err5;
+	}
+
+	/* Init the LCD */
+	l4f00242t03_reset(pdata->reset_gpio);
+	l4f00242t03_lcd_init(spi);
+	l4f00242t03_lcd_power_set(priv->ld, 1);
+
+	dev_info(&spi->dev, "Epson l4f00242t03 lcd probed.\n");
+
+	return 0;
+
+err5:
+	if (priv->core_reg)
+		regulator_put(priv->core_reg);
+err4:
+	if (priv->io_reg)
+		regulator_put(priv->io_reg);
+err3:
+	gpio_free(pdata->data_enable_gpio);
+err2:
+	gpio_free(pdata->reset_gpio);
+err:
+	kfree(priv);
+
+	return ret;
+}
+
+static int __devexit l4f00242t03_remove(struct spi_device *spi)
+{
+	struct l4f00242t03_priv *priv = dev_get_drvdata(&spi->dev);
+	struct l4f00242t03_pdata *pdata = priv->spi->dev.platform_data;
+
+	l4f00242t03_lcd_power_set(priv->ld, 0);
+	lcd_device_unregister(priv->ld);
+
+	gpio_free(pdata->data_enable_gpio);
+	gpio_free(pdata->reset_gpio);
+
+	if (priv->io_reg)
+		regulator_put(priv->core_reg);
+	if (priv->core_reg)
+		regulator_put(priv->io_reg);
+
+	kfree(priv);
+
+	return 0;
+}
+
+static struct spi_driver l4f00242t03_driver = {
+	.driver = {
+		.name	= "l4f00242t03",
+		.owner	= THIS_MODULE,
+	},
+	.probe		= l4f00242t03_probe,
+	.remove		= __devexit_p(l4f00242t03_remove),
+};
+
+static __init int l4f00242t03_init(void)
+{
+	return spi_register_driver(&l4f00242t03_driver);
+}
+
+static __exit void l4f00242t03_exit(void)
+{
+	spi_unregister_driver(&l4f00242t03_driver);
+}
+
+module_init(l4f00242t03_init);
+module_exit(l4f00242t03_exit);
+
+MODULE_AUTHOR("Alberto Panizzo <maramaopercheseimorto@gmail.com>");
+MODULE_DESCRIPTION("EPSON L4F00242T03 LCD");
diff --git a/include/linux/spi/l4f00242t03.h b/include/linux/spi/l4f00242t03.h
new file mode 100644
index 000000000000..aee1dbda4edc
--- /dev/null
+++ b/include/linux/spi/l4f00242t03.h
@@ -0,0 +1,31 @@
+/*
+ * l4f00242t03.h -- Platform glue for Epson L4F00242T03 LCD
+ *
+ * Copyright (c) 2009 Alberto Panizzo <maramaopercheseimorto@gmail.com>
+ * Based on Marek Vasut work in lms283gf05.h
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+#ifndef _INCLUDE_LINUX_SPI_L4F00242T03_H_
+#define _INCLUDE_LINUX_SPI_L4F00242T03_H_
+
+struct l4f00242t03_pdata {
+	unsigned int	reset_gpio;
+	unsigned int	data_enable_gpio;
+	const char 	*io_supply;	/* will be set to 1.8 V */
+	const char 	*core_supply;	/* will be set to 2.8 V */
+};
+
+#endif /* _INCLUDE_LINUX_SPI_L4F00242T03_H_ */
-- 
cgit v1.2.3


From b4144e4f6e3b448d322095ca08af393682a69e33 Mon Sep 17 00:00:00 2001
From: Richard Purdie <rpurdie@linux.intel.com>
Date: Mon, 18 Jan 2010 14:16:07 +0000
Subject: backlight: Revert some const qualifiers

9905a43b2d563e6f89e4c63c4278ada03f2ebb14 went a little to far with const
qualifiers as there are legitiment cases where the function pointers
can change (machine specific setup code for example).

Signed-off-by: Richard Purdie <rpurdie@linux.intel.com>
---
 include/linux/backlight.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/backlight.h b/include/linux/backlight.h
index 8c4f884db6b4..ee377d791996 100644
--- a/include/linux/backlight.h
+++ b/include/linux/backlight.h
@@ -36,18 +36,18 @@ struct backlight_device;
 struct fb_info;
 
 struct backlight_ops {
-	const unsigned int options;
+	unsigned int options;
 
 #define BL_CORE_SUSPENDRESUME	(1 << 0)
 
 	/* Notify the backlight driver some property has changed */
-	int (* const update_status)(struct backlight_device *);
+	int (*update_status)(struct backlight_device *);
 	/* Return the current backlight brightness (accounting for power,
 	   fb_blank etc.) */
-	int (* const get_brightness)(struct backlight_device *);
+	int (*get_brightness)(struct backlight_device *);
 	/* Check if given framebuffer device is the one bound to this backlight;
 	   return 0 if not, !=0 if it is. If NULL, backlight always matches the fb. */
-	int (* const check_fb)(struct fb_info *);
+	int (*check_fb)(struct fb_info *);
 };
 
 /* This structure defines all the properties of a backlight */
-- 
cgit v1.2.3


From 57e148b6a975980944f4466ccb669b1d02dfc6a1 Mon Sep 17 00:00:00 2001
From: Bruno Prémont <bonbons@linux-vserver.org>
Date: Sun, 21 Feb 2010 00:20:01 +0100
Subject: backlight: Add backlight_device parameter to check_fb
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

check_fb from backlight_ops lacks a reference to the backlight_device
that's being referred to. Add this parameter so a backlight_device
can be mapped to a single framebuffer, especially if the same driver
handles multiple devices on a single system.

Signed-off-by: Bruno Prémont <bonbons@linux-vserver.org>
Signed-off-by: Richard Purdie <rpurdie@linux.intel.com>
---
 drivers/video/backlight/adx_bl.c    | 2 +-
 drivers/video/backlight/backlight.c | 2 +-
 include/linux/backlight.h           | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/video/backlight/adx_bl.c b/drivers/video/backlight/adx_bl.c
index d769b0bab21a..a683dd1be4bc 100644
--- a/drivers/video/backlight/adx_bl.c
+++ b/drivers/video/backlight/adx_bl.c
@@ -56,7 +56,7 @@ static int adx_backlight_get_brightness(struct backlight_device *bldev)
 	return brightness & 0xff;
 }
 
-static int adx_backlight_check_fb(struct fb_info *fb)
+static int adx_backlight_check_fb(struct backlight_device *bldev, struct fb_info *fb)
 {
 	return 1;
 }
diff --git a/drivers/video/backlight/backlight.c b/drivers/video/backlight/backlight.c
index 18829cf68b1b..b800cd4eeec8 100644
--- a/drivers/video/backlight/backlight.c
+++ b/drivers/video/backlight/backlight.c
@@ -38,7 +38,7 @@ static int fb_notifier_callback(struct notifier_block *self,
 	mutex_lock(&bd->ops_lock);
 	if (bd->ops)
 		if (!bd->ops->check_fb ||
-		    bd->ops->check_fb(evdata->info)) {
+		    bd->ops->check_fb(bd, evdata->info)) {
 			bd->props.fb_blank = *(int *)evdata->data;
 			if (bd->props.fb_blank == FB_BLANK_UNBLANK)
 				bd->props.state &= ~BL_CORE_FBBLANK;
diff --git a/include/linux/backlight.h b/include/linux/backlight.h
index ee377d791996..21cd866d24cd 100644
--- a/include/linux/backlight.h
+++ b/include/linux/backlight.h
@@ -47,7 +47,7 @@ struct backlight_ops {
 	int (*get_brightness)(struct backlight_device *);
 	/* Check if given framebuffer device is the one bound to this backlight;
 	   return 0 if not, !=0 if it is. If NULL, backlight always matches the fb. */
-	int (*check_fb)(struct fb_info *);
+	int (*check_fb)(struct backlight_device *, struct fb_info *);
 };
 
 /* This structure defines all the properties of a backlight */
-- 
cgit v1.2.3


From a19a6ee6cad2b20292a774c2f56ba8039b0fac9c Mon Sep 17 00:00:00 2001
From: Matthew Garrett <mjg@redhat.com>
Date: Wed, 17 Feb 2010 16:39:44 -0500
Subject: backlight: Allow properties to be passed at registration

Values such as max_brightness should be set before backlights are
registered, but the current API doesn't allow that. Add a parameter to
backlight_device_register and update drivers to ensure that they
set this correctly.

Signed-off-by: Matthew Garrett <mjg@redhat.com>
Signed-off-by: Richard Purdie <rpurdie@linux.intel.com>
---
 drivers/acpi/video.c                            |  9 ++++++---
 drivers/gpu/drm/nouveau/nouveau_backlight.c     | 12 ++++++++----
 drivers/macintosh/via-pmu-backlight.c           |  7 +++++--
 drivers/platform/x86/acer-wmi.c                 |  7 +++++--
 drivers/platform/x86/asus-laptop.c              |  7 +++++--
 drivers/platform/x86/asus_acpi.c                |  7 +++++--
 drivers/platform/x86/classmate-laptop.c         |  8 +++++---
 drivers/platform/x86/compal-laptop.c            | 11 +++++++----
 drivers/platform/x86/dell-laptop.c              | 13 ++++++++-----
 drivers/platform/x86/eeepc-laptop.c             |  8 +++++---
 drivers/platform/x86/fujitsu-laptop.c           | 14 +++++++++-----
 drivers/platform/x86/msi-laptop.c               |  7 +++++--
 drivers/platform/x86/msi-wmi.c                  |  9 ++++++---
 drivers/platform/x86/panasonic-laptop.c         | 24 ++++++++++++------------
 drivers/platform/x86/sony-laptop.c              |  8 +++++---
 drivers/platform/x86/thinkpad_acpi.c            | 12 +++++++-----
 drivers/platform/x86/toshiba_acpi.c             | 10 ++++++----
 drivers/staging/samsung-laptop/samsung-laptop.c |  7 +++++--
 drivers/usb/misc/appledisplay.c                 |  7 ++++---
 drivers/video/atmel_lcdfb.c                     |  8 +++++---
 drivers/video/aty/aty128fb.c                    |  7 +++++--
 drivers/video/aty/atyfb_base.c                  |  7 +++++--
 drivers/video/aty/radeon_backlight.c            |  7 +++++--
 drivers/video/backlight/88pm860x_bl.c           |  6 ++++--
 drivers/video/backlight/adp5520_bl.c            | 11 ++++++-----
 drivers/video/backlight/adx_bl.c                |  8 +++++---
 drivers/video/backlight/atmel-pwm-bl.c          |  8 +++++---
 drivers/video/backlight/backlight.c             |  8 +++++++-
 drivers/video/backlight/corgi_lcd.c             |  8 +++++---
 drivers/video/backlight/cr_bllcd.c              |  8 ++++----
 drivers/video/backlight/da903x_bl.c             |  7 ++++---
 drivers/video/backlight/generic_bl.c            |  8 +++++---
 drivers/video/backlight/hp680_bl.c              |  8 +++++---
 drivers/video/backlight/jornada720_bl.c         |  7 +++++--
 drivers/video/backlight/kb3886_bl.c             |  8 ++++++--
 drivers/video/backlight/locomolcd.c             |  8 ++++++--
 drivers/video/backlight/max8925_bl.c            |  6 ++++--
 drivers/video/backlight/mbp_nvidia_bl.c         | 10 +++++++---
 drivers/video/backlight/omap1_bl.c              |  7 +++++--
 drivers/video/backlight/progear_bl.c            |  7 +++++--
 drivers/video/backlight/pwm_bl.c                |  8 +++++---
 drivers/video/backlight/tosa_bl.c               |  8 +++++---
 drivers/video/backlight/wm831x_bl.c             |  7 ++++---
 drivers/video/bf54x-lq043fb.c                   |  9 +++++----
 drivers/video/bfin-t350mcqb-fb.c                |  9 +++++----
 drivers/video/nvidia/nv_backlight.c             |  7 +++++--
 drivers/video/omap2/displays/panel-taal.c       | 15 +++++++++------
 drivers/video/riva/fbdev.c                      |  7 +++++--
 include/linux/backlight.h                       |  3 ++-
 49 files changed, 271 insertions(+), 151 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/acpi/video.c b/drivers/acpi/video.c
index 2ff2b6ab5b6c..cbe6f3924a10 100644
--- a/drivers/acpi/video.c
+++ b/drivers/acpi/video.c
@@ -998,6 +998,7 @@ static void acpi_video_device_find_cap(struct acpi_video_device *device)
 	}
 
 	if (acpi_video_backlight_support()) {
+		struct backlight_properties props;
 		int result;
 		static int count = 0;
 		char *name;
@@ -1010,12 +1011,14 @@ static void acpi_video_device_find_cap(struct acpi_video_device *device)
 			return;
 
 		sprintf(name, "acpi_video%d", count++);
-		device->backlight = backlight_device_register(name,
-			NULL, device, &acpi_backlight_ops);
+		memset(&props, 0, sizeof(struct backlight_properties));
+		props.max_brightness = device->brightness->count - 3;
+		device->backlight = backlight_device_register(name, NULL, device,
+							      &acpi_backlight_ops,
+							      &props);
 		kfree(name);
 		if (IS_ERR(device->backlight))
 			return;
-		device->backlight->props.max_brightness = device->brightness->count-3;
 
 		result = sysfs_create_link(&device->backlight->dev.kobj,
 					   &device->dev->dev.kobj, "device");
diff --git a/drivers/gpu/drm/nouveau/nouveau_backlight.c b/drivers/gpu/drm/nouveau/nouveau_backlight.c
index 20564f8cb0ec..406228f4a2a0 100644
--- a/drivers/gpu/drm/nouveau/nouveau_backlight.c
+++ b/drivers/gpu/drm/nouveau/nouveau_backlight.c
@@ -89,19 +89,21 @@ static struct backlight_ops nv50_bl_ops = {
 
 static int nouveau_nv40_backlight_init(struct drm_device *dev)
 {
+	struct backlight_properties props;
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct backlight_device *bd;
 
 	if (!(nv_rd32(dev, NV40_PMC_BACKLIGHT) & NV40_PMC_BACKLIGHT_MASK))
 		return 0;
 
+	memset(&props, 0, sizeof(struct backlight_properties));
+	props.max_brightness = 31;
 	bd = backlight_device_register("nv_backlight", &dev->pdev->dev, dev,
-				       &nv40_bl_ops);
+				       &nv40_bl_ops, &props);
 	if (IS_ERR(bd))
 		return PTR_ERR(bd);
 
 	dev_priv->backlight = bd;
-	bd->props.max_brightness = 31;
 	bd->props.brightness = nv40_get_intensity(bd);
 	backlight_update_status(bd);
 
@@ -110,19 +112,21 @@ static int nouveau_nv40_backlight_init(struct drm_device *dev)
 
 static int nouveau_nv50_backlight_init(struct drm_device *dev)
 {
+	struct backlight_properties props;
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct backlight_device *bd;
 
 	if (!nv_rd32(dev, NV50_PDISPLAY_SOR_BACKLIGHT))
 		return 0;
 
+	memset(&props, 0, sizeof(struct backlight_properties));
+	props.max_brightness = 1025;
 	bd = backlight_device_register("nv_backlight", &dev->pdev->dev, dev,
-				       &nv50_bl_ops);
+				       &nv50_bl_ops, &props);
 	if (IS_ERR(bd))
 		return PTR_ERR(bd);
 
 	dev_priv->backlight = bd;
-	bd->props.max_brightness = 1025;
 	bd->props.brightness = nv50_get_intensity(bd);
 	backlight_update_status(bd);
 	return 0;
diff --git a/drivers/macintosh/via-pmu-backlight.c b/drivers/macintosh/via-pmu-backlight.c
index 4f3c4479c16a..1cec02f6c431 100644
--- a/drivers/macintosh/via-pmu-backlight.c
+++ b/drivers/macintosh/via-pmu-backlight.c
@@ -144,6 +144,7 @@ void pmu_backlight_set_sleep(int sleep)
 
 void __init pmu_backlight_init()
 {
+	struct backlight_properties props;
 	struct backlight_device *bd;
 	char name[10];
 	int level, autosave;
@@ -161,13 +162,15 @@ void __init pmu_backlight_init()
 
 	snprintf(name, sizeof(name), "pmubl");
 
-	bd = backlight_device_register(name, NULL, NULL, &pmu_backlight_data);
+	memset(&props, 0, sizeof(struct backlight_properties));
+	props.max_brightness = FB_BACKLIGHT_LEVELS - 1;
+	bd = backlight_device_register(name, NULL, NULL, &pmu_backlight_data,
+				       &props);
 	if (IS_ERR(bd)) {
 		printk(KERN_ERR "PMU Backlight registration failed\n");
 		return;
 	}
 	uses_pmu_bl = 1;
-	bd->props.max_brightness = FB_BACKLIGHT_LEVELS - 1;
 	pmu_backlight_init_curve(0x7F, 0x46, 0x0E);
 
 	level = bd->props.max_brightness;
diff --git a/drivers/platform/x86/acer-wmi.c b/drivers/platform/x86/acer-wmi.c
index 226b3e93498c..cbca40aa4006 100644
--- a/drivers/platform/x86/acer-wmi.c
+++ b/drivers/platform/x86/acer-wmi.c
@@ -922,9 +922,13 @@ static struct backlight_ops acer_bl_ops = {
 
 static int __devinit acer_backlight_init(struct device *dev)
 {
+	struct backlight_properties props;
 	struct backlight_device *bd;
 
-	bd = backlight_device_register("acer-wmi", dev, NULL, &acer_bl_ops);
+	memset(&props, 0, sizeof(struct backlight_properties));
+	props.max_brightness = max_brightness;
+	bd = backlight_device_register("acer-wmi", dev, NULL, &acer_bl_ops,
+				       &props);
 	if (IS_ERR(bd)) {
 		printk(ACER_ERR "Could not register Acer backlight device\n");
 		acer_backlight_device = NULL;
@@ -935,7 +939,6 @@ static int __devinit acer_backlight_init(struct device *dev)
 
 	bd->props.power = FB_BLANK_UNBLANK;
 	bd->props.brightness = read_brightness(bd);
-	bd->props.max_brightness = max_brightness;
 	backlight_update_status(bd);
 	return 0;
 }
diff --git a/drivers/platform/x86/asus-laptop.c b/drivers/platform/x86/asus-laptop.c
index 791fcf321506..db5f7db2ba33 100644
--- a/drivers/platform/x86/asus-laptop.c
+++ b/drivers/platform/x86/asus-laptop.c
@@ -639,12 +639,16 @@ static int asus_backlight_init(struct asus_laptop *asus)
 {
 	struct backlight_device *bd;
 	struct device *dev = &asus->platform_device->dev;
+	struct backlight_properties props;
 
 	if (!acpi_check_handle(asus->handle, METHOD_BRIGHTNESS_GET, NULL) &&
 	    !acpi_check_handle(asus->handle, METHOD_BRIGHTNESS_SET, NULL) &&
 	    lcd_switch_handle) {
+		memset(&props, 0, sizeof(struct backlight_properties));
+		props.max_brightness = 15;
+
 		bd = backlight_device_register(ASUS_LAPTOP_FILE, dev,
-					       asus, &asusbl_ops);
+					       asus, &asusbl_ops, &props);
 		if (IS_ERR(bd)) {
 			pr_err("Could not register asus backlight device\n");
 			asus->backlight_device = NULL;
@@ -653,7 +657,6 @@ static int asus_backlight_init(struct asus_laptop *asus)
 
 		asus->backlight_device = bd;
 
-		bd->props.max_brightness = 15;
 		bd->props.power = FB_BLANK_UNBLANK;
 		bd->props.brightness = asus_read_brightness(bd);
 		backlight_update_status(bd);
diff --git a/drivers/platform/x86/asus_acpi.c b/drivers/platform/x86/asus_acpi.c
index 1381430e1105..ee520357abaa 100644
--- a/drivers/platform/x86/asus_acpi.c
+++ b/drivers/platform/x86/asus_acpi.c
@@ -1481,6 +1481,7 @@ static void asus_acpi_exit(void)
 
 static int __init asus_acpi_init(void)
 {
+	struct backlight_properties props;
 	int result;
 
 	result = acpi_bus_register_driver(&asus_hotk_driver);
@@ -1507,15 +1508,17 @@ static int __init asus_acpi_init(void)
 		return -ENODEV;
 	}
 
+	memset(&props, 0, sizeof(struct backlight_properties));
+	props.max_brightness = 15;
 	asus_backlight_device = backlight_device_register("asus", NULL, NULL,
-							  &asus_backlight_data);
+							  &asus_backlight_data,
+							  &props);
 	if (IS_ERR(asus_backlight_device)) {
 		printk(KERN_ERR "Could not register asus backlight device\n");
 		asus_backlight_device = NULL;
 		asus_acpi_exit();
 		return -ENODEV;
 	}
-	asus_backlight_device->props.max_brightness = 15;
 
 	return 0;
 }
diff --git a/drivers/platform/x86/classmate-laptop.c b/drivers/platform/x86/classmate-laptop.c
index 035a7dd65a3f..6670ed8f9e5b 100644
--- a/drivers/platform/x86/classmate-laptop.c
+++ b/drivers/platform/x86/classmate-laptop.c
@@ -462,11 +462,13 @@ static struct backlight_ops cmpc_bl_ops = {
 
 static int cmpc_bl_add(struct acpi_device *acpi)
 {
+	struct backlight_properties props;
 	struct backlight_device *bd;
 
-	bd = backlight_device_register("cmpc_bl", &acpi->dev,
-				       acpi->handle, &cmpc_bl_ops);
-	bd->props.max_brightness = 7;
+	memset(&props, 0, sizeof(struct backlight_properties));
+	props.max_brightness = 7;
+	bd = backlight_device_register("cmpc_bl", &acpi->dev, acpi->handle,
+				       &cmpc_bl_ops, &props);
 	dev_set_drvdata(&acpi->dev, bd);
 	return 0;
 }
diff --git a/drivers/platform/x86/compal-laptop.c b/drivers/platform/x86/compal-laptop.c
index 2740b40aad9b..71ff1545a93e 100644
--- a/drivers/platform/x86/compal-laptop.c
+++ b/drivers/platform/x86/compal-laptop.c
@@ -291,12 +291,15 @@ static int __init compal_init(void)
 	/* Register backlight stuff */
 
 	if (!acpi_video_backlight_support()) {
-		compalbl_device = backlight_device_register("compal-laptop", NULL, NULL,
-							    &compalbl_ops);
+		struct backlight_properties props;
+		memset(&props, 0, sizeof(struct backlight_properties));
+		props.max_brightness = COMPAL_LCD_LEVEL_MAX - 1;
+		compalbl_device = backlight_device_register("compal-laptop",
+							    NULL, NULL,
+							    &compalbl_ops,
+							    &props);
 		if (IS_ERR(compalbl_device))
 			return PTR_ERR(compalbl_device);
-
-		compalbl_device->props.max_brightness = COMPAL_LCD_LEVEL_MAX-1;
 	}
 
 	ret = platform_driver_register(&compal_driver);
diff --git a/drivers/platform/x86/dell-laptop.c b/drivers/platform/x86/dell-laptop.c
index ef614979afe9..46435ac4684f 100644
--- a/drivers/platform/x86/dell-laptop.c
+++ b/drivers/platform/x86/dell-laptop.c
@@ -559,10 +559,14 @@ static int __init dell_init(void)
 	release_buffer();
 
 	if (max_intensity) {
-		dell_backlight_device = backlight_device_register(
-			"dell_backlight",
-			&platform_device->dev, NULL,
-			&dell_ops);
+		struct backlight_properties props;
+		memset(&props, 0, sizeof(struct backlight_properties));
+		props.max_brightness = max_intensity;
+		dell_backlight_device = backlight_device_register("dell_backlight",
+								  &platform_device->dev,
+								  NULL,
+								  &dell_ops,
+								  &props);
 
 		if (IS_ERR(dell_backlight_device)) {
 			ret = PTR_ERR(dell_backlight_device);
@@ -570,7 +574,6 @@ static int __init dell_init(void)
 			goto fail_backlight;
 		}
 
-		dell_backlight_device->props.max_brightness = max_intensity;
 		dell_backlight_device->props.brightness =
 			dell_get_intensity(dell_backlight_device);
 		backlight_update_status(dell_backlight_device);
diff --git a/drivers/platform/x86/eeepc-laptop.c b/drivers/platform/x86/eeepc-laptop.c
index 9a844caa3756..3fdf21e0052e 100644
--- a/drivers/platform/x86/eeepc-laptop.c
+++ b/drivers/platform/x86/eeepc-laptop.c
@@ -1131,18 +1131,20 @@ static int eeepc_backlight_notify(struct eeepc_laptop *eeepc)
 
 static int eeepc_backlight_init(struct eeepc_laptop *eeepc)
 {
+	struct backlight_properties props;
 	struct backlight_device *bd;
 
+	memset(&props, 0, sizeof(struct backlight_properties));
+	props.max_brightness = 15;
 	bd = backlight_device_register(EEEPC_LAPTOP_FILE,
-				       &eeepc->platform_device->dev,
-				       eeepc, &eeepcbl_ops);
+				       &eeepc->platform_device->dev, eeepc,
+				       &eeepcbl_ops, &props);
 	if (IS_ERR(bd)) {
 		pr_err("Could not register eeepc backlight device\n");
 		eeepc->backlight_device = NULL;
 		return PTR_ERR(bd);
 	}
 	eeepc->backlight_device = bd;
-	bd->props.max_brightness = 15;
 	bd->props.brightness = read_brightness(bd);
 	bd->props.power = FB_BLANK_UNBLANK;
 	backlight_update_status(bd);
diff --git a/drivers/platform/x86/fujitsu-laptop.c b/drivers/platform/x86/fujitsu-laptop.c
index 5f3320d468f6..c1074b32490e 100644
--- a/drivers/platform/x86/fujitsu-laptop.c
+++ b/drivers/platform/x86/fujitsu-laptop.c
@@ -1126,16 +1126,20 @@ static int __init fujitsu_init(void)
 	/* Register backlight stuff */
 
 	if (!acpi_video_backlight_support()) {
-		fujitsu->bl_device =
-			backlight_device_register("fujitsu-laptop", NULL, NULL,
-						  &fujitsubl_ops);
+		struct backlight_properties props;
+
+		memset(&props, 0, sizeof(struct backlight_properties));
+		max_brightness = fujitsu->max_brightness;
+		props.max_brightness = max_brightness - 1;
+		fujitsu->bl_device = backlight_device_register("fujitsu-laptop",
+							       NULL, NULL,
+							       &fujitsubl_ops,
+							       &props);
 		if (IS_ERR(fujitsu->bl_device)) {
 			ret = PTR_ERR(fujitsu->bl_device);
 			fujitsu->bl_device = NULL;
 			goto fail_sysfs_group;
 		}
-		max_brightness = fujitsu->max_brightness;
-		fujitsu->bl_device->props.max_brightness = max_brightness - 1;
 		fujitsu->bl_device->props.brightness = fujitsu->brightness_level;
 	}
 
diff --git a/drivers/platform/x86/msi-laptop.c b/drivers/platform/x86/msi-laptop.c
index c2b05da4289a..996223a7c009 100644
--- a/drivers/platform/x86/msi-laptop.c
+++ b/drivers/platform/x86/msi-laptop.c
@@ -683,11 +683,14 @@ static int __init msi_init(void)
 		printk(KERN_INFO "MSI: Brightness ignored, must be controlled "
 		       "by ACPI video driver\n");
 	} else {
+		struct backlight_properties props;
+		memset(&props, 0, sizeof(struct backlight_properties));
+		props.max_brightness = MSI_LCD_LEVEL_MAX - 1;
 		msibl_device = backlight_device_register("msi-laptop-bl", NULL,
-							 NULL, &msibl_ops);
+							 NULL, &msibl_ops,
+							 &props);
 		if (IS_ERR(msibl_device))
 			return PTR_ERR(msibl_device);
-		msibl_device->props.max_brightness = MSI_LCD_LEVEL_MAX-1;
 	}
 
 	ret = platform_driver_register(&msipf_driver);
diff --git a/drivers/platform/x86/msi-wmi.c b/drivers/platform/x86/msi-wmi.c
index f5f70d4c6913..fb7ccaae6563 100644
--- a/drivers/platform/x86/msi-wmi.c
+++ b/drivers/platform/x86/msi-wmi.c
@@ -249,12 +249,15 @@ static int __init msi_wmi_init(void)
 		goto err_uninstall_notifier;
 
 	if (!acpi_video_backlight_support()) {
-		backlight = backlight_device_register(DRV_NAME,
-				NULL, NULL, &msi_backlight_ops);
+		struct backlight_properties props;
+		memset(&props, 0, sizeof(struct backlight_properties));
+		props.max_brightness = ARRAY_SIZE(backlight_map) - 1;
+		backlight = backlight_device_register(DRV_NAME, NULL, NULL,
+						      &msi_backlight_ops,
+						      &props);
 		if (IS_ERR(backlight))
 			goto err_free_input;
 
-		backlight->props.max_brightness = ARRAY_SIZE(backlight_map) - 1;
 		err = bl_get(NULL);
 		if (err < 0)
 			goto err_free_backlight;
diff --git a/drivers/platform/x86/panasonic-laptop.c b/drivers/platform/x86/panasonic-laptop.c
index c9fc479fc290..ab5c9cea1462 100644
--- a/drivers/platform/x86/panasonic-laptop.c
+++ b/drivers/platform/x86/panasonic-laptop.c
@@ -600,6 +600,7 @@ static int acpi_pcc_hotkey_resume(struct acpi_device *device)
 
 static int acpi_pcc_hotkey_add(struct acpi_device *device)
 {
+	struct backlight_properties props;
 	struct pcc_acpi *pcc;
 	int num_sifr, result;
 
@@ -637,24 +638,23 @@ static int acpi_pcc_hotkey_add(struct acpi_device *device)
 	if (result) {
 		ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
 				  "Error installing keyinput handler\n"));
-		goto out_sinf;
+		goto out_hotkey;
 	}
 
-	/* initialize backlight */
-	pcc->backlight = backlight_device_register("panasonic", NULL, pcc,
-						   &pcc_backlight_ops);
-	if (IS_ERR(pcc->backlight))
-		goto out_input;
-
 	if (!acpi_pcc_retrieve_biosdata(pcc, pcc->sinf)) {
 		ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
 				 "Couldn't retrieve BIOS data\n"));
-		goto out_backlight;
+		goto out_input;
 	}
+	/* initialize backlight */
+	memset(&props, 0, sizeof(struct backlight_properties));
+	props.max_brightness = pcc->sinf[SINF_AC_MAX_BRIGHT];
+	pcc->backlight = backlight_device_register("panasonic", NULL, pcc,
+						   &pcc_backlight_ops, &props);
+	if (IS_ERR(pcc->backlight))
+		goto out_sinf;
 
 	/* read the initial brightness setting from the hardware */
-	pcc->backlight->props.max_brightness =
-					pcc->sinf[SINF_AC_MAX_BRIGHT];
 	pcc->backlight->props.brightness = pcc->sinf[SINF_AC_CUR_BRIGHT];
 
 	/* read the initial sticky key mode from the hardware */
@@ -669,12 +669,12 @@ static int acpi_pcc_hotkey_add(struct acpi_device *device)
 
 out_backlight:
 	backlight_device_unregister(pcc->backlight);
+out_sinf:
+	kfree(pcc->sinf);
 out_input:
 	input_unregister_device(pcc->input_dev);
 	/* no need to input_free_device() since core input API refcount and
 	 * free()s the device */
-out_sinf:
-	kfree(pcc->sinf);
 out_hotkey:
 	kfree(pcc);
 
diff --git a/drivers/platform/x86/sony-laptop.c b/drivers/platform/x86/sony-laptop.c
index 5a3d8514c66d..6553b91caaa4 100644
--- a/drivers/platform/x86/sony-laptop.c
+++ b/drivers/platform/x86/sony-laptop.c
@@ -1291,9 +1291,13 @@ static int sony_nc_add(struct acpi_device *device)
 		       "controlled by ACPI video driver\n");
 	} else if (ACPI_SUCCESS(acpi_get_handle(sony_nc_acpi_handle, "GBRT",
 						&handle))) {
+							struct backlight_properties props;
+		memset(&props, 0, sizeof(struct backlight_properties));
+		props.max_brightness = SONY_MAX_BRIGHTNESS - 1;
 		sony_backlight_device = backlight_device_register("sony", NULL,
 								  NULL,
-								  &sony_backlight_ops);
+								  &sony_backlight_ops,
+								  &props);
 
 		if (IS_ERR(sony_backlight_device)) {
 			printk(KERN_WARNING DRV_PFX "unable to register backlight device\n");
@@ -1302,8 +1306,6 @@ static int sony_nc_add(struct acpi_device *device)
 			sony_backlight_device->props.brightness =
 			    sony_backlight_get_brightness
 			    (sony_backlight_device);
-			sony_backlight_device->props.max_brightness =
-			    SONY_MAX_BRIGHTNESS - 1;
 		}
 
 	}
diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c
index c64e3528889b..770b85327f84 100644
--- a/drivers/platform/x86/thinkpad_acpi.c
+++ b/drivers/platform/x86/thinkpad_acpi.c
@@ -6170,6 +6170,7 @@ static const struct tpacpi_quirk brightness_quirk_table[] __initconst = {
 
 static int __init brightness_init(struct ibm_init_struct *iibm)
 {
+	struct backlight_properties props;
 	int b;
 	unsigned long quirks;
 
@@ -6259,9 +6260,12 @@ static int __init brightness_init(struct ibm_init_struct *iibm)
 		printk(TPACPI_INFO
 		       "detected a 16-level brightness capable ThinkPad\n");
 
-	ibm_backlight_device = backlight_device_register(
-					TPACPI_BACKLIGHT_DEV_NAME, NULL, NULL,
-					&ibm_backlight_data);
+	memset(&props, 0, sizeof(struct backlight_properties));
+	props.max_brightness = (tp_features.bright_16levels) ? 15 : 7;
+	ibm_backlight_device = backlight_device_register(TPACPI_BACKLIGHT_DEV_NAME,
+							 NULL, NULL,
+							 &ibm_backlight_data,
+							 &props);
 	if (IS_ERR(ibm_backlight_device)) {
 		int rc = PTR_ERR(ibm_backlight_device);
 		ibm_backlight_device = NULL;
@@ -6280,8 +6284,6 @@ static int __init brightness_init(struct ibm_init_struct *iibm)
 			"or not on your ThinkPad\n", TPACPI_MAIL);
 	}
 
-	ibm_backlight_device->props.max_brightness =
-				(tp_features.bright_16levels)? 15 : 7;
 	ibm_backlight_device->props.brightness = b & TP_EC_BACKLIGHT_LVLMSK;
 	backlight_update_status(ibm_backlight_device);
 
diff --git a/drivers/platform/x86/toshiba_acpi.c b/drivers/platform/x86/toshiba_acpi.c
index 789240d1b577..def4841183be 100644
--- a/drivers/platform/x86/toshiba_acpi.c
+++ b/drivers/platform/x86/toshiba_acpi.c
@@ -924,6 +924,7 @@ static int __init toshiba_acpi_init(void)
 	u32 hci_result;
 	bool bt_present;
 	int ret = 0;
+	struct backlight_properties props;
 
 	if (acpi_disabled)
 		return -ENODEV;
@@ -974,10 +975,12 @@ static int __init toshiba_acpi_init(void)
 		}
 	}
 
+	props.max_brightness = HCI_LCD_BRIGHTNESS_LEVELS - 1;
 	toshiba_backlight_device = backlight_device_register("toshiba",
-						&toshiba_acpi.p_dev->dev,
-						NULL,
-						&toshiba_backlight_data);
+							     &toshiba_acpi.p_dev->dev,
+							     NULL,
+							     &toshiba_backlight_data,
+							     &props);
         if (IS_ERR(toshiba_backlight_device)) {
 		ret = PTR_ERR(toshiba_backlight_device);
 
@@ -986,7 +989,6 @@ static int __init toshiba_acpi_init(void)
 		toshiba_acpi_exit();
 		return ret;
 	}
-        toshiba_backlight_device->props.max_brightness = HCI_LCD_BRIGHTNESS_LEVELS - 1;
 
 	/* Register rfkill switch for Bluetooth */
 	if (hci_get_bt_present(&bt_present) == HCI_SUCCESS && bt_present) {
diff --git a/drivers/staging/samsung-laptop/samsung-laptop.c b/drivers/staging/samsung-laptop/samsung-laptop.c
index dd7ea4c075db..eb44b60e1eb5 100644
--- a/drivers/staging/samsung-laptop/samsung-laptop.c
+++ b/drivers/staging/samsung-laptop/samsung-laptop.c
@@ -394,6 +394,7 @@ MODULE_DEVICE_TABLE(dmi, samsung_dmi_table);
 
 static int __init samsung_init(void)
 {
+	struct backlight_properties props;
 	struct sabi_retval sretval;
 	const char *testStr = "SECLINUX";
 	void __iomem *memcheck;
@@ -486,12 +487,14 @@ static int __init samsung_init(void)
 		goto error_no_platform;
 
 	/* create a backlight device to talk to this one */
+	memset(&props, 0, sizeof(struct backlight_properties));
+	props.max_brightness = MAX_BRIGHT;
 	backlight_device = backlight_device_register("samsung", &sdev->dev,
-						     NULL, &backlight_ops);
+						     NULL, &backlight_ops,
+						     &props);
 	if (IS_ERR(backlight_device))
 		goto error_no_backlight;
 
-	backlight_device->props.max_brightness = MAX_BRIGHT;
 	backlight_device->props.brightness = read_brightness();
 	backlight_device->props.power = FB_BLANK_UNBLANK;
 	backlight_update_status(backlight_device);
diff --git a/drivers/usb/misc/appledisplay.c b/drivers/usb/misc/appledisplay.c
index 4d2952f1fb13..3adab041355a 100644
--- a/drivers/usb/misc/appledisplay.c
+++ b/drivers/usb/misc/appledisplay.c
@@ -202,6 +202,7 @@ static void appledisplay_work(struct work_struct *work)
 static int appledisplay_probe(struct usb_interface *iface,
 	const struct usb_device_id *id)
 {
+	struct backlight_properties props;
 	struct appledisplay *pdata;
 	struct usb_device *udev = interface_to_usbdev(iface);
 	struct usb_host_interface *iface_desc;
@@ -279,16 +280,16 @@ static int appledisplay_probe(struct usb_interface *iface,
 	/* Register backlight device */
 	snprintf(bl_name, sizeof(bl_name), "appledisplay%d",
 		atomic_inc_return(&count_displays) - 1);
+	memset(&props, 0, sizeof(struct backlight_properties));
+	props.max_brightness = 0xff;
 	pdata->bd = backlight_device_register(bl_name, NULL, pdata,
-						&appledisplay_bl_data);
+					      &appledisplay_bl_data, &props);
 	if (IS_ERR(pdata->bd)) {
 		dev_err(&iface->dev, "Backlight registration failed\n");
 		retval = PTR_ERR(pdata->bd);
 		goto error;
 	}
 
-	pdata->bd->props.max_brightness = 0xff;
-
 	/* Try to get brightness */
 	brightness = appledisplay_bl_get_brightness(pdata->bd);
 
diff --git a/drivers/video/atmel_lcdfb.c b/drivers/video/atmel_lcdfb.c
index 3d886c6902f9..11de3bfd4e54 100644
--- a/drivers/video/atmel_lcdfb.c
+++ b/drivers/video/atmel_lcdfb.c
@@ -117,6 +117,7 @@ static struct backlight_ops atmel_lcdc_bl_ops = {
 
 static void init_backlight(struct atmel_lcdfb_info *sinfo)
 {
+	struct backlight_properties props;
 	struct backlight_device	*bl;
 
 	sinfo->bl_power = FB_BLANK_UNBLANK;
@@ -124,8 +125,10 @@ static void init_backlight(struct atmel_lcdfb_info *sinfo)
 	if (sinfo->backlight)
 		return;
 
-	bl = backlight_device_register("backlight", &sinfo->pdev->dev,
-			sinfo, &atmel_lcdc_bl_ops);
+	memset(&props, 0, sizeof(struct backlight_properties));
+	props.max_brightness = 0xff;
+	bl = backlight_device_register("backlight", &sinfo->pdev->dev, sinfo,
+				       &atmel_lcdc_bl_ops, &props);
 	if (IS_ERR(bl)) {
 		dev_err(&sinfo->pdev->dev, "error %ld on backlight register\n",
 				PTR_ERR(bl));
@@ -135,7 +138,6 @@ static void init_backlight(struct atmel_lcdfb_info *sinfo)
 
 	bl->props.power = FB_BLANK_UNBLANK;
 	bl->props.fb_blank = FB_BLANK_UNBLANK;
-	bl->props.max_brightness = 0xff;
 	bl->props.brightness = atmel_bl_get_brightness(bl);
 }
 
diff --git a/drivers/video/aty/aty128fb.c b/drivers/video/aty/aty128fb.c
index 9ee67d6da710..a489be0c4614 100644
--- a/drivers/video/aty/aty128fb.c
+++ b/drivers/video/aty/aty128fb.c
@@ -1802,6 +1802,7 @@ static void aty128_bl_set_power(struct fb_info *info, int power)
 
 static void aty128_bl_init(struct aty128fb_par *par)
 {
+	struct backlight_properties props;
 	struct fb_info *info = pci_get_drvdata(par->pdev);
 	struct backlight_device *bd;
 	char name[12];
@@ -1817,7 +1818,10 @@ static void aty128_bl_init(struct aty128fb_par *par)
 
 	snprintf(name, sizeof(name), "aty128bl%d", info->node);
 
-	bd = backlight_device_register(name, info->dev, par, &aty128_bl_data);
+	memset(&props, 0, sizeof(struct backlight_properties));
+	props.max_brightness = FB_BACKLIGHT_LEVELS - 1;
+	bd = backlight_device_register(name, info->dev, par, &aty128_bl_data,
+				       &props);
 	if (IS_ERR(bd)) {
 		info->bl_dev = NULL;
 		printk(KERN_WARNING "aty128: Backlight registration failed\n");
@@ -1829,7 +1833,6 @@ static void aty128_bl_init(struct aty128fb_par *par)
 		 63 * FB_BACKLIGHT_MAX / MAX_LEVEL,
 		219 * FB_BACKLIGHT_MAX / MAX_LEVEL);
 
-	bd->props.max_brightness = FB_BACKLIGHT_LEVELS - 1;
 	bd->props.brightness = bd->props.max_brightness;
 	bd->props.power = FB_BLANK_UNBLANK;
 	backlight_update_status(bd);
diff --git a/drivers/video/aty/atyfb_base.c b/drivers/video/aty/atyfb_base.c
index e45ab8db2ddc..29d72851f85b 100644
--- a/drivers/video/aty/atyfb_base.c
+++ b/drivers/video/aty/atyfb_base.c
@@ -2232,6 +2232,7 @@ static struct backlight_ops aty_bl_data = {
 
 static void aty_bl_init(struct atyfb_par *par)
 {
+	struct backlight_properties props;
 	struct fb_info *info = pci_get_drvdata(par->pdev);
 	struct backlight_device *bd;
 	char name[12];
@@ -2243,7 +2244,10 @@ static void aty_bl_init(struct atyfb_par *par)
 
 	snprintf(name, sizeof(name), "atybl%d", info->node);
 
-	bd = backlight_device_register(name, info->dev, par, &aty_bl_data);
+	memset(&props, 0, sizeof(struct backlight_properties));
+	props.max_brightness = FB_BACKLIGHT_LEVELS - 1;
+	bd = backlight_device_register(name, info->dev, par, &aty_bl_data,
+				       &props);
 	if (IS_ERR(bd)) {
 		info->bl_dev = NULL;
 		printk(KERN_WARNING "aty: Backlight registration failed\n");
@@ -2255,7 +2259,6 @@ static void aty_bl_init(struct atyfb_par *par)
 			    0x3F * FB_BACKLIGHT_MAX / MAX_LEVEL,
 			    0xFF * FB_BACKLIGHT_MAX / MAX_LEVEL);
 
-	bd->props.max_brightness = FB_BACKLIGHT_LEVELS - 1;
 	bd->props.brightness = bd->props.max_brightness;
 	bd->props.power = FB_BLANK_UNBLANK;
 	backlight_update_status(bd);
diff --git a/drivers/video/aty/radeon_backlight.c b/drivers/video/aty/radeon_backlight.c
index fa1198c4ccc5..9fc8c66be3ce 100644
--- a/drivers/video/aty/radeon_backlight.c
+++ b/drivers/video/aty/radeon_backlight.c
@@ -134,6 +134,7 @@ static struct backlight_ops radeon_bl_data = {
 
 void radeonfb_bl_init(struct radeonfb_info *rinfo)
 {
+	struct backlight_properties props;
 	struct backlight_device *bd;
 	struct radeon_bl_privdata *pdata;
 	char name[12];
@@ -155,7 +156,10 @@ void radeonfb_bl_init(struct radeonfb_info *rinfo)
 
 	snprintf(name, sizeof(name), "radeonbl%d", rinfo->info->node);
 
-	bd = backlight_device_register(name, rinfo->info->dev, pdata, &radeon_bl_data);
+	memset(&props, 0, sizeof(struct backlight_properties));
+	props.max_brightness = FB_BACKLIGHT_LEVELS - 1;
+	bd = backlight_device_register(name, rinfo->info->dev, pdata,
+				       &radeon_bl_data, &props);
 	if (IS_ERR(bd)) {
 		rinfo->info->bl_dev = NULL;
 		printk("radeonfb: Backlight registration failed\n");
@@ -185,7 +189,6 @@ void radeonfb_bl_init(struct radeonfb_info *rinfo)
 		 63 * FB_BACKLIGHT_MAX / MAX_RADEON_LEVEL,
 		217 * FB_BACKLIGHT_MAX / MAX_RADEON_LEVEL);
 
-	bd->props.max_brightness = FB_BACKLIGHT_LEVELS - 1;
 	bd->props.brightness = bd->props.max_brightness;
 	bd->props.power = FB_BLANK_UNBLANK;
 	backlight_update_status(bd);
diff --git a/drivers/video/backlight/88pm860x_bl.c b/drivers/video/backlight/88pm860x_bl.c
index b8f705cca438..93e25c77aeb2 100644
--- a/drivers/video/backlight/88pm860x_bl.c
+++ b/drivers/video/backlight/88pm860x_bl.c
@@ -187,6 +187,7 @@ static int pm860x_backlight_probe(struct platform_device *pdev)
 	struct pm860x_backlight_data *data;
 	struct backlight_device *bl;
 	struct resource *res;
+	struct backlight_properties props;
 	unsigned char value;
 	char name[MFD_NAME_SIZE];
 	int ret;
@@ -223,14 +224,15 @@ static int pm860x_backlight_probe(struct platform_device *pdev)
 		return -EINVAL;
 	}
 
+	memset(&props, 0, sizeof(struct backlight_properties));
+	props.max_brightness = MAX_BRIGHTNESS;
 	bl = backlight_device_register(name, &pdev->dev, data,
-					&pm860x_backlight_ops);
+					&pm860x_backlight_ops, &props);
 	if (IS_ERR(bl)) {
 		dev_err(&pdev->dev, "failed to register backlight\n");
 		kfree(data);
 		return PTR_ERR(bl);
 	}
-	bl->props.max_brightness = MAX_BRIGHTNESS;
 	bl->props.brightness = MAX_BRIGHTNESS;
 
 	platform_set_drvdata(pdev, bl);
diff --git a/drivers/video/backlight/adp5520_bl.c b/drivers/video/backlight/adp5520_bl.c
index 86d95c228adb..5183f0e4d314 100644
--- a/drivers/video/backlight/adp5520_bl.c
+++ b/drivers/video/backlight/adp5520_bl.c
@@ -278,6 +278,7 @@ static const struct attribute_group adp5520_bl_attr_group = {
 
 static int __devinit adp5520_bl_probe(struct platform_device *pdev)
 {
+	struct backlight_properties props;
 	struct backlight_device *bl;
 	struct adp5520_bl *data;
 	int ret = 0;
@@ -300,17 +301,17 @@ static int __devinit adp5520_bl_probe(struct platform_device *pdev)
 
 	mutex_init(&data->lock);
 
-	bl = backlight_device_register(pdev->name, data->master,
-			data, &adp5520_bl_ops);
+	memset(&props, 0, sizeof(struct backlight_properties));
+	props.max_brightness = ADP5020_MAX_BRIGHTNESS;
+	bl = backlight_device_register(pdev->name, data->master, data,
+				       &adp5520_bl_ops, &props);
 	if (IS_ERR(bl)) {
 		dev_err(&pdev->dev, "failed to register backlight\n");
 		kfree(data);
 		return PTR_ERR(bl);
 	}
 
-	bl->props.max_brightness =
-		bl->props.brightness = ADP5020_MAX_BRIGHTNESS;
-
+	bl->props.brightness = ADP5020_MAX_BRIGHTNESS;
 	if (data->pdata->en_ambl_sens)
 		ret = sysfs_create_group(&bl->dev.kobj,
 			&adp5520_bl_attr_group);
diff --git a/drivers/video/backlight/adx_bl.c b/drivers/video/backlight/adx_bl.c
index a683dd1be4bc..b0624b983889 100644
--- a/drivers/video/backlight/adx_bl.c
+++ b/drivers/video/backlight/adx_bl.c
@@ -70,6 +70,7 @@ static const struct backlight_ops adx_backlight_ops = {
 
 static int __devinit adx_backlight_probe(struct platform_device *pdev)
 {
+	struct backlight_properties props;
 	struct backlight_device *bldev;
 	struct resource *res;
 	struct adxbl *bl;
@@ -101,14 +102,15 @@ static int __devinit adx_backlight_probe(struct platform_device *pdev)
 		goto out;
 	}
 
-	bldev = backlight_device_register(dev_name(&pdev->dev), &pdev->dev, bl,
-			&adx_backlight_ops);
+	memset(&props, 0, sizeof(struct backlight_properties));
+	props.max_brightness = 0xff;
+	bldev = backlight_device_register(dev_name(&pdev->dev), &pdev->dev,
+					  bl, &adx_backlight_ops, &props);
 	if (!bldev) {
 		ret = -ENOMEM;
 		goto out;
 	}
 
-	bldev->props.max_brightness = 0xff;
 	bldev->props.brightness = 0xff;
 	bldev->props.power = FB_BLANK_UNBLANK;
 
diff --git a/drivers/video/backlight/atmel-pwm-bl.c b/drivers/video/backlight/atmel-pwm-bl.c
index f625ffc69ad3..2d9760551a4b 100644
--- a/drivers/video/backlight/atmel-pwm-bl.c
+++ b/drivers/video/backlight/atmel-pwm-bl.c
@@ -120,6 +120,7 @@ static const struct backlight_ops atmel_pwm_bl_ops = {
 
 static int atmel_pwm_bl_probe(struct platform_device *pdev)
 {
+	struct backlight_properties props;
 	const struct atmel_pwm_bl_platform_data *pdata;
 	struct backlight_device *bldev;
 	struct atmel_pwm_bl *pwmbl;
@@ -165,8 +166,10 @@ static int atmel_pwm_bl_probe(struct platform_device *pdev)
 			goto err_free_gpio;
 	}
 
-	bldev = backlight_device_register("atmel-pwm-bl",
-			&pdev->dev, pwmbl, &atmel_pwm_bl_ops);
+	memset(&props, 0, sizeof(struct backlight_properties));
+	props.max_brightness = pdata->pwm_duty_max - pdata->pwm_duty_min;
+	bldev = backlight_device_register("atmel-pwm-bl", &pdev->dev, pwmbl,
+					  &atmel_pwm_bl_ops, &props);
 	if (IS_ERR(bldev)) {
 		retval = PTR_ERR(bldev);
 		goto err_free_gpio;
@@ -178,7 +181,6 @@ static int atmel_pwm_bl_probe(struct platform_device *pdev)
 
 	/* Power up the backlight by default at middle intesity. */
 	bldev->props.power = FB_BLANK_UNBLANK;
-	bldev->props.max_brightness = pdata->pwm_duty_max - pdata->pwm_duty_min;
 	bldev->props.brightness = bldev->props.max_brightness / 2;
 
 	retval = atmel_pwm_bl_init_pwm(pwmbl);
diff --git a/drivers/video/backlight/backlight.c b/drivers/video/backlight/backlight.c
index b800cd4eeec8..68bb838b9f11 100644
--- a/drivers/video/backlight/backlight.c
+++ b/drivers/video/backlight/backlight.c
@@ -269,7 +269,8 @@ EXPORT_SYMBOL(backlight_force_update);
  * ERR_PTR() or a pointer to the newly allocated device.
  */
 struct backlight_device *backlight_device_register(const char *name,
-		struct device *parent, void *devdata, const struct backlight_ops *ops)
+	struct device *parent, void *devdata, const struct backlight_ops *ops,
+	const struct backlight_properties *props)
 {
 	struct backlight_device *new_bd;
 	int rc;
@@ -289,6 +290,11 @@ struct backlight_device *backlight_device_register(const char *name,
 	dev_set_name(&new_bd->dev, name);
 	dev_set_drvdata(&new_bd->dev, devdata);
 
+	/* Set default properties */
+	if (props)
+		memcpy(&new_bd->props, props,
+		       sizeof(struct backlight_properties));
+
 	rc = device_register(&new_bd->dev);
 	if (rc) {
 		kfree(new_bd);
diff --git a/drivers/video/backlight/corgi_lcd.c b/drivers/video/backlight/corgi_lcd.c
index b4bcf8043797..73bdd8454c94 100644
--- a/drivers/video/backlight/corgi_lcd.c
+++ b/drivers/video/backlight/corgi_lcd.c
@@ -533,6 +533,7 @@ err_free_backlight_on:
 
 static int __devinit corgi_lcd_probe(struct spi_device *spi)
 {
+	struct backlight_properties props;
 	struct corgi_lcd_platform_data *pdata = spi->dev.platform_data;
 	struct corgi_lcd *lcd;
 	int ret = 0;
@@ -559,13 +560,14 @@ static int __devinit corgi_lcd_probe(struct spi_device *spi)
 	lcd->power = FB_BLANK_POWERDOWN;
 	lcd->mode = (pdata) ? pdata->init_mode : CORGI_LCD_MODE_VGA;
 
-	lcd->bl_dev = backlight_device_register("corgi_bl", &spi->dev,
-					lcd, &corgi_bl_ops);
+	memset(&props, 0, sizeof(struct backlight_properties));
+	props.max_brightness = pdata->max_intensity;
+	lcd->bl_dev = backlight_device_register("corgi_bl", &spi->dev, lcd,
+						&corgi_bl_ops, &props);
 	if (IS_ERR(lcd->bl_dev)) {
 		ret = PTR_ERR(lcd->bl_dev);
 		goto err_unregister_lcd;
 	}
-	lcd->bl_dev->props.max_brightness = pdata->max_intensity;
 	lcd->bl_dev->props.brightness = pdata->default_intensity;
 	lcd->bl_dev->props.power = FB_BLANK_UNBLANK;
 
diff --git a/drivers/video/backlight/cr_bllcd.c b/drivers/video/backlight/cr_bllcd.c
index da86db4374a0..1cce6031bff2 100644
--- a/drivers/video/backlight/cr_bllcd.c
+++ b/drivers/video/backlight/cr_bllcd.c
@@ -170,6 +170,7 @@ static struct lcd_ops cr_lcd_ops = {
 
 static int cr_backlight_probe(struct platform_device *pdev)
 {
+	struct backlight_properties props;
 	struct backlight_device *bdp;
 	struct lcd_device *ldp;
 	struct cr_panel *crp;
@@ -190,8 +191,9 @@ static int cr_backlight_probe(struct platform_device *pdev)
 		return -ENODEV;
 	}
 
-	bdp = backlight_device_register("cr-backlight",
-					&pdev->dev, NULL, &cr_backlight_ops);
+	memset(&props, 0, sizeof(struct backlight_properties));
+	bdp = backlight_device_register("cr-backlight", &pdev->dev, NULL,
+					&cr_backlight_ops, &props);
 	if (IS_ERR(bdp)) {
 		pci_dev_put(lpc_dev);
 		return PTR_ERR(bdp);
@@ -220,9 +222,7 @@ static int cr_backlight_probe(struct platform_device *pdev)
 	crp->cr_lcd_device = ldp;
 	crp->cr_backlight_device->props.power = FB_BLANK_UNBLANK;
 	crp->cr_backlight_device->props.brightness = 0;
-	crp->cr_backlight_device->props.max_brightness = 0;
 	cr_backlight_set_intensity(crp->cr_backlight_device);
-
 	cr_lcd_set_power(crp->cr_lcd_device, FB_BLANK_UNBLANK);
 
 	platform_set_drvdata(pdev, crp);
diff --git a/drivers/video/backlight/da903x_bl.c b/drivers/video/backlight/da903x_bl.c
index 74cdc640173d..686e4a789238 100644
--- a/drivers/video/backlight/da903x_bl.c
+++ b/drivers/video/backlight/da903x_bl.c
@@ -105,6 +105,7 @@ static int da903x_backlight_probe(struct platform_device *pdev)
 	struct da9034_backlight_pdata *pdata = pdev->dev.platform_data;
 	struct da903x_backlight_data *data;
 	struct backlight_device *bl;
+	struct backlight_properties props;
 	int max_brightness;
 
 	data = kzalloc(sizeof(*data), GFP_KERNEL);
@@ -134,15 +135,15 @@ static int da903x_backlight_probe(struct platform_device *pdev)
 		da903x_write(data->da903x_dev, DA9034_WLED_CONTROL2,
 				DA9034_WLED_ISET(pdata->output_current));
 
-	bl = backlight_device_register(pdev->name, data->da903x_dev,
-			data, &da903x_backlight_ops);
+	props.max_brightness = max_brightness;
+	bl = backlight_device_register(pdev->name, data->da903x_dev, data,
+				       &da903x_backlight_ops, &props);
 	if (IS_ERR(bl)) {
 		dev_err(&pdev->dev, "failed to register backlight\n");
 		kfree(data);
 		return PTR_ERR(bl);
 	}
 
-	bl->props.max_brightness = max_brightness;
 	bl->props.brightness = max_brightness;
 
 	platform_set_drvdata(pdev, bl);
diff --git a/drivers/video/backlight/generic_bl.c b/drivers/video/backlight/generic_bl.c
index e6d348e63596..312ca619735d 100644
--- a/drivers/video/backlight/generic_bl.c
+++ b/drivers/video/backlight/generic_bl.c
@@ -78,6 +78,7 @@ static const struct backlight_ops genericbl_ops = {
 
 static int genericbl_probe(struct platform_device *pdev)
 {
+	struct backlight_properties props;
 	struct generic_bl_info *machinfo = pdev->dev.platform_data;
 	const char *name = "generic-bl";
 	struct backlight_device *bd;
@@ -89,14 +90,15 @@ static int genericbl_probe(struct platform_device *pdev)
 	if (machinfo->name)
 		name = machinfo->name;
 
-	bd = backlight_device_register (name,
-		&pdev->dev, NULL, &genericbl_ops);
+	memset(&props, 0, sizeof(struct backlight_properties));
+	props.max_brightness = machinfo->max_intensity;
+	bd = backlight_device_register(name, &pdev->dev, NULL, &genericbl_ops,
+				       &props);
 	if (IS_ERR (bd))
 		return PTR_ERR (bd);
 
 	platform_set_drvdata(pdev, bd);
 
-	bd->props.max_brightness = machinfo->max_intensity;
 	bd->props.power = FB_BLANK_UNBLANK;
 	bd->props.brightness = machinfo->default_intensity;
 	backlight_update_status(bd);
diff --git a/drivers/video/backlight/hp680_bl.c b/drivers/video/backlight/hp680_bl.c
index f7cc528d5be7..267d23f8d645 100644
--- a/drivers/video/backlight/hp680_bl.c
+++ b/drivers/video/backlight/hp680_bl.c
@@ -105,16 +105,18 @@ static const struct backlight_ops hp680bl_ops = {
 
 static int __devinit hp680bl_probe(struct platform_device *pdev)
 {
+	struct backlight_properties props;
 	struct backlight_device *bd;
 
-	bd = backlight_device_register ("hp680-bl", &pdev->dev, NULL,
-		    &hp680bl_ops);
+	memset(&props, 0, sizeof(struct backlight_properties));
+	props.max_brightness = HP680_MAX_INTENSITY;
+	bd = backlight_device_register("hp680-bl", &pdev->dev, NULL,
+				       &hp680bl_ops, &props);
 	if (IS_ERR(bd))
 		return PTR_ERR(bd);
 
 	platform_set_drvdata(pdev, bd);
 
-	bd->props.max_brightness = HP680_MAX_INTENSITY;
 	bd->props.brightness = HP680_DEFAULT_INTENSITY;
 	hp680bl_send_intensity(bd);
 
diff --git a/drivers/video/backlight/jornada720_bl.c b/drivers/video/backlight/jornada720_bl.c
index db9071fc5665..2f177b3a4885 100644
--- a/drivers/video/backlight/jornada720_bl.c
+++ b/drivers/video/backlight/jornada720_bl.c
@@ -101,10 +101,14 @@ static const struct backlight_ops jornada_bl_ops = {
 
 static int jornada_bl_probe(struct platform_device *pdev)
 {
+	struct backlight_properties props;
 	int ret;
 	struct backlight_device *bd;
 
-	bd = backlight_device_register(S1D_DEVICENAME, &pdev->dev, NULL, &jornada_bl_ops);
+	memset(&props, 0, sizeof(struct backlight_properties));
+	props.max_brightness = BL_MAX_BRIGHT;
+	bd = backlight_device_register(S1D_DEVICENAME, &pdev->dev, NULL,
+				       &jornada_bl_ops, &props);
 
 	if (IS_ERR(bd)) {
 		ret = PTR_ERR(bd);
@@ -117,7 +121,6 @@ static int jornada_bl_probe(struct platform_device *pdev)
 	/* note. make sure max brightness is set otherwise
 	   you will get seemingly non-related errors when
 	   trying to change brightness */
-	bd->props.max_brightness = BL_MAX_BRIGHT;
 	jornada_bl_update_status(bd);
 
 	platform_set_drvdata(pdev, bd);
diff --git a/drivers/video/backlight/kb3886_bl.c b/drivers/video/backlight/kb3886_bl.c
index 939e7b830cf3..f439a8632287 100644
--- a/drivers/video/backlight/kb3886_bl.c
+++ b/drivers/video/backlight/kb3886_bl.c
@@ -141,20 +141,24 @@ static const struct backlight_ops kb3886bl_ops = {
 
 static int kb3886bl_probe(struct platform_device *pdev)
 {
+	struct backlight_properties props;
 	struct kb3886bl_machinfo *machinfo = pdev->dev.platform_data;
 
 	bl_machinfo = machinfo;
 	if (!machinfo->limit_mask)
 		machinfo->limit_mask = -1;
 
+	memset(&props, 0, sizeof(struct backlight_properties));
+	props.max_brightness = machinfo->max_intensity;
 	kb3886_backlight_device = backlight_device_register("kb3886-bl",
-		&pdev->dev, NULL, &kb3886bl_ops);
+							    &pdev->dev, NULL,
+							    &kb3886bl_ops,
+							    &props);
 	if (IS_ERR(kb3886_backlight_device))
 		return PTR_ERR(kb3886_backlight_device);
 
 	platform_set_drvdata(pdev, kb3886_backlight_device);
 
-	kb3886_backlight_device->props.max_brightness = machinfo->max_intensity;
 	kb3886_backlight_device->props.power = FB_BLANK_UNBLANK;
 	kb3886_backlight_device->props.brightness = machinfo->default_intensity;
 	backlight_update_status(kb3886_backlight_device);
diff --git a/drivers/video/backlight/locomolcd.c b/drivers/video/backlight/locomolcd.c
index 00a9591b0003..7571bc26071e 100644
--- a/drivers/video/backlight/locomolcd.c
+++ b/drivers/video/backlight/locomolcd.c
@@ -167,6 +167,7 @@ static int locomolcd_resume(struct locomo_dev *dev)
 
 static int locomolcd_probe(struct locomo_dev *ldev)
 {
+	struct backlight_properties props;
 	unsigned long flags;
 
 	local_irq_save(flags);
@@ -182,13 +183,16 @@ static int locomolcd_probe(struct locomo_dev *ldev)
 
 	local_irq_restore(flags);
 
-	locomolcd_bl_device = backlight_device_register("locomo-bl", &ldev->dev, NULL, &locomobl_data);
+	memset(&props, 0, sizeof(struct backlight_properties));
+	props.max_brightness = 4;
+	locomolcd_bl_device = backlight_device_register("locomo-bl",
+							&ldev->dev, NULL,
+							&locomobl_data, &props);
 
 	if (IS_ERR (locomolcd_bl_device))
 		return PTR_ERR (locomolcd_bl_device);
 
 	/* Set up frontlight so that screen is readable */
-	locomolcd_bl_device->props.max_brightness = 4,
 	locomolcd_bl_device->props.brightness = 2;
 	locomolcd_set_intensity(locomolcd_bl_device);
 
diff --git a/drivers/video/backlight/max8925_bl.c b/drivers/video/backlight/max8925_bl.c
index c267069a52a3..c91adaf492cf 100644
--- a/drivers/video/backlight/max8925_bl.c
+++ b/drivers/video/backlight/max8925_bl.c
@@ -104,6 +104,7 @@ static int __devinit max8925_backlight_probe(struct platform_device *pdev)
 	struct max8925_backlight_pdata *pdata = NULL;
 	struct max8925_backlight_data *data;
 	struct backlight_device *bl;
+	struct backlight_properties props;
 	struct resource *res;
 	char name[MAX8925_NAME_SIZE];
 	unsigned char value;
@@ -133,14 +134,15 @@ static int __devinit max8925_backlight_probe(struct platform_device *pdev)
 	data->chip = chip;
 	data->current_brightness = 0;
 
+	memset(&props, 0, sizeof(struct backlight_properties));
+	props.max_brightness = MAX_BRIGHTNESS;
 	bl = backlight_device_register(name, &pdev->dev, data,
-					&max8925_backlight_ops);
+					&max8925_backlight_ops, &props);
 	if (IS_ERR(bl)) {
 		dev_err(&pdev->dev, "failed to register backlight\n");
 		kfree(data);
 		return PTR_ERR(bl);
 	}
-	bl->props.max_brightness = MAX_BRIGHTNESS;
 	bl->props.brightness = MAX_BRIGHTNESS;
 
 	platform_set_drvdata(pdev, bl);
diff --git a/drivers/video/backlight/mbp_nvidia_bl.c b/drivers/video/backlight/mbp_nvidia_bl.c
index 2e78b0784bdc..0881358eeace 100644
--- a/drivers/video/backlight/mbp_nvidia_bl.c
+++ b/drivers/video/backlight/mbp_nvidia_bl.c
@@ -250,6 +250,7 @@ static const struct dmi_system_id __initdata mbp_device_table[] = {
 
 static int __init mbp_init(void)
 {
+	struct backlight_properties props;
 	if (!dmi_check_system(mbp_device_table))
 		return -ENODEV;
 
@@ -257,14 +258,17 @@ static int __init mbp_init(void)
 						"Macbook Pro backlight"))
 		return -ENXIO;
 
-	mbp_backlight_device = backlight_device_register("mbp_backlight",
-					NULL, NULL, &driver_data->backlight_ops);
+	memset(&props, 0, sizeof(struct backlight_properties));
+	props.max_brightness = 15;
+	mbp_backlight_device = backlight_device_register("mbp_backlight", NULL,
+							 NULL,
+							 &driver_data->backlight_ops,
+							 &props);
 	if (IS_ERR(mbp_backlight_device)) {
 		release_region(driver_data->iostart, driver_data->iolen);
 		return PTR_ERR(mbp_backlight_device);
 	}
 
-	mbp_backlight_device->props.max_brightness = 15;
 	mbp_backlight_device->props.brightness =
 		driver_data->backlight_ops.get_brightness(mbp_backlight_device);
 	backlight_update_status(mbp_backlight_device);
diff --git a/drivers/video/backlight/omap1_bl.c b/drivers/video/backlight/omap1_bl.c
index a3a7f8938175..333d28e6b062 100644
--- a/drivers/video/backlight/omap1_bl.c
+++ b/drivers/video/backlight/omap1_bl.c
@@ -132,6 +132,7 @@ static const struct backlight_ops omapbl_ops = {
 
 static int omapbl_probe(struct platform_device *pdev)
 {
+	struct backlight_properties props;
 	struct backlight_device *dev;
 	struct omap_backlight *bl;
 	struct omap_backlight_config *pdata = pdev->dev.platform_data;
@@ -143,7 +144,10 @@ static int omapbl_probe(struct platform_device *pdev)
 	if (unlikely(!bl))
 		return -ENOMEM;
 
-	dev = backlight_device_register("omap-bl", &pdev->dev, bl, &omapbl_ops);
+	memset(&props, 0, sizeof(struct backlight_properties));
+	props.max_brightness = OMAPBL_MAX_INTENSITY;
+	dev = backlight_device_register("omap-bl", &pdev->dev, bl, &omapbl_ops,
+					&props);
 	if (IS_ERR(dev)) {
 		kfree(bl);
 		return PTR_ERR(dev);
@@ -160,7 +164,6 @@ static int omapbl_probe(struct platform_device *pdev)
 	omap_cfg_reg(PWL);	/* Conflicts with UART3 */
 
 	dev->props.fb_blank = FB_BLANK_UNBLANK;
-	dev->props.max_brightness = OMAPBL_MAX_INTENSITY;
 	dev->props.brightness = pdata->default_intensity;
 	omapbl_update_status(dev);
 
diff --git a/drivers/video/backlight/progear_bl.c b/drivers/video/backlight/progear_bl.c
index 2ec16deb2397..809278c90738 100644
--- a/drivers/video/backlight/progear_bl.c
+++ b/drivers/video/backlight/progear_bl.c
@@ -61,6 +61,7 @@ static const struct backlight_ops progearbl_ops = {
 
 static int progearbl_probe(struct platform_device *pdev)
 {
+	struct backlight_properties props;
 	u8 temp;
 	struct backlight_device *progear_backlight_device;
 	int ret;
@@ -82,9 +83,12 @@ static int progearbl_probe(struct platform_device *pdev)
 	pci_read_config_byte(sb_dev, SB_MPS1, &temp);
 	pci_write_config_byte(sb_dev, SB_MPS1, temp | 0x20);
 
+	memset(&props, 0, sizeof(struct backlight_properties));
+	props.max_brightness = HW_LEVEL_MAX - HW_LEVEL_MIN;
 	progear_backlight_device = backlight_device_register("progear-bl",
 							     &pdev->dev, NULL,
-							     &progearbl_ops);
+							     &progearbl_ops,
+							     &props);
 	if (IS_ERR(progear_backlight_device)) {
 		ret = PTR_ERR(progear_backlight_device);
 		goto put_sb;
@@ -94,7 +98,6 @@ static int progearbl_probe(struct platform_device *pdev)
 
 	progear_backlight_device->props.power = FB_BLANK_UNBLANK;
 	progear_backlight_device->props.brightness = HW_LEVEL_MAX - HW_LEVEL_MIN;
-	progear_backlight_device->props.max_brightness = HW_LEVEL_MAX - HW_LEVEL_MIN;
 	progearbl_set_intensity(progear_backlight_device);
 
 	return 0;
diff --git a/drivers/video/backlight/pwm_bl.c b/drivers/video/backlight/pwm_bl.c
index 9d2ec2a1cce8..b89eebc3f77d 100644
--- a/drivers/video/backlight/pwm_bl.c
+++ b/drivers/video/backlight/pwm_bl.c
@@ -65,6 +65,7 @@ static const struct backlight_ops pwm_backlight_ops = {
 
 static int pwm_backlight_probe(struct platform_device *pdev)
 {
+	struct backlight_properties props;
 	struct platform_pwm_backlight_data *data = pdev->dev.platform_data;
 	struct backlight_device *bl;
 	struct pwm_bl_data *pb;
@@ -100,15 +101,16 @@ static int pwm_backlight_probe(struct platform_device *pdev)
 	} else
 		dev_dbg(&pdev->dev, "got pwm for backlight\n");
 
-	bl = backlight_device_register(dev_name(&pdev->dev), &pdev->dev,
-			pb, &pwm_backlight_ops);
+	memset(&props, 0, sizeof(struct backlight_properties));
+	props.max_brightness = data->max_brightness;
+	bl = backlight_device_register(dev_name(&pdev->dev), &pdev->dev, pb,
+				       &pwm_backlight_ops, &props);
 	if (IS_ERR(bl)) {
 		dev_err(&pdev->dev, "failed to register backlight\n");
 		ret = PTR_ERR(bl);
 		goto err_bl;
 	}
 
-	bl->props.max_brightness = data->max_brightness;
 	bl->props.brightness = data->dft_brightness;
 	backlight_update_status(bl);
 
diff --git a/drivers/video/backlight/tosa_bl.c b/drivers/video/backlight/tosa_bl.c
index e14ce4d469f5..f57bbf170049 100644
--- a/drivers/video/backlight/tosa_bl.c
+++ b/drivers/video/backlight/tosa_bl.c
@@ -80,6 +80,7 @@ static const struct backlight_ops bl_ops = {
 static int __devinit tosa_bl_probe(struct i2c_client *client,
 		const struct i2c_device_id *id)
 {
+	struct backlight_properties props;
 	struct tosa_bl_data *data = kzalloc(sizeof(struct tosa_bl_data), GFP_KERNEL);
 	int ret = 0;
 	if (!data)
@@ -99,15 +100,16 @@ static int __devinit tosa_bl_probe(struct i2c_client *client,
 	i2c_set_clientdata(client, data);
 	data->i2c = client;
 
-	data->bl = backlight_device_register("tosa-bl", &client->dev,
-			data, &bl_ops);
+	memset(&props, 0, sizeof(struct backlight_properties));
+	props.max_brightness = 512 - 1;
+	data->bl = backlight_device_register("tosa-bl", &client->dev, data,
+					     &bl_ops, &props);
 	if (IS_ERR(data->bl)) {
 		ret = PTR_ERR(data->bl);
 		goto err_reg;
 	}
 
 	data->bl->props.brightness = 69;
-	data->bl->props.max_brightness = 512 - 1;
 	data->bl->props.power = FB_BLANK_UNBLANK;
 
 	backlight_update_status(data->bl);
diff --git a/drivers/video/backlight/wm831x_bl.c b/drivers/video/backlight/wm831x_bl.c
index e32add37a203..a4312709fb1b 100644
--- a/drivers/video/backlight/wm831x_bl.c
+++ b/drivers/video/backlight/wm831x_bl.c
@@ -125,6 +125,7 @@ static int wm831x_backlight_probe(struct platform_device *pdev)
 	struct wm831x_backlight_pdata *pdata;
 	struct wm831x_backlight_data *data;
 	struct backlight_device *bl;
+	struct backlight_properties props;
 	int ret, i, max_isel, isink_reg, dcdc_cfg;
 
 	/* We need platform data */
@@ -191,15 +192,15 @@ static int wm831x_backlight_probe(struct platform_device *pdev)
 	data->current_brightness = 0;
 	data->isink_reg = isink_reg;
 
-	bl = backlight_device_register("wm831x", &pdev->dev,
-			data, &wm831x_backlight_ops);
+	props.max_brightness = max_isel;
+	bl = backlight_device_register("wm831x", &pdev->dev, data,
+				       &wm831x_backlight_ops, &props);
 	if (IS_ERR(bl)) {
 		dev_err(&pdev->dev, "failed to register backlight\n");
 		kfree(data);
 		return PTR_ERR(bl);
 	}
 
-	bl->props.max_brightness = max_isel;
 	bl->props.brightness = max_isel;
 
 	platform_set_drvdata(pdev, bl);
diff --git a/drivers/video/bf54x-lq043fb.c b/drivers/video/bf54x-lq043fb.c
index 814312a7452f..54df3d44af8f 100644
--- a/drivers/video/bf54x-lq043fb.c
+++ b/drivers/video/bf54x-lq043fb.c
@@ -501,6 +501,7 @@ static irqreturn_t bfin_bf54x_irq_error(int irq, void *dev_id)
 
 static int __devinit bfin_bf54x_probe(struct platform_device *pdev)
 {
+	struct backlight_properties props;
 	struct bfin_bf54xfb_info *info;
 	struct fb_info *fbinfo;
 	int ret;
@@ -645,10 +646,10 @@ static int __devinit bfin_bf54x_probe(struct platform_device *pdev)
 		goto out8;
 	}
 #ifndef NO_BL_SUPPORT
-	bl_dev =
-	    backlight_device_register("bf54x-bl", NULL, NULL,
-				      &bfin_lq043fb_bl_ops);
-	bl_dev->props.max_brightness = 255;
+	memset(&props, 0, sizeof(struct backlight_properties));
+	props.max_brightness = 255;
+	bl_dev = backlight_device_register("bf54x-bl", NULL, NULL,
+					   &bfin_lq043fb_bl_ops, &props);
 
 	lcd_dev = lcd_device_register(DRIVER_NAME, &pdev->dev, NULL, &bfin_lcd_ops);
 	lcd_dev->props.max_contrast = 255, printk(KERN_INFO "Done.\n");
diff --git a/drivers/video/bfin-t350mcqb-fb.c b/drivers/video/bfin-t350mcqb-fb.c
index 5653d083a983..3a8e811a7e9f 100644
--- a/drivers/video/bfin-t350mcqb-fb.c
+++ b/drivers/video/bfin-t350mcqb-fb.c
@@ -419,6 +419,7 @@ static irqreturn_t bfin_t350mcqb_irq_error(int irq, void *dev_id)
 
 static int __devinit bfin_t350mcqb_probe(struct platform_device *pdev)
 {
+	struct backlight_properties props;
 	struct bfin_t350mcqbfb_info *info;
 	struct fb_info *fbinfo;
 	int ret;
@@ -540,10 +541,10 @@ static int __devinit bfin_t350mcqb_probe(struct platform_device *pdev)
 		goto out8;
 	}
 #ifndef NO_BL_SUPPORT
-	bl_dev =
-	    backlight_device_register("bf52x-bl", NULL, NULL,
-				      &bfin_lq043fb_bl_ops);
-	bl_dev->props.max_brightness = 255;
+	memset(&props, 0, sizeof(struct backlight_properties));
+	props.max_brightness = 255;
+	bl_dev = backlight_device_register("bf52x-bl", NULL, NULL,
+					   &bfin_lq043fb_bl_ops, &props);
 
 	lcd_dev = lcd_device_register(DRIVER_NAME, NULL, &bfin_lcd_ops);
 	lcd_dev->props.max_contrast = 255, printk(KERN_INFO "Done.\n");
diff --git a/drivers/video/nvidia/nv_backlight.c b/drivers/video/nvidia/nv_backlight.c
index 443e3c85a9a0..2fb552a6f32c 100644
--- a/drivers/video/nvidia/nv_backlight.c
+++ b/drivers/video/nvidia/nv_backlight.c
@@ -94,6 +94,7 @@ static struct backlight_ops nvidia_bl_ops = {
 
 void nvidia_bl_init(struct nvidia_par *par)
 {
+	struct backlight_properties props;
 	struct fb_info *info = pci_get_drvdata(par->pci_dev);
 	struct backlight_device *bd;
 	char name[12];
@@ -109,7 +110,10 @@ void nvidia_bl_init(struct nvidia_par *par)
 
 	snprintf(name, sizeof(name), "nvidiabl%d", info->node);
 
-	bd = backlight_device_register(name, info->dev, par, &nvidia_bl_ops);
+	memset(&props, 0, sizeof(struct backlight_properties));
+	props.max_brightness = FB_BACKLIGHT_LEVELS - 1;
+	bd = backlight_device_register(name, info->dev, par, &nvidia_bl_ops,
+				       &props);
 	if (IS_ERR(bd)) {
 		info->bl_dev = NULL;
 		printk(KERN_WARNING "nvidia: Backlight registration failed\n");
@@ -121,7 +125,6 @@ void nvidia_bl_init(struct nvidia_par *par)
 		0x158 * FB_BACKLIGHT_MAX / MAX_LEVEL,
 		0x534 * FB_BACKLIGHT_MAX / MAX_LEVEL);
 
-	bd->props.max_brightness = FB_BACKLIGHT_LEVELS - 1;
 	bd->props.brightness = bd->props.max_brightness;
 	bd->props.power = FB_BLANK_UNBLANK;
 	backlight_update_status(bd);
diff --git a/drivers/video/omap2/displays/panel-taal.c b/drivers/video/omap2/displays/panel-taal.c
index fcd6a61a91eb..59769e85d41c 100644
--- a/drivers/video/omap2/displays/panel-taal.c
+++ b/drivers/video/omap2/displays/panel-taal.c
@@ -486,6 +486,7 @@ static struct attribute_group taal_attr_group = {
 
 static int taal_probe(struct omap_dss_device *dssdev)
 {
+	struct backlight_properties props;
 	struct taal_data *td;
 	struct backlight_device *bldev;
 	int r;
@@ -520,11 +521,16 @@ static int taal_probe(struct omap_dss_device *dssdev)
 
 	/* if no platform set_backlight() defined, presume DSI backlight
 	 * control */
+	memset(&props, 0, sizeof(struct backlight_properties));
 	if (!dssdev->set_backlight)
 		td->use_dsi_bl = true;
 
+	if (td->use_dsi_bl)
+		props.max_brightness = 255;
+	else
+		props.max_brightness = 127;
 	bldev = backlight_device_register("taal", &dssdev->dev, dssdev,
-			&taal_bl_ops);
+					  &taal_bl_ops, &props);
 	if (IS_ERR(bldev)) {
 		r = PTR_ERR(bldev);
 		goto err2;
@@ -534,13 +540,10 @@ static int taal_probe(struct omap_dss_device *dssdev)
 
 	bldev->props.fb_blank = FB_BLANK_UNBLANK;
 	bldev->props.power = FB_BLANK_UNBLANK;
-	if (td->use_dsi_bl) {
-		bldev->props.max_brightness = 255;
+	if (td->use_dsi_bl)
 		bldev->props.brightness = 255;
-	} else {
-		bldev->props.max_brightness = 127;
+	else
 		bldev->props.brightness = 127;
-	}
 
 	taal_bl_update_status(bldev);
 
diff --git a/drivers/video/riva/fbdev.c b/drivers/video/riva/fbdev.c
index d94c57ffbdb1..618f36bec10d 100644
--- a/drivers/video/riva/fbdev.c
+++ b/drivers/video/riva/fbdev.c
@@ -338,6 +338,7 @@ static struct backlight_ops riva_bl_ops = {
 
 static void riva_bl_init(struct riva_par *par)
 {
+	struct backlight_properties props;
 	struct fb_info *info = pci_get_drvdata(par->pdev);
 	struct backlight_device *bd;
 	char name[12];
@@ -353,7 +354,10 @@ static void riva_bl_init(struct riva_par *par)
 
 	snprintf(name, sizeof(name), "rivabl%d", info->node);
 
-	bd = backlight_device_register(name, info->dev, par, &riva_bl_ops);
+	memset(&props, 0, sizeof(struct backlight_properties));
+	props.max_brightness = FB_BACKLIGHT_LEVELS - 1;
+	bd = backlight_device_register(name, info->dev, par, &riva_bl_ops,
+				       &props);
 	if (IS_ERR(bd)) {
 		info->bl_dev = NULL;
 		printk(KERN_WARNING "riva: Backlight registration failed\n");
@@ -365,7 +369,6 @@ static void riva_bl_init(struct riva_par *par)
 		MIN_LEVEL * FB_BACKLIGHT_MAX / MAX_LEVEL,
 		FB_BACKLIGHT_MAX);
 
-	bd->props.max_brightness = FB_BACKLIGHT_LEVELS - 1;
 	bd->props.brightness = bd->props.max_brightness;
 	bd->props.power = FB_BLANK_UNBLANK;
 	backlight_update_status(bd);
diff --git a/include/linux/backlight.h b/include/linux/backlight.h
index 21cd866d24cd..4a3d52e545e1 100644
--- a/include/linux/backlight.h
+++ b/include/linux/backlight.h
@@ -103,7 +103,8 @@ static inline void backlight_update_status(struct backlight_device *bd)
 }
 
 extern struct backlight_device *backlight_device_register(const char *name,
-	struct device *dev, void *devdata, const struct backlight_ops *ops);
+	struct device *dev, void *devdata, const struct backlight_ops *ops,
+	const struct backlight_properties *props);
 extern void backlight_device_unregister(struct backlight_device *bd);
 extern void backlight_force_update(struct backlight_device *bd,
 				   enum backlight_update_reason reason);
-- 
cgit v1.2.3


From bc32df00894f0e1dbf583cc3dab210d2969b078a Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Mon, 15 Mar 2010 00:35:03 -0400
Subject: memory hotplug: allow setting of phys_device

/sys/devices/system/memory/memoryX/phys_device is supposed to contain the
number of the physical device that the corresponding piece of memory
belongs to.

In case a physical device should be replaced or taken offline for whatever
reason it is necessary to set all corresponding memory pieces offline.
The current implementation always sets phys_device to '0' and there is no
way or hook to change that.  Seems like there was a plan to implement that
but it wasn't finished for whatever reason.

So add a weak function which architectures can override to actually set
the phys_device from within add_memory_block().

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Dave Hansen <haveblue@us.ibm.com>
Cc: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/base/memory.c  | 15 ++++++++++-----
 include/linux/memory.h |  2 ++
 2 files changed, 12 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index 2f8691511190..db0848e54cc6 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -429,12 +429,16 @@ static inline int memory_fail_init(void)
  * differentiation between which *physical* devices each
  * section belongs to...
  */
+int __weak arch_get_memory_phys_device(unsigned long start_pfn)
+{
+	return 0;
+}
 
 static int add_memory_block(int nid, struct mem_section *section,
-			unsigned long state, int phys_device,
-			enum mem_add_context context)
+			unsigned long state, enum mem_add_context context)
 {
 	struct memory_block *mem = kzalloc(sizeof(*mem), GFP_KERNEL);
+	unsigned long start_pfn;
 	int ret = 0;
 
 	if (!mem)
@@ -443,7 +447,8 @@ static int add_memory_block(int nid, struct mem_section *section,
 	mem->phys_index = __section_nr(section);
 	mem->state = state;
 	mutex_init(&mem->state_mutex);
-	mem->phys_device = phys_device;
+	start_pfn = section_nr_to_pfn(mem->phys_index);
+	mem->phys_device = arch_get_memory_phys_device(start_pfn);
 
 	ret = register_memory(mem, section);
 	if (!ret)
@@ -515,7 +520,7 @@ int remove_memory_block(unsigned long node_id, struct mem_section *section,
  */
 int register_new_memory(int nid, struct mem_section *section)
 {
-	return add_memory_block(nid, section, MEM_OFFLINE, 0, HOTPLUG);
+	return add_memory_block(nid, section, MEM_OFFLINE, HOTPLUG);
 }
 
 int unregister_memory_section(struct mem_section *section)
@@ -548,7 +553,7 @@ int __init memory_dev_init(void)
 		if (!present_section_nr(i))
 			continue;
 		err = add_memory_block(0, __nr_to_section(i), MEM_ONLINE,
-					0, BOOT);
+				       BOOT);
 		if (!ret)
 			ret = err;
 	}
diff --git a/include/linux/memory.h b/include/linux/memory.h
index 1adfe779eb99..85582e1bcee9 100644
--- a/include/linux/memory.h
+++ b/include/linux/memory.h
@@ -36,6 +36,8 @@ struct memory_block {
 	struct sys_device sysdev;
 };
 
+int arch_get_memory_phys_device(unsigned long start_pfn);
+
 /* These states are exposed to userspace as text strings in sysfs */
 #define	MEM_ONLINE		(1<<0) /* exposed to userspace */
 #define	MEM_GOING_OFFLINE	(1<<1) /* exposed to userspace */
-- 
cgit v1.2.3


From 1976152fd8e706135deed6cf333e347c08416056 Mon Sep 17 00:00:00 2001
From: Grant Likely <grant.likely@secretlab.ca>
Date: Thu, 18 Mar 2010 07:30:31 -0600
Subject: of: Fix comparison of "compatible" properties

Commit 7c7b60cb87547b1664a4385c187f029bf514a737
"of: put default string compare and #a/s-cell values into common header"

Breaks various things on powerpc due to using strncasecmp instead of
strcasecmp for comparing against "compatible" strings.

This causes things like the 4xx PCI code to fail miserably due to the
partial matches in code like this:

       for_each_compatible_node(np, NULL, "ibm,plb-pcix")
               ppc4xx_probe_pcix_bridge(np);
       for_each_compatible_node(np, NULL, "ibm,plb-pci")
               ppc4xx_probe_pci_bridge(np);

It's not quite right to do partial name match. Entries in a compatible
list are meant to be matched whole. If a device is compatible with both
"foo" and "foo1", then the device should have both strings in its
"compatible" property.

This patch reverts powerpc and microblaze us to use strcasecmp.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
      (for patch description)
Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
Acked-by: David S. Miller <davem@davemloft.net>
Acked-by: Michal Simek <michal.simek@petalogix.com>
---
 include/linux/of.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/of.h b/include/linux/of.h
index f6d9cbc39c9c..a367e19bb3af 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -127,7 +127,7 @@ static inline unsigned long of_read_ulong(const __be32 *cell, int size)
 
 /* Default string compare functions, Allow arch asm/prom.h to override */
 #if !defined(of_compat_cmp)
-#define of_compat_cmp(s1, s2, l)	strncasecmp((s1), (s2), (l))
+#define of_compat_cmp(s1, s2, l)	strcasecmp((s1), (s2))
 #define of_prop_cmp(s1, s2)		strcmp((s1), (s2))
 #define of_node_cmp(s1, s2)		strcasecmp((s1), (s2))
 #endif
-- 
cgit v1.2.3


From 0cff810f54b3b52075c27f7a7021d5b195264b6c Mon Sep 17 00:00:00 2001
From: Lai Jiangshan <laijs@cn.fujitsu.com>
Date: Thu, 18 Mar 2010 12:25:33 -0700
Subject: rcu: Fix local_irq_disable() CONFIG_PROVE_RCU=y false positives

It is documented that local_irq_disable() also delimits RCU_SCHED
read-site critical sections.

See the document of synchronize_sched() or
Documentation/RCU/whatisRCU.txt.

So we have to test irqs_disabled() in rcu_read_lock_sched_held().
Otherwise rcu-lockdep brings incorrect complaint.

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: dipankar@in.ibm.com
Cc: mathieu.desnoyers@polymtl.ca
Cc: josh@joshtriplett.org
Cc: dvhltc@us.ibm.com
Cc: niv@us.ibm.com
Cc: peterz@infradead.org
Cc: rostedt@goodmis.org
Cc: Valdis.Kletnieks@vt.edu
Cc: dhowells@redhat.com
Cc: eric.dumazet@gmail.com
LKML-Reference: <1268940334-10892-1-git-send-email-paulmck@linux.vnet.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/rcupdate.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index e1bdc4bfd275..872a98e13d6a 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -149,7 +149,7 @@ static inline int rcu_read_lock_sched_held(void)
 		return 1;
 	if (debug_locks)
 		lockdep_opinion = lock_is_held(&rcu_sched_lock_map);
-	return lockdep_opinion || preempt_count() != 0;
+	return lockdep_opinion || preempt_count() != 0 || irqs_disabled();
 }
 #else /* #ifdef CONFIG_PREEMPT */
 static inline int rcu_read_lock_sched_held(void)
@@ -180,7 +180,7 @@ static inline int rcu_read_lock_bh_held(void)
 #ifdef CONFIG_PREEMPT
 static inline int rcu_read_lock_sched_held(void)
 {
-	return !rcu_scheduler_active || preempt_count() != 0;
+	return !rcu_scheduler_active || preempt_count() != 0 || irqs_disabled();
 }
 #else /* #ifdef CONFIG_PREEMPT */
 static inline int rcu_read_lock_sched_held(void)
-- 
cgit v1.2.3


From 0641e4fbf2f824faee00ea74c459a088d94905fd Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Thu, 18 Mar 2010 21:16:45 -0700
Subject: net: Potential null skb->dev dereference

When doing "ifenslave -d bond0 eth0", there is chance to get NULL
dereference in netif_receive_skb(), because dev->master suddenly becomes
NULL after we tested it.

We should use ACCESS_ONCE() to avoid this (or rcu_dereference())

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 8 ++++----
 net/8021q/vlan_core.c     | 4 ++--
 net/core/dev.c            | 8 +++++---
 3 files changed, 11 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index c79a88be7c33..fa8b47637997 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2059,12 +2059,12 @@ static inline void skb_bond_set_mac_by_master(struct sk_buff *skb,
  * duplicates except for 802.3ad ETH_P_SLOW, alb non-mcast/bcast, and
  * ARP on active-backup slaves with arp_validate enabled.
  */
-static inline int skb_bond_should_drop(struct sk_buff *skb)
+static inline int skb_bond_should_drop(struct sk_buff *skb,
+				       struct net_device *master)
 {
-	struct net_device *dev = skb->dev;
-	struct net_device *master = dev->master;
-
 	if (master) {
+		struct net_device *dev = skb->dev;
+
 		if (master->priv_flags & IFF_MASTER_ARPMON)
 			dev->last_rx = jiffies;
 
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index c0316e0ca6e8..c584a0af77d3 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -11,7 +11,7 @@ int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp,
 	if (netpoll_rx(skb))
 		return NET_RX_DROP;
 
-	if (skb_bond_should_drop(skb))
+	if (skb_bond_should_drop(skb, ACCESS_ONCE(skb->dev->master)))
 		goto drop;
 
 	skb->skb_iif = skb->dev->ifindex;
@@ -83,7 +83,7 @@ vlan_gro_common(struct napi_struct *napi, struct vlan_group *grp,
 {
 	struct sk_buff *p;
 
-	if (skb_bond_should_drop(skb))
+	if (skb_bond_should_drop(skb, ACCESS_ONCE(skb->dev->master)))
 		goto drop;
 
 	skb->skb_iif = skb->dev->ifindex;
diff --git a/net/core/dev.c b/net/core/dev.c
index bcc490cc9452..59d4394d2ce8 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2483,6 +2483,7 @@ int netif_receive_skb(struct sk_buff *skb)
 {
 	struct packet_type *ptype, *pt_prev;
 	struct net_device *orig_dev;
+	struct net_device *master;
 	struct net_device *null_or_orig;
 	struct net_device *null_or_bond;
 	int ret = NET_RX_DROP;
@@ -2503,11 +2504,12 @@ int netif_receive_skb(struct sk_buff *skb)
 
 	null_or_orig = NULL;
 	orig_dev = skb->dev;
-	if (orig_dev->master) {
-		if (skb_bond_should_drop(skb))
+	master = ACCESS_ONCE(orig_dev->master);
+	if (master) {
+		if (skb_bond_should_drop(skb, master))
 			null_or_orig = orig_dev; /* deliver only exact match */
 		else
-			skb->dev = orig_dev->master;
+			skb->dev = master;
 	}
 
 	__get_cpu_var(netdev_rx_stat).total++;
-- 
cgit v1.2.3


From 87a6aca504d65f242589583e04df5e74b5eae1fe Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@suse.de>
Date: Mon, 15 Mar 2010 17:14:15 -0700
Subject: Revert "tty: Add a new VT mode which is like VT_PROCESS but doesn't
 require a VT_RELDISP ioctl call"

This reverts commit eec9fe7d1ab4a0dfac4cb43047a7657fffd0002f.

Ari writes as the reason this should be reverted:
	The problems with this patch include:
	1. There's at least one subtlety I overlooked - switching
	between X servers (i.e. from one X VT to another) still requires
	the cooperation of both X servers. I was assuming that KMS
	eliminated this.
	2. It hasn't been tested at all (no X server patch exists which
	uses the new mode).

As he was the original author of the patch, I'll revert it.

Cc: Ari Entlich <atrigent@ccs.neu.edu>
Cc: Alan Cox <alan@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/char/vt_ioctl.c | 39 +++++++++++++++++++--------------------
 include/linux/vt.h      |  3 +--
 2 files changed, 20 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/char/vt_ioctl.c b/drivers/char/vt_ioctl.c
index 87778dcf8727..6aa10284104a 100644
--- a/drivers/char/vt_ioctl.c
+++ b/drivers/char/vt_ioctl.c
@@ -888,7 +888,7 @@ int vt_ioctl(struct tty_struct *tty, struct file * file,
 			ret = -EFAULT;
 			goto out;
 		}
-		if (tmp.mode != VT_AUTO && tmp.mode != VT_PROCESS && tmp.mode != VT_PROCESS_AUTO) {
+		if (tmp.mode != VT_AUTO && tmp.mode != VT_PROCESS) {
 			ret = -EINVAL;
 			goto out;
 		}
@@ -1622,7 +1622,7 @@ static void complete_change_console(struct vc_data *vc)
 	 * telling it that it has acquired. Also check if it has died and
 	 * clean up (similar to logic employed in change_console())
 	 */
-	if (vc->vt_mode.mode == VT_PROCESS || vc->vt_mode.mode == VT_PROCESS_AUTO) {
+	if (vc->vt_mode.mode == VT_PROCESS) {
 		/*
 		 * Send the signal as privileged - kill_pid() will
 		 * tell us if the process has gone or something else
@@ -1682,7 +1682,7 @@ void change_console(struct vc_data *new_vc)
 	 * vt to auto control.
 	 */
 	vc = vc_cons[fg_console].d;
-	if (vc->vt_mode.mode == VT_PROCESS || vc->vt_mode.mode == VT_PROCESS_AUTO) {
+	if (vc->vt_mode.mode == VT_PROCESS) {
 		/*
 		 * Send the signal as privileged - kill_pid() will
 		 * tell us if the process has gone or something else
@@ -1693,28 +1693,27 @@ void change_console(struct vc_data *new_vc)
 		 */
 		vc->vt_newvt = new_vc->vc_num;
 		if (kill_pid(vc->vt_pid, vc->vt_mode.relsig, 1) == 0) {
-			if(vc->vt_mode.mode == VT_PROCESS)
-				/*
-				 * It worked. Mark the vt to switch to and
-				 * return. The process needs to send us a
-				 * VT_RELDISP ioctl to complete the switch.
-				 */
-				return;
-		} else {
 			/*
-			 * The controlling process has died, so we revert back to
-			 * normal operation. In this case, we'll also change back
-			 * to KD_TEXT mode. I'm not sure if this is strictly correct
-			 * but it saves the agony when the X server dies and the screen
-			 * remains blanked due to KD_GRAPHICS! It would be nice to do
-			 * this outside of VT_PROCESS but there is no single process
-			 * to account for and tracking tty count may be undesirable.
+			 * It worked. Mark the vt to switch to and
+			 * return. The process needs to send us a
+			 * VT_RELDISP ioctl to complete the switch.
 			 */
-			reset_vc(vc);
+			return;
 		}
 
 		/*
-		 * Fall through to normal (VT_AUTO and VT_PROCESS_AUTO) handling of the switch...
+		 * The controlling process has died, so we revert back to
+		 * normal operation. In this case, we'll also change back
+		 * to KD_TEXT mode. I'm not sure if this is strictly correct
+		 * but it saves the agony when the X server dies and the screen
+		 * remains blanked due to KD_GRAPHICS! It would be nice to do
+		 * this outside of VT_PROCESS but there is no single process
+		 * to account for and tracking tty count may be undesirable.
+		 */
+		reset_vc(vc);
+
+		/*
+		 * Fall through to normal (VT_AUTO) handling of the switch...
 		 */
 	}
 
diff --git a/include/linux/vt.h b/include/linux/vt.h
index 778b7b2a47d4..d5dd0bc408fd 100644
--- a/include/linux/vt.h
+++ b/include/linux/vt.h
@@ -27,7 +27,7 @@ struct vt_mode {
 #define VT_SETMODE	0x5602	/* set mode of active vt */
 #define		VT_AUTO		0x00	/* auto vt switching */
 #define		VT_PROCESS	0x01	/* process controls switching */
-#define		VT_PROCESS_AUTO 0x02	/* process is notified of switching */
+#define		VT_ACKACQ	0x02	/* acknowledge switch */
 
 struct vt_stat {
 	unsigned short v_active;	/* active vt */
@@ -38,7 +38,6 @@ struct vt_stat {
 #define VT_SENDSIG	0x5604	/* signal to send to bitmask of vts */
 
 #define VT_RELDISP	0x5605	/* release display */
-#define		VT_ACKACQ	0x02	/* acknowledge switch */
 
 #define VT_ACTIVATE	0x5606	/* make vt active */
 #define VT_WAITACTIVE	0x5607	/* wait for vt active */
-- 
cgit v1.2.3


From 352fa6ad16b89f8ffd1a93b4419b1a8f2259feab Mon Sep 17 00:00:00 2001
From: Mel Gorman <mel@csn.ul.ie>
Date: Tue, 2 Mar 2010 22:24:19 +0000
Subject: tty: Take a 256 byte padding into account when buffering below
 sub-page units

The TTY layer takes some care to ensure that only sub-page allocations
are made with interrupts disabled. It does this by setting a goal of
"TTY_BUFFER_PAGE" to allocate. Unfortunately, while TTY_BUFFER_PAGE takes the
size of tty_buffer into account, it fails to account that tty_buffer_find()
rounds the buffer size out to the next 256 byte boundary before adding on
the size of the tty_buffer.

This patch adjusts the TTY_BUFFER_PAGE calculation to take into account the
size of the tty_buffer and the padding. Once applied, tty_buffer_alloc()
should not require high-order allocations.

Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Cc: stable <stable@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/tty.h | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/tty.h b/include/linux/tty.h
index 568369a86306..593228a520e1 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -70,12 +70,13 @@ struct tty_buffer {
 
 /*
  * We default to dicing tty buffer allocations to this many characters
- * in order to avoid multiple page allocations. We assume tty_buffer itself
- * is under 256 bytes. See tty_buffer_find for the allocation logic this
- * must match
+ * in order to avoid multiple page allocations. We know the size of
+ * tty_buffer itself but it must also be taken into account that the
+ * the buffer is 256 byte aligned. See tty_buffer_find for the allocation
+ * logic this must match
  */
 
-#define TTY_BUFFER_PAGE		((PAGE_SIZE  - 256) / 2)
+#define TTY_BUFFER_PAGE	(((PAGE_SIZE - sizeof(struct tty_buffer)) / 2) & ~0xFF)
 
 
 struct tty_bufhead {
-- 
cgit v1.2.3


From 336cee42dd52824e360ab419eab4e8888eb054ec Mon Sep 17 00:00:00 2001
From: Jason Wessel <jason.wessel@windriver.com>
Date: Mon, 8 Mar 2010 21:50:11 -0600
Subject: tty_port,usb-console: Fix usb serial console open/close regression

Commit e1108a63e10d344284011cccc06328b2cd3e5da3 ("usb_serial: Use the
shutdown() operation") breaks the ability to use a usb console
starting in 2.6.33.  This was observed when using
console=ttyUSB0,115200 as a boot argument with an FTDI device.  The
error is:

ftdi_sio ttyUSB0: ftdi_submit_read_urb - failed submitting read urb, error -22

The handling of the ASYNCB_INITIALIZED changed in 2.6.32 such that in
tty_port_shutdown() it always clears the flag if it is set.  The fix
is to add a variable to the tty_port struct to indicate when the tty
port is a console.

CC: Alan Cox <alan@linux.intel.com>
CC: Alan Stern <stern@rowland.harvard.edu>
CC: Oliver Neukum <oliver@neukum.org>
CC: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Jason Wessel <jason.wessel@windriver.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/char/tty_port.c      | 2 +-
 drivers/usb/serial/console.c | 1 +
 include/linux/tty.h          | 1 +
 3 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/char/tty_port.c b/drivers/char/tty_port.c
index be492dd66437..a3bd1d0b66cf 100644
--- a/drivers/char/tty_port.c
+++ b/drivers/char/tty_port.c
@@ -119,7 +119,7 @@ EXPORT_SYMBOL(tty_port_tty_set);
 static void tty_port_shutdown(struct tty_port *port)
 {
 	mutex_lock(&port->mutex);
-	if (port->ops->shutdown &&
+	if (port->ops->shutdown && !port->console &&
 		test_and_clear_bit(ASYNCB_INITIALIZED, &port->flags))
 			port->ops->shutdown(port);
 	mutex_unlock(&port->mutex);
diff --git a/drivers/usb/serial/console.c b/drivers/usb/serial/console.c
index b22ac3258523..f347da2ef00a 100644
--- a/drivers/usb/serial/console.c
+++ b/drivers/usb/serial/console.c
@@ -181,6 +181,7 @@ static int usb_console_setup(struct console *co, char *options)
 	/* The console is special in terms of closing the device so
 	 * indicate this port is now acting as a system console. */
 	port->console = 1;
+	port->port.console = 1;
 
 	mutex_unlock(&serial->disc_mutex);
 	return retval;
diff --git a/include/linux/tty.h b/include/linux/tty.h
index 593228a520e1..4409967db0c4 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -224,6 +224,7 @@ struct tty_port {
 	wait_queue_head_t	close_wait;	/* Close waiters */
 	wait_queue_head_t	delta_msr_wait;	/* Modem status change */
 	unsigned long		flags;		/* TTY flags ASY_*/
+	unsigned char		console:1;	/* port is a console */
 	struct mutex		mutex;		/* Locking */
 	struct mutex		buf_mutex;	/* Buffer alloc lock */
 	unsigned char		*xmit_buf;	/* Optional buffer */
-- 
cgit v1.2.3


From f09a15e6e69884cedec4d1c022089a973aa01f1e Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@linux.intel.com>
Date: Tue, 16 Mar 2010 12:55:44 -0700
Subject: USB: Fix usb_fill_int_urb for SuperSpeed devices

USB 3 and Wireless USB specify a logarithmic encoding of the endpoint
interval that matches the USB 2 specification.  usb_fill_int_urb() didn't
know that and was filling in the interval as if it was USB 1.1.  Fix
usb_fill_int_urb() for SuperSpeed devices, but leave the wireless case
alone, because David Vrabel wants to keep the old encoding.

Update the struct urb kernel doc to note that SuperSpeed URBs must have
urb->interval specified in microframes.

Add a missing break statement in the usb_submit_urb() interrupt URB
checking, since wireless USB and SuperSpeed USB encode urb->interval
differently.  This allows xHCI roothubs to actually register with khubd.

Signed-off-by: Matthew Wilcox <willy@linux.intel.com>
Signed-off-by: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/core/urb.c |  1 +
 include/linux/usb.h    | 18 +++++++++++++-----
 2 files changed, 14 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/core/urb.c b/drivers/usb/core/urb.c
index 27080561a1c2..45a32dadb406 100644
--- a/drivers/usb/core/urb.c
+++ b/drivers/usb/core/urb.c
@@ -453,6 +453,7 @@ int usb_submit_urb(struct urb *urb, gfp_t mem_flags)
 			if (urb->interval > (1 << 15))
 				return -EINVAL;
 			max = 1 << 15;
+			break;
 		case USB_SPEED_WIRELESS:
 			if (urb->interval > 16)
 				return -EINVAL;
diff --git a/include/linux/usb.h b/include/linux/usb.h
index 8c9f053111bb..ce1323c4e47c 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -1055,7 +1055,8 @@ typedef void (*usb_complete_t)(struct urb *);
  * @number_of_packets: Lists the number of ISO transfer buffers.
  * @interval: Specifies the polling interval for interrupt or isochronous
  *	transfers.  The units are frames (milliseconds) for full and low
- *	speed devices, and microframes (1/8 millisecond) for highspeed ones.
+ *	speed devices, and microframes (1/8 millisecond) for highspeed
+ *	and SuperSpeed devices.
  * @error_count: Returns the number of ISO transfers that reported errors.
  * @context: For use in completion functions.  This normally points to
  *	request-specific driver context.
@@ -1286,9 +1287,16 @@ static inline void usb_fill_bulk_urb(struct urb *urb,
  *
  * Initializes a interrupt urb with the proper information needed to submit
  * it to a device.
- * Note that high speed interrupt endpoints use a logarithmic encoding of
- * the endpoint interval, and express polling intervals in microframes
- * (eight per millisecond) rather than in frames (one per millisecond).
+ *
+ * Note that High Speed and SuperSpeed interrupt endpoints use a logarithmic
+ * encoding of the endpoint interval, and express polling intervals in
+ * microframes (eight per millisecond) rather than in frames (one per
+ * millisecond).
+ *
+ * Wireless USB also uses the logarithmic encoding, but specifies it in units of
+ * 128us instead of 125us.  For Wireless USB devices, the interval is passed
+ * through to the host controller, rather than being translated into microframe
+ * units.
  */
 static inline void usb_fill_int_urb(struct urb *urb,
 				    struct usb_device *dev,
@@ -1305,7 +1313,7 @@ static inline void usb_fill_int_urb(struct urb *urb,
 	urb->transfer_buffer_length = buffer_length;
 	urb->complete = complete_fn;
 	urb->context = context;
-	if (dev->speed == USB_SPEED_HIGH)
+	if (dev->speed == USB_SPEED_HIGH || dev->speed == USB_SPEED_SUPER)
 		urb->interval = 1 << (interval - 1);
 	else
 		urb->interval = interval;
-- 
cgit v1.2.3


From 1a50307ba1826e4da0024e64b245ce4eadf7688a Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Thu, 18 Mar 2010 14:24:42 +0000
Subject: netlink: fix NETLINK_RECV_NO_ENOBUFS in netlink_set_err()

Currently, ENOBUFS errors are reported to the socket via
netlink_set_err() even if NETLINK_RECV_NO_ENOBUFS is set. However,
that should not happen. This fixes this problem and it changes the
prototype of netlink_set_err() to return the number of sockets that
have set the NETLINK_RECV_NO_ENOBUFS socket option. This return
value is used in the next patch in these bugfix series.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netlink.h  |  2 +-
 net/netlink/af_netlink.c | 17 ++++++++++++++---
 2 files changed, 15 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index fde27c017326..6eaca5e1e8ca 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -188,7 +188,7 @@ extern int netlink_has_listeners(struct sock *sk, unsigned int group);
 extern int netlink_unicast(struct sock *ssk, struct sk_buff *skb, __u32 pid, int nonblock);
 extern int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, __u32 pid,
 			     __u32 group, gfp_t allocation);
-extern void netlink_set_err(struct sock *ssk, __u32 pid, __u32 group, int code);
+extern int netlink_set_err(struct sock *ssk, __u32 pid, __u32 group, int code);
 extern int netlink_register_notifier(struct notifier_block *nb);
 extern int netlink_unregister_notifier(struct notifier_block *nb);
 
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 320d0423a240..acbbae1e89b5 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1093,6 +1093,7 @@ static inline int do_one_set_err(struct sock *sk,
 				 struct netlink_set_err_data *p)
 {
 	struct netlink_sock *nlk = nlk_sk(sk);
+	int ret = 0;
 
 	if (sk == p->exclude_sk)
 		goto out;
@@ -1104,10 +1105,15 @@ static inline int do_one_set_err(struct sock *sk,
 	    !test_bit(p->group - 1, nlk->groups))
 		goto out;
 
+	if (p->code == ENOBUFS && nlk->flags & NETLINK_RECV_NO_ENOBUFS) {
+		ret = 1;
+		goto out;
+	}
+
 	sk->sk_err = p->code;
 	sk->sk_error_report(sk);
 out:
-	return 0;
+	return ret;
 }
 
 /**
@@ -1116,12 +1122,16 @@ out:
  * @pid: the PID of a process that we want to skip (if any)
  * @groups: the broadcast group that will notice the error
  * @code: error code, must be negative (as usual in kernelspace)
+ *
+ * This function returns the number of broadcast listeners that have set the
+ * NETLINK_RECV_NO_ENOBUFS socket option.
  */
-void netlink_set_err(struct sock *ssk, u32 pid, u32 group, int code)
+int netlink_set_err(struct sock *ssk, u32 pid, u32 group, int code)
 {
 	struct netlink_set_err_data info;
 	struct hlist_node *node;
 	struct sock *sk;
+	int ret = 0;
 
 	info.exclude_sk = ssk;
 	info.pid = pid;
@@ -1132,9 +1142,10 @@ void netlink_set_err(struct sock *ssk, u32 pid, u32 group, int code)
 	read_lock(&nl_table_lock);
 
 	sk_for_each_bound(sk, node, &nl_table[ssk->sk_protocol].mc_list)
-		do_one_set_err(sk, &info);
+		ret += do_one_set_err(sk, &info);
 
 	read_unlock(&nl_table_lock);
+	return ret;
 }
 EXPORT_SYMBOL(netlink_set_err);
 
-- 
cgit v1.2.3


From 37b7ef7203240b3aba577bb1ff6765fe15225976 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Tue, 16 Mar 2010 13:30:21 +0000
Subject: netfilter: ctnetlink: fix reliable event delivery if message building
 fails

This patch fixes a bug that allows to lose events when reliable
event delivery mode is used, ie. if NETLINK_BROADCAST_SEND_ERROR
and NETLINK_RECV_NO_ENOBUFS socket options are set.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter/nfnetlink.h  | 2 +-
 net/netfilter/nf_conntrack_netlink.c | 4 +++-
 net/netfilter/nfnetlink.c            | 4 ++--
 3 files changed, 6 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h
index 53923868c9bd..361d6b5630ee 100644
--- a/include/linux/netfilter/nfnetlink.h
+++ b/include/linux/netfilter/nfnetlink.h
@@ -76,7 +76,7 @@ extern int nfnetlink_subsys_unregister(const struct nfnetlink_subsystem *n);
 extern int nfnetlink_has_listeners(struct net *net, unsigned int group);
 extern int nfnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, unsigned group,
 			  int echo, gfp_t flags);
-extern void nfnetlink_set_err(struct net *net, u32 pid, u32 group, int error);
+extern int nfnetlink_set_err(struct net *net, u32 pid, u32 group, int error);
 extern int nfnetlink_unicast(struct sk_buff *skb, struct net *net, u_int32_t pid, int flags);
 
 extern void nfnl_lock(void);
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 2b2af631d2b8..569410a85953 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -582,7 +582,9 @@ nla_put_failure:
 nlmsg_failure:
 	kfree_skb(skb);
 errout:
-	nfnetlink_set_err(net, 0, group, -ENOBUFS);
+	if (nfnetlink_set_err(net, 0, group, -ENOBUFS) > 0)
+		return -ENOBUFS;
+
 	return 0;
 }
 #endif /* CONFIG_NF_CONNTRACK_EVENTS */
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 8eb0cc23ada3..6afa3d52ea5f 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -113,9 +113,9 @@ int nfnetlink_send(struct sk_buff *skb, struct net *net, u32 pid,
 }
 EXPORT_SYMBOL_GPL(nfnetlink_send);
 
-void nfnetlink_set_err(struct net *net, u32 pid, u32 group, int error)
+int nfnetlink_set_err(struct net *net, u32 pid, u32 group, int error)
 {
-	netlink_set_err(net->nfnl, pid, group, error);
+	return netlink_set_err(net->nfnl, pid, group, error);
 }
 EXPORT_SYMBOL_GPL(nfnetlink_set_err);
 
-- 
cgit v1.2.3


From 9bf35c8dddd56f7f247a27346f74f5adc18071f4 Mon Sep 17 00:00:00 2001
From: Paulius Zaleckas <paulius.zaleckas@gmail.com>
Date: Sun, 21 Mar 2010 21:19:02 -0700
Subject: if_tunnel.h: add missing ams/byteorder.h include

When compiling userspace application which includes
if_tunnel.h and uses GRE_* defines you will get undefined
reference to __cpu_to_be16.

Fix this by adding missing #include <asm/byteorder.h>

Cc: stable@kernel.org
Signed-off-by: Paulius Zaleckas <paulius.zaleckas@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_tunnel.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/if_tunnel.h b/include/linux/if_tunnel.h
index 1822d635be6b..16b92d008bed 100644
--- a/include/linux/if_tunnel.h
+++ b/include/linux/if_tunnel.h
@@ -2,6 +2,7 @@
 #define _IF_TUNNEL_H_
 
 #include <linux/types.h>
+#include <asm/byteorder.h>
 
 #ifdef __KERNEL__
 #include <linux/ip.h>
-- 
cgit v1.2.3


From c9acb42ef1904d15d0fb315061cefbe638f67f3a Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Fri, 19 Mar 2010 15:36:22 -0400
Subject: SUNRPC: Fix a use after free bug with the NFSv4.1 backchannel

The ->release_request() callback was designed to allow the transport layer
to do housekeeping after the RPC call is done. It cannot be used to free
the request itself, and doing so leads to a use-after-free bug in
xprt_release().

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/bc_xprt.h |  5 ++++-
 net/sunrpc/bc_svc.c            | 15 ---------------
 net/sunrpc/xprt.c              | 22 +++++++++-------------
 net/sunrpc/xprtsock.c          |  3 ---
 4 files changed, 13 insertions(+), 32 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/bc_xprt.h b/include/linux/sunrpc/bc_xprt.h
index d7152b451e21..7c91260c44a9 100644
--- a/include/linux/sunrpc/bc_xprt.h
+++ b/include/linux/sunrpc/bc_xprt.h
@@ -36,7 +36,6 @@ struct rpc_rqst *xprt_alloc_bc_request(struct rpc_xprt *xprt);
 void xprt_free_bc_request(struct rpc_rqst *req);
 int xprt_setup_backchannel(struct rpc_xprt *, unsigned int min_reqs);
 void xprt_destroy_backchannel(struct rpc_xprt *, int max_reqs);
-void bc_release_request(struct rpc_task *);
 int bc_send(struct rpc_rqst *req);
 
 /*
@@ -59,6 +58,10 @@ static inline int svc_is_backchannel(const struct svc_rqst *rqstp)
 {
 	return 0;
 }
+
+static inline void xprt_free_bc_request(struct rpc_rqst *req)
+{
+}
 #endif /* CONFIG_NFS_V4_1 */
 #endif /* _LINUX_SUNRPC_BC_XPRT_H */
 
diff --git a/net/sunrpc/bc_svc.c b/net/sunrpc/bc_svc.c
index 13f214f53120..f0c05d3311c1 100644
--- a/net/sunrpc/bc_svc.c
+++ b/net/sunrpc/bc_svc.c
@@ -37,21 +37,6 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #define RPCDBG_FACILITY	RPCDBG_SVCDSP
 
-void bc_release_request(struct rpc_task *task)
-{
-	struct rpc_rqst *req = task->tk_rqstp;
-
-	dprintk("RPC:       bc_release_request: task= %p\n", task);
-
-	/*
-	 * Release this request only if it's a backchannel
-	 * preallocated request
-	 */
-	if (!bc_prealloc(req))
-		return;
-	xprt_free_bc_request(req);
-}
-
 /* Empty callback ops */
 static const struct rpc_call_ops nfs41_callback_ops = {
 };
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 469de292c23c..42f09ade0044 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -46,6 +46,7 @@
 
 #include <linux/sunrpc/clnt.h>
 #include <linux/sunrpc/metrics.h>
+#include <linux/sunrpc/bc_xprt.h>
 
 #include "sunrpc.h"
 
@@ -1032,21 +1033,16 @@ void xprt_release(struct rpc_task *task)
 	if (req->rq_release_snd_buf)
 		req->rq_release_snd_buf(req);
 
-	/*
-	 * Early exit if this is a backchannel preallocated request.
-	 * There is no need to have it added to the RPC slot list.
-	 */
-	if (is_bc_request)
-		return;
-
-	memset(req, 0, sizeof(*req));	/* mark unused */
-
 	dprintk("RPC: %5u release request %p\n", task->tk_pid, req);
+	if (likely(!is_bc_request)) {
+		memset(req, 0, sizeof(*req));	/* mark unused */
 
-	spin_lock(&xprt->reserve_lock);
-	list_add(&req->rq_list, &xprt->free);
-	rpc_wake_up_next(&xprt->backlog);
-	spin_unlock(&xprt->reserve_lock);
+		spin_lock(&xprt->reserve_lock);
+		list_add(&req->rq_list, &xprt->free);
+		rpc_wake_up_next(&xprt->backlog);
+		spin_unlock(&xprt->reserve_lock);
+	} else
+		xprt_free_bc_request(req);
 }
 
 /**
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index e4839c07c913..9847c30b5001 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -2251,9 +2251,6 @@ static struct rpc_xprt_ops xs_tcp_ops = {
 	.buf_free		= rpc_free,
 	.send_request		= xs_tcp_send_request,
 	.set_retrans_timeout	= xprt_set_retrans_timeout_def,
-#if defined(CONFIG_NFS_V4_1)
-	.release_request	= bc_release_request,
-#endif /* CONFIG_NFS_V4_1 */
 	.close			= xs_tcp_close,
 	.destroy		= xs_destroy,
 	.print_stats		= xs_tcp_print_stats,
-- 
cgit v1.2.3


From ae6be51ed01d6c4aaf249a207b4434bc7785853b Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Mon, 22 Mar 2010 13:12:33 -0700
Subject: Fix up prototype for sys_ipc breakage

Commit 45575f5a426c ("ppc64 sys_ipc breakage in 2.6.34-rc2") fixed the
definition of the sys_ipc() helper, but didn't fix the prototype in
<linux/syscalls.h>

Reported-and-tested-by: Andreas Schwab <schwab@linux-m68k.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/syscalls.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index f994ae58a002..057929b0a651 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -688,7 +688,7 @@ asmlinkage long sys_shmat(int shmid, char __user *shmaddr, int shmflg);
 asmlinkage long sys_shmget(key_t key, size_t size, int flag);
 asmlinkage long sys_shmdt(char __user *shmaddr);
 asmlinkage long sys_shmctl(int shmid, int cmd, struct shmid_ds __user *buf);
-asmlinkage long sys_ipc(unsigned int call, int first, int second,
+asmlinkage long sys_ipc(unsigned int call, int first, unsigned long second,
 		unsigned long third, void __user *ptr, long fifth);
 
 asmlinkage long sys_mq_open(const char __user *name, int oflag, mode_t mode, struct mq_attr __user *attr);
-- 
cgit v1.2.3


From 66f1207bce10fd80ee8ce99b67d617644612f05e Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bjorn.helgaas@hp.com>
Date: Thu, 11 Mar 2010 17:01:09 -0700
Subject: resources: add interfaces that return conflict information

request_resource() and insert_resource() only return success or failure,
which no information about what existing resource conflicted with the
proposed new reservation.  This patch adds request_resource_conflict()
and insert_resource_conflict(), which return the conflicting resource.

Callers may use this for better error messages or to adjust the new
resource and retry the request.

Signed-off-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 include/linux/ioport.h |  2 ++
 kernel/resource.c      | 44 +++++++++++++++++++++++++++++++++++++-------
 2 files changed, 39 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ioport.h b/include/linux/ioport.h
index 71ab79da7e7f..26fad187d661 100644
--- a/include/linux/ioport.h
+++ b/include/linux/ioport.h
@@ -112,12 +112,14 @@ struct resource_list {
 extern struct resource ioport_resource;
 extern struct resource iomem_resource;
 
+extern struct resource *request_resource_conflict(struct resource *root, struct resource *new);
 extern int request_resource(struct resource *root, struct resource *new);
 extern int release_resource(struct resource *new);
 void release_child_resources(struct resource *new);
 extern void reserve_region_with_split(struct resource *root,
 			     resource_size_t start, resource_size_t end,
 			     const char *name);
+extern struct resource *insert_resource_conflict(struct resource *parent, struct resource *new);
 extern int insert_resource(struct resource *parent, struct resource *new);
 extern void insert_resource_expand_to_fit(struct resource *root, struct resource *new);
 extern int allocate_resource(struct resource *root, struct resource *new,
diff --git a/kernel/resource.c b/kernel/resource.c
index 2d5be5d9bf5f..9c358e263534 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -219,19 +219,34 @@ void release_child_resources(struct resource *r)
 }
 
 /**
- * request_resource - request and reserve an I/O or memory resource
+ * request_resource_conflict - request and reserve an I/O or memory resource
  * @root: root resource descriptor
  * @new: resource descriptor desired by caller
  *
- * Returns 0 for success, negative error code on error.
+ * Returns 0 for success, conflict resource on error.
  */
-int request_resource(struct resource *root, struct resource *new)
+struct resource *request_resource_conflict(struct resource *root, struct resource *new)
 {
 	struct resource *conflict;
 
 	write_lock(&resource_lock);
 	conflict = __request_resource(root, new);
 	write_unlock(&resource_lock);
+	return conflict;
+}
+
+/**
+ * request_resource - request and reserve an I/O or memory resource
+ * @root: root resource descriptor
+ * @new: resource descriptor desired by caller
+ *
+ * Returns 0 for success, negative error code on error.
+ */
+int request_resource(struct resource *root, struct resource *new)
+{
+	struct resource *conflict;
+
+	conflict = request_resource_conflict(root, new);
 	return conflict ? -EBUSY : 0;
 }
 
@@ -474,25 +489,40 @@ static struct resource * __insert_resource(struct resource *parent, struct resou
 }
 
 /**
- * insert_resource - Inserts a resource in the resource tree
+ * insert_resource_conflict - Inserts resource in the resource tree
  * @parent: parent of the new resource
  * @new: new resource to insert
  *
- * Returns 0 on success, -EBUSY if the resource can't be inserted.
+ * Returns 0 on success, conflict resource if the resource can't be inserted.
  *
- * This function is equivalent to request_resource when no conflict
+ * This function is equivalent to request_resource_conflict when no conflict
  * happens. If a conflict happens, and the conflicting resources
  * entirely fit within the range of the new resource, then the new
  * resource is inserted and the conflicting resources become children of
  * the new resource.
  */
-int insert_resource(struct resource *parent, struct resource *new)
+struct resource *insert_resource_conflict(struct resource *parent, struct resource *new)
 {
 	struct resource *conflict;
 
 	write_lock(&resource_lock);
 	conflict = __insert_resource(parent, new);
 	write_unlock(&resource_lock);
+	return conflict;
+}
+
+/**
+ * insert_resource - Inserts a resource in the resource tree
+ * @parent: parent of the new resource
+ * @new: new resource to insert
+ *
+ * Returns 0 on success, -EBUSY if the resource can't be inserted.
+ */
+int insert_resource(struct resource *parent, struct resource *new)
+{
+	struct resource *conflict;
+
+	conflict = insert_resource_conflict(parent, new);
 	return conflict ? -EBUSY : 0;
 }
 
-- 
cgit v1.2.3


From 03e6d819c2cb2cc8ce5642669a0a7c72336ee7a2 Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@intel.com>
Date: Tue, 23 Mar 2010 20:40:50 +0000
Subject: skbuff: remove unused dma_head & dma_maps fields

The dma map fields in the skb_shared_info structure no longer has any users
and can be dropped since it is making the skb_shared_info unecessarily larger.

Running slabtop show that we were using 4K slabs for the skb->head on x86_64 w/
an allocation size of 1522.  It turns out that the dma_head and dma_maps array
made skb_shared large enough that we had crossed over the 2k boundary with
standard frames and as such we were using 4k blocks of memory for all skbs.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 03f816a9b659..124f90cd5a38 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -190,9 +190,6 @@ struct skb_shared_info {
 	atomic_t	dataref;
 	unsigned short	nr_frags;
 	unsigned short	gso_size;
-#ifdef CONFIG_HAS_DMA
-	dma_addr_t	dma_head;
-#endif
 	/* Warning: this field is not always filled in (UFO)! */
 	unsigned short	gso_segs;
 	unsigned short  gso_type;
@@ -201,9 +198,6 @@ struct skb_shared_info {
 	struct sk_buff	*frag_list;
 	struct skb_shared_hwtstamps hwtstamps;
 	skb_frag_t	frags[MAX_SKB_FRAGS];
-#ifdef CONFIG_HAS_DMA
-	dma_addr_t	dma_maps[MAX_SKB_FRAGS];
-#endif
 	/* Intermediate layers must ensure that destructor_arg
 	 * remains valid until skb destructor */
 	void *		destructor_arg;
-- 
cgit v1.2.3


From 4c87684d32e8f95715d53039dcd2d998dc63d1eb Mon Sep 17 00:00:00 2001
From: David Härdeman <david@hardeman.nu>
Date: Tue, 23 Mar 2010 13:35:22 -0700
Subject: kfifo: fix KFIFO_INIT in include/linux/kfifo.h
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

include/linux/kfifo.h first defines and then undefines __kfifo_initializer
which is used by INIT_KFIFO (which is also a macro, so building a module
which uses INIT_KFIFO will fail).

Signed-off-by: David Härdeman <david@hardeman.nu>
Acked-by: Stefani Seibold <stefani@seibold.net>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kfifo.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h
index bc0fc795bd35..ece0b1c33816 100644
--- a/include/linux/kfifo.h
+++ b/include/linux/kfifo.h
@@ -102,8 +102,6 @@ union { \
 	unsigned char name##kfifo_buffer[size]; \
 	struct kfifo name = __kfifo_initializer(size, name##kfifo_buffer)
 
-#undef __kfifo_initializer
-
 extern void kfifo_init(struct kfifo *fifo, void *buffer,
 			unsigned int size);
 extern __must_check int kfifo_alloc(struct kfifo *fifo, unsigned int size,
-- 
cgit v1.2.3


From 6cb4aff0a77cc0e6bae9475d62205319e3ebbf3f Mon Sep 17 00:00:00 2001
From: Jeff Mahoney <jeffm@suse.com>
Date: Tue, 23 Mar 2010 13:35:38 -0700
Subject: reiserfs: fix oops while creating privroot with selinux enabled

Commit 57fe60df ("reiserfs: add atomic addition of selinux attributes
during inode creation") contains a bug that will cause it to oops when
mounting a file system that didn't previously contain extended attributes
on a system using security.* xattrs.

The issue is that while creating the privroot during mount
reiserfs_security_init calls reiserfs_xattr_jcreate_nblocks which
dereferences the xattr root.  The xattr root doesn't exist, so we get an
oops.

Addresses http://bugzilla.kernel.org/show_bug.cgi?id=15309

Signed-off-by: Jeff Mahoney <jeffm@suse.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/reiserfs/xattr_security.c   | 2 +-
 include/linux/reiserfs_xattr.h | 5 +++++
 2 files changed, 6 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/fs/reiserfs/xattr_security.c b/fs/reiserfs/xattr_security.c
index d8b5bfcbdd30..de1fcffd906b 100644
--- a/fs/reiserfs/xattr_security.c
+++ b/fs/reiserfs/xattr_security.c
@@ -76,7 +76,7 @@ int reiserfs_security_init(struct inode *dir, struct inode *inode,
 		return error;
 	}
 
-	if (sec->length) {
+	if (sec->length && reiserfs_xattrs_initialized(inode->i_sb)) {
 		blocks = reiserfs_xattr_jcreate_nblocks(inode) +
 			 reiserfs_xattr_nblocks(inode, sec->length);
 		/* We don't want to count the directories twice if we have
diff --git a/include/linux/reiserfs_xattr.h b/include/linux/reiserfs_xattr.h
index 99928dce37ea..7fa02b4af838 100644
--- a/include/linux/reiserfs_xattr.h
+++ b/include/linux/reiserfs_xattr.h
@@ -70,6 +70,11 @@ int reiserfs_security_write(struct reiserfs_transaction_handle *th,
 void reiserfs_security_free(struct reiserfs_security_handle *sec);
 #endif
 
+static inline int reiserfs_xattrs_initialized(struct super_block *sb)
+{
+	return REISERFS_SB(sb)->priv_root != NULL;
+}
+
 #define xattr_size(size) ((size) + sizeof(struct reiserfs_xattr_header))
 static inline loff_t reiserfs_xattr_nblocks(struct inode *inode, loff_t size)
 {
-- 
cgit v1.2.3


From 7198f3c9b13c7aa1e5d9f7ff74c0ea303174feff Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@nokia.com>
Date: Tue, 23 Mar 2010 13:35:40 -0700
Subject: mmc: fix incorrect interpretation of card type bits

In the extended CSD register the CARD_TYPE is an 8-bit value of which the
upper 6 bits were reserved in JEDEC specifications prior to version 4.4.
In version 4.4 two of the reserved bits were designated for identifying
support for the newly added High-Speed Dual Data Rate.  Unfortunately the
mmc_read_ext_csd() function required that the reserved bits be zero
instead of ignoring them as it should.

This patch makes mmc_read_ext_csd() ignore the CARD_TYPE bits that are
reserved or not yet supported.  It also stops the function jumping to the
end as though an error occurred, when it is only warns that the CARD_TYPE
bits (that it does interpret) are invalid.

Signed-off-by: Adrian Hunter <adrian.hunter@nokia.com>
Cc: <linux-mmc@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/mmc/core/mmc.c  | 3 +--
 include/linux/mmc/mmc.h | 1 +
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
index 0eac6c814904..e041c003db22 100644
--- a/drivers/mmc/core/mmc.c
+++ b/drivers/mmc/core/mmc.c
@@ -225,7 +225,7 @@ static int mmc_read_ext_csd(struct mmc_card *card)
 			mmc_card_set_blockaddr(card);
 	}
 
-	switch (ext_csd[EXT_CSD_CARD_TYPE]) {
+	switch (ext_csd[EXT_CSD_CARD_TYPE] & EXT_CSD_CARD_TYPE_MASK) {
 	case EXT_CSD_CARD_TYPE_52 | EXT_CSD_CARD_TYPE_26:
 		card->ext_csd.hs_max_dtr = 52000000;
 		break;
@@ -237,7 +237,6 @@ static int mmc_read_ext_csd(struct mmc_card *card)
 		printk(KERN_WARNING "%s: card is mmc v4 but doesn't "
 			"support any high-speed modes.\n",
 			mmc_hostname(card->host));
-		goto out;
 	}
 
 	if (card->ext_csd.rev >= 3) {
diff --git a/include/linux/mmc/mmc.h b/include/linux/mmc/mmc.h
index c02c8db73701..8a49cbf0376d 100644
--- a/include/linux/mmc/mmc.h
+++ b/include/linux/mmc/mmc.h
@@ -268,6 +268,7 @@ struct _mmc_csd {
 
 #define EXT_CSD_CARD_TYPE_26	(1<<0)	/* Card can run at 26MHz */
 #define EXT_CSD_CARD_TYPE_52	(1<<1)	/* Card can run at 52MHz */
+#define EXT_CSD_CARD_TYPE_MASK	0x3	/* Mask out reserved and DDR bits */
 
 #define EXT_CSD_BUS_WIDTH_1	0	/* Card is in 1 bit mode */
 #define EXT_CSD_BUS_WIDTH_4	1	/* Card is in 4 bit mode */
-- 
cgit v1.2.3


From 90fddabf5818367c6bd1fe1b256a10e01827862f Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 24 Mar 2010 09:43:00 +0000
Subject: Document Linux's circular buffering capabilities

Document the circular buffering capabilities available in Linux.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Randy Dunlap <rdunlap@xenotime.net>
Reviewed-by: Stefan Richter <stefanr@s5r6.in-berlin.de>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/circular-buffers.txt | 234 +++++++++++++++++++++++++++++++++++++
 Documentation/memory-barriers.txt  |  20 ++++
 include/linux/circ_buf.h           |   4 +
 3 files changed, 258 insertions(+)
 create mode 100644 Documentation/circular-buffers.txt

(limited to 'include/linux')

diff --git a/Documentation/circular-buffers.txt b/Documentation/circular-buffers.txt
new file mode 100644
index 000000000000..8117e5bf6065
--- /dev/null
+++ b/Documentation/circular-buffers.txt
@@ -0,0 +1,234 @@
+			       ================
+			       CIRCULAR BUFFERS
+			       ================
+
+By: David Howells <dhowells@redhat.com>
+    Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+
+
+Linux provides a number of features that can be used to implement circular
+buffering.  There are two sets of such features:
+
+ (1) Convenience functions for determining information about power-of-2 sized
+     buffers.
+
+ (2) Memory barriers for when the producer and the consumer of objects in the
+     buffer don't want to share a lock.
+
+To use these facilities, as discussed below, there needs to be just one
+producer and just one consumer.  It is possible to handle multiple producers by
+serialising them, and to handle multiple consumers by serialising them.
+
+
+Contents:
+
+ (*) What is a circular buffer?
+
+ (*) Measuring power-of-2 buffers.
+
+ (*) Using memory barriers with circular buffers.
+     - The producer.
+     - The consumer.
+
+
+==========================
+WHAT IS A CIRCULAR BUFFER?
+==========================
+
+First of all, what is a circular buffer?  A circular buffer is a buffer of
+fixed, finite size into which there are two indices:
+
+ (1) A 'head' index - the point at which the producer inserts items into the
+     buffer.
+
+ (2) A 'tail' index - the point at which the consumer finds the next item in
+     the buffer.
+
+Typically when the tail pointer is equal to the head pointer, the buffer is
+empty; and the buffer is full when the head pointer is one less than the tail
+pointer.
+
+The head index is incremented when items are added, and the tail index when
+items are removed.  The tail index should never jump the head index, and both
+indices should be wrapped to 0 when they reach the end of the buffer, thus
+allowing an infinite amount of data to flow through the buffer.
+
+Typically, items will all be of the same unit size, but this isn't strictly
+required to use the techniques below.  The indices can be increased by more
+than 1 if multiple items or variable-sized items are to be included in the
+buffer, provided that neither index overtakes the other.  The implementer must
+be careful, however, as a region more than one unit in size may wrap the end of
+the buffer and be broken into two segments.
+
+
+============================
+MEASURING POWER-OF-2 BUFFERS
+============================
+
+Calculation of the occupancy or the remaining capacity of an arbitrarily sized
+circular buffer would normally be a slow operation, requiring the use of a
+modulus (divide) instruction.  However, if the buffer is of a power-of-2 size,
+then a much quicker bitwise-AND instruction can be used instead.
+
+Linux provides a set of macros for handling power-of-2 circular buffers.  These
+can be made use of by:
+
+	#include <linux/circ_buf.h>
+
+The macros are:
+
+ (*) Measure the remaining capacity of a buffer:
+
+	CIRC_SPACE(head_index, tail_index, buffer_size);
+
+     This returns the amount of space left in the buffer[1] into which items
+     can be inserted.
+
+
+ (*) Measure the maximum consecutive immediate space in a buffer:
+
+	CIRC_SPACE_TO_END(head_index, tail_index, buffer_size);
+
+     This returns the amount of consecutive space left in the buffer[1] into
+     which items can be immediately inserted without having to wrap back to the
+     beginning of the buffer.
+
+
+ (*) Measure the occupancy of a buffer:
+
+	CIRC_CNT(head_index, tail_index, buffer_size);
+
+     This returns the number of items currently occupying a buffer[2].
+
+
+ (*) Measure the non-wrapping occupancy of a buffer:
+
+	CIRC_CNT_TO_END(head_index, tail_index, buffer_size);
+
+     This returns the number of consecutive items[2] that can be extracted from
+     the buffer without having to wrap back to the beginning of the buffer.
+
+
+Each of these macros will nominally return a value between 0 and buffer_size-1,
+however:
+
+ [1] CIRC_SPACE*() are intended to be used in the producer.  To the producer
+     they will return a lower bound as the producer controls the head index,
+     but the consumer may still be depleting the buffer on another CPU and
+     moving the tail index.
+
+     To the consumer it will show an upper bound as the producer may be busy
+     depleting the space.
+
+ [2] CIRC_CNT*() are intended to be used in the consumer.  To the consumer they
+     will return a lower bound as the consumer controls the tail index, but the
+     producer may still be filling the buffer on another CPU and moving the
+     head index.
+
+     To the producer it will show an upper bound as the consumer may be busy
+     emptying the buffer.
+
+ [3] To a third party, the order in which the writes to the indices by the
+     producer and consumer become visible cannot be guaranteed as they are
+     independent and may be made on different CPUs - so the result in such a
+     situation will merely be a guess, and may even be negative.
+
+
+===========================================
+USING MEMORY BARRIERS WITH CIRCULAR BUFFERS
+===========================================
+
+By using memory barriers in conjunction with circular buffers, you can avoid
+the need to:
+
+ (1) use a single lock to govern access to both ends of the buffer, thus
+     allowing the buffer to be filled and emptied at the same time; and
+
+ (2) use atomic counter operations.
+
+There are two sides to this: the producer that fills the buffer, and the
+consumer that empties it.  Only one thing should be filling a buffer at any one
+time, and only one thing should be emptying a buffer at any one time, but the
+two sides can operate simultaneously.
+
+
+THE PRODUCER
+------------
+
+The producer will look something like this:
+
+	spin_lock(&producer_lock);
+
+	unsigned long head = buffer->head;
+	unsigned long tail = ACCESS_ONCE(buffer->tail);
+
+	if (CIRC_SPACE(head, tail, buffer->size) >= 1) {
+		/* insert one item into the buffer */
+		struct item *item = buffer[head];
+
+		produce_item(item);
+
+		smp_wmb(); /* commit the item before incrementing the head */
+
+		buffer->head = (head + 1) & (buffer->size - 1);
+
+		/* wake_up() will make sure that the head is committed before
+		 * waking anyone up */
+		wake_up(consumer);
+	}
+
+	spin_unlock(&producer_lock);
+
+This will instruct the CPU that the contents of the new item must be written
+before the head index makes it available to the consumer and then instructs the
+CPU that the revised head index must be written before the consumer is woken.
+
+Note that wake_up() doesn't have to be the exact mechanism used, but whatever
+is used must guarantee a (write) memory barrier between the update of the head
+index and the change of state of the consumer, if a change of state occurs.
+
+
+THE CONSUMER
+------------
+
+The consumer will look something like this:
+
+	spin_lock(&consumer_lock);
+
+	unsigned long head = ACCESS_ONCE(buffer->head);
+	unsigned long tail = buffer->tail;
+
+	if (CIRC_CNT(head, tail, buffer->size) >= 1) {
+		/* read index before reading contents at that index */
+		smp_read_barrier_depends();
+
+		/* extract one item from the buffer */
+		struct item *item = buffer[tail];
+
+		consume_item(item);
+
+		smp_mb(); /* finish reading descriptor before incrementing tail */
+
+		buffer->tail = (tail + 1) & (buffer->size - 1);
+	}
+
+	spin_unlock(&consumer_lock);
+
+This will instruct the CPU to make sure the index is up to date before reading
+the new item, and then it shall make sure the CPU has finished reading the item
+before it writes the new tail pointer, which will erase the item.
+
+
+Note the use of ACCESS_ONCE() in both algorithms to read the opposition index.
+This prevents the compiler from discarding and reloading its cached value -
+which some compilers will do across smp_read_barrier_depends().  This isn't
+strictly needed if you can be sure that the opposition index will _only_ be
+used the once.
+
+
+===============
+FURTHER READING
+===============
+
+See also Documentation/memory-barriers.txt for a description of Linux's memory
+barrier facilities.
diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt
index 7f5809eddee6..631ad2f1b229 100644
--- a/Documentation/memory-barriers.txt
+++ b/Documentation/memory-barriers.txt
@@ -3,6 +3,7 @@
 			 ============================
 
 By: David Howells <dhowells@redhat.com>
+    Paul E. McKenney <paulmck@linux.vnet.ibm.com>
 
 Contents:
 
@@ -60,6 +61,10 @@ Contents:
 
      - And then there's the Alpha.
 
+ (*) Example uses.
+
+     - Circular buffers.
+
  (*) References.
 
 
@@ -2226,6 +2231,21 @@ The Alpha defines the Linux kernel's memory barrier model.
 See the subsection on "Cache Coherency" above.
 
 
+============
+EXAMPLE USES
+============
+
+CIRCULAR BUFFERS
+----------------
+
+Memory barriers can be used to implement circular buffering without the need
+of a lock to serialise the producer with the consumer.  See:
+
+	Documentation/circular-buffers.txt
+
+for details.
+
+
 ==========
 REFERENCES
 ==========
diff --git a/include/linux/circ_buf.h b/include/linux/circ_buf.h
index a2ed0591fb19..90f2471dc6f2 100644
--- a/include/linux/circ_buf.h
+++ b/include/linux/circ_buf.h
@@ -1,3 +1,7 @@
+/*
+ * See Documentation/circular-buffers.txt for more information.
+ */
+
 #ifndef _LINUX_CIRC_BUF_H
 #define _LINUX_CIRC_BUF_H 1
 
-- 
cgit v1.2.3


From 9c13886665c43600bd0af4b38e33c654e648e078 Mon Sep 17 00:00:00 2001
From: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Date: Thu, 25 Mar 2010 11:17:26 +0100
Subject: netfilter: ip6table_raw: fix table priority

The order of the IPv6 raw table is currently reversed, that makes impossible
to use the NOTRACK target in IPv6: for example if someone enters

ip6tables -t raw -A PREROUTING -p tcp --dport 80 -j NOTRACK

and if we receive fragmented packets then the first fragment will be
untracked and thus skip nf_ct_frag6_gather (and conntrack), while all
subsequent fragments enter nf_ct_frag6_gather and reassembly will never
successfully be finished.

Singed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/netfilter_ipv6.h    | 1 +
 net/ipv6/netfilter/ip6table_raw.c | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h
index d654873aa25a..1f7e300094cd 100644
--- a/include/linux/netfilter_ipv6.h
+++ b/include/linux/netfilter_ipv6.h
@@ -59,6 +59,7 @@
 enum nf_ip6_hook_priorities {
 	NF_IP6_PRI_FIRST = INT_MIN,
 	NF_IP6_PRI_CONNTRACK_DEFRAG = -400,
+	NF_IP6_PRI_RAW = -300,
 	NF_IP6_PRI_SELINUX_FIRST = -225,
 	NF_IP6_PRI_CONNTRACK = -200,
 	NF_IP6_PRI_MANGLE = -150,
diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c
index aef31a29de9e..b9cf7cd61923 100644
--- a/net/ipv6/netfilter/ip6table_raw.c
+++ b/net/ipv6/netfilter/ip6table_raw.c
@@ -13,7 +13,7 @@ static const struct xt_table packet_raw = {
 	.valid_hooks = RAW_VALID_HOOKS,
 	.me = THIS_MODULE,
 	.af = NFPROTO_IPV6,
-	.priority = NF_IP6_PRI_FIRST,
+	.priority = NF_IP6_PRI_RAW,
 };
 
 /* The work comes in here from netfilter.c. */
-- 
cgit v1.2.3


From 71c5c1595c04852d6fbf3c4882b47b30b61a4d32 Mon Sep 17 00:00:00 2001
From: Brandon L Black <blblack@gmail.com>
Date: Fri, 26 Mar 2010 16:18:03 +0000
Subject: net: Add MSG_WAITFORONE flag to recvmmsg

Add new flag MSG_WAITFORONE for the recvmmsg() syscall.
When this flag is specified for a blocking socket, recvmmsg()
will only block until at least 1 packet is available.  The
default behavior is to block until all vlen packets are
available.  This flag has no effect on non-blocking sockets
or when used in combination with MSG_DONTWAIT.

Signed-off-by: Brandon L Black <blblack@gmail.com>
Acked-by: Ulrich Drepper <drepper@redhat.com>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Acked-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/socket.h | 1 +
 net/socket.c           | 4 ++++
 2 files changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/socket.h b/include/linux/socket.h
index 7b3aae2052a6..354cc5617f8b 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -255,6 +255,7 @@ struct ucred {
 #define MSG_ERRQUEUE	0x2000	/* Fetch message from error queue */
 #define MSG_NOSIGNAL	0x4000	/* Do not generate SIGPIPE */
 #define MSG_MORE	0x8000	/* Sender will send more */
+#define MSG_WAITFORONE	0x10000	/* recvmmsg(): block until 1+ packets avail */
 
 #define MSG_EOF         MSG_FIN
 
diff --git a/net/socket.c b/net/socket.c
index 769c386bd428..f55ffe9f8c87 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -2135,6 +2135,10 @@ int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
 			break;
 		++datagrams;
 
+		/* MSG_WAITFORONE turns on MSG_DONTWAIT after one packet */
+		if (flags & MSG_WAITFORONE)
+			flags |= MSG_DONTWAIT;
+
 		if (timeout) {
 			ktime_get_ts(timeout);
 			*timeout = timespec_sub(end_time, *timeout);
-- 
cgit v1.2.3


From a53f4f9efaeb1d87cfae066346979d4d70e1abe9 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Mon, 29 Mar 2010 13:08:52 +0100
Subject: SLOW_WORK: CONFIG_SLOW_WORK_PROC should be CONFIG_SLOW_WORK_DEBUG

CONFIG_SLOW_WORK_PROC was changed to CONFIG_SLOW_WORK_DEBUG, but not in all
instances.  Change the remaining instances.  This makes the debugfs file
display the time mark and the owner's description again.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/fscache/object.c           | 6 +++---
 fs/fscache/operation.c        | 4 ++--
 include/linux/fscache-cache.h | 2 +-
 kernel/slow-work.h            | 8 ++++----
 4 files changed, 10 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/fs/fscache/object.c b/fs/fscache/object.c
index e513ac599c8e..0b589a9b4ffc 100644
--- a/fs/fscache/object.c
+++ b/fs/fscache/object.c
@@ -53,7 +53,7 @@ const char fscache_object_states_short[FSCACHE_OBJECT__NSTATES][5] = {
 static void fscache_object_slow_work_put_ref(struct slow_work *);
 static int  fscache_object_slow_work_get_ref(struct slow_work *);
 static void fscache_object_slow_work_execute(struct slow_work *);
-#ifdef CONFIG_SLOW_WORK_PROC
+#ifdef CONFIG_SLOW_WORK_DEBUG
 static void fscache_object_slow_work_desc(struct slow_work *, struct seq_file *);
 #endif
 static void fscache_initialise_object(struct fscache_object *);
@@ -69,7 +69,7 @@ const struct slow_work_ops fscache_object_slow_work_ops = {
 	.get_ref	= fscache_object_slow_work_get_ref,
 	.put_ref	= fscache_object_slow_work_put_ref,
 	.execute	= fscache_object_slow_work_execute,
-#ifdef CONFIG_SLOW_WORK_PROC
+#ifdef CONFIG_SLOW_WORK_DEBUG
 	.desc		= fscache_object_slow_work_desc,
 #endif
 };
@@ -364,7 +364,7 @@ static void fscache_object_slow_work_execute(struct slow_work *work)
 /*
  * describe an object for slow-work debugging
  */
-#ifdef CONFIG_SLOW_WORK_PROC
+#ifdef CONFIG_SLOW_WORK_DEBUG
 static void fscache_object_slow_work_desc(struct slow_work *work,
 					  struct seq_file *m)
 {
diff --git a/fs/fscache/operation.c b/fs/fscache/operation.c
index 313e79a14266..9f6c928d4586 100644
--- a/fs/fscache/operation.c
+++ b/fs/fscache/operation.c
@@ -500,7 +500,7 @@ static void fscache_op_execute(struct slow_work *work)
 /*
  * describe an operation for slow-work debugging
  */
-#ifdef CONFIG_SLOW_WORK_PROC
+#ifdef CONFIG_SLOW_WORK_DEBUG
 static void fscache_op_desc(struct slow_work *work, struct seq_file *m)
 {
 	struct fscache_operation *op =
@@ -517,7 +517,7 @@ const struct slow_work_ops fscache_op_slow_work_ops = {
 	.get_ref	= fscache_op_get_ref,
 	.put_ref	= fscache_op_put_ref,
 	.execute	= fscache_op_execute,
-#ifdef CONFIG_SLOW_WORK_PROC
+#ifdef CONFIG_SLOW_WORK_DEBUG
 	.desc		= fscache_op_desc,
 #endif
 };
diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h
index 7be0c6fbe880..c57db27ac861 100644
--- a/include/linux/fscache-cache.h
+++ b/include/linux/fscache-cache.h
@@ -105,7 +105,7 @@ struct fscache_operation {
 	/* operation releaser */
 	fscache_operation_release_t release;
 
-#ifdef CONFIG_SLOW_WORK_PROC
+#ifdef CONFIG_SLOW_WORK_DEBUG
 	const char *name;		/* operation name */
 	const char *state;		/* operation state */
 #define fscache_set_op_name(OP, N)	do { (OP)->name  = (N); } while(0)
diff --git a/kernel/slow-work.h b/kernel/slow-work.h
index 321f3c59d732..a29ebd1ef41d 100644
--- a/kernel/slow-work.h
+++ b/kernel/slow-work.h
@@ -43,28 +43,28 @@ extern void slow_work_new_thread_desc(struct slow_work *, struct seq_file *);
  */
 static inline void slow_work_set_thread_pid(int id, pid_t pid)
 {
-#ifdef CONFIG_SLOW_WORK_PROC
+#ifdef CONFIG_SLOW_WORK_DEBUG
 	slow_work_pids[id] = pid;
 #endif
 }
 
 static inline void slow_work_mark_time(struct slow_work *work)
 {
-#ifdef CONFIG_SLOW_WORK_PROC
+#ifdef CONFIG_SLOW_WORK_DEBUG
 	work->mark = CURRENT_TIME;
 #endif
 }
 
 static inline void slow_work_begin_exec(int id, struct slow_work *work)
 {
-#ifdef CONFIG_SLOW_WORK_PROC
+#ifdef CONFIG_SLOW_WORK_DEBUG
 	slow_work_execs[id] = work;
 #endif
 }
 
 static inline void slow_work_end_exec(int id, struct slow_work *work)
 {
-#ifdef CONFIG_SLOW_WORK_PROC
+#ifdef CONFIG_SLOW_WORK_DEBUG
 	write_lock(&slow_work_execs_lock);
 	slow_work_execs[id] = NULL;
 	write_unlock(&slow_work_execs_lock);
-- 
cgit v1.2.3


From de329820e920cd9cfbc2127cad26a37026260cce Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Mon, 29 Mar 2010 14:30:19 -0700
Subject: ext3: fix broken handling of EXT3_STATE_NEW

In commit 9df93939b735 ("ext3: Use bitops to read/modify
EXT3_I(inode)->i_state") ext3 changed its internal 'i_state' variable to
use bitops for its state handling.  However, unline the same ext4
change, it didn't actually change the name of the field when it changed
the semantics of it.

As a result, an old use of 'i_state' remained in fs/ext3/ialloc.c that
initialized the field to EXT3_STATE_NEW.  And that does not work
_at_all_ when we're now working with individually named bits rather than
values that get masked.  So the code tried to mark the state to be new,
but in actual fact set the field to EXT3_STATE_JDATA.  Which makes no
sense at all, and screws up all the code that checks whether the inode
was newly allocated.

In particular, it made the xattr code unhappy, and caused various random
behavior, like apparently

	https://bugzilla.redhat.com/show_bug.cgi?id=577911

So fix the initialization, and rename the field to match ext4 so that we
don't have this happen again.

Cc: James Morris <jmorris@namei.org>
Cc: Stephen Smalley <sds@tycho.nsa.gov>
Cc: Daniel J Walsh <dwalsh@redhat.com>
Cc: Eric Paris <eparis@redhat.com>
Cc: Jan Kara <jack@suse.cz>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ext3/ialloc.c          | 4 +++-
 fs/ext3/inode.c           | 2 +-
 include/linux/ext3_fs.h   | 6 +++---
 include/linux/ext3_fs_i.h | 2 +-
 4 files changed, 8 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c
index ef9008b885b5..0d0e97ed3ff6 100644
--- a/fs/ext3/ialloc.c
+++ b/fs/ext3/ialloc.c
@@ -582,7 +582,9 @@ got:
 	inode->i_generation = sbi->s_next_generation++;
 	spin_unlock(&sbi->s_next_gen_lock);
 
-	ei->i_state = EXT3_STATE_NEW;
+	ei->i_state_flags = 0;
+	ext3_set_inode_state(inode, EXT3_STATE_NEW);
+
 	ei->i_extra_isize =
 		(EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE) ?
 		sizeof(struct ext3_inode) - EXT3_GOOD_OLD_INODE_SIZE : 0;
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 7f920b7263a4..ea33bdf0a300 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -2811,7 +2811,7 @@ struct inode *ext3_iget(struct super_block *sb, unsigned long ino)
 	inode->i_mtime.tv_sec = (signed)le32_to_cpu(raw_inode->i_mtime);
 	inode->i_atime.tv_nsec = inode->i_ctime.tv_nsec = inode->i_mtime.tv_nsec = 0;
 
-	ei->i_state = 0;
+	ei->i_state_flags = 0;
 	ei->i_dir_start_lookup = 0;
 	ei->i_dtime = le32_to_cpu(raw_inode->i_dtime);
 	/* We now have enough fields to check if the inode was active or not.
diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h
index cac84b006667..5f494b465097 100644
--- a/include/linux/ext3_fs.h
+++ b/include/linux/ext3_fs.h
@@ -565,17 +565,17 @@ enum {
 
 static inline int ext3_test_inode_state(struct inode *inode, int bit)
 {
-	return test_bit(bit, &EXT3_I(inode)->i_state);
+	return test_bit(bit, &EXT3_I(inode)->i_state_flags);
 }
 
 static inline void ext3_set_inode_state(struct inode *inode, int bit)
 {
-	set_bit(bit, &EXT3_I(inode)->i_state);
+	set_bit(bit, &EXT3_I(inode)->i_state_flags);
 }
 
 static inline void ext3_clear_inode_state(struct inode *inode, int bit)
 {
-	clear_bit(bit, &EXT3_I(inode)->i_state);
+	clear_bit(bit, &EXT3_I(inode)->i_state_flags);
 }
 #else
 /* Assume that user mode programs are passing in an ext3fs superblock, not
diff --git a/include/linux/ext3_fs_i.h b/include/linux/ext3_fs_i.h
index 7679acdb519a..f42c098aed8d 100644
--- a/include/linux/ext3_fs_i.h
+++ b/include/linux/ext3_fs_i.h
@@ -87,7 +87,7 @@ struct ext3_inode_info {
 	 * near to their parent directory's inode.
 	 */
 	__u32	i_block_group;
-	unsigned long	i_state;	/* Dynamic state flags for ext3 */
+	unsigned long	i_state_flags;	/* Dynamic state flags for ext3 */
 
 	/* block reservation info */
 	struct ext3_block_alloc_info *i_block_alloc_info;
-- 
cgit v1.2.3